NetCDF(Network Common Data Form)即网络通用数据格式。NetCDF 文件中的数据以数组形式存储。例如:某个位置处随时间变化的温度以一维数组的形式存储;某个区域内在指定时间的温度以二维数组的形式存储。
四维 (4D) 数据(如某个区域内随时间和高度变化的温度)以一系列二维数组的形式存储。
本次使用的样例数据为2018-2020年的月均温度NetCDF数据。
下面主要介绍如何使用Python和R语言批量将上述nc格式的温度数据转为常用的tif格式数据。
import os
import netCDF4 as nc
import numpy as np
from osgeo import gdal,osr,ogr
import glob
def nc2tif(data, Output_folder):
    """Convert every time step of a NetCDF temperature file to a GeoTIFF.

    The input is expected to expose 1-D ``lon`` and ``lat`` coordinate
    variables and a ``tmp`` variable laid out as ``(time, lat, lon)``.  In
    the sample data this is ``int16 tmp(time, lat, lon)`` with shape
    ``(12, 5146, 7849)`` and a ``missing_value`` attribute of ``-32768``.

    One single-band Int16 GeoTIFF in EPSG:4326 is written per time step,
    named ``<input base name>_<1-based time index>.tif``.

    Args:
        data: Path of the input ``.nc`` file.
        Output_folder: Directory that receives the GeoTIFFs. It is created
            when it does not exist yet.

    Raises:
        ValueError: If ``lat`` or ``lon`` holds fewer than two coordinates,
            because the pixel size cannot be derived from a single value.
        RuntimeError: If GDAL cannot create an output file.
    """
    os.makedirs(Output_folder, exist_ok=True)
    # basename() copes with both '/' and '\\' separators on Windows, unlike
    # a manual split on '\\'. Everything after the first dot is dropped.
    base_name = os.path.basename(data).split('.')[0]

    # The driver and the projection are identical for every output file.
    driver = gdal.GetDriverByName('GTiff')
    prj = osr.SpatialReference()
    prj.ImportFromEPSG(4326)
    projection_wkt = prj.ExportToWkt()

    # The context manager closes the NetCDF file even if a write fails.
    with nc.Dataset(data) as tmp_data:
        Lat_data = np.asarray(tmp_data.variables['lat'][:])
        Lon_data = np.asarray(tmp_data.variables['lon'][:])
        tmp_var = tmp_data.variables['tmp']

        Num_lat = len(Lat_data)
        Num_lon = len(Lon_data)
        if Num_lat < 2 or Num_lon < 2:
            raise ValueError(
                'lat and lon must each hold at least two coordinates '
                'to derive the pixel size: ' + str(data)
            )

        Lonmin, Lonmax = float(Lon_data.min()), float(Lon_data.max())
        Latmin, Latmax = float(Lat_data.min()), float(Lat_data.max())

        # N cell centres span N - 1 pixel widths.
        Lat_res = (Latmax - Latmin) / (Num_lat - 1)
        Lon_res = (Lonmax - Lonmin) / (Num_lon - 1)

        # lon/lat are treated as cell centres, whereas the GDAL geotransform
        # origin is the outer corner of the top-left pixel: shift by half a
        # pixel. The row step is negative because rows run north to south.
        geotransform = (
            Lonmin - Lon_res / 2.0, Lon_res, 0.0,
            Latmax + Lat_res / 2.0, 0.0, -Lat_res,
        )

        # GeoTIFF rows go north -> south and columns west -> east; flip the
        # data when the file stores an axis the other way round.
        flip_rows = Lat_data[0] < Lat_data[-1]
        flip_cols = Lon_data[0] > Lon_data[-1]

        # Record the file's missing_value (when it is a single number) as
        # the GeoTIFF NoData value so GIS software can mask those cells.
        nodata = getattr(tmp_var, 'missing_value', None)
        if nodata is not None:
            try:
                nodata = float(np.asarray(nodata).reshape(-1)[0])
            except (TypeError, ValueError, IndexError):
                nodata = None

        # Read one time step at a time instead of the whole cube to keep
        # peak memory at a single 2-D slice.
        for i in range(tmp_var.shape[0]):
            band_data = np.asarray(tmp_var[i])
            if flip_rows:
                band_data = band_data[::-1, :]
            if flip_cols:
                band_data = band_data[:, ::-1]

            out_tif_name = os.path.join(
                Output_folder, '{}_{}.tif'.format(base_name, i + 1)
            )
            out_tif = driver.Create(
                out_tif_name, Num_lon, Num_lat, 1, gdal.GDT_Int16
            )
            if out_tif is None:
                raise RuntimeError('GDAL could not create ' + out_tif_name)

            out_tif.SetGeoTransform(geotransform)
            out_tif.SetProjection(projection_wkt)

            band = out_tif.GetRasterBand(1)
            if nodata is not None:
                band.SetNoDataValue(nodata)
            band.WriteArray(np.ascontiguousarray(band_data))

            out_tif.FlushCache()  # push the pixels to disk
            band = None
            out_tif = None  # dropping the last reference closes the file
def main():
    """Convert every ``.nc`` file in the input folder to GeoTIFFs.

    The input and output folders are hard-coded below. Files are processed
    in sorted order so repeated runs behave the same way.
    """
    Input_folder = 'G:/learnpy/data/'
    Output_folder = 'G:/learnpy/data/nc/nc2tif'
    # Collect all NetCDF files of the input folder.
    data_list = sorted(glob.glob(os.path.join(Input_folder, '*.nc')))
    for data in data_list:
        nc2tif(data, Output_folder)
        print(data+'转tif成功, 你真棒!')


# Only run the conversion when executed as a script, not when imported.
if __name__ == '__main__':
    main()
# Batch-convert every layer of the "tmp" variable of each NetCDF file in
# `wd` to a 16-bit signed integer GeoTIFF named <file name without ext>_<layer>.
wd <- "G:/Rdata/Tmp/"
setwd(wd)
library(pacman)
p_load(raster, ncdf4, sf, ggplot2, RColorBrewer)
# `pattern` is a regular expression: escape the dot and anchor the end so
# that only files ending in ".nc" are picked up.
nc_data <- list.files(wd, pattern = "\\.nc$")
nc_data
# [1] "tmp_2018.nc" "tmp_2019.nc" "tmp_2020.nc"
for (i in nc_data)
{
  nc_raster <- brick(i, varname = "tmp")
  # Use the full file name without its extension ("tmp_2018"). A fixed
  # substr(i, 1, 7) yields "tmp_201" for every year, so the files of
  # different years would overwrite each other.
  base_name <- tools::file_path_sans_ext(i)
  # Iterate over the layers actually present instead of assuming 12.
  for (j in seq_len(nlayers(nc_raster)))
  {
    writeRaster(nc_raster[[j]], filename = paste(base_name, j, sep = "_"),
                format = "GTiff", overwrite = TRUE, dataType = "INT2S")
  }
  print(paste(i,'转换tif成功,你太棒了!'))
}
写在前面:
因为需要用到JBTools和ncdf.tools两个包,而它们在CRAN上只剩存档版本,所以需要先下载到本地再安装。安装前需要先安装foreach和gplots这两个包,然后依次安装JBTools和ncdf.tools(二者存在依赖关系)。下载链接如下:
https://cran.r-project.org/src/contrib/Archive/JBTools/
https://cran.r-project.org/src/contrib/Archive/ncdf.tools/
代码部分:
# Batch-convert every layer of the "ta" variable of each NetCDF file in
# `wd` to a GeoTIFF named after the layer's time stamp.
wd <- "D:/Rdata/nc/"
setwd(wd)
library(pacman)
p_load(raster, ncdf4, sf, ggplot2, RColorBrewer, foreach, gplots, rgdal)
# Load the JBTools and ncdf.tools packages (installed from local archives).
p_load(JBTools, ncdf.tools)

# Build one file-name-safe time label ("YYYY-mm-dd_HH_MM_SS") per time step
# of a NetCDF file. The time axis is read as days since 1850-01-01 UTC.
time_labels <- function(nc_path) {
  nc_file <- nc_open(nc_path)
  on.exit(nc_close(nc_file))  # always release the file handle
  time_values <- ncvar_get(nc_file, "time")
  stamps <- convertDateNcdf2R(time_values, units = "days",
                              origin = as.POSIXct("1850-01-01 00:00:00", tz = "UTC"),
                              time.format = "%Y-%m-%d %H:%M:%S")
  # Format explicitly so the label does not depend on how as.character()
  # prints date-times.
  format(as.POSIXct(stamps, tz = "UTC"), "%Y-%m-%d_%H_%M_%S", tz = "UTC")
}

# Inspect one sample file.
atm2015 <- nc_open("atm_ssp245_2015_01.nc4")
View(atm2015) # 4-D data, 8 variables
names(atm2015$var) # list the variables
# "hur" "ps" "psl" "ta" "tos" "ua" "va" "zg"
time = ncvar_get(atm2015,"time")
date = convertDateNcdf2R(time, units = "days",
                         origin = as.POSIXct("1850-01-01 00:00:00", tz = "UTC"),
                         time.format ="%Y-%m-%d %H:%M:%S")
date1 = gsub(":","_",date)
date2 = gsub(" ","_",date1)
ta = atm2015$var$ta
nc_close(atm2015)

# `pattern` is a regular expression: escape the dot and anchor the end so
# that only files ending in ".nc" or ".nc4" are picked up.
nc_data <- list.files(wd, pattern = "\\.nc4?$")
nc_data
for (i in nc_data)
{
  nc_raster <- brick(i, varname = "ta")
  # Read the time labels from the file being converted. Reusing the labels
  # of the sample file would misname, and overwrite, other files' layers.
  layer_names <- time_labels(i)
  # Iterate over the layers actually present instead of assuming 123.
  n_layers <- min(nlayers(nc_raster), length(layer_names))
  if (nlayers(nc_raster) != length(layer_names))
  {
    warning(paste(i, ": layer count and time-step count differ"))
  }
  for (j in seq_len(n_layers))
  {
    # NOTE(review): INT2S stores 16-bit signed integers; if "ta" holds
    # fractional values they will be rounded - confirm this is intended.
    writeRaster(nc_raster[[j]], filename = layer_names[[j]], format = "GTiff",
                overwrite = TRUE, dataType = "INT2S")
  }
  print(paste(i,'转换tif成功,你太棒了!'))
}
# Visualization
ta1 <- raster("D:/Rdata/nc/2015-01-01_06_00_00.tif")
crs(ta1)
# CRS arguments: +proj=longlat +datum=WGS84 +no_defs
plot(ta1)