When we use MODIS data for research, we rarely work with just one or a few days of data; we usually need several years. So in this post I will introduce how to download MODIS data in batch.
For how to register a MODIS account and for the commonly used MODIS download sources, see my previous post 《【MODIS】MODIS数据的常用下载源》 (Common Download Sources for MODIS Data), which covers them in detail.
If you still have any questions, leave a comment below or contact me by private message.
Whichever method you use, you first need to generate your own App Key so that your batch downloads can pass the MODIS website's authentication.
Click Profile, then App Keys, to open the App Key generation page.
Enter any string in the red box and click the Create button in the green box; a string of characters, your App Key, will be generated in the purple box. Write this App Key down.
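Before starting a large batch download, it is worth checking that the App Key is accepted. The snippet below is only a minimal sketch using the Python standard library: it sends a request with the same "Authorization: Bearer" header used later in this article against the example data directory used below, and YOUR_APP_KEY is a placeholder you must replace with the key you just generated.

# Minimal sketch: verify that an App Key is accepted by the LAADS archive.
# The URL is the example data directory used later in this article;
# replace YOUR_APP_KEY with the App Key you just generated.
from urllib.request import Request, urlopen
from urllib.error import HTTPError

URL = 'https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/61/MOD04_3K/2019/001/'
TOKEN = 'YOUR_APP_KEY'  # placeholder

req = Request(URL, headers={'Authorization': 'Bearer ' + TOKEN})
try:
    with urlopen(req) as resp:
        print('App Key accepted, HTTP status:', resp.getcode())
except HTTPError as e:
    # 400/401/403 usually means the key was rejected or mistyped
    print('Request failed, HTTP status:', e.code)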
Open the data URL and click "See wget Download Command" (red box); the content shown in the green box will appear. Replace ***APP_KEY*** (purple box) with the App Key we just generated, and replace
***TARGET_DIRECTORY_ON_YOUR_FILE_SYSTEM*** with the directory we want to download into.
It then becomes the following Linux command:
wget -e robots=off -m -np -R .html,.tmp -nH --cut-dirs=3 "https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/61/MOD04_3K/2019/001/" --header "Authorization: Bearer 0C20DA2C-1A5D-11EA-A18D-DD1" -P modis_download
Running this command on a Linux system will download all the files under that page.
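In practice we need far more than one day of data, so you can build one such wget command per day and run them in a loop. The code below is only a rough sketch of that idea in Python, using subprocess to invoke the same wget options shown above; the App Key and output directory are placeholders that you must replace with your own values.

# Minimal sketch: run the wget command above once per day (days 001-010 of 2019 here).
# YOUR_APP_KEY and modis_download are placeholders; extend the range for a full year.
import subprocess

TOKEN = 'YOUR_APP_KEY'      # placeholder App Key
OUT_DIR = 'modis_download'  # target directory
BASE = 'https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/61/MOD04_3K/2019/'

for day in range(1, 11):
    url = BASE + '%03d/' % day  # day of year, zero padded: 001, 002, ...
    cmd = ['wget', '-e', 'robots=off', '-m', '-np', '-R', '.html,.tmp',
           '-nH', '--cut-dirs=3', url,
           '--header', 'Authorization: Bearer ' + TOKEN,
           '-P', OUT_DIR]
    subprocess.run(cmd, check=True)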
Source of the Python code: link
Save the code below as modis_download.py, then run:
python modis_download.py -s https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/61/MOD04_3K/2019/001/ -d modis_download -t 0C20DA2C-1A5D-11EA-A18D-DD1
-s: the URL of the data page to download
-d: the directory to download into
-t: your App Key
from __future__ import (division, print_function, absolute_import, unicode_literals)

import argparse
import os
import os.path
import shutil
import sys

try:
    from StringIO import StringIO  # python2
except ImportError:
    from io import StringIO        # python3

################################################################################

USERAGENT = 'tis/download.py_1.0--' + sys.version.replace('\n', '').replace('\r', '')


def geturl(url, token=None, out=None):
    """GET url with an optional Bearer token; return the body as text,
    or stream it into the file object `out` if one is given."""
    headers = {'user-agent': USERAGENT}
    if token is not None:
        headers['Authorization'] = 'Bearer ' + token
    try:
        import ssl
        CTX = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2)
        if sys.version_info.major == 2:
            import urllib2
            try:
                fh = urllib2.urlopen(urllib2.Request(url, headers=headers), context=CTX)
                if out is None:
                    return fh.read()
                else:
                    shutil.copyfileobj(fh, out)
            except urllib2.HTTPError as e:
                print('HTTP GET error code: %d' % e.code, file=sys.stderr)
                print('HTTP GET error message: %s' % e.message, file=sys.stderr)
            except urllib2.URLError as e:
                print('Failed to make request: %s' % e.reason, file=sys.stderr)
            return None
        else:
            from urllib.request import urlopen, Request
            from urllib.error import URLError, HTTPError
            try:
                fh = urlopen(Request(url, headers=headers), context=CTX)
                if out is None:
                    return fh.read().decode('utf-8')
                else:
                    shutil.copyfileobj(fh, out)
            except HTTPError as e:
                print('HTTP GET error code: %d' % e.code, file=sys.stderr)
                print('HTTP GET error message: %s' % e.reason, file=sys.stderr)
            except URLError as e:
                print('Failed to make request: %s' % e.reason, file=sys.stderr)
            return None
    except AttributeError:
        # OS X Python 2 and 3 don't support tlsv1.1+ therefore... curl
        import subprocess
        try:
            args = ['curl', '--fail', '-sS', '-L', '--get', url]
            for (k, v) in headers.items():
                args.extend(['-H', ': '.join([k, v])])
            if out is None:
                # python3's subprocess.check_output returns stdout as a byte string
                result = subprocess.check_output(args)
                return result.decode('utf-8') if isinstance(result, bytes) else result
            else:
                subprocess.call(args, stdout=out)
        except subprocess.CalledProcessError as e:
            print('curl GET error message: ' + (e.message if hasattr(e, 'message') else e.output), file=sys.stderr)
        return None

################################################################################

DESC = "This script will recursively download all files if they don't exist from a LAADS URL and stores them to the specified path"


def sync(src, dest, tok):
    '''synchronize src url with dest directory'''
    # ask the server for a machine-readable listing of the directory
    try:
        import csv
        files = [f for f in csv.DictReader(StringIO(geturl('%s.csv' % src, tok)), skipinitialspace=True)]
    except ImportError:
        import json
        files = json.loads(geturl(src + '.json', tok))
    # use os.path since python 2/3 both support it while pathlib is 3.4+
    for f in files:
        # currently we use filesize of 0 to indicate directory
        filesize = int(f['size'])
        path = os.path.join(dest, f['name'])
        url = src + '/' + f['name']
        if filesize == 0:
            # directory entry: create it locally and recurse into it
            try:
                print('creating dir:', path)
                os.mkdir(path)
                sync(src + '/' + f['name'], path, tok)
            except IOError as e:
                print("mkdir `%s': %s" % (e.filename, e.strerror), file=sys.stderr)
                sys.exit(-1)
        else:
            # regular file: download it unless it is already present
            try:
                if not os.path.exists(path):
                    print('downloading: ', path)
                    with open(path, 'w+b') as fh:
                        geturl(url, tok, fh)
                else:
                    print('skipping: ', path)
            except IOError as e:
                print("open `%s': %s" % (e.filename, e.strerror), file=sys.stderr)
                sys.exit(-1)
    return 0


def _main(argv):
    parser = argparse.ArgumentParser(prog=argv[0], description=DESC)
    parser.add_argument('-s', '--source', dest='source', metavar='URL', help='Recursively download files at URL',
                        required=True)
    parser.add_argument('-d', '--destination', dest='destination', metavar='DIR',
                        help='Store directory structure in DIR', required=True)
    parser.add_argument('-t', '--token', dest='token', metavar='TOK', help='Use app token TOK to authenticate',
                        required=True)
    args = parser.parse_args(argv[1:])
    if not os.path.exists(args.destination):
        os.makedirs(args.destination)
    return sync(args.source, args.destination, args.token)


if __name__ == '__main__':
    try:
        sys.exit(_main(sys.argv))
    except KeyboardInterrupt:
        sys.exit(-1)
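Since the script exposes sync(src, dest, tok), you can also drive it from another small Python file to pull a whole range of days in one run, which is usually what we want for multi-year studies. The sketch below is only an illustration and assumes the code above has been saved as modis_download.py next to it; the App Key is again a placeholder.

# Minimal sketch: reuse sync() from modis_download.py to download several days at once.
# Assumes the script above is saved as modis_download.py in the same directory;
# replace YOUR_APP_KEY with your own App Key.
import os
from modis_download import sync

TOKEN = 'YOUR_APP_KEY'  # placeholder App Key
BASE = 'https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/61/MOD04_3K/2019'

for day in range(1, 11):  # days 001-010; extend the range as needed
    src = '%s/%03d/' % (BASE, day)
    dest = os.path.join('modis_download', '%03d' % day)
    if not os.path.exists(dest):
        os.makedirs(dest)
    sync(src, dest, TOKEN)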