【InSAR 笔记2】哨兵一号精轨数据批量下载【修改0704】

1.原文链接

2.过程

  • 新建txt,重命名为download.py
  • 复制代码
  • python download.py
  • 根据错误修改代码,直至成功

3.修改

问题:重复下载
解决:加入对 已下载文件及其大小的判断,跳过完整下载的EOF
依然存在的问题:下载速度特别慢

# -*- coding:utf-8 -*-
# Author:PasserQi
# Time:2019-4-5
# 下载文件夹下哨兵数据的精轨数据
# 须知:文件夹下的哨兵数据需解压。不想解压可以修改程序的第43行,.SAFE该为.zip
import urllib
from bs4 import BeautifulSoup
import re
import os
import datetime
import time

from urllib.request import Request, urlopen


# 需要修改的参数
dir_path = r'/media/lll/My Passport/1.s1_data' # 哨兵数据存在的目录(linux)
# dir_path = r'D:\1.s1_data' # 哨兵数据存在的目录(win10)
out_path = r'/media/lll/My Passport/2.s1_precision_oribit_data' #精轨数据保存的目录(linux)
# out_path = r'D:\2.s1_precision_oribit_data' # 精轨数据保存的目录(win10)
FILE_TYPE = ".zip" #文件格式:.SAFE .zip
IsDownload = True #是否下载:True False

download_urls = []
error_url = []
url_prefix = 'https://qc.sentinel1.eo.esa.int/aux_poeorb/' #下载地址
def download(dest_dir, url):
    print("downloading from:{}\n\t to {}\n".format(url, dest_dir))
    try:
        urllib.request.urlretrieve(url, dest_dir, callbackfunc)
    except:
        error_url.append(url)
        print ('\tError retrieving the URL:', dest_dir)
    else: # 没有异常
        print ("\t[done]")
        if url in error_url: #在错误列表里
            error_url.remove(url)
def callbackfunc(blocknum, blocksize, totalsize):
    '''回调函数
    @blocknum: 已经下载的数据块
    @blocksize: 数据块的大小
    @totalsize: 远程文件的大小
    '''
    percent = 100.0 * blocknum * blocksize / totalsize
    if percent > 100:
        percent = 100
    print ("%.2f%%"% percent)

def get_yestoday(mytime):
    myday = datetime.datetime( int(mytime[0:4]),int(mytime[4:6]),int(mytime[6:8]) )
    delta = datetime.timedelta(days=-1)
    my_yestoday = myday + delta
    my_yes_time = my_yestoday.strftime('%Y%m%d')
    return my_yes_time

def get_total_size(response):
    try:      
        file_size = response.info().getheader('Content-Length').strip()
    except AttributeError:
        try:
             file_size = response.getheader('Content-Length').strip()
        except AttributeError:
             print ("> Problem getting size")
             return None
    return int(file_size)
if __name__ == '__main__':
    # 获得files
    files = os.listdir(dir_path)

    #files = [
    #   "S1A_IW_SLC__1SDV_20180201T101712_20180201T101742_020412_022E1C_43FD.SAFE",
    #   "S1A_IW_SLC__1SDV_20180213T101712_20180213T101742_020587_0233BB_CA75.SAFE",
    #   "S1A_IW_SLC__1SDV_20180309T101712_20180309T101742_020937_023ED6_693E.SAFE",
    #   ]
                 
    for file in files:
        if not file.endswith(FILE_TYPE):
            continue
         
        # ###########################
        # 按文件名上的信息查找EOF
        # 拼接URL
        url_param_json = {}
        url_param_json['sentinel1__mission'] = file[0:3]
        date = re.findall(r"\d{8}",file)[0]

        # 若参数为20170316,则搜索的是20170317的数据
        # 所以参数应该提前一天
        # 求date的前一天
        date = get_yestoday(date)

        # 在字符串指定位置插入指定字符
        # 例:20170101 --> 2017-01-01
        tmp = list(date)
        tmp.insert(4,'-');tmp.insert(7,'-')
        date = "".join(tmp)
        url_param_json['validity_start'] = date

        # 获得EOF下载网址
        url_param = urllib.parse.urlencode(url_param_json) #url参数
        url = 'https://qc.sentinel1.eo.esa.int/aux_poeorb/?%s' % url_param #拼接
        print ("url:{}".format(url))
        html = urllib.request.urlopen(url)  # 获取html
        dom = BeautifulSoup(html, 'lxml') # 解析html文档
        a_list = dom.findAll("a")  # 找出
        eof_lists = [a['href'] for a in a_list if a['href'].endswith('.EOF')]  # 找出EOF
        for eof in eof_lists:
            if IsDownload:
                eof_name = eof.split('/')[-1] #名字
                savefile = os.path.join(out_path, eof_name) #保存路径
                
                
             # see if we've already download this file and if it is that it is the correct size
                download_file = os.path.basename(eof).split('?')[0]
                print(eof)
                if os.path.isfile(download_file):
                    try:
                        request = Request(eof)
                        request.get_method = lambda : 'HEAD'
                        response = urlopen(request, timeout=30)
                        remote_size = get_total_size(response)
                     # Check that we were able to derive a size.
                        if remote_size:
                            local_size = os.path.getsize(download_file)
                            if remote_size < (local_size+(local_size*.01)) and remote_size > (local_size-(local_size*.01)):
                                print (" > Download file {0} exists! \n > Skipping download of {1}. ".format(download_file, eof))
        #                         return None,None
                                break
                             #partial file size wasn't full file size, lets blow away the chunk and start again
                            print (" > Found {0} but it wasn't fully downloaded. Removing file and downloading again.".format(download_file))
                            os.remove(download_file)       
                    except ssl.CertificateError as e:
                        print (" > ERROR: {0}".format(e))
                        print (" > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag")
        #                 return False,None
                        break
                    except HTTPError as e:
                        if e.code == 401:
                            print (" > IMPORTANT: Your user may not have permission to download this type of data!")
                        else:
                            print (" > Unknown Error, Could not get file HEAD: {0}".format(e))
                    except URLError as e:
                        print ("URL Error (from HEAD): {0}, {1}".format( e.reason, eof))
                        if "ssl.c" in "{0}".format(e.reason):
                            print ("IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error.")
        #                 return False,None
                        break
            
            
                download(savefile, eof)
            else:
                download_urls.append(eof)


    if IsDownload: #下载
        print ("------------------------------------")
        print ("开始下载出错的数据")
        # 下载出错的数据重新下载
        while len(error_url)!=0:
            print ("出错的数据有")
            print (error_url)
            for eof in error_url:
                savefile = os.path.join(out_path, eof)
                download(savefile, url_prefix + eof)
        print ("全部下载成功,无出错文件")
    else: #不下载
        with open(os.path.join(out_path, u"下载链接.txt"), "w+") as f:
            for eof in download_urls:
                f.write(eof)
                f.write("\n")
            f.close()


你可能感兴趣的:(InSAR)