python LAADS+Selenium应该如何运用?

from selenium import webdriver

from time import sleep

import tempfile

import os,sys

import pandas as pd

import geopandas as gpd

import time

构建查询地址

def GetURL(ProductID, StartTime, EndTime, search_file):
    """Build the LAADS DAAC search-page URL for a product, date range and area.

    Args:
        ProductID: Product and collection, e.g. ``'MOD021KM--61/'``. Leading
            and trailing slashes are ignored.
        StartTime: First day of the query, ``'YYYY-MM-DD'``.
        EndTime: Last day of the query, ``'YYYY-MM-DD'``.
        search_file: Path to a vector file readable by geopandas; only its
            first feature is used as the query boundary.

    Returns:
        The URL of the LAADS "Search" result page, as a string.
    """
    # Query boundary.
    data = gpd.GeoDataFrame.from_file(search_file)
    bbox = (data.bounds.values)[0].tolist()

    # Study-area extent as "upper-left, lower-right" (minx, maxy, maxx, miny),
    # rounded to one decimal.
    # NOTE(review): this string is not used by the tile-based URL below; it is
    # kept as the alternative area specification mentioned by the original
    # author — confirm whether it is still wanted.
    Area = ','.join(str(round(bbox[i], 1)) for i in (0, 3, 2, 1))

    # MODIS tile-grid vector layer. A raw string keeps the backslashes literal
    # without relying on invalid escape sequences.
    modis_grid_file = r'E:\***\modis_WGS84_grid_world.shp'
    modis_grid = gpd.GeoDataFrame.from_file(modis_grid_file)

    # Tiles that intersect the query boundary.
    modis_intersection = modis_grid[modis_grid.intersects(data.geometry[0])]

    # Columns 1 and 2 of each grid row are used as the H and V tile indices.
    # Joining avoids chopping the ':' off "Tile:" when nothing intersects.
    tiles = ["H" + str(mv[1]) + "V" + str(mv[2])
             for mv in modis_intersection.values]
    path_row = 'Tile:' + ",".join(tiles)

    # Assemble the Search-page address.
    # NOTE(review): the URL literal was truncated in the source text; this is
    # reconstructed from the LAADS search-order URL layout
    # (/search/order/4/<product>--<collection>/<start>..<end>/DB/<area>) —
    # confirm against a URL copied from the LAADS web UI.
    url = ('https://ladsweb.modaps.eosdis.nasa.gov/search/order/4/'
           + ProductID.strip('/') + '/'
           + StartTime + '..' + EndTime
           + '/DB/' + path_row)

    return url

使用 Selenium 查询影像

def SearchFileList(url):
    """Open the LAADS search page in Chrome and download the result CSV.

    A fresh directory named after the current timestamp is created and set as
    Chrome's download directory, so the CSV is the only file expected in it.

    Args:
        url: Address of the LAADS "Search" result page.

    Returns:
        Path of the directory that the CSV was downloaded into.
    """
    # Imported here so the block does not depend on extra file-level imports.
    from selenium.webdriver.common.by import By

    # Create a folder named after the moment the program runs; files
    # downloaded through selenium are stored there.
    csvdir = 'E:\\***\\' + str(time.time()).replace('.', '')
    os.mkdir(csvdir)

    # Configure selenium / Chrome.
    options = webdriver.ChromeOptions()
    prefs = {'profile.default_content_settings.popups': 0, 'download.default_directory': csvdir}
    options.add_experimental_option('prefs', prefs)
    # Local path of chromedriver.exe.
    chromedriver = r"C:\***\Google\Chrome\Application\chromedriver.exe"
    # options.add_argument('--headless')  # enable for a window-less browser

    try:
        # Selenium 3 style.
        driver = webdriver.Chrome(executable_path=chromedriver, options=options)
    except TypeError:
        # Newer Selenium 4 releases dropped `executable_path`; the driver
        # location is passed through a Service object instead.
        from selenium.webdriver.chrome.service import Service
        driver = webdriver.Chrome(service=Service(chromedriver), options=options)

    # Always shut the browser down, even if a step below fails.
    try:
        # Open the Search page.
        driver.get(url)

        # After the page opens the server still needs time to run the query,
        # so wait 50 seconds; adjust to the network speed. (Waiting for the
        # CSV link to appear with WebDriverWait would be an alternative.)
        sleep(50)

        # Locate the CSV download link and click it.
        csvElement = driver.find_element(By.XPATH, '// *[ @ id = "tab4download"] / a[2]')
        csvElement.click()

        # Leave time for the CSV download to finish.
        sleep(20)
    finally:
        driver.quit()

    return csvdir

下载影像

def MODISDown(FileDir):
    """Download every file listed in the LAADS CSV found in ``FileDir``.

    The first entry of ``FileDir`` is read as the CSV, then the CSV and the
    directory are deleted. Column 1 of each row is used as the server-side
    file path; files are stored under ``E:/***/MODIS/<path component 5>/``.
    Files that already exist locally are skipped.

    Args:
        FileDir: Directory containing the CSV downloaded by the search step.

    Raises:
        FileNotFoundError: If ``FileDir`` contains no file.
    """
    entries = os.listdir(FileDir)
    if not entries:
        raise FileNotFoundError("No CSV file was downloaded into " + FileDir)

    # Name and path of the downloaded CSV file.
    csvfilename = entries[0]
    csvfilepath = os.path.join(FileDir, csvfilename)
    csvvalues = pd.read_csv(csvfilepath).values

    # The CSV and its temporary directory are no longer needed.
    os.remove(csvfilepath)
    os.rmdir(FileDir)

    total = len(csvvalues)
    for file_count, cv in enumerate(csvvalues, start=1):
        parts = cv[1].split("/")

        # Build the download link for this file.
        # NOTE(review): the link literal was truncated in the source text;
        # this assumes column 1 holds a server-relative path such as
        # "/archive/allData/..." — confirm against a real CSV.
        modislink = 'https://ladsweb.modaps.eosdis.nasa.gov' + cv[1]

        outdir = 'E:/***/MODIS/' + parts[5]
        # makedirs also creates missing parents and tolerates an existing dir.
        os.makedirs(outdir, exist_ok=True)

        path = outdir + '/' + parts[7]
        if os.path.exists(path):
            continue

        print("({0}/{1}) Downloading {2}".format(file_count, total, modislink.split("/")[-1]))
        with open(path, 'w+b') as out:
            geturl(modislink, out)

        # A failed request leaves an empty file behind, which would make the
        # next run skip it; remove it so the download can be retried.
        if os.path.getsize(path) == 0:
            os.remove(path)

获取下载链接并下载影像数据

def geturl(url, out=None):
    """Fetch ``url`` with the Earthdata bearer token.

    Args:
        url: Address to request.
        out: Optional writable file object. When given, the response body is
            streamed into it and download statistics are printed; when
            ``None``, the body is returned as text.

    Returns:
        The response body decoded as UTF-8 when ``out`` is ``None`` and the
        request succeeds; otherwise ``None``. Request failures are reported
        on stderr rather than raised.
    """
    USERAGENT = 'tis/download.py_1.0--' + sys.version.replace('\n', '').replace('\r', '')
    headers = {'user-agent': USERAGENT}
    # Your token; available in the profile page after logging in to Earthdata.
    token = '******'
    headers['Authorization'] = 'Bearer ' + token

    # Only the TLS-context creation decides whether to fall back to curl.
    # Keeping the try this narrow stops unrelated AttributeErrors raised
    # further down from silently triggering the fallback.
    try:
        import ssl
        CTX = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2)
    except AttributeError:
        CTX = None

    if CTX is None:
        # Interpreters without TLS 1.2 support: delegate to curl.
        import subprocess
        args = ['curl', '--fail', '-sS', '-L', '--get', url]
        for (k, v) in headers.items():
            args.extend(['-H', ': '.join([k, v])])
        try:
            if out is None:
                # subprocess.check_output returns stdout as a byte string.
                result = subprocess.check_output(args)
                return result.decode('utf-8') if isinstance(result, bytes) else result
            subprocess.call(args, stdout=out)
        except subprocess.CalledProcessError as e:
            detail = e.message if hasattr(e, 'message') else e.output
            if isinstance(detail, bytes):
                detail = detail.decode('utf-8', 'replace')
            print('curl GET error message: %s' % detail, file=sys.stderr)
        return None

    from urllib.error import HTTPError, URLError
    from urllib.request import Request, urlopen

    try:
        # The context manager closes the connection on every path.
        with urlopen(Request(url, headers=headers), context=CTX) as response:
            if out is None:
                return response.read().decode('utf-8')

            start = time.time()
            # Stream the body into the file while reporting progress.
            written = chunk_read(response, out, report_hook=chunk_report)
            elapsed = max(time.time() - start, 1.0)
            # Average speed, based on the bytes actually written so that a
            # missing Content-Length header cannot break the calculation.
            rate = (written / 1024 ** 2) / elapsed
            print("Downloaded {0}b in {1:.2f}secs, Average Rate: {2:.2f}MB/sec".format(written, elapsed, rate))
    except HTTPError as e:
        # `code` is an int attribute and `reason` carries the message.
        print('HTTP GET error code: %d' % e.code, file=sys.stderr)
        print('HTTP GET error message: %s' % e.reason, file=sys.stderr)
    except URLError as e:
        print('Failed to make request: %s' % e.reason, file=sys.stderr)
    return None

chunk_read modified from

def chunk_read(response, local_file, chunk_size=10240, report_hook=None):
    """Copy ``response`` into ``local_file`` in fixed-size chunks.

    Args:
        response: Object with a ``read(size)`` method returning bytes.
        local_file: Writable file object. If it rejects bytes with a
            ``TypeError``, the chunk is decoded with ``local_file.encoding``
            and written as text.
        chunk_size: Number of bytes requested per read.
        report_hook: Optional callable invoked after each written chunk as
            ``report_hook(bytes_so_far, file_size)``.

    Returns:
        Number of bytes read from ``response``. A read error stops the copy
        early, prints a notice, and returns the count reached so far.
    """
    # Full size of the file (may be None when it cannot be determined).
    file_size = get_total_size(response)
    # Bytes downloaded so far.
    bytes_so_far = 0

    while True:
        try:
            chunk = response.read(chunk_size)
        except Exception:
            # Best effort: report and stop. Catching Exception instead of
            # everything lets Ctrl-C and SystemExit interrupt a download.
            sys.stdout.write("\n > There was an error reading data. \n")
            break

        # An empty read marks the end of the stream.
        if not chunk:
            break

        try:
            local_file.write(chunk)
        except TypeError:
            # Text-mode target: decode before writing.
            local_file.write(chunk.decode(local_file.encoding))

        bytes_so_far += len(chunk)

        if report_hook:
            report_hook(bytes_so_far, file_size)

    return bytes_so_far

def chunk_report( bytes_so_far, file_size):
    """Write a one-line, carriage-return-terminated progress message to stdout.

    Args:
        bytes_so_far: Number of bytes downloaded so far.
        file_size: Total size in bytes, or ``None`` when it is unknown.
    """
    emit = sys.stdout.write

    # Size unknown: only the running byte count can be shown.
    if file_size is None:
        emit(" > Downloaded %d of unknown Size\r" % (bytes_so_far))
        return

    # Progress as a percentage, rounded to two decimals.
    fraction = float(bytes_so_far) / file_size
    pct = round(fraction * 100, 2)
    emit(" > Downloaded %d of %d bytes (%0.2f%%)\r" %
         (bytes_so_far, file_size, pct))

def get_total_size(response):
    """Return the ``Content-Length`` of ``response`` as an int, if available.

    Tries ``response.info().getheader(...)`` first and falls back to
    ``response.getheader(...)`` when that attribute chain is missing.

    Args:
        response: Response object exposing one of the two header accessors.

    Returns:
        The declared size in bytes, or ``None`` when the header is missing,
        cannot be read, or is not a valid integer.
    """
    try:
        file_size = response.info().getheader('Content-Length').strip()
    except AttributeError:
        try:
            # A missing header yields None here, whose `.strip()` also raises
            # AttributeError and lands in the handler below.
            file_size = response.getheader('Content-Length').strip()
        except AttributeError:
            print ("> Problem getting size")
            return None

    try:
        return int(file_size)
    except ValueError:
        # Malformed header value: treat the size as unknown.
        print ("> Problem getting size")
        return None

# Script entry point: build the query URL, fetch the result list, download.
if __name__ == "__main__":

    # Describe the data to download.
    ProductID = 'MOD021KM--61/'  # product id  # sys.argv[1]

    # Start and end dates of the query; these are plain strings.
    StartTime = '2020-06-01'  # start date  # sys.argv[2]
    EndTime = '2020-06-03'  # end date  # sys.argv[3]

    search_file = r'E:\***\ 北京市 .shp'  # query boundary  # sys.argv[4]

    # Build the search URL.
    url = GetURL(ProductID, StartTime, EndTime, search_file)

    # Fetch the list of matching files (downloaded as a CSV).
    csvdir = SearchFileList(url)

    # Download every file in the list.
    MODISDown(csvdir)

你可能感兴趣的:(python LAADS+Selenium应该如何运用?)