Python 爬取豆瓣图片到本地,并用 Java 上传至服务器

换行缩进不要用tab键!!!!

import requests
import urllib
import json
import os,sys
from lxml import etree

def main():
    """Download the cover images of Douban's hot TV subjects.

    Queries the Douban ``search_subjects`` JSON endpoint one page at a
    time (20 subjects per page), collects each subject's ``cover`` URL,
    appends every URL to ``test1.txt`` in the current working directory
    and saves the image bytes under ``<cwd>/image/<last URL segment>``.
    """
    path = os.getcwd()+'/image'
    print(path)
    if not os.path.isdir(path):
        os.makedirs(path)
    # The context manager flushes and closes the URL log even when a
    # request or a file write raises part-way through.
    with open('test1.txt', 'w') as f:
        # Outer loop walks the result pages; widen the range for more pages.
        for page in range(1):
            url = 'https://movie.douban.com/j/search_subjects?type=tv&tag=%E7%83%AD%E9%97%A8&sort=recommend&page_limit=20&page_start='+str(page*20)
            print(url)
            data = requests.get(url)
            res = json.loads(data.text)
            covers = [x["cover"] for x in res["subjects"]]
            for cover in covers:
                f.write(cover+'\n')
                # The file name is the last path segment of the image URL.
                filename = cover.strip().split('/')[-1]
                print(cover)
                imagepath = path+"/"+filename
                # urllib.urlopen only exists on Python 2; fetching through
                # requests (already used above) works on Python 2 and 3.
                cat_img = requests.get(cover).content
                with open(imagepath, 'wb') as tag:
                    tag.write(cat_img)

# Run the crawler only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

数据是从豆瓣的接口中获取的,而不是用正则表达式从返回的网页中提取的。

 url = 'https://movie.douban.com/j/search_subjects?type=tv&tag=%E7%83%AD%E9%97%A8&sort=recommend&page_limit=20&page_start='+str(i*20)

因此,先把返回的数据解析为 JSON,然后获取其中的列表。

  data = requests.get(url)
  res = json.loads(data.text)

python爬取豆瓣图片到本地,并用java上传至服务_第1张图片
循环获取subjects中的元素的cover,写入到result中。

 result = []
 for x in res["subjects"]:
     result.append(x["cover"])

第一层for循环是为了改变页码,获取更多信息。

1.打开文件,写入数据

 f = open('test1.txt', 'w')

2.文件存放路径

path = os.getcwd()+'/image';
if not os.path.isdir(path):
   os.makedirs(path)

3.获取爬取到的链接末尾的文件名

filename = result[i].strip().split('/')[-1]

4.下载文件到本地

 response = urllib.urlopen(result[i])
 cat_img = response.read()
 with open(imagepath, 'wb') as tag:
     tag.write(cat_img)

java 把文件上传到服务器

 /**
  * Uploads every regular file found directly inside the local image
  * directory through the upload service and writes each returned URL,
  * one per line, to /Users/davi/Desktop/imageout.txt.
  *
  * @return the value produced by Results.singleResult(ResultCodes.SUCCESS)
  * @throws IllegalStateException if the image directory cannot be listed
  * @throws Exception if a file cannot be opened, the upload call fails,
  *         or the URL log cannot be written
  */
 @GetMapping("/uploadImage")
    public Object uploadFile() throws Exception {
        String dirPath = "/Users/davi/Documents/GitHub/python/image";
        File dir = new File(dirPath);
        File[] files = dir.listFiles(); // every entry directly inside the directory
        if (files == null) {
            // listFiles() returns null when the path is not a directory or an
            // I/O error occurs; fail with a clear message instead of an NPE.
            throw new IllegalStateException("cannot list image directory: " + dirPath);
        }
        String type = "resource";

        final FileType fileType = FileType.valueOf(type.toUpperCase());

        // try-with-resources closes the URL log even when an upload throws.
        try (FileWriter fw = new FileWriter("/Users/davi/Desktop/imageout.txt")) {
            for (File file : files) {
                if (!file.isFile()) { // skip sub-directories and other non-files
                    continue;
                }
                // Close each input stream as soon as its upload call returns so
                // file descriptors are not leaked across the loop.
                // NOTE(review): assumes createFile consumes the stream before
                // returning - confirm against the service implementation.
                try (FileInputStream fileInputStream = new FileInputStream(file)) {
                    // project-specific upload service request
                    final MyFileCreateRequest createFileRequest = new MyFileCreateRequest(fileType, file.getName(), "multipart/form-data; boundary=----WebKitFormBoundaryNKE6Mjn6vK5PxlhK", fileInputStream);
                    final MyFileInfoVo result = this.myservice.createFile(createFileRequest);
                    System.out.println("upload image result:" + result.getUrl());
                    fw.write(result.getUrl()+"\n");
                }
            }
        }
        return Results.singleResult(ResultCodes.SUCCESS);
    }

爬取的图片
python爬取豆瓣图片到本地,并用java上传至服务_第2张图片

你可能感兴趣的:(python)