# weilai8游戏爬虫 (weilai8 game crawler — blog-export title, commented out so the file parses)

#!/usr/bin/python
# -*- coding: UTF-8 -*-
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import os,csv
import re
import random
import time
import requests
from lxml import etree


from urllib.parse import quote, unquote

# Total number of listing pages to crawl.
page=98
sess = requests.Session()# one Session so the login cookie persists across all requests
agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:108.0) Gecko/20100101 Firefox/108.0'# NOTE(review): unused — the real User-Agent is set in `headers` below
# Browser-like headers copied from a real login request, used for the login POST.
headers = {
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    # 'Authorization': 'Bearer 8d9587cb442ea21ac8a7bb2a40fa1de8',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Origin': 'https://weilai8.vip/',
    'Pragma': 'no-cache',
    'Referer': 'https://weilai8.vip/member/login.php',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.63',
    'X-Requested-With': 'XMLHttpRequest',
    'sec-ch-ua': '"Chromium";v="110", "Not A(Brand";v="24", "Microsoft Edge";v="110"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}

# NOTE(review): credentials are hard-coded in source — consider moving to env vars.
M_email = "[email protected]"
M_pwd="123456"
data = {'M_pwd':M_pwd,'M_email':M_email,'add':""}

url= "https://weilai8.vip/member/login.php?action=login&from=index.php"
#url='https://weiali8.vip'


# Log in at import time; the session cookie is reused by every later request.
# NOTE(review): the response is discarded — a failed login goes unnoticed here.
sess.post(url,data=data,headers=headers,allow_redirects=True)


#re=sess.get('https://258yx.vip/member/index.php')

def Purchase_goods(genkey=None, id=None):
    """
    "Buy" a (free) product so the site reveals its download information.

    Posts the buy form for product *id* with the anti-CSRF token *genkey*,
    follows the JavaScript redirect embedded in the response body, scrapes
    the game name and download text from the resulting page and appends
    them to 宅男小游戏/格斗小游戏.csv.

    :param genkey: anti-CSRF token scraped from the product page
    :param id: numeric product id taken from the productinfo URL
    :return: None
    """
    form = {
        'no': 1,
        'button': '立即购买',  # "Buy now" — the server checks this button value
        'genkey': genkey,
    }
    buy_url = f"https://weilai8.vip/buy.php?type=productinfo&id={id}"
    buy_headers = {
        'Referer': f'https://weilai8.vip/?type = productinfo&id={id}',
    }

    response = sess.post(buy_url, data=form, headers=buy_headers)
    print(response.text)

    # Keep a copy of the raw response for debugging (preserves the original
    # side effect; the parse below works from memory, no read-back needed).
    with open('js.txt', 'w') as f:
        f.write(response.text)

    # The body is a small JS snippet of the form
    #   location='<path>?genkey=<token>&address'=0
    # Splitting on '=' gives [..., path-with-quote, token-ish, '0'];
    # strip the quotes and rebuild the absolute redirect URL.
    parts = response.text.split('=')
    parts[1] = parts[1].strip("'")
    location_url = f"https://weilai8.vip/{parts[1]}={parts[2]}=0"
    print(location_url)
    response = sess.get(location_url)
    print(response.status_code)

    select = etree.HTML(response.text)
    # [4:] drops a fixed-length label prefix before the game name —
    # presumably "商品名：" or similar; TODO confirm against a live page.
    shop_name = select.xpath(r'//div[@class="col-md-9"]//p/text()')[0][4:]
    shop_msg = select.xpath(r'//div[@class="panel-body"]//textarea/text()')[0]

    out_dir = '宅男小游戏'
    out_file = os.path.join(out_dir, '格斗小游戏.csv')
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    # Write the header whenever the CSV itself is missing.  (The original
    # only wrote it when the directory was freshly created, so the header
    # was silently skipped if the directory existed without the file.)
    if not os.path.exists(out_file):
        with open(out_file, 'w', encoding='utf-8', newline='') as f:
            csv.writer(f).writerow(['游戏名称', '下载地址'])
    with open(out_file, 'a', encoding='utf-8', newline='') as f:
        csv.writer(f).writerow([shop_name, shop_msg])

    print(f'INFO游戏{shop_name}成功获取')


# Walk every listing page of category 32, collect product-detail links,
# and purchase each game so its download info lands in the CSV.
for e in range(1, page + 1):
    params = {
        'type': 'product',
        'id': '32',
        'page': e,
        'M_id': "",
        'tag': "",
    }

    url = 'https://weilai8.vip'
    page_dict = sess.get(url, params=params)

    select = etree.HTML(page_dict.text)
    game_urls = select.xpath(r'//div[@class="course-item"]//a/@href')

    # Keep only product-detail links, de-duplicated while preserving order.
    url_list = []
    for href in game_urls:
        if href not in url_list and re.search(r'productinfo', href):
            url_list.append(href)

    for href in url_list:
        game_url = url + href
        game_id = href.split('=')[-1]  # id is the last '=' segment of the href

        res = sess.get(game_url)
        print(game_url)
        gen = re.findall(r'genkey=(.\w+)', res.text)
        print(gen)

        # re.findall returns a list; pass a single token.  (The original
        # passed the whole list, which requests encodes as repeated form
        # fields — the server expects one genkey value.)
        Purchase_goods(genkey=gen[0] if gen else None, id=game_id)













# 你可能感兴趣的: (python, 游戏, 爬虫) — "you may also be interested in" blog-export footer, commented out so the file parses