中国土地市场网
/**
 * Encode a string as lowercase hexadecimal, one UTF-16 code unit at a time.
 *
 * Each code unit is written with `toString(16)` and is NOT zero-padded, so a
 * unit below 0x10 contributes a single digit.
 *
 * @param {string} str - Text to encode.
 * @returns {string} Concatenated hex digits; empty string for empty input.
 */
function stringToHex(str) {
  const hexParts = [];
  for (let idx = 0; idx < str.length; idx += 1) {
    hexParts.push(str.charCodeAt(idx).toString(16));
  }
  return hexParts.join("");
}
/**
 * Build the verification path carrying hex-encoded data in the
 * `security_verify_data` query parameter.
 *
 * @param {?string} text - Value to encode. When null or undefined, the fixed
 *   screen size "1920,1080" is encoded instead.
 * @returns {string} "/default.aspx?tabid=226&security_verify_data=" followed
 *   by the hex encoding produced by stringToHex.
 */
function YunSuoAutoJump(text) {
  // Fixed values used in place of the browser's screen.width / screen.height.
  const screenWidth = "1920";
  const screenHeight = "1080";
  // Loose equality on purpose: both null and undefined select the default.
  const payload = text == null ? screenWidth + "," + screenHeight : text;
  return "/default.aspx?tabid=226&security_verify_data=" + stringToHex(payload);
}
// Take the command-line arguments that follow the node binary and script path.
const cliArgs = process.argv.splice(2);
// Print the verification path for the first argument (undefined when absent).
console.log(YunSuoAutoJump(cliArgs[0]));
# -*- coding: utf-8 -*-
# @Time : 2019/11/1 9:45
# @Author :
import os
import re
import requests
from lxml import etree
def generate_signature(value):
    """
    Run the sibling ``landchina.js`` script under Node and return its first
    line of output.

    :param value: string forwarded to the script as its single command-line
        argument; ``None`` or ``""`` runs the script with no argument
    :return: first line printed by the script, trailing newline included
    :raises subprocess.CalledProcessError: if ``node`` exits with a non-zero status
    :raises RuntimeError: if the script printed nothing
    """
    import subprocess

    # abspath first: dirname(__file__) is '' when the script is launched by
    # bare filename, and an empty string is not a usable working directory.
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # An argument list (no shell) passes the value through verbatim, so
    # spaces and shell metacharacters in it cannot alter the command.
    command = ['node', 'landchina.js']
    if value not in (None, ''):
        command.append(str(value))

    completed = subprocess.run(
        command,
        cwd=script_dir,
        stdout=subprocess.PIPE,
        universal_newlines=True,
        check=True,
    )

    head, newline, _ = completed.stdout.partition('\n')
    if not head and not newline:
        raise RuntimeError('node landchina.js produced no output')
    return head + newline
def landchina():
    """
    Fetch the landchina.com listing page (tabid=226) and print every result link.

    Three GET requests are issued in order on one session:

    1. A plain request for the page; the ``security_session_verify`` cookie is
       extracted from the response's ``Set-Cookie`` header.
    2. A request for the same page with a ``security_verify_data`` query
       parameter produced by ``generate_signature("")``, sent with that cookie
       plus a ``srcurl`` cookie holding the encoded page URL; the
       ``security_session_mid_verify`` cookie is extracted from the response.
    3. A request for the page carrying both cookies. Its HTML is parsed and
       each link in the ``TAB_contentTable`` element is printed as a dict
       with ``title`` and ``c_url`` keys.

    :return: None
    :raises KeyError: if the first or second response has no ``Set-Cookie`` header
    """
    url = "https://www.landchina.com/default.aspx?tabid=226"
    accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3'
    user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'

    # Build the session; the context manager closes it when the function ends
    with requests.Session() as s:
        s.headers = {
            'Accept': accept,
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Host': 'www.landchina.com',
            'Pragma': 'no-cache',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'none',
            'Sec-Fetch-User': '?1',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': user_agent,
        }

        # First request
        resp1 = s.get(url=url)
        cookie1 = resp1.headers["Set-Cookie"]
        # Raw string so that \w reaches the regex engine as written
        cookie_re = r"(security_session_verify=\w+;)"
        security_session_verify = "".join(re.findall(cookie_re, cookie1))

        # Only the text after the last '=' of the generated path is used:
        # that is the encoded form of the page URL, sent as the srcurl cookie.
        security_session_verify_url = generate_signature(url).strip()
        srcurl = security_session_verify_url[security_session_verify_url.rfind("=") + 1:]
        s.headers = {
            'Accept': accept,
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Cookie': security_session_verify + " srcurl=" + srcurl,
            'Host': 'www.landchina.com',
            'Pragma': 'no-cache',
            'Referer': url,
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'same-origin',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': user_agent,
        }
        # Keep only the "security_verify_data=..." part of the generated path
        var_re = r"(security_verify_data.*)"
        verify_query = "".join(re.findall(var_re, generate_signature("")))

        # Second request
        url2 = url + "&" + verify_query
        resp2 = s.get(url2)
        cookie2 = resp2.headers["Set-Cookie"]
        cookie_re = r"(security_session_mid_verify=\w+;)"
        security_session_mid_verify = "".join(re.findall(cookie_re, cookie2))
        s.headers = {
            'Accept': accept,
            # NOTE(review): advertising 'br' assumes a Brotli decoder is
            # available to requests/urllib3 — confirm in the target environment.
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Cookie': '%s; %s' % (security_session_verify, security_session_mid_verify),
            'Host': 'www.landchina.com',
            'Pragma': 'no-cache',
            'Referer': url2,
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'same-origin',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': user_agent,
        }

        # Third request
        rep = s.get(url=url)
        mytree = etree.HTML(rep.text)
        urls = mytree.xpath('//*[@id="TAB_contentTable"]//tr/td[@class="queryCellBordy"]/a')
        for ur in urls:
            title = "".join(ur.xpath(".//text()"))
            c_url = "https://www.landchina.com/" + "".join(ur.xpath("./@href"))
            data = {
                "title": title,
                "c_url": c_url,
            }
            print(data)
# Script entry point: run the scraper only when this file is executed directly.
if __name__ == "__main__":
    landchina()