故事是这样的:正面临跳槽的我,被一个HR问有没有爬过360指数。我说没有,但如果这是审核标准的话,我完全可以抽空爬爬看。结果那个HR没有回我,好气哦!!!闲来无事,我就打开360指数看了看,确实发现有一定的反爬措施:一是需要登录,二是有的指数需要通过图片来识别。还行,360还是给了点余地的。
1.登陆
我是直接使用浏览器登录后得到的cookie
2.指数识别
像图片中的这个指数82032,它其实是用图片展示出来的,数据来自下面这个接口:
https://trends.so.com/index/csssprite?q=肖战&area=全国&from=20190916&to=20191015&click=13&t=index
#click是第13个点,从20190916到20191015是30个点,从1开始数
#t是类别 index是关注趋势 media是曝光量
#得到的数据
{
"status":0,
"data":{
"肖战":{
"css":"",
"img":""
}
},
"msg":"success"
}
得到的数据中,img是一张base64编码的图片,css则给出了每个数字在图片中的定位。
在zhishu.js中找到了处理的函数,就是根据定位只露出相应的文字
// Request the CSS-sprite rendering of one data point and write it into `s`.
//
// What the visible code uses:
//   e - object providing $http.post and, optionally, `tk` and `<mod>Param.region`
//   t - array of keywords; joined with "," and URI-encoded into the `q` parameter
//   n - chart state: `mod`, `period.from` / `period.to` (dates containing "-"), `data`
//   r - value sent as the `click` parameter
//   s - DOM element whose innerHTML receives the result
//   u - not referenced in the visible body
// The region falls back to "\u5168\u56fd" when `<mod>Param.region` is missing,
// and `t` is sent as "index" whenever n.mod starts with "trend".
// On a non-zero status, missing data, or a rejected request, `s` shows the
// text '\u6682\u65e0\u6570\u636e'.
// `o`, `i`, `v`, `h` and `w` come from an enclosing scope that is not shown here.
// NOTE(review): several string literals below are empty or broken across lines;
// the HTML markup they held appears to have been stripped when the snippet was
// pasted - compare with the original zhishu.js before reusing this code.
function E(e, t, n, r, s, u) {
e.$http.post("/index/csssprite?q=" + o.WIN.encodeURIComponent(t.join(",")) + "&area=" + (e[n.mod + "Param"] && e[n.mod + "Param"].region || "\u5168\u56fd") + "&from=" + n.period.from.split("-").join("") + "&to=" + n.period.to.split("-").join("") + "&click=" + r + "&t=" + (/^trend/.test(n.mod) ? "index" : n.mod) + (e.tk ? "&tk=" + e.tk : ""), {
// Abort a previously stored sprite request, then remember the new one.
before: function(t) {
this.spriteRequest && this.spriteRequest.abort(),
this.spriteRequest = t
}
}).then(function(e) {
// Only a body with status 0 and a data payload is rendered.
if (e.body && e.body.status === 0 && e.body.data) {
var t = e.body.data
, r = "";
n.data.forEach(function(e) {
var n = e.query;
if (t[n] && t[n].css && t[n].img) {
r || (r += '');
var s = n.split("+").map(function(e) {
return (0,
i.cutStr)(e, v, "...")
}).join("+");
r += '- ' + s + '' + t[n].css + "
"
} else
r || (r += ''),
r += ""
});
if (r) {
r += "
",
s.innerHTML = r;
var o = s.querySelectorAll("span")
, a = 0
, f = o.length;
// Pass each <span> to `w` together with a background image taken from its
// parent's data-img attribute and a height of `h` pixels.
for (; a < f; a++)
w({
backgroundImage: "url(" + o[a].parentNode.getAttribute("data-img") + ")",
height: h + "px"
}, o[a])
}
} else
s.innerHTML = '\u6682\u65e0\u6570\u636e'
}, function() {
s.innerHTML = '\u6682\u65e0\u6570\u636e'
})
}
识别文字的函数用了pytesseract,可以自行安装一下。需要识别的文字工工整整,挺好识别的。
这个图片真的贼小:把图片按定位裁剪下来、拼接到一起之后,还是识别不出来;把裁剪出的数字放大三倍再拼接,pytesseract就能成功识别出来,而且准确率超高,我还没看到识别出错的。
def get_rep_json(url, timeout=10):
    """Fetch *url* with browser-like headers and return the decoded JSON body.

    :param url: fully built endpoint URL, query string included
    :param timeout: seconds to wait for the server before giving up
    :return: the parsed JSON payload
    :raises requests.RequestException: on a network failure or timeout
    :raises ValueError: if the response body is not valid JSON
    """
    headers = {
        # Paste the Cookie header of a logged-in browser session here.
        "Cookie": "",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36"
    }
    # requests has no default timeout; without one a stalled connection
    # blocks the whole crawl forever.
    response = requests.get(url, headers=headers, timeout=timeout)
    response_data = response.json()
    print(response_data)
    return response_data
class Base4ToNumber:
    """Recover a number that 360 Trends renders through a CSS image sprite.

    The ``csssprite`` endpoint answers with a base64-encoded image plus CSS
    whose ``background-position`` declarations say which slices of that image
    are shown.  This class downloads both, cuts the slices out, lines them up
    on one canvas and runs OCR on the result.

    :param q: keyword string sent as the ``q`` parameter
    :param area: region sent as the ``area`` parameter
    :param fr: start of the period, already formatted for the URL
    :param to: end of the period, already formatted for the URL
    """

    # Layout of the OCR canvas: overall size, horizontal distance between
    # pasted slices, and the enlargement applied to each slice (the raw
    # glyphs are too small for tesseract to read).
    _CANVAS_SIZE = (200, 38)
    _SLOT_WIDTH = 20
    _SCALE = 3

    def __init__(self, q, area, fr, to):
        self.q = q
        self.area = area
        self.fr = fr
        self.to = to

    @staticmethod
    def _parse_positions(css):
        """Return the absolute numbers of every ``background-position`` in *css*.

        Each declaration yields one list, e.g. ``-24px 8px`` -> ``[24, 8]``.
        The values are parsed as plain numbers; the CSS comes from the
        server and must never be passed to ``eval``.

        :raises ValueError: if a value is not a plain integer or decimal
        """

        def to_number(text):
            text = text.strip()
            try:
                return int(text)
            except ValueError:
                return float(text)

        positions = []
        for declaration in re.findall("""background-position:(.*?)['";]""", css):
            pieces = declaration.strip().split("px")
            positions.append([abs(to_number(piece)) for piece in pieces if piece])
        return positions

    def base4_to_image(self):
        """Decode the base64 data URI in ``self.data_item["img"]`` into ``self.image_bg``."""
        # The payload follows the first comma of "data:image/png;base64,<payload>".
        payload = self.data_item["img"].split(",", 1)[1]
        self.image_bg = Image.open(BytesIO(base64.b64decode(payload)))

    def get_number_for_img(self):
        """Cut the slices named by the CSS out of ``self.image_bg`` and OCR them.

        Reads ``self.data_item["css"]`` and ``self.image_bg``; both images are
        closed before returning, even when cropping or OCR fails.

        :return: the recognised text with surrounding whitespace removed
        """
        locations = self._parse_positions(self.data_item["css"])
        canvas = Image.new("RGBA", self._CANVAS_SIZE, (255, 255, 255))
        try:
            for slot, location in enumerate(locations):
                # The slice starts at the first value and ends at the sum of
                # all values of the declaration, over the full sprite height.
                glyph = self.image_bg.crop(
                    (location[0], 0, sum(location), self.image_bg.height)
                )
                # Image.ANTIALIAS was an alias of LANCZOS and no longer
                # exists in Pillow 10+.
                glyph = glyph.resize(
                    (glyph.width * self._SCALE, glyph.height * self._SCALE),
                    Image.LANCZOS,
                )
                canvas.paste(glyph, (slot * self._SLOT_WIDTH, 0))
            # tesseract appends a newline / form feed to its output.
            return pytesseract.image_to_string(canvas).strip()
        finally:
            canvas.close()
            self.image_bg.close()

    def get_image_base4(self, index, t):
        """Download the sprite of one data point and return the recognised number.

        The endpoint replies with JSON shaped like
        ``{"status": 0, "data": {<q>: {"css": ..., "img": "data:image/png;base64,..."}}, "msg": "success"}``.

        :param index: 1-based position of the point inside the period
        :param t: series type, e.g. ``"index"`` or ``"media"``
        :return: the recognised text
        :raises KeyError: if the reply carries no entry for ``self.q``
        """
        url = (
            f"https://trends.so.com/index/csssprite?q={self.q}&area={self.area}"
            f"&from={self.fr}&to={self.to}&click={index}&t={t}"
        )
        response_data = get_rep_json(url)
        self.data_item = response_data["data"][self.q]
        self.base4_to_image()
        return self.get_number_for_img()
这俩问题解决了就没啥问题了,总结一下数据接口
关注度:https://trends.so.com/index/overviewJson?area=全国&q=肖战
关注趋势:https://trends.so.com/index/soIndexJson?area=全国&q=肖战&from=20181016&to=20191015&s=0
曝光量:https://trends.so.com/index/soMediaJson?q=肖战&from=20181016&to=20191015&s=0
24小时关注:https://trends.so.com/index/indexqueryhour?q=肖战&t=30
需求分布:https://trends.so.com/index/radarJson?t=30&q=肖战
用户画像:https://trends.so.com/index/indexquerygraph?t=30&area=全国&q=肖战
def get_soIndexJson(self):
    """Collect the attention-trend series into ``self.soIndexList``.

    The ``soIndexJson`` reply supplies the step (in days) between two points;
    the value of each point is read through ``self.number_img``.  Every entry
    is ``{"date": ..., "value": ...}``: the date is the point's own day for a
    one-day step, otherwise a ``"<from>-<to>"`` string.  The list stays
    empty when the reply status is not 0.
    """
    url = (
        f"https://trends.so.com/index/soIndexJson?area={self.area}&q={self.q}"
        f"&from={self.strftime(self.fr)}&to={self.strftime(self.to)}&s=0"
    )
    response = get_rep_json(url)
    self.soIndexList = []
    if response['status'] != 0:
        return
    step_days = response['data']['step']
    one_step = datetime.timedelta(days=step_days)
    fr = self.fr
    i = 0
    # The bound is inclusive: self.to is itself a data point, so a 30-day
    # window has 30 points.  The former ``fr < self.to`` dropped the last one.
    while fr <= self.to:
        i += 1
        number = self.number_img.get_image_base4(i, "index")
        to = fr + one_step
        date = fr if step_days == 1 else f"{self.strftime(fr)}-{self.strftime(to)}"
        self.soIndexList.append({
            "date": date,
            "value": number
        })
        fr = to
用户画像接口返回的数据中,年龄和省份都是编码,对应关系可以在zhishu.js中找到
完整代码(没有存储)
import pytesseract
import base64
from PIL import Image
from io import BytesIO
import re
import requests
import datetime
# Province / region code (as used by the user-portrait endpoint) -> display name.
PROVINCE = {
    '01': '北京',
    '03': '天津',
    '14': '河北',
    '16': '山西',
    '07': '内蒙古',
    '17': '辽宁',
    '18': '吉林',
    '19': '黑龙江',
    '02': '上海',
    '20': '江苏',
    '21': '浙江',
    '23': '安徽',
    '24': '福建',
    '22': '江西',
    '15': '山东',
    '13': '河南',
    '08': '湖北',
    '09': '湖南',
    '10': '广东',
    '11': '广西',
    '12': '海南',
    '04': '重庆',
    '25': '四川',
    '26': '贵州',
    '27': '云南',
    '05': '西藏',
    '28': '陕西',
    '29': '甘肃',
    '30': '青海',
    '31': '宁夏',
    '06': '新疆',
    '32': '香港',
    '33': '澳门',
    '34': '台湾',
}

# Age-bracket code -> display label.
AGE = {
    "01": "18\u5c81\u53ca\u4ee5\u4e0b",  # 18 and under
    "02": "19-24\u5c81",  # 19-24
    "03": "25-34\u5c81",  # 25-34
    "04": "35-49\u5c81",  # 35-49
    "05": "50\u5c81\u53ca\u4ee5\u4e0a",  # 50 and over
}
def get_rep_json(url, timeout=10):
    """Fetch *url* with browser-like headers and return the decoded JSON body.

    :param url: fully built endpoint URL, query string included
    :param timeout: seconds to wait for the server before giving up
    :return: the parsed JSON payload
    :raises requests.RequestException: on a network failure or timeout
    :raises ValueError: if the response body is not valid JSON
    """
    headers = {
        # Paste the Cookie header of a logged-in browser session here.
        "Cookie": "",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36"
    }
    # requests has no default timeout; without one a stalled connection
    # blocks the whole crawl forever.
    response = requests.get(url, headers=headers, timeout=timeout)
    response_data = response.json()
    print(response_data)
    return response_data
def yesterday():
    """Return yesterday's date, relative to the local date at call time.

    :return: a ``datetime.date`` one day before today
    """
    # Computed directly; the former local variable shadowed this function's name.
    return datetime.date.today() - datetime.timedelta(days=1)
class Base4ToNumber:
    """Recover a number that 360 Trends renders through a CSS image sprite.

    The ``csssprite`` endpoint answers with a base64-encoded image plus CSS
    whose ``background-position`` declarations say which slices of that image
    are shown.  This class downloads both, cuts the slices out, lines them up
    on one canvas and runs OCR on the result.

    :param q: keyword string sent as the ``q`` parameter
    :param area: region sent as the ``area`` parameter
    :param fr: start of the period, already formatted for the URL
    :param to: end of the period, already formatted for the URL
    """

    # Layout of the OCR canvas: overall size, horizontal distance between
    # pasted slices, and the enlargement applied to each slice (the raw
    # glyphs are too small for tesseract to read).
    _CANVAS_SIZE = (200, 38)
    _SLOT_WIDTH = 20
    _SCALE = 3

    def __init__(self, q, area, fr, to):
        self.q = q
        self.area = area
        self.fr = fr
        self.to = to

    @staticmethod
    def _parse_positions(css):
        """Return the absolute numbers of every ``background-position`` in *css*.

        Each declaration yields one list, e.g. ``-24px 8px`` -> ``[24, 8]``.
        The values are parsed as plain numbers; the CSS comes from the
        server and must never be passed to ``eval``.

        :raises ValueError: if a value is not a plain integer or decimal
        """

        def to_number(text):
            text = text.strip()
            try:
                return int(text)
            except ValueError:
                return float(text)

        positions = []
        for declaration in re.findall("""background-position:(.*?)['";]""", css):
            pieces = declaration.strip().split("px")
            positions.append([abs(to_number(piece)) for piece in pieces if piece])
        return positions

    def base4_to_image(self):
        """Decode the base64 data URI in ``self.data_item["img"]`` into ``self.image_bg``."""
        # The payload follows the first comma of "data:image/png;base64,<payload>".
        payload = self.data_item["img"].split(",", 1)[1]
        self.image_bg = Image.open(BytesIO(base64.b64decode(payload)))

    def get_number_for_img(self):
        """Cut the slices named by the CSS out of ``self.image_bg`` and OCR them.

        Reads ``self.data_item["css"]`` and ``self.image_bg``; both images are
        closed before returning, even when cropping or OCR fails.

        :return: the recognised text with surrounding whitespace removed
        """
        locations = self._parse_positions(self.data_item["css"])
        canvas = Image.new("RGBA", self._CANVAS_SIZE, (255, 255, 255))
        try:
            for slot, location in enumerate(locations):
                # The slice starts at the first value and ends at the sum of
                # all values of the declaration, over the full sprite height.
                glyph = self.image_bg.crop(
                    (location[0], 0, sum(location), self.image_bg.height)
                )
                # Image.ANTIALIAS was an alias of LANCZOS and no longer
                # exists in Pillow 10+.
                glyph = glyph.resize(
                    (glyph.width * self._SCALE, glyph.height * self._SCALE),
                    Image.LANCZOS,
                )
                canvas.paste(glyph, (slot * self._SLOT_WIDTH, 0))
            # tesseract appends a newline / form feed to its output.
            return pytesseract.image_to_string(canvas).strip()
        finally:
            canvas.close()
            self.image_bg.close()

    def get_image_base4(self, index, t):
        """Download the sprite of one data point and return the recognised number.

        The endpoint replies with JSON shaped like
        ``{"status": 0, "data": {<q>: {"css": ..., "img": "data:image/png;base64,..."}}, "msg": "success"}``.

        :param index: 1-based position of the point inside the period
        :param t: series type, e.g. ``"index"`` or ``"media"``
        :return: the recognised text
        :raises KeyError: if the reply carries no entry for ``self.q``
        """
        url = (
            f"https://trends.so.com/index/csssprite?q={self.q}&area={self.area}"
            f"&from={self.fr}&to={self.to}&click={index}&t={t}"
        )
        response_data = get_rep_json(url)
        self.data_item = response_data["data"][self.q]
        self.base4_to_image()
        return self.get_number_for_img()
class Trends360:
    """Collect the 360 Trends data sets for one keyword combination.

    Each ``get_*`` method queries one endpoint and stores what it found on
    the instance; ``run`` calls them in sequence.  Result attributes are
    ``None`` until the matching method has stored a value.

    :param keywords: keywords to query; joined with ``"+"`` into ``q``
    :param area: region name sent as the ``area`` parameter
    :param to: last day of the period; defaults to yesterday at call time
    :param fr_step: length of the period in days, ``to`` included
    """

    def __init__(self, keywords: list, area="全国", to=None, fr_step=30):
        self.q = "+".join(keywords)
        self.area = area
        # Resolve the default per call: a ``to=yesterday()`` default is
        # evaluated once at definition time and goes stale in a long-running
        # process.
        self.to = yesterday() if to is None else to
        self.fr_date(fr_step - 1)
        self.number_img = Base4ToNumber(
            self.q, area, self.strftime(self.fr), self.strftime(self.to)
        )
        # Results, filled in by the get_* methods.
        self.overviewJson = None
        self.soIndexList = None
        self.soMediaJson = None
        self.indexqueryhour = None
        self.from_query = None
        self.list = None
        self.to_query = None
        self.burst_query = None
        self.interest = None
        self.sex = None
        self.age = None
        self.province = None

    def fr_date(self, num):
        """Set ``self.fr`` to the date *num* days before ``self.to``.

        :param num: number of days to go back
        """
        self.fr = self.to - datetime.timedelta(days=num)

    @staticmethod
    def strftime(date):
        """Format *date* as ``YYYYMMDD``, the form the endpoints expect.

        :param date: a ``datetime.date`` (or ``datetime.datetime``)
        :return: the formatted string
        """
        return date.strftime("%Y%m%d")

    def get_overviewJson(self):
        """Store the attention overview in ``self.overviewJson``.

        A reply with status 7001 is replaced by placeholder values; any
        other reply is expected to carry the figures at ``data[0]["data"]``.
        """
        url = f"https://trends.so.com/index/overviewJson?area={self.area}&q={self.q}"
        response = get_rep_json(url)
        none_data = {'week_year_ratio': '--',
                     'month_year_ratio': '--',
                     'week_chain_ratio': '--',
                     'month_chain_ratio': '--',
                     'week_index': 0, 'month_index': 0}
        if response['status'] == 7001:
            self.overviewJson = none_data
        else:
            self.overviewJson = response['data'][0]['data']

    def _collect_series(self, response, kind):
        """Read every point of the series described by *response*.

        :param response: reply of ``soIndexJson`` / ``soMediaJson``; only
            ``status`` and ``data.step`` (days between points) are used
        :param kind: series type passed on to the sprite endpoint
        :return: list of ``{"date": ..., "value": ...}`` entries; the date is
            the point's own day for a one-day step, otherwise a
            ``"<from>-<to>"`` string.  Empty when the status is not 0.
        """
        series = []
        if response['status'] != 0:
            return series
        step_days = response['data']['step']
        one_step = datetime.timedelta(days=step_days)
        start = self.fr
        index = 0
        # The bound is inclusive: self.to is itself a data point, so a 30-day
        # window has 30 points.  The former ``fr < self.to`` dropped the last.
        while start <= self.to:
            index += 1
            value = self.number_img.get_image_base4(index, kind)
            end = start + one_step
            if step_days == 1:
                label = start
            else:
                label = f"{self.strftime(start)}-{self.strftime(end)}"
            series.append({"date": label, "value": value})
            start = end
        return series

    def get_soIndexJson(self):
        """Store the attention-trend series in ``self.soIndexList``."""
        url = (
            f"https://trends.so.com/index/soIndexJson?area={self.area}&q={self.q}"
            f"&from={self.strftime(self.fr)}&to={self.strftime(self.to)}&s=0"
        )
        self.soIndexList = self._collect_series(get_rep_json(url), "index")

    def get_soMediaJson(self):
        """Store the media-exposure series in ``self.soMediaJson``."""
        url = (
            f"https://trends.so.com/index/soMediaJson?q={self.q}"
            f"&from={self.strftime(self.fr)}&to={self.strftime(self.to)}&s=0"
        )
        self.soMediaJson = self._collect_series(get_rep_json(url), "media")

    def get_indexqueryhour(self):
        """Store the ``indexqueryhour`` values in ``self.indexqueryhour``.

        The reply packs the values for ``self.q`` into one ``"|"``-separated
        string; each value is paired with a date counted up one day at a time
        from ``data.period.from``.
        NOTE(review): the endpoint name suggests hourly data while the labels
        advance by days - confirm against a live reply.
        """
        url = f"https://trends.so.com/index/indexqueryhour?q={self.q}&t=30"
        response = get_rep_json(url)
        self.indexqueryhour = []
        if response['status'] != 0:
            return
        day = datetime.datetime.strptime(response['data']["period"]['from'], '%Y-%m-%d')
        one_step = datetime.timedelta(days=1)
        for number in response['data']['index'][self.q].split("|"):
            self.indexqueryhour.append({
                "date": day,
                "value": number
            })
            day += one_step

    def get_radarJson(self):
        """Store the demand-distribution lists.

        Fills ``from_query``, ``list``, ``to_query`` and ``burst_query`` from
        the reply, or with empty lists when the status is not 0.
        """
        url = f"https://trends.so.com/index/radarJson?t=30&q={self.q}"
        response = get_rep_json(url)
        if response['status'] == 0:
            data = response['data']
            self.from_query = data['from_query']
            self.list = data['list']
            self.to_query = data['to_query']
            self.burst_query = data['burst_query']
        else:
            self.from_query = []
            self.list = []
            self.to_query = []
            self.burst_query = []

    def get_indexquerygraph(self):
        """Store the user-portrait data (interest, sex, age, province).

        Province and age codes are replaced by their display names from
        ``PROVINCE`` and ``AGE``.  Nothing is stored when the status is not 0.

        :raises KeyError: if the reply contains an unknown province/age code
        """
        url = f"https://trends.so.com/index/indexquerygraph?t=30&area={self.area}&q={self.q}"
        response = get_rep_json(url)
        if response['status'] != 0:
            return
        data = response["data"]
        province = data['province']
        self.interest = data['interest']
        self.sex = data['sex']
        self.age = data['age']
        self.province = [
            {"entity": PROVINCE[str(each['entity'])], "percent": each['percent']}
            for each in province
        ]
        # The age entries are rewritten in place.
        for each in self.age:
            each['entity'] = AGE[str(each["entity"])]

    def run(self):
        """Fetch the data sets one after another.

        NOTE(review): ``get_radarJson`` is not part of this sequence -
        confirm whether that is intentional.
        """
        self.get_overviewJson()
        self.get_soIndexJson()
        self.get_soMediaJson()
        self.get_indexqueryhour()
        self.get_indexquerygraph()
if __name__ == "__main__":
    # Example run: fetch the data sets for a single keyword.  To read one
    # point on its own, build Base4ToNumber with the keyword, area and the
    # period bounds and call get_image_base4(1, "index").
    trends360 = Trends360(["陈情令"])
    trends360.run()