故事是这样的:正面临跳槽的我,被一个HR问有没有爬过360指数。我说没有,但如果这是考核标准的话,我完全可以抽空爬爬看。结果那个HR就没有再回我。好气哦!!!闲来无事,我就打开360指数看了看,确实发现它有一定的反爬措施:需要登录,而且有的指数值是以图片形式展示的,需要识别图片才能拿到。还行,360还是给留了点余地的。
1.登录
我是直接使用浏览器登录后的cookie。
2.指数识别
像图片中的这个指数82032,它其实是用图片展示出来的,对应的请求如下:
https://trends.so.com/index/csssprite?q=肖战&area=全国&from=20190916&to=20191015&click=13&t=index
#click是第13个点,从20190916到20191015是30个点,从1开始数
#t是类别 index是关注趋势 media是曝光量
#得到的数据
{
"status":0,
"data":{
"肖战":{
"css":"",
"img":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAoAAAAAmCAYAAABUFyr5AAAIo0lEQVR4nOzdC4xcZd3H8e9ud1loy8LS9oVXKHhBsQi0KAbQklACClhQjPcoXkAEJdQQFRAUw0UUAU28YQKCxKgQRIwI0nIVQREwJYuiiCJyUVBKuRXpQmtO8jvJk5OZ2e1Nbc/3k0x2zpkze5555iTz2//zPLP9SJIkqVUMgJIkSS0zaQLH7Az8H/Bol8cPA54EHk+gnANsBTxSHNMH7Ai8GFgG/Gs12joTeBZYke1BYOucuzIDeCG3NTENmJxzNVX9sHHjsenpo+q8T6dPXwaMpK3PTeB8g8VxmwFTupy/NjNt2SK3qj/H0u5dgY2ApTl2atr4VLa3yPs0lrbOBoaBxybYP7ukzf+Y4PGddGpnrdv1Vh2/DfBwsW912n8AsB1w3xq0X5Kk9drAOI9fnFDTn+Dz7g7H7Af8AXgAWJTgtzLHvyXhptr/EPAMsBMwbzVC4OnA5cBl2T4E2B/4ULbPBW4FzlzF39u0ID8/29hfBamfA+cBZ2Xfh3P8TQki8xNqbgd+CrwcuDPHdbM9cAawT7ar13gMsKTHc34C/KXowy8mkP2waMsdwPFp07eAbYEngGOBxcCPgWuAUWAH4Dbg0+P0zVeA/89rvAv4zDjHd7JNl3bS43p7Xd7/TfP+fz7X1aq0f0r+EJmZP0Km5jp9ZjVegyRJ670ZRXj5GLBl8dj9xf2/Fve3zLFvAr4HzAXeBfyoOObSPD4HuKLLud8JvLe49fVo58HAd4vtHwAH5v7OCVGXphpEql57FcfvlX2VWcBRwJEJNKVP5db0jYS/T2R7q4Sw4cZxWyUckddzD/CSbM9IGDwir3eX7D8v/VftO6dHH9QW5zxN9ZD+lARu8ntvLcLRacDbcn8oP6uw9WDuz0kFs7Z99o00XtfdRXVx14Txo3N85dV5fe9LpfaIVP66tZMu11tf/sCofvfhuVbp0f7K3gmLh6W/NwF2B64DbgFuBq4H9ujRx5IkbbD6Uz25MhWiMeDaVFfIB+UngeNyDKn+LEr16RXAvtm/Wz5gazcAewL35hwHdZhz+EwqPtNyjl6uBl6ftg3lw3tRHvs48HXga6lwkSCyoHj+giKcHJwK20CqSKWncyu9MX1ze7FvXvrqyR5t3iiv+YkEmWtTKR1KoKmrfMcDJ6Tyd3Lx/K0zHFobHKeP+hPOj04Yrl2R1zy5cXw97Fy993/O/c2BU4tjTsmw8auA32bfygTAWenTqxICl+Y9qYLhe4APAu9IqH0l8JFx2tnpeqvOtRx4ad6XO8dpPwnp52eIeEreu1vTpqeyXf3x8atx+lOSpA1SPQQ8mgoXqartkSG6yxJOhoGP5vE9U9U6P9sH5efkYp4Z+bAezs/qd56Y4dmzU/EiwWTjDJcekg978rw6rCxLyHoubZqXYHVdPsi3STB6MLdjEyh7OTPDsyPASQkKLxTtLm2WQDIfOLTYP1LMgzsrfXZyQtJ2CUIzU3lbklBUnednCSOHFVWrf+b+WNGHsxNkNk8F68kMS9dVuOr3PF+8Z48mkJ2S53y5aGsVuL6dKlzTYMJzHcBvyHOnJ3jtkmrZfo15ictSZVuWkH9G9r89gZ8MM++Q9+vOTAmgRzs7XW+k76sQ+ebMq7ykR/trFySYlt4AXJT7BwDf6dAfkiRt8OoAWFaxliTcvCjhZY+EtBszzDfSmJ9W31/aCF7Tion5fwQ+kErStQlAo3nsm/lwv7d47uHF8O6VxbDoJQmKA0UI6E+Iqy3oUOmiWPAyPcFgYYYcBxpDz82K3qk57tAMIw+l2lmFv9fkmBPSxhnZfiDtvKZYbFAFy18ngGzeGOrdF/h7qn17J4SNZviUYv5aGU73z3NKoznvUILoTdlfPffCzGG8vji+L2H8YuCXxf4L8n49ln5emUU+5XD3Ztk31GjHWFGpfD7BeizBrb9
HOzfpcr09kv7/fkLv6ZkD+WyP9tfvQdNFHfZJktQ6dQDcsQhBs4EvJIw8lCrQ8gSeKjz9KXPuyHNm5f6NCWJ1sJmfYDSYAEDC4sMJkaTKc18qgKVzusyFW5gq20aZw0djbiJF4BpO4CTBY6fcn5sq2ol5PV9tPP/mohJJqpTb5v6K4nZdKonTU8ErVx+vSFg7NpXV3bNvatr/+8Zq5tNS3RrJ3MbdiuoeE1yoUPZzX/qoDLbLE+aOysILsqjj7qIiW7swQX1JKpUktM3KeSZl+P93GfpdFd3a2e16G8tczSkZNn62WPzSrf2SJKmHOgA+nxC2aaox9xQVp4XFqs96DtjiVJOWFl/3cnXmmS3Oh/bCzLGal5D1YALZHTnHtAydXlXMAzu8w9eClJanHSsbAamTuxMWFiWc1RWhXwCfSxjqL4ZUaycnnB2T7YWN/qpC3G+yfVwC4/35ipvmYpdbsoDhyFQ6J2WY8+kEnPdn4cfl6cdH0r6jE25WxT6pVv4tQ89ndlhJfG6x4va1WchzVxaKkIrjaKqg9UKTepi6Cl1fSnVyMH8kjPcVN6vSzse7XG8DqSAfnLmUe+f979V+SZI0jrkZXpuUANg0NcNzTcNdvkdw4w7H9xffd/eftmmHxSd9GYbtZHKX19vNQIZDx3NEY57aSQm8a9Ok4nv+1tTZmc/XtEmx+nZ19Wpnt+ttTlYWS5KktWBuhh21br01Vdb5WR17W1bG/q+ZnaH5Wyb4ReGSJGk9M5DFF86hWvcuy1Dx7Kz0PXAN/5vGurIyQ8AHrIX/qiJJkiRJkqT/trUxV0ySJEnrEQOgJElSyxgAJUmSWsYAKEmS1DIGQEmSpJYxAEqSJLWMAVCSJKllDICSJEktYwCUJElqGQOgJElSyxgAJUmSWsYAKEmS1DIGQEmSpJYxAEqSJLWMAVCSJKllDICSJEktYwCUJElqGQOgJElSyxgAJUmSWsYAKEmS1DIGQEmSpJYxAEqSJLWMAVCSJKllDICSJEkt8+8AAAD//8hy57jPlSCGAAAAAElFTkSuQmCC"
}
},
"msg":"success"
}
得到的数据中,img是一张base64编码的图片,css则给出了每个数字在图片中的定位。
在zhishu.js中找到了处理它的函数,原理就是根据css定位只露出图片中相应的数字:
// Requests the CSS sprite for one chart point and renders it into element `s`.
//   e: object providing `$http`, an optional `tk` token and per-module
//      "<mod>Param" settings (its `region` is used as the area)
//   t: list of query terms, joined with "," and URI-encoded into `q`
//   n: chart state; reads `mod`, `period.from`, `period.to` and `data`
//   r: index of the clicked point, sent as `click`
//   s: DOM element whose innerHTML receives the result
//   u: not referenced in this function
// `o`, `i`, `v`, `w` and `h` come from an enclosing scope that is not shown here.
// NOTE(review): several string literals below are empty or span two lines; the
// HTML markup they contained appears to have been stripped when the snippet was
// pasted, so treat this listing as illustrative rather than runnable.
function E(e, t, n, r, s, u) {
// Area falls back to "\u5168\u56fd" (nationwide); any mod starting with "trend" is sent as t=index.
e.$http.post("/index/csssprite?q=" + o.WIN.encodeURIComponent(t.join(",")) + "&area=" + (e[n.mod + "Param"] && e[n.mod + "Param"].region || "\u5168\u56fd") + "&from=" + n.period.from.split("-").join("") + "&to=" + n.period.to.split("-").join("") + "&click=" + r + "&t=" + (/^trend/.test(n.mod) ? "index" : n.mod) + (e.tk ? "&tk=" + e.tk : ""), {
before: function(t) {
this.spriteRequest && this.spriteRequest.abort(),
this.spriteRequest = t
}
}).then(function(e) {
// Success path: status 0 with a data object keyed by query string.
if (e.body && e.body.status === 0 && e.body.data) {
var t = e.body.data
, r = "";
n.data.forEach(function(e) {
var n = e.query;
if (t[n] && t[n].css && t[n].img) {
r || (r += '');
var s = n.split("+").map(function(e) {
return (0,
i.cutStr)(e, v, "...")
}).join("+");
r += '- ' + s + '' + t[n].css + "
"
} else
r || (r += ''),
r += ""
});
// Only touch the DOM when at least one query produced markup.
if (r) {
r += "
",
s.innerHTML = r;
var o = s.querySelectorAll("span")
, a = 0
, f = o.length;
// Give every span the sprite from its parent's data-img attribute as background.
for (; a < f; a++)
w({
backgroundImage: "url(" + o[a].parentNode.getAttribute("data-img") + ")",
height: h + "px"
}, o[a])
}
} else
s.innerHTML = '\u6682\u65e0\u6570\u636e'
}, function() {
// Request failed: show the same "\u6682\u65e0\u6570\u636e" (no data) placeholder.
s.innerHTML = '\u6682\u65e0\u6570\u636e'
})
}
识别文字的函数用了pytesseract(需要自行安装),要识别的数字工工整整,挺好识别的。
这个图片真的贼小:把数字按定位裁剪下来、拼接到一起之后还是识别不出来;放大三倍之后,pytesseract就能成功识别了,而且准确率超高,我还没看到识别出错的。
def get_rep_json(url, cookie="", timeout=10):
    """Request *url* with browser-like headers and return the decoded JSON body.

    :param url: full request URL of the endpoint to query
    :param cookie: value sent in the ``Cookie`` header; paste the cookie of a
        logged-in browser session here (defaults to an empty string, as before)
    :param timeout: seconds to wait for the server; without a timeout a
        stalled connection would block the crawl indefinitely
    :return: the parsed JSON response
    """
    headers = {
        "Cookie": cookie,  # cookie copied from a logged-in browser session
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    response_data = response.json()
    # Echo the payload so the crawl can be followed on the console.
    print(response_data)
    return response_data
class Base4ToNumber:
    """Fetch the CSS-sprite image that hides an index value and OCR it.

    "Base4" in the class and method names stands for base64; the names are
    kept unchanged so existing callers keep working.
    """

    def __init__(self, q, area, fr, to):
        """Remember the query parameters sent with every sprite request.

        :param q: query string sent as ``q`` and used as key into the response
        :param area: region sent as ``area``
        :param fr: start of the period, interpolated verbatim into the URL
            (e.g. ``20190916``)
        :param to: end of the period, interpolated verbatim into the URL
        """
        self.q = q
        self.area = area
        self.fr = fr
        self.to = to

    def base4_to_image(self):
        """Decode the base64 data URI in ``self.data_item["img"]`` into ``self.image_bg``."""
        base4 = self.data_item["img"]
        # The payload is everything after the first comma of
        # "data:image/png;base64,<payload>".
        image_data = base64.b64decode(base4.split(',')[1])
        self.image_bg = Image.open(BytesIO(image_data))

    @staticmethod
    def _parse_positions(css):
        """Return the absolute pixel offsets of each ``background-position`` in *css*.

        Every declaration yields one list, e.g.
        ``background-position:-12px 8px;`` -> ``[12, 8]``.
        """
        positions = []
        for declaration in re.findall("""background-position:(.*?)['";]""", css):
            offsets = []
            for token in declaration.strip().split("px"):
                token = token.strip()
                if not token:
                    continue
                # The CSS comes from the server, so parse it as a number
                # instead of handing it to eval().
                try:
                    value = int(token)
                except ValueError:
                    value = float(token)
                offsets.append(abs(value))
            positions.append(offsets)
        return positions

    def get_number_for_img(self):
        """Cut the digits out of the sprite, stitch them together and OCR them.

        Reads ``self.image_bg`` (the sprite) and ``self.data_item["css"]``
        (the positioning markup). Both the stitched image and the sprite are
        closed before returning, even when cropping or OCR fails.

        :return: the text recognised by pytesseract
        """
        number_location_int_list = self._parse_positions(self.data_item["css"])
        # Blank canvas that receives the enlarged digits side by side.
        number = Image.new("RGBA", (200, 38), (255, 255, 255))
        try:
            for i, number_location in enumerate(number_location_int_list):
                # Crop from the first offset to the sum of all offsets
                # (presumably x position plus glyph width -- confirm against the CSS).
                image_number = self.image_bg.crop(
                    (number_location[0], 0, sum(number_location), self.image_bg.height))
                # Enlarge 3x so tesseract can read the tiny glyphs.
                # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
                image_number = image_number.resize(
                    (image_number.width * 3, image_number.height * 3), Image.LANCZOS)
                number.paste(image_number, (i * 20, 0))
            return pytesseract.image_to_string(number)
        finally:
            number.close()
            self.image_bg.close()

    def get_image_base4(self, index, t):
        """Request the sprite for one data point and return the recognised number.

        The endpoint answers with JSON shaped like
        ``{"status": 0, "data": {<q>: {"css": ..., "img": "data:image/png;base64,..."}}, "msg": "success"}``.

        :param index: ordinal of the data point, counted from 1
        :param t: series type, e.g. ``"index"`` or ``"media"``
        :return: the text recognised for that point
        """
        url = f"https://trends.so.com/index/csssprite?q={self.q}&area={self.area}&from={self.fr}&to={self.to}&click={index}&t={t}"
        response_data = get_rep_json(url)
        self.data_item = response_data["data"][self.q]
        self.base4_to_image()
        return self.get_number_for_img()
这两个问题解决了就没啥问题了,下面总结一下数据接口:
关注度:https://trends.so.com/index/overviewJson?area=全国&q=肖战
关注趋势:https://trends.so.com/index/soIndexJson?area=全国&q=肖战&from=20181016&to=20191015&s=0
曝光量:https://trends.so.com/index/soMediaJson?q=肖战&from=20181016&to=20191015&s=0
24小时关注:https://trends.so.com/index/indexqueryhour?q=肖战&t=30
需求分布:https://trends.so.com/index/radarJson?t=30&q=肖战
用户画像:https://trends.so.com/index/indexquerygraph?t=30&area=全国&q=肖战
def get_soIndexJson(self):
    """Collect the attention-trend series into ``self.soIndexList``.

    Requests the ``soIndexJson`` endpoint for the configured period; when it
    answers with status 0, one value per time step is read through
    ``self.number_img.get_image_base4`` and stored as
    ``{"date": ..., "value": ...}``. Any other status leaves the list empty.
    """
    start_text = self.strftime(self.fr)
    end_text = self.strftime(self.to)
    url = f"https://trends.so.com/index/soIndexJson?area={self.area}&q={self.q}&from={start_text}&to={end_text}&s=0"
    payload = get_rep_json(url)
    self.soIndexList = []
    if payload['status'] != 0:
        return
    step_days = payload['data']['step']
    span = datetime.timedelta(days=step_days)
    cursor = self.fr
    point_no = 1  # sprite points are numbered from 1
    # NOTE(review): the loop stops before ``self.to`` itself, so the final day
    # of the period gets no entry -- confirm whether that is intended.
    while cursor < self.to:
        value = self.number_img.get_image_base4(point_no, "index")
        upper = cursor + span
        if step_days == 1:
            # Daily data: label the point with the date object itself.
            label = cursor
        else:
            # Coarser data: label the point with its "from-to" range.
            label = f"{self.strftime(cursor)}-{self.strftime(upper)}"
        self.soIndexList.append({"date": label, "value": value})
        cursor = upper
        point_no += 1
用户画像接口返回的数据中,年龄和省份都是编码,对应关系可以在zhishu.js中找到。
完整代码(不包含数据存储部分):
import pytesseract
import base64
from PIL import Image
from io import BytesIO
import re
import requests
import datetime
# Lookup table from the two-digit region code returned by the user-profile
# endpoint to the region's display name (used in Trends360.get_indexquerygraph).
PROVINCE={'01': '北京', '03': '天津', '14': '河北', '16': '山西', '07': '内蒙古', '17': '辽宁', '18': '吉林', '19': '黑龙江', '02': '上海', '20': '江苏', '21': '浙江', '23': '安徽', '24': '福建', '22': '江西', '15': '山东', '13': '河南', '08': '湖北', '09': '湖南', '10': '广东', '11': '广西', '12': '海南', '04': '重庆', '25': '四川', '26': '贵州', '27': '云南', '05': '西藏', '28': '陕西', '29': '甘肃', '30': '青海', '31': '宁夏', '06': '新疆', '32': '香港', '33': '澳门', '34': '台湾'}
# Lookup table from the age-bracket code of the user-profile endpoint to its
# label: 18 and under, 19-24, 25-34, 35-49, 50 and over.
AGE={
"01": "18\u5c81\u53ca\u4ee5\u4e0b",
"02": "19-24\u5c81",
"03": "25-34\u5c81",
"04": "35-49\u5c81",
"05": "50\u5c81\u53ca\u4ee5\u4e0a"
}
def get_rep_json(url, cookie="", timeout=10):
    """Request *url* with browser-like headers and return the decoded JSON body.

    :param url: full request URL of the endpoint to query
    :param cookie: value sent in the ``Cookie`` header; paste the cookie of a
        logged-in browser session here (defaults to an empty string, as before)
    :param timeout: seconds to wait for the server; without a timeout a
        stalled connection would block the crawl indefinitely
    :return: the parsed JSON response
    """
    headers = {
        "Cookie": cookie,  # cookie copied from a logged-in browser session
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    response_data = response.json()
    # Echo the payload so the crawl can be followed on the console.
    print(response_data)
    return response_data
def yesterday():
    """Return the date one day before today as a ``datetime.date``."""
    return datetime.date.today() - datetime.timedelta(days=1)
class Base4ToNumber:
    """Fetch the CSS-sprite image that hides an index value and OCR it.

    "Base4" in the class and method names stands for base64; the names are
    kept unchanged so existing callers keep working.
    """

    def __init__(self, q, area, fr, to):
        """Remember the query parameters sent with every sprite request.

        :param q: query string sent as ``q`` and used as key into the response
        :param area: region sent as ``area``
        :param fr: start of the period, interpolated verbatim into the URL
            (e.g. ``20190916``)
        :param to: end of the period, interpolated verbatim into the URL
        """
        self.q = q
        self.area = area
        self.fr = fr
        self.to = to

    def base4_to_image(self):
        """Decode the base64 data URI in ``self.data_item["img"]`` into ``self.image_bg``."""
        base4 = self.data_item["img"]
        # The payload is everything after the first comma of
        # "data:image/png;base64,<payload>".
        image_data = base64.b64decode(base4.split(',')[1])
        self.image_bg = Image.open(BytesIO(image_data))

    @staticmethod
    def _parse_positions(css):
        """Return the absolute pixel offsets of each ``background-position`` in *css*.

        Every declaration yields one list, e.g.
        ``background-position:-12px 8px;`` -> ``[12, 8]``.
        """
        positions = []
        for declaration in re.findall("""background-position:(.*?)['";]""", css):
            offsets = []
            for token in declaration.strip().split("px"):
                token = token.strip()
                if not token:
                    continue
                # The CSS comes from the server, so parse it as a number
                # instead of handing it to eval().
                try:
                    value = int(token)
                except ValueError:
                    value = float(token)
                offsets.append(abs(value))
            positions.append(offsets)
        return positions

    def get_number_for_img(self):
        """Cut the digits out of the sprite, stitch them together and OCR them.

        Reads ``self.image_bg`` (the sprite) and ``self.data_item["css"]``
        (the positioning markup). Both the stitched image and the sprite are
        closed before returning, even when cropping or OCR fails.

        :return: the text recognised by pytesseract
        """
        number_location_int_list = self._parse_positions(self.data_item["css"])
        # Blank canvas that receives the enlarged digits side by side.
        number = Image.new("RGBA", (200, 38), (255, 255, 255))
        try:
            for i, number_location in enumerate(number_location_int_list):
                # Crop from the first offset to the sum of all offsets
                # (presumably x position plus glyph width -- confirm against the CSS).
                image_number = self.image_bg.crop(
                    (number_location[0], 0, sum(number_location), self.image_bg.height))
                # Enlarge 3x so tesseract can read the tiny glyphs.
                # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
                image_number = image_number.resize(
                    (image_number.width * 3, image_number.height * 3), Image.LANCZOS)
                number.paste(image_number, (i * 20, 0))
            return pytesseract.image_to_string(number)
        finally:
            number.close()
            self.image_bg.close()

    def get_image_base4(self, index, t):
        """Request the sprite for one data point and return the recognised number.

        The endpoint answers with JSON shaped like
        ``{"status": 0, "data": {<q>: {"css": ..., "img": "data:image/png;base64,..."}}, "msg": "success"}``.

        :param index: ordinal of the data point, counted from 1
        :param t: series type, e.g. ``"index"`` or ``"media"``
        :return: the text recognised for that point
        """
        url = f"https://trends.so.com/index/csssprite?q={self.q}&area={self.area}&from={self.fr}&to={self.to}&click={index}&t={t}"
        response_data = get_rep_json(url)
        self.data_item = response_data["data"][self.q]
        self.base4_to_image()
        return self.get_number_for_img()
class Trends360:
    """Crawler for the 360 Trends (trends.so.com) endpoints of one keyword query.

    Each ``get_*`` method requests one endpoint through ``get_rep_json`` and
    stores the result on the instance; ``run`` calls them in sequence.
    """

    def __init__(self, keywords: list, area="全国", to=None, fr_step=30):
        """
        :param keywords: keywords to query; joined with ``+`` into one query string
        :param area: region sent as the ``area`` parameter
        :param to: last day of the period as a ``datetime.date``; defaults to
            yesterday, resolved each time an instance is created
        :param fr_step: length of the period in days, counting ``to`` itself
        """
        self.q = "+".join(keywords)
        self.area = area
        # A ``to=yesterday()`` default would be evaluated only once, when the
        # class is defined, and go stale in a long-running process.
        self.to = yesterday() if to is None else to
        self.fr_date(fr_step - 1)
        self.number_img = Base4ToNumber(self.q, area, self.strftime(self.fr), self.strftime(self.to))
        self.overviewJson = None
        # Results of the individual endpoints; filled by the get_* methods.
        self.soIndexList = []
        self.soMediaJson = []
        self.indexqueryhour = []
        self.from_query = []
        self.list = []
        self.to_query = []
        self.burst_query = []
        self.interest = []
        self.sex = []
        self.age = []
        self.province = []

    def fr_date(self, num):
        """Set ``self.fr`` to the date *num* days before ``self.to``.

        :param num: number of days to go back
        """
        self.fr = self.to - datetime.timedelta(days=num)

    @staticmethod
    def strftime(date):
        """Format *date* as ``YYYYMMDD``, the form used in the request URLs.

        :param date: a date or datetime object
        :return: the formatted string
        """
        return date.strftime("%Y%m%d")

    def _collect_series(self, response, t):
        """Read one OCR'd value per time step of a trend response.

        :param response: decoded JSON of a trend endpoint
        :param t: series type passed on to the sprite request
        :return: list of ``{"date": ..., "value": ...}`` entries; empty when
            the response status is not 0
        :raises ValueError: if the response reports a non-positive step, which
            would otherwise make the loop run forever
        """
        series = []
        if response['status'] != 0:
            return series
        step = response['data']['step']
        if step <= 0:
            raise ValueError(f"unexpected step in response: {step!r}")
        one_step = datetime.timedelta(days=step)
        fr = self.fr
        i = 0  # sprite points are numbered from 1
        # NOTE(review): the loop stops before ``self.to`` itself, so the final
        # day of the period gets no entry -- confirm whether that is intended.
        while fr < self.to:
            i += 1
            number = self.number_img.get_image_base4(i, t)
            to = fr + one_step
            # Daily data is labelled with the date object, coarser data with
            # its "from-to" range.
            date = fr if step == 1 else f"{self.strftime(fr)}-{self.strftime(to)}"
            series.append({
                "date": date,
                "value": number
            })
            fr = to
        return series

    def get_overviewJson(self):
        """Fetch the attention overview into ``self.overviewJson``.

        A response with status 7001 stores placeholder values; any other
        response stores the ``data`` of its first entry.
        """
        url = f"https://trends.so.com/index/overviewJson?area={self.area}&q={self.q}"
        response = get_rep_json(url)
        none_data = {'week_year_ratio': '--',
                     'month_year_ratio': '--',
                     'week_chain_ratio': '--',
                     'month_chain_ratio': '--',
                     'week_index': 0, 'month_index': 0}
        self.overviewJson = none_data if response['status'] == 7001 else response['data'][0]['data']

    def get_soIndexJson(self):
        """Fetch the attention-trend series into ``self.soIndexList``."""
        url = f"https://trends.so.com/index/soIndexJson?area={self.area}&q={self.q}&from={self.strftime(self.fr)}&to={self.strftime(self.to)}&s=0"
        response = get_rep_json(url)
        self.soIndexList = self._collect_series(response, "index")

    def get_soMediaJson(self):
        """Fetch the media-exposure series into ``self.soMediaJson``."""
        url = f"https://trends.so.com/index/soMediaJson?q={self.q}&from={self.strftime(self.fr)}&to={self.strftime(self.to)}&s=0"
        response = get_rep_json(url)
        self.soMediaJson = self._collect_series(response, "media")

    def get_indexqueryhour(self):
        """Fetch the ``indexqueryhour`` series into ``self.indexqueryhour``.

        The values arrive as one ``|``-separated string and are stored
        unconverted, each paired with a timestamp starting at the response's
        ``period.from``.
        """
        url = f"https://trends.so.com/index/indexqueryhour?q={self.q}&t=30"
        response = get_rep_json(url)
        self.indexqueryhour = []
        if response['status'] == 0:
            fr = datetime.datetime.strptime(response['data']["period"]['from'], '%Y-%m-%d')
            # NOTE(review): the endpoint name suggests hourly data, yet the
            # timestamp advances one day per value -- confirm the granularity.
            one_step = datetime.timedelta(days=1)
            for number in response['data']['index'][self.q].split("|"):
                self.indexqueryhour.append({
                    "date": fr,
                    "value": number
                })
                fr += one_step

    def get_radarJson(self):
        """Fetch the demand-distribution data.

        Stores ``from_query``, ``list``, ``to_query`` and ``burst_query`` from
        the response, or empty lists when the status is not 0.
        """
        url = f"https://trends.so.com/index/radarJson?t=30&q={self.q}"
        response = get_rep_json(url)
        ok = response['status'] == 0
        self.from_query = response['data']['from_query'] if ok else []
        self.list = response['data']['list'] if ok else []
        self.to_query = response['data']['to_query'] if ok else []
        self.burst_query = response['data']['burst_query'] if ok else []

    def get_indexquerygraph(self):
        """Fetch the user profile: interest, sex, age and province breakdowns.

        Province and age codes are replaced by their labels from ``PROVINCE``
        and ``AGE``. All four attributes are reset to empty lists first, so
        they exist even when the status is not 0.
        """
        url = f"https://trends.so.com/index/indexquerygraph?t=30&area={self.area}&q={self.q}"
        response = get_rep_json(url)
        self.interest = []
        self.sex = []
        self.age = []
        self.province = []
        if response['status'] == 0:
            province = response["data"]['province']
            self.interest = response["data"]['interest']
            self.sex = response["data"]['sex']
            self.age = response["data"]['age']
            self.province = [{"entity": PROVINCE[str(each['entity'])], "percent": each['percent']} for each in province]
            # The age entries are relabelled in place.
            for each in self.age:
                each['entity'] = AGE[str(each["entity"])]

    def run(self):
        """Fetch every dataset in turn.

        NOTE(review): ``get_radarJson`` is not part of this sequence; call it
        separately if the demand distribution is needed.
        """
        self.get_overviewJson()
        self.get_soIndexJson()
        self.get_soMediaJson()
        self.get_indexqueryhour()
        self.get_indexquerygraph()
if __name__ == "__main__":
    # Crawl every dataset for a single keyword. To OCR just one data point,
    # build a Base4ToNumber for the keyword and period and call
    # get_image_base4(1, "index") on it instead.
    trends360 = Trends360(keywords=["陈情令"])
    trends360.run()