代码仅供学习交流,请勿用于非法用途
-- Create the schema the crawler writes into and make it the current one.
CREATE DATABASE drugs_;
USE drugs_;
-- One row per drug product. `kgId` is the product identifier and must be
-- unique; every other attribute is stored as free text.
CREATE TABLE `drug` (
    `id` INT PRIMARY KEY AUTO_INCREMENT,
    `kgId` VARCHAR(15) UNIQUE COMMENT '药品id',
    `proprietaryName` TEXT DEFAULT NULL COMMENT 'proprietaryName',
    `productSpecification` TEXT DEFAULT NULL COMMENT 'productSpecification',
    `manufacturer` TEXT DEFAULT NULL COMMENT 'manufacturer',
    `otc` TEXT DEFAULT NULL COMMENT 'otc',
    `medInsurance` TEXT DEFAULT NULL COMMENT 'medInsurance',
    `cw` TEXT DEFAULT NULL COMMENT 'cw',
    `commonName` TEXT DEFAULT NULL COMMENT 'commonName',
    `imageUrlList` TEXT DEFAULT NULL COMMENT 'imageUrlList',
    `englishName` TEXT DEFAULT NULL COMMENT 'englishName',
    `dosageForms` TEXT DEFAULT NULL COMMENT 'dosageForms',
    `mainMaterial` TEXT DEFAULT NULL COMMENT 'mainMaterial',
    `storage` TEXT DEFAULT NULL COMMENT 'storage',
    `permissionNumber` TEXT DEFAULT NULL COMMENT 'permissionNumber',
    `atcNameZh` TEXT DEFAULT NULL COMMENT 'atcNameZh',
    `fda` TEXT DEFAULT NULL COMMENT 'fda',
    `priceRange` TEXT DEFAULT NULL COMMENT 'priceRange',
    `packagePrice` TEXT DEFAULT NULL COMMENT 'packagePrice',
    `componentId` TEXT DEFAULT NULL COMMENT 'componentId',
    -- The column comment used to read 'englishName' (copy-paste slip).
    `pedia` TEXT DEFAULT NULL COMMENT 'pedia'
) ENGINE=MyISAM CHARSET=utf8;
-- One row per guide document, identified by its unique `key`.
CREATE TABLE `guide` (
    `id` INT PRIMARY KEY AUTO_INCREMENT,
    `key` VARCHAR(60) UNIQUE COMMENT 'key',
    `title` TEXT DEFAULT NULL COMMENT 'title',
    `source` TEXT DEFAULT NULL COMMENT 'source',
    `keyWords` TEXT DEFAULT NULL COMMENT 'keyWords',
    `summary` TEXT DEFAULT NULL COMMENT 'summary',
    `userProCompleted` TEXT DEFAULT NULL COMMENT 'userProCompleted',
    `url` TEXT DEFAULT NULL COMMENT 'url'
) ENGINE=MyISAM CHARSET=utf8;
-- One row per examination/test entry, identified by its unique `key`.
-- `check` is a reserved word in MySQL, hence the backticks.
CREATE TABLE `check` (
    `id` INT PRIMARY KEY AUTO_INCREMENT,
    `key` VARCHAR(60) UNIQUE COMMENT 'key',
    `name` TEXT DEFAULT NULL COMMENT 'name',
    `alias_name` TEXT DEFAULT NULL COMMENT 'alias_name',
    `english_name` TEXT DEFAULT NULL COMMENT 'english_name',
    `introduction` TEXT DEFAULT NULL COMMENT 'introduction',
    `type` TEXT DEFAULT NULL COMMENT 'type',
    `fasting` TEXT DEFAULT NULL COMMENT 'fasting',
    `Indications` TEXT DEFAULT NULL COMMENT 'Indications',
    `reference` TEXT DEFAULT NULL COMMENT 'reference',
    `attention` TEXT DEFAULT NULL COMMENT 'attention',
    `prompt` TEXT DEFAULT NULL COMMENT 'prompt',
    `Specimen` TEXT DEFAULT NULL COMMENT 'Specimen',
    `Clinical` TEXT DEFAULT NULL COMMENT 'Clinical',
    `Inspection` TEXT DEFAULT NULL COMMENT 'Inspection',
    `Adverse` TEXT DEFAULT NULL COMMENT 'Adverse'
) ENGINE=MyISAM CHARSET=utf8;
-- One row per symptom entry, identified by its unique `key`.
CREATE TABLE `symptom` (
    `id` INT PRIMARY KEY AUTO_INCREMENT,
    `key` VARCHAR(60) UNIQUE COMMENT 'key',
    `name` TEXT DEFAULT NULL COMMENT 'name',
    `introduction` TEXT DEFAULT NULL COMMENT 'introduction',
    `Pathogeny` TEXT DEFAULT NULL COMMENT 'Pathogeny',
    `diagnosis` TEXT DEFAULT NULL COMMENT 'diagnosis',
    `Prevention` TEXT DEFAULT NULL COMMENT 'Prevention'
) ENGINE=MyISAM CHARSET=utf8;
-- One row per disease entry, identified by its unique `key`.
CREATE TABLE `disease` (
    `id` INT PRIMARY KEY AUTO_INCREMENT,
    `key` VARCHAR(60) UNIQUE COMMENT 'key',
    `name` TEXT DEFAULT NULL COMMENT 'name',
    `alias_name` TEXT DEFAULT NULL COMMENT 'alias_name',
    `english_name` TEXT DEFAULT NULL COMMENT 'english_name',
    `Abbreviation` TEXT DEFAULT NULL COMMENT 'Abbreviation',
    `ICD` TEXT DEFAULT NULL COMMENT 'ICD',
    `Department` TEXT DEFAULT NULL COMMENT 'Department',
    `introduction` TEXT DEFAULT NULL COMMENT 'introduction',
    `Pathogeny` TEXT DEFAULT NULL COMMENT 'Pathogeny',
    `Pathology` TEXT DEFAULT NULL COMMENT 'Pathology',
    `historyKeyPoints` TEXT DEFAULT NULL COMMENT 'historyKeyPoints',
    `keyPointsOfSymptoms` TEXT DEFAULT NULL COMMENT 'keyPointsOfSymptoms',
    `keyPointsOfPhysicalExamination` TEXT DEFAULT NULL COMMENT 'keyPointsOfPhysicalExamination',
    `Transfer` TEXT DEFAULT NULL COMMENT 'Transfer',
    `laboratory` TEXT DEFAULT NULL COMMENT 'laboratory',
    `Imaging` TEXT DEFAULT NULL COMMENT 'Imaging',
    `Other` TEXT DEFAULT NULL COMMENT 'Other',
    `clinical` TEXT DEFAULT NULL COMMENT 'clinical',
    `stages` TEXT DEFAULT NULL COMMENT 'stages',
    `diagnosis` TEXT DEFAULT NULL COMMENT 'diagnosis',
    `principles` TEXT DEFAULT NULL COMMENT 'principles',
    `generalTreatment` TEXT DEFAULT NULL COMMENT 'generalTreatment',
    `Medication` TEXT DEFAULT NULL COMMENT 'Medication',
    `surgicalTreatment` TEXT DEFAULT NULL COMMENT 'surgicalTreatment',
    `otherTreatment` TEXT DEFAULT NULL COMMENT 'otherTreatment',
    `commonComplications` TEXT DEFAULT NULL COMMENT 'commonComplications',
    `prognosis` TEXT DEFAULT NULL COMMENT 'prognosis',
    `followUp` TEXT DEFAULT NULL COMMENT 'followUp',
    `Prevention` TEXT DEFAULT NULL COMMENT 'Prevention'
) ENGINE=MyISAM CHARSET=utf8;
-- One row per instruction entry, identified by its unique `key`.
CREATE TABLE `instruction` (
    `id` INT PRIMARY KEY AUTO_INCREMENT,
    `key` VARCHAR(60) UNIQUE COMMENT 'key',
    `title` TEXT DEFAULT NULL COMMENT 'title',
    `mainFunction` TEXT DEFAULT NULL COMMENT 'mainFunction',
    `usageList` TEXT DEFAULT NULL COMMENT 'usageList'
) ENGINE=MyISAM CHARSET=utf8;
-- One row per clinical case, identified by its unique `key`.
-- `case` and `check` are reserved words in MySQL, hence the backticks.
CREATE TABLE `case` (
    `id` INT PRIMARY KEY AUTO_INCREMENT,
    `key` VARCHAR(60) UNIQUE COMMENT 'key',
    `source` TEXT DEFAULT NULL COMMENT 'source',
    `publishDate` TEXT DEFAULT NULL COMMENT 'publishDate',
    `keywords` TEXT DEFAULT NULL COMMENT 'keywords',
    `title` TEXT DEFAULT NULL COMMENT 'title',
    `sex` TEXT DEFAULT NULL COMMENT 'sex',
    `age` TEXT DEFAULT NULL COMMENT 'age',
    `complaint` TEXT DEFAULT NULL COMMENT 'complaint',
    `medicalHistory` TEXT DEFAULT NULL COMMENT 'medicalHistory',
    `check` TEXT DEFAULT NULL COMMENT 'check',
    `conclusion` TEXT DEFAULT NULL COMMENT 'conclusion',
    `treatmentProcess` TEXT DEFAULT NULL COMMENT 'treatmentProcess'
) ENGINE=MyISAM CHARSET=utf8;
import requests
from queue import Queue
import json
import threading
import MySQLdb
import time
import re
'''
@Author :王磊
@Date :2019/9/19
@Description:某微信小程序药品数据爬取
'''
#########################################################################
# MySQL account name used by every pipLine() connection
mysql_user = "root"
# MySQL password
mysql_password = "root"
# MySQL database (schema) name
mysql_database = "drugs_"
# Directory prefix for PDFs saved by the guide module; the guide key and
# ".pdf" are appended directly, so it must end with a path separator
guide_pdf_path = "c:/users/it1002/Desktop/pds/"
##########################################################################
# Value sent as the "authentication" request header on every API call.
# NOTE(review): hard-coded credential, and it looks like a token with an
# expiry -- confirm, and consider loading it from outside the source file.
token = "eyJhbGciOiJIUzUxMiJ9.eyJhcHasaWNhdGlvbkFjY291bnRJbmZvIjp7ImlkIjo5NTkxNywiY2hhbm5lbElkIjoiMTEwMDQ5MDAwMCIsImluc3RpdHV0aW9uSWQiOiIxMjQ0NDQwMzAwMDAzMzEwMDAwMDAwIiwicm9sZSI6MSwic291cmNlIjoxLCJzZXNzaW9uVHlwZSI6IndlY2hhdCIsImlzQXV0b0xvZ2luIjpmYWxzZSwiY29tbW9uVXNlcklkIjpudWxsLCJwYXltZW50TGV2ZWwiOm51bGx9LCJleHAiOjE1ODM3MjkzMTR9.NLehOcnaVrB5ckxOJSEqQLlpWKVUutEDPabgJStUSHc_RL4GrWj48W3UX4Pdm3Ju4-ziNSGm8WhdPvK4hdEcrg"
class drugSpider(threading.Thread):
    """Worker thread that crawls drug products for the ATC codes in a shared queue.

    For every ATC code it pages through the common-name list, pages through
    the products of each common name, fetches each product's detail and
    inserts one row into the `drug` table.
    """

    # Fields read from resp['basicProperty']. Together with 'pedia' they are
    # the columns written to the `drug` table, in this order.
    _BASIC_FIELDS = (
        'kgId', 'proprietaryName', 'productSpecification', 'manufacturer',
        'otc', 'medInsurance', 'cw', 'commonName', 'imageUrlList',
        'englishName', 'dosageForms', 'mainMaterial', 'storage',
        'permissionNumber', 'atcNameZh', 'fda', 'priceRange', 'packagePrice',
        'componentId',
    )

    def __init__(self, atcCodeQueue, *args, **kwargs):
        """Store the queue of ATC codes this worker consumes."""
        super(drugSpider, self).__init__(*args, **kwargs)
        self.atcCodeQueue = atcCodeQueue

    def getDrugs(self, key, page):
        """Return (products, hasMore) for one 10-item page of a common-name key."""
        offset = (page - 1) * 10
        url = "https://med-askbob.pingan.com/pedia/drug/product/list?key=" + key + "&atcCode=&filterType=common_name&relationType=&specification=&forms=&offset=" + str(offset) + "&pageSize=10"
        resp = getHtml(url)['data']
        return (resp['list'], resp['hasMore'])

    def getDrugsList(self, atcCode, page):
        """Return (commonDrugs, hasMore) for one 10-item page of an ATC code."""
        offset = (page - 1) * 10
        url = "https://med-askbob.pingan.com/pedia/drug/common/list?pageSize=10&offset=" + str(offset) + "&atcCode=" + atcCode
        resp = getHtml(url)['data']
        return (resp['list'], resp['hasMore'])

    def getDrugDetail(self, key):
        """Fetch one product and flatten it into a dict keyed by `drug` column.

        Any field that cannot be read becomes "". `imageUrlList` and `pedia`
        are stringified with single quotes turned into double quotes;
        `packagePrice` gets the same quote replacement and becomes "" when it
        is not a string (unchanged from the previous behaviour).
        """
        url = "https://med-askbob.pingan.com/pedia/drug/product/detail?key=" + key + "&relationType="
        resp = getHtml(url)['data']
        drugs = {}
        for field in self._BASIC_FIELDS:
            try:
                value = resp['basicProperty'][field]
                if field == 'imageUrlList':
                    value = str(value).replace("'", '"')
                elif field == 'packagePrice':
                    value = value.replace("'", '"')
            except Exception:
                value = ""
            drugs[field] = value
        try:
            drugs['pedia'] = str(resp['pedia']).replace("'", '"')
        except Exception:
            drugs['pedia'] = ""
        return drugs

    def pipLine(self, drug):
        """Insert one drug dict into the `drug` table; errors are printed, not raised."""
        columns = self._BASIC_FIELDS + ('pedia',)
        sql = "insert into drug(%s) values(%s)" % (
            ", ".join("`%s`" % column for column in columns),
            ", ".join(["%s"] * len(columns)),
        )
        conn = None
        try:
            conn = MySQLdb.connect(user=mysql_user, password=mysql_password, database=mysql_database, charset='utf8')
            cursor = conn.cursor()
            # Bound parameters instead of string formatting: quotes in the
            # data can no longer break or inject into the statement. str()
            # keeps the stored text the same as the old '%s' formatting.
            cursor.execute(sql, [str(drug[column]) for column in columns])
            conn.commit()
        except Exception as e:
            print(e)
        finally:
            if conn is not None:
                conn.close()

    def _crawlProducts(self, commonKey):
        """Store every product listed under one common-name key, page by page."""
        page = 1
        while True:
            products, hasMore = self.getDrugs(commonKey, page)
            for product in products:
                self.pipLine(self.getDrugDetail(product['key']))
                time.sleep(5)  # throttle between product detail requests
            if not hasMore:
                break
            page += 1

    def run(self):
        """Drain the ATC-code queue, crawling every page of every code.

        The outer and inner pagination use separate variables; previously
        both shared one tuple, so the outer loop read the inner loop's
        `hasMore` and never advanced past its first page.
        """
        from queue import Empty
        while True:
            try:
                # get_nowait avoids the empty()/get() race between workers
                atcCode = self.atcCodeQueue.get_nowait()
            except Empty:
                break
            page = 1
            while True:
                try:
                    commonDrugs, hasMore = self.getDrugsList(atcCode, page)
                    for commonDrug in commonDrugs:
                        self._crawlProducts(commonDrug['key'])
                except Exception as e:
                    # give up on this ATC code but keep the worker alive
                    print(e)
                    break
                if not hasMore:
                    break
                page += 1
class guideSpider(threading.Thread):
    """Worker thread that fetches guide details, saves their PDFs and stores them."""

    # Response fields that are stringified with ' replaced by "
    _TEXT_FIELDS = ('title', 'source', 'keyWords', 'summary', 'userProCompleted')
    # Columns of the `guide` table, in insert order
    _COLUMNS = ('key',) + _TEXT_FIELDS + ('url',)

    def __init__(self, keyQueue, *args, **kwargs):
        """Store the queue of guide keys this worker consumes."""
        super(guideSpider, self).__init__(*args, **kwargs)
        self.keyQueue = keyQueue

    def parser(self, key):
        """Fetch one guide and return it as a dict keyed by `guide` column.

        Fields missing from the response become "". When the guide has a
        url, the file is downloaded to guide_pdf_path + key + ".pdf".
        """
        url = "https://med-askbob.pingan.com/pedia/guide/detail?key=" + key
        resp = getHtml(url)
        guide = {'key': key.replace("'", '"')}
        for field in self._TEXT_FIELDS:
            try:
                guide[field] = str(resp['data'][field]).replace("'", '"')
            except Exception:
                guide[field] = ""
        try:
            guide['url'] = resp['data']['url']
        except Exception:
            guide['url'] = ""
        if guide['url']:
            downLoadFile(guide['url'], guide_pdf_path + key + ".pdf")
        return guide

    def pipLine(self, guide):
        """Insert one guide dict into the `guide` table; errors are printed, not raised."""
        sql = "insert into guide(%s) values(%s)" % (
            ", ".join("`%s`" % column for column in self._COLUMNS),
            ", ".join(["%s"] * len(self._COLUMNS)),
        )
        conn = None
        try:
            conn = MySQLdb.connect(user=mysql_user, password=mysql_password, database=mysql_database, charset='utf8')
            cursor = conn.cursor()
            # Bound parameters: quotes in the data cannot break the statement.
            # str() keeps the stored text the same as the old '%s' formatting.
            cursor.execute(sql, [str(guide[column]) for column in self._COLUMNS])
            conn.commit()
        except Exception as e:
            print(e)
        finally:
            if conn is not None:
                conn.close()

    def run(self):
        """Process guide keys until the queue is empty."""
        from queue import Empty
        while True:
            try:
                # get_nowait avoids the empty()/get() race between workers
                key = self.keyQueue.get_nowait()
            except Empty:
                break
            self.pipLine(self.parser(key))
class checkSpider(threading.Thread):
    """Worker thread that crawls examination ("check") entries per category key."""

    # Label used in the API response -> column of the `check` table
    _LABEL_TO_COLUMN = {
        '标准名称': 'name',
        '别名': 'alias_name',
        '英文名称': 'english_name',
        '简介': 'introduction',
        '分类': 'type',
        '是否空腹': 'fasting',
        '检查提示': 'prompt',
        '适应证': 'Indications',
        '参考值': 'reference',
        '注意事项': 'attention',
        '标本要求': 'Specimen',
        '临床意义': 'Clinical',
        '检查过程': 'Inspection',
        '不良反应': 'Adverse',
    }
    # Columns of the `check` table, in insert order
    _COLUMNS = (
        'key', 'name', 'alias_name', 'english_name', 'introduction', 'type',
        'fasting', 'Indications', 'reference', 'attention', 'prompt',
        'Specimen', 'Clinical', 'Inspection', 'Adverse',
    )

    def __init__(self, keyQueue, *args, **kwargs):
        """Store the queue of category keys this worker consumes."""
        super(checkSpider, self).__init__(*args, **kwargs)
        self.keyQueue = keyQueue

    def getCheckSubCategoryList(self, category):
        """Return the 'data' payload of the check dictionary for one category."""
        url = "https://med-askbob.pingan.com/pedia/check/dic?category=" + category
        return getHtml(url)['data']

    def pushCheck(self, check, key, value):
        """Store `value` (with ' replaced by ") under the column matching label `key`.

        Labels that are not in the mapping are ignored. Returns `check`.
        """
        value = value.replace("'", '"')
        # isinstance guard: an unhashable label must be ignored, not raise
        column = self._LABEL_TO_COLUMN.get(key) if isinstance(key, str) else None
        if column is not None:
            check[column] = value
        return check

    @staticmethod
    def _keyValue(node, fallbackKey, fallbackSource):
        """Return (label, text) from a {'key', 'value'} node.

        When the node does not have that shape, fall back to the parent
        label and the stringified parent value.
        """
        try:
            return node['key'], str(node['value']).replace("'", '"')
        except Exception:
            return fallbackKey, str(fallbackSource).replace("'", '"')

    def getCheckDetail(self, key):
        """Fetch one check entry and return it as a dict keyed by `check` column.

        Every column starts as "". Each section of the response is either a
        collection of more than one labelled child (each child is stored
        under its own label) or a single value (stored under its own label,
        or under the section label if it has none).
        """
        url = "https://med-askbob.pingan.com/pedia/check/detail?key=" + key
        pedias = getHtml(url)['data']['pedia']
        check = dict.fromkeys(self._COLUMNS, "")
        check['key'] = key
        for pedia in pedias:
            sectionKey = pedia['key']
            sectionValue = pedia['value']
            try:
                nodes = sectionValue if len(sectionValue) > 1 else [sectionValue]
                for node in nodes:
                    label, text = self._keyValue(node, sectionKey, sectionValue)
                    check = self.pushCheck(check, label, text)
            except Exception:
                # value has no length (e.g. None or a number): store it whole
                text = str(sectionValue).replace("'", '"')
                check = self.pushCheck(check, sectionKey, text)
        return check

    def pipLine(self, check):
        """Insert one check dict into the `check` table; errors are printed, not raised."""
        sql = "insert into `check`(%s) values(%s)" % (
            ", ".join("`%s`" % column for column in self._COLUMNS),
            ", ".join(["%s"] * len(self._COLUMNS)),
        )
        conn = None
        try:
            conn = MySQLdb.connect(user=mysql_user, password=mysql_password, database=mysql_database, charset='utf8')
            cursor = conn.cursor()
            # Bound parameters: quotes in the data cannot break the statement.
            # str() keeps the stored text the same as the old '%s' formatting.
            cursor.execute(sql, [str(check[column]) for column in self._COLUMNS])
            conn.commit()
        except Exception as e:
            print(e)
        finally:
            if conn is not None:
                conn.close()

    def run(self):
        """For every queued category, store each entry of each sub-category."""
        from queue import Empty
        while True:
            try:
                # get_nowait avoids the empty()/get() race between workers
                key = self.keyQueue.get_nowait()
            except Empty:
                break
            subCategoryList = self.getCheckSubCategoryList(key)
            for subCategory in subCategoryList:
                for entry in subCategoryList[subCategory]:
                    self.pipLine(self.getCheckDetail(entry['key']))
class symptomSpider(threading.Thread):
    """Worker thread that fetches symptom details and stores them."""

    # Columns of the `symptom` table, in insert order
    _COLUMNS = ('key', 'name', 'introduction', 'Pathogeny', 'diagnosis', 'Prevention')

    def __init__(self, symptomKeyQueue, *args, **kwargs):
        """Store the queue of symptom keys this worker consumes."""
        super(symptomSpider, self).__init__(*args, **kwargs)
        self.symptomKeyQueue = symptomKeyQueue

    @staticmethod
    def _normalize(text):
        """Convert full-width parentheses to ASCII and single quotes to double quotes.

        NOTE(review): the pasted source replaced "(" with "(" and ")" with
        ")", which does nothing; this presumably was full-width parentheses
        flattened by the paste -- confirm against the original file.
        """
        return text.replace("\uff08", "(").replace("\uff09", ")").replace("'", '"')

    def getSymptomDetail(self, key):
        """Fetch one symptom and return it as a dict keyed by `symptom` column.

        Each field is read from a fixed position in the response's 'pedia'
        list; a field that cannot be read becomes ''.
        """
        url = "https://med-askbob.pingan.com/pedia/symptom/detail?key=" + key
        pedia = getHtml(url)['data']['pedia']
        extractors = (
            ('name', lambda: pedia[0]['value'][0]['value']),
            ('introduction', lambda: getValueStr(pedia[0]['value'][1]['value'])),
            ('Pathogeny', lambda: getValueStr(pedia[1]['value'])),
            ('diagnosis', lambda: getValueStr(pedia[2]['value'])),
            ('Prevention', lambda: getValueStr(pedia[3]['value'])),
        )
        symptom = {'key': key}
        for field, extract in extractors:
            try:
                symptom[field] = self._normalize(extract())
            except Exception:
                symptom[field] = ''
        return symptom

    def pipLine(self, symptom):
        """Insert one symptom dict into the `symptom` table; errors are printed, not raised."""
        sql = "insert into `symptom`(%s) values(%s)" % (
            ", ".join("`%s`" % column for column in self._COLUMNS),
            ", ".join(["%s"] * len(self._COLUMNS)),
        )
        conn = None
        try:
            conn = MySQLdb.connect(user=mysql_user, password=mysql_password, database=mysql_database, charset='utf8')
            cursor = conn.cursor()
            # Bound parameters: quotes in the data cannot break the statement.
            # str() keeps the stored text the same as the old '%s' formatting.
            cursor.execute(sql, [str(symptom[column]) for column in self._COLUMNS])
            conn.commit()
        except Exception as e:
            print(e)
        finally:
            if conn is not None:
                conn.close()

    def run(self):
        """Process symptom keys until the queue is empty.

        The trailing exit(0) was dropped: inside a thread it only raised a
        SystemExit at the point where run() was about to return anyway.
        """
        from queue import Empty
        while True:
            try:
                # get_nowait avoids the empty()/get() race between workers
                symptomKey = self.symptomKeyQueue.get_nowait()
            except Empty:
                break
            self.pipLine(self.getSymptomDetail(symptomKey))
class diseaseSpider(threading.Thread):
    """Worker thread that fetches disease details and stores them."""

    # Column -> position of its value inside the response's 'pedia' list.
    # A path (a, b, c) means pedia[a]['value'][b]['value'][c]['value'].
    # Each of these values is flattened with getValueStr().
    _LIST_FIELDS = (
        ('alias_name', (0, 1)),
        ('english_name', (0, 2)),
        ('Abbreviation', (0, 3)),
        ('ICD', (0, 4)),
        ('Department', (0, 5)),
        ('introduction', (0, 6)),
        ('Pathogeny', (1, 0)),
        ('Pathology', (1, 1)),
        ('historyKeyPoints', (2, 0)),
        ('keyPointsOfSymptoms', (2, 1)),
        ('keyPointsOfPhysicalExamination', (2, 2)),
        ('Transfer', (2, 3)),
        ('laboratory', (2, 4, 0)),
        ('Imaging', (2, 4, 1)),
        ('Other', (2, 4, 2)),
        ('clinical', (2, 5)),
        ('stages', (2, 6)),
        ('diagnosis', (3,)),
        ('principles', (4, 0)),
        ('generalTreatment', (4, 1)),
        ('Medication', (4, 2)),
        ('surgicalTreatment', (4, 3)),
        ('otherTreatment', (4, 4)),
        ('commonComplications', (5,)),
        ('prognosis', (6,)),
        ('followUp', (7,)),
        ('Prevention', (8,)),
    )
    # Columns of the `disease` table, in insert order
    _COLUMNS = ('key', 'name') + tuple(field for field, _ in _LIST_FIELDS)

    def __init__(self, diseaseKeyQueue, *args, **kwargs):
        """Store the queue of disease keys this worker consumes."""
        super(diseaseSpider, self).__init__(*args, **kwargs)
        self.diseaseKeyQueue = diseaseKeyQueue

    @staticmethod
    def _node(pedia, path):
        """Follow `path` through nested [index]['value'] steps and return the value."""
        node = pedia
        for index in path:
            node = node[index]['value']
        return node

    def getDiseaseDetail(self, key):
        """Fetch one disease and return it as a dict keyed by `disease` column.

        `name` is taken as-is from pedia[0]['value'][0]['value']; every other
        field is flattened with getValueStr(). A field that cannot be read
        becomes ''.
        """
        url = "https://med-askbob.pingan.com/pedia/disease/detail?key=" + key
        pedia = getHtml(url)['data']['pedia']
        disease = {'key': key}
        try:
            disease['name'] = self._node(pedia, (0, 0))
        except Exception:
            disease['name'] = ''
        for field, path in self._LIST_FIELDS:
            try:
                disease[field] = getValueStr(self._node(pedia, path))
            except Exception:
                disease[field] = ''
        return disease

    def pipLine(self, disease):
        """Insert one disease dict into the `disease` table; errors are printed, not raised."""
        sql = "insert into `disease`(%s) values(%s)" % (
            ", ".join("`%s`" % column for column in self._COLUMNS),
            ", ".join(["%s"] * len(self._COLUMNS)),
        )
        conn = None
        try:
            conn = MySQLdb.connect(user=mysql_user, password=mysql_password, database=mysql_database, charset='utf8')
            cursor = conn.cursor()
            # Bound parameters: this text is never quote-escaped upstream, so
            # any apostrophe used to break the string-built statement.
            cursor.execute(sql, [str(disease[column]) for column in self._COLUMNS])
            conn.commit()
        except Exception as e:
            print(e)
        finally:
            if conn is not None:
                conn.close()

    def run(self):
        """Process disease keys until the queue is empty."""
        from queue import Empty
        while True:
            try:
                # get_nowait avoids the empty()/get() race between workers
                diseaseKey = self.diseaseKeyQueue.get_nowait()
            except Empty:
                break
            self.pipLine(self.getDiseaseDetail(diseaseKey))
class insrtuctionSpider(threading.Thread):
    """Worker thread that fetches instruction ("oldu") details and stores them."""

    # Columns of the `instruction` table, in insert order
    _COLUMNS = ('key', 'title', 'mainFunction', 'usageList')

    def __init__(self, insrtuctionKeyQueue, *args, **kwargs):
        """Store the queue of instruction keys this worker consumes."""
        super(insrtuctionSpider, self).__init__(*args, **kwargs)
        self.insrtuctionKeyQueue = insrtuctionKeyQueue

    def getInstructionDetail(self, key):
        """Fetch one instruction and return it as a dict keyed by `instruction` column.

        `title` is taken as-is; `mainFunction` and `usageList` are
        stringified with ' replaced by ". Unreadable fields become "".
        """
        url = "https://med-askbob.pingan.com/pedia/oldu/detail?key=" + key + "&type=oldu"
        instructionResp = getHtml(url)['data']
        instruction = {'key': key}
        try:
            instruction['title'] = instructionResp['title']
        except Exception:
            instruction['title'] = ""
        for field in ('mainFunction', 'usageList'):
            try:
                instruction[field] = str(instructionResp[field]).replace("'", '"')
            except Exception:
                instruction[field] = ""
        return instruction

    def pipLine(self, instruction):
        """Insert one instruction dict into the `instruction` table; errors are printed."""
        sql = "insert into `instruction`(%s) values(%s)" % (
            ", ".join("`%s`" % column for column in self._COLUMNS),
            ", ".join(["%s"] * len(self._COLUMNS)),
        )
        conn = None
        try:
            conn = MySQLdb.connect(user=mysql_user, password=mysql_password, database=mysql_database, charset='utf8')
            cursor = conn.cursor()
            # Bound parameters: quotes in the data cannot break the statement.
            # str() keeps the stored text the same as the old '%s' formatting.
            cursor.execute(sql, [str(instruction[column]) for column in self._COLUMNS])
            conn.commit()
        except Exception as e:
            print(e)
        finally:
            if conn is not None:
                conn.close()

    def run(self):
        """Process instruction keys until the queue is empty."""
        from queue import Empty
        while True:
            try:
                # get_nowait avoids the empty()/get() race between workers
                key = self.insrtuctionKeyQueue.get_nowait()
            except Empty:
                break
            self.pipLine(self.getInstructionDetail(key))
class caseSpider(threading.Thread):
    """Thread that pages through the case list and stores every case's detail."""

    # Columns of the `case` table, in insert order
    _COLUMNS = (
        'key', 'source', 'publishDate', 'keywords', 'title', 'sex', 'age',
        'complaint', 'medicalHistory', 'check', 'conclusion', 'treatmentProcess',
    )
    # Column -> position inside detail['pedia']; a path (a, b, c) means
    # pedia[a]['value'][b]['value'][c]['value']. Stored as-is.
    _RAW_PATHS = (
        ('sex', (0, 0, 0)),
        ('age', (0, 0, 1)),
    )
    # Same path convention; stringified with ' replaced by ".
    _TEXT_PATHS = (
        ('complaint', (0, 1)),
        ('medicalHistory', (0, 2)),
        ('check', (1, 0)),
        ('conclusion', (1, 1)),
        ('treatmentProcess', (2, 0)),
    )

    def __init__(self, *args, **kwargs):
        super(caseSpider, self).__init__(*args, **kwargs)

    def pipLine(self, case):
        """Insert one case dict into the `case` table; errors are printed, not raised."""
        sql = "insert into `case`(%s) values(%s)" % (
            ", ".join("`%s`" % column for column in self._COLUMNS),
            ", ".join(["%s"] * len(self._COLUMNS)),
        )
        conn = None
        try:
            conn = MySQLdb.connect(user=mysql_user, password=mysql_password, database=mysql_database, charset='utf8')
            cursor = conn.cursor()
            # Bound parameters: quotes in the data cannot break the statement.
            # str() keeps the stored text the same as the old '%s' formatting.
            cursor.execute(sql, [str(case[column]) for column in self._COLUMNS])
            conn.commit()
        except Exception as e:
            print(e)
        finally:
            if conn is not None:
                conn.close()

    def getCaseListTuple(self, page):
        """Return (cases, hasMore) for one 10-item page, or None if it cannot be read."""
        offset = str((int(page) - 1) * 10)
        url = "https://med-askbob.pingan.com/pedia/case/list?pageSize=10&offset=" + offset + "&allSecondDept=&dept="
        try:
            resp = getHtml(url)['data']
            return (resp['list'], resp['hasMore'])
        except Exception:
            return None

    @staticmethod
    def _node(pedia, path):
        """Follow `path` through nested [index]['value'] steps and return the value."""
        node = pedia
        for index in path:
            node = node[index]['value']
        return node

    def getCaseDetail(self, key):
        """Fetch one case and return it as a dict keyed by `case` column.

        A field that cannot be read becomes "". `keywords` keeps, for each
        keyword string, the first text found between '>' and '<'; keyword
        strings without such text are skipped.
        """
        url = "https://med-askbob.pingan.com/pedia/case/detail?key=" + key + "&type=case"
        detail = getHtml(url)['data']
        case = {'key': key}
        for field in ('source', 'publishDate', 'title'):
            try:
                case[field] = detail[field]
            except Exception:
                case[field] = ""
        try:
            tagText = re.compile(r'>(.*?)<')
            extracted = []
            for keyword in detail['keywords']:
                try:
                    extracted.append(re.findall(tagText, keyword)[0])
                except Exception:
                    pass
            case['keywords'] = str(extracted).replace("'", '"')
        except Exception:
            case['keywords'] = ""
        for field, path in self._RAW_PATHS:
            try:
                case[field] = self._node(detail['pedia'], path)
            except Exception:
                case[field] = ""
        for field, path in self._TEXT_PATHS:
            try:
                case[field] = str(self._node(detail['pedia'], path)).replace("'", '"')
            except Exception:
                case[field] = ""
        return case

    def run(self):
        """Store every case of every list page, stopping on the last or an unreadable page."""
        page = 1
        while True:
            listing = self.getCaseListTuple(page)
            if not listing:
                # unreadable page: stop rather than refetch the same page forever
                break
            caseList, hasMore = listing
            for item in caseList:
                self.pipLine(self.getCaseDetail(item['key']))
            if not hasMore:
                break
            page += 1
# HTTP headers passed to requests by getHtml() and postHtml(): the API token
# in "authentication", plus fixed User-Agent and Referer strings.
headers = {
"authentication": token,
"User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 12_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/7.0.5(0x17000523) NetType/WIFI Language/zh_CN",
"Referer": "https://med-askbob.pingan.com/front_pedia/drugEntranceOne"
}
def downLoadFile(url, path):
    """Download `url` and write the response body to `path` (best effort).

    Failures are printed rather than raised so one bad file does not stop
    the crawl. An HTTP error status now counts as a failure, so an error
    page is no longer saved under the target file name.
    """
    try:
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()
        with open(path, "wb") as f:
            f.write(resp.content)
    except Exception as e:
        print("download failed for %s: %s" % (url, e))
def getHtml(url, retries=None, delay=1.0):
    """GET `url` with the shared headers and return the decoded JSON body.

    The request is retried whenever it fails or the body is not valid JSON.

    :param url: address to fetch.
    :param retries: maximum number of retries after the first attempt;
        None (the default) retries without limit, as before.
    :param delay: seconds to wait between attempts, so a failing server is
        not hit in a tight loop.
    :return: the parsed JSON document.
    :raises Exception: the last error, once `retries` is exhausted.
    """
    failures = 0
    while True:
        try:
            resp = requests.get(url, headers=headers, timeout=10)
            return json.loads(resp.content.decode("utf-8"))
        except Exception:
            failures += 1
            if retries is not None and failures > retries:
                raise
            time.sleep(delay)
def postHtml(url, data, retries=None, delay=1.0):
    """POST `data` to `url` with the shared headers and return the decoded JSON body.

    The request is retried whenever it fails or the body is not valid JSON.

    :param url: address to post to.
    :param data: request body passed to requests.post.
    :param retries: maximum number of retries after the first attempt;
        None (the default) retries without limit, as before.
    :param delay: seconds to wait between attempts.
    :return: the parsed JSON document.
    :raises Exception: the last error, once `retries` is exhausted.
    """
    failures = 0
    while True:
        try:
            resp = requests.post(url, headers=headers, data=data, timeout=10)
            return json.loads(resp.content.decode("utf-8"))
        except Exception:
            failures += 1
            if retries is not None and failures > retries:
                raise
            time.sleep(delay)
def getValueStr(values):
    """Concatenate the 'value' text of every entry, each followed by CRLF.

    Entries that have no 'value' key, or whose 'value' is not a string,
    are skipped. None yields "" instead of raising.

    :param values: iterable of dict-like entries, or None.
    :return: the joined text ("" when nothing usable was found).
    """
    if values is None:
        return ""
    lines = []
    for entry in values:
        try:
            lines.append(entry['value'] + "\r\n")
        except Exception:
            continue
    return "".join(lines)
####################################################
# 药品模块函数库
####################################################
def getDrugCategoryList(url):
    """Fetch one drug-category listing and return the ATC code of each entry."""
    payload = getHtml(url)
    return [entry['atcCode'] for entry in payload['data']]
def getDrugCategoryQueue():
    """Build the queue of ATC codes: type X categories first, then type Z."""
    # the Z listing is requested before the X listing, as before
    codesZ = getDrugCategoryList("https://med-askbob.pingan.com/pedia/drug/category?type=Z")
    codesX = getDrugCategoryList("https://med-askbob.pingan.com/pedia/drug/category?type=X")
    atcCodeQueue = Queue(0)
    for atcCode in codesX + codesZ:
        atcCodeQueue.put(atcCode)
    return atcCodeQueue
####################################################
# 指南模块函数库
####################################################
def getGuideCategoryList(page):
    """Return (guides, hasMore) for one 10-item page of the guide list."""
    start = (page - 1) * 10
    url = "https://med-askbob.pingan.com/pedia/guide/list?pageSize=10&offset=" + str(start) + "&dept=&allSecondDept=&source=&type="
    payload = getHtml(url)['data']
    return (payload['list'], payload['hasMore'])
def getGuideKeyQueue():
    """Build the queue of guide keys by walking every page of the guide list.

    The loop used to `break` right after incrementing the page number, so
    only the first page was ever read; it now continues while the API
    reports more pages.
    """
    page = 1
    guideKeyQueue = Queue(0)
    while True:
        guideCategorys, hasMore = getGuideCategoryList(page)
        for guideCategory in guideCategorys:
            guideKeyQueue.put(guideCategory['key'])
        if not hasMore:
            break
        page += 1
    return guideKeyQueue
####################################################
# 检验检查函数库
####################################################
def getCheckCategoryList():
    """Return the 'data' payload of the check category endpoint."""
    payload = getHtml("https://med-askbob.pingan.com/pedia/check/category")
    return payload['data']
def getCheckKeyQueue():
    """Build the queue of check category keys from the category tree.

    Third-level keys are queued where a second-level node has children;
    otherwise the second-level key itself is queued. Top-level nodes
    without children contribute nothing.
    """
    categoryTree = getCheckCategoryList()
    keyQueue = Queue(0)
    for topNode in categoryTree:
        middleNodes = topNode['childList']
        if not middleNodes:
            continue
        for middleNode in middleNodes:
            leafNodes = middleNode['childList']
            if not leafNodes:
                keyQueue.put(middleNode['key'])
                continue
            for leafNode in leafNodes:
                keyQueue.put(leafNode['key'])
    return keyQueue
####################################################
# 症状模块函数库
####################################################
def getSymptomList():
    """Return the 'data' payload of the symptom dictionary endpoint."""
    payload = getHtml("https://med-askbob.pingan.com/pedia/symptom/dic?dept=&allSecondDept=true")
    return payload['data']
def getSymptomKeyQueue():
    """Build the queue of symptom keys from every group of the symptom dictionary."""
    symptomKeyQueue = Queue(0)
    groups = getSymptomList()
    for groupName in groups:
        for entry in groups[groupName]:
            symptomKeyQueue.put(entry['key'])
    return symptomKeyQueue
####################################################
# 疾病函数库
####################################################
def getDiseaseList():
    """Return the 'data' payload of the disease dictionary endpoint."""
    payload = getHtml("https://med-askbob.pingan.com/pedia/disease/dic?dept=&allSecondDept=true")
    return payload['data']
def getDiseaseKeyQueue():
    """Build the queue of disease keys from every group of the disease dictionary."""
    groups = getDiseaseList()
    diseaseKeyQueue = Queue(0)
    for groupName in groups:
        for entry in groups[groupName]:
            diseaseKeyQueue.put(entry['key'])
    return diseaseKeyQueue
####################################################
# 超说明书函数库
####################################################
def getInstructionParCategoryList():
    """Return the 'data' payload of the instruction ("oldu") category endpoint."""
    payload = getHtml("https://med-askbob.pingan.com/pedia/oldu/category")
    return payload['data']
def getInstructionChilCategoryList(atcCode):
    """Return the 'data' payload of the instruction list for one ATC code."""
    payload = getHtml("https://med-askbob.pingan.com/pedia/oldu/list?atcCode=" + atcCode)
    return payload['data']
def getInsrtuctionKeyQueue(limit=6):
    """Build the queue of instruction keys across all parent categories.

    :param limit: stop after this many keys have been queued. The default
        of 6 reproduces the previously hard-coded cap, which the original
        comment marked as test-only; pass None to queue every key.
    :return: the filled Queue.
    """
    insrtuctionKeyQueue = Queue(0)
    queued = 0
    for par in getInstructionParCategoryList():
        chils = getInstructionChilCategoryList(par['atcCode'])
        for chil in chils:
            try:
                insrtuctionKeyQueue.put(chil['key'])
            except Exception:
                # entry without a usable key: skip it
                continue
            queued += 1
            if limit is not None and queued >= limit:
                return insrtuctionKeyQueue
    return insrtuctionKeyQueue
def main():
    """Start one worker thread per module, building each module's queue first.

    Modules run in this order: drug, guide, check, symptom, disease,
    instruction, then case (which needs no queue). Each queue is built
    synchronously before that module's worker is started.
    """
    workersPerModule = 1
    queuedModules = (
        (getDrugCategoryQueue, drugSpider),
        (getGuideKeyQueue, guideSpider),
        (getCheckKeyQueue, checkSpider),
        (getSymptomKeyQueue, symptomSpider),
        (getDiseaseKeyQueue, diseaseSpider),
        (getInsrtuctionKeyQueue, insrtuctionSpider),
    )
    for buildQueue, spiderClass in queuedModules:
        sharedQueue = buildQueue()
        for _ in range(workersPerModule):
            worker = spiderClass(sharedQueue)
            worker.start()
    for _ in range(workersPerModule):
        worker = caseSpider()
        worker.start()


if __name__ == '__main__':
    main()