# Import the os module
import os

# Directory to search
path = "Day1-homework"
# Filename substring to look for
filename = "2020"
# List that collects the matching file paths
result = []

def findfiles():
    # Walk the directory tree and collect every file whose name contains `filename`
    for root, dirs, files in os.walk(path):
        # root: current directory path
        # dirs: sub-directories under root
        # files: non-directory files under root
        for f in files:
            if filename in f:
                result.append(os.path.join(root, f))
    # Print the results with a 1-based index
    for index, name in enumerate(result):
        print('File No. {}, filename: {}'.format(index + 1, name))

if __name__ == '__main__':
    findfiles()
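For comparison, the same recursive search can be written with pathlib's rglob. This is only an alternative sketch, not part of the homework solution; findfiles_pathlib is a hypothetical name.

from pathlib import Path

def findfiles_pathlib(path="Day1-homework", keyword="2020"):
    # rglob('*') walks the tree recursively, like os.walk above
    return [str(p) for p in Path(path).rglob('*') if p.is_file() and keyword in p.name]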
Find the table that lists the contestants ("参赛学员") among all the <table> tags. In the notebook this fragment sits inside the try block of the wiki-crawling function; it is wrapped in a small helper here so it parses on its own, with soup being the BeautifulSoup object of the Baidu Baike page.

def find_contestant_table(soup):
    try:
        # Every candidate table on the page
        tables = soup.find_all('table', {'class': 'table-view log-set-param'})
        crawl_table_title = "参赛学员"
        for table in tables:
            # Look at the headings just before each table to find the "参赛学员" section
            table_titles = table.find_previous('div').find_all('h3')
            for title in table_titles:
                # True when the h3 heading's text is exactly 参赛学员
                if crawl_table_title in title:
                    return table
    except Exception as e:
        print(e)
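The crawl_pic_urls() step below reads a work/<today>.json file whose entries carry 'name' and 'link' keys; that file is produced from the table found above. A minimal sketch of that intermediate step, assuming each contestant appears in the table as an <a> tag whose text is the name and whose href is the relative Baike link (parse_wiki_data and the exact column layout are assumptions, not the original code):

import json
import datetime

def parse_wiki_data(table):
    # Collect one {'name', 'link'} dict per contestant from the table's <a> tags
    stars = []
    for a in table.find_all('a'):
        href = a.get('href')
        if not href:
            continue
        stars.append({
            'name': a.get_text(strip=True),
            'link': 'https://baike.baidu.com' + href,
        })
    today = datetime.datetime.now().strftime('%Y%m%d')
    with open('work/' + today + '.json', 'w', encoding='UTF-8') as f:
        json.dump(stars, f, ensure_ascii=False)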
Find the links to each contestant's pictures.

import json
import requests
from bs4 import BeautifulSoup

def crawl_pic_urls():
    '''
    Crawl each contestant's Baidu Baike pictures and save them.
    '''
    # `today` is the date string used when the JSON file was written (defined earlier in the notebook)
    with open('work/' + today + '.json', 'r', encoding='UTF-8') as file:
        json_array = json.loads(file.read())

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
    }
    public_url = 'https://baike.baidu.com'

    for star in json_array:
        name = star['name']   # 109 contestants in total
        link = star['link']

        # Crawl every picture of this contestant and collect the URLs in pic_urls
        response = requests.get(link, headers=headers)
        soup = BeautifulSoup(response.text, 'lxml')
        # The summary picture links to the contestant's photo-album page
        pic_list_url = soup.select('.summary-pic a')[0].get('href')
        pic_list_url = public_url + pic_list_url

        # Send an HTTP GET request to the photo-album page
        pic_list_response = requests.get(pic_list_url, headers=headers)
        bs = BeautifulSoup(pic_list_response.text, 'lxml')
        pic_list_html = bs.select('.pic-list img')

        pic_urls = []
        for pic_html in pic_list_html:
            pic_url = pic_html.get('src')
            if 'resize' in pic_url:
                pic_url = pic_url.split('?')[0]   # strip the resize parameters so the original image is downloaded
            pic_urls.append(pic_url)

        # Download every image in pic_urls into a folder named after the contestant
        down_pic(name, pic_urls)
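down_pic() is called above but not shown in this section. A minimal sketch under the assumption that it simply saves each URL into a folder named after the contestant (the work/pics/ location and the .jpg naming are assumptions):

import os
import requests

def down_pic(name, pic_urls):
    # Hypothetical helper: download every URL in pic_urls into work/pics/<name>/
    path = 'work/pics/' + name + '/'
    if not os.path.exists(path):
        os.makedirs(path)
    for i, pic_url in enumerate(pic_urls):
        try:
            pic = requests.get(pic_url, timeout=15)
            with open(path + str(i + 1) + '.jpg', 'wb') as f:
                f.write(pic.content)
        except Exception as e:
            print('Failed to download picture {}: {}'.format(pic_url, e))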
3. Data visualization

The subtle part here is how to feed the data into the chart.

import matplotlib.pyplot as plt
import json
import os

# Display matplotlib figures inline in the notebook
%matplotlib inline

with open('data/data31557/20200422.json', 'r', encoding='UTF-8') as file:
    json_array = json.loads(file.read())

# Draw a bar chart of the contestants' regional distribution:
# x axis is the region, y axis is the number of contestants from that region
zones = []
for star in json_array:
    zone = star['zone']
    zones.append(zone)
print(len(zones))
print('zones: ', zones)

zone_list = []
count_list = []
for zone in zones:
    if zone not in zone_list:
        count = zones.count(zone)
        zone_list.append(zone)
        count_list.append(count)
print('zone_list: ', zone_list)
print('count_list: ', count_list)

if not os.path.exists('/home/aistudio/work/result'):
    os.makedirs('/home/aistudio/work/result')

# Use a Chinese-capable default font so the labels render correctly
plt.rcParams['font.sans-serif'] = ['SimHei']

plt.figure(figsize=(20, 15))
# The key point: tick_label supplies the x-axis labels, count_list supplies the y values
plt.bar(range(len(count_list)), count_list, tick_label=zone_list, facecolor='#9999ff', edgecolor='white')
# Rotate the x tick labels (rotation is in degrees) and enlarge the tick fonts
plt.xticks(rotation=45, fontsize=20)
plt.yticks(fontsize=20)
plt.title('''《青春有你2》参赛选手''', fontsize=24)
plt.savefig('/home/aistudio/work/result/bar_result.jpg')
plt.show()
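As a side note on the counting step above: collections.Counter yields the same zone_list/count_list pair in fewer lines. This is only an alternative sketch, not part of the original notebook.

from collections import Counter

# Counter keeps first-appearance order (Python 3.7+), so the two lists line up
# exactly like the manual loop above.
zone_counts = Counter(zones)
zone_list = list(zone_counts.keys())
count_list = list(zone_counts.values())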
4. Generating the label files for the homemade image dataset, and finetuning

The neat part here is that it echoes the Day 1 file-walking exercise; the only tricky bit is that the data paths fed to the model must be exactly right. The paths still need a small adjustment: the image files under dataset/file have to be pulled out into a label list.

import os

filepath1 = 'dataset/file'   # training images, one sub-folder per contestant
filepath2 = 'dataset/test'   # test images

def findfile(filepath):
    # Walk the directory and collect the path of every image file
    result = []
    for root, dirs, files in os.walk(filepath):
        for file in files:
            result.append(os.path.join(root, file))
    return result

def createRecord(result):
    # Build "relative_path label" records; the label is decided by the
    # contestant's name in the sub-folder part of the path
    record = []
    for s in result:
        # s looks like 'dataset/file/<name>/<image>', so index 2 is the name
        name = s.split('/')[2]
        s = s.replace('dataset/', '')
        if '虞书欣' in name:
            record.append(s + ' ' + '0')
        elif '许佳琪' in name:
            record.append(s + ' ' + '1')
        elif '赵小棠' in name:
            record.append(s + ' ' + '2')
        elif '安崎' in name:
            record.append(s + ' ' + '3')
        elif '王承渲' in name:
            record.append(s + ' ' + '4')
    return record

def writefile(records, filepath):
    # Append one record per line to the label file
    with open(filepath, 'a', encoding='utf-8') as f:
        for r in records:
            f.write(r + '\n')

result = findfile(filepath1)
record = createRecord(result)
writefile(record, 'dataset/train_list.txt')
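filepath2 = 'dataset/test' is defined above but never used in this snippet; presumably the test list is generated the same way. A minimal sketch under that assumption (the dataset/test_list.txt filename and the per-contestant sub-folder layout under dataset/test are guesses):

# Hypothetical: reuse the same helpers to build the test list.
test_result = findfile(filepath2)
test_record = createRecord(test_result)
writefile(test_record, 'dataset/test_list.txt')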