py笔记
函数的嵌套
def fun1():
    """Return ``fun2``, the first of two nested inner functions.

    Each level returns the function defined inside it, so the chained call
    ``fun1()()()`` runs ``fun1``, then ``fun2``, then ``fun3``, which prints
    the greeting.
    """
    def fun2():
        """Return the innermost function ``fun3``."""
        def fun3():
            """Print the greeting."""
            print("Hello world!")
        return fun3
    return fun2
a=fun1()
# a()
fun1()()()
Hello world!
函数的闭包
def fun1(x):
    """Return a closure that prints ``x + y`` for a later argument ``y``.

    Args:
        x: Value captured by the returned function.

    Returns:
        A one-argument function ``fun2(y)`` that prints ``x + y``.
    """
    def fun2(y):
        # ``x`` is read from the enclosing call's scope.
        print(x+y)
    return fun2
fun1(2)(3)
5
装饰器
def addTips(fun):
    """Decorator that prints a message before and after calling ``fun``.

    Args:
        fun: The callable to wrap.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        ``fun`` and returns whatever ``fun`` returns.
    """
    from functools import wraps

    # wraps() copies __name__/__doc__ from ``fun`` onto the wrapper so the
    # decorated function still identifies itself correctly.
    @wraps(fun)
    def wrap(*arg, **kwargs):
        print("this is oper before")
        result = fun(*arg, **kwargs)
        # The result is returned only after the "after" message is printed.
        print("this is oper after")
        return result
    return wrap
@addTips
def add():
    """Demo target for the ``addTips`` decorator: prints one line."""
    print("进行添加的操作")


add()
this is oper before
进行添加的操作
this is oper after
@addTips
def add1(x, y):
    """Print ``x + y``; shows the decorator forwarding positional arguments."""
    print(x+y)


add1(2,6)
this is oper before
8
this is oper after
@addTips
def add2(x, y):
    """Return ``x + y``; shows the decorator passing the return value through."""
    return x+y


print(add2(2,3))
this is oper before
this is oper after
5
def addTips(i):
    """Decorator factory that only runs the wrapped function when ``i > 10``.

    Args:
        i: Value compared against 10 on every call of the wrapped function.

    Returns:
        A decorator. The decorated function prints a message before and
        after each call. When ``i > 10`` it calls the wrapped function and
        returns its result; otherwise it prints "you not have any auth" and
        returns ``0``.
    """
    from functools import wraps

    def wrap1(fun):
        # wraps() keeps the wrapped function's __name__/__doc__.
        @wraps(fun)
        def wrap(*arg, **kwargs):
            print("this is oper before")
            # Default result used when the call is not permitted.
            result = 0
            if i > 10:
                result = fun(*arg, **kwargs)
            else:
                print("you not have any auth")
            print("this is oper after")
            return result
        return wrap
    return wrap1
@addTips(11)
def add(x, y):
    """Print ``x + y``; 11 > 10, so the decorator lets the call through."""
    print(x+y)


add(2,3)
this is oper before
5
this is oper after
迭代器
import itertools #引入迭代工具
x = range(1,5)
#组合
com1 = itertools.combinations(x,3)
for i in com1:
print(i)
(1, 2, 3)
(1, 2, 4)
(1, 3, 4)
(2, 3, 4)
#排列
x = range(1,3)
com2 = itertools.permutations(x,2)
for i in com2:
print(i)
(1, 2)
(2, 1)
#笛卡尔积
x = range(1,3)
y = list("abc")
com3 = itertools.product(x,y)
for i in com3:
print(i)
(1, 'a')
(1, 'b')
(1, 'c')
(2, 'a')
(2, 'b')
(2, 'c')
# Chain several iterators into one.
# NOTE(review): com1, com2 and com3 are iterators that the for-loops above
# have already consumed, so this chain yields nothing unless they are
# rebuilt first.
com4 = itertools.chain(com1,com2,com3)
for i in com4:
print(i)
列表推导式
print([i**2 for i in range(8) if i%2==0])
[0, 4, 16, 36]
a = [[1,2,3],[44,55,66],[77,88,99]]
# Flatten the nested list: the outer loop walks the rows, the inner loop
# walks the items of each row.
print([j for i in a for j in i])
[1, 2, 3, 44, 55, 66, 77, 88, 99]
lambda 表达式
from functools import reduce
lambda1 = lambda x: x**2
lambda2 = lambda x,y:x+y
lambda3 = lambda x:x%2==0
#python map reduce filter
print(list(map(lambda1,range(8))))
print(reduce(lambda2,range(8)))
print(list(filter(lambda3,range(8))))
[0, 1, 4, 9, 16, 25, 36, 49]
28
[0, 2, 4, 6]
计算5!+4!+3!+2!+1!
# Compute 5! + 4! + 3! + 2! + 1! with map and reduce.
# Multiply two numbers; folded over 1..n this gives n!.
mult = lambda x,y:x*y
# jiecheng(n) returns n! (the factorial of n).
jiecheng = lambda n:reduce(mult,range(1,n+1))
# Factorials of 1..5, computed once and reused for the sum below.
a = list(map(jiecheng,range(1,6)))
print(a)
# Add two numbers; folded over the factorial list this gives the total.
sumResult = lambda x,y :x+y
result = reduce(sumResult,a)
print(result)
[1, 2, 6, 24, 120]
153
crud mysql
#pymysql mysqldb sqlalchemy
import pymysql
# 1. Open the database connection.
# NOTE(review): host and credentials are hard-coded here; fine for a local
# demo, but confirm before reusing this snippet elsewhere.
conn = pymysql.connect(host="127.0.0.1",user='root',password='root',db='jobdb',charset='utf8')
# 2. Create a cursor to run statements with.
cursor = conn.cursor()
# 3. Set the connection character set and turn on autocommit.
cursor.execute('set names utf8')
cursor.execute('set autocommit=1')
# 4. Write the SQL statements (CRUD).
cursor.execute('show tables')
sql = 'select * from jobdata limit 5'
print(sql)
# 5. Execute the SQL; the value printed below is what execute() returned
#    (5 in the recorded output, matching the five rows fetched).
huangshu= cursor.execute(sql)
print(huangshu)
# Three ways to read the result set: fetchone, fetchall, fetchmany.
result = cursor.fetchall()
print(result)
# 6. Close the cursor and the connection.
cursor.close()
conn.close()
select * from jobdata limit 5
5
((1, 'Java', 'JAVA开发', '至真信息招聘', '3k-5k', '广州', '经验应届毕业生', '本科及以上', '全职', 'https://www.lagou.com/jobs/2574027.html'), (2, 'Java', 'JAVA工程师', '车主无忧研发中心招聘', '8k-15k', '广州', '经验1-3年', '本科及以上', '全职', 'https://www.lagou.com/jobs/2738861.html'), (3, 'Java', 'JAVA开发工程师', '卡宝宝技术部招聘', '7k-14k', '广州', '经验1-3年', '本科及以上', '全职', 'https://www.lagou.com/jobs/2983700.html'), (4, 'Java', 'JAVA开发工程师', '奥威亚开发部招聘', '10k-15k', '广州', '经验1-3年', '本科及以上', '全职', 'https://www.lagou.com/jobs/2979903.html'), (5, 'Java', 'Java', '橙医生技术部招聘', '8k-15k', '广州', '经验1-3年', '大专及以上', '全职', 'https://www.lagou.com/jobs/1049383.html'))
Time
import time
#时间戳
print(time.time())
#当前时间,打印出时间元组
print(time.localtime())
# 2017-12-21 15:12:34 %I 12时进制
print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime()))
print(time.strptime('18-08-05 15:12:34','%y-%m-%d %H:%M:%S'))
1535507407.372698
time.struct_time(tm_year=2018, tm_mon=8, tm_mday=29, tm_hour=9, tm_min=50, tm_sec=7, tm_wday=2, tm_yday=241, tm_isdst=0)
2018-08-29 09:50:07
time.struct_time(tm_year=2018, tm_mon=8, tm_mday=5, tm_hour=15, tm_min=12, tm_sec=34, tm_wday=6, tm_yday=217, tm_isdst=-1)
numpy ndarray
import numpy as np
# array zero empty arange linspace random
# Build a 2x3 float array. The builtin ``float`` is used as the dtype because
# the ``np.float`` alias has been removed from NumPy.
a = np.array([[2,3,4],[3,4,6]],dtype=float)
print(a)
[[ 2. 3. 4.]
[ 3. 4. 6.]]
# 2x3 array of integer zeros (builtin ``int`` replaces the removed ``np.int``).
b = np.zeros([2,3],dtype=int)
print(b)
# 2x3 array of ones (default float dtype).
c = np.ones([2,3])
print(c)
# 2x2 array whose contents are uninitialised -- the values are arbitrary.
d = np.empty((2,2))
print(d)
[[0 0 0]
[0 0 0]]
[[ 1. 1. 1.]
[ 1. 1. 1.]]
[[ 1.27422810e-311 2.33419537e-312]
[ 8.48798317e-313 8.70018275e-313]]
print(np.arange(1,10,2))
[1 3 5 7 9]
#等差数列
e = np.linspace(1,10,7,endpoint=False)
print(e)
[ 1. 2.28571429 3.57142857 4.85714286 6.14285714 7.42857143
8.71428571]
#等比数列
# Ten values spaced evenly on a log scale from 10**1 to 10**8, cast to
# integers (builtin ``int`` replaces the removed ``np.int``).
f = np.logspace(1,8,10,dtype=int)
print(f)
[ 10 59 359 2154 12915 77426 464158
2782559 16681005 100000000]
# random
g = np.random.random((2,3,4))
print(g)
[[[ 0.25702055 0.98376051 0.05585929 0.6229259 ]
[ 0.43491759 0.82742601 0.71256847 0.85558227]
[ 0.52091075 0.40166471 0.6514136 0.07676163]]
[[ 0.35497533 0.24796663 0.299326 0.88137736]
[ 0.69668304 0.25432368 0.05165381 0.04155755]
[ 0.36588213 0.73055171 0.98575158 0.35841199]]]
print(np.random.randint(1,10,5))
print(g.imag)
[4 2 5 7 9]
[[[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]]
[[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]]]
astype数据类型转化 shape 数据的大小
a = np.array([2900,3,4])
# astype returns a new array converted to the requested dtype (builtin
# ``float`` replaces the removed ``np.float``).
b = a.astype(float)
print(b)
[ 2900. 3. 4.]
d = np.random.random((2,3,5))
print(d,d.shape)
[[[ 0.43498115 0.10203122 0.49607491 0.89564589 0.45151256]
[ 0.09605607 0.55805833 0.62551507 0.02656035 0.92505993]
[ 0.06856241 0.74115976 0.70165392 0.38507679 0.97801944]]
[[ 0.02604068 0.61297118 0.21805418 0.89961424 0.90764166]
[ 0.5773425 0.31270289 0.40884587 0.12665826 0.21801203]
[ 0.27060303 0.91790138 0.32900214 0.42332602 0.71642873]]] (2, 3, 5)
e = d.reshape((3,10))
print(e,e.shape)
[[ 0.43498115 0.10203122 0.49607491 0.89564589 0.45151256 0.09605607
0.55805833 0.62551507 0.02656035 0.92505993]
[ 0.06856241 0.74115976 0.70165392 0.38507679 0.97801944 0.02604068
0.61297118 0.21805418 0.89961424 0.90764166]
[ 0.5773425 0.31270289 0.40884587 0.12665826 0.21801203 0.27060303
0.91790138 0.32900214 0.42332602 0.71642873]] (3, 10)
数组之间的运算
#数组和列表之间的运算是不同的,数组之间的运算是对应位置之间的运算
a = np.array([[1,2,3],[9,6,3]])
b = np.array([[4,5,6],[8,5,2]])
print(a+b)
print(a-b)
print(a*b)
print(a/b)
print(b.shape)
[[ 5 7 9]
[17 11 5]]
[[-3 -3 -3]
[ 1 1 1]]
[[ 4 10 18]
[72 30 6]]
[[ 0.25 0.4 0.5 ]
[ 1.125 1.2 1.5 ]]
(2, 3)
数组作为矩阵
d = b.reshape((3,2))
#点乘等操作
c = a.dot(d)
print(c)
[[31 27]
[87 99]]
切片与索引
import numpy as np
a = np.random.random((2,3,4))
print(a)
print(a[0][1][1])
print(a[0,:,1:3])
print(a[1,:])
#切片的获得是一个视图,在上面操作就相当于在原数组上进行操作
[[[ 0.28211282 0.48742378 0.26441143 0.22300644]
[ 0.33328369 0.46119033 0.33145709 0.97908699]
[ 0.78985878 0.0059506 0.78379064 0.77909717]]
[[ 0.62321309 0.41451579 0.99233426 0.80932283]
[ 0.08101114 0.25087679 0.61401797 0.25568332]
[ 0.90907157 0.91881851 0.83182079 0.00798843]]]
0.461190326442
[[ 0.48742378 0.26441143]
[ 0.46119033 0.33145709]
[ 0.0059506 0.78379064]]
[[ 0.62321309 0.41451579 0.99233426 0.80932283]
[ 0.08101114 0.25087679 0.61401797 0.25568332]
[ 0.90907157 0.91881851 0.83182079 0.00798843]]
#布尔索引
a = np.random.random((4,4,2))
print(a)
c = a > 0.5
print(c)
d = a[c]
print(d)
[[[ 4.44650177e-01 4.84549948e-01]
[ 1.63097853e-01 6.95758696e-01]
[ 7.74627929e-01 4.02019958e-01]
[ 3.92688349e-01 8.47190342e-02]]
[[ 6.50061373e-01 8.67824711e-01]
[ 5.22127742e-01 4.45195482e-01]
[ 9.07419468e-01 3.82062991e-01]
[ 9.62987734e-01 4.04880803e-01]]
[[ 4.70068137e-01 8.64142833e-01]
[ 4.25149078e-02 3.89552038e-01]
[ 1.61686526e-01 9.76093351e-01]
[ 1.27423596e-01 2.76400473e-01]]
[[ 7.27683444e-01 5.72721041e-01]
[ 2.27198847e-04 6.36574348e-01]
[ 7.75000651e-01 7.31438620e-02]
[ 1.93906550e-01 6.48449010e-01]]]
[[[False False]
[False True]
[ True False]
[False False]]
[[ True True]
[ True False]
[ True False]
[ True False]]
[[False True]
[False False]
[False True]
[False False]]
[[ True True]
[False True]
[ True False]
[False True]]]
[ 0.6957587 0.77462793 0.65006137 0.86782471 0.52212774 0.90741947
0.96298773 0.86414283 0.97609335 0.72768344 0.57272104 0.63657435
0.77500065 0.64844901]
#花式索引
a = np.arange(32).reshape(8,-1)
print(a)
print(a[1:3])
#非连续的行
print(a[[0,3,5]])
#非连续的行与列,这种取出的是0,0 3,3 5,2三个元素
print(a[[0,3,5],[0,3,2]])
#使用索引器取出元素,这种取出的是0,0 0,2, 0,3 3,0 3,2 3,3 ......
print(a[np.ix_([0,3,5],[0,2,3])])
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]
[12 13 14 15]
[16 17 18 19]
[20 21 22 23]
[24 25 26 27]
[28 29 30 31]]
[[ 4 5 6 7]
[ 8 9 10 11]]
[[ 0 1 2 3]
[12 13 14 15]
[20 21 22 23]]
[ 0 15 22]
[[ 0 2 3]
[12 14 15]
[20 22 23]]
#转置
a = np.arange(24).reshape((2,3,4))
print(a)
b = a.T
print(b,b.shape)
[[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
[[12 13 14 15]
[16 17 18 19]
[20 21 22 23]]]
[[[ 0 12]
[ 4 16]
[ 8 20]]
[[ 1 13]
[ 5 17]
[ 9 21]]
[[ 2 14]
[ 6 18]
[10 22]]
[[ 3 15]
[ 7 19]
[11 23]]] (4, 3, 2)
np func
#函数
arr = np.array([-2,2,4,-3,100])
a = np.fabs(arr)
print(np.fabs(arr))#绝对值
print(np.sqrt(a))
print(np.square(arr))
print(np.sign(arr))
print(np.modf(arr))
print(np.nan)
print(np.inf)
[ 2. 2. 4. 3. 100.]
[ 1.41421356 1.41421356 2. 1.73205081 10. ]
[ 4 4 16 9 10000]
[-1 1 1 -1 1]
(array([-0., 0., 0., -0., 0.]), array([ -2., 2., 4., -3., 100.]))
nan
inf
#二元函数 dot mod(数组相同) greater (比较运算) logical_and()逻辑与或等
print(np.power(arr,2))
a = np.random.randint(2,100,(3,4,5))
print(a)
print(a.min(),a.max(),a.mean(),a.std(),a.sum())
#对一行与一列的聚合
print(a.max(axis=0))#0列的方向
print(a.max(axis=1))#1行的方向
[ 4 4 16 9 10000]
[[[61 50 33 85 52]
[88 27 33 49 87]
[61 19 96 35 47]
[45 93 60 73 14]]
[[45 61 93 63 60]
[ 5 97 87 82 8]
[82 94 15 71 54]
[15 34 71 33 23]]
[[90 96 10 55 69]
[71 46 48 80 41]
[25 93 85 34 25]
[91 34 85 98 82]]]
5 98 57.65 27.5201895585 3459
# 三元函数 np.where x if condition else y
a = np.array([[3,5],[2,8]])
b = np.array([[1,6],[4,3]])
print(a)
print(b)
c = a > b
print(a[c])
print(np.where(c,a,b))
[[3 5]
[2 8]]
[[1 6]
[4 3]]
[3 8]
[[3 6]
[4 8]]
Pandas
#Series
import numpy as np
import pandas as pd
# Series DataFrame
#将np中的数组转化为Series,就是一维数组上加上了索引,同时这个索引也可以自定义
arr = np.array([1,3,6,np.nan,8,9])
s1 = pd.Series(arr)
print(s1)
s1.index = [u'中国','vf','vfd','cds','cc']
0 1.0
1 3.0
2 6.0
3 NaN
4 8.0
5 9.0
dtype: float64
# 用列表创建Series
a = [5,6,9,8,7]
s2 = pd.Series(a)
print(s2)
# 用字典的方式创建索引
dict1 = {'语文':88,'数学':96}
s3 = pd.Series(dict1)
print(s3)
0 5
1 6
2 9
3 8
4 7
dtype: int64
数学 96
语文 88
dtype: int64
# 通过索引和下标创建Series对象
#index index 放到Series中可以多加,同时Series之间的相加会根据索引进行自动对齐
print(s1[3:])
print(s2[2:5])
3 NaN
4 8.0
5 9.0
dtype: float64
2 9
3 8
4 7
dtype: int64
DataFrame
#DataFrame 创建
import pandas as pd
import numpy as np
# 通过二维数组和字典进行创建,加入了行和列的索引,由Series组成的字典
arr = [['dd',22],['as',12],['cds',88]]
df = pd.DataFrame(arr)
print(df)
print(df.columns)
#通过字典方式创建,字典中的values必须是一维数组的形式,必须是同样大小的一维数组,也可以是单个数字,单个数字表示所有相同
dict1 = {'语文':[88,90,16],'数学':[90,86,36],'英语':[85,26,99],'政治':88}
df1 = pd.DataFrame(dict1)
print(df1)
0 1
0 dd 22
1 as 12
2 cds 88
RangeIndex(start=0, stop=2, step=1)
政治 数学 英语 语文
0 88 90 85 88
1 88 86 26 90
2 88 36 99 16
# Unlike an ndarray, a DataFrame is indexed column-first, then by row.
# df1['0'] does not select a row: [] with a label looks up a column.
print(df1['语文'])
print(df1['语文'][0])
# Rows are selected with the label-based .loc indexer (the old .ix indexer
# was removed in pandas 1.0). Note the square brackets, not parentheses.
print(df1.loc[0])
print(df1.loc[0]['语文'])
0 88
1 90
2 16
Name: 语文, dtype: int64
88
政治 88
数学 90
英语 85
语文 88
Name: 0, dtype: int64
88
# Label-based row slices include both end points. The removed .ix indexer
# treated the integer 1 as a column position here, so the column is picked
# by position explicitly.
print(df1.loc[1:2, df1.columns[1]])
df1['语文'][0] = np.nan
print(df1)  # original note: the column's dtype changes as well
df1.loc[0]['语文'] = np.nan  # original note: this assignment has no effect
print(df1)
1 86
2 36
Name: 数学, dtype: int64
政治 数学 英语 语文
0 88 90 85 88
1 88 86 26 90
2 88 36 99 16
政治 数学 英语 语文
0 88 90 85 88
1 88 86 26 90
2 88 36 99 16
Pandas 常见统计方法
#pd.read_csv pd.read_json pd.isnull pd.notnull
#pd.dropna() fillna() pd.describe()所有的数据描述unique values_count .isin([11])
#多层索引,可以给索引设置二维数组
# Other ways to read a single value (.loc replaces the removed .ix indexer).
print(df1['profile'][15])
print(df1.loc[15]['profile'])
# iloc selects by integer position.
# loc selects by index label.
import matplotlib.pyplot as plt
import numpy as np
# 生成数据
dataOut = np.arange(24).reshape((4,6))
# 保存数据
np.savetxt('data.txt',dataOut,fmt='%.2e')
# 读取数据
data = np.loadtxt('data.txt')
print(data)
y = np.random.randint(1,11,5)
print(y)
x = np.arange(len(y))
print(x)
plt.plot(x,y,color='r')
plt.bar(x,y,color='g')
#plt.pie(y)
plt.show()
[[ 0. 1. 2. 3. 4. 5.]
[ 6. 7. 8. 9. 10. 11.]
[ 12. 13. 14. 15. 16. 17.]
[ 18. 19. 20. 21. 22. 23.]]
[ 1 10 9 6 7]
[0 1 2 3 4]
matplotlib draw figure
#中文显示问题解决
plt.rcParams['font.sans-serif']='SimHei'
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
mpl.rcParams['font.family'] = 'sans-serif'
mpl.rcParams['font.sans-serif'] = ['SimHei']
mpl.rcParams['axes.unicode_minus'] = False
x = np.arange(-5,5)
y = np.sin(x)
print(x,y)
#颜色 线条 标记
plt.plot(x,y,color='r',linestyle='--',marker='o')
plt.show()
[-5 -4 -3 -2 -1 0 1 2 3 4] [ 0.95892427 0.7568025 -0.14112001 -0.90929743 -0.84147098 0.
0.84147098 0.90929743 0.14112001 -0.7568025 ]
[图片上传失败...(image-637407-1537096026060)]
python 中的多线程
# 线程
import time
import threading
def music(name, loop, delay=1):
    """Print a "listening to music" line ``loop`` times, pausing after each.

    Args:
        name: Title inserted into the printed message.
        loop: Number of lines to print.
        delay: Seconds to sleep after each line. Defaults to 1, the value
            that was previously hard-coded.
    """
    for i in range(loop):
        print('在听音乐:%s 时间 %s'%(name,time.ctime()))
        time.sleep(delay)
def movie(name, loop, delay=1):
    """Print a "watching a movie" line ``loop`` times, pausing after each.

    Args:
        name: Title inserted into the printed message.
        loop: Number of lines to print.
        delay: Seconds to sleep after each line. Defaults to 1, the value
            that was previously hard-coded.
    """
    for i in range(loop):
        print('在看电影:%s 时间 %s'%(name,time.ctime()))
        time.sleep(delay)
# 1. Create the threads.
t1 = threading.Thread(target=music,args=('模特',3))
t2 = threading.Thread(target=movie,args=('速度',5))
# Mark both as daemon threads; this must be done before start().
# The ``daemon`` attribute replaces the deprecated setDaemon() method.
t1.daemon = True
t2.daemon = True
t1.start()
t2.start()
# No join() here, so the main thread does not wait for the workers and the
# line below is printed while they are still running. Call t1.join() and
# t2.join() first to print it only after both have finished.
print('任务结束了 %s'%(time.ctime()))
在听音乐:模特 时间 Thu Aug 30 16:25:07 2018
任务结束了 Thu Aug 30 16:25:07 2018
在看电影:速度 时间 Thu Aug 30 16:25:07 2018
在听音乐:模特 时间 Thu Aug 30 16:25:08 2018
在看电影:速度 时间 Thu Aug 30 16:25:08 2018
在听音乐:模特 时间 Thu Aug 30 16:25:09 2018
在看电影:速度 时间 Thu Aug 30 16:25:09 2018
在看电影:速度 时间 Thu Aug 30 16:25:10 2018
在看电影:速度 时间 Thu Aug 30 16:25:11 2018
锁
# Shared counter modified by the worker threads, and the lock guarding it.
balance = 0
lock = threading.Lock()


def change(n):
    """Add ``n`` to the global ``balance`` and subtract it again.

    The net effect is zero as long as the two steps are not interleaved
    with another thread's update.
    """
    global balance
    balance += n
    balance -= n


def run_thread(n, loops=1000000):
    """Call ``change(n)`` repeatedly, holding ``lock`` around every call.

    Args:
        n: Amount passed to ``change``.
        loops: Number of iterations. Defaults to 1000000, the value that
            was previously hard-coded.
    """
    for i in range(loops):
        # ``with`` acquires the lock and releases it even if change() raises.
        with lock:
            change(n)
#线程
t1 = threading.Thread(target=run_thread,args=(4,))
t2 = threading.Thread(target=run_thread,args=(5,))
t1.start()
t2.start()
t1.join()
t2.join()
print(balance)
0
多进程
import time
import multiprocessing


def work_1(filename, n, delay=1):
    """Append one line to ``filename`` ``n`` times, sleeping after each write.

    Args:
        filename: Path of the file to append to.
        n: Number of lines to write.
        delay: Seconds to sleep after each write. Defaults to 1, the value
            that was previously hard-coded.
    """
    print('work_1 start')
    for i in range(n):
        with open(filename,'a') as f_obj:
            f_obj.write('www.ai111.com.cn \n')
        time.sleep(delay)
    print('work_1 end')


def work_2(filename, n, delay=1):
    """Same job as ``work_1``; a second task for the demos below.

    Args:
        filename: Path of the file to append to.
        n: Number of lines to write.
        delay: Seconds to sleep after each write. Defaults to 1.
    """
    print('work_2 start')
    for i in range(n):
        with open(filename,'a') as f_obj:
            f_obj.write('www.ai111.com.cn \n')
        time.sleep(delay)
    print('work_2 end')


# The guard keeps child processes from re-running the demo when the module
# is imported again under the "spawn" start method.
if __name__ == '__main__':
    # Single process: the two tasks run one after the other.
    work_1('zhang.txt',3)
    work_2('xiao.txt',4)

    # Separate processes: start() returns immediately, so both tasks run
    # at the same time.
    p1 = multiprocessing.Process(target=work_1,args=('zhang.txt',3))
    p2 = multiprocessing.Process(target=work_1,args=('zhang.txt',4))
    p1.start()
    p2.start()

    # A multiprocessing.Lock is acquired and released like a threading lock.
    lock = multiprocessing.Lock()
    lock.acquire()
    lock.release()
### 进程池
import os
import multiprocessing
import time
def work(n, delay=2):
    """Print a start line, sleep, then print an end line, each with the pid.

    Args:
        n: Task number shown in the messages.
        delay: Seconds to sleep between the two messages. Defaults to 2,
            the value that was previously hard-coded.
    """
    # os.getpid must be called; without the parentheses the function object
    # itself was formatted into the message instead of the process id.
    print('run work(%s) start,word id :%s'%(n,os.getpid()))
    time.sleep(delay)
    print('run work(%s) end,word id :%s'%(n,os.getpid()))
print('父进程 id:%s'%os.getpid())
#创建一个进程池,包含两个进程
p = multiprocessing.Pool(2)
#提交5个任务去进程池
for i in range(5):
#异步提交(非阻塞,立即返回)
p.apply_async(work,args=(i,))
#同步提交(阻塞,等待任务执行完成)
p.apply(work,args=(i,))
p.close()
p.join()
进程通信
# 通过队列和管道 Queue and Pipe
import time
import multiprocessing


def put(q, delay=1):
    """Put 'a', 'b' and 'c' on the queue ``q``, pausing after each item.

    Args:
        q: Queue-like object with a ``put`` method.
        delay: Seconds to sleep after each item. Defaults to 1, the value
            that was previously hard-coded.
    """
    for value in ['a','b','c']:
        print("发送 %s 到 queue"%value)
        q.put(value)
        time.sleep(delay)


def get(q):
    """Read items from ``q`` forever, printing each one.

    ``q.get(True)`` blocks until an item is available. The loop has no exit
    condition, so this function never returns.
    """
    while True:
        value = q.get(True)
        print('从queue中取到数据:%s'%value)


# The guard keeps child processes from re-running the demo when the module
# is imported again under the "spawn" start method.
if __name__ == '__main__':
    q = multiprocessing.Queue()
    pWrite = multiprocessing.Process(target=put,args=(q,))
    pReader = multiprocessing.Process(target=get,args=(q,))
    pWrite.start()
    # NOTE(review): the reader loops forever, so the script does not exit by
    # itself; terminate pReader once the writer has finished if needed.
    pReader.start()
# 通过队列和管道 Queue and Pipe
import time
import multiprocessing


# Pipe-based variant of the queue example.
def put(q, delay=1):
    """Send 'a', 'b' and 'c' through the pipe ``q``, pausing after each item.

    Args:
        q: The pair returned by ``multiprocessing.Pipe``; items are sent on
            the second connection, ``q[1]``.
        delay: Seconds to sleep after each item. Defaults to 1, the value
            that was previously hard-coded.
    """
    for value in ['a','b','c']:
        print("发送 %s 到 queue"%value)
        # Use the pipe passed in as the argument rather than a global.
        q[1].send(value)
        time.sleep(delay)


def get(q):
    """Receive items from the pipe ``q`` forever, printing each one.

    Items are read from the first connection, ``q[0]``. The loop has no
    exit condition, so this function never returns.
    """
    while True:
        value = q[0].recv()
        print('从queue中取到数据:%s'%value)


# The guard keeps child processes from re-running the demo when the module
# is imported again under the "spawn" start method.
if __name__ == '__main__':
    # duplex=True gives a two-way pipe. With duplex=False the pipe is
    # one-way: p[0] can only receive and p[1] can only send.
    p = multiprocessing.Pipe(duplex=False)
    # Pass the pipe itself to both processes (not the queue from the
    # previous example).
    pWrite = multiprocessing.Process(target=put,args=(p,))
    pReader = multiprocessing.Process(target=get,args=(p,))
    pWrite.start()
    pReader.start()
线程间通信
import time
import threading
import queue
q = queue.Queue(maxsize=10)
def producer(name):
count=1
while True: