python numpy pandas

py笔记

函数的嵌套

def fun1():
    """Return ``fun2``, the outermost of three nested functions.

    Each level only hands back the next one, so ``fun1()()()`` is needed to
    reach the innermost function and print the greeting.
    """
    def fun2():
        # Second level: returns the innermost function without calling it.
        def fun3():
            # Innermost level: the only one that produces output.
            print("Hello world!")
        return fun3
    return fun2
a=fun1()
# a()
fun1()()()

Hello world!

函数的闭包

def fun1(x):
    """Return a closure that prints ``x + y`` for the ``y`` it is called with."""
    def fun2(y):
        # x is captured from the enclosing call to fun1.
        print(x+y)
    return fun2
fun1(2)(3)
5

装饰器

def addTips(fun):
    """Decorate ``fun`` so a message is printed before and after each call.

    Args:
        fun: The callable to wrap.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        ``fun`` and returns its result unchanged.
    """
    # Imported locally so the snippet does not rely on imports made elsewhere.
    from functools import wraps

    @wraps(fun)  # keep fun's __name__ and __doc__ on the wrapper
    def wrap(*arg,**kwargs):
        print("this is oper before")
        result = fun(*arg,**kwargs)
        print("this is oper after")
        return result
    return wrap
@addTips
def add():
    print("进行添加的操作")
    
add()
this is oper before
进行添加的操作
this is oper after
@addTips
def add1(x,y):
    print(x+y)
add1(2,6)
this is oper before
8
this is oper after
@addTips
def add2(x,y):
    return x+y

print(add2(2,3))
this is oper before
this is oper after
5
def addTips(i):
    """Build a decorator that only runs the wrapped function when ``i > 10``.

    Args:
        i: Value compared against 10 each time the wrapped function is called.

    Returns:
        A decorator. The wrapper it produces prints a message before and
        after each call. It returns the wrapped function's result when
        ``i > 10``; otherwise it prints a refusal message and returns ``0``.
    """
    # Imported locally so the snippet does not rely on imports made elsewhere.
    from functools import wraps

    def wrap1(fun):
        @wraps(fun)  # keep fun's __name__ and __doc__ on the wrapper
        def wrap(*arg,**kwargs):
            print("this is oper before")
            if i > 10:
                result = fun(*arg,**kwargs)
            else:
                print("you not have any auth")
                result = 0
            print("this is oper after")
            return result
        return wrap
    return wrap1

@addTips(11)
def add(x,y):
    print(x+y)
    
add(2,3)
this is oper before
5
this is oper after

迭代器

import itertools # iteration helpers from the standard library
x = range(1,5)
# Combinations: 3-element selections from x, where order does not matter
com1 = itertools.combinations(x,3)
for i in com1:
    print(i)
(1, 2, 3)
(1, 2, 4)
(1, 3, 4)
(2, 3, 4)
# Permutations: ordered 2-element arrangements of x
x = range(1,3)
com2 = itertools.permutations(x,2)
for i in com2:
    print(i)
(1, 2)
(2, 1)
#笛卡尔积
x = range(1,3)
y = list("abc")
com3 = itertools.product(x,y)
for i in com3:
    print(i)
(1, 'a')
(1, 'b')
(1, 'c')
(2, 'a')
(2, 'b')
(2, 'c')
# Chain several iterators into one.
# The iterators are rebuilt here because itertools objects are single-pass:
# ones that an earlier for-loop already consumed would yield nothing.
com1 = itertools.combinations(range(1,5),3)
com2 = itertools.permutations(range(1,3),2)
com3 = itertools.product(range(1,3),list("abc"))
com4 = itertools.chain(com1,com2,com3)
chained_items = list(com4)
for i in chained_items:
    print(i)

列表推导式

print([i**2 for i in range(8) if i%2==0])
[0, 4, 16, 36]
a = [[1,2,3],[44,55,66],[77,88,99]]
# Flatten the nested list: the outer loop walks the rows, the inner loop
# walks the items of each row.
flat = [j for i in a for j in i]
print(flat)
[1, 2, 3, 44, 55, 66, 77, 88, 99]

lambda 表达式

from  functools  import reduce 
lambda1 = lambda x: x**2
lambda2 = lambda x,y:x+y
lambda3 = lambda x:x%2==0
#python map reduce filter
print(list(map(lambda1,range(8))))
print(reduce(lambda2,range(8)))
print(list(filter(lambda3,range(8))))
[0, 1, 4, 9, 16, 25, 36, 49]
28
[0, 2, 4, 6]

计算5!+4!+3!+2!+1!

# Compute 1! + 2! + 3! + 4! + 5! with map and reduce.
# Multiply two numbers.
mult = lambda x,y:x*y
# n! as the product of 1..n.
jiecheng = lambda n:reduce(mult,range(1,n+1))
# Factorials of 1..5, computed once and reused below.
a = list(map(jiecheng,range(1,6)))
print(a)
# Add two numbers; folded over the factorials to get their sum.
sumResult = lambda x,y :x+y
result = reduce(sumResult,a)
print(result)

[1, 2, 6, 24, 120]
153

crud mysql

# Python MySQL drivers: pymysql, mysqldb, sqlalchemy
import pymysql
# 1. Open the database connection
conn = pymysql.connect(host="127.0.0.1",user='root',password='root',db='jobdb',charset='utf8')
try:
    # 2. Create a cursor; the with-block closes it even if a query fails
    with conn.cursor() as cursor:
        # 3. Set the connection character set and turn on autocommit
        cursor.execute('set names utf8')
        cursor.execute('set autocommit=1')
        # 4. Write the SQL statements
        cursor.execute('show tables')
        sql = 'select * from jobdata limit 5'
        print(sql)
        # 5. Run the query; the value returned by execute() is printed below
        huangshu= cursor.execute(sql)
        print(huangshu)
        # Three ways to read the result set: fetchone, fetchall, fetchmany
        result = cursor.fetchall()
        print(result)
finally:
    # 6. Always release the connection, whether or not the queries succeeded
    conn.close()
select * from jobdata limit 5
5
((1, 'Java', 'JAVA开发', '至真信息招聘', '3k-5k', '广州', '经验应届毕业生', '本科及以上', '全职', 'https://www.lagou.com/jobs/2574027.html'), (2, 'Java', 'JAVA工程师', '车主无忧研发中心招聘', '8k-15k', '广州', '经验1-3年', '本科及以上', '全职', 'https://www.lagou.com/jobs/2738861.html'), (3, 'Java', 'JAVA开发工程师', '卡宝宝技术部招聘', '7k-14k', '广州', '经验1-3年', '本科及以上', '全职', 'https://www.lagou.com/jobs/2983700.html'), (4, 'Java', 'JAVA开发工程师', '奥威亚开发部招聘', '10k-15k', '广州', '经验1-3年', '本科及以上', '全职', 'https://www.lagou.com/jobs/2979903.html'), (5, 'Java', 'Java', '橙医生技术部招聘', '8k-15k', '广州', '经验1-3年', '大专及以上', '全职', 'https://www.lagou.com/jobs/1049383.html'))

Time

import time
# Timestamp: seconds since the epoch, as a float
print(time.time())
# Current local time, printed as a struct_time tuple
print(time.localtime())
# Format as e.g. 2017-12-21 15:12:34 (%H is the 24-hour clock; %I would be the 12-hour clock)
print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime()))
# Parse a string back into a struct_time using the matching format
print(time.strptime('18-08-05 15:12:34','%y-%m-%d %H:%M:%S'))
1535507407.372698
time.struct_time(tm_year=2018, tm_mon=8, tm_mday=29, tm_hour=9, tm_min=50, tm_sec=7, tm_wday=2, tm_yday=241, tm_isdst=0)
2018-08-29 09:50:07
time.struct_time(tm_year=2018, tm_mon=8, tm_mday=5, tm_hour=15, tm_min=12, tm_sec=34, tm_wday=6, tm_yday=217, tm_isdst=-1)

numpy ndarray

import numpy as np
# Ways to create arrays: array, zeros, empty, arange, linspace, random
# np.float was only an alias of the builtin float and has been removed from
# NumPy, so the builtin is used directly.
a = np.array([[2,3,4],[3,4,6]],dtype=float)
print(a)
[[ 2.  3.  4.]
 [ 3.  4.  6.]]
# The builtin int replaces the removed np.int alias.
b = np.zeros([2,3],dtype=int)
print(b)
c = np.ones([2,3])
print(c)
# empty() does not initialize the memory, so the printed values are arbitrary.
d = np.empty((2,2))
print(d)
[[0 0 0]
 [0 0 0]]
[[ 1.  1.  1.]
 [ 1.  1.  1.]]
[[  1.27422810e-311   2.33419537e-312]
 [  8.48798317e-313   8.70018275e-313]]
print(np.arange(1,10,2))
[1 3 5 7 9]
#等差数列
e = np.linspace(1,10,7,endpoint=False)
print(e)
[ 1.          2.28571429  3.57142857  4.85714286  6.14285714  7.42857143
  8.71428571]
# Geometric sequence: 10 values from 10**1 to 10**8, stored as integers
# (the builtin int replaces the removed np.int alias).
f = np.logspace(1,8,10,dtype=int)
print(f)
[       10        59       359      2154     12915     77426    464158
   2782559  16681005 100000000]
# random
g = np.random.random((2,3,4))
print(g)
[[[ 0.25702055  0.98376051  0.05585929  0.6229259 ]
  [ 0.43491759  0.82742601  0.71256847  0.85558227]
  [ 0.52091075  0.40166471  0.6514136   0.07676163]]

 [[ 0.35497533  0.24796663  0.299326    0.88137736]
  [ 0.69668304  0.25432368  0.05165381  0.04155755]
  [ 0.36588213  0.73055171  0.98575158  0.35841199]]]
print(np.random.randint(1,10,5))
print(g.imag)
[4 2 5 7 9]
[[[ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]]

 [[ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]]]

astype数据类型转化 shape 数据的大小

a = np.array([2900,3,4])
# astype returns a converted copy; the builtin float replaces the removed
# np.float alias.
b = a.astype(float)
print(b)
[ 2900.     3.     4.]
d = np.random.random((2,3,5))
print(d,d.shape)
[[[ 0.43498115  0.10203122  0.49607491  0.89564589  0.45151256]
  [ 0.09605607  0.55805833  0.62551507  0.02656035  0.92505993]
  [ 0.06856241  0.74115976  0.70165392  0.38507679  0.97801944]]

 [[ 0.02604068  0.61297118  0.21805418  0.89961424  0.90764166]
  [ 0.5773425   0.31270289  0.40884587  0.12665826  0.21801203]
  [ 0.27060303  0.91790138  0.32900214  0.42332602  0.71642873]]] (2, 3, 5)
e = d.reshape((3,10))
print(e,e.shape)
[[ 0.43498115  0.10203122  0.49607491  0.89564589  0.45151256  0.09605607
   0.55805833  0.62551507  0.02656035  0.92505993]
 [ 0.06856241  0.74115976  0.70165392  0.38507679  0.97801944  0.02604068
   0.61297118  0.21805418  0.89961424  0.90764166]
 [ 0.5773425   0.31270289  0.40884587  0.12665826  0.21801203  0.27060303
   0.91790138  0.32900214  0.42332602  0.71642873]] (3, 10)

数组之间的运算

#数组和列表之间的运算是不同的,数组之间的运算是对应位置之间的运算
a = np.array([[1,2,3],[9,6,3]])
b = np.array([[4,5,6],[8,5,2]])
print(a+b)
print(a-b)
print(a*b)
print(a/b)
print(b.shape)
[[ 5  7  9]
 [17 11  5]]
[[-3 -3 -3]
 [ 1  1  1]]
[[ 4 10 18]
 [72 30  6]]
[[ 0.25   0.4    0.5  ]
 [ 1.125  1.2    1.5  ]]
(2, 3)

数组作为矩阵

d = b.reshape((3,2))
#点乘等操作
c = a.dot(d)
print(c)
[[31 27]
 [87 99]]

切片与索引

import numpy as np
a = np.random.random((2,3,4))
print(a)
print(a[0][1][1])
print(a[0,:,1:3])
print(a[1,:])
#切片的获得是一个视图,在上面操作就相当于在原数组上进行操作
[[[ 0.28211282  0.48742378  0.26441143  0.22300644]
  [ 0.33328369  0.46119033  0.33145709  0.97908699]
  [ 0.78985878  0.0059506   0.78379064  0.77909717]]

 [[ 0.62321309  0.41451579  0.99233426  0.80932283]
  [ 0.08101114  0.25087679  0.61401797  0.25568332]
  [ 0.90907157  0.91881851  0.83182079  0.00798843]]]
0.461190326442
[[ 0.48742378  0.26441143]
 [ 0.46119033  0.33145709]
 [ 0.0059506   0.78379064]]
[[ 0.62321309  0.41451579  0.99233426  0.80932283]
 [ 0.08101114  0.25087679  0.61401797  0.25568332]
 [ 0.90907157  0.91881851  0.83182079  0.00798843]]
#布尔索引
a = np.random.random((4,4,2))
print(a)
c = a > 0.5
print(c)
d = a[c]
print(d)
[[[  4.44650177e-01   4.84549948e-01]
  [  1.63097853e-01   6.95758696e-01]
  [  7.74627929e-01   4.02019958e-01]
  [  3.92688349e-01   8.47190342e-02]]

 [[  6.50061373e-01   8.67824711e-01]
  [  5.22127742e-01   4.45195482e-01]
  [  9.07419468e-01   3.82062991e-01]
  [  9.62987734e-01   4.04880803e-01]]

 [[  4.70068137e-01   8.64142833e-01]
  [  4.25149078e-02   3.89552038e-01]
  [  1.61686526e-01   9.76093351e-01]
  [  1.27423596e-01   2.76400473e-01]]

 [[  7.27683444e-01   5.72721041e-01]
  [  2.27198847e-04   6.36574348e-01]
  [  7.75000651e-01   7.31438620e-02]
  [  1.93906550e-01   6.48449010e-01]]]
[[[False False]
  [False  True]
  [ True False]
  [False False]]

 [[ True  True]
  [ True False]
  [ True False]
  [ True False]]

 [[False  True]
  [False False]
  [False  True]
  [False False]]

 [[ True  True]
  [False  True]
  [ True False]
  [False  True]]]
[ 0.6957587   0.77462793  0.65006137  0.86782471  0.52212774  0.90741947
  0.96298773  0.86414283  0.97609335  0.72768344  0.57272104  0.63657435
  0.77500065  0.64844901]
#花式索引
a = np.arange(32).reshape(8,-1)
print(a)
print(a[1:3])
#非连续的行
print(a[[0,3,5]])
#非连续的行与列,这种取出的是0,0  3,3  5,2三个元素
print(a[[0,3,5],[0,3,2]])
#使用索引器取出元素,这种取出的是0,0 0,2, 0,3 3,0 3,2  3,3  ......
print(a[np.ix_([0,3,5],[0,2,3])])
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]
 [24 25 26 27]
 [28 29 30 31]]
[[ 4  5  6  7]
 [ 8  9 10 11]]
[[ 0  1  2  3]
 [12 13 14 15]
 [20 21 22 23]]
[ 0 15 22]
[[ 0  2  3]
 [12 14 15]
 [20 22 23]]
#转置
a = np.arange(24).reshape((2,3,4))
print(a)
b = a.T
print(b,b.shape)
[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]]
[[[ 0 12]
  [ 4 16]
  [ 8 20]]

 [[ 1 13]
  [ 5 17]
  [ 9 21]]

 [[ 2 14]
  [ 6 18]
  [10 22]]

 [[ 3 15]
  [ 7 19]
  [11 23]]] (4, 3, 2)

np func

#函数
arr = np.array([-2,2,4,-3,100])
a = np.fabs(arr)
print(np.fabs(arr))#绝对值
print(np.sqrt(a))
print(np.square(arr))
print(np.sign(arr))
print(np.modf(arr))
print(np.nan)
print(np.inf)
[   2.    2.    4.    3.  100.]
[  1.41421356   1.41421356   2.           1.73205081  10.        ]
[    4     4    16     9 10000]
[-1  1  1 -1  1]
(array([-0.,  0.,  0., -0.,  0.]), array([  -2.,    2.,    4.,   -3.,  100.]))
nan
inf
#二元函数  dot  mod(数组相同)  greater (比较运算) logical_and()逻辑与或等
print(np.power(arr,2))
a = np.random.randint(2,100,(3,4,5))
print(a)
print(a.min(),a.max(),a.mean(),a.std(),a.sum())
# Aggregating along a single axis of the (3, 4, 5) array
print(a.max(axis=0))# max across axis 0 (the 3 blocks) -> result shape (4, 5)
print(a.max(axis=1))# max across axis 1 (the 4 rows of each block) -> result shape (3, 5)
[    4     4    16     9 10000]
[[[61 50 33 85 52]
  [88 27 33 49 87]
  [61 19 96 35 47]
  [45 93 60 73 14]]

 [[45 61 93 63 60]
  [ 5 97 87 82  8]
  [82 94 15 71 54]
  [15 34 71 33 23]]

 [[90 96 10 55 69]
  [71 46 48 80 41]
  [25 93 85 34 25]
  [91 34 85 98 82]]]
5 98 57.65 27.5201895585 3459
# 三元函数 np.where x if condition else y
a = np.array([[3,5],[2,8]])
b = np.array([[1,6],[4,3]])
print(a)
print(b)
c = a > b
print(a[c])
print(np.where(c,a,b))
[[3 5]
 [2 8]]
[[1 6]
 [4 3]]
[3 8]
[[3 6]
 [4 8]]

Pandas

#Series
import numpy as np
import pandas as pd
# Series DataFrame
# Turn a NumPy array into a Series: a one-dimensional array with an index
# attached. The index can also be customized.
arr = np.array([1,3,6,np.nan,8,9])
s1 = pd.Series(arr)
print(s1)
# A custom index needs exactly one label per element (six here); assigning
# only five raises ValueError. The labels are set on a copy so that s1 keeps
# its default 0..5 index.
s1_labeled = s1.copy()
s1_labeled.index = [u'中国','vf','vfd','cds','cc','dd']
0    1.0
1    3.0
2    6.0
3    NaN
4    8.0
5    9.0
dtype: float64
# 用列表创建Series
a = [5,6,9,8,7]
s2 = pd.Series(a)
print(s2)
# 用字典的方式创建索引
dict1 = {'语文':88,'数学':96}
s3 = pd.Series(dict1)
print(s3)
0    5
1    6
2    9
3    8
4    7
dtype: int64
数学    96
语文    88
dtype: int64
# 通过索引和下标创建Series对象
#index  index 放到Series中可以多加,同时Series之间的相加会根据索引进行自动对齐
print(s1[3:])
print(s2[2:5])
3    NaN
4    8.0
5    9.0
dtype: float64
2    9
3    8
4    7
dtype: int64

DataFrame

#DataFrame 创建
import pandas as pd
import numpy  as np
# 通过二维数组和字典进行创建,加入了行和列的索引,由Series组成的字典
arr = [['dd',22],['as',12],['cds',88]]
df = pd.DataFrame(arr)
print(df)
print(df.columns)
#通过字典方式创建,字典中的values必须是一维数组的形式,必须是同样大小的一维数组,也可以是单个数字,单个数字表示所有相同
dict1 = {'语文':[88,90,16],'数学':[90,86,36],'英语':[85,26,99],'政治':88}
df1 = pd.DataFrame(dict1)
print(df1)
     0   1
0   dd  22
1   as  12
2  cds  88
RangeIndex(start=0, stop=2, step=1)
   政治  数学  英语  语文
0  88  90  85  88
1  88  86  26  90
2  88  36  99  16
# On a DataFrame, [] selects a column first and then a row, unlike an ndarray.
# df1['0'] would look for a column named '0', so it cannot be used to get a row.
print(df1['语文'])
print(df1['语文'][0])
# Rows are selected by index label with .loc (the old .ix accessor was
# removed in pandas 1.0).
print(df1.loc[0])# .loc is indexed with square brackets, not called
print(df1.loc[0]['语文'])
0    88
1    90
2    16
Name: 语文, dtype: int64
88
政治    88
数学    90
英语    85
语文    88
Name: 0, dtype: int64
88
# Label-based slicing with .loc includes both end points.
print(df1.loc[1:2,df1.columns[1]])
# NaN is a float, so the integer column is converted to float first.
df1['语文'] = df1['语文'].astype(float)
# Assign through a single .loc call so the DataFrame itself is updated.
df1.loc[0,'语文'] = np.nan
print(df1)# the column's dtype has changed as well
# Chained indexing assigns to a temporary object, so df1 is left unchanged.
df1.loc[0]['语文'] = np.nan
print(df1)
1    86
2    36
Name: 数学, dtype: int64
   政治  数学  英语  语文
0  88  90  85  88
1  88  86  26  90
2  88  36  99  16
   政治  数学  英语  语文
0  88  90  85  88
1  88  86  26  90
2  88  36  99  16

Pandas 常见统计方法

# I/O and missing-data helpers: pd.read_csv, pd.read_json, pd.isnull, pd.notnull
# DataFrame/Series methods: dropna(), fillna(), describe() (summary of all the data), unique(), value_counts(), isin([11])
# Multi-level indexes: an index can be built from a two-dimensional array
# Another way to read a single value
print(df1['profile'][15])
# .loc replaces the .ix accessor, which was removed in pandas 1.0
print(df1.loc[15,'profile'])
# iloc selects by integer position
# loc selects by index label
import  matplotlib.pyplot as plt
import  numpy   as   np

# 生成数据
dataOut = np.arange(24).reshape((4,6))
# 保存数据
np.savetxt('data.txt',dataOut,fmt='%.2e')
# 读取数据
data = np.loadtxt('data.txt')
print(data)
y = np.random.randint(1,11,5)
print(y)
x = np.arange(len(y))
print(x)
plt.plot(x,y,color='r')
plt.bar(x,y,color='g')
#plt.pie(y)
plt.show()
[[  0.   1.   2.   3.   4.   5.]
 [  6.   7.   8.   9.  10.  11.]
 [ 12.  13.  14.  15.  16.  17.]
 [ 18.  19.  20.  21.  22.  23.]]
[ 1 10  9  6  7]
[0 1 2 3 4]

matplotlib draw figure

# Make Chinese text render correctly in figures.
# The imports come first so that the names exist before rcParams is touched.
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
mpl.rcParams['font.family'] = 'sans-serif'
# plt.rcParams and mpl.rcParams are the same object, so the font is set once here.
mpl.rcParams['font.sans-serif'] = ['SimHei']
# Keep the minus sign displayable with the replacement font.
mpl.rcParams['axes.unicode_minus'] = False

x = np.arange(-5,5)
y = np.sin(x)
print(x,y)
#颜色 线条  标记
plt.plot(x,y,color='r',linestyle='--',marker='o')
plt.show()
[-5 -4 -3 -2 -1  0  1  2  3  4] [ 0.95892427  0.7568025  -0.14112001 -0.90929743 -0.84147098  0.
  0.84147098  0.90929743  0.14112001 -0.7568025 ]

[图片上传失败...(image-637407-1537096026060)]

python 中的多线程

# 线程
import time
import threading
def music(name,loop):
    """Print a "listening to music" line with the current time, ``loop`` times.

    Sleeps for one second after each line.
    """
    for _ in range(loop):
        stamp = time.ctime()
        print('在听音乐:%s 时间 %s' % (name, stamp))
        time.sleep(1)
def movie(name,loop):
    """Print a "watching a movie" line with the current time, ``loop`` times.

    Sleeps for one second after each line.
    """
    for _ in range(loop):
        stamp = time.ctime()
        print('在看电影:%s 时间 %s' % (name, stamp))
        time.sleep(1)
# 1. Create the threads
t1 = threading.Thread(target=music,args=('模特',3))
t2 = threading.Thread(target=movie,args=('速度',5))
# Mark both as daemon threads; this has to happen before start().
# The daemon attribute replaces setDaemon(), deprecated since Python 3.10.
t1.daemon = True
t2.daemon = True
t1.start()
t2.start()
# The main thread does not wait for the workers here (calling t1.join() and
# t2.join() would make it wait), so the message below is printed while they
# are still running.

print('任务结束了 %s'%(time.ctime()))
在听音乐:模特 时间 Thu Aug 30 16:25:07 2018
任务结束了 Thu Aug 30 16:25:07 2018
在看电影:速度 时间 Thu Aug 30 16:25:07 2018
在听音乐:模特 时间 Thu Aug 30 16:25:08 2018
在看电影:速度 时间 Thu Aug 30 16:25:08 2018
在听音乐:模特 时间 Thu Aug 30 16:25:09 2018
在看电影:速度 时间 Thu Aug 30 16:25:09 2018
在看电影:速度 时间 Thu Aug 30 16:25:10 2018
在看电影:速度 时间 Thu Aug 30 16:25:11 2018

balance = 0
lock = threading.Lock()
def change(n):
    """Add ``n`` to the global balance and subtract it again."""
    global balance
    balance += n
    balance -= n

def run_thread(n, loops=1000000):
    """Call ``change(n)`` repeatedly, holding ``lock`` around every call.

    Args:
        n: Amount passed to ``change``.
        loops: Number of iterations; defaults to the original 1,000,000.
    """
    for _ in range(loops):
        # The with-statement acquires the lock and releases it even if
        # change() raises.
        with lock:
            change(n)
        
#线程
t1 = threading.Thread(target=run_thread,args=(4,))
t2 = threading.Thread(target=run_thread,args=(5,))

t1.start()
t2.start()

t1.join()
t2.join()

print(balance)
0

多进程

import time
import multiprocessing

#单进程
def work_1(filename,n):
    """Append one fixed line to ``filename`` ``n`` times, a second apart.

    The file is reopened in append mode for every line and stays open during
    the one-second pause. A start and an end message are printed.
    """
    line = 'www.ai111.com.cn \n'
    print('work_1 start')
    for _ in range(n):
        with open(filename,'a') as out:
            out.write(line)
            time.sleep(1)
    print('work_1 end')

def work_2(filename,n):
    """Append one fixed line to ``filename`` ``n`` times, a second apart.

    The file is reopened in append mode for every line and stays open during
    the one-second pause. A start and an end message are printed.
    """
    line = 'www.ai111.com.cn \n'
    print('work_2 start')
    for _ in range(n):
        with open(filename,'a') as out:
            out.write(line)
            time.sleep(1)
    print('work_2 end')

work_1('zhang.txt',3)
work_2('xiao.txt',4)
``



```python
# Synchronous vs. asynchronous execution: run the workers in separate
# processes instead of one after the other.
import multiprocessing

# The guard keeps child processes from re-running this section when they
# import the main module (needed with the "spawn" start method).
if __name__ == '__main__':
    p1 = multiprocessing.Process(target=work_1,args=('zhang.txt',3))
    p2 = multiprocessing.Process(target=work_1,args=('zhang.txt',4))
    p1.start()
    p2.start()
    # Asynchronous workers can share a multiprocessing.Lock; the
    # with-statement pairs acquire() with release().
    lock = multiprocessing.Lock()
    with lock:
        pass
### 进程池
import os
import multiprocessing
import time
def work(n, delay=2):
    """Simulate a task: print a start line, wait, then print an end line.

    Args:
        n: Task number shown in the messages.
        delay: Seconds to sleep between the two messages (default 2).
    """
    # os.getpid has to be called; formatting the function object itself
    # would print its repr instead of the process id.
    print('run work(%s) start,word id :%s'%(n,os.getpid()))
    time.sleep(delay)
    print('run work(%s) end,word id :%s'%(n,os.getpid()))
# The guard keeps pool workers from re-running this section when they
# import the main module (needed with the "spawn" start method).
if __name__ == '__main__':
    print('父进程 id:%s'%os.getpid())
    # Create a pool with two worker processes
    p = multiprocessing.Pool(2)
    # Submit 5 tasks to the pool
    for i in range(5):
        # apply_async is the asynchronous call: it returns at once, so the
        # tasks can run in parallel. p.apply(work, args=(i,)) is the
        # synchronous counterpart and blocks until that task has finished.
        p.apply_async(work,args=(i,))

    # No more tasks will be submitted; wait for the queued ones to finish.
    p.close()
    p.join()

进程通信

# 通过队列和管道 Queue and Pipe
import time
import multiprocessing

def put(q, delay=1):
    """Put 'a', 'b' and 'c' on ``q`` one at a time, pausing after each.

    Args:
        q: Object with a ``put(value)`` method; the surrounding script
            passes a multiprocessing.Queue.
        delay: Seconds to sleep after each item (default 1).
    """
    for value in ['a','b','c']:
        print("发送 %s 到 queue"%value)
        q.put(value)
        time.sleep(delay)
def get(q):
    """Print every value taken from ``q``, forever.

    Each value is fetched with a blocking ``q.get(True)``; the loop has no
    exit condition of its own.
    """
    while True:
        print('从queue中取到数据:%s' % q.get(True))

import multiprocessing

# The guard keeps child processes from re-running this section when they
# import the main module (needed with the "spawn" start method).
if __name__ == '__main__':
    q = multiprocessing.Queue()

    pWrite = multiprocessing.Process(target=put,args=(q,))
    pReader = multiprocessing.Process(target=get,args=(q,))

    pWrite.start()
    pReader.start()

    # get() loops forever, so wait for the writer to finish and then stop
    # the reader explicitly; otherwise the program never exits.
    pWrite.join()
    pReader.terminate()


# Inter-process communication through a Pipe (the Queue variant is above)
import time
import multiprocessing

def put(q, delay=1):
    """Send 'a', 'b' and 'c' through the pipe end ``q``, pausing after each.

    Args:
        q: The sending end of a pipe (any object with ``send(value)``).
        delay: Seconds to sleep after each item (default 1).
    """
    for value in ['a','b','c']:
        print("发送 %s 到 queue"%value)
        q.send(value)
        time.sleep(delay)

def get(q):
    """Receive values from the pipe end ``q`` forever, printing each one.

    Args:
        q: The receiving end of a pipe (any object with ``recv()``).
    """
    while True:
        value = q.recv()
        print('从queue中取到数据:%s'%value)

# The guard keeps child processes from re-running this section when they
# import the main module (needed with the "spawn" start method).
if __name__ == '__main__':
    # duplex=True gives a two-way pipe; with duplex=False the first
    # connection can only receive and the second can only send.
    p = multiprocessing.Pipe(duplex=False)

    # Each process is handed the pipe end it needs instead of reading a global.
    pWrite = multiprocessing.Process(target=put,args=(p[1],))
    pReader = multiprocessing.Process(target=get,args=(p[0],))

    pWrite.start()
    pReader.start()

    # get() never returns, so stop the reader once the writer is done.
    pWrite.join()
    pReader.terminate()

线程间通信

import time
import threading
import queue

q = queue.Queue(maxsize=10)
def producer(name):
    count=1
    while True:
        

你可能感兴趣的:(python numpy pandas)