# -*- coding: utf-8 -*- """ Created on Fri Jul 15 00:52:07 2016 @author: Alan Guo """ a = 1 a.bit_length() type(a) a.as_integer_ration() import decimal from decimal import Decimal decimal.getcontext() decimal.getcontext().prec = 4 e = Decimal(1) / Decimal(11) t = 'this is a string object' t.capitalize() t.split() t.find('string') t.replace(' ','|') import re # regular expression series = """ '01/18/2014 13:00:00', 100,'1st' """ dt = re.compile("'[0-9/:\s]+'") result = dt.findall(series) from datetime import datetime pydt = datetime.strptime(result[0].replace("'",""), '%m/%d/%Y %H:%M:%S') pydt datetime.datetime(2014, 1, 18, 13, 0) print pydt # 日期格式转化 date='Wed, 11 Apr 2012 09:37:05 +0800' dd=datetime.strptime(date,'%a, %d %b %Y %H:%M:%S %z') dd.strftime('%Y-%m-%d %H:%M:%S') '''%a Abbreviated weekday name %A Full weekday name %b Abbreviated month name %B Full month name %c Date and time representation appropriate for locale %d Day of month as decimal number (01 - 31) %H Hour in 24-hour format (00 - 23) %I Hour in 12-hour format (01 - 12) %j Day of year as decimal number (001 - 366) %m Month as decimal number (01 - 12) %M Minute as decimal number (00 - 59) %p Current locale's A.M./P.M. indicator for 12-hour clock %S Second as decimal number (00 - 59) %U Week of year as decimal number, with Sunday as first day of week (00 - 51) %w Weekday as decimal number (0 - 6; Sunday is 0) %W Week of year as decimal number, with Monday as first day of week (00 - 51) %x Date representation for current locale %X Time representation for current locale %y Year without century, as decimal number (00 - 99) %Y Year with century, as decimal number %z, %Z Time-zone name or abbreviation; no characters if time zone is unknown %% Percent sign ''' # tuple t = (1, 2.5, 'data') t[2] t.count('data') t.index(1) # 找到元素的位置 # list I = [1, 2.5, 'data'] l = list(t) l.append([4,3]) # append list at the end # [1, 2.5, 'data', [4, 3]] l.extend([1.0, 1.5, 2.0]) # append the elements of list # [..... [4, 3], 1.0, 1.5, 2.0] l.insert(1, 'insert') # insert object before index position # [1, 'insert', 2.5,...] l.remove('data') # remove first occurence of object # [1, 'insert', 2.5, [4, 3]...] p = l.pop(3) # remove and return object at index print l, p # [1, 'insert', 2.5, 1.0...] [4,3] l[2:5] # 3rd to 5th elements for i in range(1, 10): if i % 2 == 0: # % is for modulo print "%d is even" % i elif i % 3 == 0: print "%d is multiple of 3" % i else: print "%d is odd" % i total = 0 while total < 100: total += 1 print (total) m = [i**2 for i in range(5)] m def even(x): return x % 2 == 0 map (even, range(10)) map(lambda x: x ** 2, range(10)) filter(even, range(15)) reduce(lambda x, y: x + y, range(15)) # Dict d = {'Name': 'Angela Merkel', 'Country': 'Germany', 'Profession': 'Chancelor', 'Age': 60 } type(d) d.keys() d.values() d.items() for item in d.iteritems(): print(item) # Sets s = set(['u', 'd', 'ud', 'du', 'd', 'du']) t = set(['d', 'dd', 'uu', 'u']) s.union(t) # all of s and t s.intersection(t) # both in s and t s.difference(t) # in s but not t s.symmetric_difference(t) # in either one but not both from random import randint l = [randint(0, 10) for i in range(1000)] # 1,000 random integers between 0 and 10 len(l) # number of elements in l s = set(l) from copy import deepcopy v = [0.5, 0.75, 1.0, 1.5, 2.0] m = 3 * [deepcopy(v), ] import numpy as np a = np.array([0, 0.5, 1.0, 1.5, 2.0]) type(a) a[:2] # indexing as with list objects in 1 dimension a.sum() a.std() a.cumsum() np.sqrt(a) b = np.array([a, a*2]) b[0] # first row b[0, 2] # third element of first row b.sum() b.sum(axis=0) # column-wise sum b.sum(axis=1) b.tolist() b.flatten() ; b.flat # 用来循环,遍历, iterable b.min(0) # or b.min(1) b.prod() b.cumprod() b.any() ; b.all() b.size # 元素的总数量 c = np.zeros((2, 3, 4), dtype='i',order='C') # also: np.ones() d = np.ones_like(c, dtype='f', order='C') # also: np.zeros_like() x = np.random.standard_normal((5,1000000)) y = 2 * x + 3 # linear equation y = a * x + b C = np.array((x, y), order='C') F = np.array((x, y), order='F') x = 0.0; y = 0.0 # memory cleanup C[:2].round(2) a = np.arange(0, 4) b = np.arange(1, 5) np.add(a, b) np.add(a, b, a) x1 = 3; x2 = 2 x1//x2 # = 1 (floor divide) a = np.arange(0, 12) a.shape = 3,4 a.shape a.reshape(12,) a.shape = 4,-1 a.reshape(3,-1) a.tofile('a.bin') b = np.fromfile("a.bin", dtype = np.int32) np.save("a.npy", a) c = np.load('a.npy') np.savez(" ",a, b, ) # 保存多数组至压缩文件 a = np.arange(0, 12, 0.5).reshape(4, -1) np.savetxt("a.txt",a) # 默认以空格分隔 np.loadtxt('a.txt') np.savetxt('a.txt', a, fmt='%d', delimiter=',') np.loadtxt('a.txt', delimiter=',') #读入的时候也需要指定 a = np.arange(8) b = np.add.accumulate(a) c = a + b f = file('result.npy', 'wb') np.save(f, a) # 按顺序将a, b, c保存进文件对象f np.save(f, b) np.save(f, c) f.close() f = file('result.np', 'rb') np.load(f) # 顺序从文件对象f中读取内容 np.load(f) np.load(f) # Data Visualization import numpy as np import matplotlib as mpl import matplotlib.pyplot as plt # %matplotlib inline np.random.seed(1000) y = np.random.standard_normal(20) x = range(len(y)) plt.plot(x, y) # 也可写成 plt.plot(y) plt.plot(y.cumsum()) plt.grid(True) # adds a grid plt.axis('tight') # adjusts the axis ranges plt.plot(y.cumsum()) plt.grid(True) plt.xlim(-1, 20) plt.ylim(np.min(y.cumsum())-1, np.max(y.cumsum())+1) """ =============================== """ np.random.seed(2000) y = np.random.standard_normal((20, 2)).cumsum(axis=0) plt.figure(figsize=(7, 4)) # the figsize parameter defines the size of the figure in (width, height) plt.plot(y.cumsum(), 'b', lw=1.5) plt.plot(y.cumsum(), 'yo') plt.grid(True) plt.axis('tight') plt.xlabel('index') plt.ylabel('value') plt.title('A Simple Plot') plt.figure(figsize=(7,4)) plt.plot(y[:, 0], lw=1.5, label='1st') plt.plot(y[:, 1], lw=1.5, label='2nd') plt.plot(y, 'ro') plt.grid(True) plt.legend(loc=0) # 0: best possible plt.axis('tight') plt.xlabel('index') plt.ylabel('value') plt.title('A Simple Plot') fig, ax1 = plt.subplots() # Plot first data set using first (left) axis plt.plot(y[:, 0], 'b', lw=1.5, label='1st') plt.plot(y[:, 0], 'ro') plt.grid(True) plt.legend(loc=8) plt.axis('tight') plt.xlabel('index') plt.ylabel('value 1st') plt.title('A Simple Plot') ax2 = ax1.twinx() # plot second data set using second (right) axis plt.plot(y[:, 1], 'g', lw=1.5, label='2nd') plt.plot(y[:, 1], 'ro') plt.legend(loc=0) plt.ylabel('value 2nd') fig, ax1 = plt.subplots() plt.figure(figsize=(7,5)) plt.subplot(211) plt.plot(y[:, 0], lw=1.5, label='1st') plt.plot(y[:, 0], 'ro') plt.grid(True) plt.legend(loc=0) plt.axis('tight') plt.ylabel('value') plt.title('A Simple Plot') plt.subplot(212) plt.plot(y[:, 1], 'g', lw=1.5, label='2nd') plt.plot(y[:, 1], 'ro') plt.grid(True) plt.legend(loc=0) plt.axis('tight') plt.xlabel('index') plt.ylabel('value') plt.bar(np.arange(len(y)), y[:,1], width=0.5, color='g', label='2nd') ## Other Plot Styles # Scatter Plot y = np.random.standard_normal((1000,2)) plt.figure(figsize=(7,5)) plt.plot(y[:,0], y[:,1], 'ro') plt.grid(True) plt.xlabel('1st') plt.ylabel('2nd') plt.title('Scatter Plot') plt.figure(figsize=(7,5)) plt.scatter(y[:, 0], y[:, 1], marker='o', color='r') plt.grid(True) plt.xlabel('1st') plt.ylabel('2nd') plt.title('Scatter Plot') c = np.random.randint(0, 10, len(y)) plt.figure(figsize=(7,5)) plt.scatter(y[:,0], y[:,1],c=c,marker='o') plt.colorbar() plt.grid(True) plt.xlabel('1st') plt.ylabel('2nd') plt.title('Scatter Plot') # Histogram plt.figure(figsize=(7,4)) plt.hist(y, label=['1st', '2nd'], bins=25,color=['r','b']) # Stacked hist plt.hist(y, label=['1st', '2nd'], color=['b','g'], stacked=True, bins=20) # box plot fig,ax = plt.subplots(figsize=(7,4)) plt.boxplot(y) plt.grid(True) plt.setp(ax, xticklabels=['1st', '2nd']) ## import matplotlib.finance as mpf start = (2014, 5, 1) end = (2014, 6, 30) quotes = mpf.quotes_historical_yahoo_ochl('^GDAXI', start, end) quotes[:2] fig, ax = plt.subplots(figsize=(8,5)) fig.subplots_adjust(bottom=0.2) mpf.candlestick_ochl(ax, quotes, width=0.6, colorup='r', colordown='g') plt.grid(True) ax.xaxis_date() # dates on the x-axis ax.autoscale_view() plt.setp(plt.gca().get_xticklabels(),rotation=30) ### quotes = np.array(mpf.quotes_historical_yahoo_ochl('YHOO',start,end)) fig, (ax1, ax2) = plt.subplots(2,sharex=True, figsize=(8,6)) mpf.candlestick_ochl(ax1, quotes, width=0.6, colorup='r', colordown='g') ax1.set_title('Yahoo Inc.') ax1.set_ylabel('index level') ax1.grid(True) ax1.xaxis_date() plt.bar(quotes[:, 0]-0.25, quotes[:,5],width=0.5) ax2.set_ylabel('volume') ax2.grid(True) ax2.autoscale_view() plt.setp(plt.gca().get_xticklabels(),rotation=30) ## 3D Plotting strike = np.linspace(50, 150, 24) ttm = np.linspace(0.5, 2.5, 24) strike, ttm = np.meshgrid(strike, ttm) iv = (strike - 100)**2 / (100 * strike)/ttm # generate fake implied volatilities from mpl_toolkits.mplot3d import Axes3D fig = plt.figure(figsize=(9, 6)) ax = fig.gca(projection='3d') surf = ax.plot_surface(strike, ttm, iv, rstride=2, cstride=2, cmap=plt.cm.coolwarm, linewidth=0.5, antialiased=True) ax.set_xlabel('strike') ax.set_ylabel('time-to-maturity') ax.set_zlabel('implied volatility') fig.colorbar(surf, shrink=0.5, aspect=5) ## ############## Volatility surface fig = plt.figure(figsize=(8,5)) ax = fig.add_subplot(111, projection='3d') ax.view_init(30, 60) ax.scatter(strike, ttm, iv, zdir='z', s=25, c='b', marker='^') ax.set_xlabel('strike') ax.set_ylabel('time-to-maturity') ax.set_zlabel('implied volatility') ######################################### # pandas basic import numpy as np import pandas as pd df = pd.DataFrame([10, 20, 30, 40], columns=['numbers'], index=['a', 'b', 'c', 'd']) df df.index # the index value df.columns # the column names df.ix['c'] # selection via index df.ix[['a', 'd']] # selection of multiple indices df.ix[['a', 'd']] # selection via Index object df.sum() # sum per column df.max() df.min() df.apply(lambda x: x**2) # square of every element df**2 # agian square, this time Numpy-like df['floats'] = (1.5, 2.5, 3.5, 4.5) # new column is generated df['floats'] # selection of new column df['names'] = pd.DataFrame(['Yves', 'Guido', 'Felix', 'Francesc'], index=['d', 'a', 'b', 'c']) df.append({'number':100, 'floats':5.75, 'names':'Henry'}, ignore_index=True) # temporary object, df not changed, and index not changed df = df.append(pd.DataFrame({'numbers':100, 'float':5.75, 'names':'Henry'}, index=['z'])) # it's better this way df df = df.join(pd.DataFrame([1, 4, 9, 16, 25], index=['a', 'b', 'c', 'd', 'y'], columns=['squares']), how = 'outer') df[['numbers', 'squares']].mean() # column-wise mean df[['numbers', 'squares']].std() # column-wise std df2[df2['E'].isin(['two','four'])] df.sub(s, axis='index') # s也是个index ###### a = np.random.standard_normal((9, 4)) a.round(6) df = pd.DataFrame(a) df.columns = [['No1', 'No2', 'No3', 'No4']] df['No3'][3] ## value in column No2 at index 3 dates = pd.date_range('2015-1-1', periods=9, freq='M') dates df.index = dates ### from dataframe to array np.array(df).round(6) ### basic functions with dataframe df.sum() df.mean() df.cumsum() df.describe() np.sqrt(df) np.sqrt(df).sum() %matplotlib inline df.cumsum().plot(lw=2.0) df.plot(secondary_y='weight', figsize=(14,7)) #### 画两个y轴 type(df) df['No1'] import matplotlib.pyplot as plt df['No1'].cumsum().plot(style='r', lw=2.) plt.xlabel('date') plt.ylabel('value') ### GroupBy Operations df['Quarter'] = ['Q1', 'Q1','Q1', 'Q2', 'Q2', 'Q2', 'Q3', 'Q3', 'Q3'] groups = df.groupby('Quarter') groups.mean() groups.max() groups.size() df['Odd_Even'] = ['Odd', 'Even', 'Odd', 'Even', 'Odd', 'Even', 'Odd', 'Even', 'Odd'] groups = df.groupby(['Quarter', 'Odd_Even']) groups.size() groups.mean() ## Read financial data import pandas.io.data as web DAX = web.DataReader(name='SPY', data_source='yahoo', start='2000-1-1') DAX.info() DAX.tail() DAX['Close'].plot(figsize=(8,5)) %time DAX['Return'] = np.log(DAX['Close']/DAX['Close'].shift(1)) DAX[['Close', 'Return']].tail() DAX[['Close', 'Return']].plot(subplots=True, color='b', figsize=(8,5)) DAX['42d'] = pd.rolling(DAX['Close'], window=42).mean() DAX['252d'] = pd.rolling(DAX['Close'], window=252).mean() DAX[['Close', '42d', '252d']].tail.() DAX[['Close', '42d', '252d']].plot(figsize=(8, 5)) ### import math ## Regression Analysis import pandas as pd from urllib import urlretrieve es_url = 'http://www.stoxx.com/download/historical_values/hbrbcpe.txt' vs_url = 'http://www.stoxx.com/download/historical_values/h_vstoxx.txt' urlretrieve(es_url, './data/es.txt') urlretrieve(vs_url, './data/vs.txt') !s -o ./data/*.txt # Windows: use dir lines = open('./data/es.txt','r').readlines() lines = [line.replace(' ', '') for line in lines] for line in lines[3883:3890]: print line[41:] ## To make the data set easier to import new_file = open('./data/es50.txt', 'w') # opens a new file new_file.writelines('date' + lines[3][:-1] + ';DEL'+ lines[3][-1]) # writes the corrected third line of the original file # as first line of new file new_file.writelines(lines[4:]) # write the remaining lines of the original file new_file.close() ## new_lines = open('./data/es50.txt','r').readlines() new_lines[:5] # now import with read_csv function es = pd.read_csv('./data/es50.txt', index_col=0, parse_dates=True, sep=';', dayfirst=True) np.round(es.tail()) ## now delete helper column del es['DEL'] es.info() ## directly import from url cols = ['SX5P', 'SX5E', 'SXXP', 'SXXE', 'SXXF', 'SXXA', 'DKSF', 'DKXF'] es = pd.read_csv(es_url, index_col=0, parse_dates=True, sep=';', dayfirst=True, header=None, skiprows=4, names=cols) es.tail() ###### vs = pd.read_csv('./data/vs.txt', index_col=0, header=2, parse_dates=True, sep=',', dayfirst=True) vs.info() ### creat a new dataframe import datetime as dt data = pd.DataFrame({'EUROSTOXX' : es['SX5E'][es.index > dt.datetime(1999, 1, 1)]}) data = data.join(pd.DataFrame({'VSTOXX' : vs['V2TX'][vs.index > dt.datetime(1999, 1, 1)]})) data = data.fillna(method='ffill') data.info() data.tail() data.plot(subplots=True, grid=True, style='b', figsize=(8, 6)) rets = np.log(data / data.shift(1)) rets.head() rets.plot(subplots=True, grid=True, style='b', figsize=(8,6)) rets = rets.dropna() ix_infs = np.where(np.isinf(rets)) rets.drop(rets.index[ix_infs[0]],inplace=True) #方法一:直接 del DF['column-name'] # 直接删除 # 方法二:采用drop方法,有下面三种等价的表达式: DF= DF.drop('column_name', 1); DF.drop('column_name',axis=1, inplace=True) DF.drop([DF.columns[[0,1, 3]]], axis=1,inplace=True) # Note: zero indexed ### regression xdat = rets['EUROSTOXX'] ydat = rets['VSTOXX'] model = pd.ols(y=ydat, x=xdat) model model.beta plt.plot(xdat, ydat, 'r.') ax = plt.axis() # grab axis values x = np.linspace(ax[0], ax[1] + 0.01) plt.plot(x, model.beta[1] + model.beta[0]*x, 'b', lw=2) plt.grid(True) plt.axis('tight') plt.xlabel('EURO STOXX 50 returns') plt.ylabel('VSTOXX returns') ## Rolling correlation rets.corr() pd.rolling_corr(rets['EUROSTOXX'], rets['VSTOXX'], window=252).plot(grid=True, style='b') ##High-Frequency Data import numpy as np import pandas as pd import datetime as dt from urllib import urlretrieve %matplotlib inline ## web-based API url1 = 'http://hopey.netfonds.no/posdump.php?' url2 = 'date=%s%s%s&paper=AAPL.O&csv_format=csv' url = url1 + url2 year = '2014' month = '09' days = ['22', '23', '24', '25'] # dates might need to be updated AAPL = pd.DataFrame() for day in days: AAPL = AAPL.append(pd.read_csv(url%(year, month, day), index_col=0, header=0, parse_dates=True)) AAPL.columns = ['bid', 'bdepth', 'bdeptht', 'offer', 'odepth', 'odeptht'] # shorter column names AAPL.info() #### writing objects to disk path = 'D:\wd.python\forise\' import numpy as np from random import gauss a = [gauss(1.5, 2) for i in range(1000000)] # generation of normally distributed randoms import pickle pkl_file = open('data.pkl', 'wb') # open file for writing # Note: existing file might be overwritten %time pickle.dump(a, pkl_file) pkl_file pkl_file.close() # read data to memory pkl_file = open(path + 'data.pkl', 'rb') # open file for reading %time b = pickle.load(pkl_file) b[:5] np.allclose(np.array(a), np.array(b)) ## write two objects pkl_file = open(path + 'data.pkl', 'wb') pickle.dump(np.array(a), pkl_file) pickle.dump(np.array(a)**2, pkl_file) ## read two objects from one file, FIFO rule, ## load twice ### reading and writing text files rows = 5000 a = np.random.standard_normal((rows, 5)) # dummy data a.round(4) import pandas as pd t = pd.date_range(start='2014/1/1', periods=rows, freq='H') # set of hourly datetime range csv_file = open(path + 'data.csv', 'w') # open file for writing header = 'data,no1,no2,no3,no4.no5\n' csv_file.write(header) for t_, (no1, no2, no3, no4, no5) in zip(t,a): s = '%s,%f,%f,%f,%f,%f\n' % (t_, no1, no2, no3, no4, no5) csv_file.write(s) csv_file = open(path + 'data.csv', 'r') # open file for reading for i in range(5): print csv_file.readline(), # or we can read all the content by readlines csv_file = open(path + 'data.csv', 'r') content = csv_file.readlines() for line in content[:5]: print line, csv_file.close() ### SQL Databases import sqlite3 as sq3 query = 'CREATE TABLE numbs (Date date, No1 real, No2 real)' con = sq3.connect(path + 'numbs.db') con.execute(query) con.commit() # to make the query more effective, call the method commit ## create data import datetime as dt con.execute('INSERT INTO numbs VALUES(?, ?, ?)', (dt.datetime.now(), 0.12, 7.3)) data = np.random.standard_normal((10000, 2)).round(5) for row in data: con.execute('INSERT INTO numbs VALUES(?, ?, ?)' (dt.datetime.now(), row[0], row[1])) con.commit() ### Writing and Reading NumPy Arrays import numpy as np dtimes = np.arange('2015-01-01 10:00:00', '2021-12-31 22:00:00', dtype='datetime64[m]') # minute intervals len(dtimes) dty = np.dtype([('Date', 'datetime64[m]'), ('No1', 'f'), ('No2', 'f')]) data = np.zeros(len(dtimes), dtype=dty) data['Date'] = dtimes a = np.random.standard_normal((len(dtimes), 2)).round(5) data['No1'] = a[:,0] data['No2'] = a[:,1] %time np.save(path + 'array', data) # suffix .npy as added %time np.load(path + 'array.npy') ### Chapter 9 : Mathematical tools import numpy as np import matplotlib.pyplot as plt %matplotlib inline from datetime import datetime from xlrd import xldate_as_tuple d=datetime(xldate_as_tuple(x,0)) # x就是你那串数字的变量 # 用Pandas完成Excel中常见的任务 # http://python.jobbole.com/80972/ df.query('name == ["Kulas Inc","Barton LLC"]').head() df[df["sku"].map(lambda x: x.startswith('B1'))] df[df['sku'].str.contains('B1')].head() ## 如何利用字典循环, loop with dictionary col_names = list('abcd') dic = {} for col in col_names: temp = range(len(col_names)) # could be a Series or DataFrame dic.update({col:temp}) df = pd.DataFrame(dic, index=range(len(col_names))) dic.keys() # list in python 2, but Dict in python 3 dic.values() # list w # 批量更改列名 df.columns # Index([u'$a', u'$b', u'$c', u'$d', u'$e'], dtype=object) df.rename(columns=lambda x: x[1:], inplace=True) df = df.rename(columns=lambda x: x.replace('$', '') df = df.rename(columns={'$a': 'a', '$b': 'b'}) df.columns # Index([u'a', u'b', u'c', u'd', u'e'], dtype=object) # NaN df = pd.DataFrame(data=np.nan, columns=list('abcd')) # http://www.jb51.net/article/49457.htm # sqlalchemy, python 和 mysql 交互 from sqlalchemy.types import String data.to_sql('data_dtype', engine, dtype={'Col_1': String}) pd.read_sql_table('data', engine, index_col='id') pd.read_sql_table('data', engine, columns=['Col_1', 'Col_2']) pd.read_sql_table('data', engine, parse_dates=['Date']) pd.read_sql_table('data', engine, parse_dates={'Date': '%Y-%m-%d'}) pd.read_sql_query('SELECT * FROM data', engine) pd.read_sql_query("SELECT id, Col_1, Col_2 FROM data WHERE id = 42;", engine) # 对于pandas.Series的操作, descriptive statistics Series.cumprod() Series.cummin() Series.cummax(), .abs(), .all(),.any(),.autocorr(), .between(),.clip(),.clip_lower(),.clip_upper(),.count(),.cov(), Series.describe() Series.diff(), .factorize(), .kurt(),.mad(), Series.nlargest(),.nsmallest() # Faster than .sort_values(ascending=False\True).head(n) Series.pct_change, .prod(), .quantile(), .rank(), .sem() Series.var(), Series.unique() # 生成array,不重复的数 还有.nunique(),number of unique() Series.is_monotonic, is_unique,is_monotonic_increasing/decreasing Series.value_counts # 日期时间方法处理汇总 import datetime now = datetime.datetime.now() # datetime import time time.time() # 时间戳 timestamp time.localtime() # time tuple datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') datetime.date.now().date() # date datetime.date.today() # date # 获取明天/前N天 datetime.date.today() + datetime.timedelta(days=1) # 明天 # 获取当天开始和结束时间 datetime.datetime.combine(datetime.date.today(), datetime.time.min()) # datetime.time.max # 获取两个datetime的时间差 (datetime.datetime(2017,3,16,21,30) - datetime.datetime.now()).total_seconds() ## datetime.timedelta().total_seconds() # 获取本周/本月/上月最后一天 today = datetime.date.today() sunday = today + datetime.timedelta(6 - today.weekday()) # 本月 import calendar today = datetime.date.today() _,last_day_num = calendar.monthrange(today.year, today.month) # 返回tuple(本月第一天是星期几,这个月总共多少天) last_day = datetime.date(today.year, today.month, last_day_num) # 获取上个月的最后一天 today = datetime.date.today() first = datetime.date(day=1, month=today.month, year=today.year) lastMonth = first - datetime.timedelta(days=1) # string 转换 datetime.datetime.strptime("2014-12-31 18:20:10","%Y-%m-%d %H:%M:%S") datetime.datetime.now().timetuple() # datetime to timetuple # timetuple to datetime # timetuple -> timestamp -> datetime # datetime -> date datetime.datetime.now().date() # date -> datetime datetime.date.today() today = datetime.date.today() datetime.datetime.combine(today,datetime.time()) datetime.datetime.combine(today,datetime.time.min) # datetime <=> timestamp # datetime -> timestamp now = datetime.datetime.now() timestamp = time.mktime(now.timetuple()) # timestamp -> datetime datetime.datetime.fromtimestamp(time.time()) # Pandsas string method / working with text data s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat']) s.str.lower() s.str.upper() s.str.len() idx = pd.Index([' jack', 'jill ', ' jesse ', 'frank']) idx.str.strip() # 用来去掉空格 idx.str.lstrip() idx.str.rstrip() df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_') # 两次str