# 巧用复制粘贴
>>> import numpy as np
>>> import pandas as pd
>>> from pandas import Series, DataFrame
>>> import webbrowser
>>> link = 'http://www.tiobe.com/tiobe-index/'
>>> webbrowser.open(link) # 打开网页,然后把要转换的表格数据复制到剪贴板;这里选取了编程语言排行榜的前10行
True
>>> df = pd.read_clipboard() # 执行这段代码会根据剪贴板内容自动生成DataFrame对象
>>> df
May 2018 May 2017 Change Programming Language Ratings Change.1
0 1 1 NaN Java 16.380% +1.74%
1 2 2 NaN C 14.000% +7.00%
2 3 3 NaN C++ 7.668% +2.92%
3 4 4 NaN Python 5.192% +1.64%
4 5 5 NaN C# 4.402% +0.95%
5 6 6 NaN Visual Basic .NET 4.124% +0.73%
6 7 9 change PHP 3.321% +0.63%
7 8 7 change JavaScript 2.923% -0.15%
8 9 - change SQL 1.987% +1.99%
9 10 11 change Ruby 1.182% -1.25%
>>> type(df) # 查看类型
<class 'pandas.core.frame.DataFrame'>
# 查看DataFrame的内容
>>> df.columns # 查看所有列
Index(['May 2018', 'May 2017', 'Change', 'Programming Language', 'Ratings',
'Change.1'],
dtype='object')
>>> df.Ratings # 查看Ratings这一列
0 16.380%
1 14.000%
2 7.668%
3 5.192%
4 4.402%
5 4.124%
6 3.321%
7 2.923%
8 1.987%
9 1.182%
Name: Ratings, dtype: object
# 某一列的类型为序列(Series),而不是时间序列
>>> type(df['May 2018'])
<class 'pandas.core.series.Series'>
# 从df中提取指定的列
>>> df_new = DataFrame(df, columns = ['May 2018', 'Change.1']) # 从df中提取两列生成新的DataFrame
>>> df_new
May 2018 Change.1
0 1 +1.74%
1 2 +7.00%
2 3 +2.92%
3 4 +1.64%
4 5 +0.95%
5 6 +0.73%
6 7 +0.63%
7 8 -0.15%
8 9 +1.99%
9 10 -1.25%
# 如果columns里包含原DataFrame中不存在的列名,该列会被自动创建并填充为NaN
>>> df_new = DataFrame(df, columns=['May 2018', 'Change', 'Sep 2019'])
>>> df_new
May 2018 Change Sep 2019 # Sep 2019 这一列是不存在的
0 1 NaN NaN
1 2 NaN NaN
2 3 NaN NaN
3 4 NaN NaN
4 5 NaN NaN
5 6 NaN NaN
6 7 change NaN
7 8 change NaN
8 9 change NaN
9 10 change NaN
# 列的赋值
# 对序列进行赋值,使用range函数
>>> df_new['Sep 2019'] = range(0,10) # 赋值 0-9 这10个数给 Sep 2019 这一列
>>> df_new
May 2018 Change Sep 2019
0 1 NaN 0
1 2 NaN 1
2 3 NaN 2
3 4 NaN 3
4 5 NaN 4
5 6 NaN 5
6 7 change 6
7 8 change 7
8 9 change 8
9 10 change 9
# 使用np下的arange(数组)函数
>>> df_new['Sep 2019'] = np.arange(1,11)
>>> df_new
May 2018 Change Sep 2019
0 1 NaN 1
1 2 NaN 2
2 3 NaN 3
3 4 NaN 4
4 5 NaN 5
5 6 NaN 6
6 7 change 7
7 8 change 8
8 9 change 9
9 10 change 10
# 使用序列修改
>>> df_new['Sep 2019'] = pd.Series(np.arange(2,12))
>>> df_new
May 2018 Change Sep 2019
0 1 NaN 2
1 2 NaN 3
2 3 NaN 4
3 4 NaN 5
4 5 NaN 6
5 6 NaN 7
6 7 change 8
7 8 change 9
8 9 change 10
9 10 change 11
# 对单独某一列下的某些行进行赋值(Series会按索引对齐,未指定的行变为NaN,整列因此变成浮点数)
>>> df_new['Sep 2019'] = pd.Series([100,200], index=[1,2])
>>> df_new
May 2018 Change Sep 2019
0 1 NaN NaN
1 2 NaN 100.0
2 3 NaN 200.0
3 4 NaN NaN
4 5 NaN NaN
5 6 NaN NaN
6 7 change NaN
7 8 change NaN
8 9 change NaN
9 10 change NaN
# 官网:http://pandas.pydata.org/pandas-docs/version/0.14.1/