Pandas入门系列(二)-- DataFrame

数据分析汇总学习 https://blog.csdn.net/weixin_39778570/article/details/81157884

DataFrame数据框

# 巧用复制粘贴

>>> import numpy as np
>>> import pandas as pd
>>> from pandas import Series, DataFrame
>>> import webbrowser
>>> link = 'http://www.tiobe.com/tiobe-index/'
>>> webbrowser.open(link)  # 打开一个网页 然后把要生成的数据复制到剪切板,以下选择了编程语言前10行
True
>>> df = pd.read_clipboard()  # 执行这段代码会根据剪贴板内容自动生成DataFrame对象
>>> df
   May 2018 May 2017  Change Programming Language  Ratings Change.1
0         1        1     NaN                 Java  16.380%   +1.74%
1         2        2     NaN                    C  14.000%   +7.00%
2         3        3     NaN                  C++   7.668%   +2.92%
3         4        4     NaN               Python   5.192%   +1.64%
4         5        5     NaN                   C#   4.402%   +0.95%
5         6        6     NaN    Visual Basic .NET   4.124%   +0.73%
6         7        9  change                  PHP   3.321%   +0.63%
7         8        7  change           JavaScript   2.923%   -0.15%
8         9        -  change                  SQL   1.987%   +1.99%
9        10       11  change                 Ruby   1.182%   -1.25%
>>> type(df)  # 查看类型
<class 'pandas.core.frame.DataFrame'>
 

# 查看DataFrame的内容

>>> df.columns # 查看所有列
Index(['May 2018', 'May 2017', 'Change', 'Programming Language', 'Ratings',
       'Change.1'],
      dtype='object')
>>> df.Ratings  # 查看Ratings这一列
0    16.380%
1    14.000%
2     7.668%
3     5.192%
4     4.402%
5     4.124%
6     3.321%
7     2.923%
8     1.987%
9     1.182%
Name: Ratings, dtype: object
# 某一列的类型为序列(Series)
>>> type(df['May 2018'])
<class 'pandas.core.series.Series'>

# 从df中提取指定的列

 

>>> df_new = DataFrame(df, columns = ['May 2018', 'Change.1']) # 从df中提取两列生成新的DataFrame
>>> df_new
   May 2018 Change.1
0         1   +1.74%
1         2   +7.00%
2         3   +2.92%
3         4   +1.64%
4         5   +0.95%
5         6   +0.73%
6         7   +0.63%
7         8   -0.15%
8         9   +1.99%
9        10   -1.25%

# 如果 columns 里包含原 DataFrame 中不存在的列名,该列会自动填充为空值(NaN)

 

>>> df_new = DataFrame(df, columns=['May 2018', 'Change', 'Sep 2019'])
>>> df_new
   May 2018  Change  Sep 2019 # Sep 2019 这一列是不存在的
0         1     NaN       NaN
1         2     NaN       NaN
2         3     NaN       NaN
3         4     NaN       NaN
4         5     NaN       NaN
5         6     NaN       NaN
6         7  change       NaN
7         8  change       NaN
8         9  change       NaN
9        10  change       NaN

# 列的赋值
# 对序列进行赋值,使用range函数

 

>>> df_new['Sep 2019'] = range(0,10) # 赋值 0-9 这10个数给 Sep 2019 这一列
>>> df_new
   May 2018  Change  Sep 2019
0         1     NaN         0
1         2     NaN         1
2         3     NaN         2
3         4     NaN         3
4         5     NaN         4
5         6     NaN         5
6         7  change         6
7         8  change         7
8         9  change         8
9        10  change         9

# 使用np下的arange(数组)函数

 

>>> df_new['Sep 2019'] = np.arange(1,11)
>>> df_new
   May 2018  Change  Sep 2019
0         1     NaN         1
1         2     NaN         2
2         3     NaN         3
3         4     NaN         4
4         5     NaN         5
5         6     NaN         6
6         7  change         7
7         8  change         8
8         9  change         9
9        10  change        10

# 使用序列修改

 

>>> df_new['Sep 2019'] = pd.Series(np.arange(2,12))
>>> df_new
   May 2018  Change  Sep 2019
0         1     NaN         2
1         2     NaN         3
2         3     NaN         4
3         4     NaN         5
4         5     NaN         6
5         6     NaN         7
6         7  change         8
7         8  change         9
8         9  change        10
9        10  change        11

# 用带索引的 Series 只对某一列下的某些行进行赋值(索引未对齐的行会变成 NaN)

 

>>> df_new['Sep 2019'] = pd.Series([100,200], index=[1,2])
>>> df_new
   May 2018  Change  Sep 2019
0         1     NaN       NaN
1         2     NaN     100.0
2         3     NaN     200.0
3         4     NaN       NaN
4         5     NaN       NaN
5         6     NaN       NaN
6         7  change       NaN
7         8  change       NaN
8         9  change       NaN
9        10  change       NaN

 

官网:http://pandas.pydata.org/pandas-docs/version/0.14.1/

 

 

你可能感兴趣的:(python数据科学)