数据分析库pandas

import pandas as pd
import matplotlib.pyplot as plt
import math

# DataFrame basics: construction, inspection, and label-based row selection.
print("====================创建dataFrame开始=======================")
# Build a 2x3 frame from row lists with explicit row and column labels.
rows = [[1, 2, 3], [4, 5, 6]]
df1 = pd.DataFrame(rows, index=["A", "B"], columns=["C", "D", "E"])

# Show the frame itself, its underlying array of values, and its transpose.
for view in (df1, df1.values, df1.transpose()):
    print(view)

# Dimensions as (rows, columns), followed by the total number of cells.
for dimension in (df1.shape, df1.size):
    print(dimension)

# First row, then last row.
print(df1.head(1))
print(df1.tail(1))

# Per-column summary statistics: count, mean, standard deviation,
# minimum, quartiles, and maximum.
summary = df1.describe()
print(summary)

# Select a single row by its index label.
row_a = df1.loc["A"]
print(row_a)

print("====================创建dataFrame结束=======================")
print("====================read csv开始=======================")
# Load the sample CSV and use its "id" column as the row index.
data = pd.read_csv("resource/fff.csv", index_col="id")
print(data.head(3))
print(data.shape)

# One column (a Series), first three rows.
x_column = data['x']
print(x_column.head(3))

# Several columns (a DataFrame), first three rows.
xy_columns = data[["x", "y"]]
print(xy_columns.head(3))

# Count how many times each distinct x value occurs.
x_frequencies = x_column.value_counts()
print(x_frequencies)

print("====================图形化开始=======================")
# Draw the x column as a line plot.
# NOTE: plt.show() is not called here, so whether a figure window appears
# depends on the environment the script is run in.
x_column.plot()
print(data.dtypes)
print("====================图形化结束=======================")

print("====================read csv结束=======================")
print("====================时间处理开始=======================")
timedata = pd.read_csv("resource/timestamp.csv")
print(timedata.dtypes)

# Interpret the numeric "atime" column as a count of seconds (unit="s")
# and convert it to datetime values.
atime_seconds = timedata["atime"]
local_time = pd.to_datetime(atime_seconds, unit="s")
print(local_time)

# Boolean mask that is True for rows whose converted time is after the cutoff.
cutoff = '2101-10-10'
condition = local_time > cutoff

# Keep only the rows of the original frame selected by the mask.
late_rows = timedata.loc[condition]
print(late_rows)

print("====================时间处理结束=======================")


print("====================cal开始=======================")
# Two points in the plane: (x, y) and (x1, y1).
x, y = 4.47, 6.55
x1, y1 = 4.1, 7.61

# Euclidean distance between the two points: sqrt(dx^2 + dy^2).
dx = x1 - x
dy = y1 - y
rate = math.sqrt(dx ** 2 + dy ** 2)
print(rate)

print("====================cal结束=======================")
/Users/jun/anaconda3/envs/python36/bin/python /Applications/PyCharm.app/Contents/helpers/pydev/pydev_run_in_console.py 51520 51521 /Users/jun/PycharmProjects/liaokepython/wanmenpython/ipandas.py
Running /Users/jun/PycharmProjects/liaokepython/wanmenpython/ipandas.py
import sys; print('Python %s on %s' % (sys.version, sys.platform))
sys.path.extend(['/Users/jun/PycharmProjects/liaokepython', '/Users/jun/PycharmProjects/liaokepython/wanmenpython'])
====================创建dataFrame开始=======================
   C  D  E
A  1  2  3
B  4  5  6
[[1 2 3]
 [4 5 6]]
   A  B
C  1  4
D  2  5
E  3  6
(2, 3)
6
   C  D  E
A  1  2  3
   C  D  E
B  4  5  6
             C        D        E
count  2.00000  2.00000  2.00000
mean   2.50000  3.50000  4.50000
std    2.12132  2.12132  2.12132
min    1.00000  2.00000  3.00000
25%    1.75000  2.75000  3.75000
50%    2.50000  3.50000  4.50000
75%    3.25000  4.25000  5.25000
max    4.00000  5.00000  6.00000
C    1
D    2
E    3
Name: A, dtype: int64
====================创建dataFrame结束=======================
====================read csv开始=======================
    areaCode     x     y    z             time  package tagId
id                                                           
1          1  4.65  6.55  1.2  2019/3/19 13:42     2209  B832
2          1  4.47  6.56  1.2  2019/3/19 13:42     2210  B832
3          1  4.47  6.55  1.2  2019/3/19 13:42     2211  B832
(169, 7)
id
1    4.65
2    4.47
3    4.47
Name: x, dtype: float64
       x     y
id            
1   4.65  6.55
2   4.47  6.56
3   4.47  6.55
6.03     3
5.55     3
1.63     3
2.85     2
6.40     2
5.83     2
5.57     2
5.46     2
5.65     2
8.78     2
2.91     2
5.08     2
7.64     2
3.42     2
4.65     2
4.47     2
9.35     2
9.24     2
8.44     2
5.21     2
5.58     2
5.61     2
9.08     2
3.56     2
5.60     2
8.52     1
3.92     1
4.40     1
0.17     1
8.74     1
        ..
7.90     1
9.28     1
9.27     1
6.11     1
4.06     1
6.66     1
8.06     1
9.66     1
0.55     1
2.59     1
8.29     1
8.79     1
10.07    1
3.39     1
6.21     1
7.44     1
7.40     1
4.22     1
5.91     1
1.17     1
4.67     1
8.69     1
9.09     1
4.05     1
6.88     1
5.59     1
5.80     1
7.33     1
5.70     1
8.75     1
Name: x, Length: 141, dtype: int64
====================图形化开始=======================
areaCode      int64
x           float64
y           float64
z           float64
time         object
package       int64
tagId        object
dtype: object
====================图形化结束=======================
====================read csv结束=======================
====================时间处理开始=======================
atime    int64
btime    int64
dtype: object
0   2110-06-13 20:25:51
1   2110-09-02 20:52:31
2   2100-12-10 15:05:51
Name: atime, dtype: datetime64[ns]
        atime    btime
0  4432134351    54335
1  4439134351  3454543
====================时间处理结束=======================
====================cal开始=======================
1.1227199116431494
====================cal结束=======================
PyDev console: starting.
Python 3.6.8 |Anaconda, Inc.| (default, Dec 29 2018, 19:04:46) 
[GCC 4.2.1 Compatible Clang 4.0.1 (tags/RELEASE_401/final)] on darwin

你可能感兴趣的:(数据分析库pandas)