# PCA代码 (PCA code)
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import pandas as pd
from stockstats import StockDataFrame
# Do not limit the number of columns shown when printing DataFrames.
pd.set_option('display.max_columns', None)
# Do not limit the number of rows shown when printing DataFrames.
pd.set_option('display.max_rows', None)

# Load the spreadsheet and wrap it as a StockDataFrame.
data = StockDataFrame.retype(pd.read_excel('siliu.xls'))

# PCA: reduce the data to three components.
X_reduced = PCA(n_components=3).fit_transform(data)

# K-means clustering (3 clusters) on the PCA-reduced data.
# NOTE(review): the fitted model is not used by the export below -- confirm
# whether the cluster labels were meant to be written out as well.
kmeans = KMeans(n_clusters=3).fit(X_reduced)

# Only the third component (column index 2) is exported.
data1 = pd.DataFrame(X_reduced[:, 2])

# Write the result to the output workbook. The context manager saves and
# closes the file on exit; ExcelWriter.save() no longer exists in pandas 2.x.
with pd.ExcelWriter('dd.xlsx') as writer:
    data1.to_excel(writer, header=None, index=False)
# RFE以及降噪代码 (RFE and denoising code)
import pandas as pd
from stockstats import StockDataFrame
# Do not limit the number of columns shown when printing DataFrames.
pd.set_option('display.max_columns', None)
# Do not limit the number of rows shown when printing DataFrames.
pd.set_option('display.max_rows', None)

# Load the spreadsheet and wrap it as a StockDataFrame.
data = StockDataFrame.retype(pd.read_excel('CSCO.xls'))
print(data.shape)

# Import the RFE selector and the linear-regression base model.
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression

# Candidate features.
# NOTE(review): this is the whole frame, so the target column 'close' is also
# among the candidate features -- confirm that this is intended.
feature = data

rfe = RFE(
    estimator=LinearRegression(),  # linear regression as the base model
    n_features_to_select=5,  # number of features to keep
)

# Fit the selector against the 'close' column and keep the selected features.
sFeature = rfe.fit_transform(feature, data['close'])

# Show the names of the selected columns.
print(feature.columns[rfe.get_support()])
from __future__ import print_function
import pandas as pd
import matplotlib.pyplot as plt
# 时间序列
from stockstats import StockDataFrame
import statsmodels.api as sm
# Load the spreadsheet and wrap it as a StockDataFrame.
dta = StockDataFrame.retype(pd.read_excel('MSFT.xls'))
arrclose = dta['close'].values.tolist()
dates = dta['date'].values.tolist()
print(dates)

# Convert the string index to a datetime index.
# NOTE(review): this assigns to the index of the Series returned by
# dta["date"], not to dta itself, and dta is rebound a few lines below --
# presumably this has no lasting effect; confirm before relying on it.
dta["date"].index = pd.to_datetime(dta['date'].index)

# Closing-price column of the loaded frame.
ts = dta['close']

# Rebuild the closing prices as a plain Series indexed by a generated date
# range starting at 2010Q1. The range length follows the data instead of a
# fixed 240, so inputs with a different number of rows no longer fail with an
# index-length mismatch.
dta = pd.Series(arrclose)
dta.index = pd.Index(
    sm.tsa.datetools.dates_from_range('2010Q1', length=len(arrclose))
)
dta.plot(figsize=(12, 8))

# Denoising input: the series is passed on unchanged (no transform applied).
ts_log = dta
print(ts_log)
def draw_moving(timeSeries, size):
    """Plot a series with two smoothed versions and export the rolling mean.

    Draws the original data, a rolling mean and an exponentially weighted
    mean on a new white-background figure, writes the rolling mean to
    'aaaa1.xlsx' (no header, no index) and prints the weighted mean.

    Args:
        timeSeries: pandas object providing ``rolling``, ``ewm`` and ``plot``
            (called with a Series in this file).
        size: span of the exponentially weighted mean. The rolling-mean
            window is fixed at 2 and does not depend on this value.
    """
    plt.figure(facecolor='white')
    print("1")

    # Rolling mean over a fixed window of 2 points; min_periods=1 keeps the
    # first value instead of producing NaN.
    rol_mean = timeSeries.rolling(2, min_periods=1).mean()
    # Exponentially weighted mean with span `size`.
    rol_weighted_mean = timeSeries.ewm(span=size).mean()

    timeSeries.plot(color='blue', label='Original')
    rol_mean.plot(color='red', label='Rolling Mean')
    rol_weighted_mean.plot(color='black', label='Weighted Rolling Mean')
    plt.legend(loc='best')
    plt.title('Rolling Mean')

    data1 = pd.DataFrame(rol_mean)
    # The context manager saves and closes the workbook on exit;
    # ExcelWriter.save() no longer exists in pandas 2.x.
    with pd.ExcelWriter('aaaa1.xlsx') as writer:
        data1.to_excel(writer, header=None, index=False)
    print(rol_weighted_mean)
# Smooth, plot and export the prepared series; 240 is passed as `size`.
draw_moving(ts_log, size=240)