本文转载自我的个人网站DataAnswer(http://www.dataanswer.top,一个大数据博客和大数据问答的网站)。文中主要利用python获取股票数据,并利用R来作图分析。
由股票代码列表获取历年股票数据
import pandas as pd
from pandas import Series,DataFrame
# Build a Yahoo Finance ticker for every listed company and download its
# daily price history into one CSV file per ticker.
stock_data = pd.read_csv('/home/hadoop/mywork/stock.txt', sep=' ')
stock_data.columns = ['code', 'name', 'class']

# Ticker = characters 2..7 of the exchange-prefixed code plus a suffix:
# '.ss' when the code contains 'sh', '.sz' otherwise.
# The column is built in a single assignment: the chained form
# stock_data['url'][i] = ... may write to a temporary object and is a no-op
# under pandas copy-on-write.
stock_data['url'] = [
    code[2:8] + ('.ss' if 'sh' in code else '.sz')
    for code in map(str, stock_data['code'])
]

# Row 18 is left out on purpose (the slices step around it), then the index
# is renumbered 0..n-1.
# NOTE(review): presumably that row has no downloadable data -- confirm.
stock_data1 = stock_data[0:18]
stock_data2 = stock_data[19:len(stock_data)]
stock_data = pd.concat([stock_data1, stock_data2], ignore_index=True)

# NOTE(review): this legacy Yahoo CSV endpoint is believed to have been
# retired -- confirm it still responds before relying on this loop.
for symbol in stock_data['url']:
    stock_url = (
        'http://table.finance.yahoo.com/table.csv?s=' + symbol
        + '&d=11&e=05&f=2015&g=d&a=00&b=00&c=2015&ignore=.csv'
    )
    stock_df = pd.read_csv(stock_url)
    path = '/home/hadoop/mywork/stock/' + symbol + ".txt"
    stock_df.to_csv(path)
#对不同行业的公司分层索引
import pandas as pd
from pandas import Series,DataFrame
# Index the company list hierarchically: industry ('class') on the outer
# level and a running row number ('key') on the inner level.
stock_data = pd.read_csv('/home/hadoop/mywork/stock.txt', sep=' ')
stock_data.columns = ['code', 'name', 'class']
key = range(len(stock_data))
# Use the running number that was computed above; the previous constant 1
# left `key` unused and made every inner index label identical.
stock_data['key'] = key
s = stock_data.set_index(['class', 'key'])
# Bare expression: displays the frame when run interactively.
s
获取各行业的公司代码,求出各行业的公司总数
import pandas as pd
from pandas import Series,DataFrame
# Split the company list into one frame per industry and record how many
# companies each industry has in /home/hadoop/total.txt.
stock_data = pd.read_csv('/home/hadoop/mywork/stock.txt', sep=' ', encoding='utf-8')
stock_data.columns = ['code', 'name', 'class']

industry_names = ['金融', '军工', '地产', '环保', '传媒', '迪斯尼', '酿酒',
                  '有色', '煤炭', '造纸', '电力', '汽车', '农业']

# Half-open row ranges [start, stop) of each industry, in the same order as
# industry_names. Neighbouring ranges skip exactly one row (44, 143, 275, ...).
# NOTE(review): presumably those rows are separators in stock.txt -- confirm.
industry_bounds = [
    (0, 44), (45, 143), (144, 275), (276, 305), (306, 334), (335, 361),
    (362, 392), (393, 460), (461, 500), (501, 525), (526, 613), (614, 670),
    (671, 730),
]

# reset_index(drop=True) renumbers each frame from 0 and returns an
# independent copy, so later column assignments do not touch stock_data.
industry_frames = [
    stock_data[start:stop].reset_index(drop=True)
    for start, stop in industry_bounds
]

# Keep the individual names; stock13 (agriculture) is used further down.
(stock1, stock2, stock3, stock4, stock5, stock6, stock7,
 stock8, stock9, stock10, stock11, stock12, stock13) = industry_frames

# Build the summary in one step instead of chained stock_total['sum'][k] = ...
# assignments, which are silently lost under pandas copy-on-write.
stock_total = DataFrame({
    'name': industry_names,
    'sum': [len(frame) for frame in industry_frames],
})
stock_total.to_csv('/home/hadoop/total.txt')
获取农业各公司的历史股票数据
# Download the daily price history of every agriculture company (stock13)
# into one CSV file per ticker.
stock_data = stock13

# Ticker = characters 2..7 of the exchange-prefixed code plus a suffix:
# '.ss' when the code contains 'sh', '.sz' otherwise.
# Assigned as a whole column because the chained form
# stock_data['url'][i] = ... is a no-op under pandas copy-on-write.
stock_data['url'] = [
    code[2:8] + ('.ss' if 'sh' in code else '.sz')
    for code in map(str, stock_data['code'])
]

# NOTE(review): this legacy Yahoo CSV endpoint is believed to have been
# retired -- confirm it still responds before relying on this loop.
for symbol in stock_data['url']:
    stock_url = (
        'http://table.finance.yahoo.com/table.csv?s=' + symbol
        + '&d=11&e=05&f=2015&g=d&a=00&b=00&c=2015&ignore=.csv'
    )
    stock_df = pd.read_csv(stock_url)
    path = '/home/hadoop/mywork/stock/' + symbol + ".txt"
    stock_df.to_csv(path)
#作出各行业上市公司的频数图
# Horizontal bar chart of the number of listed companies per industry,
# read from total.txt (columns used: name, sum).
data <- read.table(file("total.txt", encoding = 'utf-8'), header = TRUE, sep = ',', stringsAsFactors = FALSE)
opar <- par(no.readonly = TRUE)  # remember the current graphics settings
par(lwd = 0.5, cex = 1)
barplot(data$sum, names.arg = data$name, cex.names = 0.7, col = 'red', horiz = TRUE, las = 2)
# cex.* are numeric expansion factors, so pass numbers rather than strings.
title(main = "各行业上市公司的数量", col.main = "green", col.lab = 'green', cex.lab = 0.9, cex.main = 0.9)
par(opar)  # restore the settings saved above
作出各行业上市公司的比例图
# Convert the counts in the `sum` column into whole-number percentages.
#
# Args:
#   data: data frame with a numeric `sum` column.
# Returns:
#   A copy of `data` whose `sum` column holds each row's share of the column
#   total, in percent, rounded to 0 decimal places.
# Side effect:
#   Prints the column total.
toPercentage<-function(data)
{
  s <- sum(data$sum)
  print(s)
  peg <- data
  # Address the column by name (the total is taken from `sum`, so the shares
  # must be written there too, wherever that column sits) and compute all
  # rows at once so that a zero-row frame is handled as well.
  peg$sum <- round((peg$sum / s) * 100, 0)
  return(peg)
}
# Pie chart of each industry's share of listed companies, labelled with the
# industry name and its rounded percentage.
peg <- toPercentage(data)
lbls <- paste(peg$name, " ", peg$sum, "%", sep = " ")
# One colour per slice: nrow(data) is the number of industries, whereas
# length(data) is the number of columns and made the colours repeat.
pie(data$sum, labels = lbls, col = rainbow(nrow(data)), main = "上市公司所占比例", cex.main = 0.9, col.main = 'green')
【R时间序列作图如何显示时间】
# Example: draw a monthly series and label the x axis with year-month dates.
# `length.out` is spelled out instead of relying on partial matching of `length`.
time <- seq.Date(as.Date("2008/1/1"), by = "month", length.out = 36)
val <- rnorm(36)
dat <- data.frame(time = time, val = val)
plot(time, val, xaxt = "n")  # suppress the default x axis ...
axis.Date(1, at = time, format = "%Y-%m")  # ... and draw one formatted as dates
# Formula form with a date column converted on the fly.
# NOTE(review): `reserve` and `period` are not columns of any `data` built in
# this file -- presumably this line comes from a different data set; confirm.
plot(data$reserve ~ as.Date(data$period),type="b")
作出农业行业中某个公司2015年High/Low/Volume/Close/Open/Adj.Close的变化趋势
# Plot the daily High / Low / Volume and then Close / Open / Adj.Close series
# of one company, three stacked panels per figure.
data <- read.table(file("ss1.txt", encoding = 'utf-8'), header = TRUE, sep = ',', stringsAsFactors = FALSE)
# Keep at most the first 222 rows; head() does not pad with NA rows when the
# file is shorter, unlike data[1:222, ].
# NOTE(review): presumably 222 is the number of 2015 trading days in the file -- confirm.
data1 <- head(data, 222)
trade_dates <- as.Date(data1$Date)  # converted once, shared by all panels

opar <- par(no.readonly = TRUE)  # remember the current graphics settings
par(mfrow = c(3, 1))
plot(data1$High ~ trade_dates, type = "b", ylab = 'High', xlab = '2015年')
plot(data1$Low ~ trade_dates, type = "b", col = 'red', ylab = 'Low', xlab = '2015年')
plot(data1$Volume ~ trade_dates, type = "b", col = 'red', ylab = 'Volume', xlab = '2015年')
par(opar)

opar <- par(no.readonly = TRUE)
par(mfrow = c(3, 1))
plot(data1$Close ~ trade_dates, type = "b", col = 'red', ylab = 'Close', xlab = '2015年')
plot(data1$Open ~ trade_dates, type = "b", col = 'red', ylab = 'Open', xlab = '2015年')
plot(data1$Adj.Close ~ trade_dates, type = "b", col = 'red', ylab = 'Adj.Close', xlab = '2015年')
par(opar)
作出收盘价与开盘价的差价变化趋势
# Daily difference between closing and opening price, plotted against the date.
price_gap <- data1$Close - data1$Open
gap_dates <- as.Date(data1$Date)
plot(
  price_gap ~ gap_dates,
  type = "b",
  col = 'blue',
  ylab = '闭盘与开盘的差价',
  xlab = '2015年'
)
作出差价变化的频率图
# Export the price differences, read back their frequency table and plot it.
# NOTE(review): `d` is not defined in the visible source before this line --
# presumably it holds the close-open differences computed earlier; confirm.
write.table(d, 'd.txt', row.names = FALSE)
# part.txt has no header, so its columns are named V1 and V2 automatically.
# NOTE(review): presumably produced from d.txt by an external step -- confirm.
d <- read.table('part.txt', header = FALSE, stringsAsFactors = FALSE)
plot(d$V1, d$V2, ylim = c(0, 5), xlab = '变化的区间', ylab = '变化的频数', col = 'blue')
library(Hmisc)
# The Hmisc function that adds minor tick marks is minor.tick(); `minor` does not exist.
minor.tick(nx = 20, tick.ratio = 0.5)
作出变化大小出现的频率的条形图
# Total the frequencies of negative, zero and positive changes.
#
# Args:
#   data: data frame with column V1 (the change value) and column V2 (how
#         often that value occurs).
# Returns:
#   Numeric vector of length 3: the sum of V2 over rows with V1 < 0, with
#   V1 == 0, and with V1 > 0, in that order.
type<-function(data)
{
  change <- data$V1
  count <- data$V2
  # which() drops NA comparisons, and the vectorised sums also cope with a
  # zero-row frame, where the former 1:dim(data)[1] loop failed.
  n1 <- sum(count[which(change < 0)])
  n2 <- sum(count[which(change == 0)])
  n3 <- sum(count[which(change > 0)])
  # as.numeric keeps the double result the accumulating loop used to return.
  return(as.numeric(c(n1, n2, n3)))
}
分别作出正变化、负变化、不变化频数图: