R语言获取历史股票数据

本文转载自我的个人网站 DataAnswer(http://www.dataanswer.top,一个大数据博客与大数据问答网站)上的文章。主要利用 Python 获取股票数据,再利用 R 作图分析。

由股票代码列表获取历年股票数据

import pandas as pd
from pandas import Series,DataFrame
# Load the stock list (space-separated, three columns) and name the columns.
stock_data=pd.read_csv('/home/hadoop/mywork/stock.txt',sep=' ')
stock_data.columns=Series(['code','name','class'])
# Build the ticker used in the download URL: characters 2..7 of the code plus
# '.ss' when the code contains 'sh', otherwise '.sz'.
# The whole column is assigned at once; the former element-wise
# `stock_data['url'][i]=...` is chained indexing, which pandas warns about and
# which does not update the frame when Copy-on-Write is enabled.
codes=[str(code) for code in stock_data['code']]
stock_data['url']=[code[2:8]+('.ss' if 'sh' in code else '.sz') for code in codes]
# Leave out the row at position 18 and renumber the remaining rows from 0.
# NOTE(review): the reason row 18 is excluded is not visible here - confirm.
stock_data1=stock_data[0:18]
stock_data2=stock_data[19:len(stock_data)]
stock_data=pd.concat([stock_data1,stock_data2],ignore_index=True)
# Download the daily 2015 table for every ticker and save one file per ticker.
# NOTE(review): verify that this Yahoo endpoint is still being served.
for ticker in stock_data['url']:
    stock_url='http://table.finance.yahoo.com/table.csv?s='+ticker+'&d=11&e=05&f=2015&g=d&a=00&b=00&c=2015&ignore=.csv'
    stock_df=pd.read_csv(stock_url)
    path='/home/hadoop/mywork/stock/'+ticker+".txt"
    stock_df.to_csv(path)
# Build a hierarchical (two-level) row index over the companies, keyed by industry class.
import pandas as pd
from pandas import Series,DataFrame
# Re-read the stock list and name its three columns.
stock_data=pd.read_csv('/home/hadoop/mywork/stock.txt',sep=' ')
stock_data.columns=Series(['code','name','class'])
# NOTE(review): `key` is computed here but never used below; the 'key' column
# is filled with the constant 1 instead - confirm whether
# `stock_data['key']=key` was intended.
key=range(len(stock_data))
stock_data['key']=1
# Move 'class' and 'key' out of the columns into a two-level index.
s=stock_data.set_index(['class','key'])
# Bare expression: shows the result when run in an interactive session.
s

获取各行业的公司代码,求出各行业的公司总数

import pandas as pd
from pandas import Series,DataFrame
# Count the companies in each industry and save the totals to total.txt.
stock_data=pd.read_csv('/home/hadoop/mywork/stock.txt',sep=' ',encoding='utf-8')
stock_data.columns=Series(['code','name','class'])
# (industry name, start row, stop row) for each industry; rows are taken with
# the half-open slice stock_data[start:stop]. The bounds are hard-coded for
# this particular stock.txt.
# NOTE(review): the row at each `stop` position (44, 143, 275, ...) belongs to
# no industry. Confirm whether those rows are separators to skip or whether the
# slice ends were meant to be inclusive.
industry_rows=[
    ('金融',0,44),
    ('军工',45,143),
    ('地产',144,275),
    ('环保',276,305),
    ('传媒',306,334),
    ('迪斯尼',335,361),
    ('酿酒',362,392),
    ('有色',393,460),
    ('煤炭',461,500),
    ('造纸',501,525),
    ('电力',526,613),
    ('汽车',614,670),
    ('农业',671,730),
]
# One frame per industry, each renumbered from 0.
stock_groups=[stock_data[start:stop].reset_index(drop=True) for _,start,stop in industry_rows]
# Build the summary in one step. The former `stock_total['sum'][k]=...` writes
# were chained indexing, which pandas warns about and which does not update the
# frame when Copy-on-Write is enabled.
stock_total=DataFrame({
    'name':[name for name,_,_ in industry_rows],
    'sum':[len(group) for group in stock_groups],
})
# Keep the per-industry frames under their original names; later snippets
# (for example the agriculture download) read stock13.
(stock1,stock2,stock3,stock4,stock5,stock6,stock7,
 stock8,stock9,stock10,stock11,stock12,stock13)=stock_groups
stock_total.to_csv('/home/hadoop/total.txt')

获取农业各公司的历史股票数据

# Download the 2015 daily history for every company in the agriculture group
# (stock13, the last industry slice built by the counting snippet).
stock_data=stock13
# Ticker = characters 2..7 of the code plus '.ss' when the code contains 'sh',
# otherwise '.sz'. The column is assigned in one step; the former
# `stock_data['url'][i]=...` is chained indexing, which pandas warns about and
# which does not update the frame when Copy-on-Write is enabled.
codes=[str(code) for code in stock_data['code']]
stock_data['url']=[code[2:8]+('.ss' if 'sh' in code else '.sz') for code in codes]
# Fetch each ticker's table and save it as <ticker>.txt.
# NOTE(review): verify that this Yahoo endpoint is still being served.
for ticker in stock_data['url']:
    stock_url='http://table.finance.yahoo.com/table.csv?s='+ticker+'&d=11&e=05&f=2015&g=d&a=00&b=00&c=2015&ignore=.csv'
    stock_df=pd.read_csv(stock_url)
    path='/home/hadoop/mywork/stock/'+ticker+".txt"
    stock_df.to_csv(path)
# Horizontal bar chart of the number of listed companies per industry.
# Reads total.txt (comma-separated, with a header row providing `name` and `sum`).
data <- read.table(
  file("total.txt", encoding = "utf-8"),
  header = TRUE, sep = ",", stringsAsFactors = FALSE
)
# Save the graphics settings so they can be restored after plotting.
opar <- par(no.readonly = TRUE)
par(lwd = 0.5, cex = 1)
barplot(
  data$sum,
  names.arg = data$name, cex.names = 0.7,
  col = "red", horiz = TRUE, las = 2
)
# Character-expansion factors are numeric; they were previously passed as the
# string "0.9".
title(
  main = "各行业上市公司的数量",
  col.main = "green", col.lab = "green",
  cex.lab = 0.9, cex.main = 0.9
)
par(opar)

R语言获取历史股票数据_第1张图片

作出各行业上市公司的比例图

# Convert the counts in `data$sum` into whole-number percentages of their total.
#
# data: data frame with a numeric `sum` column.
# Returns a copy of `data` whose `sum` column holds round(100 * sum / total).
# Also prints the grand total, as the original implementation did.
toPercentage <- function(data) {
  total <- sum(data$sum)
  print(total)
  peg <- data
  # Address the column by name and convert it in one vectorised step. The
  # previous loop rewrote column 3 by position, which is only correct when
  # `sum` happens to be the third column, and it misbehaved on zero rows
  # because 1:0 iterates over c(1, 0).
  peg$sum <- round((peg$sum / total) * 100, 0)
  peg
}
# Pie chart of each industry's share of listed companies.
peg <- toPercentage(data)
# Slice labels: industry name followed by its percentage.
lbls <- paste(peg$name, " ", peg$sum, "%", sep = " ")
# One colour per slice: nrow(data) is the number of industries, whereas
# length(data) on a data frame is its number of columns, which made the
# colours repeat. cex.main is numeric, not the string "0.9".
pie(
  data$sum,
  labels = lbls,
  col = rainbow(nrow(data)),
  main = "上市公司所占比例",
  cex.main = 0.9, col.main = "green"
)

R语言获取历史股票数据_第2张图片

【R时间序列作图如何显示时间】

# Demo: show year-month labels on the x-axis of a time-series plot.
# 36 monthly dates starting at 2008-01-01.
time <- seq.Date(as.Date("2008/1/1"), by = "month", length = 36)
# 36 random standard-normal values to plot against those dates.
val <- rnorm(36) 
# NOTE(review): `dat` is built here but not used by the statements below.
dat <- data.frame(time = time, val = val)
# Draw the points without the default x-axis, then add a date axis by hand.
plot(time, val, xaxt = "n")
axis.Date(1, at = time, format = "%Y-%m")
# NOTE(review): `reserve` and `period` are not columns of any `data` object
# created in the visible code - presumably this line belongs to a different
# dataset; confirm before running.
plot(data$reserve ~ as.Date(data$period),type="b")

R语言获取历史股票数据_第3张图片

作出农业板块中某个公司2015年 High/Low/Volume/Close/Open/Adj.Close 的变化趋势

# Plot one company's daily series from ss1.txt against the trading date,
# as two pages of three stacked panels each.
data <- read.table(
  file("ss1.txt", encoding = "utf-8"),
  header = TRUE, sep = ",", stringsAsFactors = FALSE
)
# Only the first 222 rows are plotted.
data1 <- data[1:222, ]

# Page 1: High, Low and Volume.
opar <- par(no.readonly = TRUE)
par(mfrow = c(3, 1))
plot(data1$High ~ as.Date(data1$Date),
     type = "b", ylab = "High", xlab = "2015年")
plot(data1$Low ~ as.Date(data1$Date),
     type = "b", col = "red", ylab = "Low", xlab = "2015年")
plot(data1$Volume ~ as.Date(data1$Date),
     type = "b", col = "red", ylab = "Volume", xlab = "2015年")
par(opar)

# Page 2: Close, Open and Adj.Close.
opar <- par(no.readonly = TRUE)
par(mfrow = c(3, 1))
plot(data1$Close ~ as.Date(data1$Date),
     type = "b", col = "red", ylab = "Close", xlab = "2015年")
plot(data1$Open ~ as.Date(data1$Date),
     type = "b", col = "red", ylab = "Open", xlab = "2015年")
plot(data1$Adj.Close ~ as.Date(data1$Date),
     type = "b", col = "red", ylab = "Adj.Close", xlab = "2015年")
par(opar)

作出收盘价与开盘价的差价变化趋势

R语言获取历史股票数据_第4张图片

# Plot the daily difference Close - Open from data1 against the trading date.
plot((data1$Close-data1$Open)~ as.Date(data1$Date),type="b",col='blue',ylab='闭盘与开盘的差价',xlab='2015年')

作出差价变化的频率图

R语言获取历史股票数据_第5张图片

# Scatter plot of how often each price-change value occurs.
# NOTE(review): `d` is not created anywhere in the visible code before this
# write, and the file written (d.txt) differs from the one read back
# (part.txt) - presumably an intermediate step is missing; confirm.
write.table(d, "d.txt", row.names = FALSE)
# part.txt has no header, so the columns are named V1 and V2.
d <- read.table("part.txt", header = FALSE, stringsAsFactors = FALSE)
plot(
  d$V1, d$V2,
  ylim = c(0, 5),
  xlab = "变化的区间", ylab = "变化的频数", col = "blue"
)
library(Hmisc)
# Hmisc exports minor.tick(); there is no function named minor().
minor.tick(nx = 20, tick.ratio = 0.5)

作出变化大小出现的频率的条形图

R语言获取历史股票数据_第6张图片

# Total the values in `data$V2` by the sign of the matching entry in `data$V1`.
#
# data: data frame with numeric columns V1 and V2 (presumably the change value
#       and its frequency, as in the frequency table plotted earlier).
# Returns a numeric vector of length 3: the sums of V2 over the rows where
# V1 < 0, V1 == 0 and V1 > 0, in that order.
type <- function(data) {
  change <- data$V1
  freq <- data$V2
  # Vectorised sums replace the row-by-row loop. The loop iterated over
  # c(1:dim(data)[1]), which is c(1, 0) for an empty data frame and made the
  # function stop with an error; an empty input now yields c(0, 0, 0).
  as.numeric(c(
    sum(freq[change < 0]),
    sum(freq[change == 0]),
    sum(freq[change > 0])
  ))
}

分别作出正变化、负变化、不变化频数图:

R语言获取历史股票数据_第7张图片



你可能感兴趣的:(R语言实战,数据分析与统计)