R爬虫:链家租房数据爬取

单一网页尝试
# Single-page trial: download the first rental-listing page (zufang/pg1) from
# bj.lianjia.com and parse it as UTF-8 HTML.
lianjia_url <- "https://bj.lianjia.com/zufang/pg1/"
lianjia_web <- read_html(lianjia_url, encoding = "UTF-8")

# For each CSS class, take the text of every matching node. Each result is a
# character vector with one entry per matched node.
where <- html_text(html_nodes(lianjia_web, ".where"))
other <- html_text(html_nodes(lianjia_web, ".other"))
price <- html_text(html_nodes(lianjia_web, ".price"))
chanquan <- html_text(html_nodes(lianjia_web, ".chanquan"))

# Combine the four fields column-wise; data.frame() requires that the vectors
# have compatible lengths.
data1 <- data.frame(
  where = where,
  other = other,
  price = price,
  chanquan = chanquan
)

循环爬取所有数据
# Crawl every listing page of every area and collect the rows in
# `lianjia_data` (columns: where, other, price, chanquan, quyu).
#
# `area_list[i]` is the URL path segment of an area and `page_list[i]` is the
# hard-coded number of pages requested for that area. `quyu` records which
# area a row came from.

# Zero-row template: guarantees `lianjia_data` exists with the expected
# columns even if no page could be scraped.
lianjia_data <- data.frame(
  where = character(),
  other = character(),
  price = character(),
  chanquan = character(),
  quyu = character()
)

area_list <- c(
  "dongcheng", "xicheng", "chaoyang", "haidian", "fengtai", "shijingshan",
  "tongzhou", "changping", "daxing", "yizhuangkaifaqu", "shunyi", "fangshan",
  "mentougou", "yanjiao"
)
page_list <- c(23, 37, 100, 57, 53, 13, 38, 28, 29, 12, 22, 15, 19, 100)

# The two vectors are paired by position, so they must stay the same length.
stopifnot(length(area_list) == length(page_list))

# Collect one data frame per page and bind once at the end; calling rbind()
# inside the loop copies the whole accumulated table on every iteration.
page_results <- vector("list", sum(page_list))
page_idx <- 0L

for (i in seq_along(area_list)) {
  for (m in seq_len(page_list[i])) {
    page_idx <- page_idx + 1L
    lianjia_url <- paste0(
      "https://bj.lianjia.com/zufang/", area_list[i], "/pg", m, "/"
    )

    # A failed request or inconsistent field counts on one page should not
    # throw away everything collected so far: warn and move on.
    data1 <- tryCatch(
      {
        lianjia_web <- read_html(lianjia_url, encoding = "UTF-8")
        where <- html_nodes(lianjia_web, ".where") %>% html_text()
        other <- html_nodes(lianjia_web, ".other") %>% html_text()
        chanquan <- html_nodes(lianjia_web, ".chanquan") %>% html_text()
        price <- html_nodes(lianjia_web, ".price") %>% html_text()

        if (length(where) == 0L) {
          # Empty page (e.g. page number beyond the last real page):
          # data.frame() would fail combining 0 rows with a length-1 `quyu`.
          NULL
        } else {
          data.frame(where, other, price, chanquan, quyu = area_list[i])
        }
      },
      error = function(e) {
        warning(
          "Skipping ", lianjia_url, ": ", conditionMessage(e),
          call. = FALSE, immediate. = TRUE
        )
        NULL
      }
    )

    if (!is.null(data1)) {
      page_results[[page_idx]] <- data1
    }

    # Progress indicator: current area and page number.
    print(c(area_list[i], m))
  }
}

# Drop skipped pages and stack everything in a single rbind() call.
lianjia_data <- do.call(
  rbind,
  c(list(lianjia_data), Filter(Negate(is.null), page_results))
)

爬取结果下载:https://pan.baidu.com/s/1qZz9WhYALJYJOQH7_TpfyA

你可能感兴趣的:(数据分析)