# 利用rvest包爬取薄荷网食物热量数据 (scrape food calorie data from boohee.com with the rvest package)

library(rvest)

# Base URL of the food category listings; the category number and a
# "?page=" query string are appended to it.
myurl <- "http://www.boohee.com/food/group/"

# Collect categories 1-10 and listing pages 1-10 of each category.
n_groups <- 10L
n_pages <- 10L

# Extract the category heading from a parsed listing page.
# Returns a single string with all newlines and surrounding whitespace
# removed, or NA when the heading selector matches nothing.
extract_food_class <- function(page) {
  heading <- page %>%
    html_nodes("div[class='widget-food-list pull-right'] h3") %>%
    html_text()
  if (length(heading) == 0L) {
    return(NA_character_)
  }
  # Only the first match is used so the label is always a scalar.
  trimws(gsub("\n", "", heading[1], fixed = TRUE))
}

# Extract the food entries of a parsed listing page as a data frame with
# columns `food_name` and `cal`.
# Each entry box is queried with html_node() (singular), which yields one
# result per box, so name and calorie text stay aligned row by row even
# when a box lacks one of the two elements (the missing value becomes NA).
extract_food_items <- function(page) {
  boxes <- html_nodes(page, ".food-list li div[class = 'text-box pull-left']")
  data.frame(
    food_name = boxes %>% html_node("a") %>% html_text(),
    cal = boxes %>% html_node("p") %>% html_text(),
    stringsAsFactors = FALSE
  )
}

# Scrape every listing page of one category and return a data frame with
# columns `food_class`, `food_name` and `cal`.
scrape_food_group <- function(base_url, group, pages) {
  parsed <- lapply(pages, function(page) {
    read_html(paste0(base_url, group, "?page=", page))
  })
  items <- do.call(rbind, lapply(parsed, extract_food_items))
  # The heading is read from the first fetched page instead of downloading
  # that page a second time.
  class_label <- extract_food_class(parsed[[1]])
  data.frame(
    # Repeat the label once per scraped row rather than a fixed 100 times,
    # so categories with fewer entries no longer break the data frame.
    food_class = rep(class_label, nrow(items)),
    food_name = items$food_name,
    cal = items$cal,
    stringsAsFactors = FALSE
  )
}

# Build one data frame per category, then bind them in a single step
# instead of growing the result inside a loop.
food_cal <- do.call(
  rbind,
  lapply(seq_len(n_groups), function(group) {
    scrape_food_group(myurl, group, seq_len(n_pages))
  })
)

write.csv(food_cal, file = "E:\\薄荷食物热量.csv")

# 你可能感兴趣的：利用rvest包爬取薄荷网食物热量数据