url: https://www.toutiao.com/api/pc/feed/
拼接参数:
- category:见下category表格
- utm_source:
- widen:
- max_behot_time:
- max_behot_time_tmp:
- tadrequire:
- as:
- cp:
category 列表:
标签 | category值 |
---|---|
推荐 | __all__ |
热点 | news_hot |
科技 | news_tech |
社会 | news_society |
娱乐 | news_entertainment |
游戏 | news_game |
体育 | news_sports |
汽车 | news_car |
财经 | news_finance |
搞笑 | funny |
段子 | essay_joke |
军事 | news_military |
国际 | news_world |
时尚 | news_fashion |
旅游 | news_travel |
探索 | news_discovery |
育儿 | news_baby |
养生 | news_regimen |
美文 | news_essay |
历史 | news_history |
美食 | news_food |
... | ... |
json接口示例:https://www.toutiao.com/api/pc/feed/?category=news_hot
代码:
using HtmlAgilityPack;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using ToutiaoNews.Data;
using ToutiaoNews.Helper;
namespace ToutiaoNews
{
/// <summary>
/// Console crawler: for every feed category it requests the Toutiao PC feed API,
/// walks the returned items of genre "article", loads each article page, extracts
/// the content and time from the page's BASE_DATA script and stores the result
/// through <c>NewsDbContext</c>.
/// </summary>
class Program
{
    /// <summary>Entry point: runs one crawl over all categories.</summary>
    static void Main(string[] args)
    {
        Console.WriteLine("start...");
        GetToutiaoNews();
        Console.WriteLine("end...");
    }

    static string domain = "https://www.toutiao.com/";
    static string domain_para = "api/pc/feed/?category=";

    // NOTE(review): database credentials are hard-coded in source; they should be
    // moved to configuration / environment before this is used anywhere shared.
    static string connection = "Server=.;Database=toutiao;User ID=sa;Password=123456";

    // Per-article working state shared between the helper methods below.
    static string image_url, title, source, labels, category, content, time;

    // Patterns are unchanged from the original inline calls; group 1 is the quoted value.
    private static readonly Regex ContentPattern =
        new Regex(".*content:.'([^'']+)'.*", RegexOptions.IgnoreCase);
    private static readonly Regex TimePattern =
        new Regex(".*time:.'([^'']+)'.*", RegexOptions.IgnoreCase);

    // Single source of truth for the crawl order and the display label of each
    // category: column 0 is the API query value, column 1 the stored label.
    private static readonly string[,] CategoryTable =
    {
        { "__all__", "推荐" },
        { "news_hot", "热点" },
        { "news_tech", "科技" },
        { "news_society", "社会" },
        { "news_entertainment", "娱乐" },
        { "news_game", "游戏" },
        { "news_sports", "体育" },
        { "news_car", "汽车" },
        { "news_finance", "财经" },
        { "funny", "搞笑" },
        { "essay_joke&as=A115993CAA5F9BC", "段子" },
        { "news_military", "军事" },
        { "news_world", "国际" },
        { "news_fashion", "时尚" },
        { "news_travel", "旅游" },
        { "news_discovery", "探索" },
        { "news_baby", "育儿" },
        { "news_regimen", "养生" },
        { "news_essay", "美文" },
        { "news_history", "历史" },
        { "news_food", "美食" }
    };

    /// <summary>
    /// Crawls every category in <c>CategoryTable</c>. A failure while fetching or
    /// parsing one category is reported and does not stop the remaining categories.
    /// </summary>
    public static void GetToutiaoNews()
    {
        for (int i = 0; i < CategoryTable.GetLength(0); i++)
        {
            string _category = CategoryTable[i, 0];
            GetToutiaoNewsCategory(_category);
            string api = domain + domain_para + _category;
            Console.WriteLine(category + ":" + api);
            try
            {
                string json = HttpHelper.HttpGet(api);
                JObject obj = JObject.Parse(json);
                JToken message = obj["message"];
                if (message != null && message.ToString() == "success")
                {
                    GetToutiaoNews(obj);
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(category + " ---- failed: " + ex.Message);
            }
        }
    }

    /// <summary>
    /// Sets the <c>category</c> field to the label that belongs to the given API
    /// category value. An unknown value is stored as-is instead of leaving the
    /// label of the previously processed category in place.
    /// </summary>
    /// <param name="_category">Category query value as sent to the feed API.</param>
    private static void GetToutiaoNewsCategory(string _category)
    {
        for (int i = 0; i < CategoryTable.GetLength(0); i++)
        {
            if (CategoryTable[i, 0] == _category)
            {
                category = CategoryTable[i, 1];
                return;
            }
        }
        category = _category;
    }

    /// <summary>
    /// Processes every item of <c>obj["data"]</c> whose "article_genre" is "article".
    /// An item that fails (missing field, page load error, database error) is
    /// reported on the console and skipped.
    /// </summary>
    /// <param name="obj">Parsed feed API response.</param>
    private static void GetToutiaoNews(JObject obj)
    {
        JToken items = obj["data"];
        if (items == null)
        {
            return;
        }

        foreach (var data in items)
        {
            JToken genre = data["article_genre"];
            if (genre == null || genre.ToString() != "article")
            {
                continue;
            }

            // Start each article from a clean slate so nothing from a previous
            // (possibly failed) article can leak into this one.
            image_url = title = source = content = time = null;
            labels = "";

            try
            {
                image_url = data["image_url"].ToString();
                title = data["title"].ToString();
                GetToutiaoNewsLabel(data);
                source = data["source"].ToString();
                string source_url = data["source_url"].ToString();
                GetToutiaoNewsContent(source_url);
            }
            catch (Exception ex)
            {
                Console.WriteLine(title + " ---- failed: " + ex.Message);
            }
        }
    }

    /// <summary>
    /// Loads the article page, finds the first script containing "BASE_DATA" from
    /// which both content and time can be extracted, and inserts the article.
    /// Does nothing when the page has no such script.
    /// </summary>
    /// <param name="source_url">Article path taken from the feed item.</param>
    private static void GetToutiaoNewsContent(string source_url)
    {
        HtmlWeb web = new HtmlWeb();
        // domain already ends with '/', so drop any leading '/' from the path to
        // avoid a doubled separator in the request URL.
        HtmlDocument doc = web.Load(domain + source_url.TrimStart('/'));

        // SelectNodes yields null (not an empty collection) when nothing matches.
        HtmlNodeCollection scripts = doc.DocumentNode.SelectNodes("//*/script");
        if (scripts == null)
        {
            return;
        }

        foreach (HtmlNode script in scripts)
        {
            string text = script.InnerHtml;
            if (!text.Contains("BASE_DATA"))
            {
                continue;
            }

            Match contentMatch = ContentPattern.Match(text);
            Match timeMatch = TimePattern.Match(text);
            if (!contentMatch.Success || !timeMatch.Success)
            {
                continue;
            }

            // Read the quoted value straight from the capture group instead of
            // stripping prefixes/suffixes off the whole matched line.
            content = contentMatch.Groups[1].Value.Trim();
            time = timeMatch.Groups[1].Value.Trim();
            InsertToutiaoNews();
            return;
        }
    }

    /// <summary>
    /// Inserts the article held in the static fields unless a row with the same
    /// title and source already exists. Articles whose time cannot be parsed are
    /// reported and skipped.
    /// </summary>
    private static void InsertToutiaoNews()
    {
        DateTime pubdate;
        if (!DateTime.TryParse(time, out pubdate))
        {
            Console.WriteLine(title + " ---- skipped: unparseable time '" + time + "'");
            return;
        }

        using (NewsDbContext db = new NewsDbContext(connection))
        {
            bool exist = db.News.Any(a => a.Title == title && a.Source == source);
            if (exist)
            {
                return;
            }

            db.News.Add(new Entity.News()
            {
                Title = title,
                Source = source,
                Logo = image_url,
                Labels = labels,
                Category = category,
                Pubdate = pubdate,
                Detail = content
            });
            db.SaveChanges();
            Console.WriteLine(title + " ---- ok");
        }
    }

    /// <summary>
    /// Sets <c>labels</c> to the comma-separated entries of <c>data["label"]</c>,
    /// or to an empty string when the item has no labels. The field is overwritten,
    /// never appended to.
    /// </summary>
    /// <param name="data">Feed item.</param>
    private static void GetToutiaoNewsLabel(JToken data)
    {
        JToken labelTokens = data["label"];
        if (labelTokens == null)
        {
            labels = "";
            return;
        }

        List<string> names = new List<string>();
        foreach (string label in labelTokens)
        {
            names.Add(label);
        }
        labels = string.Join(",", names);
    }
}
}