instagram获取图片地址和视频地址

instagram 获取图片地址和视频地址

先保存，以后再详细说明。

package main

import (
	"bufio"
	"encoding/json"
	"fmt"
	"github.com/unknwon/goconfig"
	"io/ioutil"
	"log"
	"net/http"
	"net/url"
	"os"
	"os/exec"
	"path/filepath"

	"strings"
	"time"
)

// Settings that init reads from the [Instagram] section of conf.ini.
var (
	// Queryhash is passed as the query_hash argument when building query URLs.
	Queryhash string
	// Cookie is sent as the Cookie header of every request made by GetHtml.
	Cookie string
	// Proxy is the proxy URL that GetHtml routes its requests through.
	Proxy string
)

// IGUserInfo mirrors the part of the profile JSON that GetUserInfo decodes
// from the "?__a=1" response: graphql.user.id and graphql.user.username.
// All other keys of the response are ignored by the decoder.
type IGUserInfo struct {
	Graphql struct {
		User struct {
			ID       string `json:"id"`
			Username string `json:"username"`
		} `json:"user"`
	} `json:"graphql"`
}

// IGData mirrors the part of a GraphQL timeline response that this program
// reads: the media edges of one page plus the paging information needed to
// request the following page.
type IGData struct {
	Data struct {
		User struct {
			EdgeOwnerToTimelineMedia struct {
				// Edges holds one entry per post on this page.
				Edges []struct {
					Node struct {
						// DisplayURL is used for posts that are not videos.
						DisplayURL            string `json:"display_url"`
						EdgeSidecarToChildren struct {
							// Edges holds the child media nested under the post.
							Edges []struct {
								Node struct {
									// Same trio as on the parent node: VideoURL is
									// used when IsVideo is true, DisplayURL otherwise.
									DisplayURL string `json:"display_url"`
									IsVideo    bool   `json:"is_video"`
									VideoURL   string `json:"video_url"`
								} `json:"node"`
							} `json:"edges"`
						} `json:"edge_sidecar_to_children"`
						// VideoURL is used instead of DisplayURL when IsVideo is true.
						IsVideo  bool   `json:"is_video"`
						VideoURL string `json:"video_url"`
					} `json:"node"`
				} `json:"edges"`
				PageInfo struct {
					// EndCursor is fed back as the "after" value of the next
					// query; HasNextPage tells main whether to keep paging.
					EndCursor   string `json:"end_cursor"`
					HasNextPage bool   `json:"has_next_page"`
				} `json:"page_info"`
			} `json:"edge_owner_to_timeline_media"`
		} `json:"user"`
	} `json:"data"`
}

// GetHtml fetches Insurl with a GET request and returns the response body.
//
// The request is routed through the proxy named by the package-level Proxy
// setting (or sent directly when Proxy is empty) and carries a desktop
// browser User-Agent plus the package-level Cookie. Any failure (an
// unparsable proxy URL, an invalid request URL, a transport error, a status
// other than 200, or an error while reading the body) is logged and reported
// to the caller as an empty string.
func GetHtml(Insurl string) (html string) {
	netTransport := &http.Transport{
		MaxIdleConnsPerHost:   10,
		ResponseHeaderTimeout: 5 * time.Second,
	}
	// An empty Proxy parses into an empty URL that the transport cannot dial,
	// so a proxy is only installed when one is actually configured.
	if Proxy != "" {
		proxy, err := url.Parse(Proxy)
		if err != nil {
			log.Printf("parsing proxy %q: %v", Proxy, err)
			return ""
		}
		netTransport.Proxy = http.ProxyURL(proxy)
	}
	// The transport lives only for this call; without this its idle
	// connections would stay open after the function returns.
	defer netTransport.CloseIdleConnections()

	httpClient := &http.Client{
		Timeout:   10 * time.Second,
		Transport: netTransport,
	}

	request, err := http.NewRequest("GET", Insurl, nil)
	if err != nil {
		// request is nil here, so it must not be touched.
		log.Println(err)
		return ""
	}
	// Present the request as coming from a desktop browser.
	request.Header.Add("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36")
	request.Header.Set("Cookie", Cookie)

	res, err := httpClient.Do(request)
	if err != nil {
		log.Println(err)
		return ""
	}
	defer res.Body.Close()

	// Only a 200 response carries usable content.
	if res.StatusCode != http.StatusOK {
		log.Printf("GET %s: unexpected status %s", Insurl, res.Status)
		return ""
	}
	content, err := ioutil.ReadAll(res.Body)
	if err != nil {
		log.Printf("GET %s: reading body: %v", Insurl, err)
		return ""
	}
	return string(content)
}

// GetUserInfo requests homepage with "?__a=1" appended and decodes the JSON
// reply as IGUserInfo.
//
// It returns the user's id and username, in that order. Both are empty when
// the page yields no content or the reply cannot be decoded; a decode error
// is logged.
func GetUserInfo(homepage string) (string, string) {
	body := GetHtml(homepage + "?__a=1")
	if body == "" {
		// Nothing to decode.
		return "", ""
	}
	var info IGUserInfo
	if err := json.Unmarshal([]byte(body), &info); err != nil {
		log.Printf("decoding user info for %s: %v", homepage, err)
		return "", ""
	}
	return info.Graphql.User.ID, info.Graphql.User.Username
}

// SetQueryUrl builds the Instagram GraphQL query URL for one timeline page.
//
// query_hash is the value of the query_hash parameter, id the user id, first
// the page size (inserted into the JSON as a bare number) and after the
// paging cursor ("" when requesting the first page). The variables JSON
// object and query_hash are percent-encoded so that characters such as '&',
// '=', '#' or '"' inside a value cannot break the query string.
func SetQueryUrl(query_hash, id, first, after string) string {
	variables := fmt.Sprintf(`{"id":"%s","first":%s,"after":"%s"}`, id, first, after)
	return "https://www.instagram.com/graphql/query/?query_hash=" + url.QueryEscape(query_hash) +
		"&variables=" + url.QueryEscape(variables)
}

// GetAfter returns the end_cursor value stored in the page info of ins.
func GetAfter(ins IGData) string {
	pageInfo := ins.Data.User.EdgeOwnerToTimelineMedia.PageInfo
	return pageInfo.EndCursor
}

// IsEnd returns the has_next_page flag stored in the page info of ins.
//
// Despite its name, the result is true while further pages remain and false
// once the last page has been reached.
func IsEnd(ins IGData) bool {
	timeline := ins.Data.User.EdgeOwnerToTimelineMedia
	return timeline.PageInfo.HasNextPage
}

// GetDownloadUrl prints the download URL of every media item in ins and
// appends the same URLs, one per line, to savefile.
//
// For each timeline edge it emits the post's own URL followed by the URL of
// each sidecar child. Every URL that is printed is also written to the file;
// one WirteText call is made per post.
func GetDownloadUrl(savefile string, ins IGData) {
	for _, post := range ins.Data.User.EdgeOwnerToTimelineMedia.Edges {
		node := post.Node
		urls := []string{mediaURL(node.IsVideo, node.DisplayURL, node.VideoURL)}
		for _, child := range node.EdgeSidecarToChildren.Edges {
			urls = append(urls, mediaURL(child.Node.IsVideo, child.Node.DisplayURL, child.Node.VideoURL))
		}

		// Accumulate every URL of the post; none may replace an earlier one.
		var content strings.Builder
		for _, u := range urls {
			fmt.Println(u)
			content.WriteString(u)
			content.WriteByte('\n')
		}
		WirteText(savefile, content.String())
	}
}

// mediaURL picks the URL to download for one node: videoURL when isVideo is
// true, displayURL otherwise.
func mediaURL(isVideo bool, displayURL, videoURL string) string {
	if isVideo {
		return videoURL
	}
	return displayURL
}

// WirteText appends txt to savefile, creating the file when it does not
// exist yet.
//
// New files are created with mode 0644 (before umask). Nothing is returned;
// failures to open, write, flush or close the file are printed to standard
// output.
func WirteText(savefile string, txt string) {
	f, err := os.OpenFile(savefile, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644)
	if err != nil {
		fmt.Println("os Create error: ", err)
		return
	}
	defer func() {
		// A close error can mean the data never reached the disk.
		if err := f.Close(); err != nil {
			fmt.Println("file close error: ", err)
		}
	}()

	bw := bufio.NewWriter(f)
	if _, err := bw.WriteString(txt); err != nil {
		fmt.Println("file write error: ", err)
		return
	}
	if err := bw.Flush(); err != nil {
		fmt.Println("file flush error: ", err)
	}
}

// GetRunPath returns the directory that contains the running executable,
// without a trailing path separator.
//
// The executable is located by resolving os.Args[0] with exec.LookPath and
// making the result absolute. If either step fails the error is logged and
// "." (the current directory) is returned.
func GetRunPath() string {
	file, err := exec.LookPath(os.Args[0])
	if err != nil {
		log.Printf("locating executable %q: %v", os.Args[0], err)
		return "."
	}
	path, err := filepath.Abs(file)
	if err != nil {
		log.Printf("resolving absolute path of %q: %v", file, err)
		return "."
	}
	// filepath.Dir leaves a trailing separator only on a root directory;
	// trimming it keeps the result free of a trailing separator in every case.
	return strings.TrimSuffix(filepath.Dir(path), string(os.PathSeparator))
}

// init loads conf.ini from the directory returned by GetRunPath and fills
// Queryhash, Proxy and Cookie from the keys "queryhash", "proxy" and "cookie"
// of its [Instagram] section.
//
// It panics when the file cannot be loaded or a key cannot be read. The panic
// message includes the underlying error (and, for the file, the path that was
// tried) so the cause can be diagnosed.
func init() {
	confPath := GetRunPath() + "/conf.ini"
	cfg, err := goconfig.LoadConfigFile(confPath)
	if err != nil {
		panic(fmt.Sprintf("没有加载到配置文件 %s: %v", confPath, err))
	}

	Queryhash, err = cfg.GetValue("Instagram", "queryhash")
	if err != nil {
		panic(fmt.Sprintf("queryhash错误: %v", err))
	}
	Proxy, err = cfg.GetValue("Instagram", "proxy")
	if err != nil {
		panic(fmt.Sprintf("proxy错误: %v", err))
	}
	Cookie, err = cfg.GetValue("Instagram", "cookie")
	if err != nil {
		panic(fmt.Sprintf("cookie错误: %v", err))
	}
}

// main reads an Instagram profile URL from standard input, resolves it to a
// user id and username, then walks the user's timeline page by page. Each
// page's media URLs are printed and appended to "<username>.txt".
//
// Every page is processed exactly once, and no request is made after the
// last page. The run stops early when the input cannot be read, the user id
// cannot be resolved, or a page cannot be decoded.
func main() {
	var homepage string
	fmt.Println("输入主页地址")
	if _, err := fmt.Scanln(&homepage); err != nil {
		log.Println("reading homepage address:", err)
		return
	}

	// Number of posts requested per page.
	const first = "12"

	id, username := GetUserInfo(homepage)
	if id == "" {
		// Without an id every query would be meaningless.
		log.Println("could not resolve a user id for", homepage)
		return
	}
	savefile := username + ".txt"

	// The first page is requested without an "after" cursor.
	firstpage := SetQueryUrl(Queryhash, id, first, "")
	fmt.Println(firstpage)

	jsondata := GetHtml(firstpage)
	fmt.Println(jsondata)

	for {
		var data IGData
		if err := json.Unmarshal([]byte(jsondata), &data); err != nil {
			log.Println("decoding timeline page:", err)
			break
		}
		GetDownloadUrl(savefile, data)

		// IsEnd returns has_next_page, so false means this was the last page.
		if !IsEnd(data) {
			break
		}
		cursor := GetAfter(data)
		if cursor == "" {
			// An empty cursor would request the first page again, forever.
			break
		}
		jsondata = GetHtml(SetQueryUrl(Queryhash, id, first, cursor))
	}
	fmt.Println("获取地址结束")
}

``

你可能感兴趣的:(go,go)