Go语言使用Playwright自动化测试、录屏、执行js

Go版本的Playwright支持Chromium、Firefox和WebKit的Web自动化测试,兼容Windows、Linux和MacOS,默认支持headless无头模式,安装方便、绿色高效、兼容性强、运行速度快。
支持的主要功能有:
跨多个页面、域名和iframe的场景;
在执行操作(如单击、填充)之前自动等待元素就绪;
拦截网络活动以拦截和模拟网络请求;
模拟移动设备、地理位置和权限;
通过可穿透Shadow DOM的选择器支持Web组件;
鼠标和键盘的原生事件;
上传和下载文件;

安装

使用go get安装playwright-go。

go get github.com/playwright-community/playwright-go

另外还需要安装驱动和浏览器依赖:

go run github.com/playwright-community/playwright-go/cmd/playwright install --with-deps
# Or
go install github.com/playwright-community/playwright-go/cmd/playwright
playwright install --with-deps

Go语言使用Playwright自动化测试、录屏、执行js_第1张图片

自动化测试

package main

import (
	"fmt"
	"log"

	"github.com/playwright-community/playwright-go"
)

// main starts Playwright, launches Chromium, opens the news listing page and
// prints the text of the "h3 > a" title link found inside every "ul.list"
// element, numbered from 1. Any Playwright error terminates the program.
func main() {
	pw, err := playwright.Run()
	if err != nil {
		log.Fatalf("could not start playwright: %v", err)
	}
	browser, err := pw.Chromium.Launch()
	if err != nil {
		log.Fatalf("could not launch browser: %v", err)
	}
	page, err := browser.NewPage()
	if err != nil {
		log.Fatalf("could not create page: %v", err)
	}
	if _, err = page.Goto("http://www.dzwww.com/xinwen/"); err != nil {
		log.Fatalf("could not goto: %v", err)
	}
	entries, err := page.QuerySelectorAll("ul.list")
	if err != nil {
		log.Fatalf("could not get entries: %v", err)
	}
	for i, entry := range entries {
		titleElement, err := entry.QuerySelector("h3 > a")
		if err != nil {
			log.Fatalf("could not get title element: %v", err)
		}
		// QuerySelector reports "no match" as a nil handle without an error;
		// calling TextContent on that nil handle would panic, so skip the entry.
		if titleElement == nil {
			log.Printf("entry %d has no title link, skipping", i+1)
			continue
		}
		title, err := titleElement.TextContent()
		if err != nil {
			log.Fatalf("could not get text content: %v", err)
		}
		fmt.Printf("%d: %s\n", i+1, title)
	}
	if err = browser.Close(); err != nil {
		log.Fatalf("could not close browser: %v", err)
	}
	if err = pw.Stop(); err != nil {
		log.Fatalf("could not stop Playwright: %v", err)
	}
}

playwright.Run()创建playwright对象。
pw.Chromium.Launch()创建Chromium浏览器对象。
browser.NewPage()创建一个新页面。
page.Goto打开一个url页面。
page.QuerySelectorAll使用css选择器查找所有元素。
entry.QuerySelector查找第一个元素。
titleElement.TextContent()获取元素的textContent文本内容。
browser.Close()关闭浏览器。
pw.Stop()关闭playwright对象。

录屏视频

package main

import (
	"fmt"
	"log"

	"github.com/playwright-community/playwright-go"
)

// main records a video of a short browsing session: it opens a page whose
// context is configured to record into "videos/", visits three sites in
// order, closes the page, and then prints the path of the recorded video.
// Any Playwright error terminates the program.
func main() {
	pw, err := playwright.Run()
	if err != nil {
		log.Fatalf("could not launch playwright: %v", err)
	}

	browser, err := pw.Chromium.Launch()
	if err != nil {
		log.Fatalf("could not launch Chromium: %v", err)
	}

	// Video recording is switched on through the context options of the page.
	recordOpts := playwright.BrowserNewContextOptions{
		RecordVideo: &playwright.BrowserNewContextOptionsRecordVideo{
			Dir: playwright.String("videos/"),
		},
	}
	page, err := browser.NewPage(recordOpts)
	if err != nil {
		log.Fatalf("could not create page: %v", err)
	}

	// visit navigates the recorded page to target, logging before and after.
	visit := func(target string) {
		fmt.Printf("Visiting %s\n", target)
		_, gotoErr := page.Goto(target)
		if gotoErr != nil {
			log.Fatalf("could not goto: %v", gotoErr)
		}
		fmt.Printf("Visited %s\n", target)
	}
	for _, target := range []string{
		"http://whatsmyuseragent.org",
		"https://github.com",
		"https://microsoft.com",
	} {
		visit(target)
	}

	closeErr := page.Close()
	if closeErr != nil {
		log.Fatalf("failed to close page: %v", closeErr)
	}

	// The video path is queried only after the page has been closed.
	videoPath, err := page.Video().Path()
	if err != nil {
		log.Fatalf("failed to get video path: %v", err)
	}
	fmt.Printf("Saved to %s\n", videoPath)

	err = browser.Close()
	if err != nil {
		log.Fatalf("could not close browser: %v", err)
	}
	err = pw.Stop()
	if err != nil {
		log.Fatalf("could not stop Playwright: %v", err)
	}
}

这里用到了playwright的浏览器上下文配置:playwright.BrowserNewContextOptions

执行js

package main

import (
	"fmt"
	"log"

	"github.com/playwright-community/playwright-go"
)

// main opens the Wikipedia article on JavaScript, evaluates a JS expression
// in the page to obtain a handle to the wiki's config object, reads its
// "wgPageName" property and prints it. Any Playwright error terminates the
// program.
func main() {
	pw, err := playwright.Run()
	if err != nil {
		log.Fatal(err)
	}

	browser, err := pw.Chromium.Launch()
	if err != nil {
		log.Fatalf("could not launch browser: %v\n", err)
	}

	page, err := browser.NewPage()
	if err != nil {
		log.Fatalf("could not create page: %v\n", err)
	}

	_, err = page.Goto("https://en.wikipedia.org/wiki/JavaScript")
	if err != nil {
		log.Fatalf("could not goto: %v\n", err)
	}

	// Wikipedia keeps its wiki metadata in the JS object mw.config.values.
	rawHandle, err := page.EvaluateHandle("mw.config.values", struct{}{})
	if err != nil {
		log.Fatalf("could not acquire JSHandle: %v\n", err)
	}
	configHandle := rawHandle.(playwright.JSHandle)

	// wgPageName on that object is the name of the page currently shown.
	pageName, err := configHandle.GetProperty("wgPageName")
	if err != nil {
		log.Fatalf("could not get Wikipedia page name: %v\n", err)
	}

	fmt.Printf("Lots of type casting, brought to you by %s\n", pageName)

	err = browser.Close()
	if err != nil {
		log.Fatalf("could not close browser: %v\n", err)
	}
	err = pw.Stop()
	if err != nil {
		log.Fatalf("could not stop Playwright: %v\n", err)
	}
}

page.EvaluateHandle可以传入js代码字符串在页面中执行,并返回指向执行结果的JSHandle。

并行抓取

package main

import (
	"archive/zip"
	"bytes"
	"context"
	"encoding/csv"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"math"
	"net/http"
	"os"
	"path/filepath"
	"strings"
	"time"

	"github.com/playwright-community/playwright-go"
)

// assertErrorToNilf is a no-op when err is nil. Otherwise it hands message
// (used as the format string) and err to log.Fatalf, which logs and exits
// the program.
func assertErrorToNilf(message string, err error) {
	if err == nil {
		return
	}
	log.Fatalf(message, err)
}

// worker consumes jobs until the jobs channel is closed.
//
// Each job is handed to processJob with a 12-second deadline. A job that
// fails or times out has its Try counter incremented and is put back onto
// jobs for another attempt; once a job arrives with Try >= 3 it is reported
// on results as failed instead of being retried. Successful jobs are reported
// on results with Success set.
//
// id identifies the worker but is not used by the body.
func worker(id int, jobs chan Job, results chan<- Job, browser playwright.Browser) {
	for job := range jobs {
		fmt.Printf("starting (try: %d): %s\n", job.Try, job.URL)
		if job.Try >= 3 {
			job.Success = false
			job.err = fmt.Errorf("Stopped with domain %s (%w)", job.URL, job.err)
			results <- job
			continue
		}
		jobCtx, cancel := context.WithTimeout(context.Background(), time.Second*12)
		// Buffered so the helper goroutine can always deliver its result and
		// exit, even when the worker has already given up on a timeout.
		internalJobError := make(chan error, 1)
		// The job is passed by value so the goroutine never observes the
		// worker's later modifications to its own copy.
		go func(j Job) {
			internalJobError <- processJob(browser, j, jobCtx)
		}(job)
		// The context is cancelled only after the select below. Cancelling
		// from the helper goroutine right after the send could make both
		// cases ready at once, letting a finished job be reported as a timeout.
		select {
		case <-jobCtx.Done():
			job.err = fmt.Errorf("timeout (try: %d)", job.Try+1)
			job.Success = false
			job.Try++
			jobs <- job
		case err := <-internalJobError:
			if err != nil {
				job.err = err
				job.Success = false
				job.Try++
				jobs <- job
			} else {
				job.Success = true
				job.err = nil
				results <- job
			}
		}
		// Release the timer and signal processJob that this attempt is over.
		cancel()
	}
}

// processJob opens job.URL (prefixed with "http://") in a fresh browser
// context with a fixed desktop user agent, waits for the network to go idle
// and saves a screenshot to <cwd>/out/<URL with dots replaced by dashes>.png.
//
// When ctx is done while the job is still running, the browser context is
// closed so that pending Playwright calls are aborted. It returns nil on
// success or an error wrapping the failing step's error.
func processJob(browser playwright.Browser, job Job, ctx context.Context) error {
	// Named browserCtx so the imported context package is not shadowed.
	browserCtx, err := browser.NewContext(playwright.BrowserNewContextOptions{
		UserAgent: playwright.String("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"),
	})
	if err != nil {
		return fmt.Errorf("could not create context: %w", err)
	}
	// Best-effort cleanup; the close error is intentionally not reported.
	defer browserCtx.Close()

	// finished lets the watcher goroutine exit as soon as this function
	// returns instead of lingering until ctx is eventually cancelled.
	finished := make(chan struct{})
	defer close(finished)
	go func() {
		select {
		case <-ctx.Done():
			browserCtx.Close()
		case <-finished:
		}
	}()

	page, err := browserCtx.NewPage()
	if err != nil {
		return fmt.Errorf("could not create page: %w", err)
	}

	_, err = page.Goto("http://"+job.URL, playwright.PageGotoOptions{
		WaitUntil: playwright.WaitUntilStateNetworkidle,
	})
	if err != nil {
		// Wrapped with %w like every other error returned from this function.
		return fmt.Errorf("could not goto: %s: %w", job.URL, err)
	}
	cwd, err := os.Getwd()
	if err != nil {
		return fmt.Errorf("could not get cwd %w", err)
	}
	_, err = page.Screenshot(playwright.PageScreenshotOptions{
		Path: playwright.String(filepath.Join(cwd, "out", strings.ReplaceAll(job.URL, ".", "-")+".png")),
	})
	if err != nil {
		return fmt.Errorf("could not screenshot: %w", err)
	}
	return nil
}

// Job describes one domain to screenshot together with its retry state and
// outcome.
type Job struct {
	// URL is the domain to visit; processJob prepends "http://" to it.
	URL     string
	// Try counts the attempts that have already failed or timed out.
	Try     int
	// err holds the error of the most recent failed attempt, nil on success.
	err     error
	// Success reports whether the job finished without an error.
	Success bool
}

// main downloads the Alexa top-domain list, makes sure the "out" directory
// exists in the current working directory, launches a non-headless Chromium
// and has three workers screenshot up to the first 30 domains. One line is
// printed per finished job before the browser and Playwright are shut down.
func main() {
	// The messages use %v rather than %w: assertErrorToNilf forwards them to
	// log.Fatalf, and %w is only understood by fmt.Errorf.
	log.Println("Downloading Alexa top domains")
	topDomains, err := getAlexaTopDomains()
	assertErrorToNilf("could not get alexa top domains: %v", err)
	log.Println("Downloaded Alexa top domains successfully")
	cwd, err := os.Getwd()
	assertErrorToNilf("could not get cwd %v", err)
	// An already existing output directory is not an error.
	if err := os.Mkdir(filepath.Join(cwd, "out"), 0777); err != nil && !os.IsExist(err) {
		assertErrorToNilf("could not create output directory %v", err)
	}

	pw, err := playwright.Run()
	assertErrorToNilf("could not launch playwright: %v", err)
	browser, err := pw.Chromium.Launch(playwright.BrowserTypeLaunchOptions{
		Headless: playwright.Bool(false),
	})
	assertErrorToNilf("could not launch Chromium: %v", err)

	numberOfJobs := int(math.Min(30, float64(len(topDomains))))

	// Both channels are sized to hold every job, so neither the initial
	// enqueue below nor a worker re-queuing a failed job can block.
	jobs := make(chan Job, numberOfJobs)
	results := make(chan Job, numberOfJobs)

	for w := 1; w <= 3; w++ {
		go worker(w, jobs, results, browser)
	}

	for _, url := range topDomains[:numberOfJobs] {
		jobs <- Job{
			URL: url,
		}
	}

	// Every job produces exactly one result, after success or its final retry.
	for a := 0; a < numberOfJobs; a++ {
		job := <-results
		if job.Success {
			fmt.Println("success:", job.URL)
		} else {
			fmt.Println("error:", job.URL, job.err)
		}
	}

	// All results are in, so no worker will send again; closing jobs ends
	// the workers' range loops.
	close(jobs)
	close(results)

	assertErrorToNilf("could not close browser: %v", browser.Close())
	assertErrorToNilf("could not stop Playwright: %v", pw.Stop())
}

// getAlexaTopDomains downloads the zipped Alexa top-sites CSV, opens the
// first file in the archive and returns the second column of every CSV
// record, in file order (presumably the domain name; the column meaning is
// not verified here).
//
// It returns an error when the download fails or answers with a non-200
// status, when the archive is unreadable or empty, or when a CSV record is
// malformed or has fewer than two fields.
func getAlexaTopDomains() ([]string, error) {
	resp, err := http.Get("http://s3.amazonaws.com/alexa-static/top-1m.csv.zip")
	if err != nil {
		return nil, fmt.Errorf("could not get: %w", err)
	}
	// Registered before reading so the body is closed on a read error too.
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("could not get: unexpected status %s", resp.Status)
	}
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("could not read body: %w", err)
	}
	// zip.NewReader needs random access, hence the in-memory copy of the body.
	zipReader, err := zip.NewReader(bytes.NewReader(body), int64(len(body)))
	if err != nil {
		return nil, fmt.Errorf("could not create zip reader: %w", err)
	}
	if len(zipReader.File) == 0 {
		return nil, fmt.Errorf("could not read alexa file: archive is empty")
	}
	alexaFile, err := zipReader.File[0].Open()
	if err != nil {
		return nil, fmt.Errorf("could not read alexa file: %w", err)
	}
	defer alexaFile.Close()
	reader := csv.NewReader(alexaFile)
	out := make([]string, 0)
	for {
		record, err := reader.Read()
		if err == io.EOF {
			return out, nil
		}
		if err != nil {
			return nil, fmt.Errorf("could not read csv: %w", err)
		}
		// Guard the index below against short rows instead of panicking.
		if len(record) < 2 {
			return nil, fmt.Errorf("could not read csv: expected at least 2 fields, got %d", len(record))
		}
		out = append(out, record[1])
	}
}

参考

https://playwright-community.github.io/playwright-go/

你可能感兴趣的:(Go,golang,开发语言,后端)