// Example code for the regular-expression functions in the regexp package.
package main
import (
"fmt"
"io/ioutil"
"net/http"
"os"
"regexp"
"strings"
)
/* Package-level matching helpers provided by regexp:
func Match(pattern string, b []byte) (matched bool, err error)
func MatchReader(pattern string, r io.RuneReader) (matched bool, err error)
func MatchString(pattern string, s string) (matched bool, err error)*/
// ipv4ShapeRe matches exactly four dot-separated groups of one to three
// decimal digits. It is compiled once at package initialisation so that IsIp
// does not re-parse the pattern on every call.
var ipv4ShapeRe = regexp.MustCompile(`^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$`)

// IsIp reports whether ip has the dotted-quad shape of an IPv4 address:
// four groups of one to three decimal digits separated by literal dots.
//
// Every separator must be a real '.'; the previous pattern left the last dot
// unescaped, so any character was accepted in that position (for example
// "192.168.172x88").
//
// Only the textual shape is checked. The numeric range of each group is not
// validated, so "999.999.999.999" is still reported as true.
func IsIp(ip string) bool {
	return ipv4ShapeRe.MatchString(ip)
}
// IsDigital returns a human-readable verdict on whether str consists solely
// of one or more ASCII digits: "是数字" when it does, "不是数字" otherwise
// (including for the empty string).
func IsDigital(str string) string {
	// The pattern is a valid constant, so the error result is always nil.
	onlyDigits, _ := regexp.MatchString("^[0-9]+$", str)
	if onlyDigits {
		return "是数字"
	}
	return "不是数字"
}
// main1 demonstrates the two matching helpers by printing, in order, the
// result of IsIp for a dotted address and for a comma-containing string,
// then the result of IsDigital for that same comma-containing string and
// for a plain run of digits.
func main1() {
	for _, candidate := range []string{"192.168.172.88", "192,167.12.11"} {
		fmt.Println(IsIp(candidate))
	}
	for _, candidate := range []string{"192,167.12.11", "192"} {
		fmt.Println(IsDigital(candidate))
	}
}
/*以爬虫为例来说明如何使用正则来过滤或截取抓取到的数据*/
func main2() {
resp, err := http.Get("http://www.baidu.com")
if err != nil {
fmt.Println("http ger error")
return
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
fmt.Println("http read err")
return
}
src := string(body)
/*O_RDONLY int = syscall.O_RDONLY // open the file read-only.
O_WRONLY int = syscall.O_WRONLY // open the file write-only.
O_RDWR int = syscall.O_RDWR // open the file read-write.
O_APPEND int = syscall.O_APPEND // append data to the file when writing.
O_CREATE int = syscall.O_CREAT // create a new file if none exists.
O_EXCL int = syscall.O_EXCL // used with O_CREATE, file must not exist
O_SYNC int = syscall.O_SYNC // open for synchronous I/O.
O_TRUNC int = syscall.O_TRUNC // if possible, truncate file when opened.*/
file, _ := os.OpenFile("baidu.html", os.O_RDWR|os.O_CREATE, 0666)
defer file.Close()
file.WriteString(src)
//使用复杂的正则首先是Compile,它会解析正则表达式是否合法,如果正确,那么就会返回一个Regexp,
//然后就可以利用返回的Regexp在任意的字符串上面执行需要的操作
//将HTML标签全转换成小写
re, _ := regexp.Compile("\\<[\\S\\s]+?\\>")
src = re.ReplaceAllStringFunc(src, strings.ToLower)
//去除STYLE
re, _ = regexp.Compile("\\