42 lines
645 B
Go
42 lines
645 B
Go
package scraper
|
|
|
|
import (
|
|
"fmt"
|
|
"regexp"
|
|
"strings"
|
|
|
|
"golang.org/x/net/html"
|
|
"golang.org/x/net/html/atom"
|
|
)
|
|
|
|
// containsWord reports whether any whitespace-separated field of s matches
// substr in its entirety.
//
// substr is interpreted as a regular expression, not as a literal string. It
// is wrapped in a non-capturing group before being anchored so that a
// top-level alternation such as "a|b" is anchored as a whole instead of being
// read as "^a" or "b$".
//
// If substr is not a valid pattern the error message is written with println
// once and containsWord returns false.
func containsWord(s, substr string) bool {
	fields := strings.Fields(s)
	if len(fields) == 0 {
		// Nothing to match against; skip compiling the pattern altogether.
		return false
	}

	// Compile once up front: the pattern does not depend on the field.
	re, err := regexp.Compile(fmt.Sprintf("^(?:%s)$", substr))
	if err != nil {
		// println on an error interface value prints raw pointer words, so
		// pass the message string explicitly.
		println(err.Error())
		return false
	}

	for _, field := range fields {
		if re.MatchString(field) {
			return true
		}
	}
	return false
}
|
|
|
|
var body = &html.Node{
|
|
Type: html.ElementNode,
|
|
Data: "body",
|
|
DataAtom: atom.Body,
|
|
}
|
|
|
|
func ParseFragment(s string) (*html.Node, error) {
|
|
n, err := html.ParseFragment(strings.NewReader(s), body)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return n[0], nil
|
|
}
|