74 lines
1.2 KiB
Go
74 lines
1.2 KiB
Go
package scraper
|
|
|
|
import (
|
|
"errors"
|
|
"slices"
|
|
"strings"
|
|
|
|
"golang.org/x/net/html"
|
|
"golang.org/x/net/html/atom"
|
|
)
|
|
|
|
var body = &html.Node{
|
|
Type: html.ElementNode,
|
|
Data: "body",
|
|
DataAtom: atom.Body,
|
|
}
|
|
|
|
var ErrNotAnElementNode = errors.New("not an ElementNode")
|
|
|
|
func parseFragment(s string) (*html.Node, error) {
|
|
n, err := html.ParseFragment(strings.NewReader(s), body)
|
|
if err != nil {
|
|
return n, err
|
|
}
|
|
return n[0], nil
|
|
}
|
|
|
|
func MatchElemAttr(s string, n2 *html.Node) (bool, error) {
|
|
n1, err := parseFragemnt(s)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
|
|
if !(n1.Type == html.ElementNode &&
|
|
n2.Type == html.ElementNode) {
|
|
return false, ErrNotAnElementNode
|
|
}
|
|
|
|
if n1.Data == n2.Data &&
|
|
slices.Equal(n1.Attr, n2.Attr) {
|
|
return true, nil
|
|
}
|
|
return false, nil
|
|
}
|
|
|
|
|
|
func SearchElemAttr2(s string, n2 *html.Node) (chan *html.Node, error) {
|
|
ch := make(chan *html.Node)
|
|
var crawl func(*html.Node)
|
|
crawl = func(n *html.Node) {
|
|
matches, err := MatchElemAttr(s, n)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
if matches {
|
|
ch <- n
|
|
}
|
|
c := n.FirstChild
|
|
for c != nil {
|
|
crawl(c)
|
|
c = c.NextSibling
|
|
}
|
|
}
|
|
go func() {
|
|
defer close(ch)
|
|
crawl(n2)
|
|
}()
|
|
|
|
if r := recover(); r != nil {
|
|
return nil, r
|
|
}
|
|
return <-ch, nil
|
|
}
|