82 lines
1.4 KiB
Go
82 lines
1.4 KiB
Go
//go:build ignore
|
|
package scraper
|
|
|
|
import (
|
|
"errors"
|
|
"slices"
|
|
"strings"
|
|
|
|
"golang.org/x/net/html"
|
|
"golang.org/x/net/html/atom"
|
|
)
|
|
|
|
var body = &html.Node{
|
|
Type: html.ElementNode,
|
|
Data: "body",
|
|
DataAtom: atom.Body,
|
|
}
|
|
|
|
// ErrNotAnElementNode is the sentinel error that MatchElemAttr joins into its
// result when either the parsed pattern or the candidate node is not an
// html.ElementNode. Detect it with errors.Is.
var ErrNotAnElementNode = errors.New("not a html.ElementNode")
|
|
|
|
func ParseFragment(s string) (*html.Node, error) {
|
|
n, err := html.ParseFragment(strings.NewReader(s), body)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return n[0], nil
|
|
}
|
|
|
|
func MatchElemAttr(s string, n *html.Node) (bool, error) {
|
|
n1, err := parseFragment(s)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
|
|
if n1.Type != html.ElementNode {
|
|
return false, errors.Join(
|
|
ErrNotAnElementNode,
|
|
errors.New("s isn's a html.ElementNode"),
|
|
)
|
|
}
|
|
if n.Type != html.ElementNode {
|
|
return false, errors.Join(
|
|
ErrNotAnElementNode,
|
|
errors.New("n isn's a html.ElementNode"),
|
|
)
|
|
}
|
|
|
|
if n1.Data == n.Data &&
|
|
slices.Equal(n1.Attr, n.Attr) {
|
|
return true, nil
|
|
}
|
|
return false, nil
|
|
}
|
|
|
|
func SearchElemAttr2(s string, n2 *html.Node) (chan *html.Node, error) {
|
|
ch := make(chan *html.Node)
|
|
var crawl func(*html.Node)
|
|
crawl = func(n *html.Node) {
|
|
matches, err := MatchElemAttr(s, n)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
if matches {
|
|
ch <- n
|
|
}
|
|
c := n.FirstChild
|
|
for c != nil {
|
|
crawl(c)
|
|
c = c.NextSibling
|
|
}
|
|
}
|
|
go func() {
|
|
defer close(ch)
|
|
crawl(n2)
|
|
}()
|
|
|
|
if r := recover(); r != nil {
|
|
return nil, r.(error)
|
|
}
|
|
return ch, nil
|
|
}
|