// File: scraper/match.go
// (repo-browser snapshot metadata: 2026-03-02 22:26:28 +03:00 — 82 lines, 1.4 KiB, Go)
//go:build ignore
package scraper
import (
"errors"
"slices"
"strings"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
// body is a synthetic <body> element used as the parsing context for
// html.ParseFragment, so fragments are parsed as if they appeared
// inside a document body.
var body = &html.Node{
Type: html.ElementNode,
Data: "body",
DataAtom: atom.Body,
}

// ErrNotAnElementNode is the sentinel error joined (via errors.Join) into
// the errors returned when a node involved in a match is not an
// html.ElementNode; test for it with errors.Is.
var ErrNotAnElementNode = errors.New("not a html.ElementNode")
// ParseFragment parses s as an HTML fragment in the context of a <body>
// element and returns its first top-level node.
//
// It returns an error when the fragment cannot be parsed, or when parsing
// yields no nodes at all (e.g. an empty string), instead of panicking on
// an out-of-range index as the original n[0] did.
func ParseFragment(s string) (*html.Node, error) {
	nodes, err := html.ParseFragment(strings.NewReader(s), body)
	if err != nil {
		return nil, err
	}
	if len(nodes) == 0 {
		return nil, errors.New("empty fragment")
	}
	return nodes[0], nil
}
// MatchElemAttr reports whether the element node n has the same tag name
// and exactly the same attributes (same values, same order) as the element
// described by the HTML fragment s.
//
// It returns an error wrapping ErrNotAnElementNode when either the node
// parsed from s or n itself is not an element node, and propagates any
// parse error for s.
func MatchElemAttr(s string, n *html.Node) (bool, error) {
	// Fix: the original called the undefined identifier parseFragment;
	// the helper in this package is exported as ParseFragment.
	n1, err := ParseFragment(s)
	if err != nil {
		return false, err
	}
	if n1.Type != html.ElementNode {
		return false, errors.Join(
			ErrNotAnElementNode,
			errors.New("s isn't a html.ElementNode"),
		)
	}
	if n.Type != html.ElementNode {
		return false, errors.Join(
			ErrNotAnElementNode,
			errors.New("n isn't a html.ElementNode"),
		)
	}
	// slices.Equal compares element-wise; html.Attribute is a comparable
	// struct, so this requires an exact, order-sensitive attribute match.
	return n1.Data == n.Data && slices.Equal(n1.Attr, n.Attr), nil
}
// SearchElemAttr2 walks the tree rooted at n2 and streams every element
// node whose tag name and attributes exactly match the element described
// by the fragment s. The returned channel is closed when the walk ends.
//
// The fragment is parsed and validated once, up front, so the walk itself
// cannot fail: non-element nodes (text, comments, doctype, ...) are simply
// skipped rather than treated as errors. The original version panicked on
// the first non-element node it visited, and tried to recover() outside
// both the deferred call and the panicking goroutine — which can never
// catch anything (recover is only effective directly inside a deferred
// function of the goroutine that panicked).
//
// NOTE(review): the caller must drain the channel to completion, or the
// walker goroutine leaks blocked on send; a context-aware variant would
// fix that.
func SearchElemAttr2(s string, n2 *html.Node) (chan *html.Node, error) {
	// Parse s exactly once instead of re-parsing it for every node visited.
	want, err := ParseFragment(s)
	if err != nil {
		return nil, err
	}
	if want.Type != html.ElementNode {
		return nil, errors.Join(
			ErrNotAnElementNode,
			errors.New("s isn't a html.ElementNode"),
		)
	}

	ch := make(chan *html.Node)
	var crawl func(*html.Node)
	crawl = func(n *html.Node) {
		if n.Type == html.ElementNode &&
			n.Data == want.Data &&
			slices.Equal(n.Attr, want.Attr) {
			ch <- n
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			crawl(c)
		}
	}
	go func() {
		defer close(ch)
		crawl(n2)
	}()
	return ch, nil
}