Files
scraper/match.go
2026-02-11 23:49:58 +03:00

74 lines
1.2 KiB
Go

package scraper
import (
"errors"
"slices"
"strings"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
// body is a detached <body> element handed to html.ParseFragment as the
// parsing context by parseFragment.
var body = &html.Node{
	DataAtom: atom.Body,
	Data:     "body",
	Type:     html.ElementNode,
}
// ErrNotAnElementNode is returned by MatchElemAttr when either of the nodes
// being compared is not an html.ElementNode.
var ErrNotAnElementNode = errors.New("not an ElementNode")
// parseFragment parses s as an HTML fragment in the context of the
// package-level body element and returns the first top-level node produced.
//
// It returns the parser's error unchanged, and a distinct error when s yields
// no nodes at all (for example an empty string), instead of panicking on an
// out-of-range index. The returned node is nil whenever the error is non-nil.
func parseFragment(s string) (*html.Node, error) {
	nodes, err := html.ParseFragment(strings.NewReader(s), body)
	if err != nil {
		return nil, err
	}
	if len(nodes) == 0 {
		return nil, errors.New("empty HTML fragment")
	}
	return nodes[0], nil
}
// MatchElemAttr reports whether the first node parsed from the HTML fragment s
// has the same tag name and exactly the same attribute list as n2.
//
// Attributes are compared with slices.Equal, so their order, namespaces, keys
// and values must all agree.
//
// It returns any error produced while parsing s, and ErrNotAnElementNode when
// n2 is nil or when either node is not an html.ElementNode.
func MatchElemAttr(s string, n2 *html.Node) (bool, error) {
	n1, err := parseFragment(s)
	if err != nil {
		return false, err
	}
	// The nil check keeps a missing n2 from being dereferenced below.
	if n2 == nil || n1.Type != html.ElementNode || n2.Type != html.ElementNode {
		return false, ErrNotAnElementNode
	}
	return n1.Data == n2.Data && slices.Equal(n1.Attr, n2.Attr), nil
}
// SearchElemAttr2 searches the tree rooted at n2 for element nodes whose tag
// name and attribute list equal those of the first node parsed from the HTML
// fragment s, and returns them on a channel in depth-first, pre-order
// (document) order.
//
// The pattern s is parsed once up front. A parse error is returned as is, and
// ErrNotAnElementNode is returned when the pattern is not an element. Nodes in
// the tree that are not elements (text, comments, ...) are simply skipped
// rather than treated as errors. A nil n2 yields an empty result.
//
// The returned channel is buffered with every match already queued and is
// closed before returning, so callers can range over it and may stop early
// without leaking a goroutine. The channel is nil whenever the error is
// non-nil.
func SearchElemAttr2(s string, n2 *html.Node) (chan *html.Node, error) {
	pat, err := parseFragment(s)
	if err != nil {
		return nil, err
	}
	if pat.Type != html.ElementNode {
		return nil, ErrNotAnElementNode
	}

	var found []*html.Node
	var crawl func(*html.Node)
	crawl = func(n *html.Node) {
		// Same comparison MatchElemAttr performs, done against the
		// already-parsed pattern so s is not re-parsed for every node.
		if n.Type == html.ElementNode &&
			n.Data == pat.Data &&
			slices.Equal(n.Attr, pat.Attr) {
			found = append(found, n)
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			crawl(c)
		}
	}
	if n2 != nil {
		crawl(n2)
	}

	// The tree is already in memory, so the walk is done eagerly and the
	// results are handed over through a pre-filled, closed channel.
	ch := make(chan *html.Node, len(found))
	for _, n := range found {
		ch <- n
	}
	close(ch)
	return ch, nil
}