shouldwork

This commit is contained in:
2026-02-12 00:33:05 +03:00
parent 12b9af4c01
commit d5b8c8fd12
4 changed files with 19 additions and 5 deletions

5
go.mod Normal file
View File

@@ -0,0 +1,5 @@
module scraper
go 1.25.5
require golang.org/x/net v0.50.0

2
go.sum Normal file
View File

@@ -0,0 +1,2 @@
golang.org/x/net v0.50.0 h1:ucWh9eiCGyDR3vtzso0WMQinm2Dnt8cFMuQa9K33J60=
golang.org/x/net v0.50.0/go.mod h1:UgoSli3F/pBgdJBHCTc+tp3gmrU4XswgGRgtnwWTfyM=

View File

@@ -20,13 +20,13 @@ var ErrNotAnElementNode = errors.New("not an ElementNode")
// parseFragment parses s with html.ParseFragment, using the package-level
// `body` value as the parsing context, and returns the first resulting node.
//
// Each line below appears to be a side-by-side diff row: the left half is the
// text before the commit and the right half is the text after it. The only
// difference in this function is the error path, which changes from
// `return n, err` to `return nil, err`; n is indexed as n[0] further down, so
// it cannot itself be returned as a *html.Node.
//
// NOTE(review): n[0] is read without a length check. If ParseFragment can
// return an empty slice together with a nil error (presumably for empty
// input; confirm against the x/net/html docs), this would panic with an
// index-out-of-range instead of returning an error.
func parseFragment(s string) (*html.Node, error) { func parseFragment(s string) (*html.Node, error) {
n, err := html.ParseFragment(strings.NewReader(s), body) n, err := html.ParseFragment(strings.NewReader(s), body)
if err != nil { if err != nil {
return n, err return nil, err
} }
return n[0], nil return n[0], nil
} }
func MatchElemAttr(s string, n2 *html.Node) (bool, error) { func MatchElemAttr(s string, n2 *html.Node) (bool, error) {
n1, err := parseFragemnt(s) n1, err := parseFragment(s)
if err != nil { if err != nil {
return false, err return false, err
} }
@@ -67,7 +67,7 @@ func SearchElemAttr2(s string, n2 *html.Node) (chan *html.Node, error) {
}() }()
if r := recover(); r != nil { if r := recover(); r != nil {
return nil, r return nil, r.(error)
} }
return <-ch, nil return ch, nil
} }

View File

@@ -10,6 +10,13 @@ import (
const htmlStr = `<div><span id="test">Hello</span><span id="test">World</span></div>` const htmlStr = `<div><span id="test">Hello</span><span id="test">World</span></div>`
// Must unwraps a (value, error) pair: it returns v unchanged when err is nil
// and panics with err otherwise. It lets a call that returns (T, error) be
// used inline as a single expression.
func Must[T any](v T, err error) T {
	if err == nil {
		return v
	}
	panic(err)
}
func TestSearchElem(t *testing.T) { func TestSearchElem(t *testing.T) {
doc := Must( doc := Must(
html.Parse(strings.NewReader(htmlStr)), html.Parse(strings.NewReader(htmlStr)),
@@ -60,7 +67,7 @@ func TestSearchElemAttr(t *testing.T) {
func TestCrawlTest(t *testing.T) { func TestCrawlTest(t *testing.T) {
fmt.Println( fmt.Println(
crawlText(Must( CrawlText(Must(
html.Parse(strings.NewReader(htmlStr)), html.Parse(strings.NewReader(htmlStr)),
)), )),
) )