shouldwork
This commit is contained in:
5
go.mod
Normal file
5
go.mod
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
module scraper
|
||||||
|
|
||||||
|
go 1.25.5
|
||||||
|
|
||||||
|
require golang.org/x/net v0.50.0
|
||||||
2
go.sum
Normal file
2
go.sum
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
golang.org/x/net v0.50.0 h1:ucWh9eiCGyDR3vtzso0WMQinm2Dnt8cFMuQa9K33J60=
|
||||||
|
golang.org/x/net v0.50.0/go.mod h1:UgoSli3F/pBgdJBHCTc+tp3gmrU4XswgGRgtnwWTfyM=
|
||||||
8
match.go
8
match.go
@@ -20,13 +20,13 @@ var ErrNotAnElementNode = errors.New("not an ElementNode")
|
|||||||
func parseFragment(s string) (*html.Node, error) {
|
func parseFragment(s string) (*html.Node, error) {
|
||||||
n, err := html.ParseFragment(strings.NewReader(s), body)
|
n, err := html.ParseFragment(strings.NewReader(s), body)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return n, err
|
return nil, err
|
||||||
}
|
}
|
||||||
return n[0], nil
|
return n[0], nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func MatchElemAttr(s string, n2 *html.Node) (bool, error) {
|
func MatchElemAttr(s string, n2 *html.Node) (bool, error) {
|
||||||
n1, err := parseFragemnt(s)
|
n1, err := parseFragment(s)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false, err
|
return false, err
|
||||||
}
|
}
|
||||||
@@ -67,7 +67,7 @@ func SearchElemAttr2(s string, n2 *html.Node) (chan *html.Node, error) {
|
|||||||
}()
|
}()
|
||||||
|
|
||||||
if r := recover(); r != nil {
|
if r := recover(); r != nil {
|
||||||
return nil, r
|
return nil, r.(error)
|
||||||
}
|
}
|
||||||
return <-ch, nil
|
return ch, nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,6 +10,13 @@ import (
|
|||||||
|
|
||||||
const htmlStr = `<div><span id="test">Hello</span><span id="test">World</span></div>`
|
const htmlStr = `<div><span id="test">Hello</span><span id="test">World</span></div>`
|
||||||
|
|
||||||
|
func Must[T any](v T, err error) T {
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
return v
|
||||||
|
}
|
||||||
|
|
||||||
func TestSearchElem(t *testing.T) {
|
func TestSearchElem(t *testing.T) {
|
||||||
doc := Must(
|
doc := Must(
|
||||||
html.Parse(strings.NewReader(htmlStr)),
|
html.Parse(strings.NewReader(htmlStr)),
|
||||||
@@ -60,7 +67,7 @@ func TestSearchElemAttr(t *testing.T) {
|
|||||||
|
|
||||||
func TestCrawlTest(t *testing.T) {
|
func TestCrawlTest(t *testing.T) {
|
||||||
fmt.Println(
|
fmt.Println(
|
||||||
crawlText(Must(
|
CrawlText(Must(
|
||||||
html.Parse(strings.NewReader(htmlStr)),
|
html.Parse(strings.NewReader(htmlStr)),
|
||||||
)),
|
)),
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user