Compare commits
2 Commits
3bbd6c72ca
...
78985687a1
| Author | SHA1 | Date | |
|---|---|---|---|
| 78985687a1 | |||
| 5cb8665a08 |
5
go.mod
5
go.mod
@@ -1,5 +0,0 @@
|
||||
module git.nkpl.cc/XoxJlopeZi4BB/scraper.git
|
||||
|
||||
go 1.25.5
|
||||
|
||||
require golang.org/x/net v0.48.0
|
||||
2
go.sum
2
go.sum
@@ -1,2 +0,0 @@
|
||||
golang.org/x/net v0.48.0 h1:zyQRTTrjc33Lhh0fBgT/H3oZq9WuvRR5gPC70xpDiQU=
|
||||
golang.org/x/net v0.48.0/go.mod h1:+ndRgGjkh8FGtu1w1FGbEC31if4VrNVMuKTgcAAnQRY=
|
||||
12
scraper.go
12
scraper.go
@@ -8,7 +8,7 @@ import (
|
||||
|
||||
type crawlFunc func(*html.Node)
|
||||
|
||||
func searchElem(n *html.Node, data string) chan *html.Node {
|
||||
func SearchElem(n *html.Node, data string) chan *html.Node {
|
||||
ch := make(chan *html.Node)
|
||||
var crawl crawlFunc
|
||||
crawl = func(n *html.Node) {
|
||||
@@ -29,7 +29,7 @@ func searchElem(n *html.Node, data string) chan *html.Node {
|
||||
return ch
|
||||
}
|
||||
|
||||
func searchAttr(n *html.Node, key, contains string) chan *html.Node {
|
||||
func SearchAttr(n *html.Node, key, contains string) chan *html.Node {
|
||||
ch := make(chan *html.Node)
|
||||
var crawl crawlFunc
|
||||
crawl = func(n *html.Node) {
|
||||
@@ -52,14 +52,14 @@ func searchAttr(n *html.Node, key, contains string) chan *html.Node {
|
||||
return ch
|
||||
}
|
||||
|
||||
func searchElemAttr(n *html.Node, elem, key, value string) chan *html.Node {
|
||||
func SearchElemAttr(n *html.Node, elem, key, value string) chan *html.Node {
|
||||
ch := make(chan *html.Node)
|
||||
go func() {
|
||||
defer close(ch)
|
||||
for e := range searchElem(n, elem) {
|
||||
for e := range SearchElem(n, elem) {
|
||||
// If the document is too large, there
|
||||
// would be hundreds of goroutines :((
|
||||
for attr := range searchAttr(e, key, value) {
|
||||
for attr := range SearchAttr(e, key, value) {
|
||||
ch <- attr
|
||||
}
|
||||
}
|
||||
@@ -67,7 +67,7 @@ func searchElemAttr(n *html.Node, elem, key, value string) chan *html.Node {
|
||||
return ch
|
||||
}
|
||||
|
||||
func crawlText(n *html.Node) string {
|
||||
func CrawlText(n *html.Node) string {
|
||||
var s = new(strings.Builder)
|
||||
var crawl crawlFunc
|
||||
crawl = func(n *html.Node) {
|
||||
|
||||
Reference in New Issue
Block a user