Skip to content

Commit

Permalink
V2 API using Go 1.23 iterators (#3)
Browse files Browse the repository at this point in the history
* V2 API using Go 1.23 iterators.

* Update CI.

* go mod tidy

* Update docs.
  • Loading branch information
bobg authored Nov 2, 2024
1 parent ba50081 commit bc26546
Show file tree
Hide file tree
Showing 7 changed files with 118 additions and 82 deletions.
13 changes: 8 additions & 5 deletions .github/workflows/go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,18 @@ jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0

- name: Set up Go
uses: actions/setup-go@v2
with:
go-version: 1.18
go-version: 1.23

- name: Test
run: go test -coverprofile=cover.out .
- name: Unit tests
run: go test -coverprofile=cover.out ./...

- name: Send coverage
uses: shogo82148/actions-goveralls@v1
Expand All @@ -28,7 +31,7 @@ jobs:

- name: Modver
if: ${{ github.event_name == 'pull_request' }}
uses: bobg/modver@v2.6.0
uses: bobg/modver@v2.10.1
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
pull_request_url: https://github.com/${{ github.repository }}/pull/${{ github.event.number }}
4 changes: 2 additions & 2 deletions Readme.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Htree - Go package for working with html.Node trees

[![Go Reference](https://pkg.go.dev/badge/github.com/bobg/htree.svg)](https://pkg.go.dev/github.com/bobg/htree)
[![Go Report Card](https://goreportcard.com/badge/github.com/bobg/htree)](https://goreportcard.com/report/github.com/bobg/htree)
[![Go Reference](https://pkg.go.dev/badge/github.com/bobg/htree/v2.svg)](https://pkg.go.dev/github.com/bobg/htree/v2)
[![Go Report Card](https://goreportcard.com/badge/github.com/bobg/htree/v2)](https://goreportcard.com/report/github.com/bobg/htree/v2)
[![Tests](https://github.com/bobg/htree/actions/workflows/go.yml/badge.svg)](https://github.com/bobg/htree/actions/workflows/go.yml)
[![Coverage Status](https://coveralls.io/repos/github/bobg/htree/badge.svg?branch=master)](https://coveralls.io/github/bobg/htree?branch=master)
[![Mentioned in Awesome Go](https://awesome.re/mentioned-badge.svg)](https://github.com/avelino/awesome-go)
Expand Down
6 changes: 3 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
module github.com/bobg/htree
module github.com/bobg/htree/v2

go 1.18
go 1.23

require golang.org/x/net v0.19.0
require golang.org/x/net v0.30.0
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
golang.org/x/net v0.19.0 h1:zTwKpTd2XuCqf8huc7Fo2iSy+4RHPd10s4KzeTnVr1c=
golang.org/x/net v0.19.0/go.mod h1:CfAk/cbD4CthTvqiEl8NpboMuiuOYsAr/7NOjZJtv1U=
golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4=
golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU=
135 changes: 89 additions & 46 deletions htree.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,27 @@ package htree
import (
"bytes"
"io"
"iter"
"strings"

"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)

// Seq is the type of an iterator over HTML-tree nodes.
type Seq = iter.Seq[*html.Node]

// Find finds the first node,
// in a depth-first search of the tree rooted at `node`,
// in a depth-first search of the given tree,
// satisfying the given predicate.
func Find(node *html.Node, pred func(*html.Node) bool) *html.Node {
if pred(node) {
return node
func Find(tree *html.Node, pred func(*html.Node) bool) *html.Node {
if pred(tree) {
return tree
}
if node.Type == html.TextNode {
if tree.Type == html.TextNode {
return nil
}
for child := node.FirstChild; child != nil; child = child.NextSibling {
for child := tree.FirstChild; child != nil; child = child.NextSibling {
if found := Find(child, pred); found != nil {
return found
}
Expand All @@ -29,74 +33,113 @@ func Find(node *html.Node, pred func(*html.Node) bool) *html.Node {
}

// FindEl finds the first `ElementNode`-typed node,
// in a depth-first search of the tree rooted at `node`,
// in a depth-first search of the given tree,
// satisfying the given predicate.
func FindEl(node *html.Node, pred func(*html.Node) bool) *html.Node {
return Find(node, elPred(pred))
func FindEl(tree *html.Node, pred func(*html.Node) bool) *html.Node {
return Find(tree, elPred(pred))
}

// Walk applies f to each node in a recursive, preorder, depth-first walk of `node`.
// If any call to f produces an error, the walk is aborted and the error returned.
func Walk(node *html.Node, f func(*html.Node) error) error {
err := f(node)
if err != nil {
return err
// Walk produces an iterator over the nodes in the tree
// in a recursive, preorder, depth-first walk.
func Walk(tree *html.Node) Seq {
return func(yield func(*html.Node) bool) {
walk(tree, yield)
}
}

func walk(node *html.Node, yield func(*html.Node) bool) bool {
if node.Type == html.TextNode {
return nil
return true
}
if !yield(node) {
return false
}
for child := node.FirstChild; child != nil; child = child.NextSibling {
err = Walk(child, f)
if err != nil {
return err
if !walk(child, yield) {
return false
}
}
return nil
return true
}

// FindAll walks the tree rooted at `node` in preorder, depth-first fashion.
// It tests each node in the tree with `pred`.
// Any node that passes the test causes FindAll to
// (a) call `f` on the node, and
// (b) skip walking the node's subtree.
//
// If any call to `f` returns an error, FindAll aborts the walk and returns the error.
// FindAll produces an iterator over the nodes in the tree that satisfy the given predicate,
// skipping the children of each node that does.
//
// To continue walking the subtree of a node `n` that passes `pred`,
// call FindAllChildren(n, pred, f) in the body of `f`.
func FindAll(node *html.Node, pred func(*html.Node) bool, f func(*html.Node) error) error {
// call `FindAllChildren(n, pred)`.
// Example:
//
// for n := range FindAll(tree, pred) {
// doSomething(n, pred)
// }
//
// And elsewhere:
//
// func doSomething(n *html.Node, pred func(*html.Node) bool) {
// // ...do something with n...
// for child := range FindAllChildren(n, pred) {
// doSomething(child, pred)
// }
// }
func FindAll(tree *html.Node, pred func(*html.Node) bool) Seq {
return func(yield func(*html.Node) bool) {
findAll(tree, pred, yield)
}
}

func findAll(node *html.Node, pred func(*html.Node) bool, yield func(*html.Node) bool) bool {
if pred(node) {
return f(node)
if !yield(node) {
return false
}
}
return FindAllChildren(node, pred, f)
return findAllChildren(node, pred, yield)
}

// FindAllChildren is the same as FindAll but operates only on the children of `node`, not `node` itself.
func FindAllChildren(node *html.Node, pred func(*html.Node) bool, f func(*html.Node) error) error {
// FindAllChildren is the same as [FindAll]
// but operates only on the children of `node`, not `node` itself.
//
// As with FindAll,
// the children of a node that passes `pred` are skipped.
// To continue walking the subtree of a node `n` that passes `pred`,
// call `FindAllChildren(n, pred)`.
func FindAllChildren(node *html.Node, pred func(*html.Node) bool) Seq {
return func(yield func(*html.Node) bool) {
findAllChildren(node, pred, yield)
}
}

func findAllChildren(node *html.Node, pred func(*html.Node) bool, yield func(*html.Node) bool) bool {
if node.Type == html.TextNode {
return nil
return true
}
for child := node.FirstChild; child != nil; child = child.NextSibling {
err := FindAll(child, pred, f)
if err != nil {
return err
if !findAll(child, pred, yield) {
return false
}
}
return nil
return true
}

// FindAllEls is like FindAll but calls `pred`, and perhaps `f`,
// only for nodes with type `ElementNode`.
// FindAllEls is like [FindAll] but calls `pred` only for nodes with type `ElementNode`.
//
// As with FindAll,
// the children of a node that passes `pred` are skipped.
// To continue walking the subtree of a node `n` that passes `pred`,
// call FindAllChildEls(n, pred, f) in the body of `f`.
func FindAllEls(node *html.Node, pred func(*html.Node) bool, f func(*html.Node) error) error {
return FindAll(node, elPred(pred), f)
// call `FindAllChildEls(n, pred)`.
func FindAllEls(node *html.Node, pred func(*html.Node) bool) Seq {
return FindAll(node, elPred(pred))
}

// FindAllChildEls is the same as FindAllEls but operates only on the children of `node`, not `node` itself.
func FindAllChildEls(node *html.Node, pred func(*html.Node) bool, f func(*html.Node) error) error {
return FindAllChildren(node, elPred(pred), f)
// FindAllChildEls is the same as [FindAllEls]
// but operates only on the children of `node`, not `node` itself.
//
// As with FindAll,
// the children of a node that passes `pred` are skipped.
// To continue walking the subtree of a node `n` that passes `pred`,
// call `FindAllChildEls(n, pred)`.
func FindAllChildEls(node *html.Node, pred func(*html.Node) bool) Seq {
return FindAllChildren(node, elPred(pred))
}

// elPred takes a predicate function of a node and returns a new predicate
Expand Down
38 changes: 14 additions & 24 deletions htree_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"fmt"
"os"
"reflect"
"slices"
"strings"
"testing"

Expand Down Expand Up @@ -38,7 +39,7 @@ func TestText(t *testing.T) {
}

func TestHTML(t *testing.T) {
f, err := os.Open("HTML.html")
f, err := os.Open("testdata/HTML.html")
if err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -69,29 +70,22 @@ func TestHTML(t *testing.T) {
t.Run("FindAllEls", func(t *testing.T) {
var strs []string

err := FindAllEls(
root,
func(n *html.Node) bool {
return n.DataAtom == atom.Div && ElClassContains(n, "vector-pinnable-header-label")
},
func(n *html.Node) error {
s, err := Text(n)
if err != nil {
return err
}
strs = append(strs, s)
return nil
},
)
if err != nil {
t.Fatal(err)
seq := FindAllEls(root, func(n *html.Node) bool {
return n.DataAtom == atom.Div && ElClassContains(n, "vector-pinnable-header-label")
})
for n := range seq {
s, err := Text(n)
if err != nil {
t.Fatal(err)
}
strs = append(strs, s)
}

want := []string{
"Main menu",
"Tools",
}
if !reflect.DeepEqual(strs, want) {
if !slices.Equal(strs, want) {
t.Errorf("got %v, want %v", strs, want)
}
})
Expand All @@ -104,17 +98,13 @@ func TestHTML(t *testing.T) {
t.Fatal("no el")
}
var atoms []atom.Atom
err := Walk(el, func(n *html.Node) error {
for n := range Walk(el) {
if n.Type == html.ElementNode {
atoms = append(atoms, n.DataAtom)
}
return nil
})
if err != nil {
t.Fatal(err)
}
want := []atom.Atom{atom.Li, atom.A, atom.Div, atom.Span, atom.Ul}
if !reflect.DeepEqual(atoms, want) {
if !slices.Equal(atoms, want) {
t.Errorf("got %v, want %v", atoms, want)
}
})
Expand Down
File renamed without changes.

0 comments on commit bc26546

Please sign in to comment.