Skip to content

Commit

Permalink
optimize winnowNodes and ClosestNodes in the same way
Browse files Browse the repository at this point in the history
  • Loading branch information
mna committed Aug 28, 2016
1 parent 26f4f54 commit 5dccaa1
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 4 deletions.
86 changes: 86 additions & 0 deletions bench/v1.0.1c-go1.7
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
BenchmarkFirst-4 30000000 51.7 ns/op 48 B/op 1 allocs/op
BenchmarkLast-4 30000000 51.9 ns/op 48 B/op 1 allocs/op
BenchmarkEq-4 30000000 50.0 ns/op 48 B/op 1 allocs/op
BenchmarkSlice-4 500000000 3.47 ns/op 0 B/op 0 allocs/op
BenchmarkGet-4 2000000000 1.68 ns/op 0 B/op 0 allocs/op
BenchmarkIndex-4 2000000 804 ns/op 248 B/op 10 allocs/op
BenchmarkIndexSelector-4 100000 16285 ns/op 3839 B/op 21 allocs/op
BenchmarkIndexOfNode-4 200000000 6.50 ns/op 0 B/op 0 allocs/op
BenchmarkIndexOfSelection-4 200000000 7.02 ns/op 0 B/op 0 allocs/op
BenchmarkMetalReviewExample-4 10000 143160 ns/op 12417 B/op 320 allocs/op
BenchmarkAdd-4 200000 10326 ns/op 208 B/op 9 allocs/op
BenchmarkAddSelection-4 10000000 155 ns/op 48 B/op 1 allocs/op
BenchmarkAddNodes-4 10000000 156 ns/op 48 B/op 1 allocs/op
BenchmarkAddNodesBig-4 20000 94439 ns/op 21847 B/op 37 allocs/op
BenchmarkAndSelf-4 1000000 1791 ns/op 1008 B/op 5 allocs/op
BenchmarkFilter-4 100000 19470 ns/op 360 B/op 8 allocs/op
BenchmarkNot-4 100000 22500 ns/op 136 B/op 5 allocs/op
BenchmarkFilterFunction-4 50000 34578 ns/op 22976 B/op 755 allocs/op
BenchmarkNotFunction-4 50000 38703 ns/op 29120 B/op 757 allocs/op
BenchmarkFilterNodes-4 50000 34486 ns/op 20960 B/op 749 allocs/op
BenchmarkNotNodes-4 30000 41094 ns/op 29120 B/op 757 allocs/op
BenchmarkFilterSelection-4 50000 33623 ns/op 20960 B/op 749 allocs/op
BenchmarkNotSelection-4 30000 41483 ns/op 29120 B/op 757 allocs/op
BenchmarkHas-4 5000 266628 ns/op 2371 B/op 50 allocs/op
BenchmarkHasNodes-4 10000 152617 ns/op 21184 B/op 752 allocs/op
BenchmarkHasSelection-4 10000 156682 ns/op 21184 B/op 752 allocs/op
BenchmarkEnd-4 2000000000 1.00 ns/op 0 B/op 0 allocs/op
BenchmarkEach-4 300000 4712 ns/op 3304 B/op 118 allocs/op
BenchmarkMap-4 200000 8434 ns/op 5572 B/op 184 allocs/op
BenchmarkEachWithBreak-4 2000000 819 ns/op 560 B/op 20 allocs/op
BenchmarkAttr-4 100000000 21.7 ns/op 0 B/op 0 allocs/op
BenchmarkText-4 200000 9376 ns/op 7536 B/op 110 allocs/op
BenchmarkLength-4 2000000000 0.35 ns/op 0 B/op 0 allocs/op
BenchmarkHtml-4 5000000 401 ns/op 120 B/op 2 allocs/op
BenchmarkIs-4 100000 22214 ns/op 88 B/op 4 allocs/op
BenchmarkIsPositional-4 50000 26559 ns/op 1112 B/op 10 allocs/op
BenchmarkIsFunction-4 1000000 1228 ns/op 784 B/op 28 allocs/op
BenchmarkIsSelection-4 50000 33471 ns/op 20960 B/op 749 allocs/op
BenchmarkIsNodes-4 50000 34461 ns/op 20960 B/op 749 allocs/op
BenchmarkHasClass-4 10000 232429 ns/op 14944 B/op 976 allocs/op
BenchmarkContains-4 200000000 7.62 ns/op 0 B/op 0 allocs/op
BenchmarkFind-4 100000 16114 ns/op 3839 B/op 21 allocs/op
BenchmarkFindWithinSelection-4 30000 42520 ns/op 3540 B/op 82 allocs/op
BenchmarkFindSelection-4 10000 209801 ns/op 5615 B/op 89 allocs/op
BenchmarkFindNodes-4 10000 209082 ns/op 5614 B/op 89 allocs/op
BenchmarkContents-4 300000 4836 ns/op 1420 B/op 36 allocs/op
BenchmarkContentsFiltered-4 200000 5495 ns/op 1570 B/op 41 allocs/op
BenchmarkChildren-4 3000000 527 ns/op 152 B/op 7 allocs/op
BenchmarkChildrenFiltered-4 500000 2499 ns/op 352 B/op 15 allocs/op
BenchmarkParent-4 50000 34072 ns/op 6942 B/op 387 allocs/op
BenchmarkParentFiltered-4 50000 36077 ns/op 7141 B/op 394 allocs/op
BenchmarkParents-4 20000 64118 ns/op 30719 B/op 837 allocs/op
BenchmarkParentsFiltered-4 20000 63432 ns/op 31303 B/op 845 allocs/op
BenchmarkParentsUntil-4 50000 29589 ns/op 11829 B/op 358 allocs/op
BenchmarkParentsUntilSelection-4 10000 101033 ns/op 54076 B/op 1523 allocs/op
BenchmarkParentsUntilNodes-4 10000 100584 ns/op 54076 B/op 1523 allocs/op
BenchmarkParentsFilteredUntil-4 200000 8061 ns/op 2787 B/op 88 allocs/op
BenchmarkParentsFilteredUntilSelection-4 100000 13848 ns/op 5995 B/op 192 allocs/op
BenchmarkParentsFilteredUntilNodes-4 100000 13766 ns/op 5995 B/op 192 allocs/op
BenchmarkSiblings-4 20000 75135 ns/op 28453 B/op 225 allocs/op
BenchmarkSiblingsFiltered-4 20000 80532 ns/op 29544 B/op 234 allocs/op
BenchmarkNext-4 100000 14200 ns/op 4660 B/op 117 allocs/op
BenchmarkNextFiltered-4 100000 15284 ns/op 4859 B/op 123 allocs/op
BenchmarkNextAll-4 20000 60889 ns/op 22774 B/op 157 allocs/op
BenchmarkNextAllFiltered-4 20000 65125 ns/op 23869 B/op 166 allocs/op
BenchmarkPrev-4 100000 14448 ns/op 4659 B/op 117 allocs/op
BenchmarkPrevFiltered-4 100000 15444 ns/op 4859 B/op 123 allocs/op
BenchmarkPrevAll-4 100000 22019 ns/op 7344 B/op 120 allocs/op
BenchmarkPrevAllFiltered-4 100000 23307 ns/op 7545 B/op 126 allocs/op
BenchmarkNextUntil-4 50000 30287 ns/op 8766 B/op 267 allocs/op
BenchmarkNextUntilSelection-4 30000 41476 ns/op 19862 B/op 546 allocs/op
BenchmarkNextUntilNodes-4 100000 16106 ns/op 8133 B/op 249 allocs/op
BenchmarkPrevUntil-4 20000 98951 ns/op 25728 B/op 467 allocs/op
BenchmarkPrevUntilSelection-4 30000 52390 ns/op 24875 B/op 694 allocs/op
BenchmarkPrevUntilNodes-4 100000 12986 ns/op 6334 B/op 204 allocs/op
BenchmarkNextFilteredUntil-4 100000 19365 ns/op 5908 B/op 177 allocs/op
BenchmarkNextFilteredUntilSelection-4 30000 45334 ns/op 20555 B/op 571 allocs/op
BenchmarkNextFilteredUntilNodes-4 30000 45292 ns/op 20556 B/op 571 allocs/op
BenchmarkPrevFilteredUntil-4 100000 19412 ns/op 6032 B/op 179 allocs/op
BenchmarkPrevFilteredUntilSelection-4 30000 46286 ns/op 21304 B/op 591 allocs/op
BenchmarkPrevFilteredUntilNodes-4 30000 46554 ns/op 21305 B/op 591 allocs/op
BenchmarkClosest-4 500000 3480 ns/op 160 B/op 8 allocs/op
BenchmarkClosestSelection-4 2000000 722 ns/op 96 B/op 6 allocs/op
BenchmarkClosestNodes-4 2000000 719 ns/op 96 B/op 6 allocs/op
PASS
ok github.com/PuerkitoBio/goquery 160.565s
12 changes: 11 additions & 1 deletion filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,18 @@ func winnow(sel *Selection, m Matcher, keep bool) []*html.Node {
// winnowNodes filters the selection based on a slice of nodes and the
// indicator to keep (Filter) or to get rid of (Not) the matching elements.
//
// When the combined number of nodes is below minNodesForSet, membership is
// tested by scanning the nodes slice directly. Otherwise a set is built once
// from nodes and every element of the selection is tested against that set.
func winnowNodes(sel *Selection, nodes []*html.Node, keep bool) []*html.Node {
	// Small inputs: skip the map and scan the slice for each element.
	if len(nodes)+len(sel.Nodes) < minNodesForSet {
		return grep(sel, func(i int, s *Selection) bool {
			return isInSlice(nodes, s.Get(0)) == keep
		})
	}

	// Large inputs: build the lookup set once, sized for all candidate nodes.
	set := make(map[*html.Node]bool, len(nodes))
	for _, n := range nodes {
		set[n] = true
	}
	return grep(sel, func(i int, s *Selection) bool {
		// A missing key yields false, so this is the same membership test as
		// the slice scan above, done with a map lookup instead.
		return set[s.Get(0)] == keep
	})
}

Expand Down
6 changes: 5 additions & 1 deletion traversal.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,11 +143,15 @@ func (s *Selection) ClosestMatcher(m Matcher) *Selection {
// ClosestNodes gets the first element that matches one of the nodes by testing the
// element itself and traversing up through its ancestors in the DOM tree.
func (s *Selection) ClosestNodes(nodes ...*html.Node) *Selection {
set := make(map[*html.Node]bool)
for _, n := range nodes {
set[n] = true
}
return pushStack(s, mapNodes(s.Nodes, func(i int, n *html.Node) []*html.Node {
// For each node in the selection, test the node itself, then each parent
// until a match is found.
for ; n != nil; n = n.Parent {
if isInSlice(nodes, n) {
if set[n] {
return []*html.Node{n}
}
}
Expand Down
7 changes: 5 additions & 2 deletions utilities.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ import (
"golang.org/x/net/html"
)

// minNodesForSet is the threshold used to determine if a set
// (map[*html.Node]bool) should be used instead of iterating over a slice.
// Callers compare the combined length of the slices involved against it and
// only build a set once that total reaches the threshold, because the set
// uses more memory and is slower than slice iteration for small N.
const minNodesForSet = 1000

var nodeNames = []string{
html.ErrorNode: "#error",
html.TextNode: "#text",
Expand Down Expand Up @@ -108,8 +113,6 @@ func indexInSlice(slice []*html.Node, node *html.Node) int {
// a new underlying array. If targetSet is nil, a local set is created with the
// target if len(target) + len(nodes) is greater than or equal to minNodesForSet.
func appendWithoutDuplicates(target []*html.Node, nodes []*html.Node, targetSet map[*html.Node]bool) []*html.Node {
const minNodesForSet = 1000

// if there are not that many nodes, don't use the map, faster to just use nested loops
// (unless a non-nil targetSet is passed, in which case the caller knows better).
if targetSet == nil && len(target)+len(nodes) < minNodesForSet {
Expand Down

0 comments on commit 5dccaa1

Please sign in to comment.