fblook/vendor/github.com/andybalholm/cascadia/selector.go

623 lines
14 KiB
Go

package cascadia
import (
"bytes"
"fmt"
"regexp"
"strings"
"golang.org/x/net/html"
)
// the Selector type, and functions for creating them
// A Selector is a function which tells whether a node matches or not.
type Selector func(*html.Node) bool
// hasChildMatch returns whether n has any child that matches a.
func hasChildMatch(n *html.Node, a Selector) bool {
for c := n.FirstChild; c != nil; c = c.NextSibling {
if a(c) {
return true
}
}
return false
}
// hasDescendantMatch performs a depth-first search of n's descendants,
// testing whether any of them match a. It returns true as soon as a match is
// found, or false if no match is found.
func hasDescendantMatch(n *html.Node, a Selector) bool {
for c := n.FirstChild; c != nil; c = c.NextSibling {
if a(c) || (c.Type == html.ElementNode && hasDescendantMatch(c, a)) {
return true
}
}
return false
}
// Compile parses a selector and returns, if successful, a Selector object
// that can be used to match against html.Node objects.
func Compile(sel string) (Selector, error) {
p := &parser{s: sel}
compiled, err := p.parseSelectorGroup()
if err != nil {
return nil, err
}
if p.i < len(sel) {
return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i)
}
return compiled, nil
}
// MustCompile is like Compile, but panics instead of returning an error.
func MustCompile(sel string) Selector {
compiled, err := Compile(sel)
if err != nil {
panic(err)
}
return compiled
}
// MatchAll returns a slice of the nodes that match the selector,
// from n and its children.
func (s Selector) MatchAll(n *html.Node) []*html.Node {
return s.matchAllInto(n, nil)
}
func (s Selector) matchAllInto(n *html.Node, storage []*html.Node) []*html.Node {
if s(n) {
storage = append(storage, n)
}
for child := n.FirstChild; child != nil; child = child.NextSibling {
storage = s.matchAllInto(child, storage)
}
return storage
}
// Match returns true if the node matches the selector.
func (s Selector) Match(n *html.Node) bool {
return s(n)
}
// MatchFirst returns the first node that matches s, from n and its children.
func (s Selector) MatchFirst(n *html.Node) *html.Node {
if s.Match(n) {
return n
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
m := s.MatchFirst(c)
if m != nil {
return m
}
}
return nil
}
// Filter returns the nodes in nodes that match the selector.
func (s Selector) Filter(nodes []*html.Node) (result []*html.Node) {
for _, n := range nodes {
if s(n) {
result = append(result, n)
}
}
return result
}
// typeSelector returns a Selector that matches elements with a given tag name.
func typeSelector(tag string) Selector {
tag = toLowerASCII(tag)
return func(n *html.Node) bool {
return n.Type == html.ElementNode && n.Data == tag
}
}
// toLowerASCII returns s with all ASCII capital letters lowercased.
func toLowerASCII(s string) string {
var b []byte
for i := 0; i < len(s); i++ {
if c := s[i]; 'A' <= c && c <= 'Z' {
if b == nil {
b = make([]byte, len(s))
copy(b, s)
}
b[i] = s[i] + ('a' - 'A')
}
}
if b == nil {
return s
}
return string(b)
}
// attributeSelector returns a Selector that matches elements
// where the attribute named key satisifes the function f.
func attributeSelector(key string, f func(string) bool) Selector {
key = toLowerASCII(key)
return func(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
for _, a := range n.Attr {
if a.Key == key && f(a.Val) {
return true
}
}
return false
}
}
// attributeExistsSelector returns a Selector that matches elements that have
// an attribute named key.
func attributeExistsSelector(key string) Selector {
return attributeSelector(key, func(string) bool { return true })
}
// attributeEqualsSelector returns a Selector that matches elements where
// the attribute named key has the value val.
func attributeEqualsSelector(key, val string) Selector {
return attributeSelector(key,
func(s string) bool {
return s == val
})
}
// attributeNotEqualSelector returns a Selector that matches elements where
// the attribute named key does not have the value val.
func attributeNotEqualSelector(key, val string) Selector {
key = toLowerASCII(key)
return func(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
for _, a := range n.Attr {
if a.Key == key && a.Val == val {
return false
}
}
return true
}
}
// attributeIncludesSelector returns a Selector that matches elements where
// the attribute named key is a whitespace-separated list that includes val.
func attributeIncludesSelector(key, val string) Selector {
return attributeSelector(key,
func(s string) bool {
for s != "" {
i := strings.IndexAny(s, " \t\r\n\f")
if i == -1 {
return s == val
}
if s[:i] == val {
return true
}
s = s[i+1:]
}
return false
})
}
// attributeDashmatchSelector returns a Selector that matches elements where
// the attribute named key equals val or starts with val plus a hyphen.
func attributeDashmatchSelector(key, val string) Selector {
return attributeSelector(key,
func(s string) bool {
if s == val {
return true
}
if len(s) <= len(val) {
return false
}
if s[:len(val)] == val && s[len(val)] == '-' {
return true
}
return false
})
}
// attributePrefixSelector returns a Selector that matches elements where
// the attribute named key starts with val.
func attributePrefixSelector(key, val string) Selector {
return attributeSelector(key,
func(s string) bool {
if strings.TrimSpace(s) == "" {
return false
}
return strings.HasPrefix(s, val)
})
}
// attributeSuffixSelector returns a Selector that matches elements where
// the attribute named key ends with val.
func attributeSuffixSelector(key, val string) Selector {
return attributeSelector(key,
func(s string) bool {
if strings.TrimSpace(s) == "" {
return false
}
return strings.HasSuffix(s, val)
})
}
// attributeSubstringSelector returns a Selector that matches nodes where
// the attribute named key contains val.
func attributeSubstringSelector(key, val string) Selector {
return attributeSelector(key,
func(s string) bool {
if strings.TrimSpace(s) == "" {
return false
}
return strings.Contains(s, val)
})
}
// attributeRegexSelector returns a Selector that matches nodes where
// the attribute named key matches the regular expression rx
func attributeRegexSelector(key string, rx *regexp.Regexp) Selector {
return attributeSelector(key,
func(s string) bool {
return rx.MatchString(s)
})
}
// intersectionSelector returns a selector that matches nodes that match
// both a and b.
func intersectionSelector(a, b Selector) Selector {
return func(n *html.Node) bool {
return a(n) && b(n)
}
}
// unionSelector returns a selector that matches elements that match
// either a or b.
func unionSelector(a, b Selector) Selector {
return func(n *html.Node) bool {
return a(n) || b(n)
}
}
// negatedSelector returns a selector that matches elements that do not match a.
func negatedSelector(a Selector) Selector {
return func(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
return !a(n)
}
}
// writeNodeText writes the text contained in n and its descendants to b.
func writeNodeText(n *html.Node, b *bytes.Buffer) {
switch n.Type {
case html.TextNode:
b.WriteString(n.Data)
case html.ElementNode:
for c := n.FirstChild; c != nil; c = c.NextSibling {
writeNodeText(c, b)
}
}
}
// nodeText returns the text contained in n and its descendants.
func nodeText(n *html.Node) string {
var b bytes.Buffer
writeNodeText(n, &b)
return b.String()
}
// nodeOwnText returns the contents of the text nodes that are direct
// children of n.
func nodeOwnText(n *html.Node) string {
var b bytes.Buffer
for c := n.FirstChild; c != nil; c = c.NextSibling {
if c.Type == html.TextNode {
b.WriteString(c.Data)
}
}
return b.String()
}
// textSubstrSelector returns a selector that matches nodes that
// contain the given text.
func textSubstrSelector(val string) Selector {
return func(n *html.Node) bool {
text := strings.ToLower(nodeText(n))
return strings.Contains(text, val)
}
}
// ownTextSubstrSelector returns a selector that matches nodes that
// directly contain the given text
func ownTextSubstrSelector(val string) Selector {
return func(n *html.Node) bool {
text := strings.ToLower(nodeOwnText(n))
return strings.Contains(text, val)
}
}
// textRegexSelector returns a selector that matches nodes whose text matches
// the specified regular expression
func textRegexSelector(rx *regexp.Regexp) Selector {
return func(n *html.Node) bool {
return rx.MatchString(nodeText(n))
}
}
// ownTextRegexSelector returns a selector that matches nodes whose text
// directly matches the specified regular expression
func ownTextRegexSelector(rx *regexp.Regexp) Selector {
return func(n *html.Node) bool {
return rx.MatchString(nodeOwnText(n))
}
}
// hasChildSelector returns a selector that matches elements
// with a child that matches a.
func hasChildSelector(a Selector) Selector {
return func(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
return hasChildMatch(n, a)
}
}
// hasDescendantSelector returns a selector that matches elements
// with any descendant that matches a.
func hasDescendantSelector(a Selector) Selector {
return func(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
return hasDescendantMatch(n, a)
}
}
// nthChildSelector returns a selector that implements :nth-child(an+b).
// If last is true, implements :nth-last-child instead.
// If ofType is true, implements :nth-of-type instead.
func nthChildSelector(a, b int, last, ofType bool) Selector {
return func(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
parent := n.Parent
if parent == nil {
return false
}
if parent.Type == html.DocumentNode {
return false
}
i := -1
count := 0
for c := parent.FirstChild; c != nil; c = c.NextSibling {
if (c.Type != html.ElementNode) || (ofType && c.Data != n.Data) {
continue
}
count++
if c == n {
i = count
if !last {
break
}
}
}
if i == -1 {
// This shouldn't happen, since n should always be one of its parent's children.
return false
}
if last {
i = count - i + 1
}
i -= b
if a == 0 {
return i == 0
}
return i%a == 0 && i/a >= 0
}
}
// simpleNthChildSelector returns a selector that implements :nth-child(b).
// If ofType is true, implements :nth-of-type instead.
func simpleNthChildSelector(b int, ofType bool) Selector {
return func(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
parent := n.Parent
if parent == nil {
return false
}
if parent.Type == html.DocumentNode {
return false
}
count := 0
for c := parent.FirstChild; c != nil; c = c.NextSibling {
if c.Type != html.ElementNode || (ofType && c.Data != n.Data) {
continue
}
count++
if c == n {
return count == b
}
if count >= b {
return false
}
}
return false
}
}
// simpleNthLastChildSelector returns a selector that implements
// :nth-last-child(b). If ofType is true, implements :nth-last-of-type
// instead.
func simpleNthLastChildSelector(b int, ofType bool) Selector {
return func(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
parent := n.Parent
if parent == nil {
return false
}
if parent.Type == html.DocumentNode {
return false
}
count := 0
for c := parent.LastChild; c != nil; c = c.PrevSibling {
if c.Type != html.ElementNode || (ofType && c.Data != n.Data) {
continue
}
count++
if c == n {
return count == b
}
if count >= b {
return false
}
}
return false
}
}
// onlyChildSelector returns a selector that implements :only-child.
// If ofType is true, it implements :only-of-type instead.
func onlyChildSelector(ofType bool) Selector {
return func(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
parent := n.Parent
if parent == nil {
return false
}
if parent.Type == html.DocumentNode {
return false
}
count := 0
for c := parent.FirstChild; c != nil; c = c.NextSibling {
if (c.Type != html.ElementNode) || (ofType && c.Data != n.Data) {
continue
}
count++
if count > 1 {
return false
}
}
return count == 1
}
}
// inputSelector is a Selector that matches input, select, textarea and button elements.
func inputSelector(n *html.Node) bool {
return n.Type == html.ElementNode && (n.Data == "input" || n.Data == "select" || n.Data == "textarea" || n.Data == "button")
}
// emptyElementSelector is a Selector that matches empty elements.
func emptyElementSelector(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
switch c.Type {
case html.ElementNode, html.TextNode:
return false
}
}
return true
}
// descendantSelector returns a Selector that matches an element if
// it matches d and has an ancestor that matches a.
func descendantSelector(a, d Selector) Selector {
return func(n *html.Node) bool {
if !d(n) {
return false
}
for p := n.Parent; p != nil; p = p.Parent {
if a(p) {
return true
}
}
return false
}
}
// childSelector returns a Selector that matches an element if
// it matches d and its parent matches a.
func childSelector(a, d Selector) Selector {
return func(n *html.Node) bool {
return d(n) && n.Parent != nil && a(n.Parent)
}
}
// siblingSelector returns a Selector that matches an element
// if it matches s2 and in is preceded by an element that matches s1.
// If adjacent is true, the sibling must be immediately before the element.
func siblingSelector(s1, s2 Selector, adjacent bool) Selector {
return func(n *html.Node) bool {
if !s2(n) {
return false
}
if adjacent {
for n = n.PrevSibling; n != nil; n = n.PrevSibling {
if n.Type == html.TextNode || n.Type == html.CommentNode {
continue
}
return s1(n)
}
return false
}
// Walk backwards looking for element that matches s1
for c := n.PrevSibling; c != nil; c = c.PrevSibling {
if s1(c) {
return true
}
}
return false
}
}
// rootSelector implements :root
func rootSelector(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
if n.Parent == nil {
return false
}
return n.Parent.Type == html.DocumentNode
}