This commit is contained in:
Jay 2019-03-09 20:39:41 +08:00
commit 2c6ec585d0
5 changed files with 141 additions and 0 deletions

9
Dockerfile Normal file
View File

@ -0,0 +1,9 @@
# Build stage: compile the crawler binary inside the Go 1.12 Alpine image.
FROM golang:1.12-alpine as builder
WORKDIR /data
# Copy the build context into /data.
COPY . .
# Install git and make, then build the binary as /data/crawler.
# NOTE(review): git is presumably needed to fetch Go modules; make does not
# appear to be used by this build step - confirm before removing it.
RUN apk add --no-cache git make && \
go build -o crawler .
# Runtime stage, based on an image named geckodriver:latest.
# NOTE(review): presumably this image provides /usr/bin/geckodriver and
# /selenium.jar, the paths hard-coded in main.go - confirm where it is built.
FROM geckodriver:latest
# Take only the compiled binary from the build stage.
COPY --from=builder /data/crawler /usr/bin/crawler
# Run the crawler by default when the container starts.
CMD ["/usr/bin/crawler"]

10
go.mod Normal file
View File

@ -0,0 +1,10 @@
// Module path; main.go imports the browser package from
// git.trj.tw/golang/go-crawler/modules/browser.
module git.trj.tw/golang/go-crawler
require (
	github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802 // indirect
	github.com/BurntSushi/xgbutil v0.0.0-20160919175755-f7c97cef3b4e // indirect
	github.com/blang/semver v3.5.1+incompatible // indirect
	github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b // indirect
	github.com/google/go-cmp v0.2.0 // indirect
	// The only requirement not marked indirect: the Selenium WebDriver client
	// imported by main.go and the browser package, pinned to a pseudo-version.
	github.com/tebeka/selenium v0.9.4-0.20181011202039-edf31bb7fd71
)

12
go.sum Normal file
View File

@ -0,0 +1,12 @@
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802 h1:1BDTz0u9nC3//pOCMdNH+CiXJVYJh5UQNCOBG7jbELc=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/BurntSushi/xgbutil v0.0.0-20160919175755-f7c97cef3b4e h1:4ZrkT/RzpnROylmoQL57iVUL57wGKTR5O6KpVnbm2tA=
github.com/BurntSushi/xgbutil v0.0.0-20160919175755-f7c97cef3b4e/go.mod h1:uw9h2sd4WWHOPdJ13MQpwK5qYWKYDumDqxWWIknEQ+k=
github.com/blang/semver v3.5.1+incompatible h1:cQNTCjp13qL8KC3Nbxr/y2Bqb63oX6wdnnjpJbkM4JQ=
github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/google/go-cmp v0.2.0 h1:+dTQ8DZQJz0Mb/HjFlkptS1FeQ4cWSnN941F8aEG4SQ=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/tebeka/selenium v0.9.4-0.20181011202039-edf31bb7fd71 h1:GYfP3lFaIaBdXxn7Ygzyo7FByq9Of3/Zk9VHSKe/Hso=
github.com/tebeka/selenium v0.9.4-0.20181011202039-edf31bb7fd71/go.mod h1:eIMjt8y9rypiIrlx7TAlwwjiL8pr0uqZYQHUxhA2NNE=

42
main.go Normal file
View File

@ -0,0 +1,42 @@
package main
import (
"fmt"
"log"
"github.com/tebeka/selenium"
"git.trj.tw/golang/go-crawler/modules/browser"
)
// main runs the crawler and exits with a non-zero status, logging the error,
// if any step fails.
//
// The actual work lives in run so that its deferred cleanup (quitting the
// WebDriver session and stopping the Selenium service) has already executed
// by the time log.Fatal calls os.Exit, which skips deferred functions.
func main() {
	if err := run(); err != nil {
		log.Fatal(err)
	}
}

// run starts the Selenium service, opens a WebDriver session, loads
// https://tools.trj.tw and prints the text of the page's <body> element.
//
// It returns the first error encountered. Cleanup errors are logged rather
// than returned so they never mask the primary failure.
func run() error {
	opts := browser.Options{
		SeleniumPath: "/selenium.jar",
		DriverPath:   "/usr/bin/geckodriver",
		Port:         8080,
	}

	// %v (not %w) is used for wrapping throughout: the Dockerfile builds with
	// Go 1.12, which predates %w support in fmt.Errorf.
	if err := browser.NewService(opts); err != nil {
		return fmt.Errorf("starting selenium service: %v", err)
	}
	defer func() {
		if err := browser.StopService(); err != nil {
			log.Printf("stopping selenium service: %v", err)
		}
	}()

	wd, err := browser.NewWD()
	if err != nil {
		return fmt.Errorf("creating webdriver session: %v", err)
	}
	defer func() {
		if err := wd.Quit(); err != nil {
			log.Printf("quitting webdriver session: %v", err)
		}
	}()

	if err := wd.Get("https://tools.trj.tw"); err != nil {
		return fmt.Errorf("loading page: %v", err)
	}

	el, err := wd.FindElement(selenium.ByCSSSelector, "body")
	if err != nil {
		return fmt.Errorf("finding body element: %v", err)
	}

	// Text returns (string, error); check the error instead of forwarding
	// both values to Println, which would print the error value as well.
	text, err := el.Text()
	if err != nil {
		return fmt.Errorf("reading body text: %v", err)
	}
	fmt.Println(text)
	return nil
}

View File

@ -0,0 +1,68 @@
package browser
import (
"errors"
"fmt"
"github.com/tebeka/selenium"
)
// Browser pairs a Selenium service handle with the options it was started
// with. NewService creates one and stores it in the package-level svc.
type Browser struct {
	// SVC is the service returned by selenium.NewSeleniumService;
	// StopService calls its Stop method.
	SVC *selenium.Service
	// Opts is the configuration passed to NewService; NewWD reads Opts.Port
	// to build the hub URL.
	Opts Options
}
// Options is the configuration accepted by NewService.
type Options struct {
	// SeleniumPath is passed as the first argument to
	// selenium.NewSeleniumService (main.go supplies "/selenium.jar").
	SeleniumPath string
	// DriverPath is passed to selenium.GeckoDriver.
	DriverPath string
	// Port is passed to selenium.NewSeleniumService and is also the
	// localhost port NewWD connects to.
	Port int
}
// svc is the package-wide service instance: set by NewService, cleared by
// StopService, and nil while no service is registered. No locking around it
// is visible in this file.
var svc *Browser
// NewService starts a Selenium server configured with an X frame buffer and
// GeckoDriver, and registers it as the package-wide service used by NewWD
// and StopService.
//
// It returns an error if a service is already registered (call StopService
// first) or if the Selenium server fails to start. The package-level state
// is only updated after a successful start, so a failed call never discards
// the handle of a service that is still running.
func NewService(opts Options) error {
	// Overwriting svc would drop the only handle to the running service,
	// leaving it impossible to stop through StopService.
	if svc != nil {
		return errors.New("service already init")
	}

	serviceOpts := []selenium.ServiceOption{
		selenium.StartFrameBuffer(),           // Start an X frame buffer for the browser to run in.
		selenium.GeckoDriver(opts.DriverPath), // Specify the path to GeckoDriver in order to use Firefox.
	}

	service, err := selenium.NewSeleniumService(opts.SeleniumPath, opts.Port, serviceOpts...)
	if err != nil {
		return err
	}

	svc = &Browser{SVC: service, Opts: opts}
	return nil
}
// StopService stops the registered Selenium service, if there is one, and
// clears the package-level handle.
//
// It is a no-op returning nil when no service is registered. If stopping
// fails, the error is returned and the handle is kept.
func StopService() error {
	if svc != nil {
		stopErr := svc.SVC.Stop()
		if stopErr != nil {
			return stopErr
		}
		svc = nil
	}
	return nil
}
// NewWD opens a new Firefox WebDriver session against the registered
// service's hub endpoint on localhost.
//
// It returns an error if NewService has not registered a service, or if the
// remote session cannot be created.
func NewWD() (selenium.WebDriver, error) {
	if svc == nil {
		return nil, errors.New("service not init")
	}

	hubURL := fmt.Sprintf("http://localhost:%d/wd/hub", svc.Opts.Port)
	driver, err := selenium.NewRemote(selenium.Capabilities{"browserName": "firefox"}, hubURL)
	if err != nil {
		return nil, err
	}
	return driver, nil
}