first
This commit is contained in:
commit
2c6ec585d0
9
Dockerfile
Normal file
9
Dockerfile
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
# Build stage: compile the crawler binary with the Go 1.12 toolchain on Alpine.
FROM golang:1.12-alpine as builder
# All build steps run from /data inside the builder image.
WORKDIR /data
# Copy the entire build context into /data.
COPY . .
# Install git and make, then build the package in /data into ./crawler.
# NOTE(review): presumably git is needed so `go build` can download the module
# dependencies; nothing visible here invokes make — confirm it is required.
RUN apk add --no-cache git make && \
    go build -o crawler .

# Runtime stage: only the compiled binary is taken from the builder stage.
# NOTE(review): `geckodriver:latest` is an unqualified image name — presumably a
# locally built image that ships geckodriver and the Selenium jar; confirm.
FROM geckodriver:latest
COPY --from=builder /data/crawler /usr/bin/crawler
# Run the crawler as the container's default command.
CMD ["/usr/bin/crawler"]
|
10
go.mod
Normal file
10
go.mod
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
module git.trj.tw/golang/go-crawler
|
||||||
|
|
||||||
|
require (
|
||||||
|
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802 // indirect
|
||||||
|
github.com/BurntSushi/xgbutil v0.0.0-20160919175755-f7c97cef3b4e // indirect
|
||||||
|
github.com/blang/semver v3.5.1+incompatible // indirect
|
||||||
|
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b // indirect
|
||||||
|
github.com/google/go-cmp v0.2.0 // indirect
|
||||||
|
github.com/tebeka/selenium v0.9.4-0.20181011202039-edf31bb7fd71
|
||||||
|
)
|
12
go.sum
Normal file
12
go.sum
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802 h1:1BDTz0u9nC3//pOCMdNH+CiXJVYJh5UQNCOBG7jbELc=
|
||||||
|
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
|
||||||
|
github.com/BurntSushi/xgbutil v0.0.0-20160919175755-f7c97cef3b4e h1:4ZrkT/RzpnROylmoQL57iVUL57wGKTR5O6KpVnbm2tA=
|
||||||
|
github.com/BurntSushi/xgbutil v0.0.0-20160919175755-f7c97cef3b4e/go.mod h1:uw9h2sd4WWHOPdJ13MQpwK5qYWKYDumDqxWWIknEQ+k=
|
||||||
|
github.com/blang/semver v3.5.1+incompatible h1:cQNTCjp13qL8KC3Nbxr/y2Bqb63oX6wdnnjpJbkM4JQ=
|
||||||
|
github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk=
|
||||||
|
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58=
|
||||||
|
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
|
||||||
|
github.com/google/go-cmp v0.2.0 h1:+dTQ8DZQJz0Mb/HjFlkptS1FeQ4cWSnN941F8aEG4SQ=
|
||||||
|
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
|
||||||
|
github.com/tebeka/selenium v0.9.4-0.20181011202039-edf31bb7fd71 h1:GYfP3lFaIaBdXxn7Ygzyo7FByq9Of3/Zk9VHSKe/Hso=
|
||||||
|
github.com/tebeka/selenium v0.9.4-0.20181011202039-edf31bb7fd71/go.mod h1:eIMjt8y9rypiIrlx7TAlwwjiL8pr0uqZYQHUxhA2NNE=
|
42
main.go
Normal file
42
main.go
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
|
||||||
|
"github.com/tebeka/selenium"
|
||||||
|
|
||||||
|
"git.trj.tw/golang/go-crawler/modules/browser"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
opts := browser.Options{
|
||||||
|
SeleniumPath: "/selenium.jar",
|
||||||
|
DriverPath: "/usr/bin/geckodriver",
|
||||||
|
Port: 8080,
|
||||||
|
}
|
||||||
|
|
||||||
|
err := browser.NewService(opts)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
defer func() { browser.StopService() }()
|
||||||
|
|
||||||
|
wd, err := browser.NewWD()
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
defer wd.Quit()
|
||||||
|
|
||||||
|
err = wd.Get("https://tools.trj.tw")
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
el, err := wd.FindElement(selenium.ByCSSSelector, "body")
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Println(el.Text())
|
||||||
|
}
|
68
modules/browser/browser.go
Normal file
68
modules/browser/browser.go
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
package browser
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/tebeka/selenium"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Browser bundles a Selenium service handle with the options that were used
// to start it.
type Browser struct {
	SVC  *selenium.Service // service handle returned by selenium.NewSeleniumService
	Opts Options           // options the service was started with
}
|
||||||
|
|
||||||
|
// Options holds the settings NewService uses to start the Selenium service.
type Options struct {
	// SeleniumPath is handed to selenium.NewSeleniumService as the server
	// path; DriverPath is handed to selenium.GeckoDriver.
	SeleniumPath, DriverPath string
	// Port is the port the Selenium service is started on; NewWD connects
	// to the same port on localhost.
	Port int
}
|
||||||
|
|
||||||
|
// svc is the package-wide service instance. It is set by NewService, cleared
// by StopService, and nil when no service is available.
var svc *Browser
|
||||||
|
|
||||||
|
// NewService -
|
||||||
|
func NewService(opts Options) (err error) {
|
||||||
|
opt := []selenium.ServiceOption{
|
||||||
|
selenium.StartFrameBuffer(), // Start an X frame buffer for the browser to run in.
|
||||||
|
selenium.GeckoDriver(opts.DriverPath), // Specify the path to GeckoDriver in order to use Firefox.
|
||||||
|
}
|
||||||
|
|
||||||
|
svc = &Browser{}
|
||||||
|
svc.Opts = opts
|
||||||
|
svc.SVC, err = selenium.NewSeleniumService(opts.SeleniumPath, opts.Port, opt...)
|
||||||
|
if err != nil {
|
||||||
|
svc = nil
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// StopService -
|
||||||
|
func StopService() error {
|
||||||
|
if svc == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if err := svc.SVC.Stop(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
svc = nil
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewWD -
|
||||||
|
func NewWD() (selenium.WebDriver, error) {
|
||||||
|
if svc == nil {
|
||||||
|
return nil, errors.New("service not init")
|
||||||
|
}
|
||||||
|
|
||||||
|
caps := selenium.Capabilities{"browserName": "firefox"}
|
||||||
|
wd, err := selenium.NewRemote(caps, fmt.Sprintf("http://localhost:%d/wd/hub", svc.Opts.Port))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return wd, nil
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user