commit 2c6ec585d0cdf55b182b09ae5edbbcfcf7fdcfdc
Author: Jay
Date:   Sat Mar 9 20:39:41 2019 +0800

    first

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..58ece17
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,9 @@
+FROM golang:1.12-alpine as builder
+WORKDIR /data
+COPY . .
+RUN apk add --no-cache git make && \
+    go build -o crawler .
+
+FROM geckodriver:latest
+COPY --from=builder /data/crawler /usr/bin/crawler
+CMD ["/usr/bin/crawler"]
\ No newline at end of file
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..6c1d441
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,10 @@
+module git.trj.tw/golang/go-crawler
+
+require (
+	github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802 // indirect
+	github.com/BurntSushi/xgbutil v0.0.0-20160919175755-f7c97cef3b4e // indirect
+	github.com/blang/semver v3.5.1+incompatible // indirect
+	github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b // indirect
+	github.com/google/go-cmp v0.2.0 // indirect
+	github.com/tebeka/selenium v0.9.4-0.20181011202039-edf31bb7fd71
+)
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..db5c2df
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,12 @@
+github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802 h1:1BDTz0u9nC3//pOCMdNH+CiXJVYJh5UQNCOBG7jbELc=
+github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
+github.com/BurntSushi/xgbutil v0.0.0-20160919175755-f7c97cef3b4e h1:4ZrkT/RzpnROylmoQL57iVUL57wGKTR5O6KpVnbm2tA=
+github.com/BurntSushi/xgbutil v0.0.0-20160919175755-f7c97cef3b4e/go.mod h1:uw9h2sd4WWHOPdJ13MQpwK5qYWKYDumDqxWWIknEQ+k=
+github.com/blang/semver v3.5.1+incompatible h1:cQNTCjp13qL8KC3Nbxr/y2Bqb63oX6wdnnjpJbkM4JQ=
+github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk=
+github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58=
+github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
+github.com/google/go-cmp v0.2.0 h1:+dTQ8DZQJz0Mb/HjFlkptS1FeQ4cWSnN941F8aEG4SQ=
+github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
+github.com/tebeka/selenium v0.9.4-0.20181011202039-edf31bb7fd71 h1:GYfP3lFaIaBdXxn7Ygzyo7FByq9Of3/Zk9VHSKe/Hso=
+github.com/tebeka/selenium v0.9.4-0.20181011202039-edf31bb7fd71/go.mod h1:eIMjt8y9rypiIrlx7TAlwwjiL8pr0uqZYQHUxhA2NNE=
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..53ca31c
--- /dev/null
+++ b/main.go
@@ -0,0 +1,65 @@
+package main
+
+import (
+	"fmt"
+	"log"
+
+	"github.com/tebeka/selenium"
+
+	"git.trj.tw/golang/go-crawler/modules/browser"
+)
+
+// main runs the crawler and exits non-zero on failure. All work lives in run
+// so that its deferred cleanup executes before log.Fatal calls os.Exit.
+func main() {
+	if err := run(); err != nil {
+		log.Fatal(err)
+	}
+}
+
+// run starts the Selenium service, opens https://tools.trj.tw in a Firefox
+// session and prints the text of the page's body element.
+func run() error {
+	opts := browser.Options{
+		SeleniumPath: "/selenium.jar",
+		DriverPath:   "/usr/bin/geckodriver",
+		Port:         8080,
+	}
+
+	if err := browser.NewService(opts); err != nil {
+		return err
+	}
+	defer func() {
+		if err := browser.StopService(); err != nil {
+			log.Printf("stop service: %v", err)
+		}
+	}()
+
+	wd, err := browser.NewWD()
+	if err != nil {
+		return err
+	}
+	defer func() {
+		if err := wd.Quit(); err != nil {
+			log.Printf("quit webdriver: %v", err)
+		}
+	}()
+
+	if err := wd.Get("https://tools.trj.tw"); err != nil {
+		return err
+	}
+
+	el, err := wd.FindElement(selenium.ByCSSSelector, "body")
+	if err != nil {
+		return err
+	}
+
+	// Text returns (string, error); check the error instead of printing both.
+	text, err := el.Text()
+	if err != nil {
+		return err
+	}
+
+	fmt.Println(text)
+	return nil
+}
diff --git a/modules/browser/browser.go b/modules/browser/browser.go
new file mode 100644
index 0000000..802f77a
--- /dev/null
+++ b/modules/browser/browser.go
@@ -0,0 +1,79 @@
+package browser
+
+import (
+	"errors"
+	"fmt"
+
+	"github.com/tebeka/selenium"
+)
+
+// Browser bundles a running Selenium service with the options that started it.
+type Browser struct {
+	SVC  *selenium.Service
+	Opts Options
+}
+
+// Options configures the Selenium service started by NewService.
+type Options struct {
+	SeleniumPath string // path to the Selenium server jar
+	DriverPath   string // path to the geckodriver binary
+	Port         int    // TCP port the Selenium server listens on
+}
+
+// svc is the package-wide service instance; nil while no service is running.
+var svc *Browser
+
+// NewService starts a Selenium server backed by GeckoDriver inside an X frame
+// buffer and stores it as the package-wide service. It returns an error if a
+// service is already running (stop it with StopService first) or if the
+// server cannot be started; in both cases the existing state is untouched.
+func NewService(opts Options) error {
+	if svc != nil {
+		return errors.New("service already init")
+	}
+
+	opt := []selenium.ServiceOption{
+		selenium.StartFrameBuffer(),           // Start an X frame buffer for the browser to run in.
+		selenium.GeckoDriver(opts.DriverPath), // Specify the path to GeckoDriver in order to use Firefox.
+	}
+
+	s, err := selenium.NewSeleniumService(opts.SeleniumPath, opts.Port, opt...)
+	if err != nil {
+		return err
+	}
+
+	// Publish only a fully initialised service so a failed start never
+	// leaves a half-built value behind.
+	svc = &Browser{SVC: s, Opts: opts}
+	return nil
+}
+
+// StopService shuts down the running Selenium service, if any. Calling it
+// when no service is running is a no-op. If stopping fails the service is
+// kept so the call can be retried.
+func StopService() error {
+	if svc == nil {
+		return nil
+	}
+	if err := svc.SVC.Stop(); err != nil {
+		return err
+	}
+	svc = nil
+	return nil
+}
+
+// NewWD opens a new Firefox WebDriver session against the local Selenium
+// service. It returns an error if no service is currently running.
+func NewWD() (selenium.WebDriver, error) {
+	if svc == nil {
+		return nil, errors.New("service not init")
+	}
+
+	caps := selenium.Capabilities{"browserName": "firefox"}
+	wd, err := selenium.NewRemote(caps, fmt.Sprintf("http://localhost:%d/wd/hub", svc.Opts.Port))
+	if err != nil {
+		return nil, err
+	}
+
+	return wd, nil
+}