This commit is contained in:
Jay
2018-09-21 22:58:41 +08:00
commit b9c7083dba
80 changed files with 24440 additions and 0 deletions
+14
View File
@@ -0,0 +1,14 @@
package background
import (
"github.com/robfig/cron"
)
// c holds the cron scheduler created by SetBackground.
var c *cron.Cron

// SetBackground creates a cron scheduler, stores it in the package-level
// variable c, registers getFacebookPageData on it and starts it.
func SetBackground() {
	scheduler := cron.New()
	c = scheduler

	// NOTE(review): presumably a six-field spec with a leading seconds
	// column, i.e. "at second 0 of every minute" — confirm against the cron
	// library version in use.
	scheduler.AddFunc("0 * * * * *", getFacebookPageData)
	scheduler.Start()
}
+231
View File
@@ -0,0 +1,231 @@
package background
import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"net/url"
"regexp"
"sort"
"strconv"
"time"
"git.trj.tw/golang/fblook/module/config"
"github.com/PuerkitoBio/goquery"
)
// idRegex lists the patterns tried against a post link to extract the numeric
// post ID (capture group 1) when the post markup itself carries no id
// attribute.
var idRegex = []*regexp.Regexp{
	// story_fbid query parameter, introduced by '?' or '&'. A '|' inside a
	// character class is a literal pipe, not alternation, so it is left out.
	regexp.MustCompile(`[?&]story_fbid=(\d+)`),
	regexp.MustCompile(`\/posts\/(\d+)`),
	regexp.MustCompile(`\/photos\/.+?\/(\d+)`),
	regexp.MustCompile(`\/videos\/(\d+)`),
}
// PageData describes one post scraped from a Facebook fan page.
type PageData struct {
// ID is the post identifier: the id attribute of the post content element
// or, when that is missing, a number extracted from the post link.
ID string
// Text is the text content of the post.
Text string
// Time is the post's data-utime attribute parsed as a 32-bit integer; it is
// compared against the current Unix time in seconds.
Time int32
// Link is the post URL, prefixed with https://www.facebook.com/.
Link string
}
// byTime implements sort.Interface over a slice of *PageData, ordering the
// entries by ascending Time.
type byTime []*PageData

// Len reports the number of entries.
func (posts byTime) Len() int {
	return len(posts)
}

// Swap exchanges the entries at positions i and j.
func (posts byTime) Swap(i, j int) {
	posts[j], posts[i] = posts[i], posts[j]
}

// Less reports whether entry i has a smaller Time than entry j.
func (posts byTime) Less(i, j int) bool {
	return posts[j].Time > posts[i].Time
}
// getFacebookPageData fetches the list of page IDs and starts one
// lookFacebookPageData goroutine per page, with at most two running at once.
// It returns once the last goroutine has been started; it does not wait for
// the goroutines to finish.
func getFacebookPageData() {
	ids, err := getPageIDs()
	if err != nil {
		// Report the failure instead of silently skipping this run.
		fmt.Println("get page ids err ", err)
		return
	}
	fmt.Println(ids)

	// cc is used as a counting semaphore: a slot is taken here before each
	// goroutine starts and lookFacebookPageData releases it when it returns.
	cc := make(chan bool, 2)
	for _, id := range ids {
		cc <- true
		go lookFacebookPageData(id, cc)
	}
}
// getPageIDs asks the configured API (GET /api/private/pages, authenticated
// with the X-Mtfos-Key header) for the page IDs to watch.
//
// It returns the "list" array of the JSON response. Any failure, including a
// response status other than 200, is reported as a non-nil error, and ids is
// nil in that case.
func getPageIDs() (ids []string, err error) {
	conf := config.GetConf()

	base, err := url.Parse(conf.APIURL)
	if err != nil {
		return nil, err
	}
	endpoint, err := base.Parse("/api/private/pages")
	if err != nil {
		return nil, err
	}

	req, err := http.NewRequest("GET", endpoint.String(), nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("X-Mtfos-Key", conf.APIKey)

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	// A non-200 answer must surface as an error; returning (nil, nil) would
	// make a failed call indistinguishable from an empty page list.
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("get page ids: unexpected status %s", resp.Status)
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}

	payload := struct {
		List []string `json:"list"`
	}{}
	if err = json.Unmarshal(body, &payload); err != nil {
		return nil, err
	}
	return payload.List, nil
}
// lookFacebookPageData downloads the public HTML of the given Facebook page,
// extracts the posts found in it and, when the newest post is less than 600
// seconds old, forwards that post to the API via sendToAPI.
//
// pageid is appended to https://facebook.com/ to build the page URL. cc is
// the semaphore channel shared with the caller; one value is received from it
// when this function returns, whatever the outcome.
func lookFacebookPageData(pageid string, cc chan bool) {
	fmt.Println("start look page ::::: ", pageid)
	// Release the slot the caller took before starting this goroutine.
	defer func() {
		<-cc
	}()

	resp, err := http.Get(fmt.Sprintf("https://facebook.com/%s", pageid))
	if err != nil {
		fmt.Println("get page html err ", err)
		return
	}
	defer resp.Body.Close()

	// Do not try to scrape an error page.
	if resp.StatusCode != http.StatusOK {
		fmt.Println("get page html status ", resp.Status)
		return
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		fmt.Println("parse doc err ", err)
		return
	}

	// Compiled once per call instead of once per post.
	leadingSlash := regexp.MustCompile(`^\/`)

	var pageData []*PageData
	doc.Find("div.userContentWrapper").Each(func(_ int, s *goquery.Selection) {
		timeEl := s.Find("abbr")
		// Named utime so the time package is not shadowed inside the closure.
		utime, timeExists := timeEl.Attr("data-utime")
		if !timeExists {
			fmt.Println("time not found")
			return
		}
		link, linkExists := timeEl.Parent().Attr("href")
		if !linkExists {
			fmt.Println("link not found")
			return
		}

		postContent := s.Find("div.userContent")
		text := postContent.Text()
		postID, idExists := postContent.First().Attr("id")
		if !idExists {
			// Fall back to the link. Every pattern is tried, so when several
			// match, the last one in idRegex decides the ID.
			idFlag := false
			for _, re := range idRegex {
				if m := re.FindStringSubmatch(link); m != nil {
					idFlag = true
					postID = m[1]
				}
			}
			if !idFlag {
				fmt.Println("id not found")
				return
			}
		}
		fmt.Printf("Time: %s / Text: %s / ID: %s \n", utime, text, postID)

		timeInt, err := strconv.ParseInt(utime, 10, 32)
		if err != nil {
			fmt.Println("time parse err ", err)
			return
		}

		pageLink := fmt.Sprintf("https://www.facebook.com/%s", leadingSlash.ReplaceAllString(link, ""))
		pageData = append(pageData, &PageData{
			ID:   postID,
			Text: text,
			Time: int32(timeInt),
			Link: pageLink,
		})
	})
	if len(pageData) == 0 {
		return
	}

	// Newest first.
	sort.Sort(sort.Reverse(byTime(pageData)))
	lastData := pageData[0]

	// Compare in 64 bits so the current time is never truncated.
	now := time.Now().Unix()
	if now-600 < int64(lastData.Time) {
		sendToAPI(pageid, *lastData)
	}
}
// apiPage is the JSON shape of one entry in the "pages" array that sendToAPI
// posts to the API.
type apiPage struct {
ID string `json:"id"` // pageid
// PostID is the ID of the post found on that page.
PostID string `json:"post_id"`
// Link is the URL of the post.
Link string `json:"link"`
// Text is the text content of the post.
Text string `json:"text"`
}
// sendToAPI reports a single post of the given page to the configured API
// (POST /api/private/pageposts, authenticated with the X-Mtfos-Key header).
//
// The request body is {"pages":[{id, post_id, link, text}]}. The function
// returns nothing; failures are printed so that they are not lost silently.
func sendToAPI(pageid string, pageData PageData) {
	conf := config.GetConf()

	payload := struct {
		Pages []apiPage `json:"pages"`
	}{
		Pages: []apiPage{{
			ID:     pageid,
			PostID: pageData.ID,
			Link:   pageData.Link,
			Text:   pageData.Text,
		}},
	}
	jsonByte, err := json.Marshal(payload)
	if err != nil {
		fmt.Println("send to api marshal err ", err)
		return
	}

	base, err := url.Parse(conf.APIURL)
	if err != nil {
		fmt.Println("send to api url err ", err)
		return
	}
	endpoint, err := base.Parse("/api/private/pageposts")
	if err != nil {
		fmt.Println("send to api url err ", err)
		return
	}

	req, err := http.NewRequest("POST", endpoint.String(), bytes.NewReader(jsonByte))
	if err != nil {
		fmt.Println("send to api request err ", err)
		return
	}
	req.Header.Set("X-Mtfos-Key", conf.APIKey)
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("send to api err ", err)
		return
	}
	defer resp.Body.Close()

	// Surface rejected requests (4xx/5xx) instead of ignoring them.
	if resp.StatusCode >= http.StatusBadRequest {
		fmt.Println("send to api status ", resp.Status)
	}
}
+57
View File
@@ -0,0 +1,57 @@
package config
import (
"errors"
"io/ioutil"
"os"
"path"
"git.trj.tw/golang/fblook/module/utils"
yaml "gopkg.in/yaml.v2"
)
// Config is the application configuration decoded from the YAML config file.
type Config struct {
// APIURL is read from the api_url key.
APIURL string `yaml:"api_url"`
// APIKey is read from the api_key key.
APIKey string `yaml:"api_key"`
}
// conf is the configuration stored by LoadConfig and returned by GetConf.
var conf *Config
// LoadConfig reads the YAML configuration file and stores the result for
// GetConf.
//
// p optionally carries the file path in p[0]; when it is absent or empty,
// config.yml in the current working directory is used. An error is returned
// when the working directory cannot be determined, the file does not exist
// (or is a directory), it cannot be read, or it is not valid YAML. The stored
// configuration is replaced only after the file has been decoded
// successfully.
func LoadConfig(p ...string) error {
	var fp string
	if len(p) > 0 && len(p[0]) > 0 {
		fp = p[0]
	} else {
		wd, err := os.Getwd()
		if err != nil {
			return err
		}
		fp = path.Join(wd, "config.yml")
	}

	fp = utils.ParsePath(fp)
	if !utils.CheckExists(fp, false) {
		return errors.New("config file not exists")
	}

	data, err := ioutil.ReadFile(fp)
	if err != nil {
		return err
	}

	// Decode into a local value first so a parse failure cannot leave an
	// empty or half-filled configuration behind in the package-level conf.
	loaded := &Config{}
	if err := yaml.Unmarshal(data, loaded); err != nil {
		return err
	}
	conf = loaded
	return nil
}
// GetConf returns the package-level configuration pointer. In the code shown
// it is only assigned by LoadConfig, so it is nil until LoadConfig has
// stored a value.
func GetConf() *Config {
return conf
}
+26
View File
@@ -0,0 +1,26 @@
package options
import (
"flag"
)
// Options holds the values of the command-line flags registered by RegFlag.
type Options struct {
// Help is bound to the -help flag.
Help bool
// Config is bound to both the -config and -f flags.
Config string
}
// opts is the Options value allocated by RegFlag and returned by GetFlag.
var opts *Options
// RegFlag allocates a fresh Options value and registers the command-line
// flags on the default flag set: -config and -f (both bound to
// Options.Config) and -help (bound to Options.Help). It does not call
// flag.Parse.
func RegFlag() {
	opts = &Options{}
	// Help texts: spelling of "default" and the closing parenthesis fixed.
	flag.StringVar(&opts.Config, "config", "", "config file path (default {PWD}/config.yml)")
	flag.StringVar(&opts.Config, "f", "", "config file path (short) (default {PWD}/config.yml)")
	flag.BoolVar(&opts.Help, "help", false, "show help")
}
// GetFlag returns the package-level Options pointer. In the code shown it is
// only assigned by RegFlag, so it is nil until RegFlag has been called.
func GetFlag() *Options {
return opts
}
+127
View File
@@ -0,0 +1,127 @@
package utils
import (
"math"
"os"
"path"
"reflect"
"regexp"
"runtime"
"strings"
)
// PageObject is the pagination descriptor produced by CalcPage.
type PageObject struct {
	Page   int `json:"page" cc:"page"`     // current page, starting at 1
	Total  int `json:"total" cc:"total"`   // number of pages, at least 1
	Offset int `json:"offset" cc:"offset"` // index of the first item of the page
	Limit  int `json:"limit" cc:"limit"`   // page size
}

// CalcPage computes the pagination descriptor for count items, shown max per
// page, when page is requested.
//
// Out-of-range arguments are clamped: count to at least 0, max to at least 1
// and page into [1, Total]. Total is never less than 1, even when count is 0.
func CalcPage(count, page, max int) (po PageObject) {
	if count < 0 {
		count = 0
	}
	if max < 1 {
		max = 1
	}

	pages := int(math.Ceil(float64(count) / float64(max)))
	if pages < 1 {
		pages = 1
	}

	// Clamp the requested page into [1, pages].
	switch {
	case page < 1:
		page = 1
	case page > pages:
		page = pages
	}

	start := (page - 1) * max
	if start > count {
		start = count
	}

	return PageObject{
		Page:   page,
		Total:  pages,
		Offset: start,
		Limit:  max,
	}
}
// ccTagPattern extracts the value of the cc key from a raw struct tag. The
// value stops at the first closing quote, so keys that follow cc in the same
// tag are not swallowed into the name.
var ccTagPattern = regexp.MustCompile(`(?:^|\s)cc:"([^"]+)"`)

// ToMap converts a struct, or a pointer to a struct, into a
// map[string]interface{} with one entry per exported field.
//
// The key is the value of the field's cc tag when present, otherwise the Go
// field name. Fields tagged cc:"-" and unexported fields are skipped. A nil
// value, a nil pointer or anything that is not a struct yields an empty,
// non-nil map.
func ToMap(ss interface{}) map[string]interface{} {
	smap := make(map[string]interface{})

	v := reflect.ValueOf(ss)
	if v.Kind() == reflect.Ptr {
		v = v.Elem()
	}
	// NumField panics on anything but a struct (including the invalid Value
	// obtained from nil), so bail out early.
	if v.Kind() != reflect.Struct {
		return smap
	}

	st := v.Type()
	for i := 0; i < v.NumField(); i++ {
		field := st.Field(i)
		// PkgPath is set only for unexported fields, whose values cannot be
		// read through Interface().
		if field.PkgPath != "" {
			continue
		}
		name := field.Name
		if m := ccTagPattern.FindStringSubmatch(string(field.Tag)); m != nil {
			name = m[1]
		}
		if name != "-" {
			smap[name] = v.Field(i).Interface()
		}
	}
	return smap
}
// ParsePath turns a file path into a cleaned absolute path.
//
// A leading "~" is replaced by the directory reported by UserHomeDir, when
// that is non-empty; only the leading tilde is expanded, later tildes are
// kept as they are. An absolute path is returned cleaned. A relative path,
// including the empty string, is joined onto the current working directory;
// if the working directory cannot be determined the path is only cleaned.
//
// NOTE(review): paths are handled with the slash-based path package, and the
// "~user" form is not treated specially — confirm this is acceptable for the
// platforms in use.
func ParsePath(dst string) string {
	// HasPrefix is safe on the empty string, unlike indexing the first rune.
	if strings.HasPrefix(dst, "~") {
		if home := UserHomeDir(); len(home) > 0 {
			dst = strings.Replace(dst, "~", home, 1)
		}
	}

	if path.IsAbs(dst) {
		return path.Clean(dst)
	}

	wd, err := os.Getwd()
	if err != nil {
		wd = ""
	}
	return path.Clean(path.Join(wd, dst))
}

// UserHomeDir returns the user's home directory as given by the environment:
// USERPROFILE on windows, home on plan9 and HOME everywhere else. The result
// is empty when the variable is not set.
func UserHomeDir() string {
	switch runtime.GOOS {
	case "windows":
		return os.Getenv("USERPROFILE")
	case "plan9":
		return os.Getenv("home")
	default:
		return os.Getenv("HOME")
	}
}
// CheckExists reports whether filePath exists.
//
// The path is first normalised with ParsePath. When allowDir is false a
// directory counts as "not existing". Any error from os.Stat (missing file,
// permission problem, ...) yields false.
func CheckExists(filePath string, allowDir bool) bool {
	filePath = ParsePath(filePath)
	stat, err := os.Stat(filePath)
	// On any Stat error the FileInfo is nil, so it must not be inspected.
	if err != nil {
		return false
	}
	if !allowDir && stat.IsDir() {
		return false
	}
	return true
}