232 lines
4.4 KiB
Go
232 lines
4.4 KiB
Go
package background
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"net/http"
|
|
"net/url"
|
|
"regexp"
|
|
"sort"
|
|
"strconv"
|
|
"time"
|
|
|
|
"git.trj.tw/golang/fblook/module/config"
|
|
"github.com/PuerkitoBio/goquery"
|
|
)
|
|
|
|
// idRegex lists the patterns used to pull a numeric post ID out of a post's
// permalink when the post markup itself carries no id attribute. Every
// pattern has exactly one capture group, which holds the ID.
var idRegex = []*regexp.Regexp{
	// story_fbid query parameter, introduced by '?' or '&'. Inside a
	// character class '|' is a literal, not an alternation, so the class is
	// written [?&] to avoid accepting "|story_fbid=".
	regexp.MustCompile(`[?&]story_fbid=(\d+)`),
	// .../posts/<id>
	regexp.MustCompile(`/posts/(\d+)`),
	// .../photos/<album or set>/<id>
	regexp.MustCompile(`/photos/.+?/(\d+)`),
	// .../videos/<id>
	regexp.MustCompile(`/videos/(\d+)`),
}
|
|
|
|
// PageData describes a single post collected from a Facebook fan page.
type PageData struct {
	// ID is the post identifier: the id attribute of the post content
	// element, or the number captured from the post link by idRegex.
	ID string
	// Text is the text of the post content element.
	Text string
	// Time is the integer parsed from the post's data-utime attribute
	// (presumably Unix seconds: it is compared against time.Now().Unix()).
	Time int32
	// Link is the post href prefixed with https://www.facebook.com/.
	Link string
}
|
|
|
|
type byTime []*PageData
|
|
|
|
func (pd byTime) Len() int { return len(pd) }
|
|
func (pd byTime) Swap(i, j int) { pd[i], pd[j] = pd[j], pd[i] }
|
|
func (pd byTime) Less(i, j int) bool { return pd[i].Time < pd[j].Time }
|
|
|
|
func getFacebookPageData() {
|
|
ids, err := getPageIDs()
|
|
if err != nil {
|
|
return
|
|
}
|
|
cc := make(chan bool, 2)
|
|
fmt.Println(ids)
|
|
for _, v := range ids {
|
|
cc <- true
|
|
go lookFacebookPageData(v, cc)
|
|
}
|
|
}
|
|
|
|
func getPageIDs() (ids []string, err error) {
|
|
tmpStruct := struct {
|
|
List []string `json:"list"`
|
|
}{}
|
|
|
|
conf := config.GetConf()
|
|
u, err := url.Parse(conf.APIURL)
|
|
if err != nil {
|
|
return
|
|
}
|
|
u, err = u.Parse("/api/private/pages")
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
req, err := http.NewRequest("GET", u.String(), nil)
|
|
if err != nil {
|
|
return
|
|
}
|
|
req.Header.Set("X-Mtfos-Key", conf.APIKey)
|
|
|
|
resp, err := http.DefaultClient.Do(req)
|
|
if err != nil {
|
|
return
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != 200 {
|
|
return
|
|
}
|
|
|
|
body, err := ioutil.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
err = json.Unmarshal(body, &tmpStruct)
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
ids = tmpStruct.List
|
|
|
|
return
|
|
}
|
|
|
|
func lookFacebookPageData(pageid string, cc chan bool) {
|
|
fmt.Println("start look page ::::: ", pageid)
|
|
defer func() {
|
|
<-cc
|
|
}()
|
|
resp, err := http.Get(fmt.Sprintf("https://facebook.com/%s", pageid))
|
|
if err != nil {
|
|
fmt.Println("get page html err ", err)
|
|
return
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
|
if err != nil {
|
|
fmt.Println("parse doc err ", err)
|
|
return
|
|
}
|
|
var pageData []*PageData
|
|
sel := doc.Find("div.userContentWrapper")
|
|
sel.Each(func(idx int, s *goquery.Selection) {
|
|
timeEl := s.Find("abbr")
|
|
time, timeExists := timeEl.Attr("data-utime")
|
|
if !timeExists {
|
|
fmt.Println("time not found")
|
|
return
|
|
}
|
|
link, linkExists := timeEl.Parent().Attr("href")
|
|
if !linkExists {
|
|
fmt.Println("link not found")
|
|
return
|
|
}
|
|
postContent := s.Find("div.userContent")
|
|
text := postContent.Text()
|
|
postID, idExists := postContent.First().Attr("id")
|
|
|
|
if !idExists {
|
|
idFlag := false
|
|
for _, v := range idRegex {
|
|
if v.MatchString(link) {
|
|
idFlag = true
|
|
m := v.FindStringSubmatch(link)
|
|
postID = m[1]
|
|
}
|
|
}
|
|
if !idFlag {
|
|
fmt.Println("id not found")
|
|
return
|
|
}
|
|
}
|
|
fmt.Printf("Time: %s / Text: %s / ID: %s \n", time, text, postID)
|
|
|
|
timeInt, err := strconv.ParseInt(time, 10, 32)
|
|
|
|
if err != nil {
|
|
fmt.Println("time parse err ", err)
|
|
return
|
|
}
|
|
|
|
re := regexp.MustCompile(`^\/`)
|
|
pageLink := fmt.Sprintf("https://www.facebook.com/%s", re.ReplaceAllString(link, ""))
|
|
|
|
data := &PageData{
|
|
ID: postID,
|
|
Text: text,
|
|
Time: int32(timeInt),
|
|
Link: pageLink,
|
|
}
|
|
|
|
pageData = append(pageData, data)
|
|
})
|
|
|
|
if len(pageData) == 0 {
|
|
return
|
|
}
|
|
|
|
sort.Sort(sort.Reverse(byTime(pageData)))
|
|
|
|
lastData := pageData[0]
|
|
t := int32(time.Now().Unix())
|
|
|
|
if (t - 600) < lastData.Time {
|
|
sendToAPI(pageid, *lastData)
|
|
}
|
|
}
|
|
|
|
// apiPage is the JSON shape of one post as it is sent to the API.
type apiPage struct {
	// ID is the ID of the page the post belongs to.
	ID string `json:"id"`
	// PostID is the ID of the post.
	PostID string `json:"post_id"`
	// Link is the URL of the post.
	Link string `json:"link"`
	// Text is the text of the post.
	Text string `json:"text"`
}
|
|
|
|
func sendToAPI(pageid string, pageData PageData) {
|
|
conf := config.GetConf()
|
|
pagesStruct := struct {
|
|
Pages []apiPage `json:"pages"`
|
|
}{}
|
|
|
|
jsonStruct := apiPage{}
|
|
jsonStruct.ID = pageid
|
|
jsonStruct.PostID = pageData.ID
|
|
jsonStruct.Link = pageData.Link
|
|
jsonStruct.Text = pageData.Text
|
|
|
|
pagesStruct.Pages = make([]apiPage, 1)
|
|
pagesStruct.Pages[0] = jsonStruct
|
|
|
|
jsonByte, err := json.Marshal(pagesStruct)
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
u, err := url.Parse(conf.APIURL)
|
|
if err != nil {
|
|
return
|
|
}
|
|
u, err = u.Parse("/api/private/pageposts")
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
req, err := http.NewRequest("POST", u.String(), bytes.NewReader(jsonByte))
|
|
if err != nil {
|
|
return
|
|
}
|
|
req.Header.Set("X-Mtfos-Key", conf.APIKey)
|
|
req.Header.Set("Content-Type", "application/json")
|
|
|
|
resp, err := http.DefaultClient.Do(req)
|
|
if err != nil {
|
|
return
|
|
}
|
|
defer resp.Body.Close()
|
|
}
|