fblook/module/background/facebook.go

232 lines
4.4 KiB
Go

package background
import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"net/url"
"regexp"
"sort"
"strconv"
"time"
"git.trj.tw/golang/fblook/module/config"
"github.com/PuerkitoBio/goquery"
)
// idRegex lists the patterns used to pull a numeric post ID out of a post
// link when the post's content element carries no id attribute. Every
// pattern captures the ID in its first submatch.
var idRegex = []*regexp.Regexp{
	// Query-string form: "?story_fbid=123" or "&story_fbid=123". The
	// character class holds only '?' and '&'; a '|' inside a class is a
	// literal pipe, not an alternation.
	regexp.MustCompile(`[?&]story_fbid=(\d+)`),
	// ".../posts/123"
	regexp.MustCompile(`/posts/(\d+)`),
	// ".../photos/<anything>/123"
	regexp.MustCompile(`/photos/.+?/(\d+)`),
	// ".../videos/123"
	regexp.MustCompile(`/videos/(\d+)`),
}
// PageData holds one post scraped from a Facebook fan page.
type PageData struct {
// ID is the post ID: the id attribute of the post's div.userContent
// element, or an ID extracted from the post link when that is missing.
ID string
// Text is the text content of the post's div.userContent element.
Text string
// Time is the post's data-utime attribute parsed as an integer. It is
// compared against time.Now().Unix(), i.e. treated as Unix seconds.
Time int32
// Link is the URL of the post on www.facebook.com, built from the href
// found next to the post's timestamp.
Link string
}
// byTime adapts a slice of *PageData to sort.Interface, ordering the
// entries by ascending Time.
type byTime []*PageData

// Len reports how many entries the slice holds.
func (s byTime) Len() int {
	return len(s)
}

// Swap exchanges the entries at positions i and j.
func (s byTime) Swap(i, j int) {
	s[j], s[i] = s[i], s[j]
}

// Less reports whether entry i has a strictly earlier Time than entry j.
func (s byTime) Less(i, j int) bool {
	return s[j].Time > s[i].Time
}
// getFacebookPageData fetches the list of page IDs from the API and starts
// one lookFacebookPageData goroutine per page.
//
// At most two lookups run at the same time: a slot is taken from cc before
// each goroutine starts, and lookFacebookPageData gives it back when it
// finishes. The function does not wait for the final lookups, so it can
// return while up to two of them are still running.
func getFacebookPageData() {
	ids, err := getPageIDs()
	if err != nil {
		// Report the failure instead of dropping it silently.
		fmt.Println("get page ids err ", err)
		return
	}

	cc := make(chan bool, 2)
	fmt.Println(ids)
	for _, id := range ids {
		cc <- true // blocks while two lookups are already in flight
		go lookFacebookPageData(id, cc)
	}
}
// getPageIDs asks the API for the list of Facebook page IDs to look at.
//
// It sends GET /api/private/pages (resolved against conf.APIURL) with the
// configured API key in the X-Mtfos-Key header and decodes a JSON body of
// the form {"list": [...]}.
//
// A request failure, a non-200 status, or an undecodable body is returned
// as an error, and ids is nil in that case.
func getPageIDs() (ids []string, err error) {
	conf := config.GetConf()

	base, err := url.Parse(conf.APIURL)
	if err != nil {
		return nil, err
	}
	endpoint, err := base.Parse("/api/private/pages")
	if err != nil {
		return nil, err
	}

	req, err := http.NewRequest("GET", endpoint.String(), nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("X-Mtfos-Key", conf.APIKey)

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	// A non-200 answer used to yield (nil, nil), which callers could not
	// tell apart from a valid empty list.
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("get page ids: unexpected status %s", resp.Status)
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}

	var payload struct {
		List []string `json:"list"`
	}
	if err = json.Unmarshal(body, &payload); err != nil {
		return nil, err
	}
	return payload.List, nil
}
// fbLinkLeadingSlash matches the single "/" that starts a site-relative
// link. It is compiled once here rather than once per scraped post.
var fbLinkLeadingSlash = regexp.MustCompile(`^\/`)

// lookFacebookPageData downloads the public HTML of one Facebook page,
// collects the posts found in it, and forwards the newest post to
// sendToAPI when its timestamp is less than 600 seconds old.
//
// pageid is appended to https://facebook.com/ to form the page URL.
// cc is the concurrency limiter shared with the caller: one value is
// received from it when this function returns, whatever the outcome.
func lookFacebookPageData(pageid string, cc chan bool) {
	fmt.Println("start look page ::::: ", pageid)
	// Give the slot back on every exit path.
	defer func() {
		<-cc
	}()

	resp, err := http.Get(fmt.Sprintf("https://facebook.com/%s", pageid))
	if err != nil {
		fmt.Println("get page html err ", err)
		return
	}
	defer resp.Body.Close()

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		fmt.Println("parse doc err ", err)
		return
	}

	var pageData []*PageData
	doc.Find("div.userContentWrapper").Each(func(_ int, s *goquery.Selection) {
		if post := parseFacebookPost(s); post != nil {
			pageData = append(pageData, post)
		}
	})
	if len(pageData) == 0 {
		return
	}

	// Newest first, so index 0 is the latest post.
	sort.Sort(sort.Reverse(byTime(pageData)))
	lastData := pageData[0]
	t := int32(time.Now().Unix())
	if (t - 600) < lastData.Time {
		sendToAPI(pageid, *lastData)
	}
}

// parseFacebookPost builds a PageData from one div.userContentWrapper
// element. It logs the reason and returns nil when the timestamp, the link,
// or the post ID cannot be determined.
func parseFacebookPost(s *goquery.Selection) *PageData {
	timeEl := s.Find("abbr")
	utime, ok := timeEl.Attr("data-utime")
	if !ok {
		fmt.Println("time not found")
		return nil
	}
	// The href is read from the parent element of the timestamp <abbr>.
	link, ok := timeEl.Parent().Attr("href")
	if !ok {
		fmt.Println("link not found")
		return nil
	}

	postContent := s.Find("div.userContent")
	text := postContent.Text()
	postID, ok := postContent.First().Attr("id")
	if !ok {
		// No id attribute on the content element: fall back to the link.
		postID, ok = facebookPostIDFromLink(link)
		if !ok {
			fmt.Println("id not found")
			return nil
		}
	}

	fmt.Printf("Time: %s / Text: %s / ID: %s \n", utime, text, postID)
	timeInt, err := strconv.ParseInt(utime, 10, 32)
	if err != nil {
		fmt.Println("time parse err ", err)
		return nil
	}

	return &PageData{
		ID:   postID,
		Text: text,
		Time: int32(timeInt),
		Link: facebookPostLink(link),
	}
}

// facebookPostIDFromLink extracts a post ID from link using idRegex. When
// several patterns match, the last matching pattern in idRegex wins.
func facebookPostIDFromLink(link string) (string, bool) {
	id, found := "", false
	for _, re := range idRegex {
		// FindStringSubmatch returns nil when there is no match, so a single
		// call both tests the pattern and yields the captured ID.
		if m := re.FindStringSubmatch(link); m != nil {
			id, found = m[1], true
		}
	}
	return id, found
}

// facebookPostLink turns the href of a post into a full URL. An href that
// is already an absolute http(s) URL is returned unchanged; anything else
// has one leading "/" removed and is appended to https://www.facebook.com/.
func facebookPostLink(link string) string {
	if u, err := url.Parse(link); err == nil && (u.Scheme == "http" || u.Scheme == "https") {
		return link
	}
	return fmt.Sprintf("https://www.facebook.com/%s", fbLinkLeadingSlash.ReplaceAllString(link, ""))
}
// apiPage is the JSON shape of one entry in the "pages" array that
// sendToAPI posts to the API.
type apiPage struct {
ID string `json:"id"` // page ID the post belongs to
PostID string `json:"post_id"` // ID of the post (PageData.ID)
Link string `json:"link"` // URL of the post (PageData.Link)
Text string `json:"text"` // text of the post (PageData.Text)
}
// sendToAPI reports one page post to the API.
//
// It POSTs a JSON body of the form
//
//	{"pages": [{"id": ..., "post_id": ..., "link": ..., "text": ...}]}
//
// to /api/private/pageposts (resolved against conf.APIURL), with the
// configured API key in the X-Mtfos-Key header.
//
// pageid is the page the post belongs to; pageData supplies the post ID,
// link and text. The function returns nothing, so failures and non-2xx
// responses are logged rather than dropped.
func sendToAPI(pageid string, pageData PageData) {
	conf := config.GetConf()

	payload := struct {
		Pages []apiPage `json:"pages"`
	}{
		Pages: []apiPage{{
			ID:     pageid,
			PostID: pageData.ID,
			Link:   pageData.Link,
			Text:   pageData.Text,
		}},
	}
	jsonByte, err := json.Marshal(payload)
	if err != nil {
		fmt.Println("send to api encode err ", err)
		return
	}

	base, err := url.Parse(conf.APIURL)
	if err != nil {
		fmt.Println("send to api url err ", err)
		return
	}
	endpoint, err := base.Parse("/api/private/pageposts")
	if err != nil {
		fmt.Println("send to api url err ", err)
		return
	}

	req, err := http.NewRequest("POST", endpoint.String(), bytes.NewReader(jsonByte))
	if err != nil {
		fmt.Println("send to api request err ", err)
		return
	}
	req.Header.Set("X-Mtfos-Key", conf.APIKey)
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("send to api err ", err)
		return
	}
	defer resp.Body.Close()

	// Any 2xx counts as accepted; anything else is worth a log line.
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		fmt.Println("send to api unexpected status ", resp.Status)
	}
}