mtfosbot/module/background/facebook.go

156 lines
3.2 KiB
Go
Raw Normal View History

2018-09-10 10:13:27 +00:00
package background
import (
"fmt"
"net/http"
"regexp"
"sort"
"strconv"
2018-09-11 09:58:08 +00:00
"strings"
2018-09-10 15:12:18 +00:00
"time"
2018-09-10 10:13:27 +00:00
"github.com/PuerkitoBio/goquery"
"git.trj.tw/golang/mtfosbot/model"
2018-09-11 09:58:08 +00:00
"git.trj.tw/golang/mtfosbot/module/apis/line"
2018-09-10 10:13:27 +00:00
)
// idRegex lists the patterns used to pull a numeric post ID out of a post
// permalink when the post markup itself carries no id attribute. Each pattern
// captures the ID in its first submatch.
var idRegex = []*regexp.Regexp{
	// Query-string form: "?id=123" or "&id=123". Inside a character class "|"
	// is a literal, not alternation, so it must not be listed here.
	regexp.MustCompile(`[\?&]id\=(\d+)`),
	// ".../posts/123"
	regexp.MustCompile(`\/posts\/(\d+)`),
	// ".../photos/<album>/123"
	regexp.MustCompile(`\/photos\/.+?\/(\d+)`),
	// ".../videos/123"
	regexp.MustCompile(`\/videos\/(\d+)`),
}
2018-09-11 09:58:08 +00:00
// PageData holds the fields scraped from a single post on a Facebook fan page.
type PageData struct {
	// ID identifies the post; it comes from the post markup or, failing that,
	// from the post's permalink.
	ID string
	// Text is the text content of the post body.
	Text string
	// Time is the post timestamp as read from the page's "data-utime"
	// attribute; it is compared against Unix seconds.
	Time int32
	// Link is the absolute URL of the post.
	Link string
}
// byTime implements sort.Interface for a slice of posts, ordering them by
// ascending Time value.
type byTime []*PageData

// Len reports how many posts the slice holds.
func (s byTime) Len() int {
	return len(s)
}

// Swap exchanges the posts at positions i and j.
func (s byTime) Swap(i, j int) {
	s[j], s[i] = s[i], s[j]
}

// Less reports whether the post at i has a strictly smaller Time than the
// post at j.
func (s byTime) Less(i, j int) bool {
	return s[j].Time > s[i].Time
}
func readFacebookPage() {
pages, err := model.GetAllFacebookPage()
if err != nil {
2018-09-21 00:38:49 +00:00
fmt.Println("get page data err ", err)
2018-09-10 10:13:27 +00:00
return
}
for _, v := range pages {
go getPageHTML(v)
}
}
func getPageHTML(page *model.FacebookPage) {
2018-09-10 15:12:18 +00:00
err := page.GetGroups()
if err != nil {
2018-09-21 00:38:49 +00:00
fmt.Println("get page group err ", err)
2018-09-10 15:12:18 +00:00
return
}
2018-09-21 10:36:09 +00:00
resp, err := http.Get(fmt.Sprintf("https://facebook.com/%s", page.ID))
2018-09-10 10:13:27 +00:00
if err != nil {
2018-09-21 00:38:49 +00:00
fmt.Println("get page html err ", err)
2018-09-10 10:13:27 +00:00
return
}
defer resp.Body.Close()
2018-09-21 05:37:39 +00:00
2018-09-21 10:36:09 +00:00
doc, err := goquery.NewDocumentFromReader(resp.Body)
2018-09-10 10:13:27 +00:00
if err != nil {
2018-09-21 00:38:49 +00:00
fmt.Println("parse doc err ", err)
2018-09-10 10:13:27 +00:00
return
}
var pageData []*PageData
2018-09-21 05:37:39 +00:00
sel := doc.Find("div.userContentWrapper")
sel.Each(func(idx int, s *goquery.Selection) {
2018-09-10 10:13:27 +00:00
timeEl := s.Find("abbr")
time, timeExists := timeEl.Attr("data-utime")
if !timeExists {
2018-09-21 00:38:49 +00:00
fmt.Println("time not found")
2018-09-10 10:13:27 +00:00
return
}
link, linkExists := timeEl.Parent().Attr("href")
if !linkExists {
2018-09-21 00:38:49 +00:00
fmt.Println("link not found")
2018-09-10 10:13:27 +00:00
return
}
postContent := s.Find("div.userContent")
text := postContent.Text()
postID, idExists := postContent.First().Attr("id")
if !idExists {
idFlag := false
for _, v := range idRegex {
if v.MatchString(link) {
idFlag = true
m := v.FindStringSubmatch(link)
postID = m[1]
}
}
if !idFlag {
2018-09-21 00:38:49 +00:00
fmt.Println("id not found")
2018-09-10 10:13:27 +00:00
return
}
}
2018-09-21 10:36:09 +00:00
fmt.Printf("Time: %s / Text: %s / ID: %s \n", time, text, postID)
2018-09-10 10:13:27 +00:00
2018-09-10 15:12:18 +00:00
timeInt, err := strconv.ParseInt(time, 10, 32)
2018-09-10 10:13:27 +00:00
if err != nil {
2018-09-21 00:38:49 +00:00
fmt.Println("time parse err ", err)
2018-09-10 10:13:27 +00:00
return
}
re := regexp.MustCompile(`^\/`)
pageLink := fmt.Sprintf("https://www.facebook.com/%s", re.ReplaceAllString(link, ""))
data := &PageData{
ID: postID,
Text: text,
2018-09-10 15:12:18 +00:00
Time: int32(timeInt),
2018-09-10 10:13:27 +00:00
Link: pageLink,
}
pageData = append(pageData, data)
})
if len(pageData) == 0 {
return
}
sort.Sort(sort.Reverse(byTime(pageData)))
2018-09-10 15:12:18 +00:00
lastData := pageData[0]
t := int32(time.Now().Unix())
2018-09-11 09:58:08 +00:00
if (t-600) < lastData.Time && lastData.ID != page.LastPost {
err = page.UpdatePost(lastData.ID)
if err != nil {
return
}
2018-09-10 15:12:18 +00:00
2018-09-11 09:58:08 +00:00
for _, v := range page.Groups {
if v.Notify {
tmpl := v.Tmpl
if len(tmpl) > 0 {
tmpl = strings.Replace(tmpl, "{link}", lastData.Link, -1)
tmpl = strings.Replace(tmpl, "{txt}", lastData.Text, -1)
} else {
tmpl = fmt.Sprintf("%s\n%s", lastData.Text, lastData.Link)
}
msg := line.TextMessage{
Text: tmpl,
}
line.PushMessage(v.ID, msg)
}
}
2018-09-10 15:12:18 +00:00
}
2018-09-10 10:13:27 +00:00
}