// Source: mtfosbot/module/background/facebook.go (162 lines, 3.3 KiB, Go)

package background
import (
"fmt"
"log"
"net/http"
"regexp"
"sort"
"strconv"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"git.trj.tw/golang/mtfosbot/model"
"git.trj.tw/golang/mtfosbot/module/apis/line"
)
// idRegex lists the patterns used to pull a numeric post ID out of a post
// permalink when the post element itself carries no id attribute. Every
// pattern captures the ID in its first group:
//
//   - an "id" query parameter (introduced by '?' or '&')
//   - /posts/<id>
//   - /photos/<album>/<id>
//   - /videos/<id>
var idRegex = []*regexp.Regexp{
	// '|' is not an alternation operator inside a character class, so it is
	// left out: only '?' and '&' may introduce the parameter.
	regexp.MustCompile(`[?&]id=(\d+)`),
	regexp.MustCompile(`/posts/(\d+)`),
	regexp.MustCompile(`/photos/.+?/(\d+)`),
	regexp.MustCompile(`/videos/(\d+)`),
}
// PageData holds one post scraped from a Facebook fan page.
type PageData struct {
// ID identifies the post. getPageHTML takes it from the id attribute of the
// post's content element or, failing that, from the post link via idRegex.
ID string
// Text is the text content of the post body.
Text string
// Time is the post's data-utime attribute parsed as a base-10 32-bit
// integer; getPageHTML compares it against the current Unix time in seconds.
Time int32
// Link is the absolute https://www.facebook.com/ URL of the post.
Link string
}
// byTime adapts a slice of posts to sort.Interface, ordering them by
// ascending Time.
type byTime []*PageData

// Len reports how many posts the slice holds.
func (s byTime) Len() int {
	return len(s)
}

// Swap exchanges the posts at positions i and j.
func (s byTime) Swap(i, j int) {
	s[j], s[i] = s[i], s[j]
}

// Less reports whether the post at i has an earlier Time than the post at j.
func (s byTime) Less(i, j int) bool {
	return s[j].Time > s[i].Time
}
// readFacebookPage loads every Facebook page record from the model layer and
// starts one goroutine per page to scrape it. If the pages cannot be loaded
// the error is printed and nothing is started.
func readFacebookPage() {
	pageList, loadErr := model.GetAllFacebookPage()
	if loadErr == nil {
		for _, fbPage := range pageList {
			go getPageHTML(fbPage)
		}
		return
	}
	fmt.Println("get page data err ", loadErr)
}
// facebookFetchTimeout bounds a single page download so that a stalled
// connection cannot keep a scraping goroutine alive indefinitely.
const facebookFetchTimeout = 30 * time.Second

// facebookNotifyWindow is how old, in seconds, the newest post may be and
// still trigger a notification.
const facebookNotifyWindow = 600

// facebookHTTPClient is shared by all page downloads.
var facebookHTTPClient = &http.Client{Timeout: facebookFetchTimeout}

// getPageHTML downloads https://facebook.com/<page.ID>, extracts the posts
// found in the returned HTML and, when the newest post is younger than
// facebookNotifyWindow seconds and differs from page.LastPost, records it via
// page.UpdatePost and pushes a LINE text message to every group of the page
// that has notifications enabled.
//
// Every failure is logged and ends the run for this page; nothing is returned
// to the caller.
func getPageHTML(page *model.FacebookPage) {
	if err := page.GetGroups(); err != nil {
		fmt.Println("get page group err ", err)
		return
	}

	resp, err := facebookHTTPClient.Get(fmt.Sprintf("https://facebook.com/%s", page.ID))
	if err != nil {
		fmt.Println("get page html err ", err)
		return
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		// An error page contains no posts; skip parsing it.
		fmt.Println("get page html err ", resp.Status)
		return
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		fmt.Println("parse doc err ", err)
		return
	}

	var posts []*PageData
	doc.Find("div.userContentWrapper").Each(func(_ int, s *goquery.Selection) {
		if post := parseFacebookPost(s); post != nil {
			posts = append(posts, post)
		}
	})
	if len(posts) == 0 {
		return
	}

	// Newest post first.
	sort.Sort(sort.Reverse(byTime(posts)))
	latest := posts[0]

	// Compare in int64 so the arithmetic cannot wrap around.
	cutoff := time.Now().Unix() - facebookNotifyWindow
	if int64(latest.Time) <= cutoff || latest.ID == page.LastPost {
		return
	}

	if err := page.UpdatePost(latest.ID); err != nil {
		fmt.Println("update last post err ", err)
		return
	}
	notifyFacebookGroups(page, latest)
}

// parseFacebookPost builds a PageData from one post wrapper element. It
// returns nil, after logging the reason, when the timestamp, link or ID of the
// post cannot be determined.
func parseFacebookPost(s *goquery.Selection) *PageData {
	timeEl := s.Find("abbr")
	utime, ok := timeEl.Attr("data-utime")
	if !ok {
		fmt.Println("time not found")
		return nil
	}
	link, ok := timeEl.Parent().Attr("href")
	if !ok {
		fmt.Println("link not found")
		return nil
	}

	content := s.Find("div.userContent")
	text := content.Text()
	postID, ok := content.First().Attr("id")
	if !ok {
		// No id attribute on the content element: fall back to the link.
		if postID, ok = extractFacebookPostID(link); !ok {
			fmt.Println("id not found")
			return nil
		}
	}
	fmt.Printf("Time: %s / Text: %s / ID: %s \n", utime, text, postID)

	seconds, err := strconv.ParseInt(utime, 10, 32)
	if err != nil {
		fmt.Println("time parse err ", err)
		return nil
	}

	return &PageData{
		ID:   postID,
		Text: text,
		Time: int32(seconds),
		// Drop a single leading slash so the joined URL has exactly one.
		Link: "https://www.facebook.com/" + strings.TrimPrefix(link, "/"),
	}
}

// extractFacebookPostID runs every pattern in idRegex against link and returns
// the captured ID. When several patterns match, the one listed last wins,
// which is the precedence this code has always applied.
func extractFacebookPostID(link string) (string, bool) {
	id, found := "", false
	for _, re := range idRegex {
		if m := re.FindStringSubmatch(link); m != nil {
			id, found = m[1], true
		}
	}
	return id, found
}

// formatFacebookMessage renders the notification text for post. An empty
// template yields "<text>\n<link>"; otherwise {link} and {txt} are substituted
// in a single pass, so placeholders occurring inside the substituted values
// are left untouched.
func formatFacebookMessage(tmpl string, post *PageData) string {
	if len(tmpl) == 0 {
		return fmt.Sprintf("%s\n%s", post.Text, post.Link)
	}
	return strings.NewReplacer("{link}", post.Link, "{txt}", post.Text).Replace(tmpl)
}

// notifyFacebookGroups pushes post to every group of page that has Notify set.
// Groups whose bot cannot be resolved are logged and skipped.
func notifyFacebookGroups(page *model.FacebookPage, post *PageData) {
	for _, group := range page.Groups {
		if !group.Notify {
			continue
		}
		bot, err := group.GetBot()
		if err != nil || bot == nil {
			log.Println("get group binding bot fail :: ", err)
			continue
		}
		msg := line.TextMessage{
			Text: formatFacebookMessage(group.Tmpl, post),
		}
		line.PushMessage(bot.AccessToken, group.ID, msg)
	}
}