// Package background fetches the list of tracked Facebook fan pages from the
// private API, scrapes each page for its posts and reports a freshly published
// post back to the API.
package background

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"net/http"
	"net/url"
	"regexp"
	"sort"
	"strconv"
	"time"

	"git.trj.tw/golang/fblook/module/config"
	"github.com/PuerkitoBio/goquery"
)

const (
	// maxConcurrentLooks bounds how many pages are scraped at the same time.
	maxConcurrentLooks = 2

	// recentPostWindow is how old (in seconds) the newest post may be and
	// still be forwarded to the API.
	recentPostWindow int32 = 600

	// httpTimeout caps every outgoing request. Without it a stalled
	// connection would hold its concurrency slot forever and eventually block
	// getFacebookPageData on the semaphore send.
	httpTimeout = 30 * time.Second
)

// httpClient is shared by all requests in this package so that they all get
// the same timeout.
var httpClient = &http.Client{Timeout: httpTimeout}

// idRegex lists the patterns used to pull a post ID out of a post link when
// the post content element carries no id attribute.
var idRegex = []*regexp.Regexp{
	regexp.MustCompile(`[\?|&]story_fbid\=(\d+)`),
	regexp.MustCompile(`\/posts\/(\d+)`),
	regexp.MustCompile(`\/photos\/.+?\/(\d+)`),
	regexp.MustCompile(`\/videos\/(\d+)`),
}

// leadingSlash matches a single slash at the start of a link. It is compiled
// once here instead of once per scraped post.
var leadingSlash = regexp.MustCompile(`^\/`)

// PageData - facebook fan page data
//
// One value describes a single post scraped from a fan page.
type PageData struct {
	ID   string // post ID, from the content element's id attribute or the link
	Text string // text of the post content element
	Time int32  // value of the post's data-utime attribute; compared against Unix seconds
	Link string // absolute https://www.facebook.com/ link to the post
}

// byTime implements sort.Interface, ordering posts by ascending Time.
type byTime []*PageData

func (pd byTime) Len() int           { return len(pd) }
func (pd byTime) Swap(i, j int)      { pd[i], pd[j] = pd[j], pd[i] }
func (pd byTime) Less(i, j int) bool { return pd[i].Time < pd[j].Time }

// getFacebookPageData loads the tracked page IDs and starts one scraping
// goroutine per page, with at most maxConcurrentLooks running at once.
//
// The function returns as soon as the last goroutine has been started; it
// does not wait for the scrapers still in flight to finish.
func getFacebookPageData() {
	ids, err := getPageIDs()
	if err != nil {
		fmt.Println("get page ids err ", err)
		return
	}
	fmt.Println(ids)

	// cc is a counting semaphore: a slot is taken here before each goroutine
	// starts and released by lookFacebookPageData when it returns.
	cc := make(chan bool, maxConcurrentLooks)
	for _, id := range ids {
		cc <- true
		go lookFacebookPageData(id, cc)
	}
}

// getPageIDs asks the private API for the list of page IDs to scrape.
//
// It returns an error when the request cannot be built or sent, when the API
// answers with a status other than 200, or when the body is not valid JSON.
// On error the returned slice is nil.
func getPageIDs() (ids []string, err error) {
	conf := config.GetConf()

	base, err := url.Parse(conf.APIURL)
	if err != nil {
		return nil, err
	}
	endpoint, err := base.Parse("/api/private/pages")
	if err != nil {
		return nil, err
	}

	req, err := http.NewRequest("GET", endpoint.String(), nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("X-Mtfos-Key", conf.APIKey)

	resp, err := httpClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	// Report a rejected request instead of returning an empty list with a nil
	// error, which callers cannot tell apart from "no pages configured".
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("get page ids: unexpected status %d", resp.StatusCode)
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}

	var payload struct {
		List []string `json:"list"`
	}
	if err = json.Unmarshal(body, &payload); err != nil {
		return nil, err
	}
	return payload.List, nil
}

// lookFacebookPageData scrapes the public page of pageid, collects its posts
// and, when the newest post is younger than recentPostWindow seconds, sends
// that post to the API.
//
// cc is the semaphore owned by getFacebookPageData; one slot is released when
// this function returns, whatever the outcome.
func lookFacebookPageData(pageid string, cc chan bool) {
	fmt.Println("start look page ::::: ", pageid)
	defer func() {
		<-cc
	}()

	resp, err := httpClient.Get(fmt.Sprintf("https://facebook.com/%s", pageid))
	if err != nil {
		fmt.Println("get page html err ", err)
		return
	}
	defer resp.Body.Close()

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		fmt.Println("parse doc err ", err)
		return
	}

	var pageData []*PageData
	doc.Find("div.userContentWrapper").Each(func(_ int, s *goquery.Selection) {
		if post := parsePost(s); post != nil {
			pageData = append(pageData, post)
		}
	})
	if len(pageData) == 0 {
		return
	}

	// Newest post first.
	sort.Sort(sort.Reverse(byTime(pageData)))
	latest := pageData[0]

	now := int32(time.Now().Unix())
	if now-recentPostWindow < latest.Time {
		sendToAPI(pageid, *latest)
	}
}

// parsePost converts one div.userContentWrapper element into a PageData.
// It returns nil, after printing the reason, when the element lacks a
// timestamp, a link or a post ID, or when the timestamp is not an integer.
func parsePost(s *goquery.Selection) *PageData {
	timeEl := s.Find("abbr")
	utime, ok := timeEl.Attr("data-utime")
	if !ok {
		fmt.Println("time not found")
		return nil
	}

	link, ok := timeEl.Parent().Attr("href")
	if !ok {
		fmt.Println("link not found")
		return nil
	}

	content := s.Find("div.userContent")
	text := content.Text()

	postID, ok := content.First().Attr("id")
	if !ok {
		// No id attribute on the content element: fall back to the link.
		postID, ok = postIDFromLink(link)
		if !ok {
			fmt.Println("id not found")
			return nil
		}
	}

	fmt.Printf("Time: %s / Text: %s / ID: %s \n", utime, text, postID)

	timeInt, err := strconv.ParseInt(utime, 10, 32)
	if err != nil {
		fmt.Println("time parse err ", err)
		return nil
	}

	return &PageData{
		ID:   postID,
		Text: text,
		Time: int32(timeInt),
		Link: fmt.Sprintf("https://www.facebook.com/%s", leadingSlash.ReplaceAllString(link, "")),
	}
}

// postIDFromLink extracts a post ID from link using idRegex. Every pattern is
// tried; when several match, the ID from the last matching pattern is
// returned. The boolean is false when no pattern matches.
func postIDFromLink(link string) (string, bool) {
	id, found := "", false
	for _, re := range idRegex {
		if m := re.FindStringSubmatch(link); m != nil {
			id, found = m[1], true
		}
	}
	return id, found
}

// apiPage is the JSON shape of one post as submitted to the API.
type apiPage struct {
	ID     string `json:"id"` // pageid
	PostID string `json:"post_id"`
	Link   string `json:"link"`
	Text   string `json:"text"`
}

// sendToAPI posts pageData, tagged with pageid, to the private API as a
// single-element "pages" list.
//
// The call is best effort: failures are printed and otherwise ignored, since
// the function has no way to report them to its caller.
func sendToAPI(pageid string, pageData PageData) {
	conf := config.GetConf()

	payload := struct {
		Pages []apiPage `json:"pages"`
	}{
		Pages: []apiPage{{
			ID:     pageid,
			PostID: pageData.ID,
			Link:   pageData.Link,
			Text:   pageData.Text,
		}},
	}

	body, err := json.Marshal(payload)
	if err != nil {
		fmt.Println("marshal page post err ", err)
		return
	}

	base, err := url.Parse(conf.APIURL)
	if err != nil {
		fmt.Println("parse api url err ", err)
		return
	}
	endpoint, err := base.Parse("/api/private/pageposts")
	if err != nil {
		fmt.Println("parse api url err ", err)
		return
	}

	req, err := http.NewRequest("POST", endpoint.String(), bytes.NewReader(body))
	if err != nil {
		fmt.Println("build page post request err ", err)
		return
	}
	req.Header.Set("X-Mtfos-Key", conf.APIKey)
	req.Header.Set("Content-Type", "application/json")

	resp, err := httpClient.Do(req)
	if err != nil {
		fmt.Println("send page post err ", err)
		return
	}
	defer resp.Body.Close()

	// Surface rejected submissions; previously any status was treated as success.
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		fmt.Println("send page post unexpected status ", resp.StatusCode)
	}
}