package facebook import ( "errors" "fmt" "net/url" "regexp" "sort" "strconv" "strings" "git.trj.tw/golang/go-crawler/modules/browser" "github.com/tebeka/selenium" ) var fbURL = "https://www.facebook.com" // PostInfo - type PostInfo struct { Text string ID string Link string Time int64 } // ByPostInfo - type ByPostInfo []PostInfo // Len - func (p ByPostInfo) Len() int { return len(p) } // Swap - func (p ByPostInfo) Swap(i, j int) { p[i], p[j] = p[j], p[i] } // Less - func (p ByPostInfo) Less(i, j int) bool { return p[i].Time < p[j].Time } var ( idRegexps []*regexp.Regexp qsRegexp *regexp.Regexp ) func init() { idRegexps = make([]*regexp.Regexp, 0, 4) idRegexps = append(idRegexps, regexp.MustCompile("[\\?|&]story_fbid\\=(\\d+)"), regexp.MustCompile("\\/posts\\/(\\d+)"), regexp.MustCompile("\\/photos\\/.+?\\/(\\d+)"), regexp.MustCompile("\\/videos\\/(\\d+)")) qsRegexp = regexp.MustCompile("id") } // GetLastPost - func GetLastPost(page string) (info *PostInfo, err error) { if len(page) == 0 { return nil, errors.New("page id is empty") } pageURL, err := url.Parse(fbURL) if err != nil { return nil, err } pageURL, err = pageURL.Parse(fmt.Sprintf("/%s/posts", page)) wd, err := browser.NewWD() if err != nil { return nil, err } defer wd.Quit() err = wd.Get(pageURL.String()) if err != nil { return nil, err } elements, err := wd.FindElements(selenium.ByCSSSelector, "div.userContentWrapper") if err != nil { return nil, err } _ = elements posts := make([]PostInfo, 0, len(elements)) for _, el := range elements { post := PostInfo{} // get timestamp timeEl, err := el.FindElement(selenium.ByTagName, "abbr") if err != nil { continue } timeStr, err := timeEl.GetAttribute("data-utime") if err != nil { continue } timestamp, err := strconv.Atoi(timeStr) if err != nil { continue } post.Time = int64(timestamp) // get link element linkEl, err := timeEl.FindElement(selenium.ByXPATH, "..") if err != nil { continue } post.Link, err = linkEl.GetAttribute("href") if err != nil { continue } // get post content postEl, err := el.FindElement(selenium.ByCSSSelector, "div.userContent") if err != nil { continue } post.Text, err = postEl.Text() if err != nil { continue } // get post id postChile, err := postEl.FindElement(selenium.ByXPATH, "./*") if err != nil { continue } id, err := postChile.GetAttribute("id") if err != nil || len(id) == 0 { for _, regex := range idRegexps { if regex.Match([]byte(post.Link)) { strs := regex.FindAllStringSubmatch(post.Link, -1) if len(strs) > 0 && len(strs[0]) > 1 { id = strs[0][1] break } } } } posts = append(posts, post) } sort.Slice(posts, func(i, j int) bool { return posts[i].Time > posts[j].Time }) info = &posts[0] // remove qs urls := strings.Split(info.Link, "?") if len(urls) > 1 { qsStr := strings.Split(urls[1], "&") if len(qsStr) > 0 { qs := make([]string, 0) for _, val := range qsStr { items := strings.SplitN(val, "=", 1) if len(items) > 0 { if qsRegexp.MatchString(items[0]) { qs = append(qs, val) } } } if len(qs) > 0 { info.Link = urls[0] + "?" + strings.Join(qs, "&") } } } info.Link = fbURL + info.Link return info, nil }