const request = require('request') const cheerio = require('cheerio') const qs = require('querystring') /** * @typedef lastPost * @prop {string} txt post body * @prop {string} id post id * @prop {string} link post link * @prop {string} time timestamp */ /** * get facebook fan page last post * @param {string} pageid facebook fan page id * @return {Promise} */ const getLastPost = async (pageid = '') => { if (typeof pageid !== 'string' || pageid.trim().length === 0) return null pageid = pageid.trim() // console.log('access facebook fan page :::: ' + pageid) let page = await new Promise((resolve) => { request({ baseUrl: 'https://www.facebook.com', url: `/${encodeURIComponent(pageid)}/posts`, method: 'get', headers: { 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:62.0) Gecko/20100101 Firefox/62.0' } }, (err, res, body) => { if (err) { resolve(null) return } if (body && typeof body !== 'string' && !(body instanceof String)) { resolve(null) return } resolve(body) }) }) if (page === null) return null console.log(`${pageid} page length :::: `, Buffer.from(page).length) let $ = cheerio.load(page, { lowerCaseTags: true, lowerCaseAttributeNames: true }) let posts = [] $('div.userContentWrapper').each((i, el) => { let t = cheerio.load(el) let timeEl = t('abbr') let time = timeEl.attr('data-utime') let link = timeEl.parent().attr('href') let p = t('div.userContent') let txt = p.text() || '' let id = p.first().attr('id') if (!id) { if (/[\?|&]id\=(\d+)/.test(link)) { // eslint-disable-line let m = link.match(/[\?|&]story_fbid\=(\d+)/) // eslint-disable-line if (m !== null && m.length > 1) { id = m[1] } } else if (/\/posts\/(\d+)/.test(link)) { let m = link.match(/\/posts\/(\d+)/) if (m !== null && m.length > 1) { id = m[1] } } else if (/\/photos\/.+?\/(\d+)/.test(link)) { let m = link.match(/\/photos\/.+?\/(\d+)/) if (m !== null && m.length > 1) { id = m[1] } } else if (/\/videos\/(\d+)/.test(link)) { let m = link.match(/\/videos\/(\d+)/) if (m !== null && m.length > 1) { id = m[1] } } } // console.log(time, link, txt, id) if (!time || !link || !id) return null let tmp = { txt, id, link, time } // tmp.link = tmp.link.split('?')[0] posts.push(tmp) el = null t = null }) $ = null if (posts.length === 0) return null posts.sort((a, b) => { return b.time - a.time }) let post = posts[0] let larr = post.link.split('?') let linkqs = qs.parse(larr[1]) let newqs = {} for (let i in linkqs) { if (/id/i.test(i)) { newqs[i] = linkqs[i] } } post.link = larr[0] + '?' + qs.stringify(newqs) post.link = `https://www.facebook.com/${post.link.replace(/^\//, '')}`.replace(/\?$/, '') return post } module.exports = { getLastPost }