// node-fblook/facebook-parser.js
//
// 118 lines
// 3.0 KiB
// JavaScript

const request = require('request')
const cheerio = require('cheerio')
const qs = require('querystring')
/**
* @typedef lastPost
* @prop {string} txt post body
* @prop {string} id post id
* @prop {string} link post link
* @prop {string} time timestamp
*/
/** Origin used for the page request and for resolving relative post links. */
const FACEBOOK_ORIGIN = 'https://www.facebook.com'

/**
 * Patterns used to recover a post id from its link, tried in order.
 * Every pattern captures the numeric id in group 1.
 */
const POST_ID_PATTERNS = [
  /[?&]story_fbid=(\d+)/,
  /\/posts\/(\d+)/,
  /\/photos\/.+?\/(\d+)/,
  /\/videos\/(\d+)/
]

/**
 * extract the numeric post id from a post link
 * @param {string} link post link (relative or absolute)
 * @return {string|undefined} the id, or undefined when no pattern matches
 */
const extractPostId = (link) => {
  // Each pattern is tried independently, so a link carrying an unrelated
  // `id=` parameter can still be resolved through its `/posts/<id>` path.
  for (const pattern of POST_ID_PATTERNS) {
    const m = pattern.exec(link)
    if (m !== null) return m[1]
  }
  return undefined
}

/**
 * build the public link of a post: only query parameters whose name contains
 * "id" are kept, and relative links are resolved against the facebook origin
 * @param {string} link raw href taken from the page
 * @return {string} absolute link without a dangling "?"
 */
const normalizePostLink = (link) => {
  const [path, query] = link.split('?')
  const parsed = qs.parse(query)
  const kept = {}
  for (const key of Object.keys(parsed)) {
    if (/id/i.test(key)) kept[key] = parsed[key]
  }
  const search = qs.stringify(kept)
  // An href that is already absolute must not get the origin prepended again.
  const absolute = /^https?:\/\//i.test(path)
    ? path
    : `${FACEBOOK_ORIGIN}/${path.replace(/^\//, '')}`
  return search ? `${absolute}?${search}` : absolute
}

/**
 * get facebook fan page last post
 *
 * Requests `/<pageid>/posts`, collects every `div.userContentWrapper` that
 * yields a time, a link and an id, and returns the one with the greatest
 * `data-utime` value. Request failures are not thrown: they resolve to null.
 * @param {string} pageid facebook fan page id
 * @return {Promise<lastPost|null>} the latest post, or null when pageid is not
 * a non-blank string, the request fails, the body is not a string, or no
 * usable post is found
 */
const getLastPost = async (pageid = '') => {
  if (typeof pageid !== 'string' || pageid.trim().length === 0) return null
  const pageName = pageid.trim()
  const page = await new Promise((resolve) => {
    request({
      baseUrl: FACEBOOK_ORIGIN,
      url: `/${encodeURIComponent(pageName)}/posts`,
      method: 'get',
      headers: {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:62.0) Gecko/20100101 Firefox/62.0'
      }
    }, (err, res, body) => {
      // Anything that is not a string (including a missing body) is treated
      // as a failed fetch, so the parsing below never sees undefined.
      resolve(!err && typeof body === 'string' ? body : null)
    })
  })
  if (page === null) return null
  console.log(`${pageName} page length :::: `, Buffer.byteLength(page))
  const $ = cheerio.load(page, {
    lowerCaseTags: true,
    lowerCaseAttributeNames: true
  })
  const posts = []
  $('div.userContentWrapper').each((i, el) => {
    const t = cheerio.load(el)
    const timeEl = t('abbr')
    const time = timeEl.attr('data-utime')
    const link = timeEl.parent().attr('href')
    const content = t('div.userContent')
    const txt = content.text() || ''
    // Prefer the id attribute of the content node; fall back to the link.
    const id = content.first().attr('id') || (link ? extractPostId(link) : undefined)
    // Skip incomplete entries (returning undefined keeps the iteration going).
    if (!time || !link || !id) return
    posts.push({ txt, id, link, time })
  })
  if (posts.length === 0) return null
  // `time` is the raw attribute string; subtraction compares it numerically.
  posts.sort((a, b) => b.time - a.time)
  const latest = posts[0]
  latest.link = normalizePostLink(latest.link)
  return latest
}
// public API of this module
module.exports = { getLastPost: getLastPost }