first
This commit is contained in:
commit
2c1c61fb48
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
.env
|
||||
node_modules/
|
4
config/index.js
Normal file
4
config/index.js
Normal file
@ -0,0 +1,4 @@
|
||||
module.exports = {
|
||||
api_url: process.env.API_URL || '',
|
||||
api_key: process.env.API_KEY || ''
|
||||
}
|
44
index.js
Normal file
44
index.js
Normal file
@ -0,0 +1,44 @@
|
||||
require('dotenv').config()
|
||||
const cron = require('cron')
|
||||
const apis = require('./libs/apis.js')
|
||||
const crawler = require('./libs/crawler.js')
|
||||
|
||||
/**
 * Run one crawl pass: fetch the Instagram IDs from the API and process each one.
 *
 * Resolves only after every per-ID lookup has settled. A failure for one ID is
 * logged and does not prevent the remaining IDs from being processed.
 *
 * @returns {Promise<void>}
 */
const runIGLook = async () => {
  const ids = await apis.GetInstagramIDs()
  console.log('all ids :: ', ids)
  if (!Array.isArray(ids) || ids.length === 0) return

  // Await the lookups so rejections are handled here instead of escaping as
  // unhandled promise rejections; each one is caught individually so a single
  // failing ID cannot abort the rest.
  await Promise.all(ids.map(id => getPost(id).catch(err => {
    console.log('get post fail :: ', id, err)
  })))
}
|
||||
|
||||
/**
 * Fetch the latest post for one Instagram ID and forward it to the API when
 * its timestamp falls within the last 30 minutes.
 *
 * @param {string} id instagram id to look up
 * @returns {Promise<void>}
 */
const getPost = async (id) => {
  console.log('get instagram :: ', id)
  const post = await crawler.getLastPost(id)

  // getLastPost resolves to null when the page cannot be fetched or parsed.
  if (post === null || typeof post !== 'object') {
    console.log('get post data ::: no post data for', id)
    return
  }
  console.log(`get post data ::: ${post.id} / ${post.text} / ${post.link} / ${post.timestamp}`)

  // Only posts newer than 1800 seconds (30 minutes) are forwarded.
  const minTime = Math.floor(Date.now() / 1000) - 1800
  if (minTime > post.timestamp) return

  await apis.SendPostData([{
    id,
    post_id: post.id,
    text: post.text,
    link: post.link
  }])
}
|
||||
|
||||
// schedule the iglook job
|
||||
// Tick handler: runs one crawl pass and logs any failure instead of letting it propagate.
const handleTick = async () => {
  console.log('Start Tick')
  try {
    await runIGLook()
  } catch (err) {
    console.log('run tick fail', err)
  }
}

new cron.CronJob({ //eslint-disable-line
  cronTime: '00 */2 * * * *',
  onTick: handleTick,
  timeZone: 'Asia/Taipei',
  runOnInit: true,
  start: true
})
|
36
init.d/iglook
Normal file
36
init.d/iglook
Normal file
@ -0,0 +1,36 @@
|
||||
#!/sbin/openrc-run
|
||||
DIRECTORY=/opt/iglook
|
||||
PIDFILE=/var/run/iglook.pid
|
||||
|
||||
# Service dependencies: this service needs "net".
depend() {
    need net
}
|
||||
|
||||
# Launch index.js with node in the background from ${DIRECTORY}, writing the
# pidfile and redirecting stdout/stderr to the log files under /var/log.
start(){
    ebegin "start iglook"
    start-stop-daemon \
        --start \
        --background \
        --make-pidfile --pidfile ${PIDFILE} \
        --chdir "${DIRECTORY}" \
        --stdout /var/log/iglook.log \
        --stderr /var/log/iglook.err \
        --exec /usr/bin/node -- index.js
    eend $?
}
|
||||
|
||||
# Stop the daemon recorded in ${PIDFILE} and report the result.
stop(){
    ebegin "stop iglook"
    # No trailing backslash on the last option: otherwise "eend $?" is
    # passed to start-stop-daemon as extra arguments instead of being run.
    start-stop-daemon \
        --stop \
        --pidfile ${PIDFILE} \
        -d "${DIRECTORY}"
    eend $?
}
|
||||
|
||||
# Signal the running process with HUP, then start the service again.
# NOTE(review): OpenRC only exposes custom commands listed in
# extra_commands / extra_started_commands; confirm "reload" is declared.
reload(){
    ebegin "restart iglook"
    # ${PIDFILE} is a path; kill needs the PID stored inside that file.
    kill -HUP "$(cat "${PIDFILE}")"
    start
    eend $?
}
|
43
libs/apis.js
Normal file
43
libs/apis.js
Normal file
@ -0,0 +1,43 @@
|
||||
const axios = require('axios')
|
||||
const config = require('../config/index.js')
|
||||
const baseURL = config.api_url
|
||||
|
||||
const apis = {}
|
||||
module.exports = apis
|
||||
|
||||
/**
 * Request the Instagram ID list from the private API.
 *
 * @returns {Promise<Array|null>} `data.list` from the response, or null when
 *   the request fails or the payload does not contain an array `list`
 */
apis.GetInstagramIDs = async () => {
  const request = {
    method: 'get',
    baseURL,
    url: '/api/private/ig',
    headers: { 'X-Mtfos-Key': config.api_key }
  }

  try {
    const response = await axios(request)
    const hasList =
      'data' in response &&
      typeof response.data === 'object' &&
      'list' in response.data &&
      Array.isArray(response.data.list)
    return hasList ? response.data.list : null
  } catch (err) {
    return null
  }
}
|
||||
|
||||
/**
 * Send post records to the private API.
 *
 * Entries that are not objects, or that lack any of `id`, `post_id`, `link`
 * or `text`, are dropped before sending. Nothing is sent when no valid entry
 * remains.
 *
 * @param {Array<object>} posts post records to send
 * @returns {Promise<undefined|null>} undefined when sent or skipped, null when the request fails
 */
apis.SendPostData = async (posts = []) => {
  if (!Array.isArray(posts) || posts.length === 0) return

  const requiredKeys = ['id', 'post_id', 'link', 'text']
  // The object check comes first: the `in` operator throws on null and primitives.
  const validPosts = posts.filter(post =>
    post !== null && typeof post === 'object' && requiredKeys.every(key => key in post)
  )
  if (validPosts.length === 0) return

  try {
    await axios({
      url: '/api/private/igposts',
      baseURL,
      method: 'post',
      data: {
        igs: validPosts
      },
      headers: {
        'X-Mtfos-Key': config.api_key
      }
    })
  } catch (err) {
    // Best-effort send: report the failure rather than dropping it silently.
    console.log('send post data fail :: ', err.message)
    return null
  }
}
|
82
libs/crawler.js
Normal file
82
libs/crawler.js
Normal file
@ -0,0 +1,82 @@
|
||||
const axios = require('axios')
|
||||
const cheerio = require('cheerio')
|
||||
const SafeEval = require('safe-eval')
|
||||
const baseURL = 'https://www.instagram.com'
|
||||
|
||||
const obj = {}
|
||||
module.exports = obj
|
||||
|
||||
/**
 * Build the public URL of a post from its shortcode.
 * @param {string} shortcode ig post shortcode
 * @returns {string} post URL, or an empty string when shortcode is not a non-blank string
 */
const getPostLink = (shortcode = '') => {
  const isUsable = typeof shortcode === 'string' && shortcode.trim().length > 0
  if (!isUsable) {
    return ''
  }
  // Drop a trailing slash from the base so the joined path has exactly one.
  const root = baseURL.replace(/\/$/, '')
  return `${root}/p/${shortcode}`
}
|
||||
|
||||
/**
 * Extract the most recent post from an Instagram profile page.
 *
 * Finds the first inline script whose body starts with `window._sharedData`,
 * evaluates it, and reads the first entry of the profile's timeline edges.
 *
 * @param {string} data instagram source html
 * @returns {{id: *, timestamp: *, link: string, text: string}|null} summary of
 *   the latest post, or null when the input is blank, the script is missing,
 *   the data has an unexpected shape, or the timeline is empty
 */
const getInstagramData = (data) => {
  if (typeof data !== 'string' || data.trim().length === 0) return null

  const $ = cheerio.load(data)
  const sharedDataPattern = /^window\._sharedData/i
  let sharedScript = null
  $('script').each((idx, el) => {
    const source = $(el).html()
    if (typeof source === 'string' && sharedDataPattern.test(source)) {
      sharedScript = source
      return false // only the first match is used, so stop iterating
    }
  })
  if (sharedScript === null) return null

  let edges
  try {
    // SECURITY NOTE(review): this evaluates script text fetched from a remote
    // page. Confirm safe-eval is an adequate sandbox for untrusted input, or
    // extract the object literal and parse it with JSON.parse instead.
    const context = { window: {} }
    SafeEval(sharedScript, context)
    edges = context.window._sharedData.entry_data.ProfilePage[0].graphql.user.edge_owner_to_timeline_media.edges
  } catch (err) {
    // Evaluation failed or the page layout differs from what is expected.
    console.log('parse instagram data fail :: ', err.message)
    return null
  }
  if (!Array.isArray(edges) || edges.length === 0 || !edges[0] || !edges[0].node) return null

  const lastPost = edges[0].node
  let text = ''
  try {
    const captions = lastPost.edge_media_to_caption.edges
    text = captions.length > 0 ? captions[0].node.text : ''
  } catch (err) {
    // A missing caption structure is treated as an empty caption.
    text = ''
  }

  return {
    id: lastPost.id,
    timestamp: lastPost.taken_at_timestamp,
    link: getPostLink(lastPost.shortcode),
    text
  }
}
|
||||
|
||||
/**
 * Download a user's profile page and extract the latest post from it.
 *
 * @param {string} id ig user page id
 * @returns {Promise<object|null>} the value returned by getInstagramData, or
 *   null when id is not a non-blank string, the response body is not a
 *   string, or the request/parsing fails
 */
obj.getLastPost = async (id) => {
  if (typeof id !== 'string' || id.trim().length === 0) return null

  const param = {
    baseURL,
    method: 'get',
    url: `/${id}`,
    headers: {
      'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:68.0) Gecko/20100101 Firefox/68.0'
    }
  }

  try {
    const result = await axios(param)
    if (!('data' in result) || typeof result.data !== 'string') return null
    return getInstagramData(result.data)
  } catch (err) {
    // err.response is only set when the server answered; for network or
    // parsing errors it is undefined, so guard before reading `.data`.
    const detail = (err && err.response && err.response.data) || (err && err.message)
    console.log(detail || 'no response error data')
    return null
  }
}
|
1834
package-lock.json
generated
Normal file
1834
package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
23
package.json
Normal file
23
package.json
Normal file
@ -0,0 +1,23 @@
|
||||
{
|
||||
"name": "node-ig-crawler",
|
||||
"version": "1.0.0",
|
||||
"description": "",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"start": "node index.js",
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"keywords": [],
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"axios": "^0.18.0",
|
||||
"cheerio": "^1.0.0-rc.3",
|
||||
"cron": "^1.7.1",
|
||||
"dotenv": "^8.0.0",
|
||||
"safe-eval": "^0.4.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"standard": "^12.0.1"
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user