Browse Source

Initial news aggregation support

ghorsington 5 years ago
parent
commit
bcd2fe0741
4 changed files with 228 additions and 1 deletions
  1. 81 0
      commands/aggregators/kiss_diary.js
  2. 143 0
      commands/news_aggregator.js
  3. 3 1
      db.js
  4. 1 0
      package.json

+ 81 - 0
commands/aggregators/kiss_diary.js

@@ -0,0 +1,81 @@
+const html = require("node-html-parser"); 
+const axios = require("axios");
+const db = require("../../db.js");
+
+const urlPattern = /diary\.php\?no=(\d+)/i;
+const kissDiaryRoot = "http://www.kisskiss.tv/kiss";
+
+async function aggregate() {
+    let lastDiary = db.get("latestKissDiaryEntry").value();
+    
+    try {
+        let mainPageRes = await axios.get(`${kissDiaryRoot}/diary.php`);
+        
+        if(mainPageRes.status != 200)
+            return [];
+
+        let rootNode = html.parse(mainPageRes.data, {
+                pre: true,
+                script: false,
+                style: false
+        });
+
+        let diaryEntries = rootNode.querySelectorAll("div.blog_frame_middle ul.disc li a");
+
+        if(!diaryEntries) {
+            console.log("[KISS DIARY] Failed to find listing!");
+        }
+
+        let result = [];
+        let latestEntry = lastDiary;
+
+        for(let a of diaryEntries) {
+            let matches = urlPattern.exec(a.rawAttributes.href);
+            if(!matches)
+                continue;
+            
+            let id = +matches[1];
+
+            if(id <= lastDiary)
+                continue;
+
+            if(id > latestEntry)
+                latestEntry = id;
+
+            let diaryLink = `${kissDiaryRoot}/${a.rawAttributes.href}`;
+            let res = await axios.get(diaryLink);
+            if(res.status != 200)
+                continue;
+
+            let node = html.parse(res.data, {
+                pre: true,
+                script: false,
+                style: false
+            });
+
+            let title = node.querySelector("table.blog_frame_top tr td a");
+            let contents = node.querySelector("div.blog_frame_middle");
+            let bottomFrame = contents.querySelector("div.blog_data");
+            if(bottomFrame)
+                contents.childNodes[0].removeChild(bottomFrame);
+
+            result.push({
+                id: `kisskisstv-diary-${id}`,
+                link: diaryLink,
+                title: title.text,
+                author: "KISS BLOG",
+                contents: contents.innerHTML,
+                embedColor: 0xf4c100
+            });
+        }
+
+        db.set("latestKissDiaryEntry", latestEntry).write();
+        return result;
+    } catch(err) {
+        return [];
+    }
+}
+
+module.exports = {
+    aggregate: aggregate
+};

+ 143 - 0
commands/news_aggregator.js

@@ -0,0 +1,143 @@
+const TurndownService = require("turndown");
+const RSSParser = require("rss-parser");
+const db = require("../db.js");
+const interval = require("interval-promise");
+const client = require("../client.js");
+const sha1 = require("sha1");
+const html = require("node-html-parser"); 
+const axios = require("axios");
+const path = require("path");
+const fs = require("fs");
+const Discord = require("discord.js");
+
+const UPDATE_INTERVAL = 5;
+
+const aggregators = [];
+const aggregateChannelID = db.get("aggregateChannel").value();
+
+// TODO: Run BBCode converter instead
+const turndown = new TurndownService();
+turndown.addRule("image", {
+    filter: "img",
+    replacement: () => ""
+});
+turndown.addRule("link", {
+    filter: node => node.nodeName === "A" &&node.getAttribute("href"),
+    replacement: (content, node) => node.getAttribute("href")
+});
+
+function markdownify(htmStr, link) {
+    return turndown.turndown(htmStr)/*.replace(/( {2}\n|\n\n){2,}/gm, "\n").replace(link, "")*/;
+}
+
+async function checkFeeds() {
+    console.log(`Aggregating feeds on ${new Date().toISOString()}`);
+
+    let aggregatorJobs = [];
+
+    for(let aggregator of aggregators) {
+        if(aggregator.aggregate)
+            aggregatorJobs.push(aggregator.aggregate());    
+    }
+    let aggregatedItems = await Promise.all(aggregatorJobs);
+
+    for(let itemSet of aggregatedItems) {
+        for(let item of itemSet) {
+            let itemObj = {
+                id: item.id,
+                link: item.link || "",
+                title: item.title || "",
+                author: item.author,
+                contents: markdownify(item.contents, item.link),
+                hash: null,
+                cacheMessageId: null,
+                postedMessageId: null,
+                embedColor: item.embedColor || 0xffffffff
+            };
+            itemObj.hash = sha1(itemObj.contents);
+
+            await addNewsItem(itemObj);
+        }
+    }
+}
+
+// TODO: Replace with proper forum implementation
+async function addNewsItem(item) {
+    let aggrItems = db.get("aggregatedItemsCache");
+
+    if(aggrItems.has(item.id).value()) {
+        let postedItem = aggrItems.get(item.id).value();
+
+        // No changes, skip
+        if(postedItem.hash == item.hash)
+            return;
+        else
+            await deleteCacheMessage(postedItem.cacheMessageId);
+    }
+
+    let ch = client.channels.get(aggregateChannelID);
+
+    let msg = await ch.send(new Discord.RichEmbed({
+        title: item.title,
+        url: item.link,
+        color: item.embedColor,
+        timestamp: new Date(),
+        description: `${item.contents.substring(0, Math.min(item.contents.length, 300))}...`,
+        author: {
+            name: item.author
+        },
+        footer: {
+            text: "NoctBot News Aggregator"
+        }
+    }));
+
+    aggrItems.set(item.id, {
+        hash: item.hash,
+        cacheMessageId: msg.id,
+        postedMessageId: null
+    }).write();
+}
+
+async function deleteCacheMessage(messageId) {
+    let ch = client.channels.get(aggregateChannelID);
+    let msg = await tryFetchMessage(ch, messageId);
+
+    if(msg)
+        await msg.delete();
+}
+
+async function tryFetchMessage(channel, messageId) {
+    try {
+        return await channel.fetchMessage(messageId);
+    }catch(error){
+        return null;
+    }
+}
+
+function initAggregators() {
+    let aggregatorsPath = path.join(path.dirname(module.filename), "aggregators");
+    let files = fs.readdirSync(aggregatorsPath);
+
+    for(let file of files) {
+        let ext  = path.extname(file);
+        if(ext != ".js")
+            continue;
+
+        let obj = require(path.resolve(aggregatorsPath, file));
+
+        if(obj)
+            aggregators.push(obj);
+
+        if(obj.init)
+            obj.init();
+    }
+}
+
+function onStart() {
+    initAggregators();
+    interval(checkFeeds, UPDATE_INTERVAL * 1000);
+};
+
+module.exports = {
+    onStart: onStart
+};

+ 3 - 1
db.js

@@ -75,6 +75,7 @@ db.defaults({
     postedNewsGuids: {},
     newsPostVerifyChannel: "",
     feedOutputChannel: "493337841724555276",
+    aggregateChannel: "596752269861584898",
     messageReactions: {},
     faceEditChannels: {
         "459622760839118848": 1.0,
@@ -133,7 +134,8 @@ db.defaults({
             "I nut"
         ]
     },
-    quotes: []
+    quotes: [],
+    latestKissDiaryEntry: 1229
 }).write();
 
 module.exports = db;

+ 1 - 0
package.json

@@ -32,6 +32,7 @@
     "request-promise-native": "^1.0.5",
     "rss-parser": "^3.4.3",
     "sha1": "^1.1.1",
+    "translate-google": "^1.3.5",
     "turndown": "^5.0.1",
     "uws": "^99.0.0"
   },