123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106 |
- import * as html from "node-html-parser";
- import request from "request-promise-native";
- import { Response } from "request";
- import { INewsItem, IAggregator } from "./aggregator";
- import { getRepository } from "typeorm";
- import { AggroNewsItem } from "@db/entity/AggroNewsItem";
/** Identifier under which items from this aggregator are stored and looked up. */
const FEED_NAME = "kisskisstv-diary";

/** Base URL that the diary listing and the individual diary links are resolved against. */
const kissDiaryRoot = "http://www.kisskiss.tv/kiss";

/** Matches a diary link (`diary.php?no=<digits>`, case-insensitive); group 1 captures the numeric id. */
const urlPattern = /diary\.php\?no=(\d+)/i;
/**
 * Downloads `url` and parses the response body into an HTML tree.
 *
 * @param url Absolute URL of the page to fetch.
 * @returns The parsed root element, or `undefined` when the response status is
 *          not 200 or the parser does not yield an `HTMLElement`.
 */
async function fetchDocument(url: string): Promise<html.HTMLElement | undefined> {
    const res = await request(url, { resolveWithFullResponse: true }) as Response;
    if (res.statusCode !== 200)
        return undefined;

    const root = html.parse(res.body, {
        pre: true,
        script: false,
        style: false
    });
    return root instanceof html.HTMLElement ? root : undefined;
}

/**
 * Collects diary posts that are newer than the newest one already stored for
 * this feed.
 *
 * The highest stored `newsId` for `FEED_NAME` is read from the repository
 * (0 when nothing is stored yet). The diary listing page is then scanned for
 * links matching `urlPattern`, and every post whose id is greater than the
 * stored one is fetched and converted into an `INewsItem`.
 *
 * A post whose page cannot be retrieved or does not have the expected layout
 * is skipped with a log message instead of aborting the whole run.
 *
 * @returns The new items in listing order. An empty array is returned when the
 *          listing cannot be loaded or when an exception is thrown while
 *          scraping. A failure of the repository lookup itself is not caught
 *          here and rejects the returned promise.
 */
async function aggregate(): Promise<INewsItem[]> {
    const repo = getRepository(AggroNewsItem);
    const lastPost = await repo.findOne({
        select: [ "newsId" ],
        where: { feedName: FEED_NAME },
        order: { newsId: "DESC" }
    });
    // Nothing stored yet means every listed post counts as new.
    const lastSeenId = lastPost ? lastPost.newsId : 0;

    try {
        const rootNode = await fetchDocument(`${kissDiaryRoot}/diary.php`);
        if (!rootNode)
            return [];

        const diaryEntries = rootNode.querySelectorAll("div.blog_frame_middle ul.disc li a");
        if (!diaryEntries || diaryEntries.length === 0) {
            console.log("[KISS DIARY] Failed to find listing!");
            return [];
        }

        const result: INewsItem[] = [];
        for (const a of diaryEntries) {
            const href = a.rawAttributes.href;
            const matches = urlPattern.exec(href);
            if (!matches)
                continue;

            const id = Number(matches[1]);
            if (id <= lastSeenId)
                continue;

            const diaryLink = `${kissDiaryRoot}/${href}`;
            const node = await fetchDocument(diaryLink);
            if (!node)
                continue;

            const title = node.querySelector("table.blog_frame_top tr td a");
            const contents = node.querySelector("div.blog_frame_middle");
            if (!title || !contents) {
                // Skip just this post; throwing here would discard everything collected so far.
                console.log(`[KISS DIARY] Unexpected page layout for ${diaryLink}, skipping!`);
                continue;
            }

            // Drop the "blog_data" element from the post body before exporting its HTML.
            // NOTE(review): it is only removed when it is a child of the first child
            // node of the content frame — confirm this matches the real page structure.
            const bottomFrame = contents.querySelector("div.blog_data");
            if (bottomFrame) {
                const firstChild = contents.childNodes[0];
                if (firstChild instanceof html.HTMLElement)
                    firstChild.removeChild(bottomFrame);
            }

            result.push({
                newsId: id,
                feedId: FEED_NAME,
                link: diaryLink,
                title: title.text,
                author: "KISS BLOG",
                contents: contents.innerHTML,
                embedColor: 0xf4c100,
                needsTranslation: true
            });
        }
        return result;
    } catch (err) {
        // Best effort: report the failure and return nothing for this run.
        console.log("[KISS DIARY] Failed to aggregate diary entries:", err);
        return [];
    }
}
/** Aggregator entry exposing `aggregate` through the `IAggregator` shape. */
export default {
    aggregate
} as IAggregator;
|