|
@@ -1,9 +1,9 @@
|
|
|
-import * as html from "node-html-parser";
|
|
|
import request from "request-promise-native";
|
|
|
import { Response } from "request";
|
|
|
import { INewsItem, IAggregator } from "./aggregator";
|
|
|
import { getRepository } from "typeorm";
|
|
|
import { AggroNewsItem } from "@db/entity/AggroNewsItem";
|
|
|
+import cheerio from "cheerio";
|
|
|
|
|
|
// Case-insensitive pattern matching diary entry links of the form
// "diary.php?no=<digits>"; capture group 1 holds the digit sequence.
// It is run against the href attribute of each listing anchor.
const urlPattern = /diary\.php\?no=(\d+)/i;
|
|
|
// Base URL that an entry's href is appended to (joined with "/") to
// build the full link to an individual diary page.
const kissDiaryRoot = "http://www.kisskiss.tv/kiss";
|
|
@@ -29,26 +29,19 @@ async function aggregate() {
|
|
|
if(mainPageRes.statusCode != 200)
|
|
|
return [];
|
|
|
|
|
|
- let rootNode = html.parse(mainPageRes.body, {
|
|
|
- pre: true,
|
|
|
- script: false,
|
|
|
- style: false
|
|
|
- });
|
|
|
-
|
|
|
- if(!(rootNode instanceof html.HTMLElement))
|
|
|
- return;
|
|
|
+ let rootNode = cheerio.load(mainPageRes.body);
|
|
|
|
|
|
- let diaryEntries = rootNode.querySelectorAll("div.blog_frame_middle ul.disc li a");
|
|
|
+ let diaryEntries = rootNode("div.blog_frame_middle ul.disc li a");
|
|
|
|
|
|
- if(!diaryEntries) {
|
|
|
+ if(diaryEntries.length == 0) {
|
|
|
console.log("[KISS DIARY] Failed to find listing!");
|
|
|
}
|
|
|
|
|
|
let result : INewsItem[] = [];
|
|
|
let latestEntry = lastPost.newsId;
|
|
|
|
|
|
- for(let a of diaryEntries) {
|
|
|
- let matches = urlPattern.exec(a.rawAttributes.href);
|
|
|
+ for(let a of diaryEntries.get() as CheerioElement[]) {
|
|
|
+ let matches = urlPattern.exec(a.attribs.href);
|
|
|
if(!matches)
|
|
|
continue;
|
|
|
|
|
@@ -60,36 +53,26 @@ async function aggregate() {
|
|
|
if(id > latestEntry)
|
|
|
latestEntry = id;
|
|
|
|
|
|
- let diaryLink = `${kissDiaryRoot}/${a.rawAttributes.href}`;
|
|
|
+ let diaryLink = `${kissDiaryRoot}/${a.attribs.href}`;
|
|
|
let res = await request(diaryLink, {resolveWithFullResponse: true}) as Response;
|
|
|
+
|
|
|
if(res.statusCode != 200)
|
|
|
continue;
|
|
|
|
|
|
- let node = html.parse(res.body, {
|
|
|
- pre: true,
|
|
|
- script: false,
|
|
|
- style: false
|
|
|
- });
|
|
|
-
|
|
|
- if(!(node instanceof html.HTMLElement))
|
|
|
- continue;
|
|
|
+ let node = cheerio.load(res.body);
|
|
|
|
|
|
- let title = node.querySelector("table.blog_frame_top tr td a");
|
|
|
- let contents = node.querySelector("div.blog_frame_middle");
|
|
|
- let bottomFrame = contents.querySelector("div.blog_data");
|
|
|
- if(bottomFrame) {
|
|
|
- let child = contents.childNodes[0];
|
|
|
- if(child instanceof html.HTMLElement)
|
|
|
- child.removeChild(bottomFrame);
|
|
|
- }
|
|
|
+ let title = node("table.blog_frame_top tr td a");
|
|
|
+ let contents = node("div.blog_frame_middle");
|
|
|
+ let bottomFrame = contents.find("div.blog_data");
|
|
|
+ bottomFrame.remove();
|
|
|
|
|
|
result.push({
|
|
|
newsId: id,
|
|
|
feedId: FEED_NAME,
|
|
|
link: diaryLink,
|
|
|
- title: title.text,
|
|
|
+ title: title.text(),
|
|
|
author: "KISS BLOG",
|
|
|
- contents: contents.innerHTML,
|
|
|
+ contents: contents.html(),
|
|
|
embedColor: 0xf4c100,
|
|
|
needsTranslation: true
|
|
|
});
|