mirror of
https://github.com/daniel-j/fimfic2epub.git
synced 2024-06-25 01:21:15 +12:00
Use HTML download instead of fetching chapters separately
Added twemoji. Fixed progress bar.
This commit is contained in:
parent
d780668cfb
commit
dac3a01a1b
|
@ -4,7 +4,7 @@
|
|||
"name": "fimfic2epub",
|
||||
"short_name": "fimfic2epub",
|
||||
"description": "Improved EPUB exporter for Fimfiction",
|
||||
"version": "1.6.3",
|
||||
"version": "1.6.4",
|
||||
|
||||
"icons": {
|
||||
"128": "icon-128.png"
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "fimfic2epub",
|
||||
"version": "1.6.3",
|
||||
"version": "1.6.4",
|
||||
"description": "Tool to generate improved EPUB ebooks from Fimfiction stories",
|
||||
"author": "djazz",
|
||||
"repository": {
|
||||
|
@ -26,12 +26,14 @@
|
|||
"html-entities": "^1.2.0",
|
||||
"html-to-text": "^2.1.3",
|
||||
"image-size": "^0.5.0",
|
||||
"is-svg": "^2.1.0",
|
||||
"jszip": "^3.1.2",
|
||||
"match-words": "^0.1.0",
|
||||
"mithril": "^0.2.5",
|
||||
"pretty-data": "^0.40.0",
|
||||
"request": "^2.74.0",
|
||||
"sanitize-filename": "^1.6.0",
|
||||
"twemoji": "^2.3.0",
|
||||
"zero-fill": "^2.2.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
|
|
@ -7,6 +7,7 @@ import sanitize from 'sanitize-filename'
|
|||
import URL from 'url'
|
||||
import isNode from 'detect-node'
|
||||
import fileType from 'file-type'
|
||||
import isSvg from 'is-svg'
|
||||
import sizeOf from 'image-size'
|
||||
import Emitter from 'es6-event-emitter'
|
||||
|
||||
|
@ -22,6 +23,8 @@ import { containerXml } from './constants'
|
|||
|
||||
const entities = new XmlEntities()
|
||||
|
||||
const trimWhitespace = /^\s*(<br\s*\/?\s*>)+|(<br\s*\/?\s*>)+\s*$/ig
|
||||
|
||||
class FimFic2Epub extends Emitter {
|
||||
|
||||
static getStoryId (id) {
|
||||
|
@ -197,6 +200,58 @@ class FimFic2Epub extends Emitter {
|
|||
this.chaptersWithNotes.length = 0
|
||||
|
||||
this.progress(0, 0, 'Fetching chapters...')
|
||||
|
||||
let chapterCount = this.storyInfo.chapters.length
|
||||
let url = 'https://www.fimfiction.net/story/download/' + this.storyInfo.id + '/html'
|
||||
|
||||
this.pcache.chapters = fetch(url).then((html) => {
|
||||
// console.log(html)
|
||||
let p = Promise.resolve()
|
||||
let matchChapter = /<article class="chapter">[\s\S]*?<\/header>([\s\S]*?)<\/article>/g
|
||||
for (let ma, i = 0; (ma = matchChapter.exec(html)); i++) {
|
||||
let chapterContent = ma[1]
|
||||
chapterContent = chapterContent.replace(/<footer>[\s\S]*?<\/footer>/g, '').trim()
|
||||
|
||||
let authorNotesPos = chapterContent.indexOf('<aside ')
|
||||
let notesContent = ''
|
||||
let notesFirst = authorNotesPos === 0
|
||||
if (authorNotesPos !== -1) {
|
||||
// console.log(chapterContent.length)
|
||||
chapterContent = chapterContent.replace(/<aside class="authors-note">([\s\S]*?)<\/aside>/, (match, content, pos) => {
|
||||
// console.log(pos + match.length)
|
||||
content = content.replace(/<header><h1>.*?<\/h1><\/header>/, '')
|
||||
notesContent = content.trim().replace(trimWhitespace, '')
|
||||
return ''
|
||||
})
|
||||
}
|
||||
|
||||
chapterContent = chapterContent.trim().replace(trimWhitespace, '')
|
||||
let chapter = {content: chapterContent, notes: notesContent, notesFirst}
|
||||
p = p.then(cleanMarkup(chapter.content).then((content) => {
|
||||
chapter.content = content
|
||||
}))
|
||||
if (notesContent) {
|
||||
p = p.then(cleanMarkup(chapter.notes).then((notes) => {
|
||||
chapter.notes = notes
|
||||
}))
|
||||
}
|
||||
p = p.then(() => {
|
||||
this.progress(0, (i + 1) / chapterCount, 'Processed chapter ' + (i + 1) + ' / ' + chapterCount)
|
||||
if (chapter.notes) {
|
||||
this.hasAuthorNotes = true
|
||||
this.chaptersWithNotes.push(i)
|
||||
}
|
||||
this.chapters[i] = chapter
|
||||
let ch = this.storyInfo.chapters[i]
|
||||
ch.realWordCount = htmlWordCount(chapter.content)
|
||||
})
|
||||
}
|
||||
return p
|
||||
}).then(() => {
|
||||
this.pcache.chapters = null
|
||||
})
|
||||
|
||||
/*
|
||||
this.pcache.chapters = new Promise((resolve, reject) => {
|
||||
let chapters = this.storyInfo.chapters
|
||||
let chapterCount = this.storyInfo.chapters.length
|
||||
|
@ -251,6 +306,7 @@ class FimFic2Epub extends Emitter {
|
|||
}).then(() => {
|
||||
this.pcache.chapters = null
|
||||
})
|
||||
*/
|
||||
return this.pcache.chapters
|
||||
}
|
||||
|
||||
|
@ -284,6 +340,15 @@ class FimFic2Epub extends Emitter {
|
|||
fetchRemote(url, 'arraybuffer').then((data) => {
|
||||
r.dest = null
|
||||
let info = fileType(isNode ? data : new Uint8Array(data))
|
||||
if (!info) {
|
||||
// file-type doesn't support SVG, extra check:
|
||||
if (isSvg(Buffer.from(data).toString('utf8'))) {
|
||||
info = {
|
||||
mime: 'image/svg+xml',
|
||||
ext: 'svg'
|
||||
}
|
||||
}
|
||||
}
|
||||
if (info) {
|
||||
let type = info.mime
|
||||
r.type = type
|
||||
|
@ -649,7 +714,7 @@ class FimFic2Epub extends Emitter {
|
|||
let chapterPos = html.indexOf('<div class="bbcode">')
|
||||
let chapter = html.substring(chapterPos + 20)
|
||||
|
||||
let pos = chapter.indexOf('\t\t</div>\n\t</div>')
|
||||
let pos = chapter.indexOf('\t\t</div>\n\t</div>\t\t\n\t\t\t\t\t</div>\n')
|
||||
|
||||
chapter = chapter.substring(0, pos).trim()
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
|
||||
import m from 'mithril'
|
||||
import { XmlEntities } from 'html-entities'
|
||||
import twemoji from 'twemoji'
|
||||
import render from './lib/mithril-node-render'
|
||||
|
||||
import fetch from './fetch'
|
||||
|
@ -14,6 +15,8 @@ export function cleanMarkup (html) {
|
|||
}
|
||||
|
||||
return new Promise((resolve, reject) => {
|
||||
html = twemoji.parse(html, {ext: '.svg', folder: 'svg'})
|
||||
html = html.replace(/(<img class="emoji" draggable="false" alt=".*?" src=".*?")>/g, '$1/>')
|
||||
// replace HTML entities with decimal entities
|
||||
html = html.replace(/ /g, ' ')
|
||||
html = html.replace(/ /g, ' ')
|
||||
|
@ -58,28 +61,16 @@ export function cleanMarkup (html) {
|
|||
let cache = new Map()
|
||||
let completeCount = 0
|
||||
|
||||
let matchYoutube = /<div class="embed-container" data-original-src="(.*?)" data-src="(.*?)" data-id="(.*?)" data-origin="(.*?)">(.+?)<\/div><\/div><\/div>/g
|
||||
for (let ma; (ma = matchYoutube.exec(html));) {
|
||||
if (ma[4] === 'YouTube') {
|
||||
let youtubeId = ma[3]
|
||||
cache.set(youtubeId, null)
|
||||
}
|
||||
let matchYouTube = /<p><a class="embed" href="https:\/\/www\.youtube\.com\/watch\?v=(.*?)">.*?<\/a><\/p>/g
|
||||
for (let ma; (ma = matchYouTube.exec(html));) {
|
||||
let youtubeId = ma[1]
|
||||
cache.set(youtubeId, null)
|
||||
}
|
||||
|
||||
let matchSoundCloud = /<div data-controller="oembed" class="oembed" data-url="(.*?)" .+?<\/div>/g
|
||||
let matchSoundCloud = /<p><a class="embed" href="(https:\/\/soundcloud\.com\/.*?)">.*?<\/a><\/p>/g
|
||||
html = html.replace(matchSoundCloud, (match, url) => {
|
||||
return render(m('.soundcloud.leftalign', [
|
||||
'SoundCloud song ', m('a', {href: entities.decode(url), rel: 'nofollow'}, url.replace('https://soundcloud.com', ''))
|
||||
]))
|
||||
})
|
||||
|
||||
// Story embed
|
||||
let matchStoryEmbed = /<div style='[^']*?' class='bbcode__block'><div style="position:relative;" class="story-card-container".*?data-story-id="([^"]*?)"[\s\S]*?<a class="story_link" href="(.*?)" title=".*?">(.*?)<\/a>[\s\S]*?" class="story-card__author">(.*?)<\/a>[\s\S]*?<\/div><\/div>[\s\S]*?<\/div><\/div>/g
|
||||
html = html.replace(matchStoryEmbed, (match, id, storyLink, storyTitle, author) => {
|
||||
return render(m('.story', [
|
||||
'Story: ',
|
||||
m('a', {href: 'http://fimfiction.net' + entities.decode(storyLink), rel: 'nofollow'}, storyTitle),
|
||||
' by ' + author
|
||||
'SoundCloud: ', m('a', {href: entities.decode(url), rel: 'nofollow'}, url.replace('https://soundcloud.com/', '').replace(/[-_]/g, ' ').replace('/', ' - ').replace(/ {2}/g, ' '))
|
||||
]))
|
||||
})
|
||||
|
||||
|
@ -100,13 +91,13 @@ export function cleanMarkup (html) {
|
|||
completeCount++
|
||||
})
|
||||
if (completeCount === cache.size || data.length === 0) {
|
||||
html = html.replace(matchYoutube, replaceYoutube)
|
||||
html = html.replace(matchYouTube, replaceYouTube)
|
||||
continueParsing()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
function replaceYoutube (match, origSrc, src, id, origin) {
|
||||
function replaceYouTube (match, id) {
|
||||
let youtubeId = id
|
||||
let thumbnail = 'http://img.youtube.com/vi/' + youtubeId + '/hqdefault.jpg'
|
||||
let youtubeUrl = 'https://youtube.com/watch?v=' + youtubeId
|
||||
|
|
|
@ -58,6 +58,9 @@ img {
|
|||
max-width: 100%;
|
||||
max-height: 100%;
|
||||
}
|
||||
img.emoji {
|
||||
height: 1em;
|
||||
}
|
||||
|
||||
hr.old {
|
||||
padding: 0;
|
||||
|
|
Loading…
Reference in a new issue