mirror of
https://github.com/daniel-j/fimfic2epub.git
synced 2024-05-22 13:23:19 +12:00
Better reading ease method, async with thread wakeups, to prevent thread blocking
This commit is contained in:
parent
c328997748
commit
37b8a6671c
|
@ -12,9 +12,9 @@ const args = require('commander')
|
|||
.option('-e, --no-external', 'Don\'t embed external resources, such as images (breaks EPUB spec)')
|
||||
.option('-n, --no-notes', 'Don\'t include author notes')
|
||||
.option('-i, --notes-index', 'Create an index with all author notes at the end of the ebook')
|
||||
.option('-p, --paragraphs <style>', 'Select a paragraph style [spaced|indented|indentedall|both]', 'spaced')
|
||||
.option('-j, --join-subjects', 'Join subjects to a single value')
|
||||
.option('-r, --reading-ease', 'Calculate Flesch reading ease (slow for long stories)')
|
||||
.option('-p, --paragraphs <style>', 'Select a paragraph style <spaced|indented|indentedall|both>', 'spaced')
|
||||
.option('-j, --join-subjects', 'Join dc:subjects to a single value')
|
||||
.option('-r, --reading-ease [wakeup-interval]', 'Calculate Flesch reading ease, optional thread wakeup interval in ms (default: 50)')
|
||||
.option('-C, --cover <url>', 'Set cover image url')
|
||||
.parse(process.argv)
|
||||
|
||||
|
@ -40,7 +40,8 @@ const ffc = new FimFic2Epub(STORY_ID, {
|
|||
includeExternal: !!args.external,
|
||||
paragraphStyle: args.paragraphs,
|
||||
joinSubjects: !!args.joinSubjects,
|
||||
calculateReadingEase: !!args.readingEase
|
||||
calculateReadingEase: !!args.readingEase,
|
||||
readingEaseWakeupInterval: typeof args.readingEase === 'boolean' ? 50 : +args.readingEase
|
||||
})
|
||||
ffc.coverUrl = args.cover
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "fimfic2epub",
|
||||
"version": "1.7.9",
|
||||
"version": "1.7.11",
|
||||
"description": "Tool to generate improved EPUB ebooks from Fimfiction stories",
|
||||
"author": "djazz",
|
||||
"repository": {
|
||||
|
@ -36,9 +36,9 @@
|
|||
"mithril-node-render": "^2.2.0",
|
||||
"node-png": "^0.4.3",
|
||||
"pretty-data": "^0.40.0",
|
||||
"reading-level": "0.0.7",
|
||||
"request": "^2.85.0",
|
||||
"sanitize-filename": "^1.6.0",
|
||||
"syllable": "^3.0.0",
|
||||
"twemoji": "^2.5.0",
|
||||
"zero-fill": "^2.2.3"
|
||||
},
|
||||
|
|
|
@ -98,7 +98,8 @@ class FimFic2Epub extends Emitter {
|
|||
includeExternal: true,
|
||||
paragraphStyle: 'spaced',
|
||||
joinSubjects: false,
|
||||
calculateReadingEase: false
|
||||
calculateReadingEase: false,
|
||||
readingEaseWakeupInterval: isNode ? 50 : 200 // lower for node, to not slow down thread
|
||||
}
|
||||
|
||||
this.options = Object.assign(this.defaultOptions, options)
|
||||
|
@ -130,6 +131,7 @@ class FimFic2Epub extends Emitter {
|
|||
this.coverFilename = ''
|
||||
this.coverType = ''
|
||||
this.coverImageDimensions = {width: 0, height: 0}
|
||||
this.readingEase = null
|
||||
|
||||
this.hasRemoteResources = {
|
||||
titlePage: false
|
||||
|
@ -383,19 +385,31 @@ class FimFic2Epub extends Emitter {
|
|||
this.notesHtml[i] = html
|
||||
})
|
||||
}
|
||||
chain = chain.then(() => {
|
||||
if (!ch.realWordCount) {
|
||||
ch.realWordCount = utils.htmlWordCount(chapter.content)
|
||||
}
|
||||
if (this.options.calculateReadingEase && !ch.readingEase) {
|
||||
let text = utils.htmlToText(chapter.content)
|
||||
text = text.replace(/\s+/g, ' ').trim()
|
||||
ch.readingEase = utils.readingEase(text)
|
||||
}
|
||||
this.progress(0, (i + 1) / this.chapters.length, 'Processed chapter ' + (i + 1) + ' / ' + this.chapters.length)
|
||||
}).then(() => new Promise((resolve) => setTimeout(resolve, 20)))
|
||||
chain = chain
|
||||
.then(() => {
|
||||
if (!ch.realWordCount) {
|
||||
ch.realWordCount = utils.htmlWordCount(chapter.content)
|
||||
}
|
||||
this.progress(0, (i + 1) / this.chapters.length, 'Processed chapter ' + (i + 1) + ' / ' + this.chapters.length)
|
||||
})
|
||||
.then(() => new Promise((resolve) => setTimeout(resolve, 0)))
|
||||
}
|
||||
|
||||
// Appended after the per-chapter steps: optionally compute a single Flesch
// reading ease score over the text of every chapter.
chain = chain.then(async () => {
  // Skip when the feature is disabled or a score has already been stored.
  if (!this.options.calculateReadingEase || this.readingEase) {
    return
  }

  // Concatenate the plain text of all chapters, separated by blank lines.
  const content = this.chapters.reduce((str, ch) => {
    return str + utils.htmlToText(ch.content) + '\n\n'
  }, '')

  this.progress(0, 0, 'Calculating Flesch reading ease...')

  this.readingEase = await utils.readingEase(
    content,
    // The option is declared as `readingEaseWakeupInterval`; reading any
    // other key yields undefined, which makes readingEase() fall back to its
    // Infinity default and never yield or report intermediate progress.
    this.options.readingEaseWakeupInterval,
    (progress) => {
      this.progress(0, progress, 'Calculating Flesch reading ease ' + Math.round(progress * 100) + '%')
    }
  )
})
|
||||
|
||||
return chain
|
||||
}
|
||||
|
||||
|
|
|
@ -346,15 +346,6 @@ function calcWordCount (chapters) {
|
|||
}
|
||||
return count
|
||||
}
|
||||
function calcReadingEase (chapters) {
|
||||
let avg = 0
|
||||
for (let i = 0; i < chapters.length; i++) {
|
||||
let ch = chapters[i]
|
||||
avg += ch.readingEase.ease
|
||||
}
|
||||
avg = avg / chapters.length
|
||||
return Math.round(avg * 100) / 100
|
||||
}
|
||||
|
||||
export function createTitlePage (ffc) {
|
||||
const tokenContent = '%%HTML_CONTENT_' + Math.random() + '%%'
|
||||
|
@ -402,7 +393,7 @@ export function createTitlePage (ffc) {
|
|||
ffc.storyInfo.publishDate && infoBox('First Published', prettyDate(new Date(ffc.storyInfo.publishDate * 1000))),
|
||||
infoBox('Last Modified', prettyDate(new Date(ffc.storyInfo.date_modified * 1000))),
|
||||
infoBox('Word Count', calcWordCount(ffc.storyInfo.chapters).toLocaleString('en-GB')),
|
||||
ffc.options.calculateReadingEase ? infoBox('Reading Ease', calcReadingEase(ffc.storyInfo.chapters).toLocaleString('en-GB')) : null
|
||||
ffc.options.calculateReadingEase && ffc.readingEase ? infoBox('Reading Ease', (Math.round(ffc.readingEase.ease * 100) / 100).toLocaleString('en-GB')) : null
|
||||
]),
|
||||
// m('hr'),
|
||||
m('section.tags', [
|
||||
|
|
79
src/utils.js
79
src/utils.js
|
@ -1,7 +1,7 @@
|
|||
|
||||
import htmlToTextModule from 'html-to-text'
|
||||
import matchWords from 'match-words'
|
||||
import { readingLevel } from 'reading-level'
|
||||
import syllable from 'syllable'
|
||||
|
||||
export function replaceAsync (str, re, callback) {
|
||||
// http://es5.github.io/#x15.5.4.11
|
||||
|
@ -72,6 +72,10 @@ export function htmlToText (html) {
|
|||
})
|
||||
}
|
||||
|
||||
/**
 * Return a promise that resolves after a timer of `ms` milliseconds fires.
 *
 * @param {number} ms - delay handed to setTimeout
 * @returns {Promise} resolves (with no value) when the timer fires
 */
export function sleep (ms) {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms))
|
||||
}
|
||||
|
||||
export function htmlWordCount (html) {
|
||||
let text = htmlToText(html)
|
||||
|
||||
|
@ -82,8 +86,73 @@ export function htmlWordCount (html) {
|
|||
return count
|
||||
}
|
||||
|
||||
export function readingEase (text) {
|
||||
const result = readingLevel(text, 'full')
|
||||
const ease = 206.835 - 1.015 * (result.words / result.sentences) - 84.6 * (result.syllables / result.words)
|
||||
return {ease, gradeLevel: result.unrounded}
|
||||
/**
 * Compute the Flesch reading ease score and the matching grade level of a
 * text without blocking the thread for the whole calculation.
 *
 * The text is split into sentences, each sentence into words, and syllables
 * are counted per word with `syllable()`. While looping over the sentences the
 * function awaits `sleep(0)` whenever more than `wakeupInterval` milliseconds
 * have passed since the last pause, so other queued work can run.
 *
 * @param {string} text - plain text to analyse
 * @param {number} [wakeupInterval=Infinity] - milliseconds of work between
 *   pauses; with the default the sentence loop never pauses
 * @param {function(number)} [progresscb] - called with a fraction from 0 to 1:
 *   0 after tokenizing, the current fraction at every pause, 1 when done
 * @returns {Promise<?{sentences: number, words: number, syllables: number, grade: number, ease: number}>}
 *   the counts and scores, or null when the text is not a string, contains no
 *   ASCII letter, or yields no countable words
 */
export async function readingEase (text, wakeupInterval = Infinity, progresscb) {
  // Single place for the "is there a callback" check.
  const report = (fraction) => {
    if (typeof progresscb === 'function') {
      progresscb(fraction)
    }
  }

  // Nothing to score without at least one letter.
  if (typeof text !== 'string' || !/[a-z]/i.test(text)) {
    return null
  }

  await sleep(0)

  // sentence tokenizer by Darkentor
  const tokenSentences = text
    // NUL is used as the sentence separator below, so remove every NUL already
    // present in the input (a string pattern would only remove the first one).
    .replace(/\0/g, '')
    .replace(/\s+/g, ' ') // Replace all whitespace (including newlines) with a single space
    // Drop the period after titles/abbreviations so it does not end a sentence.
    // The leading \b keeps this from matching the tail of ordinary words
    // such as "garden." or "extra.".
    .replace(/\b(mr|mrs|dr|ms|prof|rev|col|cmdr|flt|lt|brgdr|hon|wng|capt|rt|revd|gen|cdre|admrl|herr|hr|frau|alderman|alhaji|brig|cdr|cik|consul|datin|dato|datuk|seri|dhr|dipl|ing|dott|sa|dra|drs|en|encik|eng|eur|exma|sra|exmo|sr|lieut|fr|fraulein|fru|graaf|gravin|grp|hajah|haji|hajim|hra|ir|lcda|lic|maj|mlle|mme|mstr|nti|sri|rva|sig|na|ra|sqn|ldr|srta|wg)\./gi, '$1')
    // Mark a boundary after punctuation that is followed by a capitalised
    // word, and after every colon and semicolon.
    .replace(/(((^|\w).*?[^\w\s,]+)(?=\s+\W*[A-Z])|:|;)/g, '$1\0')
    .split(/\s*\0\s*/)

  report(0)

  await sleep(0)

  let words = 0
  let syllables = 0
  let lastTime = Date.now()

  for (let i = 0; i < tokenSentences.length; i++) {
    const now = Date.now()
    if (lastTime + wakeupInterval < now) {
      // Worked longer than the allowed interval: report and yield.
      lastTime = now
      report(i / tokenSentences.length)
      await sleep(0)
    }

    // strip all punctuation and underscores from the sentence
    const sentenceWords = tokenSentences[i]
      .replace(/[^\w\s]|_/g, '')
      .replace(/\s+/g, ' ')
      .split(' ')
      .filter(word => word)

    syllables += sentenceWords.reduce((total, word) => total + syllable(word), 0)
    words += sentenceWords.length
  }

  const sentences = tokenSentences.length
  // Flesch-Kincaid grade level and Flesch reading ease formulas.
  const grade = 0.39 * (words / sentences) + 11.8 * (syllables / words) - 15.59
  const ease = 206.835 - 1.015 * (words / sentences) - 84.6 * (syllables / words)

  // Release the sentence list early; it can be large for long stories.
  tokenSentences.length = 0

  // Zero words gives NaN. A score of exactly 0 is still a valid result, so
  // test for finiteness instead of truthiness.
  if (!Number.isFinite(ease)) {
    return null
  }

  report(1)

  return { sentences, words, syllables, grade, ease }
}
|
||||
|
|
Loading…
Reference in a new issue