Improved word counter

This commit is contained in:
Daniel Jönsson 2018-03-15 11:33:52 +01:00
parent db9d802821
commit da4ae1e053
5 changed files with 25 additions and 20 deletions

View file

@ -49,8 +49,12 @@ let webpackDefines = new webpack.DefinePlugin({
FIMFIC2EPUB_VERSION: JSON.stringify(packageVersion)
})
// No need to bloat the build with a list of all tlds...
let replaceTlds = new webpack.NormalModuleReplacementPlugin(/^tlds$/, '../../src/false')
webpackConfig.forEach((c) => {
c.plugins.push(webpackDefines)
c.plugins.push(replaceTlds)
})
let wpCompiler = webpack(webpackConfig)

View file

@ -41,6 +41,7 @@
"sanitize-filename": "^1.6.0",
"syllable": "^3.0.0",
"twemoji": "^2.5.0",
"url-regex": "^4.1.1",
"zero-fill": "^2.2.3"
},
"devDependencies": {

View file

@ -222,6 +222,7 @@ class FimFic2Epub extends EventEmitter {
let p = Promise.resolve()
let matchChapter = /<article class="chapter">[\s\S]*?<\/header>([\s\S]*?)<\/article>/g
for (let ma, i = 0; (ma = matchChapter.exec(html)); i++) {
const ch = this.storyInfo.chapters[i]
let chapterContent = ma[1]
chapterContent = chapterContent.replace(/<footer>[\s\S]*?<\/footer>/g, '').trim()
@ -237,7 +238,9 @@ class FimFic2Epub extends EventEmitter {
}
chapterContent = chapterContent.trim().replace(trimWhitespace, '')
let chapter = {content: chapterContent, notes: notesContent, notesFirst}
const chapter = {content: chapterContent, notes: notesContent, notesFirst}
ch.realWordCount = utils.htmlWordCount(chapter.content)
p = p.then(cleanMarkup(chapter.content).then((content) => {
chapter.content = content
}))
@ -253,7 +256,8 @@ class FimFic2Epub extends EventEmitter {
this.chaptersWithNotes.push(i)
}
this.chapters[i] = chapter
}).then(() => new Promise((resolve, reject) => setTimeout(resolve, 20)))
return utils.sleep(0)
})
}
return p
}).then(() => {
@ -385,12 +389,9 @@ class FimFic2Epub extends EventEmitter {
}
chain = chain
.then(() => {
if (!ch.realWordCount) {
ch.realWordCount = utils.htmlWordCount(chapter.content)
}
this.progress(0, ((i + 1) / this.chapters.length) * 0.99, 'Processed chapter ' + (i + 1) + ' / ' + this.chapters.length)
return utils.sleep(0)
})
.then(() => new Promise((resolve) => setTimeout(resolve, 0)))
}
chain = chain.then(async () => {

View file

@ -339,12 +339,7 @@ function infoBox (heading, data) {
}
/**
 * Total the word counts of a list of chapters.
 *
 * @param {Array<Object>} chapters - chapters to total; each entry is read for
 *   its `realWordCount` property (expected to be numeric).
 * @returns {number} the sum of every chapter's `realWordCount`; 0 when the
 *   list is empty.
 */
function calcWordCount (chapters) {
  // Single fold over the chapters; the initial value 0 makes an empty list safe.
  return chapters.reduce((count, ch) => count + ch.realWordCount, 0)
}
export function createTitlePage (ffc) {

View file

@ -1,5 +1,6 @@
import htmlToTextModule from 'html-to-text'
import urlRegex from 'url-regex'
import matchWords from 'match-words'
import syllable from 'syllable'
@ -64,20 +65,23 @@ export function webp2png (data) {
})
}
/**
 * Return a promise that resolves after a timer delay.
 *
 * @param {number} ms - delay handed to `setTimeout`.
 * @returns {Promise<undefined>} resolves (with no value) when the timer fires.
 */
export function sleep (ms) {
  return new Promise((resolve) => setTimeout(resolve, ms))
}

/**
 * Convert an HTML string to text using `htmlToTextModule.fromString`.
 *
 * The defaults below are applied first; any key present in `options`
 * overrides the matching default. The caller's `options` object is not
 * modified.
 *
 * @param {string} html - HTML markup to convert.
 * @param {Object} [options={}] - extra or overriding options forwarded to
 *   `htmlToTextModule.fromString`.
 * @returns {*} whatever `htmlToTextModule.fromString` returns for the input.
 */
export function htmlToText (html, options = {}) {
  // Merge into a fresh object so neither the defaults nor the caller's
  // options object is mutated.
  const mergedOptions = Object.assign({
    wordwrap: false,
    ignoreImage: true,
    ignoreHref: true
  }, options)
  return htmlToTextModule.fromString(html, mergedOptions)
}
export function htmlWordCount (html) {
html = html.replace(/<pre>.*?<\/pre>/g, '') // Ignore codeblocks
let text = htmlToText(html)
text = text.replace(urlRegex(), '') // Remove urls
let count = 0
try {