Improved word counter

This commit is contained in:
Daniel Jönsson 2018-03-15 11:33:52 +01:00
parent db9d802821
commit da4ae1e053
5 changed files with 25 additions and 20 deletions

View file

@ -49,8 +49,12 @@ let webpackDefines = new webpack.DefinePlugin({
FIMFIC2EPUB_VERSION: JSON.stringify(packageVersion) FIMFIC2EPUB_VERSION: JSON.stringify(packageVersion)
}) })
// No need to bloat the build with a list of all tlds...
let replaceTlds = new webpack.NormalModuleReplacementPlugin(/^tlds$/, '../../src/false')
webpackConfig.forEach((c) => { webpackConfig.forEach((c) => {
c.plugins.push(webpackDefines) c.plugins.push(webpackDefines)
c.plugins.push(replaceTlds)
}) })
let wpCompiler = webpack(webpackConfig) let wpCompiler = webpack(webpackConfig)

View file

@ -41,6 +41,7 @@
"sanitize-filename": "^1.6.0", "sanitize-filename": "^1.6.0",
"syllable": "^3.0.0", "syllable": "^3.0.0",
"twemoji": "^2.5.0", "twemoji": "^2.5.0",
"url-regex": "^4.1.1",
"zero-fill": "^2.2.3" "zero-fill": "^2.2.3"
}, },
"devDependencies": { "devDependencies": {

View file

@ -222,6 +222,7 @@ class FimFic2Epub extends EventEmitter {
let p = Promise.resolve() let p = Promise.resolve()
let matchChapter = /<article class="chapter">[\s\S]*?<\/header>([\s\S]*?)<\/article>/g let matchChapter = /<article class="chapter">[\s\S]*?<\/header>([\s\S]*?)<\/article>/g
for (let ma, i = 0; (ma = matchChapter.exec(html)); i++) { for (let ma, i = 0; (ma = matchChapter.exec(html)); i++) {
const ch = this.storyInfo.chapters[i]
let chapterContent = ma[1] let chapterContent = ma[1]
chapterContent = chapterContent.replace(/<footer>[\s\S]*?<\/footer>/g, '').trim() chapterContent = chapterContent.replace(/<footer>[\s\S]*?<\/footer>/g, '').trim()
@ -237,7 +238,9 @@ class FimFic2Epub extends EventEmitter {
} }
chapterContent = chapterContent.trim().replace(trimWhitespace, '') chapterContent = chapterContent.trim().replace(trimWhitespace, '')
let chapter = {content: chapterContent, notes: notesContent, notesFirst} const chapter = {content: chapterContent, notes: notesContent, notesFirst}
ch.realWordCount = utils.htmlWordCount(chapter.content)
p = p.then(cleanMarkup(chapter.content).then((content) => { p = p.then(cleanMarkup(chapter.content).then((content) => {
chapter.content = content chapter.content = content
})) }))
@ -253,7 +256,8 @@ class FimFic2Epub extends EventEmitter {
this.chaptersWithNotes.push(i) this.chaptersWithNotes.push(i)
} }
this.chapters[i] = chapter this.chapters[i] = chapter
}).then(() => new Promise((resolve, reject) => setTimeout(resolve, 20))) return utils.sleep(0)
})
} }
return p return p
}).then(() => { }).then(() => {
@ -385,12 +389,9 @@ class FimFic2Epub extends EventEmitter {
} }
chain = chain chain = chain
.then(() => { .then(() => {
if (!ch.realWordCount) {
ch.realWordCount = utils.htmlWordCount(chapter.content)
}
this.progress(0, ((i + 1) / this.chapters.length) * 0.99, 'Processed chapter ' + (i + 1) + ' / ' + this.chapters.length) this.progress(0, ((i + 1) / this.chapters.length) * 0.99, 'Processed chapter ' + (i + 1) + ' / ' + this.chapters.length)
return utils.sleep(0)
}) })
.then(() => new Promise((resolve) => setTimeout(resolve, 0)))
} }
chain = chain.then(async () => { chain = chain.then(async () => {

View file

@ -339,12 +339,7 @@ function infoBox (heading, data) {
} }
function calcWordCount (chapters) { function calcWordCount (chapters) {
let count = 0 return chapters.reduce((count, ch) => count + ch.realWordCount, 0)
for (let i = 0; i < chapters.length; i++) {
let ch = chapters[i]
count += ch.realWordCount
}
return count
} }
export function createTitlePage (ffc) { export function createTitlePage (ffc) {

View file

@ -1,5 +1,6 @@
import htmlToTextModule from 'html-to-text' import htmlToTextModule from 'html-to-text'
import urlRegex from 'url-regex'
import matchWords from 'match-words' import matchWords from 'match-words'
import syllable from 'syllable' import syllable from 'syllable'
@ -64,20 +65,23 @@ export function webp2png (data) {
}) })
} }
/**
 * Convert an HTML string to plain text via the html-to-text module.
 *
 * The conversion always runs with `wordwrap: false`, `ignoreImage: true`
 * and `ignoreHref: true` (going by the option names, presumably: no line
 * wrapping, and images / link targets left out of the output).
 *
 * @param {string} html - Markup to convert.
 * @returns {*} Whatever `htmlToTextModule.fromString` returns for `html`.
 */
export function htmlToText (html) {
  const conversionOptions = {
    wordwrap: false,
    ignoreImage: true,
    ignoreHref: true
  }
  return htmlToTextModule.fromString(html, conversionOptions)
}
export function sleep (ms) { export function sleep (ms) {
return new Promise((resolve) => setTimeout(resolve, ms)) return new Promise((resolve) => setTimeout(resolve, ms))
} }
/**
 * Convert an HTML string to plain text via the html-to-text module.
 *
 * Caller-supplied options are layered on top of the built-in defaults
 * (`wordwrap: false`, `ignoreImage: true`, `ignoreHref: true`), so any key
 * present in `options` wins over the default of the same name. The caller's
 * object is never modified.
 *
 * @param {string} html - Markup to convert.
 * @param {Object} [options={}] - Extra or overriding options forwarded to
 *   `htmlToTextModule.fromString`.
 * @returns {*} Whatever `htmlToTextModule.fromString` returns for `html`.
 */
export function htmlToText (html, options = {}) {
  const defaults = {
    wordwrap: false,
    ignoreImage: true,
    ignoreHref: true
  }
  // `defaults` is a fresh object on every call, so merging into it is safe.
  const mergedOptions = Object.assign(defaults, options)
  return htmlToTextModule.fromString(html, mergedOptions)
}
export function htmlWordCount (html) { export function htmlWordCount (html) {
html = html.replace(/<pre>.*?<\/pre>/g, '') // Ignore codeblocks
let text = htmlToText(html) let text = htmlToText(html)
text = text.replace(urlRegex(), '') // Remove urls
let count = 0 let count = 0
try { try {