mirror of
https://github.com/daniel-j/fimfic2epub.git
synced 2024-06-25 01:21:15 +12:00
Improved word counter
This commit is contained in:
parent
db9d802821
commit
da4ae1e053
|
@ -49,8 +49,12 @@ let webpackDefines = new webpack.DefinePlugin({
|
|||
FIMFIC2EPUB_VERSION: JSON.stringify(packageVersion)
|
||||
})
|
||||
|
||||
// No need to bloat the build with a list of all tlds...
|
||||
let replaceTlds = new webpack.NormalModuleReplacementPlugin(/^tlds$/, '../../src/false')
|
||||
|
||||
webpackConfig.forEach((c) => {
|
||||
c.plugins.push(webpackDefines)
|
||||
c.plugins.push(replaceTlds)
|
||||
})
|
||||
|
||||
let wpCompiler = webpack(webpackConfig)
|
||||
|
|
|
@ -41,6 +41,7 @@
|
|||
"sanitize-filename": "^1.6.0",
|
||||
"syllable": "^3.0.0",
|
||||
"twemoji": "^2.5.0",
|
||||
"url-regex": "^4.1.1",
|
||||
"zero-fill": "^2.2.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
|
|
@ -222,6 +222,7 @@ class FimFic2Epub extends EventEmitter {
|
|||
let p = Promise.resolve()
|
||||
let matchChapter = /<article class="chapter">[\s\S]*?<\/header>([\s\S]*?)<\/article>/g
|
||||
for (let ma, i = 0; (ma = matchChapter.exec(html)); i++) {
|
||||
const ch = this.storyInfo.chapters[i]
|
||||
let chapterContent = ma[1]
|
||||
chapterContent = chapterContent.replace(/<footer>[\s\S]*?<\/footer>/g, '').trim()
|
||||
|
||||
|
@ -237,7 +238,9 @@ class FimFic2Epub extends EventEmitter {
|
|||
}
|
||||
|
||||
chapterContent = chapterContent.trim().replace(trimWhitespace, '')
|
||||
let chapter = {content: chapterContent, notes: notesContent, notesFirst}
|
||||
const chapter = {content: chapterContent, notes: notesContent, notesFirst}
|
||||
ch.realWordCount = utils.htmlWordCount(chapter.content)
|
||||
|
||||
p = p.then(cleanMarkup(chapter.content).then((content) => {
|
||||
chapter.content = content
|
||||
}))
|
||||
|
@ -253,7 +256,8 @@ class FimFic2Epub extends EventEmitter {
|
|||
this.chaptersWithNotes.push(i)
|
||||
}
|
||||
this.chapters[i] = chapter
|
||||
}).then(() => new Promise((resolve, reject) => setTimeout(resolve, 20)))
|
||||
return utils.sleep(0)
|
||||
})
|
||||
}
|
||||
return p
|
||||
}).then(() => {
|
||||
|
@ -385,12 +389,9 @@ class FimFic2Epub extends EventEmitter {
|
|||
}
|
||||
chain = chain
|
||||
.then(() => {
|
||||
if (!ch.realWordCount) {
|
||||
ch.realWordCount = utils.htmlWordCount(chapter.content)
|
||||
}
|
||||
this.progress(0, ((i + 1) / this.chapters.length) * 0.99, 'Processed chapter ' + (i + 1) + ' / ' + this.chapters.length)
|
||||
return utils.sleep(0)
|
||||
})
|
||||
.then(() => new Promise((resolve) => setTimeout(resolve, 0)))
|
||||
}
|
||||
|
||||
chain = chain.then(async () => {
|
||||
|
|
|
@ -339,12 +339,7 @@ function infoBox (heading, data) {
|
|||
}
|
||||
|
||||
function calcWordCount (chapters) {
|
||||
let count = 0
|
||||
for (let i = 0; i < chapters.length; i++) {
|
||||
let ch = chapters[i]
|
||||
count += ch.realWordCount
|
||||
}
|
||||
return count
|
||||
return chapters.reduce((count, ch) => count + ch.realWordCount, 0)
|
||||
}
|
||||
|
||||
export function createTitlePage (ffc) {
|
||||
|
|
20
src/utils.js
20
src/utils.js
|
@ -1,5 +1,6 @@
|
|||
|
||||
import htmlToTextModule from 'html-to-text'
|
||||
import urlRegex from 'url-regex'
|
||||
import matchWords from 'match-words'
|
||||
import syllable from 'syllable'
|
||||
|
||||
|
@ -64,20 +65,23 @@ export function webp2png (data) {
|
|||
})
|
||||
}
|
||||
|
||||
export function htmlToText (html) {
|
||||
return htmlToTextModule.fromString(html, {
|
||||
wordwrap: false,
|
||||
ignoreImage: true,
|
||||
ignoreHref: true
|
||||
})
|
||||
}
|
||||
|
||||
export function sleep (ms) {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms))
|
||||
}
|
||||
|
||||
export function htmlToText (html, options = {}) {
|
||||
options = Object.assign({
|
||||
wordwrap: false,
|
||||
ignoreImage: true,
|
||||
ignoreHref: true
|
||||
}, options)
|
||||
return htmlToTextModule.fromString(html, options)
|
||||
}
|
||||
|
||||
export function htmlWordCount (html) {
|
||||
html = html.replace(/<pre>.*?<\/pre>/g, '') // Ignore codeblocks
|
||||
let text = htmlToText(html)
|
||||
text = text.replace(urlRegex(), '') // Remove urls
|
||||
|
||||
let count = 0
|
||||
try {
|
||||
|
|
Loading…
Reference in a new issue