mirror of
https://github.com/daniel-j/fimfic2epub.git
synced 2024-09-30 09:06:14 +13:00
Improved word counter
This commit is contained in:
parent
db9d802821
commit
da4ae1e053
5 changed files with 25 additions and 20 deletions
|
@ -49,8 +49,12 @@ let webpackDefines = new webpack.DefinePlugin({
|
||||||
FIMFIC2EPUB_VERSION: JSON.stringify(packageVersion)
|
FIMFIC2EPUB_VERSION: JSON.stringify(packageVersion)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// No need to bloat the build with a list of all tlds...
|
||||||
|
let replaceTlds = new webpack.NormalModuleReplacementPlugin(/^tlds$/, '../../src/false')
|
||||||
|
|
||||||
webpackConfig.forEach((c) => {
|
webpackConfig.forEach((c) => {
|
||||||
c.plugins.push(webpackDefines)
|
c.plugins.push(webpackDefines)
|
||||||
|
c.plugins.push(replaceTlds)
|
||||||
})
|
})
|
||||||
|
|
||||||
let wpCompiler = webpack(webpackConfig)
|
let wpCompiler = webpack(webpackConfig)
|
||||||
|
|
|
@ -41,6 +41,7 @@
|
||||||
"sanitize-filename": "^1.6.0",
|
"sanitize-filename": "^1.6.0",
|
||||||
"syllable": "^3.0.0",
|
"syllable": "^3.0.0",
|
||||||
"twemoji": "^2.5.0",
|
"twemoji": "^2.5.0",
|
||||||
|
"url-regex": "^4.1.1",
|
||||||
"zero-fill": "^2.2.3"
|
"zero-fill": "^2.2.3"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
|
|
|
@ -222,6 +222,7 @@ class FimFic2Epub extends EventEmitter {
|
||||||
let p = Promise.resolve()
|
let p = Promise.resolve()
|
||||||
let matchChapter = /<article class="chapter">[\s\S]*?<\/header>([\s\S]*?)<\/article>/g
|
let matchChapter = /<article class="chapter">[\s\S]*?<\/header>([\s\S]*?)<\/article>/g
|
||||||
for (let ma, i = 0; (ma = matchChapter.exec(html)); i++) {
|
for (let ma, i = 0; (ma = matchChapter.exec(html)); i++) {
|
||||||
|
const ch = this.storyInfo.chapters[i]
|
||||||
let chapterContent = ma[1]
|
let chapterContent = ma[1]
|
||||||
chapterContent = chapterContent.replace(/<footer>[\s\S]*?<\/footer>/g, '').trim()
|
chapterContent = chapterContent.replace(/<footer>[\s\S]*?<\/footer>/g, '').trim()
|
||||||
|
|
||||||
|
@ -237,7 +238,9 @@ class FimFic2Epub extends EventEmitter {
|
||||||
}
|
}
|
||||||
|
|
||||||
chapterContent = chapterContent.trim().replace(trimWhitespace, '')
|
chapterContent = chapterContent.trim().replace(trimWhitespace, '')
|
||||||
let chapter = {content: chapterContent, notes: notesContent, notesFirst}
|
const chapter = {content: chapterContent, notes: notesContent, notesFirst}
|
||||||
|
ch.realWordCount = utils.htmlWordCount(chapter.content)
|
||||||
|
|
||||||
p = p.then(cleanMarkup(chapter.content).then((content) => {
|
p = p.then(cleanMarkup(chapter.content).then((content) => {
|
||||||
chapter.content = content
|
chapter.content = content
|
||||||
}))
|
}))
|
||||||
|
@ -253,7 +256,8 @@ class FimFic2Epub extends EventEmitter {
|
||||||
this.chaptersWithNotes.push(i)
|
this.chaptersWithNotes.push(i)
|
||||||
}
|
}
|
||||||
this.chapters[i] = chapter
|
this.chapters[i] = chapter
|
||||||
}).then(() => new Promise((resolve, reject) => setTimeout(resolve, 20)))
|
return utils.sleep(0)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
return p
|
return p
|
||||||
}).then(() => {
|
}).then(() => {
|
||||||
|
@ -385,12 +389,9 @@ class FimFic2Epub extends EventEmitter {
|
||||||
}
|
}
|
||||||
chain = chain
|
chain = chain
|
||||||
.then(() => {
|
.then(() => {
|
||||||
if (!ch.realWordCount) {
|
|
||||||
ch.realWordCount = utils.htmlWordCount(chapter.content)
|
|
||||||
}
|
|
||||||
this.progress(0, ((i + 1) / this.chapters.length) * 0.99, 'Processed chapter ' + (i + 1) + ' / ' + this.chapters.length)
|
this.progress(0, ((i + 1) / this.chapters.length) * 0.99, 'Processed chapter ' + (i + 1) + ' / ' + this.chapters.length)
|
||||||
|
return utils.sleep(0)
|
||||||
})
|
})
|
||||||
.then(() => new Promise((resolve) => setTimeout(resolve, 0)))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
chain = chain.then(async () => {
|
chain = chain.then(async () => {
|
||||||
|
|
|
@ -339,12 +339,7 @@ function infoBox (heading, data) {
|
||||||
}
|
}
|
||||||
|
|
||||||
function calcWordCount (chapters) {
|
function calcWordCount (chapters) {
|
||||||
let count = 0
|
return chapters.reduce((count, ch) => count + ch.realWordCount, 0)
|
||||||
for (let i = 0; i < chapters.length; i++) {
|
|
||||||
let ch = chapters[i]
|
|
||||||
count += ch.realWordCount
|
|
||||||
}
|
|
||||||
return count
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export function createTitlePage (ffc) {
|
export function createTitlePage (ffc) {
|
||||||
|
|
20
src/utils.js
20
src/utils.js
|
@ -1,5 +1,6 @@
|
||||||
|
|
||||||
import htmlToTextModule from 'html-to-text'
|
import htmlToTextModule from 'html-to-text'
|
||||||
|
import urlRegex from 'url-regex'
|
||||||
import matchWords from 'match-words'
|
import matchWords from 'match-words'
|
||||||
import syllable from 'syllable'
|
import syllable from 'syllable'
|
||||||
|
|
||||||
|
@ -64,20 +65,23 @@ export function webp2png (data) {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
export function htmlToText (html) {
|
|
||||||
return htmlToTextModule.fromString(html, {
|
|
||||||
wordwrap: false,
|
|
||||||
ignoreImage: true,
|
|
||||||
ignoreHref: true
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
export function sleep (ms) {
|
export function sleep (ms) {
|
||||||
return new Promise((resolve) => setTimeout(resolve, ms))
|
return new Promise((resolve) => setTimeout(resolve, ms))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export function htmlToText (html, options = {}) {
|
||||||
|
options = Object.assign({
|
||||||
|
wordwrap: false,
|
||||||
|
ignoreImage: true,
|
||||||
|
ignoreHref: true
|
||||||
|
}, options)
|
||||||
|
return htmlToTextModule.fromString(html, options)
|
||||||
|
}
|
||||||
|
|
||||||
export function htmlWordCount (html) {
|
export function htmlWordCount (html) {
|
||||||
|
html = html.replace(/<pre>.*?<\/pre>/g, '') // Ignore codeblocks
|
||||||
let text = htmlToText(html)
|
let text = htmlToText(html)
|
||||||
|
text = text.replace(urlRegex(), '') // Remove urls
|
||||||
|
|
||||||
let count = 0
|
let count = 0
|
||||||
try {
|
try {
|
||||||
|
|
Loading…
Reference in a new issue