More kepubify work: add a kepubify option to the library, CLI, and web UI, and rework span generation

This commit is contained in:
daniel-j 2018-03-26 20:32:02 +02:00
parent e49c082fec
commit ab2994ecf5
6 changed files with 87 additions and 32 deletions

View file

@ -66,6 +66,7 @@ By default the EPUB will be saved in the current working directory with the file
-n, --no-notes Don't include author notes
-i, --notes-index Create an index with all author notes at the end of the ebook
-p, --paragraphs <style> Select a paragraph style <spaced|indented|indentedall|both> (default: spaced)
-k, --kepubify Add extra <span> elements for Kobo EPUB (KEPUB) format
-j, --join-subjects Join dc:subjects to a single value
-w, --wpm <number> Words per minute. Set to 0 to disable reading time estimations (default: 200)
-C, --cover <url> Set cover image url

View file

@ -18,6 +18,7 @@ import fetchRemote from './fetchRemote'
import * as template from './templates'
import { styleCss, coverstyleCss, titlestyleCss, iconsCss, navstyleCss, paragraphsCss } from './styles'
import * as utils from './utils'
import kepubify from './kepubify'
import subsetFont from './subsetFont'
import fontAwesomeCodes from '../build/font-awesome-codes.json'
@ -97,6 +98,7 @@ class FimFic2Epub extends EventEmitter {
addChapterHeadings: true,
includeExternal: true,
paragraphStyle: 'spaced',
kepubify: false,
joinSubjects: false,
calculateReadingEase: true,
readingEaseWakeupInterval: isNode ? 50 : 200, // lower for node, to not slow down thread
@ -374,6 +376,13 @@ class FimFic2Epub extends EventEmitter {
if (this.options.includeAuthorNotes && this.options.useAuthorNotesIndex && this.hasAuthorNotes) {
this.pages.notesnav = await template.createNotesNav(this)
}
if (this.options.kepubify) {
this.pages.nav = kepubify(this.pages.nav)
this.pages.title = kepubify(this.pages.title)
if (this.pages.notesnav) {
this.pages.notesnav = kepubify(this.pages.notesnav)
}
}
}
buildChapters () {
@ -400,6 +409,9 @@ class FimFic2Epub extends EventEmitter {
index: i
})).then((html) => {
this.findRemoteResources('ch_' + zeroFill(3, i + 1), {chapter: i}, html)
if (this.options.kepubify) {
html = kepubify(html)
}
this.chaptersHtml[i] = html
})
if (this.options.includeAuthorNotes && this.options.useAuthorNotesIndex && chapter.notes) {
@ -409,6 +421,9 @@ class FimFic2Epub extends EventEmitter {
index: i
}, true)).then((html) => {
this.findRemoteResources('note_' + zeroFill(3, i + 1), {note: i}, html)
if (this.options.kepubify) {
html = kepubify(html)
}
this.notesHtml[i] = html
})
}

View file

@ -15,6 +15,7 @@ const args = require('commander')
.option('-n, --no-notes', 'Don\'t include author notes')
.option('-i, --notes-index', 'Create an index with all author notes at the end of the ebook')
.option('-p, --paragraphs <style>', 'Select a paragraph style <spaced|indented|indentedall|both>', 'spaced')
.option('-k, --kepubify', 'Add extra <span> elements for Kobo EPUB (KEPUB) format')
.option('-j, --join-subjects', 'Join dc:subjects to a single value')
.option('-w, --wpm <number>', 'Words per minute. Set to 0 to disable reading time estimations', parseInt, 200)
.option('-C, --cover <url>', 'Set cover image url')
@ -50,6 +51,7 @@ const ffc = new FimFic2Epub(STORY_ID, {
addChapterHeadings: !!args.headings,
includeExternal: !!args.external,
paragraphStyle: args.paragraphs,
kepubify: !!args.kepubify,
joinSubjects: !!args.joinSubjects,
calculateReadingEase: !!args.readingEase,
readingEaseWakeupInterval: 800,
@ -71,7 +73,11 @@ ffc.fetchMetadata()
.then(ffc.fetchAll.bind(ffc))
.then(ffc.build.bind(ffc))
.then(() => {
let filename = (args.args[1] || '').replace('%id%', ffc.storyInfo.id) || ffc.filename
let filename = ffc.filename
if (ffc.options.kepubify) {
filename = filename.replace(/\.epub$/, '.kepub.epub')
}
filename = (args.args[1] || '').replace('%id%', ffc.storyInfo.id) || filename
let stream
if (args.dir) {

View file

@ -1,28 +1,28 @@
import et from 'elementtree'
// TODO: Fix tail text outside of <body></body> going missing from the output
export default function kepubify (html) {
const tree = et.parse(html)
const body = tree.find('./body')
addDivs(body)
body.getchildren().forEach((child) => addSpansToNode(child, body))
return tree.write()
const state = {paragraph: 0, segment: 0}
body.getchildren().forEach((child) => addSpansToNode(child, body, state))
return '<?xml version="1.0" encoding="utf-8"?>\n<!DOCTYPE html>\n' + tree.write({
xml_declaration: false
})
}
const specialTags = /^(img|pre)$/i
const sentenceRe = /(((^|\w).*?[^\w\s,]+)(?=\s+\W*[A-Z])|:|;)/g
let paragraph_counter = 0
let segment_counter = 0
const specialTags = /^(img|pre|svg)$/i
const paragraphTags = /^(p|ol|ul)$/i
function addDivs (body) {
const bookInner = et.Element('div', {class: 'book-inner'})
const bookColumns = et.SubElement(bookInner, 'div', {class: 'book-columns'})
body.getchildren().forEach((child, i) => {
body.getchildren().splice(i, 1)
bookColumns.getchildren().push(child)
})
body.append(bookInner)
bookColumns._children = body.getchildren()
body._children = [bookInner]
}
function createSpan (paragraph, segment) {
@ -33,44 +33,62 @@ function createSpan (paragraph, segment) {
return span
}
function addSpans (node, text) {
function textToSpans (node, text, state) {
const tokenSentences = text
.replace('\0', '')
.replace(/\s+/g, ' ') // Replace all whitespace (including newlines) with a single space
.normalize('NFD').replace(/[\u0300-\u036f]/g, '') // strip diacritics since JS's \w group and explicit [a-z]|[A-Z] don't account for them
.replace(/(mr|mrs|dr|ms|prof|rev|col|cmdr|flt|lt|brgdr|hon|wng|capt|rt|revd|gen|cdre|admrl|herr|hr|frau|alderman|alhaji|brig|cdr|cik|consul|datin|dato|datuk|seri|dhr|dipl|ing|dott|sa|dra|drs|en|encik|eng|eur|exma|sra|exmo|sr|lieut|fr|fraulein|fru|graaf|gravin|grp|hajah|haji|hajim|hra|ir|lcda|lic|maj|mlle|mme|mstr|nti|sri|rva|sig|na|ra|sqn|ldr|srta|wg|co|esq|inc|iou|ltd|mdlle|messers|messrs|mlles|mm|mmes|mt|p\.s|pvt|st|viz)\./gi, '$1')
.replace(/(((^|\w).*?[^\w\s,]+)(?=\s+\W*[A-Z])|:|;)/g, '$1\0')
.split(/\s*\0\s*/)
.split(/\s*\0/)
for (let i = 0; i < tokenSentences.length; i++) {
let s = tokenSentences[i]
if (s.trim().length === 0) {
if (i-1 >= 0) tokenSentences[i-1] += s
tokenSentences.splice(i, 1)
i--
}
}
return tokenSentences.map((sentence, i) => {
if (!sentence) return null
const span = createSpan(paragraph_counter, segment_counter)
const span = createSpan(state.paragraph, state.segment++)
span.text = sentence
return span
}).filter((el) => el)
}).filter(el => el)
}
function addSpansToNode (node, parent) {
const nodePosition = parent.getchildren().indexOf(node)
function addSpansToNode (node, parent, state) {
let nodePosition = parent.getchildren().indexOf(node)
if (node.tag.match(paragraphTags)) {
state.paragraph++
state.segment = 0
}
if (node.tag.match(specialTags)) {
const span = createSpan(paragraph_counter, segment_counter)
const span = createSpan(state.paragraph, state.segment++)
span.append(node)
parent.getchildren().splice(nodePosition, 1, span)
} else {
node.getchildren().forEach((child) => {
addSpansToNode(child, node)
})
}
let prependNodes = []
if (node.text) {
addSpans(node, node.text).forEach((span, i) => {
if (node.text) {
prependNodes = textToSpans(node, node.text, state)
node.text = null
}
node.getchildren().forEach((child) => {
addSpansToNode(child, node, state)
})
prependNodes.forEach((span, i) => {
node.getchildren().splice(i, 0, span)
})
node.text = null
}
if (node.tail) {
addSpans(node, node.tail).forEach((span, i) => {
nodePosition = parent.getchildren().indexOf(node)
textToSpans(node, node.tail, state).forEach((span, i) => {
parent.getchildren().splice(nodePosition + 1 + i, 0, span)
})
node.tail = null

View file

@ -135,6 +135,7 @@ let dialog = {
this.useAuthorNotesIndex = prop(ffc.options.useAuthorNotesIndex)
this.addChapterHeadings = prop(ffc.options.addChapterHeadings)
this.includeExternal = prop(ffc.options.includeExternal)
this.kepubify = prop(ffc.options.kepubify)
this.joinSubjects = prop(ffc.options.joinSubjects)
this.paragraphStyle = prop(ffc.options.paragraphStyle)
this.calculateReadingEase = prop(ffc.options.calculateReadingEase)
@ -261,7 +262,8 @@ let dialog = {
m(checkbox, {checked: ctrl.calculateReadingEase(), onchange: m.withAttr('checked', ctrl.calculateReadingEase)}, 'Calculate Flesch reading ease'),
m(checkbox, {checked: ctrl.addChapterBars(), onchange: m.withAttr('checked', ctrl.addChapterBars)}, 'Show reading progress and chapter lengths as bars'),
m(checkbox, {checked: ctrl.includeExternal(), onchange: m.withAttr('checked', ctrl.includeExternal)}, 'Download & include remote content (embed images)'),
m('div', {style: 'font-size: 0.9em; line-height: 1em; margin-top: 4px; margin-bottom: 6px; opacity: 0.6;'}, 'Note: Disabling this creates invalid EPUBs and requires internet access to see remote content. Only cover image will be embedded.')
m('div', {style: 'font-size: 0.9em; line-height: 1em; margin-top: 4px; margin-bottom: 6px; opacity: 0.6;'}, 'Note: Disabling this creates invalid EPUBs and requires internet access to see remote content. Only cover image will be embedded.'),
m(checkbox, {checked: ctrl.kepubify(), onchange: m.withAttr('checked', ctrl.kepubify)}, 'Export as Kobo EPUB, this adds some Kobo-specific div/span tags.'),
)),
m('tr', m('td.label', 'Words per minute'), m('td', {colspan: 2},
m('input', {type: 'number', min: 0, step: 1, value: ctrl.wordsPerMinute(), onchange: m.withAttr('value', ctrl.wordsPerMinute), placeholder: '200 (default)', style: {width: '140px', float: 'left', marginRight: '.75rem', marginTop: '.35rem', position: 'relative', zIndex: 1}}),
@ -276,7 +278,7 @@ let dialog = {
))
]),
m('.drop-down-pop-up-footer', [
m('button.styled_button', {onclick: ctrl.createEpub, disabled: ffcProgress() >= 0 && ffcProgress() < 1, style: 'float: right'}, 'Download EPUB'),
m('button.styled_button', {onclick: ctrl.createEpub, disabled: ffcProgress() >= 0 && ffcProgress() < 1, style: 'float: right'}, 'Download ' + (ctrl.kepubify() ? 'Kobo EPUB' : 'EPUB')),
ffcProgress() >= 0 ? m('.rating_container',
m('.rating-bar', {style: {background: 'rgba(0, 0, 0, 0.2)', 'margin-right': '5px'}}, m('.like-bar', {style: {width: Math.max(0, ffcProgress()) * 100 + '%'}})),
' ',
@ -327,6 +329,7 @@ function createEpub (model) {
ffc.options.addChapterHeadings = model.addChapterHeadings()
ffc.options.includeExternal = model.includeExternal()
ffc.options.paragraphStyle = model.paragraphStyle()
ffc.options.kepubify = model.kepubify()
ffc.subjects = model.subjects()
ffc.options.joinSubjects = model.joinSubjects()
ffc.options.calculateReadingEase = model.calculateReadingEase()
@ -348,7 +351,11 @@ function createEpub (model) {
alert('Add .epub to the filename of the downloaded file')
})
} else {
saveAs(file, ffc.filename)
let filename = ffc.filename
if (ffc.options.kepubify) {
filename = filename.replace(/\.epub$/, '.kepub.epub')
}
saveAs(file, filename)
}
})
}

View file

@ -4,4 +4,12 @@ require('mithril/test-utils/browserMock')(global)
// Load the default export of the kepubify module under test.
const kepubify = require('../src/kepubify').default
// Smoke check: print the raw kepubify output for a single paragraph that
// contains several sentences and an inline <img />.
console.log(kepubify(`<html><body><p>Some text. Woo or not. Here is <img /> another sentence.</p></body></html>`))
// Case 1: prints the label 1 followed by whether the output strictly equals
// the expected markup. The input has text before the first <p>, comments
// inside and between paragraphs, and tail text after </p>, </body> and
// </html>. The expected string contains neither the comments nor the
// " html tail" text, and it lists the span wrapping each <img /> after the
// span for the text that follows the image (ids 2.4 then 2.3, 3.2 then 3.1).
console.log(1, kepubify(`<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html>
<html><body>text <p>aaaa</p><p>Some text. Woo <!-- or --> not. Here is <img /> another sentence.</p><!-- comment --><p>More text <img/> tail</p> body tail</body> html tail</html>`) === `<?xml version="1.0" encoding="utf-8"?>\n<!DOCTYPE html>\n<html><body>text <div class="book-inner"><div class="book-columns"><p><span class="koboSpan" id="kobo.1.0">aaaa</span></p><p><span class="koboSpan" id="kobo.2.0">Some text.</span><span class="koboSpan" id="kobo.2.1"> Woo not.</span><span class="koboSpan" id="kobo.2.2"> Here is </span><span class="koboSpan" id="kobo.2.4"> another sentence.</span><span class="koboSpan" id="kobo.2.3"><img /></span></p><p><span class="koboSpan" id="kobo.3.0">More text </span><span class="koboSpan" id="kobo.3.2"> tail</span><span class="koboSpan" id="kobo.3.1"><img /></span></p><span class="koboSpan" id="kobo.3.3"> body tail</span></div></div></body></html>`)
// Case 2: prints the label 2 followed by whether "Dated: June 5th. Wohoo" is
// split into exactly three spans: "Dated:", " June 5th." and " Wohoo".
console.log(2, kepubify(`<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html>
<html><body><p>Dated: June 5th. Wohoo</p></body></html>`) === `<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html>
<html><body><div class="book-inner"><div class="book-columns"><p><span class="koboSpan" id="kobo.1.0">Dated:</span><span class="koboSpan" id="kobo.1.1"> June 5th.</span><span class="koboSpan" id="kobo.1.2"> Wohoo</span></p></div></div></body></html>`)