More kepubify stuff

This commit is contained in:
daniel-j 2018-03-26 20:32:02 +02:00
parent e49c082fec
commit ab2994ecf5
6 changed files with 87 additions and 32 deletions

View file

@ -66,6 +66,7 @@ By default the EPUB will be saved in the current working directory with the file
-n, --no-notes Don't include author notes -n, --no-notes Don't include author notes
-i, --notes-index Create an index with all author notes at the end of the ebook -i, --notes-index Create an index with all author notes at the end of the ebook
-p, --paragraphs <style> Select a paragraph style <spaced|indented|indentedall|both> (default: spaced) -p, --paragraphs <style> Select a paragraph style <spaced|indented|indentedall|both> (default: spaced)
-k, --kepubify Add extra <span> elements for Kobo EPUB (KEPUB) format
-j, --join-subjects Join dc:subjects to a single value -j, --join-subjects Join dc:subjects to a single value
-w, --wpm <number> Words per minute. Set to 0 to disable reading time estimations (default: 200) -w, --wpm <number> Words per minute. Set to 0 to disable reading time estimations (default: 200)
-C, --cover <url> Set cover image url -C, --cover <url> Set cover image url

View file

@ -18,6 +18,7 @@ import fetchRemote from './fetchRemote'
import * as template from './templates' import * as template from './templates'
import { styleCss, coverstyleCss, titlestyleCss, iconsCss, navstyleCss, paragraphsCss } from './styles' import { styleCss, coverstyleCss, titlestyleCss, iconsCss, navstyleCss, paragraphsCss } from './styles'
import * as utils from './utils' import * as utils from './utils'
import kepubify from './kepubify'
import subsetFont from './subsetFont' import subsetFont from './subsetFont'
import fontAwesomeCodes from '../build/font-awesome-codes.json' import fontAwesomeCodes from '../build/font-awesome-codes.json'
@ -97,6 +98,7 @@ class FimFic2Epub extends EventEmitter {
addChapterHeadings: true, addChapterHeadings: true,
includeExternal: true, includeExternal: true,
paragraphStyle: 'spaced', paragraphStyle: 'spaced',
kepubify: false,
joinSubjects: false, joinSubjects: false,
calculateReadingEase: true, calculateReadingEase: true,
readingEaseWakeupInterval: isNode ? 50 : 200, // lower for node, to not slow down thread readingEaseWakeupInterval: isNode ? 50 : 200, // lower for node, to not slow down thread
@ -374,6 +376,13 @@ class FimFic2Epub extends EventEmitter {
if (this.options.includeAuthorNotes && this.options.useAuthorNotesIndex && this.hasAuthorNotes) { if (this.options.includeAuthorNotes && this.options.useAuthorNotesIndex && this.hasAuthorNotes) {
this.pages.notesnav = await template.createNotesNav(this) this.pages.notesnav = await template.createNotesNav(this)
} }
if (this.options.kepubify) {
this.pages.nav = kepubify(this.pages.nav)
this.pages.title = kepubify(this.pages.title)
if (this.pages.notesnav) {
this.pages.notesnav = kepubify(this.pages.notesnav)
}
}
} }
buildChapters () { buildChapters () {
@ -400,6 +409,9 @@ class FimFic2Epub extends EventEmitter {
index: i index: i
})).then((html) => { })).then((html) => {
this.findRemoteResources('ch_' + zeroFill(3, i + 1), {chapter: i}, html) this.findRemoteResources('ch_' + zeroFill(3, i + 1), {chapter: i}, html)
if (this.options.kepubify) {
html = kepubify(html)
}
this.chaptersHtml[i] = html this.chaptersHtml[i] = html
}) })
if (this.options.includeAuthorNotes && this.options.useAuthorNotesIndex && chapter.notes) { if (this.options.includeAuthorNotes && this.options.useAuthorNotesIndex && chapter.notes) {
@ -409,6 +421,9 @@ class FimFic2Epub extends EventEmitter {
index: i index: i
}, true)).then((html) => { }, true)).then((html) => {
this.findRemoteResources('note_' + zeroFill(3, i + 1), {note: i}, html) this.findRemoteResources('note_' + zeroFill(3, i + 1), {note: i}, html)
if (this.options.kepubify) {
html = kepubify(html)
}
this.notesHtml[i] = html this.notesHtml[i] = html
}) })
} }

View file

@ -15,6 +15,7 @@ const args = require('commander')
.option('-n, --no-notes', 'Don\'t include author notes') .option('-n, --no-notes', 'Don\'t include author notes')
.option('-i, --notes-index', 'Create an index with all author notes at the end of the ebook') .option('-i, --notes-index', 'Create an index with all author notes at the end of the ebook')
.option('-p, --paragraphs <style>', 'Select a paragraph style <spaced|indented|indentedall|both>', 'spaced') .option('-p, --paragraphs <style>', 'Select a paragraph style <spaced|indented|indentedall|both>', 'spaced')
.option('-k, --kepubify', 'Add extra <span> elements for Kobo EPUB (KEPUB) format')
.option('-j, --join-subjects', 'Join dc:subjects to a single value') .option('-j, --join-subjects', 'Join dc:subjects to a single value')
.option('-w, --wpm <number>', 'Words per minute. Set to 0 to disable reading time estimations', parseInt, 200) .option('-w, --wpm <number>', 'Words per minute. Set to 0 to disable reading time estimations', parseInt, 200)
.option('-C, --cover <url>', 'Set cover image url') .option('-C, --cover <url>', 'Set cover image url')
@ -50,6 +51,7 @@ const ffc = new FimFic2Epub(STORY_ID, {
addChapterHeadings: !!args.headings, addChapterHeadings: !!args.headings,
includeExternal: !!args.external, includeExternal: !!args.external,
paragraphStyle: args.paragraphs, paragraphStyle: args.paragraphs,
kepubify: !!args.kepubify,
joinSubjects: !!args.joinSubjects, joinSubjects: !!args.joinSubjects,
calculateReadingEase: !!args.readingEase, calculateReadingEase: !!args.readingEase,
readingEaseWakeupInterval: 800, readingEaseWakeupInterval: 800,
@ -71,7 +73,11 @@ ffc.fetchMetadata()
.then(ffc.fetchAll.bind(ffc)) .then(ffc.fetchAll.bind(ffc))
.then(ffc.build.bind(ffc)) .then(ffc.build.bind(ffc))
.then(() => { .then(() => {
let filename = (args.args[1] || '').replace('%id%', ffc.storyInfo.id) || ffc.filename let filename = ffc.filename
if (ffc.options.kepubify) {
filename = filename.replace(/\.epub$/, '.kepub.epub')
}
filename = (args.args[1] || '').replace('%id%', ffc.storyInfo.id) || filename
let stream let stream
if (args.dir) { if (args.dir) {

View file

@ -1,28 +1,28 @@
import et from 'elementtree' import et from 'elementtree'
// TODO: Text tails that sit outside of <body></body> are currently missing from the output; fix this.
/**
 * Converts an XHTML document into its Kobo EPUB (KEPUB) form.
 *
 * The markup is parsed with elementtree, the children of <body> are wrapped
 * by `addDivs`, and `addSpansToNode` is then run over the resulting body
 * children with one shared counter object so span numbering continues across
 * the whole document.
 *
 * @param {string} html - Markup to convert; it is looked up for a `./body` element.
 * @returns {string} The serialized tree, prefixed with a fixed XML declaration
 *   and `<!DOCTYPE html>` line (the serializer's own declaration is disabled).
 */
export default function kepubify (html) {
  const documentTree = et.parse(html)
  const bodyElement = documentTree.find('./body')
  addDivs(bodyElement)

  // Shared paragraph/segment counters, handed down through every recursive call
  const counters = {paragraph: 0, segment: 0}
  bodyElement.getchildren().forEach((element) => {
    addSpansToNode(element, bodyElement, counters)
  })

  const serialized = documentTree.write({xml_declaration: false})
  return '<?xml version="1.0" encoding="utf-8"?>\n<!DOCTYPE html>\n' + serialized
}
const specialTags = /^(img|pre)$/i const specialTags = /^(img|pre|svg)$/i
const sentenceRe = /(((^|\w).*?[^\w\s,]+)(?=\s+\W*[A-Z])|:|;)/g const paragraphTags = /^(p|ol|ul)$/i
let paragraph_counter = 0
let segment_counter = 0
/**
 * Wraps everything inside <body> in the two container divs used for KEPUB:
 * `<div class="book-inner"><div class="book-columns">…</div></div>`.
 *
 * The child list returned by `body.getchildren()` is handed to the inner
 * "book-columns" div, and the outer "book-inner" div becomes the only child
 * of `body`.
 *
 * @param {object} body - The elementtree <body> element; modified in place.
 */
function addDivs (body) {
  const outerDiv = et.Element('div', {class: 'book-inner'})
  const columnsDiv = et.SubElement(outerDiv, 'div', {class: 'book-columns'})

  const previousChildren = body.getchildren()
  columnsDiv._children = previousChildren
  body._children = [outerDiv]
}
function createSpan (paragraph, segment) { function createSpan (paragraph, segment) {
@ -33,44 +33,62 @@ function createSpan (paragraph, segment) {
return span return span
} }
/**
 * Splits a run of text into sentence-like segments and wraps each segment in
 * a Kobo span.
 *
 * Each span is created with `createSpan(state.paragraph, state.segment)` and
 * `state.segment` is incremented once per span, so numbering keeps increasing
 * across calls that share the same `state` object.
 *
 * Known limitations (pre-existing behaviour, intentionally left unchanged):
 * combining diacritical marks and the period that follows a recognised
 * abbreviation are removed from the text placed in the spans.
 *
 * @param {*} node - Unused; kept so existing call sites keep working.
 * @param {string} text - The text to split (an element's `text` or `tail`).
 * @param {{paragraph: number, segment: number}} state - Shared counters; `segment` is mutated.
 * @returns {Array} One span per segment, in reading order. Empty when `text`
 *   contains nothing but whitespace.
 */
function textToSpans (node, text, state) {
  const pieces = text
    // NUL is used below as the split marker, so remove EVERY NUL already in the
    // input (a string pattern would only remove the first one)
    .replace(/\0/g, '')
    .replace(/\s+/g, ' ') // Replace all whitespace (including newlines) with a single space
    .normalize('NFD').replace(/[\u0300-\u036f]/g, '') // strip diacritics since JS's \w group and explicit [a-z]|[A-Z] don't account for them
    // Drop the period after abbreviations so it is not taken for a sentence end.
    // The leading \b keeps entries such as "st" or "en" from matching the tail
    // of ordinary words ("best.", "then."), which would swallow a real period.
    .replace(/\b(mr|mrs|dr|ms|prof|rev|col|cmdr|flt|lt|brgdr|hon|wng|capt|rt|revd|gen|cdre|admrl|herr|hr|frau|alderman|alhaji|brig|cdr|cik|consul|datin|dato|datuk|seri|dhr|dipl|ing|dott|sa|dra|drs|en|encik|eng|eur|exma|sra|exmo|sr|lieut|fr|fraulein|fru|graaf|gravin|grp|hajah|haji|hajim|hra|ir|lcda|lic|maj|mlle|mme|mstr|nti|sri|rva|sig|na|ra|sqn|ldr|srta|wg|co|esq|inc|iou|ltd|mdlle|messers|messrs|mlles|mm|mmes|mt|p\.s|pvt|st|viz)\./gi, '$1')
    // Mark the end of every detected sentence (and every ":" / ";") with NUL
    .replace(/(((^|\w).*?[^\w\s,]+)(?=\s+\W*[A-Z])|:|;)/g, '$1\0')
    .split(/\s*\0/)

  // Whitespace-only pieces never get a span of their own: they are appended to
  // the preceding segment, or dropped when there is none.
  const segments = []
  for (const piece of pieces) {
    if (piece.trim().length > 0) {
      segments.push(piece)
    } else if (segments.length > 0) {
      segments[segments.length - 1] += piece
    }
  }

  return segments.map((sentence) => {
    const span = createSpan(state.paragraph, state.segment++)
    span.text = sentence
    return span
  })
}
function addSpansToNode (node, parent) { function addSpansToNode (node, parent, state) {
const nodePosition = parent.getchildren().indexOf(node) let nodePosition = parent.getchildren().indexOf(node)
if (node.tag.match(paragraphTags)) {
state.paragraph++
state.segment = 0
}
if (node.tag.match(specialTags)) { if (node.tag.match(specialTags)) {
const span = createSpan(paragraph_counter, segment_counter) const span = createSpan(state.paragraph, state.segment++)
span.append(node) span.append(node)
parent.getchildren().splice(nodePosition, 1, span) parent.getchildren().splice(nodePosition, 1, span)
} else { } else {
node.getchildren().forEach((child) => { let prependNodes = []
addSpansToNode(child, node)
})
}
if (node.text) { if (node.text) {
addSpans(node, node.text).forEach((span, i) => { prependNodes = textToSpans(node, node.text, state)
node.getchildren().splice(i, 0, span)
})
node.text = null node.text = null
} }
node.getchildren().forEach((child) => {
addSpansToNode(child, node, state)
})
prependNodes.forEach((span, i) => {
node.getchildren().splice(i, 0, span)
})
}
if (node.tail) { if (node.tail) {
addSpans(node, node.tail).forEach((span, i) => { nodePosition = parent.getchildren().indexOf(node)
textToSpans(node, node.tail, state).forEach((span, i) => {
parent.getchildren().splice(nodePosition + 1 + i, 0, span) parent.getchildren().splice(nodePosition + 1 + i, 0, span)
}) })
node.tail = null node.tail = null

View file

@ -135,6 +135,7 @@ let dialog = {
this.useAuthorNotesIndex = prop(ffc.options.useAuthorNotesIndex) this.useAuthorNotesIndex = prop(ffc.options.useAuthorNotesIndex)
this.addChapterHeadings = prop(ffc.options.addChapterHeadings) this.addChapterHeadings = prop(ffc.options.addChapterHeadings)
this.includeExternal = prop(ffc.options.includeExternal) this.includeExternal = prop(ffc.options.includeExternal)
this.kepubify = prop(ffc.options.kepubify)
this.joinSubjects = prop(ffc.options.joinSubjects) this.joinSubjects = prop(ffc.options.joinSubjects)
this.paragraphStyle = prop(ffc.options.paragraphStyle) this.paragraphStyle = prop(ffc.options.paragraphStyle)
this.calculateReadingEase = prop(ffc.options.calculateReadingEase) this.calculateReadingEase = prop(ffc.options.calculateReadingEase)
@ -261,7 +262,8 @@ let dialog = {
m(checkbox, {checked: ctrl.calculateReadingEase(), onchange: m.withAttr('checked', ctrl.calculateReadingEase)}, 'Calculate Flesch reading ease'), m(checkbox, {checked: ctrl.calculateReadingEase(), onchange: m.withAttr('checked', ctrl.calculateReadingEase)}, 'Calculate Flesch reading ease'),
m(checkbox, {checked: ctrl.addChapterBars(), onchange: m.withAttr('checked', ctrl.addChapterBars)}, 'Show reading progress and chapter lengths as bars'), m(checkbox, {checked: ctrl.addChapterBars(), onchange: m.withAttr('checked', ctrl.addChapterBars)}, 'Show reading progress and chapter lengths as bars'),
m(checkbox, {checked: ctrl.includeExternal(), onchange: m.withAttr('checked', ctrl.includeExternal)}, 'Download & include remote content (embed images)'), m(checkbox, {checked: ctrl.includeExternal(), onchange: m.withAttr('checked', ctrl.includeExternal)}, 'Download & include remote content (embed images)'),
m('div', {style: 'font-size: 0.9em; line-height: 1em; margin-top: 4px; margin-bottom: 6px; opacity: 0.6;'}, 'Note: Disabling this creates invalid EPUBs and requires internet access to see remote content. Only cover image will be embedded.') m('div', {style: 'font-size: 0.9em; line-height: 1em; margin-top: 4px; margin-bottom: 6px; opacity: 0.6;'}, 'Note: Disabling this creates invalid EPUBs and requires internet access to see remote content. Only cover image will be embedded.'),
m(checkbox, {checked: ctrl.kepubify(), onchange: m.withAttr('checked', ctrl.kepubify)}, 'Export as Kobo EPUB, this adds some Kobo-specific div/span tags.'),
)), )),
m('tr', m('td.label', 'Words per minute'), m('td', {colspan: 2}, m('tr', m('td.label', 'Words per minute'), m('td', {colspan: 2},
m('input', {type: 'number', min: 0, step: 1, value: ctrl.wordsPerMinute(), onchange: m.withAttr('value', ctrl.wordsPerMinute), placeholder: '200 (default)', style: {width: '140px', float: 'left', marginRight: '.75rem', marginTop: '.35rem', position: 'relative', zIndex: 1}}), m('input', {type: 'number', min: 0, step: 1, value: ctrl.wordsPerMinute(), onchange: m.withAttr('value', ctrl.wordsPerMinute), placeholder: '200 (default)', style: {width: '140px', float: 'left', marginRight: '.75rem', marginTop: '.35rem', position: 'relative', zIndex: 1}}),
@ -276,7 +278,7 @@ let dialog = {
)) ))
]), ]),
m('.drop-down-pop-up-footer', [ m('.drop-down-pop-up-footer', [
m('button.styled_button', {onclick: ctrl.createEpub, disabled: ffcProgress() >= 0 && ffcProgress() < 1, style: 'float: right'}, 'Download EPUB'), m('button.styled_button', {onclick: ctrl.createEpub, disabled: ffcProgress() >= 0 && ffcProgress() < 1, style: 'float: right'}, 'Download ' + (ctrl.kepubify() ? 'Kobo EPUB' : 'EPUB')),
ffcProgress() >= 0 ? m('.rating_container', ffcProgress() >= 0 ? m('.rating_container',
m('.rating-bar', {style: {background: 'rgba(0, 0, 0, 0.2)', 'margin-right': '5px'}}, m('.like-bar', {style: {width: Math.max(0, ffcProgress()) * 100 + '%'}})), m('.rating-bar', {style: {background: 'rgba(0, 0, 0, 0.2)', 'margin-right': '5px'}}, m('.like-bar', {style: {width: Math.max(0, ffcProgress()) * 100 + '%'}})),
' ', ' ',
@ -327,6 +329,7 @@ function createEpub (model) {
ffc.options.addChapterHeadings = model.addChapterHeadings() ffc.options.addChapterHeadings = model.addChapterHeadings()
ffc.options.includeExternal = model.includeExternal() ffc.options.includeExternal = model.includeExternal()
ffc.options.paragraphStyle = model.paragraphStyle() ffc.options.paragraphStyle = model.paragraphStyle()
ffc.options.kepubify = model.kepubify()
ffc.subjects = model.subjects() ffc.subjects = model.subjects()
ffc.options.joinSubjects = model.joinSubjects() ffc.options.joinSubjects = model.joinSubjects()
ffc.options.calculateReadingEase = model.calculateReadingEase() ffc.options.calculateReadingEase = model.calculateReadingEase()
@ -348,7 +351,11 @@ function createEpub (model) {
alert('Add .epub to the filename of the downloaded file') alert('Add .epub to the filename of the downloaded file')
}) })
} else { } else {
saveAs(file, ffc.filename) let filename = ffc.filename
if (ffc.options.kepubify) {
filename = filename.replace(/\.epub$/, '.kepub.epub')
}
saveAs(file, filename)
} }
}) })
} }

View file

@ -4,4 +4,12 @@ require('mithril/test-utils/browserMock')(global)
const kepubify = require('../src/kepubify').default const kepubify = require('../src/kepubify').default
console.log(kepubify(`<html><body><p>Some text. Woo or not. Here is <img /> another sentence.</p></body></html>`)) console.log(1, kepubify(`<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html>
<html><body>text <p>aaaa</p><p>Some text. Woo <!-- or --> not. Here is <img /> another sentence.</p><!-- comment --><p>More text <img/> tail</p> body tail</body> html tail</html>`) === `<?xml version="1.0" encoding="utf-8"?>\n<!DOCTYPE html>\n<html><body>text <div class="book-inner"><div class="book-columns"><p><span class="koboSpan" id="kobo.1.0">aaaa</span></p><p><span class="koboSpan" id="kobo.2.0">Some text.</span><span class="koboSpan" id="kobo.2.1"> Woo not.</span><span class="koboSpan" id="kobo.2.2"> Here is </span><span class="koboSpan" id="kobo.2.4"> another sentence.</span><span class="koboSpan" id="kobo.2.3"><img /></span></p><p><span class="koboSpan" id="kobo.3.0">More text </span><span class="koboSpan" id="kobo.3.2"> tail</span><span class="koboSpan" id="kobo.3.1"><img /></span></p><span class="koboSpan" id="kobo.3.3"> body tail</span></div></div></body></html>`)
console.log(2, kepubify(`<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html>
<html><body><p>Dated: June 5th. Wohoo</p></body></html>`) === `<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html>
<html><body><div class="book-inner"><div class="book-columns"><p><span class="koboSpan" id="kobo.1.0">Dated:</span><span class="koboSpan" id="kobo.1.1"> June 5th.</span><span class="koboSpan" id="kobo.1.2"> Wohoo</span></p></div></div></body></html>`)