Don't prettify HTML content; it breaks whitespace.

This commit is contained in:
daniel-j 2017-05-25 22:11:44 +02:00
parent 6942f9a625
commit 07c1505f6d
8 changed files with 283 additions and 12 deletions

View file

@ -4,7 +4,7 @@
"name": "fimfic2epub",
"short_name": "fimfic2epub",
"description": "Improved EPUB exporter for Fimfiction",
"version": "1.4.9",
"version": "1.5.0",
"icons": {
"128": "icon-128.png"

View file

@ -1,6 +1,6 @@
{
"name": "fimfic2epub",
"version": "1.4.9",
"version": "1.5.0",
"description": "Tool to generate improved EPUB ebooks from Fimfiction stories",
"author": "djazz",
"repository": {

View file

@ -20,13 +20,8 @@ export function cleanMarkup (html) {
}
return new Promise((resolve, reject) => {
// replace HTML non-breaking spaces with normal spaces
html = html.replace(/ /g, ' ')
html = fixParagraphIndent(html)
html = fixDoubleSpacing(html)
// fix center tags
html = html.replace(/<center>/g, '<p style="text-align: center;">')
html = html.replace(/<\/center>/g, '</p>')
@ -108,6 +103,12 @@ export function cleanMarkup (html) {
function continueParsing () {
html = tidy(html, tidyOptions).trim()
// replace HTML non-breaking spaces with normal spaces
html = html.replace(/&nbsp;/g, ' ')
html = html.replace(/&#160;/g, ' ')
html = fixDoubleSpacing(html)
resolve(html)
}
})

View file

@ -16,7 +16,6 @@ export const tidyOptions = {
'alt-text': 'Image',
'wrap': '0',
'quiet': 'yes',
'show-warnings': 0,
'newline': 'LF',
'tidy-mark': 'no',
'show-body-only': 'auto'

125
src/same.xhtml Normal file
View file

@ -0,0 +1,125 @@
<p class="indented double"><span id="kobo.0.1">This chapter is designed to test the capabilities of various ePub extraction tools against each BBCode tag supported by FIMFiction.net. First of all, note that this paragraph begins with a tab indent&#x2014;many tools will remove this. Next, here is the set of non-ANSI characters that I frequently use in my stories: &#x201C;&#x201D; &#x2018;&#x2019; &#x2014; &#x2026; (smart double quotes, smart single quotes, em-dash, and ellipses).</span></p>
<p class="indented double"><span id="kobo.1.1">Next are the tags that should work in-line: [b]
<b>bold</b>[/b], [i]
<i>italics</i>[/i], [u]
<u>underline</u>[/u], [s]
<span style="text-decoration: line-through;">strikethrough</span>[/s], [smcaps]
<span style="font-variant:small-caps;">small caps</span>[/smcaps], [spoiler]
<span class="spoiler">spoiler text</span>[/spoiler], [url]
<a href="http://www.google.com" rel="nofollow">hyperlink</a>[/url], [email]
<a href="mailto:user@server.com" rel="nofollow">user@server.com</a>[/email], and finally an in-line image: [img]
<img src="../Images/emoticon_pinkiehappy.png" class="user_image" alt="Image"/>[/img].
</span></p>
<p class="indented double"><span id="kobo.2.1">
<i>There&apos;s a known bug in translating FIMFiction stories where consecutive italicized paragraphs lose the italics after the first paragraph. Well, this paragraph is supposed to be italicized.</i>
</span></p>
<p class="indented double"><span id="kobo.3.1">
<i>And so is this one.</i>
</span></p>
<p class="indented double"><span id="kobo.4.1">
<i>And ending with this one.</i>
</span></p>
<p class="indented double"><span id="kobo.5.1">Let&#x2019;s quickly step through all of the named color tags:
<span style="color:red;">red</span>,
<span style="color:silver;">silver</span>,
<span style="color:orange;">orange</span>,
<span style="color:yellow;">yellow</span>,
<span style="color:brown;">brown</span>,
<span style="color:green;">green</span>,
<span style="color:olive;">olive</span>,
<span style="color:cornflowerblue;">cornflowerblue</span>,
<span style="color:cyan;">cyan</span>,
<span style="color:purple;">purple</span>,
<span style="color:pink;">pink</span>,
<span style="color:hotpink;">hotpink</span>,
<span style="color:lime;">lime</span>,
<span style="color:white;">white</span> (white),
<span style="color:lightgrey;">lightgrey</span>,
<span style="color:indigo;">indigo</span>,
<span style="color:darkblue;">darkblue</span>,
<span style="color:lightblue;">lightblue</span>,
<span style="color:blue;">blue</span>,
<span style="color:gold;">gold</span>,
<span style="color:navy;">navy</span>,
<span style="color:chartreuse;">chartreuse</span>, and
<span style="color:teal;">teal</span>. Here are some of the colors I use to code character&apos;s thoughts in my &#x201C;Thought Experiments&#x201D; series:
<span style="color:#0077D0;">#0077D0</span> for Vinyl Scratch,
<span style="color:#60BB50;">#60BB50</span> for Spike,
<span style="color:purple;">purple</span> for Twilight Sparkle,
<span style="color:hotpink;">hotpink</span> for Pinkie Pie, and
<span style="color:darkblue;">darkblue</span> for Rainbow Dash.
</span></p>
<p class="indented double"><span id="kobo.6.1">Size can be indicated by either an absolute number, or in &quot;ems&quot;, which is a percentage scaling:</span></p>
<p class="double"><span id="kobo.7.1">[size=24]
<span style="font-size:24px; line-height: 1.3em;">Size 24, which should be normal-size text for default settings</span>[/size]
</span></p>
<p class="double"><span id="kobo.8.1">[size=12]
<span style="font-size:12px; line-height: 1.3em;">Size 12, which should be half-size text for default settings</span>[/size]
</span></p>
<p class="double"><span id="kobo.9.1">[size=48]
<span style="font-size:48px; line-height: 1.3em;">Size 48, which should be double-size text for default settings</span>[/size]
</span></p>
<p class="double"><span id="kobo.10.1">[size=1em]
<span style="font-size:1em; line-height: 1.3em;">1 em text</span>[/size]
</span></p>
<p class="double"><span id="kobo.11.1">[size=.5em]
<span style="font-size:.5em; line-height: 1.3em;">0.5 em text</span>[/size]
</span></p>
<p class="double"><span id="kobo.12.1">[size=2em]
<span style="font-size:2em; line-height: 1.3em;">2 em text</span>[/size]
</span></p>
<p class="double"><span id="kobo.13.1"/></p>
<hr/>
<p class="indented double"><span id="kobo.14.1">Just above this paragraph should be a horizontal rule, or [hr] tag.</span></p>
<p class="double"><span id="kobo.15.1"/></p>
<p style="text-align: center;"><span id="kobo.16.1">[center]Centered text[/center]</span></p>
<p class="double"><span id="kobo.17.1"/></p>
<div style="text-align:right;">
[right]Right-aligned text[/right]
</div>
<p class="double"><span id="kobo.18.1"/></p>
<blockquote>
[quote] This is a quote.
<p class="indented double"><span id="kobo.19.1">One big problem I&apos;ve found with ePub extractors is handling multiple-paragraph quotes. This paragraph should still be inside the above quote tag.</span></p>
<p class="double"><span id="kobo.20.1"/></p>
<p style="text-align: center;"><span id="kobo.21.1">[center]Centered in the quote.[/center]</span></p>
<p><span id="kobo.22.1">[/quote]</span></p>
</blockquote>
<p class="double"><span id="kobo.23.1"/></p>
<hr/>
<p class="indented double"><span id="kobo.24.1">Here&apos;s an image all by itself:</span></p>
<p class="double"><span id="kobo.25.1">
<img src="../Images/ch_001_2.jpg" class="user_image" alt="Image"/>
</span></p>
<p class="indented double"><span id="kobo.26.1">Here&apos;s a YouTube video:</span></p>
<p class="double"><span id="kobo.27.1"/></p>
<figure class="youtube">
<a href="https://youtube.com/watch?v=_eDXhih_NW4">
<img src="../Images/ch_001_3.jpg" alt="d.notive - Vicious Lies (Depeche Mode Imitation)"/>
</a>
<figcaption>
<a href="https://youtube.com/watch?v=_eDXhih_NW4">d.notive - Vicious Lies (Depeche Mode Imitation) on YouTube</a>
</figcaption>
</figure>
<p class="double"><span id="kobo.28.1"/></p>
<hr/>
<p class="double"><span id="kobo.29.1"/></p>
<blockquote class="left_insert">
[left_insert]This creates left-aligned text that wraps at the center of the screen. Here&apos;s some
<i>lorum ipsum</i> to demonstrate line-wrapping: Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
<p class="double"><span id="kobo.30.1">Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.[/left_insert]</span></p>
</blockquote>
<p class="double"><span id="kobo.31.1"/></p>
<blockquote class="right_insert">
[right_insert]This creates left-aligned text starting at the center of the screen. Here&apos;s some
<i>lorum ipsum</i> to demonstrate line-wrapping: Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
<p class="double"><span id="kobo.32.1">Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.[/right_insert]</span></p>
</blockquote>
<p class="double"><span id="kobo.33.1"/></p>
<hr/>

View file

@ -50,15 +50,17 @@ export function createChapter (ch) {
m('h1', title),
m('hr')
]) : null,
sections,
m('p.double', {style: 'text-align: center; clear: both;'},
'%%HTML_CONTENT%%',
(link || linkNotes) ? m('p.double', {style: 'text-align: center; clear: both;'},
link ? m('a.chaptercomments', {href: link + '#comment_list'}, 'Read chapter comments online') : null,
linkNotes ? m('a.chaptercomments', {href: linkNotes}, 'Read author\'s note') : null
)
) : null
])
])
))
chapterPage = chapterPage.replace('%%HTML_CONTENT%%', '\n' + render(sections) + '\n')
resolve(chapterPage)
})
}
@ -339,7 +341,7 @@ export function createTitlePage (ffc) {
'This story is a sequel to ',
m('a', {href: ffc.storyInfo.prequel.url}, ffc.storyInfo.prequel.title)
]), m('hr')] : null,
m('#description', m.trust(ffc.storyInfo.description)),
m('#description', '%%HTML_CONTENT%%'),
m('.bottom', [
m('span', {className: 'completed-status-' + ffc.storyInfo.status.toLowerCase()}, ffc.storyInfo.status),
ffc.storyInfo.publishDate && infoBox('First Published', prettyDate(new Date(ffc.storyInfo.publishDate * 1000))),
@ -355,6 +357,7 @@ export function createTitlePage (ffc) {
])
])
))
titlePage = titlePage.replace('%%HTML_CONTENT%%', '\n' + ffc.storyInfo.description + '\n')
// console.log(titlePage)
return titlePage
}

125
src/test.xhtml Normal file
View file

@ -0,0 +1,125 @@
<p class="indented double">This chapter is designed to test the capabilities of various ePub extraction tools against each BBCode tag supported by FIMFiction.net. First of all, note that this paragraph begins with a tab indent—many tools will remove this. Next, here is the set of non-ANSI characters that I frequently use in my stories: “” ‘’ — … (smart double quotes, smart single quotes, em-dash, and ellipses).</p>
<p class="indented double">Next are the tags that should work in-line: [b]
<b>bold</b>[/b], [i]
<i>italics</i>[/i], [u]
<u>underline</u>[/u], [s]
<span style="text-decoration: line-through;">strikethrough</span>[/s], [smcaps]
<span style="font-variant:small-caps;">small caps</span>[/smcaps], [spoiler]
<span class="spoiler">spoiler text</span>[/spoiler], [url]
<a href="http://www.google.com" rel="nofollow">hyperlink</a>[/url], [email]
<a href="mailto:user@server.com" rel="nofollow">user@server.com</a>[/email], and finally an in-line image: [img]
<img src="../Images/emoticon_pinkiehappy.png" class="user_image" alt="Image"/>[/img].
</p>
<p class="indented double">
<i>There's a known bug in translating FIMFiction stories where consecutive italicized paragraphs lose the italics after the first paragraph. Well, this paragraph is supposed to be italicized.</i>
</p>
<p class="indented double">
<i>And so is this one.</i>
</p>
<p class="indented double">
<i>And ending with this one.</i>
</p>
<p class="indented double">Let’s quickly step through all of the named color tags:
<span style="color:red;">red</span>,
<span style="color:silver;">silver</span>,
<span style="color:orange;">orange</span>,
<span style="color:yellow;">yellow</span>,
<span style="color:brown;">brown</span>,
<span style="color:green;">green</span>,
<span style="color:olive;">olive</span>,
<span style="color:cornflowerblue;">cornflowerblue</span>,
<span style="color:cyan;">cyan</span>,
<span style="color:purple;">purple</span>,
<span style="color:pink;">pink</span>,
<span style="color:hotpink;">hotpink</span>,
<span style="color:lime;">lime</span>,
<span style="color:white;">white</span> (white),
<span style="color:lightgrey;">lightgrey</span>,
<span style="color:indigo;">indigo</span>,
<span style="color:darkblue;">darkblue</span>,
<span style="color:lightblue;">lightblue</span>,
<span style="color:blue;">blue</span>,
<span style="color:gold;">gold</span>,
<span style="color:navy;">navy</span>,
<span style="color:chartreuse;">chartreuse</span>, and
<span style="color:teal;">teal</span>. Here are some of the colors I use to code character's thoughts in my “Thought Experiments” series:
<span style="color:#0077D0;">#0077D0</span> for Vinyl Scratch,
<span style="color:#60BB50;">#60BB50</span> for Spike,
<span style="color:purple;">purple</span> for Twilight Sparkle,
<span style="color:hotpink;">hotpink</span> for Pinkie Pie, and
<span style="color:darkblue;">darkblue</span> for Rainbow Dash.
</p>
<p class="indented double">Size can be indicated by either an absolute number, or in "ems", which is a percentage scaling:</p>
<p class="double">[size=24]
<span style="font-size:24px; line-height: 1.3em;">Size 24, which should be normal-size text for default settings</span>[/size]
</p>
<p class="double">[size=12]
<span style="font-size:12px; line-height: 1.3em;">Size 12, which should be half-size text for default settings</span>[/size]
</p>
<p class="double">[size=48]
<span style="font-size:48px; line-height: 1.3em;">Size 48, which should be double-size text for default settings</span>[/size]
</p>
<p class="double">[size=1em]
<span style="font-size:1em; line-height: 1.3em;">1 em text</span>[/size]
</p>
<p class="double">[size=.5em]
<span style="font-size:.5em; line-height: 1.3em;">0.5 em text</span>[/size]
</p>
<p class="double">[size=2em]
<span style="font-size:2em; line-height: 1.3em;">2 em text</span>[/size]
</p>
<p class="double"></p>
<hr/>
<p class="indented double">Just above this paragraph should be a horizontal rule, or [hr] tag.</p>
<p class="double"></p>
<p style="text-align: center;">[center]Centered text[/center]</p>
<p class="double"></p>
<div style='text-align:right;'>
[right]Right-aligned text[/right]
</div>
<p class="double"></p>
<blockquote>
[quote] This is a quote.
<p class="indented double">One big problem I've found with ePub extractors is handling multiple-paragraph quotes. This paragraph should still be inside the above quote tag.</p>
<p class="double"></p>
<p style="text-align: center;">[center]Centered in the quote.[/center]</p>
<p>[/quote]</p>
</blockquote>
<p class="double"></p>
<hr/>
<p class="indented double">Here's an image all by itself:</p>
<p class="double">
<img src="../Images/ch_001_2.jpg" class="user_image" alt="Image"/>
</p>
<p class="indented double">Here's a YouTube video:</p>
<p class="double"></p>
<figure class="youtube">
<a href="https://youtube.com/watch?v=_eDXhih_NW4">
<img src="../Images/ch_001_3.jpg" alt="d.notive - Vicious Lies (Depeche Mode Imitation)"/>
</a>
<figcaption>
<a href="https://youtube.com/watch?v=_eDXhih_NW4">d.notive - Vicious Lies (Depeche Mode Imitation) on YouTube</a>
</figcaption>
</figure>
<p class="double"></p>
<hr/>
<p class="double"></p>
<blockquote class="left_insert">
[left_insert]This creates left-aligned text that wraps at the center of the screen. Here's some
<i>lorum ipsum</i> to demonstrate line-wrapping: Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
<p class="double">Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.[/left_insert]</p>
</blockquote>
<p class="double"></p>
<blockquote class="right_insert">
[right_insert]This creates left-aligned text starting at the center of the screen. Here's some
<i>lorum ipsum</i> to demonstrate line-wrapping: Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
<p class="double">Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.[/right_insert]</p>
</blockquote>
<p class="double"></p>
<hr/>

18
src/testkepub.js Normal file
View file

@ -0,0 +1,18 @@
// Developer script: reads the test.xhtml fixture, wraps the contents of every
// paragraph and heading element in a <span id="kobo.N.1"> element, and prints
// the resulting XML document to stdout.
const cheerio = require('cheerio')
const fs = require('fs')
const path = require('path')

// Resolve the fixture relative to this script instead of the process working
// directory, so the script also works when launched from outside its folder.
const inputPath = path.join(__dirname, 'test.xhtml')
const content = fs.readFileSync(inputPath, 'utf8')

// Parse the fixture with cheerio's xmlMode option enabled.
const $ = cheerio.load(content, {xmlMode: true})

const elements = $('p, h1, h2, h3, h4, h5, h6')
elements.each((i, element) => {
  const el = $(element)
  const newTag = $('<span/>')
  // The id is derived from the element's index within the matched set.
  newTag.attr('id', 'kobo.' + i + '.1')
  // Put the element's existing children inside the span, then attach the span
  // to the element so it wraps the original content.
  newTag.append(el.contents())
  el.append(newTag)
})

console.log($.xml())