first commit

This commit is contained in:
daniel-j 2016-06-19 22:28:28 +02:00
commit 49387a70b3
5 changed files with 421 additions and 0 deletions

3
.gitignore vendored Normal file
View file

@ -0,0 +1,3 @@
node_modules/
out.epub

136
mithril-node-render.js Normal file
View file

@ -0,0 +1,136 @@
'use strict'
// Tag names of elements that take no children (void elements), plus '!doctype'.
// NOTE: the only reference to this list visible in this file is inside a
// commented-out condition in render(), so it currently does not influence
// the generated markup.
var VOID_TAGS = ['area', 'base', 'br', 'col', 'command', 'embed', 'hr',
'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track',
'wbr', '!doctype']
/**
 * Report whether `thing` is a real array.
 *
 * Uses the built-in `Array.isArray` instead of comparing the
 * `Object.prototype.toString` tag, which any object can spoof by defining
 * `Symbol.toStringTag`.
 *
 * @param {*} thing - value to inspect
 * @returns {boolean} true only for genuine arrays
 */
function isArray (thing) {
  return Array.isArray(thing)
}
/**
 * Turn a camelCased property name into a dash-separated one.
 *
 * Runs of non-word characters collapse into a single dash, and a dash is
 * inserted between a lowercase letter or digit and the uppercase letter that
 * follows it. Letter case itself is not changed here.
 *
 * @param {string} str - property name, e.g. 'fontSize'
 * @returns {string} dashed name, e.g. 'font-Size'
 */
function camelToDash (str) {
  var withoutSeparators = str.replace(/\W+/g, '-')
  return withoutSeparators.replace(/([a-z\d])([A-Z])/g, function (match, before, upper) {
    return before + '-' + upper
  })
}
/**
 * Filter predicate that rejects only the empty string.
 *
 * @param {*} n - candidate value
 * @returns {boolean} false when `n` is exactly '', true for anything else
 */
function removeEmpties (n) {
  var isEmptyString = n === ''
  return !isEmptyString
}
/**
 * Escape text for safe inclusion in HTML/XML markup.
 * Adapted from https://github.com/punkave/sanitize-html
 *
 * `&`, `<` and `>` are always replaced by entities; `"` is replaced as well
 * when `replaceDoubleQuote` is truthy (needed for attribute values).
 *
 * @param {*} s - value to escape; `undefined` becomes '', any other
 *   non-string is converted with `s + ''`
 * @param {boolean} [replaceDoubleQuote] - also escape double quotes
 * @returns {string} the escaped text
 */
function escapeHtml (s, replaceDoubleQuote) {
  var text = s
  // Check the type, not the string 'undefined': comparing against the string
  // erased the literal text "undefined" and let a real undefined through.
  if (typeof text === 'undefined') {
    text = ''
  }
  if (typeof text !== 'string') {
    text = text + ''
  }
  // '&' must be handled first so the entities produced below stay intact.
  text = text.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;')
  if (replaceDoubleQuote) {
    return text.replace(/"/g, '&quot;')
  }
  return text
}
function createAttrString (view, escapeAttributeValue) {
var attrs = view.attrs
if (!attrs || !Object.keys(attrs).length) {
return ''
}
return Object.keys(attrs).map(function (name) {
var value = attrs[name]
if (typeof value === 'undefined' || value === null || typeof value === 'function') {
return
}
if (typeof value === 'boolean') {
return value ? ' ' + name : ''
}
if (name === 'style') {
if (!value) {
return
}
var styles = attrs.style
if (typeof styles === 'object') {
styles = Object.keys(styles).map(function (property) {
return styles[property] !== '' ? [camelToDash(property).toLowerCase(), styles[property]].join(':') : ''
}).filter(removeEmpties).join(';')
}
return styles !== '' ? ' style="' + escapeAttributeValue(styles, true) + '"' : ''
}
// Handle SVG <use> tags specially
if (name === 'href' && view.tag === 'use') {
return ' xlink:href="' + escapeAttributeValue(value, true) + '"'
}
return ' ' + (name === 'className' ? 'class' : name) + '="' + escapeAttributeValue(value, true) + '"'
}).join('')
}
/**
 * Render the children of a virtual node.
 *
 * @param {Object} view - virtual node whose `children` are rendered
 * @param {Object} [options] - render options, forwarded to `render` so that
 *   custom escape functions also apply to descendants
 * @returns {*} '' for an empty children array, otherwise whatever `render`
 *   returns for `view.children`
 */
function createChildrenContent (view, options) {
  if (isArray(view.children) && !view.children.length) {
    return ''
  }
  return render(view.children, options)
}

/**
 * Render a virtual node tree to a markup string.
 *
 * Accepted `view` values:
 *  - string: escaped with `options.escapeString`;
 *  - number / boolean: returned as-is (not converted to a string);
 *  - other falsy values: rendered as '';
 *  - array: each entry is rendered and the results are concatenated;
 *  - component (has a `view` function): an optional `controller` is
 *    instantiated, its view rendered, then `onunload` is called if present;
 *  - object with a truthy `$trusted`: converted with `'' + view`, unescaped;
 *  - anything else: treated as an element with `tag`, `attrs`, `children`.
 *
 * @param {*} view - virtual node, array of nodes, component or primitive
 * @param {Object} [options]
 * @param {Function} [options.escapeAttributeValue=escapeHtml] - escapes
 *   attribute values
 * @param {Function} [options.escapeString=escapeHtml] - escapes text nodes
 * @returns {string|number|boolean} the rendered markup
 */
function render (view, options) {
  // Merge into a fresh object so the caller's options are never mutated.
  var settings = {
    escapeAttributeValue: escapeHtml,
    escapeString: escapeHtml
  }
  if (options) {
    Object.keys(options).forEach(function (key) {
      settings[key] = options[key]
    })
  }

  var type = typeof view
  if (type === 'string') {
    return settings.escapeString(view)
  }
  if (type === 'number' || type === 'boolean') {
    return view
  }
  if (!view) {
    return ''
  }
  if (isArray(view)) {
    return view.map(function (child) { return render(child, settings) }).join('')
  }

  // component
  if (view.view) {
    var scope = view.controller ? new view.controller() : {}
    var result = render(view.view(scope), settings)
    if (scope.onunload) {
      scope.onunload()
    }
    return result
  }

  if (view.$trusted) {
    return '' + view
  }

  // Pass the settings down: without them descendants fell back to the default
  // escape functions and ignored the caller's custom ones.
  var children = createChildrenContent(view, settings)
  var attrString = createAttrString(view, settings.escapeAttributeValue)

  // Every element without rendered content is self-closed; the restriction to
  // VOID_TAGS is intentionally left disabled.
  if (!children) {
    return '<' + view.tag + attrString + '/>'
  }
  return [
    '<', view.tag, attrString, '>',
    children,
    '</', view.tag, '>'
  ].join('')
}
// Expose the default escape helper as a property of the render function.
render.escapeHtml = escapeHtml
// The module's export is the render function itself.
module.exports = render

19
package.json Normal file
View file

@ -0,0 +1,19 @@
{
"name": "tidyweb",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC",
"dependencies": {
"jszip": "^3.0.0",
"mithril": "^0.2.5",
"parse5": "^2.1.5",
"pretty": "^1.0.0",
"tidy-html5": "^0.1.1",
"xmlserializer": "^0.3.3"
}
}

216
test.js Normal file
View file

@ -0,0 +1,216 @@
#!/usr/bin/env node
'use strict'
const fs = require('fs')
const m = require('mithril')
const render = require('./mithril-node-render')
const pretty = require('pretty')
//const parse5 = require('parse5')
//const xmlserializer = require('xmlserializer')
const JSZip = require('jszip')
// In-memory archive that collects every file of the EPUB container.
const zip = new JSZip()

// The EPUB container format requires `mimetype` to be the first entry of the
// archive and to be stored without compression. The archive is generated with
// DEFLATE as the default method, so opt this one file out explicitly.
zip.file('mimetype', 'application/epub+zip', {compression: 'STORE'})

// container.xml points reading systems at the package document (content.opf).
zip.folder('META-INF').file('container.xml', `<?xml version="1.0" encoding="UTF-8"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles>
<rootfile full-path="content.opf" media-type="application/oebps-package+xml"/>
</rootfiles>
</container>
`)
//let html = fs.readFileSync('test.xhtml', 'utf8')
//let dom = parse5.parseFragment(html)
//let frag = parse5.parseFragment('<package version="3.0">Hello</package>')
//console.log(frag.childNodes[0])
// XML namespace URIs used as xmlns attribute values by the documents built
// below: OPF and DC in content.opf, DAISY in toc.ncx, XHTML and OPS in
// nav.xhtml.
const NS = {
OPF: 'http://www.idpf.org/2007/opf',
OPS: 'http://www.idpf.org/2007/ops',
DC: 'http://purl.org/dc/elements/1.1/',
DAISY: 'http://www.daisy.org/z3986/2005/ncx/',
XHTML: 'http://www.w3.org/1999/xhtml'
}
/*function attrs(a) {
let arr = []
for (let i in a) {
arr.push({name: i, value: a[i]})
}
return arr
}
const dom = parse5.treeAdapters.default
let opf = dom.createDocumentFragment()
let packageNode = dom.createElement('package', NS, attrs({
version: '3.0',
'unique-identifier': 'BookId'
}))
let metadataNode = dom.createElement('metadata', NS, attrs({
'xmlns:dc': 'http://purl.org/dc/elements/1.1/',
'xmlns:opf': 'http://www.idpf.org/2007/opf'
}))
let dcIdentifier = dom.createElement('dc:identifier', NS, attrs({id: 'BookId'}))
dom.insertText(dcIdentifier, 'urn:uuid:'+12345)
dom.appendChild(metadataNode, dcIdentifier)
dom.appendChild(packageNode, metadataNode)
dom.appendChild(opf, packageNode)
//console.log(xmlserializer.serializeToString(opf))
*/
// Hard-coded metadata of the generated book. The values are read when
// building content.opf (identifier, title, creator, date, modified) and
// toc.ncx (dtb:uid, docTitle).
let bookInfo = {
uuid: 'urn:uuid:'+12345,
title: 'Book title',
author: 'Author',
publishDate: '2016-06-19',
lastModifiedDate: '2016-06-18T16:32:40Z'
}
/**
 * Build one `dc:subject` virtual node per entry of `s`.
 *
 * @param {Array} s - list of subject values, read by index up to `s.length`
 * @returns {Array} the created nodes, in the same order as `s`
 */
function subjects(s) {
  const nodes = []
  let index = 0
  while (index < s.length) {
    nodes.push(m('dc:subject', s[index]))
    index += 1
  }
  return nodes
}
// Package document (content.opf): book metadata, the manifest of files in the
// container and the spine that defines the reading order.
const opfTree = m('package', {xmlns: NS.OPF, version: '3.0', 'unique-identifier': 'BookId'}, [
  m('metadata', {'xmlns:dc': NS.DC, 'xmlns:opf': NS.OPF}, [
    m('dc:identifier', {id: 'BookId'}, bookInfo.uuid),
    m('dc:title', bookInfo.title),
    m('dc:creator', {id: 'cre'}, bookInfo.author),
    m('meta', {refines: '#cre', property: 'role', scheme: 'marc:relators'}, 'aut'),
    m('dc:date', bookInfo.publishDate),
    m('dc:publisher', 'Fimfiction'),
    m('dc:source', 'http://fimfiction.net/story/' + 'STORY_ID'),
    m('dc:language', 'en'),
    m('meta', {property: 'dcterms:modified'}, bookInfo.lastModifiedDate),
    ...subjects(['Fiction', 'Pony'])
  ]),
  m('manifest', [
    m('item', {id: 'ncx', href: 'toc.ncx', 'media-type': 'application/x-dtbncx+xml'}),
    m('item', {id: 'nav', 'href': 'nav.xhtml', 'media-type': 'application/xhtml+xml', properties: 'nav'})
  ]),
  m('spine', {toc: 'ncx'}, [
    m('itemref', {idref: 'nav'})
  ]),
  // Slot reserved for an optional <guide> element; currently disabled.
  null
])

let contentOpf = `<?xml version="1.0" encoding="utf-8"?>\n${pretty(render(opfTree))}`

zip.file('content.opf', contentOpf)
/**
 * Build the `navPoint` virtual nodes of the NCX navigation map.
 *
 * @param {Array} list - entries of the form [label, src]; index 0 is used as
 *   the text of the navLabel, index 1 as the `src` of the content element
 * @returns {Array} one navPoint node per entry, numbered from 1 through both
 *   the `id` ('navPoint-N') and the `playOrder` attribute
 */
function navPoints(list) {
  var arr = []
  for (let i = 0; i < list.length; i++) {
    const entry = list[i]
    // Numbering is 1-based; id and playOrder share the same number.
    const order = i + 1
    arr.push(m('navPoint', {id: 'navPoint-' + order, playOrder: order}, [
      m('navLabel', m('text', entry[0])),
      m('content', {src: entry[1]})
    ]))
  }
  return arr
}
// NCX table of contents (toc.ncx).
// The three numeric head entries all carry the content value 0.
const ncxZeroMetaNames = ['dtb:depth', 'dtb:totalPageCount', 'dtb:maxPageNumber']

const ncxTree = m('ncx', {version: '2005-1', xmlns: NS.DAISY}, [
  m('head', [
    m('meta', {content: bookInfo.uuid, name: 'dtb:uid'})
  ].concat(ncxZeroMetaNames.map((name) => m('meta', {content: 0, name: name})))),
  m('docTitle', m('text', bookInfo.title)),
  m('navMap', navPoints([
    ['Contents', 'nav.xhtml']
  ]))
])

let tocNcx = `<?xml version="1.0" encoding="utf-8" ?>\n${pretty(render(ncxTree))}`

zip.file('toc.ncx', tocNcx)
// Navigation document (nav.xhtml): an XHTML page whose <nav epub:type="toc">
// holds the table of contents (the list is still empty here).
const navTree = m('html', {xmlns: NS.XHTML, 'xmlns:epub': NS.OPS, lang: 'en', 'xml:lang': 'en'}, [
  m('head', [
    m('meta', {charset: 'utf-8'}),
    //m('link', {rel: 'stylesheet', type: 'text/css', href: 'styles.css'}),
    m('title', 'Contents')
  ]),
  m('body', [
    m('nav', {'epub:type': 'toc', id: 'toc'}, [
      m('h1', 'Contents'),
      m('ol', [])
    ])
  ])
])

let navDocument = `<?xml version="1.0" encoding="utf-8"?>\n<!DOCTYPE html>\n\n${pretty(render(navTree))}`

zip.file('nav.xhtml', navDocument)
// Serialize the archive and write it to out.epub.
const epubStreamOptions = {
  type: 'nodebuffer',
  streamFiles: true,
  mimeType: 'application/epub+zip',
  compression: 'DEFLATE',
  compressionOptions: {level: 9}
}

zip
  .generateNodeStream(epubStreamOptions)
  .pipe(fs.createWriteStream('out.epub'))
  .on('finish', () => {
    // JSZip produces a readable stream (which would emit "end"); the handler
    // is attached to the writable file stream returned by pipe(), which
    // emits "finish" once everything has been flushed.
    console.log("out.epub written.");
  })
/*
let promise = null
if (JSZip.support.uint8array) {
promise = zip.generateAsync({type : 'uint8array', mimeType: 'application/epub+zip'})
} else {
promise = zip.generateAsync({type : 'string', mimeType: 'application/epub+zip'})
}
promise.then((zip) => {
console.log(zip)
})
*/
/*
const tidy = require("tidy-html5").tidy_html5
let content = fs.readFileSync("test.xhtml")
let result = tidy(content, {
"indent": "auto",
"numeric-entities": "yes",
"output-xhtml": "yes",
"alt-text": "Image",
"wrap": "0",
"quiet": "yes"
})
console.log(result)
*/

47
test.xhtml Normal file
View file

@ -0,0 +1,47 @@
<?xml version="1.0" encoding="utf-8"?>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>Tag Test</title>
<link rel="stylesheet" type="text/css" href="style.css" />
</head>
<body>
<div><p><span style="font-size:2em; line-height: 1.3em;"><b>Tag Test</b></span></p>
<p class="indented double">This chapter is designed to test the capabilities of various ePub extraction tools against each BBCode tag supported by FIMFiction.net. First of all, note that this paragraph begins with a tab indent—many tools will remove this. Next, here is the set of non-ANSI characters that I frequently use in my stories: “” — … (smart double quotes, smart single quotes, em-dash, and ellipses).</p>
<p class="indented double">Next are the tags that should work in-line: [b]<b>bold</b>[/b], [i]<i>italics</i>[/i], [u]<span class="u">underline</span>[/u], [s]<span style="text-decoration: line-through;">strikethrough</span>[/s], [smcaps]<span style="font-variant:small-caps;">small caps</span>[/smcaps], [spoiler]<span class="spoiler">spoiler text</span>[/spoiler], [url]<a href="http://www.google.com">hyperlink</a>[/url], [email]<a href="https://www.fimfiction.net/mailto:user@server.com">user@server.com</a>[/email], and finally an in-line image: [img]<img src="images/ch0001pic0001.png" alt="ch0001pic0001" />[/img].</p>
<p class="indented double"><i>There&#039;s a known bug in translating FIMFiction stories where consecutive italicized paragraphs lose the italics after the first paragraph. Well, this paragraph is supposed to be italicized.</i></p>
<p class="indented double"><i>And so is this one.</i></p>
<p class="indented double"><i>And ending with this one.</i></p>
<p class="indented double">Lets quickly step through all of the named color tags: <span style="color:red;">red</span>, <span style="color:silver;">silver</span>, <span style="color:orange;">orange</span>, <span style="color:yellow;">yellow</span>, <span style="color:brown;">brown</span>, <span style="color:green;">green</span>, <span style="color:olive;">olive</span>, <span style="color:cornflowerblue;">cornflowerblue</span>, <span style="color:cyan;">cyan</span>, <span style="color:purple;">purple</span>, <span style="color:pink;">pink</span>, <span style="color:hotpink;">hotpink</span>, <span style="color:lime;">lime</span>, <span style="color:white;">white</span> (white), <span style="color:lightgrey;">lightgrey</span>, <span style="color:indigo;">indigo</span>, <span style="color:darkblue;">darkblue</span>, <span style="color:lightblue;">lightblue</span>, <span style="color:blue;">blue</span>, <span style="color:gold;">gold</span>, <span style="color:navy;">navy</span>, <span style="color:chartreuse;">chartreuse</span>, and <span style="color:teal;">teal</span>. Here are some of the colors I use to code character&#039;s thoughts in my “Thought Experiments” series: <span style="color:#0077D0;">#0077D0</span> for Vinyl Scratch, <span style="color:#60BB50;">#60BB50</span> for Spike, <span style="color:purple;">purple</span> for Twilight Sparkle, <span style="color:hotpink;">hotpink</span> for Pinkie Pie, and <span style="color:darkblue;">darkblue</span> for Rainbow Dash.</p>
<p class="indented double">Size can be indicated by either an absolute number, or in &quot;ems&quot;, which is a percentage scaling:</p>
<p class="double">[size=24]<span style="font-size:24px; line-height: 1.3em;">Size 24, which should be normal-size text for default settings</span>[/size]</p>
<p class="double">[size=12]<span style="font-size:12px; line-height: 1.3em;">Size 12, which should be half-size text for default settings</span>[/size]</p>
<p class="double">[size=48]<span style="font-size:48px; line-height: 1.3em;">Size 48, which should be double-size text for default settings</span>[/size]</p>
<p class="double">[size=1em]<span style="font-size:1em; line-height: 1.3em;">1 em text</span>[/size]</p>
<p class="double">[size=.5em]<span style="font-size:.5em; line-height: 1.3em;">0.5 em text</span>[/size]</p>
<p class="double">[size=2em]<span style="font-size:2em; line-height: 1.3em;">2 em text</span>[/size]</p>
<hr />
<p class="indented double">Just above this paragraph should be a horizontal rule, or [hr] tag.</p>
<p class="double" style="text-align:center;">[center]Centered text[/center]</p>
<p class="double"><div style='text-align:right;'>[right]Right-aligned text[/right]</div></p><blockquote><p class="double">[quote] This is a quote.</p>
<p class="indented double">One big problem I&#039;ve found with ePub extractors is handling multiple-paragraph quotes. This paragraph should still be inside the above quote tag.</p>
<p class="double" style="text-align:center;">[center]Centered in the quote.[/center]</p>
<p>[/quote]</p></blockquote>
<hr />
<p class="indented double">Here&#039;s an image all by itself:</p>
<p class="double"><img src="images/ch0001pic0002.jpg" alt="ch0001pic0002" /></p>
<p class="indented double">Here&#039;s a YouTube video:</p>
<p class="double"><div class="youtube_container"><span class="link"><a href="http://www.youtube.com/watch?v=_eDXhih_NW4">Youtube Video</a></span></div></p>
<hr />
<p class="double"><span class="link"><blockquote style="margin: 10px 0px; box-sizing:border-box; -moz-box-sizing:border-box;margin-right:25px; padding: 15px;background-color: #F7F7F7;border: 1px solid #AAA;width: 50%;float:left;box-shadow: 5px 5px 0px #EEE;">[left_insert]This creates left-aligned text that wraps at the center of the screen. Here&#039;s some <i>lorum ipsum</i> to demonstrate line-wrapping: Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.</blockquote></span></p>
<p class="double"><span class="link"><blockquote style="margin: 10px 0px; box-sizing:border-box; -moz-box-sizing:border-box;margin-right:25px; padding: 15px;background-color: #F7F7F7;border: 1px solid #AAA;width: 50%;float:left;box-shadow: 5px 5px 0px #EEE;">Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.[/left_insert]</blockquote></span></p>
<p class="double"><span class="link"><blockquote style="margin: 10px 0px; box-sizing:border-box; -moz-box-sizing:border-box;margin-left:25px; padding: 15px;background-color: #F7F7F7;border: 1px solid #AAA;width: 50%;float:right;box-shadow: 5px 5px 0px #EEE;">[right_insert]This creates left-aligned text starting at the center of the screen. Here&#039;s some <i>lorum ipsum</i> to demonstrate line-wrapping: Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.</blockquote></span></p>
<p class="double"><span class="link"><blockquote style="margin: 10px 0px; box-sizing:border-box; -moz-box-sizing:border-box;margin-left:25px; padding: 15px;background-color: #F7F7F7;border: 1px solid #AAA;width: 50%;float:right;box-shadow: 5px 5px 0px #EEE;">Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.[/right_insert]</blockquote></span></p>
<hr />
</div>
<div class="authors-note">
<p><b>Author&#039;s Note:</b></p>
<p>This is here to test whether author&#039;s notes make it into the ePub. This section tends to have most tags stripped out, so expect no [b]bold[/b] text, but do expect [img]<img src="images/ch0001pic0001.png" alt="ch0001pic0001" />[/img] (images).</p>
</div>
</body>
</html>