commit 49387a70b39dbaf0b7ea4966048f30eece1a9092 Author: daniel-j Date: Sun Jun 19 22:28:28 2016 +0200 first commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cb792eb --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +node_modules/ +out.epub + diff --git a/mithril-node-render.js b/mithril-node-render.js new file mode 100644 index 0000000..5acd14a --- /dev/null +++ b/mithril-node-render.js @@ -0,0 +1,136 @@ +'use strict' + +var VOID_TAGS = ['area', 'base', 'br', 'col', 'command', 'embed', 'hr', + 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', + 'wbr', '!doctype'] + +function isArray (thing) { + return Object.prototype.toString.call(thing) === '[object Array]' +} + +function camelToDash (str) { + return str.replace(/\W+/g, '-') + .replace(/([a-z\d])([A-Z])/g, '$1-$2') +} + +function removeEmpties (n) { + return n !== '' +} + +// shameless stolen from https://github.com/punkave/sanitize-html +function escapeHtml (s, replaceDoubleQuote) { + if (s === 'undefined') { + s = '' + } + if (typeof (s) !== 'string') { + s = s + '' + } + s = s.replace(/\&/g, '&').replace(//g, '>') + if (replaceDoubleQuote) { + return s.replace(/\"/g, '"') + } + return s +} + +function createAttrString (view, escapeAttributeValue) { + var attrs = view.attrs + + if (!attrs || !Object.keys(attrs).length) { + return '' + } + + return Object.keys(attrs).map(function (name) { + var value = attrs[name] + if (typeof value === 'undefined' || value === null || typeof value === 'function') { + return + } + if (typeof value === 'boolean') { + return value ? ' ' + name : '' + } + if (name === 'style') { + if (!value) { + return + } + var styles = attrs.style + if (typeof styles === 'object') { + styles = Object.keys(styles).map(function (property) { + return styles[property] !== '' ? [camelToDash(property).toLowerCase(), styles[property]].join(':') : '' + }).filter(removeEmpties).join(';') + } + return styles !== '' ? 
' style="' + escapeAttributeValue(styles, true) + '"' : '' + } + + // Handle SVG tags specially + if (name === 'href' && view.tag === 'use') { + return ' xlink:href="' + escapeAttributeValue(value, true) + '"' + } + + return ' ' + (name === 'className' ? 'class' : name) + '="' + escapeAttributeValue(value, true) + '"' + }).join('') +} + +function createChildrenContent (view) { + if (isArray(view.children) && !view.children.length) { + return '' + } + + return render(view.children) +} + +function render (view, options) { + options = options || {} + + var defaultOptions = { + escapeAttributeValue: escapeHtml, + escapeString: escapeHtml + } + + Object.keys(defaultOptions).forEach(function (key) { + if (!options.hasOwnProperty(key)) options[key] = defaultOptions[key] + }) + + var type = typeof view + + if (type === 'string') { + return options.escapeString(view) + } + + if (type === 'number' || type === 'boolean') { + return view + } + + if (!view) { + return '' + } + + if (isArray(view)) { + return view.map(function (view) { return render(view, options) }).join('') + } + + // compontent + if (view.view) { + var scope = view.controller ? 
new view.controller() : {} + var result = render(view.view(scope), options) + if (scope.onunload) { + scope.onunload() + } + return result + } + + if (view.$trusted) { + return '' + view + } + var children = createChildrenContent(view) + if (!children/* && VOID_TAGS.indexOf(view.tag.toLowerCase()) >= 0*/) { + return '<' + view.tag + createAttrString(view, options.escapeAttributeValue) + '/>' + } + return [ + '<', view.tag, createAttrString(view, options.escapeAttributeValue), '>', + children, + '' + ].join('') +} + +render.escapeHtml = escapeHtml + +module.exports = render \ No newline at end of file diff --git a/package.json b/package.json new file mode 100644 index 0000000..cad0439 --- /dev/null +++ b/package.json @@ -0,0 +1,19 @@ +{ + "name": "tidyweb", + "version": "1.0.0", + "description": "", + "main": "index.js", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "author": "", + "license": "ISC", + "dependencies": { + "jszip": "^3.0.0", + "mithril": "^0.2.5", + "parse5": "^2.1.5", + "pretty": "^1.0.0", + "tidy-html5": "^0.1.1", + "xmlserializer": "^0.3.3" + } +} diff --git a/test.js b/test.js new file mode 100644 index 0000000..d25c124 --- /dev/null +++ b/test.js @@ -0,0 +1,216 @@ +#!/usr/bin/env node +'use strict' + +const fs = require('fs') + +const m = require('mithril') +const render = require('./mithril-node-render') +const pretty = require('pretty') + +//const parse5 = require('parse5') +//const xmlserializer = require('xmlserializer') + +const JSZip = require('jszip') + +const zip = new JSZip() + +zip.file('mimetype', 'application/epub+zip') + +zip.folder('META-INF').file('container.xml', ` + + + + + +`) + + + +//let html = fs.readFileSync('test.xhtml', 'utf8') + + +//let dom = parse5.parseFragment(html) + + +//let frag = parse5.parseFragment('Hello') +//console.log(frag.childNodes[0]) + +const NS = { + OPF: 'http://www.idpf.org/2007/opf', + OPS: 'http://www.idpf.org/2007/ops', + DC: 'http://purl.org/dc/elements/1.1/', + 
DAISY: 'http://www.daisy.org/z3986/2005/ncx/', + XHTML: 'http://www.w3.org/1999/xhtml' +} + +/*function attrs(a) { + let arr = [] + for (let i in a) { + arr.push({name: i, value: a[i]}) + } + return arr +} + +const dom = parse5.treeAdapters.default + +let opf = dom.createDocumentFragment() + +let packageNode = dom.createElement('package', NS, attrs({ + version: '3.0', + 'unique-identifier': 'BookId' +})) +let metadataNode = dom.createElement('metadata', NS, attrs({ + 'xmlns:dc': 'http://purl.org/dc/elements/1.1/', + 'xmlns:opf': 'http://www.idpf.org/2007/opf' +})) +let dcIdentifier = dom.createElement('dc:identifier', NS, attrs({id: 'BookId'})) +dom.insertText(dcIdentifier, 'urn:uuid:'+12345) +dom.appendChild(metadataNode, dcIdentifier) + +dom.appendChild(packageNode, metadataNode) + +dom.appendChild(opf, packageNode) + +//console.log(xmlserializer.serializeToString(opf)) +*/ + +let bookInfo = { + uuid: 'urn:uuid:'+12345, + title: 'Book title', + author: 'Author', + publishDate: '2016-06-19', + lastModifiedDate: '2016-06-18T16:32:40Z' +} + + +function subjects(s) { + var list = [] + for (let i = 0; i < s.length; i++) { + list.push(m('dc:subject', s[i])) + } + return list +} + +let contentOpf = '\n'+pretty(render( + m('package', {xmlns: NS.OPF, version: '3.0', 'unique-identifier': 'BookId'}, [ + m('metadata', {'xmlns:dc': NS.DC, 'xmlns:opf': NS.OPF}, [ + m('dc:identifier', {id: 'BookId'}, bookInfo.uuid), + m('dc:title', bookInfo.title), + m('dc:creator', {id: 'cre'}, bookInfo.author), + m('meta', {refines: '#cre', property: 'role', scheme: 'marc:relators'}, 'aut'), + m('dc:date', bookInfo.publishDate), + m('dc:publisher', 'Fimfiction'), + m('dc:source', 'http://fimfiction.net/story/'+'STORY_ID'), + m('dc:language', 'en'), + m('meta', {property: 'dcterms:modified'}, bookInfo.lastModifiedDate) + + ].concat(subjects(['Fiction', 'Pony']))), + + m('manifest', [ + m('item', {id: 'ncx', href: 'toc.ncx', 'media-type': 'application/x-dtbncx+xml'}), + m('item', {id: 'nav', 
'href': 'nav.xhtml', 'media-type': 'application/xhtml+xml', properties: 'nav'}) + ]), + + m('spine', {toc: 'ncx'}, [ + m('itemref', {idref: 'nav'}) + ]), + + false? m('guide', [ + + ]):null + ]) +)) +zip.file('content.opf', contentOpf) + +function navPoints(list) { + var arr = [] + for (let i = 0; i < list.length; i++) { + list[i] + arr.push(m('navPoint', {id: 'navPoint-'+(i+1), playOrder: i+1}, [ + m('navLabel', m('text', list[i][0])), + m('content', {src: list[i][1]}) + ])) + } + return arr +} + +let tocNcx = `\n`+pretty(render( + m('ncx', {version: '2005-1', xmlns: NS.DAISY}, [ + m('head', [ + m('meta', {content: bookInfo.uuid, name: 'dtb:uid'}), + m('meta', {content: 0, name: 'dtb:depth'}), + m('meta', {content: 0, name: 'dtb:totalPageCount'}), + m('meta', {content: 0, name: 'dtb:maxPageNumber'}) + ]), + m('docTitle', m('text', bookInfo.title)), + m('navMap', navPoints([ + ['Contents', 'nav.xhtml'] + ])) + ]) +)) +zip.file('toc.ncx', tocNcx) + + +let navDocument = `\n\n\n`+pretty(render( + m('html', {xmlns: NS.XHTML, 'xmlns:epub': NS.OPS, lang: 'en', 'xml:lang': 'en'}, [ + m('head', [ + m('meta', {charset: 'utf-8'}), + //m('link', {rel: 'stylesheet', type: 'text/css', href: 'styles.css'}), + m('title', 'Contents') + ]), + m('body', [ + m('nav', {'epub:type': 'toc', id: 'toc'}, [ + m('h1', 'Contents'), + m('ol', []) + ]) + ]) + ]) +)) +zip.file('nav.xhtml', navDocument) + +zip + .generateNodeStream({ + type: 'nodebuffer', + streamFiles: true, + mimeType: 'application/epub+zip', + compression: 'DEFLATE', + compressionOptions: {level: 9} + }) + .pipe(fs.createWriteStream('out.epub')) + .on('finish', function () { + // JSZip generates a readable stream with a "end" event, + // but is piped here in a writable stream which emits a "finish" event. 
+ console.log("out.epub written."); +}) + +/* +let promise = null +if (JSZip.support.uint8array) { + promise = zip.generateAsync({type : 'uint8array', mimeType: 'application/epub+zip'}) +} else { + promise = zip.generateAsync({type : 'string', mimeType: 'application/epub+zip'}) +} + +promise.then((zip) => { + console.log(zip) +}) +*/ + +/* +const tidy = require("tidy-html5").tidy_html5 + + +let content = fs.readFileSync("test.xhtml") + +let result = tidy(content, { + "indent": "auto", + "numeric-entities": "yes", + "output-xhtml": "yes", + "alt-text": "Image", + "wrap": "0", + "quiet": "yes" +}) + + +console.log(result) +*/ diff --git a/test.xhtml b/test.xhtml new file mode 100644 index 0000000..47dd3ba --- /dev/null +++ b/test.xhtml @@ -0,0 +1,47 @@ + + + + + Tag Test + + + +

Tag Test

+

This chapter is designed to test the capabilities of various ePub extraction tools against each BBCode tag supported by FIMFiction.net. First of all, note that this paragraph begins with a tab indent—many tools will remove this. Next, here is the set of non-ANSI characters that I frequently use in my stories: “” ‘’ — … (smart double quotes, smart single quotes, em-dash, and ellipses).

+

Next are the tags that should work in-line: [b]bold[/b], [i]italics[/i], [u]underline[/u], [s]strikethrough[/s], [smcaps]small caps[/smcaps], [spoiler]spoiler text[/spoiler], [url]hyperlink[/url], [email]user@server.com[/email], and finally an in-line image: [img]ch0001pic0001[/img].

+

There's a known bug in translating FIMFiction stories where consecutive italicized paragraphs lose the italics after the first paragraph. Well, this paragraph is supposed to be italicized.

+

And so is this one.

+

And ending with this one.

+

Let’s quickly step through all of the named color tags: red, silver, orange, yellow, brown, green, olive, cornflowerblue, cyan, purple, pink, hotpink, lime, white (white), lightgrey, indigo, darkblue, lightblue, blue, gold, navy, chartreuse, and teal. Here are some of the colors I use to code characters' thoughts in my “Thought Experiments” series: #0077D0 for Vinyl Scratch, #60BB50 for Spike, purple for Twilight Sparkle, hotpink for Pinkie Pie, and darkblue for Rainbow Dash.

+

Size can be indicated by either an absolute number, or in "ems", which is a percentage scaling:

+

[size=24]Size 24, which should be normal-size text for default settings[/size]

+

[size=12]Size 12, which should be half-size text for default settings[/size]

+

[size=48]Size 48, which should be double-size text for default settings[/size]

+

[size=1em]1 em text[/size]

+

[size=.5em]0.5 em text[/size]

+

[size=2em]2 em text[/size]

+
+

Just above this paragraph should be a horizontal rule, or [hr] tag.

+

[center]Centered text[/center]

+

[right]Right-aligned text[/right]

[quote] This is a quote.

+

One big problem I've found with ePub extractors is handling multiple-paragraph quotes. This paragraph should still be inside the above quote tag.

+

[center]Centered in the quote.[/center]

+

[/quote]

+
+

Here's an image all by itself:

+

ch0001pic0002

+

Here's a YouTube video:

+

+
+

[left_insert]This creates left-aligned text that wraps at the center of the screen. Here's some lorem ipsum to demonstrate line-wrapping: Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.

+

Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.[/left_insert]

+

[right_insert]This creates left-aligned text starting at the center of the screen. Here's some lorem ipsum to demonstrate line-wrapping: Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.

+

Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.[/right_insert]

+
+
+
+

Author's Note:

+

This is here to test whether author's notes make it into the ePub. This section tends to have most tags stripped out, so expect no [b]bold[/b] text, but do expect [img]ch0001pic0001[/img] (images).

+
+ + \ No newline at end of file