Use HTML download instead of fetching chapters separately

Added twemoji. Fixed progress bar.
2024-06-25 01:21:15 +12:00 · 2017-06-12 13:53:17 +02:00 · 2017-06-12 13:53:17 +02:00 · dac3a01a1b
parent d780668cfb
commit dac3a01a1b
5 changed files with 84 additions and 23 deletions
--- a/extension/manifest.json
+++ b/extension/manifest.json
@ -4,7 +4,7 @@
 	"name": "fimfic2epub",
 	"short_name": "fimfic2epub",
 	"description": "Improved EPUB exporter for Fimfiction",
-	"version": "1.6.3",
+	"version": "1.6.4",

 	"icons": {
 		"128": "icon-128.png"
--- a/package.json
+++ b/package.json
@ -1,6 +1,6 @@
 {
  "name": "fimfic2epub",
-  "version": "1.6.3",
+  "version": "1.6.4",
  "description": "Tool to generate improved EPUB ebooks from Fimfiction stories",
  "author": "djazz",
  "repository": {
@ -26,12 +26,14 @@
    "html-entities": "^1.2.0",
    "html-to-text": "^2.1.3",
    "image-size": "^0.5.0",
+    "is-svg": "^2.1.0",
    "jszip": "^3.1.2",
    "match-words": "^0.1.0",
    "mithril": "^0.2.5",
    "pretty-data": "^0.40.0",
    "request": "^2.74.0",
    "sanitize-filename": "^1.6.0",
+    "twemoji": "^2.3.0",
    "zero-fill": "^2.2.3"
  },
  "devDependencies": {
--- a/src/FimFic2Epub.js
+++ b/src/FimFic2Epub.js
@ -7,6 +7,7 @@ import sanitize from 'sanitize-filename'
 import URL from 'url'
 import isNode from 'detect-node'
 import fileType from 'file-type'
+import isSvg from 'is-svg'
 import sizeOf from 'image-size'
 import Emitter from 'es6-event-emitter'

@ -22,6 +23,8 @@ import { containerXml } from './constants'

 const entities = new XmlEntities()

+const trimWhitespace = /^\s*(<br\s*\/?\s*>)+|(<br\s*\/?\s*>)+\s*$/ig
+
 class FimFic2Epub extends Emitter {

  static getStoryId (id) {
@ -197,6 +200,58 @@ class FimFic2Epub extends Emitter {
    this.chaptersWithNotes.length = 0

    this.progress(0, 0, 'Fetching chapters...')
+
+    let chapterCount = this.storyInfo.chapters.length
+    let url = 'https://www.fimfiction.net/story/download/' + this.storyInfo.id + '/html'
+
+    this.pcache.chapters = fetch(url).then((html) => {
+      // html is the story's HTML download; chapters are chained on p. then() ignores non-function arguments, so each cleanMarkup call is wrapped in an arrow function
+      let p = Promise.resolve()
+      let matchChapter = /<article class="chapter">[\s\S]*?<\/header>([\s\S]*?)<\/article>/g
+      for (let ma, i = 0; (ma = matchChapter.exec(html)); i++) {
+        let chapterContent = ma[1]
+        chapterContent = chapterContent.replace(/<footer>[\s\S]*?<\/footer>/g, '').trim()

+        let authorNotesPos = chapterContent.indexOf('<aside ')
+        let notesContent = ''
+        let notesFirst = authorNotesPos === 0
+        if (authorNotesPos !== -1) {
+          // cut the first authors-note block out of the chapter body and keep its inner markup separately
+          chapterContent = chapterContent.replace(/<aside class="authors-note">([\s\S]*?)<\/aside>/, (match, content) => {
+            // drop the note's own heading, then strip leading/trailing <br> runs
+            content = content.replace(/<header><h1>.*?<\/h1><\/header>/, '')
+            notesContent = content.trim().replace(trimWhitespace, '')
+            return ''
+          })
+        }

+        chapterContent = chapterContent.trim().replace(trimWhitespace, '')
+        let chapter = {content: chapterContent, notes: notesContent, notesFirst}
+        p = p.then(() => cleanMarkup(chapter.content)).then((content) => {
+          chapter.content = content
+        })
+        if (notesContent) {
+          p = p.then(() => cleanMarkup(chapter.notes)).then((notes) => {
+            chapter.notes = notes
+          })
+        }
+        p = p.then(() => {
+          this.progress(0, (i + 1) / chapterCount, 'Processed chapter ' + (i + 1) + ' / ' + chapterCount)
+          if (chapter.notes) {
+            this.hasAuthorNotes = true
+            this.chaptersWithNotes.push(i)
+          }
+          this.chapters[i] = chapter
+          let ch = this.storyInfo.chapters[i]
+          ch.realWordCount = htmlWordCount(chapter.content)
+        })
+      }
+      return p
+    }).then(() => {
+      this.pcache.chapters = null
+    })
+
+    /*
    this.pcache.chapters = new Promise((resolve, reject) => {
      let chapters = this.storyInfo.chapters
      let chapterCount = this.storyInfo.chapters.length
@ -251,6 +306,7 @@ class FimFic2Epub extends Emitter {
    }).then(() => {
      this.pcache.chapters = null
    })
+    */
    return this.pcache.chapters
  }

@ -284,6 +340,15 @@ class FimFic2Epub extends Emitter {
        fetchRemote(url, 'arraybuffer').then((data) => {
          r.dest = null
          let info = fileType(isNode ? data : new Uint8Array(data))
+          if (!info) {
+            // file-type doesn't support SVG, extra check:
+            if (isSvg(Buffer.from(data).toString('utf8'))) {
+              info = {
+                mime: 'image/svg+xml',
+                ext: 'svg'
+              }
+            }
+          }
          if (info) {
            let type = info.mime
            r.type = type
@ -649,7 +714,7 @@ class FimFic2Epub extends Emitter {
    let chapterPos = html.indexOf('<div class="bbcode">')
    let chapter = html.substring(chapterPos + 20)

-    let pos = chapter.indexOf('\t\t</div>\n\t</div>')
+    let pos = chapter.indexOf('\t\t</div>\n\t</div>\t\t\n\t\t\t\t\t</div>\n')

    chapter = chapter.substring(0, pos).trim()

--- a/src/cleanMarkup.js
+++ b/src/cleanMarkup.js
@ -1,6 +1,7 @@

 import m from 'mithril'
 import { XmlEntities } from 'html-entities'
+import twemoji from 'twemoji'
 import render from './lib/mithril-node-render'

 import fetch from './fetch'
@ -14,6 +15,8 @@ export function cleanMarkup (html) {
  }

  return new Promise((resolve, reject) => {
+    html = twemoji.parse(html, {ext: '.svg', folder: 'svg'})
+    html = html.replace(/(<img class="emoji" draggable="false" alt=".*?" src=".*?")>/g, '$1/>')
    // replace HTML entities with decimal entities
    html = html.replace(/&nbsp;/g, '&#160;')
    html = html.replace(/&emsp;/g, '&#8195;')
@ -58,28 +61,16 @@ export function cleanMarkup (html) {
    let cache = new Map()
    let completeCount = 0

-    let matchYoutube = /<div class="embed-container" data-original-src="(.*?)" data-src="(.*?)" data-id="(.*?)" data-origin="(.*?)">(.+?)<\/div><\/div><\/div>/g
-    for (let ma; (ma = matchYoutube.exec(html));) {
-      if (ma[4] === 'YouTube') {
-        let youtubeId = ma[3]
-        cache.set(youtubeId, null)
-      }
+    let matchYouTube = /<p><a class="embed" href="https:\/\/www\.youtube\.com\/watch\?v=(.*?)">.*?<\/a><\/p>/g
+    for (let ma; (ma = matchYouTube.exec(html));) {
+      let youtubeId = ma[1]
+      cache.set(youtubeId, null)
    }

-    let matchSoundCloud = /<div data-controller="oembed" class="oembed" data-url="(.*?)" .+?<\/div>/g
+    let matchSoundCloud = /<p><a class="embed" href="(https:\/\/soundcloud\.com\/.*?)">.*?<\/a><\/p>/g
    html = html.replace(matchSoundCloud, (match, url) => {
      return render(m('.soundcloud.leftalign', [
-        'SoundCloud song ', m('a', {href: entities.decode(url), rel: 'nofollow'}, url.replace('https://soundcloud.com', ''))
-      ]))
-    })
-
-    // Story embed
-    let matchStoryEmbed = /<div style='[^']*?' class='bbcode__block'><div style="position:relative;" class="story-card-container".*?data-story-id="([^"]*?)"[\s\S]*?<a class="story_link" href="(.*?)" title=".*?">(.*?)<\/a>[\s\S]*?" class="story-card__author">(.*?)<\/a>[\s\S]*?<\/div><\/div>[\s\S]*?<\/div><\/div>/g
-    html = html.replace(matchStoryEmbed, (match, id, storyLink, storyTitle, author) => {
-      return render(m('.story', [
-        'Story: ',
-        m('a', {href: 'http://fimfiction.net' + entities.decode(storyLink), rel: 'nofollow'}, storyTitle),
-        ' by ' + author
+        'SoundCloud: ', m('a', {href: entities.decode(url), rel: 'nofollow'}, url.replace('https://soundcloud.com/', '').replace(/[-_]/g, ' ').replace('/', ' - ').replace(/ {2}/g, ' '))
      ]))
    })

@ -100,13 +91,13 @@ export function cleanMarkup (html) {
          completeCount++
        })
        if (completeCount === cache.size || data.length === 0) {
-          html = html.replace(matchYoutube, replaceYoutube)
+          html = html.replace(matchYouTube, replaceYouTube)
          continueParsing()
        }
      })
    }

-    function replaceYoutube (match, origSrc, src, id, origin) {
+    function replaceYouTube (match, id) {
      let youtubeId = id
      let thumbnail = 'http://img.youtube.com/vi/' + youtubeId + '/hqdefault.jpg'
      let youtubeUrl = 'https://youtube.com/watch?v=' + youtubeId
--- a/src/style/style.styl
+++ b/src/style/style.styl
@ -58,6 +58,9 @@ img {
  max-width: 100%;
  max-height: 100%;
 }
+img.emoji {
+  height: 1em;
+}

 hr.old {
  padding: 0;