mirror of
https://github.com/daniel-j/fimfic2epub.git
synced 2024-09-30 00:56:13 +13:00
165 lines
6.3 KiB
JavaScript
165 lines
6.3 KiB
JavaScript
|
|
import m from 'mithril'
|
|
import { XmlEntities } from 'html-entities'
|
|
import render from './lib/mithril-node-render'
|
|
|
|
import fetch from './fetch'
|
|
import { youtubeKey } from './constants'
|
|
|
|
// Shared entity codec; cleanMarkup uses its decode() on attribute values
// (image, link and embed URLs) captured from the markup.
const entities = new XmlEntities()
|
|
|
|
/**
 * Normalizes a fragment of Fimfiction HTML.
 *
 * Steps, in order: non-breaking spaces become plain spaces, <u>/<s> become
 * styled spans, paragraph whitespace and indentation are normalized, floating
 * blockquotes get a class instead of an inline style, user images point at
 * their CDN URL, site-relative links are made absolute, and SoundCloud, story
 * and YouTube embeds are replaced by simple static markup.
 *
 * YouTube embeds need one request to the YouTube Data API to look up titles
 * and thumbnails. The lookup is best-effort: embeds whose video could not be
 * resolved (request failed, unparsable response, video missing from the
 * response) are removed from the output.
 *
 * @param {string} html - Markup to clean. Any falsy value yields ''.
 * @returns {Promise<string>} Resolves with the cleaned markup; rejects only if
 *   rendering the replacement markup throws unexpectedly.
 */
export function cleanMarkup (html) {
  if (!html) {
    return Promise.resolve('')
  }

  return new Promise((resolve, reject) => {
    // replace HTML non-breaking spaces with normal spaces; the entity forms
    // are spelled out because a bare space pattern never matches them
    html = html.replace(/&nbsp;|&#160;|&#x0*a0;|\u00a0/gi, ' ')

    // fix some tags
    html = html.replace(/<u>/g, '<span style="text-decoration: underline">')
    html = html.replace(/<\/u>/g, '</span>')
    html = html.replace(/<s>/g, '<span style="text-decoration: line-through">')
    html = html.replace(/<\/s>/g, '</span>')

    html = html.replace(/<p>\s*/g, '<p>')
    html = html.replace(/\s*<\/p>/g, '</p>')

    html = html.replace(/<p><p>/g, '<p>')
    html = html.replace(/<\/div><\/p>/g, '</div>')

    html = fixParagraphIndent(html)

    html = fixDoubleSpacing(html)

    // fix floating blockquote tags
    // split/join replaces every occurrence; String.replace with a string
    // pattern would only touch the first one
    const leftInsert = '<blockquote style="margin: 10px 0px; box-sizing:border-box; -moz-box-sizing:border-box;margin-right:25px; padding: 15px;background-color: #F7F7F7;border: 1px solid #AAA;width: 50%;float:left;box-shadow: 5px 5px 0px #EEE;">'
    const rightInsert = '<blockquote style="margin: 10px 0px; box-sizing:border-box; -moz-box-sizing:border-box;margin-left:25px; padding: 15px;background-color: #F7F7F7;border: 1px solid #AAA;width: 50%;float:right;box-shadow: 5px 5px 0px #EEE;">'
    html = html.split(leftInsert).join('<blockquote class="left_insert">')
    html = html.split(rightInsert).join('<blockquote class="right_insert">')

    // user images: keep only the CDN URL (second capture)
    const imageEmbed = /<img data-src="(.*?)" class="user_image" src="(.*?)" data-lightbox\/>/g
    html = html.replace(imageEmbed, (match, originalUrl, cdnUrl) => {
      return render(m('img', {src: entities.decode(cdnUrl), alt: 'Image'}))
    })

    // Fix links pointing to pages on fimfiction
    // Example: <a href="/user/djazz" rel="nofollow">djazz</a>
    const matchLink = /(<a .?href=")(.+?)(".+?>)/g
    html = html.replace(matchLink, (match, head, url, tail) => {
      // Only site-relative paths are rewritten. Anchors (#...), protocol-relative
      // (//...) and absolute (http...) URLs never start with a single slash.
      if (url.startsWith('/') && !url.startsWith('//')) {
        // NOTE(review): the decoded URL is written back into the attribute
        // without re-encoding - confirm this is intended.
        url = 'http://www.fimfiction.net' + entities.decode(url)
      }
      return head + url + tail
    })

    // YouTube ids found in the markup, mapped to their API snippet once known
    const cache = new Map()

    const matchYoutube = /<div class="embed-container" data-original-src="(.*?)" data-src="(.*?)" data-id="(.*?)" data-origin="(.*?)">(.+?)<\/div><\/div><\/div>/g
    // exec() runs until it returns null, which resets lastIndex, so the same
    // regex can be reused for the replace pass later on
    for (let ma; (ma = matchYoutube.exec(html));) {
      if (ma[4] === 'YouTube') {
        cache.set(ma[3], null)
      }
    }

    const matchSoundCloud = /<div data-controller="oembed" class="oembed" data-url="(.*?)" .+?<\/div>/g
    html = html.replace(matchSoundCloud, (match, url) => {
      return render(m('.soundcloud.leftalign', [
        'SoundCloud song ', m('a', {href: entities.decode(url), rel: 'nofollow'}, url.replace('https://soundcloud.com', ''))
      ]))
    })

    // Story embed
    const matchStoryEmbed = /<div style='[^']*?' class='bbcode__block'><div style="position:relative;" class="story-card-container".*?data-story-id="([^"]*?)"[\s\S]*?<a class="story_link" href="(.*?)" title=".*?">(.*?)<\/a>[\s\S]*?" class="story-card__author">(.*?)<\/a>[\s\S]*?<\/div><\/div>[\s\S]*?<\/div><\/div>/g
    html = html.replace(matchStoryEmbed, (match, id, storyLink, storyTitle, author) => {
      return render(m('.story', [
        'Story: ',
        m('a', {href: 'http://fimfiction.net' + entities.decode(storyLink), rel: 'nofollow'}, storyTitle),
        ' by ' + author
      ]))
    })

    if (cache.size === 0) {
      resolve(html)
    } else {
      getYoutubeInfo([...cache.keys()])
    }

    // Looks up the given video ids in one request, then finishes the cleanup.
    function getYoutubeInfo (ids) {
      fetch('https://www.googleapis.com/youtube/v3/videos?id=' + ids.join(',') + '&part=snippet&maxResults=50&key=' + youtubeKey)
        .then((raw) => {
          try {
            const parsed = JSON.parse(raw)
            return Array.isArray(parsed.items) ? parsed.items : []
          } catch (e) {
            // best-effort: an unparsable response means no video details
            return []
          }
        }, () => {
          // best-effort: a failed request means no video details
          return []
        })
        .then((videos) => {
          videos.forEach((video) => {
            cache.set(video.id, video.snippet)
          })
          // Always finish after the single request. Waiting for every id to be
          // resolved would leave the promise pending forever when the API omits
          // a video (e.g. deleted or private).
          html = html.replace(matchYoutube, replaceYoutube)
          resolve(html)
        })
        .catch(reject)
    }

    // Replacement callback for matchYoutube: a linked thumbnail with a caption,
    // or nothing when no details are known for the video.
    function replaceYoutube (match, origSrc, src, id, origin) {
      const data = cache.get(id)
      if (!data) {
        return ''
      }
      const youtubeUrl = 'https://youtube.com/watch?v=' + id
      const thumbnail = (data.thumbnails.standard || data.thumbnails.high || data.thumbnails.medium || data.thumbnails.default).url
      const caption = data.title + ' on YouTube'
      return render(m('figure.youtube', [
        m('a', {href: youtubeUrl, rel: 'nofollow'},
          m('img', {src: thumbnail, alt: data.title})
        ),
        m('figcaption', m('a', {href: youtubeUrl, rel: 'nofollow'}, caption))
      ]))
    }
  })
}
|
|
|
|
/**
 * Collapses runs of whitespace and tidies the whitespace around tags.
 *
 * Every run of two or more whitespace characters becomes a single space.
 * Then, for a lowercase tag with whitespace on both sides, the whitespace is
 * kept only before an opening tag and only after a closing tag.
 *
 * @param {string} html - Markup to normalize.
 * @returns {string} The markup with normalized spacing.
 */
export function fixDoubleSpacing (html) {
  // from FimFictionConverter by Nyerguds
  const collapsed = html.replace(/\s\s+/g, ' ')
  // push spaces to the closed side of tags
  return collapsed
    .replace(/\s+(<[a-z][^>]*>)\s+/g, ' $1')
    .replace(/\s+(<\/[a-z][^>]*>)\s+/g, '$1 ')
}
|
|
|
|
/**
 * Turns whitespace-indented paragraphs into paragraphs with an "indented"
 * class and strips the leading whitespace that produced the indent.
 *
 * A paragraph counts as indented when its text (optionally preceded by tags
 * directly after the <p>) starts with at least `fixIndent` whitespace
 * characters. Any leading whitespace left at the start of a paragraph is
 * removed afterwards.
 *
 * @param {string} html - Markup to process.
 * @returns {string} The markup with indentation expressed as a class.
 */
export function fixParagraphIndent (html) {
  // from FimFictionConverter by Nyerguds
  const fixIndent = 2
  if (fixIndent > 0) {
    // only trigger indenting when finding as many whitespace characters in a row as indicated by the FixIndent setting.

    // Add indented class, with the search keeping into account that there could be opening tags behind the p tag.
    html = html.replace(new RegExp('<p>((<([^>]+)>)*)\\s{' + fixIndent + '}\\s*', 'g'), '<p class="indented">$1')
    html = html.replace(new RegExp('<p class="(((?!indented)[^>])*)">((<([^>]+)>)*)\\s{' + fixIndent + '}\\s*', 'g'), '<p class="indented $1">$3')

    // Cleanup of remaining start whitespace in already indented paragraphs.
    // This is a regex literal, so whitespace is a single-backslash \s; the
    // doubled backslash is only needed in the string sources above.
    // \b keeps <pre> from being treated as a <p> with attributes.
    html = html.replace(/<p\b([^>]*)>((<[^>]+>)*)\s+/g, '<p$1>$2')
  }
  return html
}
|