2016-06-28 09:19:01 +12:00
|
|
|
|
|
|
|
import m from 'mithril'
|
2017-06-07 21:13:31 +12:00
|
|
|
import { XmlEntities } from 'html-entities'
|
2017-06-12 23:53:17 +12:00
|
|
|
import twemoji from 'twemoji'
|
2017-10-19 02:02:07 +13:00
|
|
|
import render from 'mithril-node-render'
|
2016-06-28 09:19:01 +12:00
|
|
|
|
2016-06-28 23:59:39 +12:00
|
|
|
import fetch from './fetch'
|
2017-06-07 08:15:05 +12:00
|
|
|
import { youtubeKey } from './constants'
|
2017-10-19 02:02:07 +13:00
|
|
|
import { replaceAsync } from './utils'
|
2016-06-28 09:19:01 +12:00
|
|
|
|
2017-06-07 21:13:31 +12:00
|
|
|
// Shared XmlEntities instance; used below to decode entity-encoded attribute values (URLs) captured from the markup.
const entities = new XmlEntities()
|
|
|
|
|
2017-10-19 02:02:07 +13:00
|
|
|
/**
 * Normalise a fragment of story/description HTML.
 *
 * Steps, in order: unicode normalisation, emoji replacement via twemoji,
 * entity and control-character clean-up, rewriting of a few presentational
 * tags, whitespace clean-up, re-rendering of plain `<img>` tags with an `alt`
 * attribute, absolutising site-relative links, and replacing SoundCloud and
 * YouTube embed links with rendered markup.
 *
 * @param {string} html - markup to clean; any falsy value yields ''
 * @returns {Promise<string>} the cleaned markup
 */
export async function cleanMarkup (html) {
  if (!html) {
    return ''
  }

  html = html.normalize('NFC') // normalize unicode

  html = twemoji.parse(html, { ext: '.svg', folder: 'svg' })

  // replace HTML entities with decimal entities
  // NOTE(review): the entity patterns below were reconstructed from the
  // comment above (the patterns had degraded into no-op space replacements);
  // confirm against the upstream source.
  /* eslint-disable no-control-regex */
  html = html.replace(/\xA0/g, '&#160;')
  html = html.replace(/&nbsp;/ig, '&#160;')
  html = html.replace(/&emsp;/ig, ' ')
  html = html.replace(/[\u000C\u007F]/g, '') // remove invalid token (formfeed and u007F)
  /* eslint-enable no-control-regex */

  // fix some tags
  html = html.replace(/<u>/ig, '<span style="text-decoration: underline">')
  html = html.replace(/<\/u>/ig, '</span>')
  html = html.replace(/<s>/ig, '<span style="text-decoration: line-through">')
  html = html.replace(/<\/s>/ig, '</span>')
  html = html.replace(/<span style="font-variant-caps:small-caps">/ig, '<span class="smcp">')

  // trim whitespace just inside paragraphs
  html = html.replace(/<p>\s*/ig, '<p>')
  html = html.replace(/\s*<\/p>/ig, '</p>')

  html = fixDoubleSpacing(html)

  // fix floating blockquote tags
  // split/join replaces EVERY occurrence; String.replace with a string
  // pattern would only rewrite the first one.
  const floatingQuotes = [
    [
      '<blockquote style="margin: 10px 0px; box-sizing:border-box; -moz-box-sizing:border-box;margin-right:25px; padding: 15px;background-color: #F7F7F7;border: 1px solid #AAA;width: 50%;float:left;box-shadow: 5px 5px 0px #EEE;">',
      '<blockquote class="left_insert">'
    ],
    [
      '<blockquote style="margin: 10px 0px; box-sizing:border-box; -moz-box-sizing:border-box;margin-left:25px; padding: 15px;background-color: #F7F7F7;border: 1px solid #AAA;width: 50%;float:right;box-shadow: 5px 5px 0px #EEE;">',
      '<blockquote class="right_insert">'
    ]
  ]
  for (const [styled, classed] of floatingQuotes) {
    html = html.split(styled).join(classed)
  }

  // add alt attributes to images that don't have them
  const imageEmbed = /<img src="(.*?)" \/>/g
  html = await replaceAsync(html, imageEmbed, (match, src) => {
    return render(m('img', { src: entities.decode(src), alt: 'Image' }), { strict: true })
  })

  // Fix links pointing to pages on fimfiction
  // Example: <a href="/user/djazz" rel="nofollow">djazz</a>
  // The tail stops at the first '>' so a link directly followed by a newline
  // or by another tag is still matched.
  const matchLink = /(<a .?href=")(.+?)("[^>]*>)/g
  html = html.replace(matchLink, (match, head, url, tail) => {
    // only site-relative paths are rewritten; '#…', '//host/…' and absolute URLs are kept
    const isSiteRelative = url.startsWith('/') && !url.startsWith('//')
    return head + (isSiteRelative ? 'https://fimfiction.net' + url : url) + tail
  })

  // video id -> API snippet (null until the API has answered)
  const cache = new Map()
  // decoded "v=" query string -> video id
  const query = new Map()

  const matchYouTube = /<p><a class="embed" href="https:\/\/www\.youtube\.com\/watch\?v=(.*?)">.*?<\/a><\/p>/g
  for (let ma; (ma = matchYouTube.exec(html));) {
    // the id is everything up to the first '&' (further query parameters follow it)
    const idMatch = ma[1].match(/^[^&]+/)
    if (!idMatch) {
      continue // empty or malformed "v=" value: leave this embed untouched
    }
    cache.set(idMatch[0], null)
    query.set(entities.decode(ma[1]), idMatch[0])
  }

  const matchSoundCloud = /<p><a class="embed" href="(https:\/\/soundcloud\.com\/.*?)">.*?<\/a><\/p>/g
  html = await replaceAsync(html, matchSoundCloud, (match, url) => {
    // decode once and use the decoded URL for both href and label, so the
    // label is not escaped a second time when rendered
    const href = entities.decode(url)
    const label = href
      .replace('https://soundcloud.com/', '')
      .replace(/[-_]/g, ' ')
      .replace('/', ' - ')
      .replace(/ {2}/g, ' ')
    return render(m('.soundcloud.leftalign', [
      'SoundCloud: ', m('a', { href, rel: 'nofollow' }, label)
    ]), { strict: true })
  })

  if (cache.size === 0) {
    return html
  }

  await loadYoutubeInfo([...cache.keys()])
  // Always substitute: embeds without metadata are returned unchanged by
  // replaceYouTube, so a partial API answer still upgrades the known videos.
  return replaceAsync(html, matchYouTube, replaceYouTube)

  // Fetch the snippets for the given video ids and store them in `cache`.
  async function loadYoutubeInfo (ids) {
    const idList = ids.map((id) => encodeURIComponent(id)).join(',')
    const raw = await fetch('https://www.googleapis.com/youtube/v3/videos?id=' + idList + '&part=snippet&maxResults=50&key=' + youtubeKey)

    let items = []
    try {
      const parsed = JSON.parse(raw)
      if (parsed && Array.isArray(parsed.items)) {
        items = parsed.items
      }
    } catch (e) {
      // Deliberate best effort: an unparsable response just means no
      // metadata, and the embeds stay as plain links.
    }

    for (const video of items) {
      if (video && cache.has(video.id)) {
        cache.set(video.id, video.snippet)
      }
    }
  }

  // Replacement callback for matchYouTube: renders a thumbnail figure, or
  // returns the original match when no metadata is available for the video.
  async function replaceYouTube (match, queryString) {
    const decodedQuery = entities.decode(queryString)
    const youtubeId = query.get(decodedQuery)
    const data = cache.get(youtubeId)
    if (!data) {
      return match
    }

    const youtubeUrl = 'https://youtube.com/watch?v=' + decodedQuery
    const thumbs = data.thumbnails || {}
    const best = thumbs.standard || thumbs.high || thumbs.medium || thumbs.default
    // fall back to the predictable thumbnail URL when the snippet has none
    const thumbnail = best && best.url ? best.url : 'https://img.youtube.com/vi/' + youtubeId + '/hqdefault.jpg'
    const title = data.title || 'Youtube Video'
    const caption = data.title ? data.title + ' on YouTube' : title

    return render(m('figure.youtube', [
      m('a', { href: youtubeUrl, rel: 'nofollow' },
        m('img', { src: thumbnail, alt: title })
      ),
      m('figcaption', m('a', { href: youtubeUrl, rel: 'nofollow' }, caption))
    ]), { strict: true })
  }
}
|
|
|
|
|
|
|
|
/**
 * Collapse runs of whitespace and tidy the spacing around tags.
 *
 * Whitespace on both sides of an opening tag is reduced to one space before
 * it; whitespace on both sides of a closing tag is reduced to one space after
 * it. Void or self-closing elements (`<img …/>`, `<br>`, …) have no inner
 * side, so they keep a single space on each side.
 *
 * @param {string} html - markup to clean
 * @returns {string} the markup with normalised spacing
 */
export function fixDoubleSpacing (html) {
  // from FimFictionConverter by Nyerguds
  // elements without content: a known void tag name, or an explicit "/>" ending
  const voidTag = /^<(?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)(?=[\s/>])|\/>$/

  html = html.replace(/\s\s+/g, ' ')
  // push spaces to the closed side of tags
  html = html.replace(/\s+(<[a-z][^>]*>)\s+/g, (match, tag) => {
    return voidTag.test(tag) ? ' ' + tag + ' ' : ' ' + tag
  })
  html = html.replace(/\s+(<\/[a-z][^>]*>)\s+/g, '$1 ')
  return html
}
|
2016-08-11 08:26:14 +12:00
|
|
|
|
|
|
|
/**
 * Turn whitespace-indented paragraphs into paragraphs carrying the
 * `indented` class, and strip leading whitespace from paragraphs.
 *
 * A paragraph counts as indented when its text (optionally preceded by tags)
 * starts with at least `fixIndent` whitespace characters.
 *
 * @param {string} html - markup to process
 * @returns {string} the markup with indent whitespace replaced by the class
 */
export function fixParagraphIndent (html) {
  // from FimFictionConverter by Nyerguds
  // only trigger indenting when finding this many whitespace characters in a row
  const fixIndent = 2

  // zero or more tags directly after the <p> tag, captured as a whole
  const leadingTags = '((<([^>]+)>)*)'
  const indent = '\\s{' + fixIndent + '}\\s*'

  // Add indented class, taking into account that there could be opening tags behind the p tag.
  html = html.replace(new RegExp('<p>' + leadingTags + indent, 'g'), '<p class="indented">$1')
  // Same for paragraphs that already have a class attribute not containing "indented".
  html = html.replace(new RegExp('<p class="(((?!indented)[^>])*)">' + leadingTags + indent, 'g'), '<p class="indented $1">$3')

  // Cleanup of remaining start whitespace in paragraphs (e.g. already indented ones).
  // The tag name must be exactly "p", so <pre> and <param> are left alone.
  html = html.replace(/<p((?:\s[^>]*)?)>((?:<[^>]+>)*)\s+/g, '<p$1>$2')

  return html
}
|