2017-10-19 02:02:07 +13:00
|
|
|
|
2018-03-13 23:10:04 +13:00
|
|
|
import htmlToTextModule from 'html-to-text'
|
2018-03-15 23:33:52 +13:00
|
|
|
import urlRegex from 'url-regex'
|
2018-03-13 23:10:04 +13:00
|
|
|
import matchWords from 'match-words'
|
2018-03-14 21:33:46 +13:00
|
|
|
import syllable from 'syllable'
|
2018-03-13 23:10:04 +13:00
|
|
|
|
2017-10-19 02:02:07 +13:00
|
|
|
/**
 * Asynchronous counterpart of `String.prototype.replace` with a function
 * replacer: the callback may return a promise (or a plain value) for each
 * match, and the pieces are joined once every replacement has settled.
 *
 * @param {*} str - Input; coerced with `String()`.
 * @param {RegExp|*} re - Pattern. A RegExp replaces the first match, or every
 *   match when it has the `g` flag. Anything else is coerced to a string and
 *   only its first occurrence is replaced.
 * @param {Function} callback - Called as `(match, ...groups, index, input)`
 *   for a RegExp, or `(needle, index, input)` for a string pattern. It is not
 *   called at all when nothing matches.
 * @returns {Promise<string>} The string with the replacements applied.
 */
export function replaceAsync (str, re, callback) {
  // http://es5.github.io/#x15.5.4.11
  str = String(str)
  const parts = []
  let i = 0 // index of the first character not yet copied into `parts`

  if (Object.prototype.toString.call(re) === '[object RegExp]') {
    if (re.global) { re.lastIndex = 0 }
    let m
    while ((m = re.exec(str))) {
      const args = m.concat([m.index, m.input])
      parts.push(str.slice(i, m.index), callback.apply(null, args))
      // Derive the resume position from the match itself: exec() does not
      // update lastIndex on a non-global regex, so relying on it would make
      // the tail below repeat the whole input.
      i = m.index + m[0].length
      if (!re.global) { break } // for non-global regexes only take the first match
      if (m[0].length === 0) { re.lastIndex++ } // step past an empty match so the loop advances
    }
  } else {
    re = String(re)
    const found = str.indexOf(re)
    // Like String.prototype.replace, leave the input alone (and skip the
    // callback) when the needle does not occur.
    if (found !== -1) {
      parts.push(str.slice(0, found), callback(re, found, str))
      i = found + re.length
    }
  }

  parts.push(str.slice(i))
  return Promise.all(parts).then(function (strings) {
    return strings.join('')
  })
}
|
2018-03-13 10:05:57 +13:00
|
|
|
|
|
|
|
// Decoder instance created on the first webp2png() call and reused afterwards.
let webpdecoder = null

/**
 * Decode the first frame of a WebP image and re-encode it as PNG.
 *
 * The vendor modules are required lazily, on each call, rather than at module
 * load time.
 *
 * @param {*} data - Raw WebP bytes, passed as-is to the RIFF parser and the
 *   decoder (presumably a Buffer/Uint8Array — confirm against callers).
 * @returns {Promise<Buffer>} Resolves with the concatenated PNG chunks.
 *   Rejects when no frame is found, when decoding yields nothing, or when the
 *   PNG packer emits `'error'`.
 */
export function webp2png (data) {
  return new Promise((resolve, reject) => {
    const libwebp = require('./vendor/libwebp')
    const WebPRiffParser = require('./vendor/libwebp-demux').WebPRiffParser
    const PNGPacker = require('node-png/lib/packer')

    if (!webpdecoder) {
      webpdecoder = new libwebp.WebPDecoder()
    }

    const parsed = WebPRiffParser(data, 0)
    const frame = parsed && parsed.frames && parsed.frames[0]
    if (!frame) {
      reject(new Error('webp2png: no frame found in WebP data'))
      return
    }

    // One-element arrays act as out-parameters: the decoder writes the
    // dimensions into index 0, which is read back for pack() below.
    const width = [0]
    const height = [0]
    const decodedData = webpdecoder.WebPDecodeRGBA(
      data,
      frame['src_off'], frame['src_size'],
      width, height
    )
    if (!decodedData) {
      // NOTE(review): assumes the decoder signals failure with a falsy
      // return value — confirm against the vendored libwebp build.
      reject(new Error('webp2png: failed to decode WebP frame'))
      return
    }

    const png = new PNGPacker({})
    const buffers = []
    png.on('data', (chunk) => {
      buffers.push(chunk)
    })
    // Without a listener an emitted 'error' would be thrown by the emitter
    // and this promise would never settle.
    png.once('error', reject)
    png.once('end', () => {
      resolve(Buffer.concat(buffers))
    })
    png.pack(decodedData, width[0], height[0])
  })
}
|
2018-03-13 23:10:04 +13:00
|
|
|
|
2018-03-15 23:33:52 +13:00
|
|
|
/**
 * Promise-based timer.
 *
 * @param {number} ms - Delay handed to `setTimeout`.
 * @returns {Promise<undefined>} Resolves (with no value) once the timer fires.
 */
export function sleep (ms) {
  return new Promise(function (wake) {
    setTimeout(wake, ms)
  })
}
|
|
|
|
|
|
|
|
/**
 * Render an HTML string as plain text through `htmlToTextModule.fromString`.
 *
 * @param {string} html - Markup to convert.
 * @param {Object} [options] - Converter options; these take precedence over
 *   the defaults below. The caller's object is not modified.
 * @returns {*} Whatever `htmlToTextModule.fromString` returns.
 */
export function htmlToText (html, options = {}) {
  // Defaults: no line wrapping, and neither images nor link targets in the output.
  const defaults = {
    wordwrap: false,
    ignoreImage: true,
    ignoreHref: true
  }
  const merged = Object.assign({}, defaults, options)
  return htmlToTextModule.fromString(html, merged)
}
|
|
|
|
|
2018-03-13 23:10:04 +13:00
|
|
|
/**
 * Count the words in an HTML fragment, ignoring code blocks and URLs.
 *
 * @param {string} html - Markup to analyse; `null`/`undefined` count as empty.
 * @returns {number} Number of words found, or 0 when there are none.
 */
export function htmlWordCount (html) {
  // Ignore codeblocks. `[\s\S]` lets the match span newlines, and the opening
  // tag may carry attributes or use different casing.
  const withoutCode = String(html == null ? '' : html)
    .replace(/<pre\b[^>]*>[\s\S]*?<\/pre\s*>/gi, '')

  const text = htmlToText(withoutCode).replace(urlRegex(), '') // Remove urls

  try {
    const words = matchWords(text)
    return words ? words.length : 0
  } catch (err) {
    // Best effort: a failure inside the word matcher counts as "no words".
    return 0
  }
}
|
|
|
|
|
2018-03-14 21:33:46 +13:00
|
|
|
/**
 * Split text into rough sentence tokens (sentence tokenizer by Darkentor).
 * Tokens may be empty or contain no words; the caller filters those out.
 *
 * @param {string} text
 * @returns {string[]}
 */
function tokenizeSentences (text) {
  return text
    .replace(/\0/g, '') // NUL is the sentence delimiter below, so drop every pre-existing one
    .replace(/\s+/g, ' ') // Replace all whitespace (including newlines) with a single space
    .normalize('NFD').replace(/[\u0300-\u036f]/g, '') // strip diacritics since JS's \w group and explicit [a-z]|[A-Z] don't account for them
    // Drop the period after known abbreviations so it is not taken for a
    // sentence end. The leading \b keeps ordinary words that merely END in
    // one of these ("running.", "best.", "items.") from losing their period.
    .replace(/\b(mr|mrs|dr|ms|prof|rev|col|cmdr|flt|lt|brgdr|hon|wng|capt|rt|revd|gen|cdre|admrl|herr|hr|frau|alderman|alhaji|brig|cdr|cik|consul|datin|dato|datuk|seri|dhr|dipl|ing|dott|sa|dra|drs|en|encik|eng|eur|exma|sra|exmo|sr|lieut|fr|fraulein|fru|graaf|gravin|grp|hajah|haji|hajim|hra|ir|lcda|lic|maj|mlle|mme|mstr|nti|sri|rva|sig|na|ra|sqn|ldr|srta|wg|co|esq|inc|iou|ltd|mdlle|messers|messrs|mlles|mm|mmes|mt|p\.s|pvt|st|viz)\./gi, '$1')
    // Mark a boundary after punctuation that is followed by whitespace and an
    // upper-case letter, and after every ':' or ';'.
    .replace(/(((^|\w).*?[^\w\s,]+)(?=\s+\W*[A-Z])|:|;)/g, '$1\0')
    .split(/\s*\0\s*/)
}

/**
 * Compute sentence/word/syllable counts plus a grade and an ease score for a
 * text, yielding to the event loop (via `sleep(0)`) while it works.
 *
 * @param {string} text - Text to analyse.
 * @param {number} [wakeupInterval=Infinity] - Once more than this many
 *   milliseconds have passed since the last yield, report progress and yield
 *   again. The default never yields inside the loop.
 * @param {Function} [progresscb] - Receives 0 at the start, fractions in
 *   [0, 1) from the loop, and 1 on completion. Not called with 1 when the
 *   result is null.
 * @returns {Promise<?{sentences: number, words: number, syllables: number, grade: number, ease: number}>}
 *   The metrics, or null when `text` is not a string, contains no a-z letter,
 *   or no finite score can be computed.
 */
export async function readingEase (text, wakeupInterval = Infinity, progresscb) {
  // Cheap rejection first: skip the tokenizer when there is nothing to score.
  if (typeof text !== 'string' || !/[a-z]/i.test(text)) {
    return null
  }

  const report = typeof progresscb === 'function' ? progresscb : null
  const tokenSentences = tokenizeSentences(text)

  await sleep(0)
  if (report) {
    report(0)
  }
  await sleep(0)

  let sentences = 0
  let words = 0
  let syllables = 0
  let lastTime = Date.now()

  for (let i = 0; i < tokenSentences.length; i++) {
    const now = Date.now()
    if (lastTime + wakeupInterval < now) {
      lastTime = now
      if (report) {
        report(i / tokenSentences.length)
      }
      await sleep(0)
    }

    // strip punctuation and underscores from the sentence (digits are kept)
    const sentenceWords = tokenSentences[i]
      .replace(/[^\w\s]|_/g, '')
      .replace(/\s+/g, ' ')
      .split(' ')
      .filter(word => word)

    // A token without words (e.g. the empty tail after a trailing ';') is not
    // a sentence and must not dilute the words-per-sentence ratio.
    if (sentenceWords.length === 0) {
      continue
    }

    sentences += 1
    words += sentenceWords.length
    syllables += sentenceWords.reduce((total, word) => total + syllable(word), 0)
  }

  if (sentences === 0 || words === 0) {
    return null
  }

  const grade = 0.39 * (words / sentences) + 11.8 * (syllables / words) - 15.59
  const ease = 206.835 - 1.015 * (words / sentences) - 84.6 * (syllables / words)

  // Only an unusable score means "no result"; a legitimate 0 is kept.
  if (!Number.isFinite(ease)) {
    return null
  }

  if (report) {
    report(1)
  }

  return { sentences, words, syllables, grade, ease }
}
|