From 64f92af60b950f5e32f85279079d14db6d9e1504 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Wed, 27 Mar 2019 15:39:30 -0400 Subject: [PATCH] working setuptools package distribution --- .gitignore | 6 ++++++ MANIFEST.in | 3 +++ VERSION | 1 + archive | 1 - archivebox/requirements.txt | 0 archivebox/scripts/readability.js | 27 ------------------------ requirements.txt | 16 ++++++++++++++ setup.py | 35 ++++++++++++++++++++++++++++++- 8 files changed, 60 insertions(+), 29 deletions(-) create mode 100644 MANIFEST.in create mode 100644 VERSION delete mode 120000 archive delete mode 100644 archivebox/requirements.txt delete mode 100644 archivebox/scripts/readability.js create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore index 1dcc07e1..d44c22ec 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,9 @@ output/ data data/ archivebox/output +archivebox/data +archivebox/VERSION + +archivebox.egg-info/ +build/ +dist/ diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..e82bd579 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,3 @@ +include archivebox/VERSION +graft archivebox/templates +graft archivebox/templates/static diff --git a/VERSION b/VERSION new file mode 100644 index 00000000..0d91a54c --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +0.3.0 diff --git a/archive b/archive deleted file mode 120000 index 041799a6..00000000 --- a/archive +++ /dev/null @@ -1 +0,0 @@ -bin/archivebox \ No newline at end of file diff --git a/archivebox/requirements.txt b/archivebox/requirements.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/archivebox/scripts/readability.js b/archivebox/scripts/readability.js deleted file mode 100644 index ce937f54..00000000 --- a/archivebox/scripts/readability.js +++ /dev/null @@ -1,27 +0,0 @@ -() => { - function Readability(e,t){if(t&&t.documentElement)e=t,t=arguments[2];else if(!e||!e.documentElement)throw new Error("First argument to Readability constructor should be a document object.");var i;t=t||{},this._doc=e,this._articleTitle=null,this._articleByline=null,this._articleDir=null,this._articleSiteName=null,this._attempts=[],this._debug=!!t.debug,this._maxElemsToParse=t.maxElemsToParse||this.DEFAULT_MAX_ELEMS_TO_PARSE,this._nbTopCandidates=t.nbTopCandidates||this.DEFAULT_N_TOP_CANDIDATES,this._charThreshold=t.charThreshold||this.DEFAULT_CHAR_THRESHOLD,this._classesToPreserve=this.CLASSES_TO_PRESERVE.concat(t.classesToPreserve||[]),this._flags=this.FLAG_STRIP_UNLIKELYS|this.FLAG_WEIGHT_CLASSES|this.FLAG_CLEAN_CONDITIONALLY,this._debug?(i=function(e){var t=e.nodeName+" ";if(e.nodeType==e.TEXT_NODE)return t+'("'+e.textContent+'")';var i=e.className&&"."+e.className.replace(/ /g,"."),a="";return e.id?a="(#"+e.id+i+")":i&&(a="("+i+")"),t+a},this.log=function(){if("undefined"!=typeof dump){var e=Array.prototype.map.call(arguments,function(e){return e&&e.nodeName?i(e):e}).join(" ");dump("Reader: (Readability) "+e+"\n")}else if("undefined"!=typeof console){var t=["Reader: (Readability) "].concat(arguments);console.log.apply(console,t)}}):this.log=function(){}}Readability.prototype={FLAG_STRIP_UNLIKELYS:1,FLAG_WEIGHT_CLASSES:2,FLAG_CLEAN_CONDITIONALLY:4,ELEMENT_NODE:1,TEXT_NODE:3,DEFAULT_MAX_ELEMS_TO_PARSE:0,DEFAULT_N_TOP_CANDIDATES:5,DEFAULT_TAGS_TO_SCORE:"section,h2,h3,h4,h5,h6,p,td,pre".toUpperCase().split(","),DEFAULT_CHAR_THRESHOLD:500,REGEXPS:{unlikelyCandidates:/-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|foot|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i,okMaybeItsACandidate:/and|article|body|column|main|shadow/i,positive:/article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i,negative:/hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|gdpr|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i,extraneous:/print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i,byline:/byline|author|dateline|writtenby|p-author/i,replaceFonts:/<(\/?)font[^>]*>/gi,normalize:/\s{2,}/g,videos:/\/\/(www\.)?((dailymotion|youtube|youtube-nocookie|player\.vimeo|v\.qq)\.com|(archive|upload\.wikimedia)\.org|player\.twitch\.tv)/i,nextLink:/(next|weiter|continue|>([^\|]|$)|»([^\|]|$))/i,prevLink:/(prev|earl|old|new|<|«)/i,whitespace:/^\s*$/,hasContent:/\S$/},DIV_TO_P_ELEMS:["A","BLOCKQUOTE","DL","DIV","IMG","OL","P","PRE","TABLE","UL","SELECT"],ALTER_TO_DIV_EXCEPTIONS:["DIV","ARTICLE","SECTION","P"],PRESENTATIONAL_ATTRIBUTES:["align","background","bgcolor","border","cellpadding","cellspacing","frame","hspace","rules","style","valign","vspace"],DEPRECATED_SIZE_ATTRIBUTE_ELEMS:["TABLE","TH","TD","HR","PRE"],PHRASING_ELEMS:["ABBR","AUDIO","B","BDO","BR","BUTTON","CITE","CODE","DATA","DATALIST","DFN","EM","EMBED","I","IMG","INPUT","KBD","LABEL","MARK","MATH","METER","NOSCRIPT","OBJECT","OUTPUT","PROGRESS","Q","RUBY","SAMP","SCRIPT","SELECT","SMALL","SPAN","STRONG","SUB","SUP","TEXTAREA","TIME","VAR","WBR"],CLASSES_TO_PRESERVE:["page"],_postProcessContent:function(e){this._fixRelativeUris(e),this._cleanClasses(e)},_removeNodes:function(e,t){for(var i=e.length-1;0<=i;i--){var a=e[i],n=a.parentNode;n&&(t&&!t.call(this,a,i,e)||n.removeChild(a))}},_replaceNodeTags:function(e,t){for(var i=e.length-1;0<=i;i--){var a=e[i];this._setNodeTag(a,t)}},_forEachNode:function(e,t){Array.prototype.forEach.call(e,t,this)},_someNode:function(e,t){return Array.prototype.some.call(e,t,this)},_everyNode:function(e,t){return Array.prototype.every.call(e,t,this)},_concatNodeLists:function(){var t=Array.prototype.slice,e=t.call(arguments).map(function(e){return t.call(e)});return Array.prototype.concat.apply([],e)},_getAllNodesWithTag:function(i,e){return i.querySelectorAll?i.querySelectorAll(e.join(",")):[].concat.apply([],e.map(function(e){var t=i.getElementsByTagName(e);return Array.isArray(t)?t:Array.from(t)}))},_cleanClasses:function(e){var t=this._classesToPreserve,i=(e.getAttribute("class")||"").split(/\s+/).filter(function(e){return-1!=t.indexOf(e)}).join(" ");for(i?e.setAttribute("class",i):e.removeAttribute("class"),e=e.firstElementChild;e;e=e.nextElementSibling)this._cleanClasses(e)},_fixRelativeUris:function(e){var t=this._doc.baseURI,i=this._doc.documentURI;function a(e){if(t==i&&"#"==e.charAt(0))return e;try{return new URL(e,t).href}catch(e){}return e}var n=this._getAllNodesWithTag(e,["a"]);this._forEachNode(n,function(e){var t=e.getAttribute("href");if(t)if(0===t.indexOf("javascript:")){var i=this._doc.createTextNode(e.textContent);e.parentNode.replaceChild(i,e)}else e.setAttribute("href",a(t))});var r=this._getAllNodesWithTag(e,["img"]);this._forEachNode(r,function(e){var t=e.getAttribute("src");t&&e.setAttribute("src",a(t))})},_getArticleTitle:function(){var e=this._doc,t="",i="";try{"string"!=typeof(t=i=e.title.trim())&&(t=i=this._getInnerText(e.getElementsByTagName("title")[0]))}catch(e){}var a=!1;function n(e){return e.split(/\s+/).length}if(/ [\|\-\\\/>»] /.test(t))a=/ [\\\/>»] /.test(t),n(t=i.replace(/(.*)[\|\-\\\/>»] .*/gi,"$1"))<3&&(t=i.replace(/[^\|\-\\\/>»]*[\|\-\\\/>»](.*)/gi,"$1"));else if(-1!==t.indexOf(": ")){var r=this._concatNodeLists(e.getElementsByTagName("h1"),e.getElementsByTagName("h2")),s=t.trim();this._someNode(r,function(e){return e.textContent.trim()===s})||(n(t=i.substring(i.lastIndexOf(":")+1))<3?t=i.substring(i.indexOf(":")+1):5»]+/g,""))-1)&&(t=i),t},_prepDocument:function(){var e=this._doc;this._removeNodes(e.getElementsByTagName("style")),e.body&&this._replaceBrs(e.body),this._replaceNodeTags(e.getElementsByTagName("font"),"SPAN")},_nextElement:function(e){for(var t=e;t&&t.nodeType!=this.ELEMENT_NODE&&this.REGEXPS.whitespace.test(t.textContent);)t=t.nextSibling;return t},_replaceBrs:function(e){this._forEachNode(this._getAllNodesWithTag(e,["br"]),function(e){for(var t=e.nextSibling,i=!1;(t=this._nextElement(t))&&"BR"==t.tagName;){i=!0;var a=t.nextSibling;t.parentNode.removeChild(t),t=a}if(i){var n=this._doc.createElement("p");for(e.parentNode.replaceChild(n,e),t=n.nextSibling;t;){if("BR"==t.tagName){var r=this._nextElement(t.nextSibling);if(r&&"BR"==r.tagName)break}if(!this._isPhrasingContent(t))break;var s=t.nextSibling;n.appendChild(t),t=s}for(;n.lastChild&&this._isWhitespace(n.lastChild);)n.removeChild(n.lastChild);"P"===n.parentNode.tagName&&this._setNodeTag(n.parentNode,"DIV")}})},_setNodeTag:function(e,t){if(this.log("_setNodeTag",e,t),e.__JSDOMParser__)return e.localName=t.toLowerCase(),e.tagName=t.toUpperCase(),e;for(var i=e.ownerDocument.createElement(t);e.firstChild;)i.appendChild(e.firstChild);e.parentNode.replaceChild(i,e),e.readability&&(i.readability=e.readability);for(var a=0;a