1
0
Fork 0
mirror of synced 2024-07-03 05:21:02 +12:00
bulk-downloader-for-reddit/bulkredditdownloader/parser.py

235 lines
7.1 KiB
Python
Raw Normal View History

2018-07-10 07:58:11 +12:00
from pprint import pprint
try:
2021-02-07 14:05:18 +13:00
from bulkredditdownloader.errors import InvalidRedditLink
2018-07-10 07:58:11 +12:00
except ModuleNotFoundError:
from errors import InvalidRedditLink
2021-02-07 01:29:13 +13:00
def QueryParser(passed_queries: str) -> dict:
    """Split a ``header?key=value&...`` link segment into a flat dictionary.

    Args:
        passed_queries: A single path segment of a link, for example
            ``"search?q=cats&t=all"``. Everything before the first ``?`` is
            treated as the header; if there is no ``?`` the whole segment is
            the header and no parameters are extracted.

    Returns:
        A dictionary holding the header under the ``"HEADER"`` key plus one
        entry per query parameter. A parameter without ``=`` maps to an empty
        string. When the header is ``"search"`` the ``q`` value, if present,
        is URL-decoded.
    """
    from urllib.parse import unquote_plus

    header, _, query_string = passed_queries.partition("?")
    extracted_queries = {"HEADER": header}

    for pair in query_string.split("&"):
        # A trailing/duplicated "&" or an empty query string yields empty
        # pieces; they carry no information and are skipped.
        if not pair:
            continue
        # partition keeps any further "=" inside the value and tolerates
        # parameters that have no "=" at all.
        key, _, value = pair.partition("=")
        extracted_queries[key] = value

    if extracted_queries["HEADER"] == "search" and "q" in extracted_queries:
        # Decode every percent-escape (and "+" as a space), not only "%20".
        extracted_queries["q"] = unquote_plus(extracted_queries["q"])

    return extracted_queries
2018-07-10 07:58:11 +12:00
2021-02-07 01:29:13 +13:00
def LinkParser(link: str) -> dict:
    """Break a reddit link into the attributes that describe what it targets.

    Args:
        link: A link containing ``reddit.com``, with or without a
            ``http://`` / ``https://`` scheme.

    Returns:
        A dictionary that may contain:

        * ``"post"``: the original link, when it has a ``comments`` segment
          (no other key is set in that case);
        * ``"subreddit"``, ``"multireddit"``, ``"user"``: names taken from the
          segments following ``r``, ``m`` and ``u``/``user``/``me``;
        * ``"sort"``: one of hot/top/new/controversial/rising, or ``"best"``
          for a bare domain link;
        * ``"submitted"`` (dict), ``"saved"``, ``"upvoted"`` (``True``);
        * ``"search"``, ``"queries"``: parameters parsed by ``QueryParser``.

    Raises:
        InvalidRedditLink: If the link does not contain ``reddit.com``, has no
            path segments at all, or ends right after an ``r``, ``u``,
            ``user`` or ``m`` marker (or has ``m`` with no owner before it).
    """
    if "reddit.com" not in link:
        raise InvalidRedditLink("Invalid reddit link")

    segments = link.split("/")

    # "https://host/..." splits into ["https:", "", "host", ...]; drop the
    # scheme and the empty piece that follows it.
    if segments[0] in ("https:", "http:"):
        segments = segments[2:]

    if not segments:
        raise InvalidRedditLink("Invalid reddit link")

    # A bare domain, with or without a trailing slash, means the front page.
    ends_with_domain = segments[-1].endswith("reddit.com")
    domain_then_slash = (
        len(segments) >= 2
        and segments[-2].endswith("reddit.com")
        and segments[-1] == ""
    )
    if ends_with_domain or domain_then_slash:
        return {"sort": "best"}

    if segments[0].endswith("reddit.com"):
        segments = segments[1:]

    if "comments" in segments:
        return {"post": link}

    def name_after(marker_index: int) -> str:
        # The name belonging to a marker ("r", "u", ...) is the next segment;
        # a link that stops at the marker cannot be resolved.
        if marker_index + 1 >= len(segments):
            raise InvalidRedditLink("Invalid reddit link")
        return segments[marker_index + 1]

    result = {}

    if "r" in segments:
        result["subreddit"] = name_after(segments.index("r"))

    elif "m" in segments:
        marker_index = segments.index("m")
        # The owner precedes the "m" marker; index -1 would silently wrap
        # around to the last segment, so reject a leading "m".
        if marker_index == 0:
            raise InvalidRedditLink("Invalid reddit link")
        result["multireddit"] = name_after(marker_index)
        result["user"] = segments[marker_index - 1]

    elif any(marker in segments for marker in ("me", "u", "user")):
        # Only the first marker identifies the user; later segments that
        # happen to equal "u"/"user"/"me" are part of the name or listing.
        for position, segment in enumerate(segments):
            if segment in ("u", "user"):
                result["user"] = name_after(position)
                break
            if segment == "me":
                result["user"] = "me"
                break

    for position, segment in enumerate(segments):
        if segment in ("hot", "top", "new", "controversial", "rising"):
            result["sort"] = segment
            # A sort keyword directly after the domain targets the front page.
            if position == 0:
                result["subreddit"] = "frontpage"

        elif segment in ("submitted", "posts"):
            result["submitted"] = {}

        elif segment == "saved":
            result["saved"] = True

        elif segment == "upvoted":
            result["upvoted"] = True

        elif "?" in segment:
            parsed_query = QueryParser(segment)
            header = parsed_query.pop("HEADER")

            if header == "search":
                result["search"] = parsed_query
            elif header in ("submitted", "posts"):
                result["submitted"] = parsed_query
            else:
                result["queries"] = parsed_query

    # A user link with no explicit listing defaults to that user's submissions.
    has_listing = any(
        key in result
        for key in ("upvoted", "saved", "submitted", "multireddit")
    )
    if "user" in result and not has_listing:
        result["submitted"] = {}

    return result
2018-07-10 07:58:11 +12:00
2021-02-07 01:29:13 +13:00
def LinkDesigner(link) -> dict:
    """Turn a reddit link into the mode dictionary describing what to fetch.

    The link is parsed with ``LinkParser`` and its attributes are mapped to
    the keys below, filling in defaults for sort order and time filter.

    Args:
        link: The reddit link to interpret.

    Returns:
        A dictionary that always has ``"sort"`` and, except for some
        saved/upvoted/multireddit combinations handled exactly as parsed,
        ``"time"``. Depending on the link it also carries ``"post"``,
        ``"search"``, ``"subreddit"``, ``"multireddit"``, ``"user"``,
        ``"submitted"``, ``"saved"``, ``"upvoted"`` and ``"nsfw"``.
        For a post link ``"sort"`` and ``"time"`` are empty strings.

    Raises:
        Whatever ``LinkParser`` raises for the given link.
    """
    attributes = LinkParser(link)
    mode = {}

    if "post" in attributes:
        mode["post"] = attributes["post"]
        mode["sort"] = ""
        mode["time"] = ""
        return mode

    # True when the link itself carried a "t=" parameter; such a value must
    # not be replaced by a default further down.
    explicit_time = False

    if "search" in attributes:
        search = attributes["search"]
        mode["search"] = search["q"]

        if "restrict_sr" in search:
            # Query values arrive as strings, so "0" has to be recognised in
            # addition to "off" and the empty string.
            restricted = str(search["restrict_sr"]) not in ("0", "off", "")

            if restricted and "subreddit" in attributes:
                mode["subreddit"] = attributes["subreddit"]
            elif restricted and "multireddit" in attributes:
                mode["multireddit"] = attributes["multireddit"]
                mode["user"] = attributes["user"]
            else:
                mode["subreddit"] = "all"

        explicit_time = "t" in search
        mode["time"] = search.get("t", "all")
        mode["sort"] = search.get("sort", "relevance")

        if "include_over_18" in search:
            mode["nsfw"] = str(search["include_over_18"]) in ("1", "on")

    elif "queries" in attributes:
        queries = attributes["queries"]
        explicit_time = "t" in queries

        if "submitted" in attributes or "posts" in attributes:
            mode["time"] = queries.get("t", "all")
            mode["sort"] = queries.get("sort", "new")
        else:
            mode["time"] = queries.get("t", "day")

    else:
        mode["time"] = "day"

    # For search links the target was already decided above.
    if "search" not in attributes:
        if "subreddit" in attributes:
            mode["subreddit"] = attributes["subreddit"]
        elif "user" in attributes:
            mode["user"] = attributes["user"]

    if "submitted" in attributes:
        submitted = attributes["submitted"]
        mode["submitted"] = True

        if "sort" in submitted:
            mode["sort"] = submitted["sort"]
        elif "sort" not in mode:
            mode["sort"] = "new"

        if "t" in submitted:
            mode["time"] = submitted["t"]
        elif not explicit_time:
            # Only fall back to "all" when the link gave no time filter;
            # an explicit "t=" from the query string is kept.
            mode["time"] = "all"

    elif "saved" in attributes:
        mode["saved"] = True

    elif "upvoted" in attributes:
        mode["upvoted"] = True

    elif "multireddit" in attributes:
        mode["multireddit"] = attributes["multireddit"]

    # A sort keyword in the link path wins over anything derived so far.
    if "sort" in attributes:
        mode["sort"] = attributes["sort"]
    elif "sort" not in mode:
        mode["sort"] = "hot"

    return mode
2018-07-10 07:58:11 +12:00
2018-07-10 07:58:11 +12:00
if __name__ == "__main__":
    # Interactive helper for manual checks: read a link, print the mode
    # dictionary produced for it, repeat until the input stream ends.
    while True:
        try:
            link = input("> ")
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C ends the session without a traceback.
            print()
            break

        try:
            pprint(LinkDesigner(link))
        except InvalidRedditLink as error:
            # Report the bad link and keep the prompt alive.
            print(error)