diff --git a/activitylog/activitylog.py b/activitylog/activitylog.py index 896c7de..66d68c3 100644 --- a/activitylog/activitylog.py +++ b/activitylog/activitylog.py @@ -6,12 +6,19 @@ from redbot.core.utils.mod import is_mod_or_superior import discord from .time_utils import * -from datetime import datetime, timedelta +from datetime import datetime +from dateutil.relativedelta import relativedelta import time import os import asyncio import glob +# plotting +from bisect import bisect_left +import matplotlib.pyplot as plt +from matplotlib.dates import AutoDateLocator, AutoDateFormatter +import pandas as pd + __version__ = "3.1.0" TIMESTAMP_FORMAT = "%Y-%m-%d %X" # YYYY-MM-DD HH:MM:SS @@ -314,30 +321,260 @@ class ActivityLogger(commands.Cog): return msg, None + @commands.command(name="fixavglen") + @checks.is_owner() + async def fixavglen(self, ctx): + for guild in self.bot.guilds: + for member in guild.members: + async with self.config.member(member).stats() as stats: + stats["avg_len"] = stats["total_msg"] - stats["bot_cmd"] + await ctx.tick() + + @commands.command(name="graphstats") + @checks.mod() + @commands.guild_only() + async def user_stats_graph(self, ctx, user: discord.Member, split: str, *, till: str): + """ + Create a graph of a users activity over time. + + `split` is how to split the data on the graph, like per hour, per day, etc. + Possible values are: + "h" for hourly + "d" for daily + "w" for weekly + "m" for monthly + "y" for yearly + + `till` can be a date or interval + + **Times in graph are all in UTC** + + Dates/times look like: + February 14 at 6pm EDT + 2019-04-13 06:43:00 PST + 01/20/18 at 21:00:43 + + times default to UTC if no timezone provided + + Intervals look like: + 5 minutes + 1 minute 30 seconds + 1 hour + 2 days + 30 days + 5h30m + (etc) + """ + interval = parse_timedelta(till) + date = None + if not interval: + try: + date = parse_time(till).replace(tzinfo=None) + except: + await ctx.send("Invalid date or interval! Try again.") + return + + split = split.lower() + if split not in ["h", "d", "w", "m", "y"]: + await ctx.send("Invalid split! Try again.") + return + + guild = ctx.guild + log_files = sorted(glob.glob(os.path.join(PATH, str(guild.id), "*.log")), reverse=True) + # remove audit log entries + log_files = [log for log in log_files if "guild" not in log] + + if interval: + end_time = datetime.utcnow() - interval + else: + end_time = date + + # get messages split by channel + messages = self.log_handler(log_files, end_time, split_channels=True) + + ### set up data dictionary + num_messages = {} + to_delete = [] + # make sure to include only text channels + for ch_id in messages.keys(): + channel = guild.get_channel(ch_id) + # channel may be deleted, but still want to include message data + if isinstance(channel, discord.VoiceChannel): + to_delete.append(ch_id) + continue + num_messages[ch_id] = 0 + # delete voice channels + for ch_id in to_delete: + del messages[ch_id] + + data = {"times": [], "num_messages": []} + # add all the possible times based on the split + # first for each one zero out now time to the minute, day, etc + # then go through and add all possible times to get data for + now = datetime.utcnow() + if split == "h": + now -= relativedelta(minute=0, second=0, microsecond=0) + end_time -= relativedelta(minute=0, second=0, microsecond=0) + while now >= end_time: + data["times"].append(now) + data["num_messages"].append(num_messages.copy()) + now = now - relativedelta(hours=1) + elif split == "d": + now -= relativedelta(hour=0, minute=0, second=0, microsecond=0) + end_time -= relativedelta(hour=0, minute=0, second=0, microsecond=0) + while now >= end_time: + data["times"].append(now) + data["num_messages"].append(num_messages.copy()) + now = now - relativedelta(days=1) + elif split == "w": + now -= relativedelta(days=now.weekday(), hour=0, minute=0, second=0, microsecond=0) + end_time -= relativedelta(days=end_time.weekday(), hour=0, minute=0, second=0, microsecond=0) + while now >= end_time: + data["times"].append(now) + data["num_messages"].append(num_messages.copy()) + now = now - relativedelta(weeks=1) + elif split == "m": + now -= relativedelta(day=1, hour=0, minute=0, second=0, microsecond=0) + end_time -= relativedelta(day=1, hour=0, minute=0, second=0, microsecond=0) + while now >= end_time: + data["times"].append(now) + data["num_messages"].append(num_messages.copy()) + now = now - relativedelta(months=1) + elif split == "y": + now -= relativedelta(month=1, day=1, hour=0, minute=0, second=0, microsecond=0) + end_time -= relativedelta(month=1, day=1, hour=0, minute=0, second=0, microsecond=0) + while now >= end_time: + data["times"].append(now) + data["num_messages"].append(num_messages.copy()) + now = now - relativedelta(years=1) + + if not data["times"]: + await ctx.send(error("Your split is too large for the time provided, try a smaller split or longer time.")) + return + + data["times"].reverse() + + # calculate number of messages for the user for every split + for ch_id, msgs in messages.items(): + for message in msgs: + if str(user.id) not in message: + continue + # grab time of the message + current_time = parse_time_naive(message[:19]) + # find what time to put it in using binary search + index = bisect_left(data["times"], current_time) - 1 + # add message to channel + data["num_messages"][index][ch_id] += 1 + + df = pd.DataFrame(data) + # make dict of num_messages into columns for every channel + df = pd.concat([df.drop("num_messages", axis=1), df["num_messages"].apply(pd.Series)], axis=1) + # calculate total messages for each time. + df["Total"] = df.drop("times", axis=1).sum(axis=1) + + # change channel ids to real names, or leave as delete channel + names = {} + for i, ch_id in enumerate(data["num_messages"][0].keys()): + channel = guild.get_channel(ch_id) + names[ch_id] = channel.name if channel else f"Deleted Channel {i+1}" + df = df.rename(columns=names) + + # drop channels with no data (all zeros) and check if theres still data + df = df.loc[:, (df != 0).any(axis=0)] + if len(df.columns) <= 1: + await ctx.send(warning("There is no messages from that user in the time period you specified.")) + return + + # make graph and send it + fontsize = 30 + fig = plt.figure(figsize=(50,30)) + ax = plt.axes() + + # set date formater for x axis + xtick_locator = AutoDateLocator() + ax.xaxis.set_major_locator(xtick_locator) + ax.xaxis.set_major_formatter(AutoDateFormatter(xtick_locator)) + + # define graph and table save paths + save_path = str(PATH / f"plot_{ctx.message.id}.png") + table_save_path = str(PATH / f"plot_data_{ctx.message.id}.txt") + + # plot each column + for col_name, col_data in df.iteritems(): + if col_name == "times": + continue + plt.plot("times", col_name, data=df, linewidth=3, marker='o', markersize=8) + + # make graph look nice + plt.title(f"{user} message history from {end_time} to now", fontsize=fontsize) + plt.xlabel("dates", fontsize=fontsize) + plt.ylabel("messages", fontsize=fontsize) + plt.xticks(fontsize=fontsize) + plt.yticks(fontsize=fontsize) + plt.grid(True) + plt.legend(loc="best", prop={'size': 30}) + fig.tight_layout() + + fig.savefig(save_path, dpi=fig.dpi) + plt.close() + + with open(table_save_path, "w") as f: + f.write(str(df)) + + with open(save_path, "rb") as f, open(table_save_path, "r") as t: + files = (discord.File(f, filename="graph.png"), discord.File(t, filename="graph_data.txt")) + await ctx.send(files=files) + + os.remove(save_path) + os.remove(table_save_path) + @staticmethod - # gets logs up till a specified end_time - def log_handler(log_files: list, end_time: datetime, start: datetime = None): - messages = [] - counter = 0 - stop = False + def log_handler(log_files: list, end_time: datetime, start: datetime = None, split_channels: bool = False): + """ + gets messages up to a specified end time, with optional start time. + + returns a list of messages. + + if the split_channels is true, returns a dictionary of + channel ids -> messages + """ + if split_channels: + messages = {} + else: + messages = [] + # runs in descending order, with most recent log file first for log in log_files: + if split_channels: + channel_id = int(log.split("_")[-1].strip(".log")) + if channel_id not in messages: + messages[channel_id] = [] with open(log, "r") as f: for line in reversed(list(f)): # time interval check: current_time = parse_time_naive(line[:19]) + print("log current time: ", current_time, " log end time: ", end_time) if start and start < current_time: continue if end_time > current_time: - stop = True + print("breaking from log ", log) break - messages.append(line) - counter += 1 - if stop: - break + if split_channels: + messages[channel_id].append(line) + else: + messages.append(line) + # don't break if end_time > current_time in a log, so that getting + # logs for a user from an entire guild works, as different channels + # need to be checked. this doesn't save that much time when used + # on a specific channel + print(messages) # reverse messages to get correct order - messages.reverse() + if split_channels: + for ch_id in messages.keys(): + messages[ch_id].reverse() + else: + messages.reverse() return messages @@ -976,7 +1213,7 @@ class ActivityLogger(commands.Cog): kwargs.update(day=1) start = timestamp.replace(**kwargs) elif rotation_code == "w": - start = timestamp - timedelta(days=timestamp.weekday()) + start = timestamp - relativedelta(days=timestamp.weekday()) spec = start.strftime("%Y%m%d") @@ -1173,15 +1410,17 @@ class ActivityLogger(commands.Cog): # don't calculate bot stats and make sure this isnt dm message if message.author.id != self.bot.user.id and isinstance(message.author, discord.Member): + is_bot_msg = False async with self.config.member(message.author).stats() as stats: stats["total_msg"] += 1 if len(message.content) > 0: for prefix in self.cache[message.guild.id]["prefixes"]: if prefix == message.content[: len(prefix)]: stats["bot_cmd"] += 1 + is_bot_msg = True break - else: - stats["avg_len"] += len(message.content.split(" ")) + if not is_bot_msg: + stats["avg_len"] += len(message.content.split(" ")) if message.attachments and dl_attachment: for i, data in enumerate(attachments): diff --git a/activitylog/info.json b/activitylog/info.json index 8297438..0497b8a 100644 --- a/activitylog/info.json +++ b/activitylog/info.json @@ -11,7 +11,7 @@ "description": "Log messages, dms, attachments, guild updates (roles, channels, bans, kicks etc) and be able to retrieve these logs for review. Saves these right to disk and customize then to rotate log files. Tracks user's stats; how many messages they send, how many are bot commands, time spent in VC, and moderator actions against them. Also provides an upgraded userinfo command that gives these stats alongside the information Red's userinfo command gives. This is rewritten from @calebj's V2 cog.", "hidden": false, "install_msg": "Thank you for using this cog! Make sure to set bot prefixes using [p]logset prefixes for proper stat logging.", - "requirements": ["python-dateutil", "pytz"], + "requirements": ["python-dateutil", "pytz", "matplotlib", "pandas"], "short": "Log messages, audit actions, and track user statistics.", "tags": [ "brandons209", diff --git a/activitylog/time_utils.py b/activitylog/time_utils.py index 0711fc0..5e891df 100644 --- a/activitylog/time_utils.py +++ b/activitylog/time_utils.py @@ -2,15 +2,18 @@ from __future__ import annotations import re -from datetime import datetime as dt, timedelta +from datetime import datetime as dt from typing import Optional import pytz from dateutil import parser from dateutil.tz import gettz +from dateutil.relativedelta import relativedelta TIME_RE_STRING = r"\s?".join( [ + r"((?P\d+?)\s?(years?|y))?", + r"((?P\d+?)\s?(months?|mt))?", r"((?P\d+?)\s?(weeks?|w))?", r"((?P\d+?)\s?(days?|d))?", r"((?P\d+?)\s?(hours?|hrs|hr?))?", @@ -47,10 +50,10 @@ def parse_time_naive(datetimestring: str): return parser.parse(datetimestring) -def parse_timedelta(argument: str) -> Optional[timedelta]: +def parse_timedelta(argument: str) -> Optional[relativedelta]: matches = TIME_RE.match(argument) if matches: params = {k: int(v) for k, v in matches.groupdict().items() if v} if params: - return timedelta(**params) + return relativedelta(**params) return None