add progress bar to correlation commands

This commit is contained in:
Brandon 2022-11-02 03:06:41 -04:00
parent bf08c3e7ee
commit 27db92ea4a

View file

@@ -1927,149 +1927,158 @@ class ActivityLogger(commands.Cog):
log_files = glob.glob(os.path.join(PATH, str(guild.id), "*.log")) log_files = glob.glob(os.path.join(PATH, str(guild.id), "*.log"))
log_files = [log for log in log_files if "guild" not in log] log_files = [log for log in log_files if "guild" not in log]
async with ctx.channel.typing(): # get messages split by channel
# get messages split by channel messages = await self.loop.run_in_executor(
messages = await self.loop.run_in_executor( None,
None, functools.partial(
functools.partial( self.log_handler,
self.log_handler, log_files,
log_files, guild.created_at,
guild.created_at, split_channels=True,
split_channels=True, ),
), )
)
def process_messages(): async def process_messages():
for ch_id, data in messages.items(): progress_msg_str = "Processed {}/{} channels."
channel = guild.get_channel(ch_id) progress_msg = await ctx.send(progress_msg_str.format(0, len(messages)))
# channel may be deleted, but still want to include message data progress_index = 0
if isinstance(channel, discord.VoiceChannel): for ch_id, data in messages.items():
joined_at = {} channel = guild.get_channel(ch_id)
# ignore for now, need to figure out how to filter out when the bot fails to log a user leaving # channel may be deleted, but still want to include message data
for message in data: if isinstance(channel, discord.VoiceChannel):
joined_at = {}
# ignore for now, need to figure out how to filter out when the bot fails to log a user leaving
for message in data:
try:
user_id = int(message.split("(id")[-1].split(")")[0].strip().strip(":"))
user = guild.get_member(user_id)
if not user:
continue
if "Voice channel join:" in message:
join_time = parse_time_naive(message[:19])
if join_time is None:
continue
joined_at[user] = join_time
# check others in VC to make sure a leave wasnt missed, 24 hours should be a fine time
to_delete = []
for other_user, join_time in joined_at.items():
time_in_vc = datetime.utcnow() - joined_at[user]
if time_in_vc > VOICE_TIME_LIMIT:
to_delete.append(other_user)
for u in to_delete:
del joined_at[u]
elif "Voice channel leave:" in message and user in joined_at:
leave_time = parse_time_naive(message[:19])
if leave_time is None:
continue
time_in_vc = leave_time - joined_at[user]
minutes = np.floor(time_in_vc.total_seconds() / 60)
if len(joined_at) > 2:
corr_weight = (
corr_weights["vc_per_minute"]
* corr_weights["vc_people_multiplier"]
/ (len(joined_at) - 2)
) * minutes
else:
corr_weight = corr_weights["vc_per_minute"] * minutes
# add correlation data to everyone in the vc when someone leaves
for other_user, join_time in joined_at.items():
if user == other_user:
continue
adj_matrix_voice[members[user], members[other_user]] += corr_weight
adj_matrix_voice[members[other_user], members[user]] += corr_weight
del joined_at[user]
except IndexError:
pass
except KeyError: # happens if user rejoins after running this command
pass
except ValueError:
pass
await asyncio.sleep(0)
else:
to_delete = []
for message in data:
# delete things like message edits
if "edited message from" in message and "to read:" in message:
to_delete.append(message)
elif " deleted message from " in message:
to_delete.append(message)
await asyncio.sleep(0)
for msg in to_delete:
data.remove(msg)
await asyncio.sleep(0)
for i, message in enumerate(data):
try:
user1_id = int(message.split("(id:")[1].split(")")[0])
user1 = guild.get_member(user1_id)
except IndexError:
pass
except KeyError:
pass
if user1 is None:
continue
curr_msg_time = parse_time_naive(message[:19])
if curr_msg_time is None:
continue
try:
if "replied to" in message.split("(id:")[1].split("):")[0]:
# add correlation to matrix
user2_id = int(message.split("(id:")[2].split("):")[0])
user2 = guild.get_member(user2_id)
# don't care about people who arent in the server
if not (user2 is None or user1 == user2):
adj_matrix[members[user1], members[user2]] += corr_weights["reply"]
adj_matrix[members[user2], members[user1]] += corr_weights["reply"]
continue
except IndexError:
pass
except KeyError: # happens if user rejoins after running this command
pass
except ValueError:
pass
# get messages around current message and add weights
for j in range(max(i - 5, 0), i):
try: try:
user_id = int(message.split("(id")[-1].split(")")[0].strip().strip(":")) prev_message = data[j]
user = guild.get_member(user_id) user2 = int(prev_message.split("(id:")[1].split(")")[0])
if not user: user2 = guild.get_member(user2)
if user2 is None:
continue continue
if "Voice channel join:" in message: if user1 == user2:
join_time = parse_time_naive(message[:19]) continue
if join_time is None:
continue
joined_at[user] = join_time
# check others in VC to make sure a leave wasnt missed, 24 hours should be a fine time
to_delete = []
for other_user, join_time in joined_at.items():
time_in_vc = datetime.utcnow() - joined_at[user]
if time_in_vc > VOICE_TIME_LIMIT:
to_delete.append(other_user)
for u in to_delete:
del joined_at[u]
elif "Voice channel leave:" in message and user in joined_at:
leave_time = parse_time_naive(message[:19])
if leave_time is None:
continue
time_in_vc = leave_time - joined_at[user]
minutes = np.floor(time_in_vc.total_seconds() / 60)
if len(joined_at) > 2:
corr_weight = (
corr_weights["vc_per_minute"]
* corr_weights["vc_people_multiplier"]
/ (len(joined_at) - 2)
) * minutes
else:
corr_weight = corr_weights["vc_per_minute"] * minutes
# add correlation data to everyone in the vc when someone leaves # filter out messages being too far away time wise
for other_user, join_time in joined_at.items(): prev_msg_time = parse_time_naive(prev_message[:19])
if user == other_user: if prev_msg_time is None or curr_msg_time - prev_msg_time > CORR_MSG_DELTA:
continue continue
adj_matrix_voice[members[user], members[other_user]] += corr_weight
adj_matrix_voice[members[other_user], members[user]] += corr_weight
del joined_at[user] adj_matrix[members[user1], members[user2]] += corr_weights["messages"][j - i]
except IndexError: except IndexError:
pass pass
except KeyError: # not sure why this happens... TODO figure it out except KeyError: # happens if user rejoins after running this command
pass pass
except ValueError: await asyncio.sleep(0)
pass
else:
to_delete = []
for message in data:
# delete things like message edits
if "edited message from" in message and "to read:" in message:
to_delete.append(message)
elif " deleted message from " in message:
to_delete.append(message)
for msg in to_delete: progress_index += 1
data.remove(msg) try:
await progress_msg.edit(content=progress_msg_str.format(progress_index, len(messages)))
except:
progress_msg = await ctx.send(progress_msg_str.format(0, len(messages)))
for i, message in enumerate(data): await process_messages()
try:
user1_id = int(message.split("(id:")[1].split(")")[0])
user1 = guild.get_member(user1_id)
except IndexError:
pass
except KeyError:
pass
if user1 is None:
continue
curr_msg_time = parse_time_naive(message[:19]) # define table save paths
if curr_msg_time is None: table_save_path = str(PATH / f"plot_data_{ctx.message.id}")
continue
try:
if "replied to" in message.split("(id:")[1].split("):")[0]:
# add correlation to matrix
user2_id = int(message.split("(id:")[2].split("):")[0])
user2 = guild.get_member(user2_id)
# don't care about people who arent in the server
if not (user2 is None or user1 == user2):
adj_matrix[members[user1], members[user2]] += corr_weights["reply"]
adj_matrix[members[user2], members[user1]] += corr_weights["reply"]
continue
except IndexError:
pass
except KeyError: # not sure why this happens... TODO figure it out
pass
except ValueError:
pass
# get messages around current message and add weights
for j in range(max(i - 5, 0), i):
try:
prev_message = data[j]
user2 = int(prev_message.split("(id:")[1].split(")")[0])
user2 = guild.get_member(user2)
if user2 is None:
continue
if user1 == user2:
continue
# filter out messages being too far away time wise
prev_msg_time = parse_time_naive(prev_message[:19])
if prev_msg_time is None or curr_msg_time - prev_msg_time > CORR_MSG_DELTA:
continue
adj_matrix[members[user1], members[user2]] += corr_weights["messages"][j - i]
except IndexError:
pass
await self.loop.run_in_executor(
None,
functools.partial(
process_messages,
),
)
# define table save paths
table_save_path = str(PATH / f"plot_data_{ctx.message.id}")
member_names = [m.name for m in members.keys()] member_names = [m.name for m in members.keys()]
adj_matrix = pd.DataFrame(data=adj_matrix, index=member_names, columns=member_names) adj_matrix = pd.DataFrame(data=adj_matrix, index=member_names, columns=member_names)
@@ -2120,211 +2129,218 @@ class ActivityLogger(commands.Cog):
log_files = glob.glob(os.path.join(PATH, str(guild.id), "*.log")) log_files = glob.glob(os.path.join(PATH, str(guild.id), "*.log"))
log_files = [log for log in log_files if "guild" not in log] log_files = [log for log in log_files if "guild" not in log]
async with ctx.channel.typing(): # get messages split by channel
# get messages split by channel messages = await self.loop.run_in_executor(
messages = await self.loop.run_in_executor( None,
None, functools.partial(
functools.partial( self.log_handler,
self.log_handler, log_files,
log_files, guild.created_at,
guild.created_at, split_channels=True,
split_channels=True, ),
), )
)
def process_messages(): async def process_messages():
for ch_id, data in messages.items(): progress_msg_str = "Processed {}/{} channels."
channel = guild.get_channel(ch_id) progress_msg = await ctx.send(progress_msg_str.format(0, len(messages)))
# channel may be deleted, but still want to include message data progress_index = 0
if isinstance(channel, discord.VoiceChannel): for ch_id, data in messages.items():
joined_at = {} channel = guild.get_channel(ch_id)
# ignore for now, need to figure out how to filter out when the bot fails to log a user leaving # channel may be deleted, but still want to include message data
for message in data: if isinstance(channel, discord.VoiceChannel):
joined_at = {}
# ignore for now, need to figure out how to filter out when the bot fails to log a user leaving
for message in data:
try:
user_id = int(message.split("(id")[-1].split(")")[0].strip().strip(":"))
user = guild.get_member(user_id)
if not user:
continue
if "Voice channel join:" in message:
join_time = parse_time_naive(message[:19])
if join_time is None:
continue
joined_at[user] = join_time
# check others in VC to make sure a leave wasnt missed, 24 hours should be a fine time
to_delete = []
for other_user, join_time in joined_at.items():
time_in_vc = datetime.utcnow() - joined_at[user]
if time_in_vc > VOICE_TIME_LIMIT:
to_delete.append(other_user)
for u in to_delete:
del joined_at[u]
elif "Voice channel leave:" in message and user in joined_at:
leave_time = parse_time_naive(message[:19])
if leave_time is None:
continue
time_in_vc = leave_time - joined_at[user]
minutes = np.floor(time_in_vc.total_seconds() / 60)
if len(joined_at) > 2:
corr_weight = (
corr_weights["vc_per_minute"]
* corr_weights["vc_people_multiplier"]
/ (len(joined_at) - 2)
) * minutes
else:
corr_weight = corr_weights["vc_per_minute"] * minutes
# add correlation data to everyone in the vc when someone leaves
if user == member:
for other_user, join_time in joined_at.items():
if user == other_user:
continue
adj_matrix_voice[members[user], members[other_user]] += corr_weight
adj_matrix_voice[members[other_user], members[user]] += corr_weight
else:
for other_user, join_time in joined_at.items():
if user == other_user or other_user != member:
continue
adj_matrix_voice[members[user], members[other_user]] += corr_weight
adj_matrix_voice[members[other_user], members[user]] += corr_weight
del joined_at[user]
except IndexError:
pass
except KeyError: # happens if user rejoins after running this command
pass
except ValueError:
pass
await asyncio.sleep(0)
else:
to_delete = []
for message in data:
# delete things like message edits
if "edited message from" in message and "to read:" in message:
to_delete.append(message)
elif " deleted message from " in message:
to_delete.append(message)
await asyncio.sleep(0)
for msg in to_delete:
data.remove(msg)
await asyncio.sleep(0)
for i, message in enumerate(data):
user1_id = int(message.split("(id:")[1].split(")")[0])
user1 = guild.get_member(user1_id)
if user1 is None:
continue
curr_msg_time = parse_time_naive(message[:19])
if curr_msg_time is None:
continue
try:
if "replied to" in message.split("(id:")[1].split("):")[0]:
# add correlation to matrix
user2_id = int(message.split("(id:")[2].split("):")[0])
user2 = guild.get_member(user2_id)
# don't care about people who arent in the server
if not (user2 is None or user1 == user2) and (user1 == member or user2 == member):
adj_matrix[members[user1], members[user2]] += corr_weights["reply"]
adj_matrix[members[user2], members[user1]] += corr_weights["reply"]
continue
except IndexError:
pass
except KeyError: # happens if user rejoins after running this command
pass
except ValueError:
pass
await asyncio.sleep(0)
# get messages around current message and add weights
for j in range(max(i - 5, 0), i):
try: try:
user_id = int(message.split("(id")[-1].split(")")[0].strip().strip(":")) prev_message = data[j]
user = guild.get_member(user_id) user2 = int(prev_message.split("(id:")[1].split(")")[0])
if not user: user2 = guild.get_member(user2)
if user2 is None:
continue continue
if "Voice channel join:" in message: if user1 == user2:
join_time = parse_time_naive(message[:19]) continue
if join_time is None:
continue
joined_at[user] = join_time
# check others in VC to make sure a leave wasnt missed, 24 hours should be a fine time
to_delete = []
for other_user, join_time in joined_at.items():
time_in_vc = datetime.utcnow() - joined_at[user]
if time_in_vc > VOICE_TIME_LIMIT:
to_delete.append(other_user)
for u in to_delete:
del joined_at[u]
elif "Voice channel leave:" in message and user in joined_at:
leave_time = parse_time_naive(message[:19])
if leave_time is None:
continue
time_in_vc = leave_time - joined_at[user]
minutes = np.floor(time_in_vc.total_seconds() / 60)
if len(joined_at) > 2:
corr_weight = (
corr_weights["vc_per_minute"]
* corr_weights["vc_people_multiplier"]
/ (len(joined_at) - 2)
) * minutes
else:
corr_weight = corr_weights["vc_per_minute"] * minutes
# add correlation data to everyone in the vc when someone leaves if user1 != member and user2 != member:
if user == member: continue
for other_user, join_time in joined_at.items():
if user == other_user:
continue
adj_matrix_voice[members[user], members[other_user]] += corr_weight
adj_matrix_voice[members[other_user], members[user]] += corr_weight
else:
for other_user, join_time in joined_at.items():
if user == other_user or other_user != member:
continue
adj_matrix_voice[members[user], members[other_user]] += corr_weight
adj_matrix_voice[members[other_user], members[user]] += corr_weight
del joined_at[user] # filter out messages being too far away time wise
prev_msg_time = parse_time_naive(prev_message[:19])
if prev_msg_time is None or curr_msg_time - prev_msg_time > CORR_MSG_DELTA:
continue
adj_matrix[members[user1], members[user2]] += corr_weights["messages"][j - i]
except IndexError: except IndexError:
pass pass
except KeyError: # not sure why this happens... TODO figure it out except KeyError: # happens if user rejoins after running this command
pass pass
except ValueError: await asyncio.sleep(0)
pass
else:
to_delete = []
for message in data:
# delete things like message edits
if "edited message from" in message and "to read:" in message:
to_delete.append(message)
elif " deleted message from " in message:
to_delete.append(message)
for msg in to_delete: progress_index += 1
data.remove(msg)
for i, message in enumerate(data):
user1_id = int(message.split("(id:")[1].split(")")[0])
user1 = guild.get_member(user1_id)
if user1 is None:
continue
curr_msg_time = parse_time_naive(message[:19])
if curr_msg_time is None:
continue
try:
if "replied to" in message.split("(id:")[1].split("):")[0]:
# add correlation to matrix
user2_id = int(message.split("(id:")[2].split("):")[0])
user2 = guild.get_member(user2_id)
# don't care about people who arent in the server
if not (user2 is None or user1 == user2) and (user1 == member or user2 == member):
adj_matrix[members[user1], members[user2]] += corr_weights["reply"]
adj_matrix[members[user2], members[user1]] += corr_weights["reply"]
continue
except IndexError:
pass
except KeyError: # not sure why this happens... TODO figure it out
pass
except ValueError:
pass
# get messages around current message and add weights
for j in range(max(i - 5, 0), i):
try:
prev_message = data[j]
user2 = int(prev_message.split("(id:")[1].split(")")[0])
user2 = guild.get_member(user2)
if user2 is None:
continue
if user1 == user2:
continue
if user1 != member and user2 != member:
continue
# filter out messages being too far away time wise
prev_msg_time = parse_time_naive(prev_message[:19])
if prev_msg_time is None or curr_msg_time - prev_msg_time > CORR_MSG_DELTA:
continue
adj_matrix[members[user1], members[user2]] += corr_weights["messages"][j - i]
except IndexError:
pass
await self.loop.run_in_executor(
None,
functools.partial(
process_messages,
),
)
member_names = [m.name for m in members.keys()]
adj_matrix = pd.DataFrame(data=adj_matrix, index=member_names, columns=member_names)
adj_matrix_voice = pd.DataFrame(data=adj_matrix_voice, index=member_names, columns=member_names)
adj_matrix_all = (
adj_matrix + adj_matrix_voice
) # have to add first otherwise tables dont line up for addition
# drop users who do not correlate to anyone else
for column in adj_matrix.columns:
try: try:
if (adj_matrix.loc[member.name, column] == 0).all() and column != member.name: await progress_msg.edit(content=progress_msg_str.format(progress_index, len(messages)))
adj_matrix = adj_matrix.drop(columns=column) except:
adj_matrix = adj_matrix.drop(index=column) progress_msg = await ctx.send(progress_msg_str.format(0, len(messages)))
if (adj_matrix_voice.loc[member.name, column] == 0).all() and column != member.name: await process_messages()
adj_matrix_voice = adj_matrix_voice.drop(columns=column)
adj_matrix_voice = adj_matrix_voice.drop(index=column)
if (adj_matrix_all.loc[member.name, column] == 0).all() and column != member.name: member_names = [m.name for m in members.keys()]
adj_matrix_all = adj_matrix_all.drop(columns=column) adj_matrix = pd.DataFrame(data=adj_matrix, index=member_names, columns=member_names)
adj_matrix_all = adj_matrix_all.drop(index=column) adj_matrix_voice = pd.DataFrame(data=adj_matrix_voice, index=member_names, columns=member_names)
except KeyError: # not sure why this happens... TODO figure it out adj_matrix_all = adj_matrix + adj_matrix_voice # have to add first otherwise tables dont line up for addition
continue
# only graph the most correlated people, since otherwise the graph is unreadable # drop users who do not correlate to anyone else
sums = adj_matrix_all.sum().sort_values(ascending=False) for column in adj_matrix.columns:
graph_cols = sums[:20] try:
# fix because networkx is dumb, see https://stackoverflow.com/questions/69349516/using-a-square-matrix-with-networkx-but-keep-getting-adjacency-matrix-not-square if (adj_matrix.loc[member.name, column] == 0).all() and column != member.name:
stack = adj_matrix_all.loc[graph_cols.index, graph_cols.index].stack() adj_matrix = adj_matrix.drop(columns=column)
stack = stack[stack >= 1].rename_axis(("source", "target")).reset_index(name="weight") adj_matrix = adj_matrix.drop(index=column)
graph = nx.from_pandas_edgelist(stack, edge_attr=True) if (adj_matrix_voice.loc[member.name, column] == 0).all() and column != member.name:
adj_matrix_voice = adj_matrix_voice.drop(columns=column)
adj_matrix_voice = adj_matrix_voice.drop(index=column)
# make graph and send it if (adj_matrix_all.loc[member.name, column] == 0).all() and column != member.name:
fontsize = 30 adj_matrix_all = adj_matrix_all.drop(columns=column)
fig = plt.figure(figsize=(30, 30)) adj_matrix_all = adj_matrix_all.drop(index=column)
plt.axis("off") except KeyError: # not sure why this happens... TODO figure it out
continue
# define graph and table save paths # only graph the most correlated people, since otherwise the graph is unreadable
save_path = str(PATH / f"plot_{ctx.message.id}.png") sums = adj_matrix_all.sum().sort_values(ascending=False)
table_save_path = str(PATH / f"plot_data_{ctx.message.id}") graph_cols = sums[:20]
# fix because networkx is dumb, see https://stackoverflow.com/questions/69349516/using-a-square-matrix-with-networkx-but-keep-getting-adjacency-matrix-not-square
stack = adj_matrix_all.loc[graph_cols.index, graph_cols.index].stack()
stack = stack[stack >= 1].rename_axis(("source", "target")).reset_index(name="weight")
widths = nx.get_edge_attributes(graph, "weight") graph = nx.from_pandas_edgelist(stack, edge_attr=True)
widths = np.array(list(widths.values()))
# clamp widths
widths = np.clip(widths, 1, 15)
pos = nx.spring_layout(graph, k=4) # make graph and send it
fontsize = 30
fig = plt.figure(figsize=(30, 30))
plt.axis("off")
nx.draw(graph, pos=pos, with_labels=True, width=widths, font_size=fontsize, node_size=fontsize * 2500) # define graph and table save paths
save_path = str(PATH / f"plot_{ctx.message.id}.png")
table_save_path = str(PATH / f"plot_data_{ctx.message.id}")
# make graph look nice widths = nx.get_edge_attributes(graph, "weight")
plt.title( widths = np.array(list(widths.values()))
f"Member correlation for {member} in {guild}", # clamp widths
fontsize=fontsize, widths = np.clip(widths, 1, 15)
)
fig.savefig(save_path, dpi=fig.dpi) pos = nx.spring_layout(graph, k=4)
plt.close()
nx.draw(graph, pos=pos, with_labels=True, width=widths, font_size=fontsize, node_size=fontsize * 2500)
# make graph look nice
plt.title(
f"Member correlation for {member} in {guild}",
fontsize=fontsize,
)
fig.savefig(save_path, dpi=fig.dpi)
plt.close()
adj_matrix.to_csv(table_save_path + "_text.txt", index=True) adj_matrix.to_csv(table_save_path + "_text.txt", index=True)
adj_matrix_voice.to_csv(table_save_path + "_voice.txt", index=True) adj_matrix_voice.to_csv(table_save_path + "_voice.txt", index=True)