add chatbot

This commit is contained in:
Brandon 2022-04-27 20:57:45 -04:00
parent 576c29b48c
commit f2bb5c99d4
4 changed files with 639 additions and 0 deletions

7
chatbot/__init__.py Normal file
View file

@ -0,0 +1,7 @@
from .chatbot import Chatbot
__red_end_user_data_statement__ = "This cog does not persistently store data or metadata about users."
def setup(bot):
    """Build a Chatbot cog bound to ``bot`` and register it on the bot."""
    cog = Chatbot(bot)
    bot.add_cog(cog)

613
chatbot/chatbot.py Normal file
View file

@ -0,0 +1,613 @@
from redbot.core import commands, checks, Config
from redbot.core.utils.chat_formatting import *
from redbot.core.data_manager import cog_data_path
from aitextgen import aitextgen
from typing import Literal
from datetime import datetime, timedelta
import asyncio, os, time, random
class Chatbot(commands.Cog):
"""
Chatbot using aitextgen model.
"""
def __init__(self, bot):
    """Register config defaults, set up in-memory state and start the background task."""
    self.bot = bot
    self.config = Config.get_conf(self, identifier=156613221365446546, force_registration=True)

    # Defaults are handed to Config directly, one call per scope.
    self.config.register_guild(
        temp=0.9,
        max_len=2,
        history=10,
        max_time=1500,
        dead_channels=[],
        dead_revive_time=3000,
    )
    self.config.register_channel(autoreply=False, randomness=0.25, timeout=1500)
    self.config.register_global(use_gpu=False, autoboot=False)
    # TODO add settings cache

    # The aitextgen model; stays None until load_model() succeeds.
    self.model = None
    # channel -> datetime of last message for autoreply channels
    self.talking_channels = dict()
    # channel -> list of recent message objects used as generation context
    self.history = dict()
    # channels currently being generated for; new messages there are ignored
    self.channel_lock = list()
    self.special_tokens = dict(end_convo="<end_convo>", start_convo="<start_convo>")

    self.init_task = asyncio.create_task(self.init())
    self.loop = asyncio.get_event_loop()
    os.environ["TOKENIZERS_PARALLELISM"] = "true"
async def load_model(self):
    """
    Load the aitextgen model from the cog's data directory into ``self.model``.

    If either ``pytorch_model.bin`` or ``config.json`` is missing, the bot owners
    are notified and ``self.model`` is left untouched.
    """
    data_dir = cog_data_path(cog_instance=self)
    required_files = [os.path.join(data_dir, name) for name in ("pytorch_model.bin", "config.json")]

    if not all(os.path.isfile(path) for path in required_files):
        await self.bot.send_to_owners(
            error(
                "Your model for cog `chatbot` could not be found. Make sure to have two files, `pytorch_model.bin` and `config.json` in the cog's data directory."
            )
        )
        return

    gpu_enabled = await self.config.use_gpu()
    self.model = aitextgen(model_folder=data_dir, use_gpu=gpu_enabled)
async def init(self):
    """
    Background task: optionally load the model, then revive dead channels once a minute.

    For every guild where the cog is enabled, each configured "dead" channel whose
    latest message is older than ``dead_revive_time`` seconds receives a generated
    message built from the cached history of that channel.
    """
    if await self.config.autoboot():
        await self.load_model()

    while True:
        # Without a loaded model nothing can be generated; keep polling so the
        # reviver starts working as soon as the model is booted later.
        guilds = self.bot.guilds if self.model is not None else []
        for guild in guilds:
            if await self.bot.cog_disabled_in_guild(self, guild):
                continue
            settings = await self.config.guild(guild).all()
            for channel_id in settings["dead_channels"]:
                channel = guild.get_channel(channel_id)
                if not channel:
                    continue
                try:
                    last_msg = None
                    async for msg in channel.history(limit=1):
                        last_msg = msg
                    # An empty channel has no timestamp to compare against.
                    if last_msg is None:
                        continue
                    idle_seconds = (datetime.utcnow() - last_msg.created_at).total_seconds()
                    if idle_seconds < settings["dead_revive_time"]:
                        continue
                    context = "".join(
                        cached.clean_content.strip() + "\n" for cached in self.history.get(channel, [])
                    )
                    # Generation is CPU-bound; run it off the event loop like on_message does.
                    output = await asyncio.get_running_loop().run_in_executor(
                        None, self.get_ai_response, context, settings["max_len"], settings["temp"]
                    )
                    await channel.send(output)
                except discord.HTTPException:
                    # Missing permissions or a transient API error must not kill the whole task.
                    continue
        await asyncio.sleep(60)
def cog_unload(self):
    """Cancel the background task (if one exists) when the cog is unloaded."""
    task = self.init_task
    if not task:
        return
    task.cancel()
async def timed_wait(self, message: discord.Message):
    """
    Helper function for boot sequence

    Shows a spinner under the message for 2 or 3 full turns (0.5 s per frame),
    then restores the message to its content from before the animation.
    """
    turns = random.randint(2, 3)
    base_text = message.content
    spinner_frames = ("|", "/", "-", "\\")
    for _ in range(turns):
        for frame in spinner_frames:
            await message.edit(content=f"{base_text}\n{frame}")
            await asyncio.sleep(0.5)
    await message.edit(content=base_text)
@commands.group(name="ai")
@commands.guild_only()
@checks.admin()
async def ai(self, ctx):
    """
    Manage your Chatbot
    """
    # Root command group: it has no behaviour of its own, subcommands do the work.
@ai.command(name="gpu")
@checks.is_owner()
async def ai_gpu(self, ctx, use_gpu: bool):
    """
    Turn on GPU for model loading
    Make sure to have GPU version of PyTorch installed, along with CUDA toolkit.
    """
    # Only the stored flag changes here; load_model() reads it on the next load.
    gpu_flag = self.config.use_gpu
    await gpu_flag.set(use_gpu)
    await ctx.tick()
@ai.command(name="boot")
@checks.is_owner()
async def ai_boot(self, ctx, lets_boot: bool):
    """
    Load the model and set it to load on startup

    Pass a false value to stop loading the model on startup instead.
    """
    # Store what was actually requested; the flag used to be forced to True
    # regardless of the argument.
    await self.config.autoboot.set(lets_boot)
    if not lets_boot:
        await ctx.tick()
        return

    await self.load_model()
    if self.model is None:
        # load_model() has already told the owners which files are missing, so
        # do not play a "startup complete" animation for a model that is not there.
        await ctx.send(error("The model could not be loaded, startup aborted."))
        return

    # Each stage is appended to the message, followed by a short spinner animation.
    stages = (
        "KERNEL LOADED\nHARDWARE OK\n\n",
        "-----LAUNCHING SYSTEMS-----\nCORE ANALYTICS\nHEURISTIC ENGINES\n",
        "RECURSION PROCESSORS\nEVOLUTIONARY GENERATORS\nCOMPUTATIONAL LINGUISTICS\n",
        "NATURAL LANGUAGE PROCESSING\nPATTERN MINING\nERROR HANDLING\n",
        "ALGORITHMIC ENGINES\nAUTONOMOUS IMPROVEMENT\n",
        "IMAGE PROCESSING\nCONTEXT ENGINE\n",
        "-----CORE SYSTEMS ONLINE-----\n\n-----PERFORMING SELF DIAGNOSTICS-----\n",
        "**[OK]** CORE HEURISTICS\n**[OK]** ADVANCED PATTERN RECOGNITION\n",
        "**[SUCESSFUL]** NATURAL LANGUAGE PROCESSING TESTS\n**[SUCESSFUL]** CONTEXTUALIZATION TESTS\n",
        "**[SUCESSFUL]** SYSTEMS INTERGRATION TEST\n\n",
    )

    txt = "-----SYSTEM STARTUP-----\n"
    msg = await ctx.send(txt)
    await self.timed_wait(msg)
    for stage in stages:
        txt += stage
        await msg.edit(content=txt)
        await self.timed_wait(msg)

    txt += f"**STARTUP COMPLETE**\n\n{ctx.guild.me.mention} v4.2.3 online and functioning."
    await msg.edit(content=txt)
@ai.group(name="channel")
async def ai_channel(self, ctx):
    """
    Manage channel settings
    """
    # Container group only; the channel subcommands hold the logic.
@ai_channel.group(name="revive")
async def ai_channel_revive(self, ctx):
    """
    Dead chat reviver settings
    """
    # Container group only; the revive subcommands hold the logic.
@ai_channel_revive.command(name="time")
async def ai_channel_revive_time(self, ctx, time: int):
    """
    Set the time for chat to be dead to revive it in seconds
    """
    # `time` shadows the imported module inside this function only; the name is
    # part of the command signature, so it is left as is.
    guild_settings = self.config.guild(ctx.guild)
    await guild_settings.dead_revive_time.set(time)
    await ctx.tick()
@ai_channel_revive.command("add")
async def ai_channel_revive_add(self, ctx, *, channel: discord.TextChannel):
    """
    Add a channel to revive when dead
    """
    guild_settings = self.config.guild(ctx.guild)
    async with guild_settings.dead_channels() as revive_ids:
        already_listed = channel.id in revive_ids
        # Keep the stored list free of duplicates.
        if not already_listed:
            revive_ids.append(channel.id)
    await ctx.tick()
@ai_channel_revive.command("del")
async def ai_channel_revive_del(self, ctx, *, channel: discord.TextChannel):
    """
    Delete a channel from the revive list
    """
    async with self.config.guild(ctx.guild).dead_channels() as dead:
        # Removing a channel that is not listed is a no-op; an explicit membership
        # test replaces the bare `except:` that would also hide unrelated errors.
        if channel.id in dead:
            dead.remove(channel.id)
    await ctx.tick()
@ai_channel_revive.command("list")
async def ai_channel_revive_list(self, ctx):
    """
    List revive channels
    """
    # Plain read: nothing is modified, so no write-back context manager is needed.
    dead = await self.config.guild(ctx.guild).dead_channels()
    channels = (ctx.guild.get_channel(channel_id) for channel_id in dead)
    # Channels that no longer exist are skipped instead of producing blank lines.
    mentions = [channel.mention for channel in channels if channel is not None]
    msg = "**Revive Channel List:**\n" + "\n".join(mentions)
    for page in pagify(msg):
        await ctx.send(page)
@ai_channel.command(name="autoreply")
async def ai_channel_reply(self, ctx, channel: discord.TextChannel, on_off: bool):
    """
    Turn on autoreply in channel
    """
    channel_settings = self.config.channel(channel)
    await channel_settings.autoreply.set(on_off)
    await ctx.tick()
@ai_channel.command(name="random")
async def ai_channel_random(self, ctx, channel: discord.TextChannel, randomness: float):
    """
    Set the percent chance for the bot to reply in channel
    Value should be between 0 and 1
    Setting it to 1 will mean it always reply to each message
    """
    # Enforce the documented range; the chained comparison also rejects NaN.
    if not 0 <= randomness <= 1:
        await ctx.send(error("Randomness must be between 0 and 1."))
        return
    await self.config.channel(channel).randomness.set(randomness)
    await ctx.tick()
@ai_channel.command(name="timeout")
async def ai_channel_timeout(self, ctx, channel: discord.TextChannel, timeout: int):
    """
    Number of seconds for bot to stop replying to messages, **in seconds**
    Occurs once no new messages are sent for this time period in the channel.
    """
    channel_settings = self.config.channel(channel)
    await channel_settings.timeout.set(timeout)
    await ctx.tick()
@ai.group(name="model")
async def ai_model(self, ctx):
    """
    Manage model settings
    """
    # Container group only; the model subcommands hold the logic.
@ai_model.command(name="settings")
async def ai_model_settings(self, ctx):
    """
    View model settings
    """
    guild = ctx.guild
    settings = await self.config.guild(guild).all()
    summary = f"Temperature: {settings['temp']}\nHistory # Messages: {settings['history']}\n Max Output Lines: {settings['max_len']}\nMax History Time: {settings['max_time']} seconds"
    # The embed takes the bot's own role colour in this guild.
    embed = discord.Embed(title=f"Settings for {guild}", description=summary, colour=guild.me.colour)
    await ctx.send(embed=embed)
@ai_model.command(name="temp")
async def ai_model_temp(self, ctx, temp: float):
    """
    Change temperature of output generation
    Higher temperature means more variation in output, lower temperature is more deterministic.
    Mess with this between 0 and 1 to see what works best.
    """
    guild_settings = self.config.guild(ctx.guild)
    await guild_settings.temp.set(temp)
    await ctx.tick()
@ai_model.command(name="history")
async def ai_model_history(self, ctx, history: int):
    """
    Change history length of model
    History is the number of messages to consider as context when generating output, lower history is less context.
    """
    # The cached history is trimmed with a `[-history:]` slice: 0 would keep every
    # message and a negative value would drop the wrong end, so require >= 1.
    if history < 1:
        await ctx.send(error("History must be at least 1 message."))
        return
    await self.config.guild(ctx.guild).history.set(history)
    await ctx.tick()
@ai_model.command(name="time")
async def ai_model_time(self, ctx, history_time: int):
    """
    Change time limit of history messages, in seconds
    This means to only consider messages as history within this time limit
    Messages outside of this time won't be considered in generation
    """
    guild_settings = self.config.guild(ctx.guild)
    await guild_settings.max_time.set(history_time)
    await ctx.tick()
@ai_model.command(name="lines")
async def ai_model_lines(self, ctx, lines: int):
    """
    Change maximum number of output lines to generated
    It's good to keep to relatively small.
    """
    # Generation picks a random line count in [1, lines]; anything below 1 would
    # make that range empty and raise during generation.
    if lines < 1:
        await ctx.send(error("The number of lines must be at least 1."))
        return
    await self.config.guild(ctx.guild).max_len.set(lines)
    await ctx.tick()
@ai.command(
    name="data",
    usage="<days of data> <max time of convo in seconds> <min number of lines in convo> <include links> [list of channels]",
)
@checks.is_owner()
async def ai_data(
    self, ctx, lookback: int, maxtime: int, minlines: int, include_links: bool, *channels: discord.TextChannel
):
    """
    Gather data from channels for training
    **Use -1 for <days of data> to get ALL messages in channels**
    **WARNING** this will take a long time!
    Training data will be saved to the cog's data directory.
    Maxtime is the maximum number of seconds between messages before considering it a new conversation.
    Minlines is the minimum number of messages in a conversation to keep the conversation in the training data.
    Please see the training repo here: https://github.com/brandons209/AI-Chatbot
    """
    start_token = self.special_tokens["start_convo"]
    end_token = self.special_tokens["end_convo"]

    after = datetime.utcnow() - timedelta(days=lookback) if lookback > 0 else None
    data = []
    prefix = ctx.clean_prefix
    status_msg = await ctx.send(f"Processing 1/{len(channels)} channels (this may take a while)")

    for i, channel in enumerate(channels):
        progress = f"Processing {i+1}/{len(channels)} channels (this may take a while)"
        try:
            await status_msg.edit(content=progress)
        except discord.HTTPException:
            # The status message may have been deleted meanwhile; post a new one.
            status_msg = await ctx.send(progress)

        prev_msg = None
        try:
            async for msg in channel.history(limit=None, after=after, oldest_first=True):
                content = msg.clean_content
                if (
                    not content
                    or msg.author.bot
                    or content.startswith(prefix)
                    or ("http" in content and not include_links)
                ):
                    continue
                if prev_msg is None:
                    data.append(start_token)
                elif (msg.created_at - prev_msg.created_at).total_seconds() > maxtime:
                    # A long enough gap ends one conversation and starts the next.
                    data.append(end_token)
                    data.append(start_token)
                data.append(content.strip())
                prev_msg = msg
        except Exception as e:
            await ctx.send(f"Error processing channel {channel.mention}: {e}")
        # Close the last conversation even when the channel failed midway, so the
        # next channel never continues inside an unterminated conversation.
        if data and data[-1] != end_token:
            data.append(end_token)

    # filter out short conversations
    try:
        await status_msg.edit(content="Cleaning data...")
    except discord.HTTPException:
        status_msg = await ctx.send("Cleaning data...")

    start = 0
    to_delete = set()  # set membership keeps the final filter linear in len(data)
    for i, line in enumerate(data):
        if line == start_token:
            start = i
        elif line == end_token and (i - start - 1) < minlines:
            # i - start - 1 is the number of messages between the two markers.
            to_delete.update(range(start, i + 1))
    data = [line for i, line in enumerate(data) if i not in to_delete]

    out_path = os.path.join(cog_data_path(cog_instance=self), f"{ctx.guild.id}-cleaned.txt")
    # Explicit UTF-8: chat text contains emoji the platform default encoding may reject.
    with open(out_path, "w", encoding="utf-8") as f:
        f.write("\n".join(data))

    try:
        await status_msg.edit(content=info("Done. Saved to the cog's data path."))
    except discord.HTTPException:
        await ctx.send(info("Done. Saved to the cog's data path."))
def process_input(self, message: str) -> str:
    """
    Process the input message

    Removes references to the bot itself (raw ``<@id>`` / ``<@!id>`` mentions and
    the bot's ``str()`` form) and strips surrounding whitespace from every line.

    Args:
        message (str): The message to process

    Returns:
        str: The cleaned message, lines joined with newlines.
    """
    bot_user = self.bot.user
    # Remove bot's @s from input. Each replacement works on the result of the
    # previous one; restarting from `message` used to discard the mention removal.
    processed_input = message
    for needle in (f"<@!{bot_user.id}>", f"<@{bot_user.id}>", str(bot_user)):
        processed_input = processed_input.replace(needle, "")
    # strip spaces at beginning and end of every line
    return "\n".join(line.strip() for line in processed_input.strip().split("\n"))
def get_ai_response(self, message: str, max_len: int, temp: float):
    """
    Get a response from the model up to max length

    The prompt is shortened from the front until it is under 1000 tokens, then
    generation is attempted up to three times until a non-empty reply is found.

    Args:
        message (str): The message to use for generation
        max_len (int): Maximum number of lines to generate
        temp (float): Model generation temperature

    Returns:
        str: The generated reply, or "🤔" if every attempt produced nothing.
    """
    end_token = self.special_tokens["end_convo"]
    start_token = self.special_tokens["start_convo"]
    # random.randint(1, n) needs n >= 1; a misconfigured value must not crash generation.
    max_lines = max(1, max_len)

    numtokens = len(self.model.tokenizer(message)["input_ids"])
    while numtokens >= 1000:
        message = " ".join(message.split(" ")[20:]).strip()  # pretty arbitrary
        numtokens = len(self.model.tokenizer(message)["input_ids"])

    output = ""
    # Three tries to generate a non-empty message. TODO: make configurable
    # The counter is a dedicated loop variable: the old inner `for i` overwrote
    # the retry counter and made the number of tries unpredictable.
    for _attempt in range(3):
        text = self.model.generate(
            max_length=numtokens + 70 + 5 * max_lines,
            prompt=message + "\n",
            temperature=temp,
            return_as_list=True,
        )[0]
        # remove the input text and the special tokens from the output text
        text = text[len(message) :].replace(end_token, "").replace(start_token, "").strip()

        # NOTE(review): the first generated line is always skipped, as the original
        # indexing (`splitlines()[i + 1]`) did — confirm this is intended.
        candidates = text.splitlines()[1:]
        for _pick in range(100):  # TODO configure this too?
            # include a random amount of lines up to max_lines in the response
            line_count = random.randint(1, max_lines)
            output = "\n".join(candidates[:line_count]).strip()
            if output:
                break
        if output:
            break

    # fill with default message if still empty
    return output or "🤔"
@commands.Cog.listener()
async def on_message_delete(self, message: discord.Message):
    """Drop a deleted message from the cached history of its channel, if it is cached."""
    if await self.bot.cog_disabled_in_guild(self, message.guild):
        return
    cached = self.history.get(message.channel)
    if cached is None:
        return
    try:
        cached.remove(message)
    except ValueError:
        # The message was never cached or has already been trimmed away.
        pass
@commands.Cog.listener()
async def on_message(self, message: discord.Message):
    """
    Record guild messages as context and reply with generated text when triggered.

    A reply is triggered when the bot is mentioned, when the message replies to one
    of the bot's messages, while the bot is still "talking" in the channel (within
    the channel timeout), or at random in autoreply channels. While a reply is being
    generated for a channel, further messages there are recorded but not answered.
    """
    guild = message.guild
    author = message.author
    channel = message.channel

    # Cheap rejections first: DMs, bots and empty messages never reach the config.
    if guild is None or author.bot or len(message.content) < 1:
        return
    if await self.bot.cog_disabled_in_guild(self, guild):
        return
    # Nothing can be generated without a model. load_model() already notifies the
    # owners when the files are missing, so no DM is sent per incoming message.
    if self.model is None:
        return
    ctx = await self.bot.get_context(message)
    if ctx.prefix is not None:
        # Messages starting with a command prefix are not chat.
        return

    self.history.setdefault(channel, []).append(message)
    if channel in self.channel_lock:
        return

    # Resolve the replied-to message, fetching it only when Discord did not
    # include it (the old condition was inverted).
    ref = message.reference
    ref_message = None
    if ref is not None:
        ref_message = ref.resolved
        if ref_message is None and ref.message_id is not None:
            try:
                ref_message = await channel.fetch_message(ref.message_id)
            except discord.HTTPException:
                ref_message = None
    # `resolved` can also be a deleted-message placeholder, which has no author.
    replied_to_bot = isinstance(ref_message, discord.Message) and ref_message.author == guild.me

    channel_conf = self.config.channel(channel)
    last_talk = self.talking_channels.get(channel)
    still_talking = last_talk is not None and (
        (datetime.utcnow() - last_talk).total_seconds() < (await channel_conf.timeout())
    )

    # if bot wasn't mentioned, replied to, or talking in the channel
    if not (guild.me in message.mentions or still_talking or replied_to_bot):
        # see if this is an autoreply channel and roll the dice
        autoreply = await channel_conf.autoreply()
        if not autoreply or random.random() > (await channel_conf.randomness()):
            self.talking_channels.pop(channel, None)
            return
        self.talking_channels[channel] = message.created_at

    self.channel_lock.append(channel)
    try:
        async with channel.typing():
            settings = await self.config.guild(guild).all()
            # remove old messages; max() guards against a zero or negative setting
            history_len = max(1, settings["history"])
            self.history[channel] = self.history[channel][-history_len:]

            now = datetime.utcnow()
            recent_lines = [
                cached.clean_content.strip()
                for cached in self.history[channel]
                if (now - cached.created_at).total_seconds() < settings["max_time"]
            ]
            context = self.process_input("\n".join(recent_lines).strip())
            if not context:
                return
            # Generation is CPU-bound, so it runs in the default executor.
            response = await self.loop.run_in_executor(
                None, self.get_ai_response, context, settings["max_len"], settings["temp"]
            )
    finally:
        # Always release the channel, including on the early return above and on
        # errors; otherwise the channel would stay locked forever.
        try:
            self.channel_lock.remove(channel)
        except ValueError:
            pass

    return await message.reply(response, mention_author=False)
async def red_delete_data_for_user(
    self,
    *,
    requester: Literal["discord_deleted_user", "owner", "user", "user_strict"],
    user_id: int,
):
    """Data-deletion hook: does nothing for either argument and returns None."""
    return None

10
chatbot/info.json Normal file
View file

@ -0,0 +1,10 @@
{
"author": [
"brandons209"
],
"name": "Chatbot",
"description": "Chatting using OpenAI's GPT2 and aitextgen. To start, put an aitextgen model in the cog's data directory and run `[p]ai boot`",
"hidden": false,
"end_user_data_statement": "This cog does not store user data.",
"requirements": ["aitextgen"]
}

View file

@ -317,6 +317,15 @@ class MoreAdmin(commands.Cog):
async def adminset_sus_kick(self, ctx, *, threshold: str):
"""
Set threshold for kicking new accounts with DM
Intervals look like:
5 minutes
1 minute 30 seconds
1 hour
2 days
30 days
5h30m
(etc)
"""
threshold = parse_timedelta(threshold)
if not threshold: