From 2d227d438cc629b6ddb5ca0c20b51193480c6382 Mon Sep 17 00:00:00 2001 From: Tobias Weise Date: Tue, 27 Aug 2024 22:39:29 +0200 Subject: [PATCH] more overhaul --- README.md | 7 - backend/Dockerfile | 8 +- backend/app.py | 354 ++++++++++++++++------------------ backend/lib/chatbot.py | 231 ++++++++++------------ backend/lib/elastictools.py | 43 ++--- backend/lib/mail.py | 134 +------------ backend/lib/models.py | 1 + backend/lib/user.py | 6 +- backend/public/index.html | 128 +++++++----- backend/public/main.js | 177 ++++++++++++----- backend/requirements.txt | 10 +- deployment/docker-compose.yml | 37 ++-- ideas.md | 18 +- 13 files changed, 530 insertions(+), 624 deletions(-) diff --git a/README.md b/README.md index da6f848..e0f8722 100644 --- a/README.md +++ b/README.md @@ -13,12 +13,6 @@ docker-compose build docker-compose up -d ``` -After deploy: - -### WebUI for Ollama: -* http://localhost:8888 -* use to install model llama3 (or more https://ollama.com/library) - ---- ## Usage @@ -43,7 +37,6 @@ After deploy: ### Backend * FastAPI * RabbitMQ/Kafka? -* OpenSearch diff --git a/backend/Dockerfile b/backend/Dockerfile index 38993dd..5525a4f 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -13,10 +13,16 @@ WORKDIR /code COPY requirements.txt /code/requirements.txt #RUN pip3 install --no-cache-dir --upgrade -r requirements.txt RUN pip3 install --no-cache-dir -r requirements.txt - +RUN pip3 freeze > current_requirements.txt COPY . . ENTRYPOINT ["python3", "/code/app.py"] #ENTRYPOINT ["fastapi", "run", "main.py", "--port", "8000"] +#gunicorn -w 4 -b 0.0.0.0 'hello:create_app()' +#ENTRYPOINT ["gunicorn", "-w", "1", "-b", "0.0.0.0", "app:create_app()"] + +#ENTRYPOINT ["gunicorn", "-w", "1", "-b", "0.0.0.0:5000", "app:create_app()"] +#gunicorn app:app --worker-class eventlet -w 1 --bind 0.0.0.0:5000 --reload + diff --git a/backend/app.py b/backend/app.py index e1d69f8..c67df6a 100644 --- a/backend/app.py +++ b/backend/app.py @@ -1,10 +1,7 @@ """ OpenAPI access via http://localhost:5000/openapi/ on local docker-compose deployment - """ -#import warnings -#warnings.filterwarnings("ignore") #------std lib modules:------- import os, sys, json, time @@ -17,50 +14,26 @@ from functools import wraps import base64 #-------ext libs-------------- -#llm -from langchain.callbacks.manager import CallbackManager -from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler - -#import tiktoken -from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain.chains import RetrievalQA -from langchain.callbacks.base import BaseCallbackHandler, BaseCallbackManager -from langchain.prompts import PromptTemplate - -from langchain_community.llms import Ollama -from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader -from langchain_community.embeddings import OllamaEmbeddings - -#from langchain_community.vectorstores.elasticsearch import ElasticsearchStore #deprecated -from langchain_elasticsearch import ElasticsearchStore -from uuid import uuid4 from elasticsearch import NotFoundError, Elasticsearch # for normal read/write without vectors from elasticsearch_dsl import Search, A, Document, Date, Integer, Keyword, Float, Long, Text, connections -from elasticsearch.exceptions import ConnectionError from pydantic import BaseModel, Field - -import logging_loki import jwt as pyjwt - #flask, openapi from flask import Flask, send_from_directory, send_file, Response, request, jsonify -from flask_cors import CORS, cross_origin -from werkzeug.utils import secure_filename -from flask_openapi3 import Info, Tag, OpenAPI, Server, FileStorage +from flask_openapi3 import Info, Tag, OpenAPI, Server #FileStorage from flask_socketio import SocketIO, join_room, leave_room, rooms, send +#from werkzeug.utils import secure_filename -from cryptography.fernet import Fernet -from cryptography.hazmat.primitives import hashes -from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC +import asyncio #----------home grown-------------- from lib.funcs import group_by -from lib.elastictools import get_by_id, update_by_id, delete_by_id, wait_for_elasticsearch -from lib.models import init_indicies, QueryLog, Chatbot, User, Text -from lib.chatbot import ask_bot, train_text, download_llm +from lib.elastictools import get_by_id, wait_for_elasticsearch +from lib.models import init_indicies, Chatbot, User, Text +from lib.chatbot import ask_bot, ask_bot2, train_text, download_llm from lib.speech import text_to_speech from lib.mail import send_mail from lib.user import hash_password, create_user, create_default_users @@ -69,6 +42,15 @@ from lib.user import hash_password, create_user, create_default_users BOT_ROOT_PATH = os.getenv("BOT_ROOT_PATH") assert BOT_ROOT_PATH +ollama_url = os.getenv("OLLAMA_URI") +assert ollama_url + +elastic_url = os.getenv("ELASTIC_URI") +assert elastic_url + +jwt_secret = os.getenv("SECRET") +assert jwt_secret + # JWT Bearer Sample jwt = { @@ -80,11 +62,24 @@ security_schemes = {"jwt": jwt} security = [{"jwt": []}] +def get_module_versions(): + with open("requirements.txt", "r") as f: + modules = {line.split("#")[0] for line in f.read().split("\n") if line.split("#")[0] != ""} + with open("current_requirements.txt", "r") as f: + d = {k: v for (k, v) in [line.split("#")[0].split("==") for line in f.read().split("\n") if line.split("#")[0] != ""]} + return {k: v for k, v in d.items() if k in modules} + + + +import multiprocessing +cpus = multiprocessing.cpu_count() + + info = Info( - title="Chatbot-API", + title="CreativeBots-API", version="1.0.0", - summary="The REST-API", - description="Default model: ..." + summary="The REST-API to manage bots, users and more!", + description="CPUs: " + str(cpus) + "
" + json.dumps(get_module_versions(), indent=4).replace("\n", "
") ) servers = [ Server(url=BOT_ROOT_PATH ) @@ -108,6 +103,9 @@ def uses_jwt(required=True): Wraps routes in a jwt-required logic and passes decoded jwt and user from elasticsearch to the route as keyword """ + jwt_secret = os.getenv("SECRET") + assert jwt_secret + def non_param_deco(f): @wraps(f) def decorated_route(*args, **kwargs): @@ -115,7 +113,6 @@ def uses_jwt(required=True): if "Authorization" in request.headers: token = request.headers["Authorization"].split(" ")[1] - if not token: if required: return jsonify({ @@ -130,7 +127,7 @@ def uses_jwt(required=True): try: - data = pyjwt.decode(token, app.config["jwt_secret"], algorithms=["HS256"]) + data = pyjwt.decode(token, jwt_secret, algorithms=["HS256"]) except Exception as e: return jsonify({ 'status': 'error', @@ -160,24 +157,6 @@ def uses_jwt(required=True): -env_to_conf = { - "ELASTIC_URI": "elastic_uri", - "SECRET": "jwt_secret" -} - -#import values from env into flask config and do existence check -for env_key, conf_key in env_to_conf.items(): - x = os.getenv(env_key) - if not x: - msg = "Environment variable '%s' not set!" % env_key - app.logger.fatal(msg) - sys.exit(1) - else: - app.config[conf_key] = x - - - - socket = SocketIO(app, cors_allowed_origins="*") @socket.on('connect') @@ -192,24 +171,60 @@ def sockcon(data): socket.emit('backend response', {'msg': f'Connected to room {room} !', "room": room}) # looks like iOS needs an answer -#TODO: pydantic message type validation +class SocketMessage(BaseModel): + room: str = Field(None, description="Status Code") + question: str = Field(None, description="Status Code") + system_prompt: str = Field(None, description="Status Code") + bot_id: str = Field(None, description="Status Code") +#TODO: pydantic message type validation + @socket.on('client message') def handle_message(message): + SocketMessage.model_validate(message) + + #try: room = message["room"] question = message["question"] system_prompt = message["system_prompt"] bot_id = message["bot_id"] - #except: - # return + start = datetime.now().timestamp() + d = ask_bot2(system_prompt + " " + question, bot_id) + + def get_scores(*args): + score_docs = d["get_score_docs"]() + return score_docs + + + def do_streaming(*args): + start_stream = datetime.now().timestamp() + for chunk in d["answer_generator"](): + socket.emit('backend token', {'data': chunk, "done": False}, to=room) + stream_duration = round(datetime.now().timestamp() - start_stream, 2) + print("Stream duration: ", stream_duration, flush=True) + + + async def f(): + ls = await asyncio.gather( + asyncio.to_thread(get_scores, 1,2,3), + asyncio.to_thread(do_streaming, 1,2,3) + ) + return ls + + [score_docs, _] = asyncio.run(f()) + + socket.emit('backend token', { + 'done': True, + "score_docs": score_docs + }, to=room) + + duration = round(datetime.now().timestamp() - start, 2) + print("Total duration: ", duration, flush=True) - for chunk in ask_bot(system_prompt + " " + question, bot_id): - socket.emit('backend token', {'data': chunk, "done": False}, to=room) - socket.emit('backend token', {'done': True}, to=room) #======================= TAGS ============================= @@ -240,8 +255,7 @@ def login(form: LoginRequest): 'message': msg }), 400 - client = Elasticsearch(app.config['elastic_uri']) - match get_by_id(client, index="user", id_field_name="email", id_value=form.email): + match get_by_id(index="user", id_field_name="email", id_value=form.email): case []: msg = "User with email '%s' doesn't exist!" % form.email app.logger.error(msg) @@ -252,7 +266,7 @@ def login(form: LoginRequest): case [user]: if user["password_hash"] == hash_password(form.password + form.email): - token = pyjwt.encode({"email": form.email}, app.config['jwt_secret'], algorithm="HS256") + token = pyjwt.encode({"email": form.email}, jwt_secret, algorithm="HS256") #app.logger.info(token) return jsonify({ 'status': 'success', @@ -331,14 +345,50 @@ class GetSpeechRequest(BaseModel): @app.post('/text2speech', summary="", tags=[], security=security) def text2speech(form: GetSpeechRequest): file_name = text_to_speech(form.text) - - #return send_file(file_path, mimetype='audio/mpeg') #, attachment_filename= 'Audiofiles.zip', as_attachment = True) return jsonify({ "status": "success", "file": "/" + file_name }) +#============ Bot CRUD =============== + +class CreateBotRequest(BaseModel): + name: str = Field(None, description="The bot's name") + visibility: str = Field('private', description="The bot's visibility to other users ('private', 'public')") + description: str = Field('', description="The bot's description of purpose and being") + system_prompt: str = Field('', description="The bot's defining system prompt") + llm_model: str = Field("llama3", description="The bot's used LLM") + + #status = Keyword() + #temperature = Float() + + +@app.post('/bot', summary="", tags=[bot_tag], security=security) +@uses_jwt() +def create_bot(form: CreateBotRequest, decoded_jwt, user): + """ + Creates a chatbot for the JWT associated user. + """ + CreateBotRequest.model_validate(form) + + bot = Chatbot() + bot.name = form.name + bot.visibility = form.visibility + bot.description = form.description + bot.system_prompt = form.system_prompt + bot.llm_model = form.llm_model + + #add meta data + bot.creation_date = datetime.now() + bot.creator_id = user.meta.id + bot.save() + + return jsonify({ + "bot_id": bot.meta.id + }) + + class GetBotRequest(BaseModel): id: str = Field(None, description="The bot's id") @@ -395,25 +445,23 @@ def get_bots(query: GetBotRequest, decoded_jwt, user): - -class CreateBotRequest(BaseModel): +class UpdateBotRequest(BaseModel): + id: str = Field(None, description="The bot's id") name: str = Field(None, description="The bot's name") - visibility: str = Field('private', description="The bot's visibility to other users ('private', 'public')") - description: str = Field('', description="The bot's description of purpose and being") - system_prompt: str = Field('', description="The bot's defining system prompt") - llm_model: str = Field("llama3", description="The bot's used LLM") - - #status = Keyword() - #temperature = Float() + visibility: str = Field(None, description="The bot's visibility to other users ('private', 'public')") + description: str = Field(None, description="The bot's description of purpose and being") + system_prompt: str = Field(None, description="The bot's defining system prompt") + llm_model: str = Field(None, description="The bot's used LLM") -@app.post('/bot', summary="", tags=[bot_tag], security=security) +@app.put('/bot', summary="", tags=[bot_tag], security=security) @uses_jwt() -def create_bot(form: CreateBotRequest, decoded_jwt, user): +def update_bot(form: UpdateBotRequest, decoded_jwt, user): """ - Creates a chatbot for the JWT associated user. + Change a chatbot via it's id """ - bot = Chatbot() + bot = Chatbot.get(id=form.id) + bot.name = form.name bot.visibility = form.visibility bot.description = form.description @@ -421,16 +469,16 @@ def create_bot(form: CreateBotRequest, decoded_jwt, user): bot.llm_model = form.llm_model #add meta data - bot.creation_date = datetime.now() - bot.creator_id = user.meta.id + bot.changed_date = datetime.now() bot.save() return jsonify({ - "bot_id": bot.meta.id + "status": "success" }) + class DeleteBotRequest(BaseModel): id: str = Field(None, description="The bot's id") @@ -442,26 +490,13 @@ def delete_bot(form: DeleteBotRequest, decoded_jwt, user): """ bot = Chatbot.get(id=form.id) bot.delete() - return jsonify({ "status": "success" }) -class UpdateBotRequest(BaseModel): - id: str = Field(None, description="The bot's id") - -@app.put('/bot', summary="", tags=[bot_tag], security=security) -@uses_jwt() -def update_bot(form: UpdateBotRequest, decoded_jwt, user): - """ - Changes a chatbot - """ - - return jsonify({ - "status": "success" - }) +#============================================================================ class AskBotRequest(BaseModel): @@ -469,94 +504,53 @@ class AskBotRequest(BaseModel): question: str = Field(None, description="The question the bot should answer") - - -from langchain.chains import create_retrieval_chain -from langchain.chains.combine_documents import create_stuff_documents_chain -from langchain_core.prompts import ChatPromptTemplate - - - @app.get('/bot/ask', summary="", tags=[bot_tag], security=security) @uses_jwt() def query_bot(query: AskBotRequest, decoded_jwt, user): """ Asks a chatbot """ - start = datetime.now().timestamp() bot_id = query.bot_id - prompt = query.question + question = query.question - system_prompt = ( - "Antworte freundlich, mit einer ausführlichen Erklärung, sofern vorhanden auf Basis der folgenden Informationen. Please answer in the language of the question." - "\n\n" - "{context}" - ) + start = datetime.now().timestamp() + + d = ask_bot2(system_prompt + " " + question, bot_id) + + def get_scores(*args): + score_docs = d["get_score_docs"]() + return score_docs - ch_prompt = ChatPromptTemplate.from_messages( - [ - ("system", system_prompt), - ("human", "{input}"), - ] - ) + def do_streaming(*args): + start_stream = datetime.now().timestamp() + answer = "" + for chunk in d["answer_generator"](): + answer += chunk - ollama_url = os.getenv("OLLAMA_URI") + stream_duration = round(datetime.now().timestamp() - start_stream, 2) + print("Stream duration: ", stream_duration, flush=True) + return answer - embeddings = OllamaEmbeddings(model="llama3", base_url=ollama_url) - - vector_store = ElasticsearchStore( - es_url=app.config['elastic_uri'], - index_name= "chatbot_" + bot_id.lower(), - distance_strategy="COSINE", - embedding=embeddings - ) - - - bot = Chatbot.get(id=bot_id) - llm = Ollama( - model=bot.llm_model, - base_url=ollama_url - ) - - k = 4 - scoredocs = vector_store.similarity_search_with_score(prompt, k=k) - - retriever = vector_store.as_retriever() - question_answer_chain = create_stuff_documents_chain(llm, ch_prompt) - rag_chain = create_retrieval_chain(retriever, question_answer_chain) - - - r = "" - #for chunk in rag_chain.stream({"input": "What is Task Decomposition?"}): - for chunk in rag_chain.stream({"input": prompt}): - print(chunk, flush=True) - if "answer" in chunk: - r += chunk["answer"] - - - - #for chunk in ask_bot(question=query.question, bot_id=query.bot_id): - # r += chunk - - - - xs = [] - for doc, score in scoredocs: - #print(doc.__dict__, flush=True) - #print(doc, flush=True) - xs.append([dict(doc), score]) + async def f(): + ls = await asyncio.gather( + asyncio.to_thread(get_scores, 1,2,3), + asyncio.to_thread(do_streaming, 1,2,3) + ) + return ls + [score_docs, answer] = asyncio.run(f()) duration = round(datetime.now().timestamp() - start, 2) + print("Total duration: ", duration, flush=True) app.logger.info(duration) return jsonify({ - "answer": r, + "answer": answer, "duration": str(duration), #"docs": ls#, "score_docs": xs @@ -608,12 +602,14 @@ def index(): @app.route('/') #generische Route (auch Unterordner) def catchAll(path): - #return send_from_directory('.', path) return send_from_directory('./public', path) +#import logging_loki -def main(): + + +def create_app(): LOG_LEVEL = os.getenv("LOG_LEVEL") if LOG_LEVEL: logging.basicConfig(level=eval("logging." + LOG_LEVEL)) @@ -621,32 +617,16 @@ def main(): logging.basicConfig(level=logging.WARN) #TODO: implement some kind of logging mechanism - - """ - USE_LOKI_LOGGER = os.getenv("USE_LOKI_LOGGER") - if USE_LOKI_LOGGER: - handler = logging_loki.LokiHandler( - url="http://loki:3100/loki/api/v1/push", - tags={"application": "CreativeBots"}, - #auth=("username", "password"), - version="1", - ) - app.logger.addHandler(handler) - """ - - #wait_for_elasticsearch() - download_llm() - connections.create_connection(hosts=app.config['elastic_uri'], request_timeout=60) + download_llm("llama3") + connections.create_connection(hosts=elastic_url, request_timeout=60) wait_for_elasticsearch() init_indicies() create_default_users() - app.run(debug=False, threaded=True, host='0.0.0.0') - + + return app if __name__ == '__main__': - main() - - - + app = create_app() + app.run(debug=False, host='0.0.0.0') diff --git a/backend/lib/chatbot.py b/backend/lib/chatbot.py index 1ce2e26..03824f9 100644 --- a/backend/lib/chatbot.py +++ b/backend/lib/chatbot.py @@ -6,18 +6,9 @@ from uuid import uuid4 from collections import namedtuple import os, hashlib, traceback, logging from datetime import datetime, date - -#from elasticsearch_dsl import Document, Date, Integer, Keyword, Text, connections from elasticsearch_dsl import connections -#from langchain.callbacks.manager import CallbackManager -#from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler - -#import tiktoken from langchain.text_splitter import RecursiveCharacterTextSplitter -#from langchain.chains import RetrievalQA -#from langchain.callbacks.base import BaseCallbackHandler, BaseCallbackManager -#from langchain.prompts import PromptTemplate from langchain_community.llms import Ollama from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader @@ -29,14 +20,9 @@ from langchain.chains import create_retrieval_chain from langchain.chains.combine_documents import create_stuff_documents_chain from langchain_core.prompts import ChatPromptTemplate - - from lib.models import Chatbot, Text, User - - - ollama_url = os.getenv("OLLAMA_URI") elastic_url = os.getenv("ELASTIC_URI") @@ -51,47 +37,53 @@ def train_text(bot_id, text): """ Caution: Long running request! """ + txt_md5 = hashlib.md5(text.encode()).hexdigest() + t = Text.get(id=txt_md5, ignore=404) + if t is not None: + return True - bot = Chatbot.get(id=bot_id) - user = User.get(id=bot.creator_id) + else: + bot = Chatbot.get(id=bot_id) + user = User.get(id=bot.creator_id) - t = Text() - t.text = text - t.md5 = hashlib.md5(text.encode()).hexdigest() + t = Text(meta={'id': txt_md5}) + t = Text() + t.text = text + t.md5 = txt_md5 - #add meta data - t.creation_date = datetime.now() - t.creator_id = user.meta.id - t.save() + #add meta data + t.creation_date = datetime.now() + t.creator_id = user.meta.id + t.save() - #train with given text - chunk_size = 1536 - chunk_overlap = 200 + #train with given text + chunk_size = 1536 + chunk_overlap = 200 - documents = [] - for i, s in enumerate(RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap, length_function=len).split_text(text)): - documents.append(ESDocument( - page_content=s, - metadata={ - "segment_nr": i, - "text_id": t.meta.id, - "chunk_size": chunk_size, - "chunk_overlap": chunk_overlap - } - )) + documents = [] + for i, s in enumerate(RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap, length_function=len).split_text(text)): + documents.append(ESDocument( + page_content=s, + metadata={ + "segment_nr": i, + "text_id": t.meta.id, + "chunk_size": chunk_size, + "chunk_overlap": chunk_overlap + } + )) - embeddings = OllamaEmbeddings(model=bot.llm_model, base_url=ollama_url) + embeddings = OllamaEmbeddings(model=bot.llm_model, base_url=ollama_url) - vector_store = ElasticsearchStore( - es_url=elastic_url, - index_name= "chatbot_" + bot_id.lower(), - embedding=embeddings - ) + vector_store = ElasticsearchStore( + es_url=elastic_url, + index_name= "chatbot_" + bot_id.lower(), + embedding=embeddings + ) - uuids = [str(uuid4()) for _ in range(len(documents))] - vector_store.add_documents(documents=documents, ids=uuids) + uuids = [str(uuid4()) for _ in range(len(documents))] + vector_store.add_documents(documents=documents, ids=uuids) - return True + return True @@ -110,109 +102,92 @@ def ask_bot(question, bot_id): #connections.get_connection() - #if es.indices.exists(index="index"): - - - def ask_bot2(question, bot_id): - bot = Chatbot.get(id=bot_id) - llm = Ollama( - model=bot.llm_model, - base_url=ollama_url - ) - query = bot.system_prompt + " " + question - for chunk in llm.stream(query): - yield chunk - - - - #def query_bot(query: AskBotRequest, decoded_jwt, user): """ Asks a chatbot """ - start = datetime.now().timestamp() - - bot_id = query.bot_id - prompt = query.question - - - system_prompt = ( - "Antworte freundlich, mit einer ausführlichen Erklärung, sofern vorhanden auf Basis der folgenden Informationen. Please answer in the language of the question." - "\n\n" - "{context}" - ) - - - ch_prompt = ChatPromptTemplate.from_messages( - [ - ("system", system_prompt), - ("human", "{input}"), - ] - ) - - embeddings = OllamaEmbeddings(model="llama3", base_url=ollama_url) - - vector_store = ElasticsearchStore( - es_url=app.config['elastic_uri'], - index_name= "chatbot_" + bot_id.lower(), - distance_strategy="COSINE", - embedding=embeddings - ) - bot = Chatbot.get(id=bot_id) - llm = Ollama( - model=bot.llm_model, - base_url=ollama_url - ) + prompt = question + system_prompt = bot.system_prompt + "\n\n{context}" - k = 4 - scoredocs = vector_store.similarity_search_with_score(prompt, k=k) + rag_index = "chatbot_" + bot_id.lower() - retriever = vector_store.as_retriever() - question_answer_chain = create_stuff_documents_chain(llm, ch_prompt) - rag_chain = create_retrieval_chain(retriever, question_answer_chain) + if connections.get_connection().indices.exists(index=rag_index): + + vector_store = ElasticsearchStore( + es_url=elastic_url, + index_name=rag_index, + distance_strategy="COSINE", + embedding=OllamaEmbeddings(model=bot.llm_model, base_url=ollama_url) + ) + + def gen_func(): + llm = Ollama( + model=bot.llm_model, + base_url=ollama_url + ) + + ch_prompt = ChatPromptTemplate.from_messages( + [ + ("system", system_prompt), + ("human", "{input}"), + ] + ) + + retriever = vector_store.as_retriever() + question_answer_chain = create_stuff_documents_chain(llm, ch_prompt) + rag_chain = create_retrieval_chain(retriever, question_answer_chain) + for chunk in rag_chain.stream({"input": prompt}): + print(chunk, flush=True) + if "answer" in chunk: + yield chunk["answer"] - r = "" - #for chunk in rag_chain.stream({"input": "What is Task Decomposition?"}): - for chunk in rag_chain.stream({"input": prompt}): - print(chunk, flush=True) - if "answer" in chunk: - r += chunk["answer"] + def get_score_docs(): + k = 4 + start_vec_search = datetime.now().timestamp() + scoredocs = vector_store.similarity_search_with_score(prompt, k=k) + vec_search_duration = round(datetime.now().timestamp() - start_vec_search, 2) + print("Vec search duration: ", vec_search_duration, flush=True) + xs = [] + for doc, score in scoredocs: + #print(doc.__dict__, flush=True) + #print(doc, flush=True) + xs.append([score, dict(doc)]) + return xs + return { + "answer_generator": gen_func, + "get_score_docs": get_score_docs + } - #for chunk in ask_bot(question=query.question, bot_id=query.bot_id): - # r += chunk + else: + def gen_func(): + bot = Chatbot.get(id=bot_id) + llm = Ollama( + model=bot.llm_model, + base_url=ollama_url + ) + query = bot.system_prompt + " " + question + for chunk in llm.stream(query): + yield chunk + return { + "answer_generator": gen_func, + "get_score_docs": lambda: [] + } - xs = [] - for doc, score in scoredocs: - #print(doc.__dict__, flush=True) - #print(doc, flush=True) - xs.append([dict(doc), score]) - - - duration = round(datetime.now().timestamp() - start, 2) - - #app.logger.info(duration) - - #return jsonify({ - # "answer": r, - # "duration": str(duration), - # #"docs": ls#, - # "score_docs": xs - #}) from ollama import Client as OllamaClient -def download_llm(model="llama3"): +def download_llm(model): #print(ollama_url, flush=True) #ollama_client = OllamaClient(host=ollama_url) @@ -224,7 +199,3 @@ def download_llm(model="llama3"): s = """curl %s/api/pull -d '{ "name": "%s" }' """ % (ollama_url, model) print( os.system(s.strip()) ,flush=True) - - - - diff --git a/backend/lib/elastictools.py b/backend/lib/elastictools.py index 569b6ab..86850a1 100644 --- a/backend/lib/elastictools.py +++ b/backend/lib/elastictools.py @@ -5,34 +5,22 @@ import time, json, os from typing import Any, Tuple, List, Dict, Any, Callable, Optional from elasticsearch import NotFoundError, Elasticsearch # for normal read/write without vectors from elasticsearch_dsl import Search, A, UpdateByQuery, Document, Date, Integer, Keyword, Float, Long, Text, connections +from elasticsearch.exceptions import ConnectionError -def get_by_id(client: Elasticsearch, index: str, id_field_name: str, id_value: str): +def get_by_id(index: str, id_field_name: str, id_value: str): + client = connections.get_connection() response = Search(using=client, index=index).filter("term", **{id_field_name: id_value})[0:10000].execute() return [hit.to_dict() for hit in response] -def update_by_id(client: Elasticsearch, index: str, id_field_name: str, id_value: str, values_to_set: Dict[str, Any]) -> None: +def update_by_id(index: str, id_field_name: str, id_value: str, values_to_set: Dict[str, Any]) -> None: + client = connections.get_connection() #create painless insert script source = "" for k, v in values_to_set.items(): source += f"ctx._source.{k} = {json.dumps(v)};" - """ - body = { - "query": { - "term": { - id_field_name: id_value - } - }, - "script": { - "source": source, - "lang": "painless" - } - } - client.update_by_query(index=index, body=body) - """ - ubq = UpdateByQuery(using=client, index=index) \ .query("term", **{id_field_name: id_value}) \ .script(source=source, lang="painless") @@ -41,8 +29,8 @@ def update_by_id(client: Elasticsearch, index: str, id_field_name: str, id_value return response.success() - -def delete_by_id(client: Elasticsearch, index: str, id_field_name: str, id_value: str): +def delete_by_id(index: str, id_field_name: str, id_value: str): + client = connections.get_connection() s = Search(using=client, index=index).filter("term", **{id_field_name: id_value}) response = s.delete() #if not response.success(): @@ -68,7 +56,8 @@ def simplify_properties(d): return new_d -def get_type_schema(client: Elasticsearch): +def get_type_schema(): + client = connections.get_connection() d = client.indices.get(index="*").body new_d = {} for index, d2 in d.items(): @@ -80,22 +69,20 @@ def get_type_schema(client: Elasticsearch): def wait_for_elasticsearch(): #TODO: find a clean way to wait without exceptions! #Wait for elasticsearch to start up! - - elastic_url = os.getenv("ELASTIC_URI") - assert elastic_url - + #elastic_url = os.getenv("ELASTIC_URI") + #assert elastic_url i = 1 while True: try: - client = Elasticsearch(hosts=elastic_url) + #client = Elasticsearch(hosts=elastic_url) + client = connections.get_connection() client.indices.get_alias(index="*") #connections.create_connection(hosts=app.config['elastic_uri']) #connections.get_connection().cluster.health(wait_for_status='yellow') #init_indicies() print("Elasticsearch found! Run Flask-app!", flush=True) - break - except: - #except ConnectionError: + return + except ConnectionError: i *= 2 #1.5 time.sleep(i) print("Elasticsearch not found! Wait %s seconds!" % i, flush=True) diff --git a/backend/lib/mail.py b/backend/lib/mail.py index 53237bc..75ca1fa 100644 --- a/backend/lib/mail.py +++ b/backend/lib/mail.py @@ -1,50 +1,8 @@ - - from smtplib import * from email.mime.text import MIMEText -""" -import threading -import smtpd -import asyncore -import smtplib - -server = smtpd.SMTPServer(('localhost', 1025), None) - -loop_thread = threading.Thread(target=asyncore.loop, name="Asyncore Loop") -# If you want to make the thread a daemon -# loop_thread.daemon = True -loop_thread.start() -""" - -import asyncio -from aiosmtpd.controller import Controller - -class CustomHandler: - - async def handle_DATA(self, server, session, envelope): - peer = session.peer - mail_from = envelope.mail_from - rcpt_tos = envelope.rcpt_tos - data = envelope.content # type: bytes - # Process message data... - - - #if error_occurred: - # return '500 Could not process your message' - - return '250 OK' - - - -def send_mail2(target_mail, subject, sender_mail, msg): - - #handler = CustomHandler() - #controller = Controller(handler, hostname='127.0.0.1', port=1025) - # Run the event loop in a separate thread. - #controller.start() - +def send_mail(target_mail, subject, sender_mail, msg): msg = MIMEText(msg) msg['Subject'] = subject @@ -55,93 +13,3 @@ def send_mail2(target_mail, subject, sender_mail, msg): smtp.sendmail("Creative Bots", [target_mail], msg.as_string()) smtp.quit() - - #controller.stop() - - - - -import smtplib, dns.resolver - - -def send_mail3(target_mail, subject, sender_mail, msg): - - msg = MIMEText(msg) - msg['Subject'] = subject - msg['From'] = sender_mail - msg['To'] = target_mail - - #smtp = SMTP('mailserver', port=10025) - #smtp.sendmail("Creative Bots", [target_mail], msg.as_string()) - #smtp.quit() - - - [nick, domain] = target_mail.split("@") - - - #domain = 'example.com' - records = dns.resolver.resolve(domain, 'MX') - mx_record = records[0].exchange - - server = smtplib.SMTP(mx_record, 25) - - #server.sendmail('your_email@example.com', 'recipient_email@example.com', 'Hello, this is a test email.') - server.sendmail(sender_mail, target_mail, msg.as_string()) - - - server.quit() - - - - - -import sys -import chilkat - -def send_mail(target_mail, subject, sender_mail, msg): - - - msg = MIMEText(msg) - msg['Subject'] = subject - msg['From'] = sender_mail - msg['To'] = target_mail - - - - - # The mailman object is used for sending and receiving email. - mailman = chilkat.CkMailMan() - - recipient = target_mail - - # Do a DNS MX lookup for the recipient's mail server. - - smtpHostname = mailman.mxLookup(recipient) - if (mailman.get_LastMethodSuccess() != True): - print(mailman.lastErrorText(), flush=True) - #sys.exit() - return False - - print(smtpHostname) - - # Set the SMTP server. - mailman.put_SmtpHost(smtpHostname) - - # Create a new email object - email = chilkat.CkEmail() - - email.put_Subject(subject) - email.put_Body(msg.as_string()) - email.put_From(sender_mail) - email.AddTo("", recipient) - - success = mailman.SendEmail(email) - if (success != True): - print(mailman.lastErrorText(), flush=True) - return False - else: - print("Mail Sent!", flush=True) - return True - - - diff --git a/backend/lib/models.py b/backend/lib/models.py index c592740..0e8062e 100644 --- a/backend/lib/models.py +++ b/backend/lib/models.py @@ -32,6 +32,7 @@ class User(Document): class Chatbot(Document): creation_date = Date() + changed_date = Date() name = Text() creator_id = Keyword() description = Text() diff --git a/backend/lib/user.py b/backend/lib/user.py index 2ff14e6..10b9414 100644 --- a/backend/lib/user.py +++ b/backend/lib/user.py @@ -5,11 +5,10 @@ import os, json, hashlib, traceback, logging from elasticsearch import NotFoundError, Elasticsearch # for normal read/write without vectors from lib.models import User -from lib.elastictools import get_by_id, update_by_id, delete_by_id, wait_for_elasticsearch +from lib.elastictools import get_by_id, update_by_id, wait_for_elasticsearch elastic_url = os.getenv("ELASTIC_URI") - assert elastic_url @@ -27,11 +26,10 @@ def create_user(email, password, role="user", verified=False): def create_default_users(): #create default users - client = Elasticsearch(elastic_url) default_users = os.getenv("DEFAULT_USERS") if default_users: for (email, pwd, role) in json.loads(default_users): - if len(get_by_id(client, index="user", id_field_name="email", id_value=email)) == 0: + if len(get_by_id(index="user", id_field_name="email", id_value=email)) == 0: create_user(email, pwd, role=role, verified=True) diff --git a/backend/public/index.html b/backend/public/index.html index 0d684a7..fb7b440 100644 --- a/backend/public/index.html +++ b/backend/public/index.html @@ -28,6 +28,28 @@ + + @@ -68,7 +90,8 @@ let ele = document.getElementById("register_password"); if(ele.type === "password"){ ele.type = "text"; - } else { + } + else { ele.type = "password"; } } @@ -121,7 +144,8 @@ let ele = document.getElementById("pass"); if(ele.type === "password"){ ele.type = "text"; - } else { + } + else { ele.type = "password"; } } @@ -140,7 +164,7 @@ -
+

Creative Bots

Create and talk to chatbots!

@@ -161,6 +185,7 @@ +

@@ -168,6 +193,29 @@
+ + + + +
+
+ - -
- - - - - -
- - - -
@@ -299,19 +335,6 @@
- -
@@ -337,45 +360,60 @@
- +
- -
- +
-
- - + + +
+ +

Knowledge resources:

+
+ + +
-
- - + +
+ + +
+ + +
@@ -412,18 +450,13 @@ - - -
-
-
- +

A simple UI

@@ -440,14 +473,11 @@

Who? Why?

The guy on this site: tobiasweise.dev

-

For fun and learning...

-

...and maybe getting a job that employs the used skills.

+

For fun and learning.

-
- diff --git a/backend/public/main.js b/backend/public/main.js index dde57ab..faa3d9e 100755 --- a/backend/public/main.js +++ b/backend/public/main.js @@ -2,8 +2,6 @@ "use strict"; - - //idea: generate proxy opject via openapi.json api(url).login_now() function API(jwt){ @@ -109,30 +107,47 @@ async function create_bot(jwt, name, visibility, description, llm, sys_prompt){ return response.json(); } -async function get_bots(jwt){ - if(jwt){ - const response = await fetch("/bot", { - method: "GET", - headers: { - 'accept': '*/*', - 'Authorization': 'Bearer ' + jwt - } - }); - return response.json(); +async function get_bots(jwt, bot_id){ + if(!bot_id){ + if(jwt){ + const response = await fetch("/bot", { + method: "GET", + headers: { + 'accept': '*/*', + 'Authorization': 'Bearer ' + jwt + } + }); + return response.json(); + } + else{ + const response = await fetch("/bot", { + method: "GET", + headers: { + 'accept': '*/*' + } + }); + return response.json(); + } } else{ - const response = await fetch("/bot", { - method: "GET", - headers: { - 'accept': '*/*' - } - }); - return response.json(); + if(jwt){ + const response = await fetch("/bot?id=" + bot_id, { + method: "GET", + headers: { + 'accept': '*/*', + 'Authorization': 'Bearer ' + jwt + } + }); + return response.json(); + } } } -async function change_bot(jwt, name, visibility, description, llm, sys_prompt){ + + +async function change_bot(jwt, id, name, visibility, description, llm, sys_prompt){ const formData = new FormData(); + formData.append("id", id); formData.append("name", name); formData.append("visibility", visibility); formData.append("description", description); @@ -165,6 +180,21 @@ async function delete_bot(jwt, bot_id){ return response.json(); } +async function bot_train_text(jwt, bot_id, text){ + const formData = new FormData(); + formData.append("bot_id", bot_id); + formData.append("text", text); + const response = await fetch("/bot/train/text", { + method: "POST", + headers: { + 'accept': '*/*', + 'Authorization': 'Bearer ' + jwt + }, + body: formData + }); + return response.json(); +} + async function* ask_question(bot_id, question, system_prompt=""){ @@ -207,7 +237,7 @@ async function* ask_question(bot_id, question, system_prompt=""){ else{ done = true; socket.off('backend token'); - dom_ele.dispatchEvent(new CustomEvent(evt_name, { detail: "" })); + dom_ele.dispatchEvent(new CustomEvent(evt_name, { detail: obj })); } }); socket.emit('client message', {question, system_prompt, bot_id, room}); @@ -343,6 +373,7 @@ window.onload = async ()=>{ let change_bot_description = document.getElementById("change_bot_description"); let change_bot_llm_select = document.getElementById("change_bot_llm_select"); let change_bot_system_prompt = document.getElementById("change_bot_system_prompt"); + let change_bot_rag_text = document.getElementById("change_bot_rag_text"); let delete_bot_btn = document.getElementById("delete_bot_btn"); @@ -367,14 +398,44 @@ window.onload = async ()=>{ return bot_select.options[i].text; } - - function clean_bot_create_form(){ bot_name.value = ""; bot_description.value = ""; bot_system_prompt.value = ""; } + async function fill_bot_change_form(bot_id){ + + let jwt = localStorage.getItem("jwt"); + if(jwt){ + let bot = await get_bots(jwt, bot_id); + + change_bot_name.value = bot.name; + change_bot_visibility_select.value = bot.visibility; + change_bot_description.value = bot.description; + change_bot_llm_select.value = bot.llm_model; + change_bot_system_prompt.value = bot.system_prompt; + + /* + { + "creation_date": "Fri, 26 Jul 2024 19:13:18 GMT", + "creator_id": "cWG-75ABLLZSH2M7pxp8", + "description": "basic bot", + "id": "uqJ28JABAAhLOtEyOj2_", + "llm_model": "llama3", + "name": "Testbot", + "system_prompt": "", + "visibility": "public" + } + */ + + } + + } + + + + function set_ui_loggedin(b){ if(b){ console.log("User logged in!"); @@ -429,6 +490,11 @@ window.onload = async ()=>{ set_bot_list(bot_select, bots); set_bot_list(change_bot_select, bots); set_ui_loggedin(true); + + + let bot_id = change_bot_select.value; + await fill_bot_change_form(bot_id); + } } @@ -491,10 +557,21 @@ window.onload = async ()=>{ } }; + change_bot_select.onchange = async ()=>{ + let jwt = localStorage.getItem("jwt"); + if(jwt){ + let bot_id = change_bot_select.value; + await fill_bot_change_form(bot_id); + } + }; + + change_bot_btn.onclick = async ()=>{ let jwt = localStorage.getItem("jwt"); if(jwt){ + let bot_id = change_bot_select.value + let name = change_bot_name.value; let visibility = change_bot_visibility_select.value; let description = change_bot_description.value; @@ -512,7 +589,18 @@ window.onload = async ()=>{ } try{ - let {bot_id} = await change_bot(jwt, name, visibility, description, llm, sys_prompt); + await change_bot(jwt, bot_id, name, visibility, description, llm, sys_prompt); + + let text = change_bot_rag_text.value; + if(text.trim() !== ""){ + + + await bot_train_text(jwt, bot_id, text); + + //TODO: kill bot on partially failed creation? + } + + alert_bot_change(true); await update_ui(); } @@ -724,20 +812,23 @@ window.onload = async ()=>{ let acc_text = ""; for await (let token of ask_question(bot_select.value, input_string, system_prompt.value)){ - //console.log(token); - acc_text += "" + token; - switch(view_select.value){ - case "md": - table_cell.innerHTML = ""; - let ele = replace_dom_code(replace_code, parse_html(marked.parse(acc_text))); - table_cell.appendChild(ele); - break; + console.log(token); - case "plain": - table_cell.innerHTML = `
${acc_text}
`; - break; + if(typeof token === "string"){ + acc_text += token; + switch(view_select.value){ + case "md": + table_cell.innerHTML = ""; + let ele = replace_dom_code(replace_code, parse_html(marked.parse(acc_text))); + table_cell.appendChild(ele); + break; + + case "plain": + table_cell.innerHTML = `
${acc_text}
`; + break; + } + scroll_down(); } - scroll_down(); } /* @@ -783,17 +874,3 @@ window.onload = async ()=>{ }; - - - - - - - - - - - - - - diff --git a/backend/requirements.txt b/backend/requirements.txt index ed45ade..60714c6 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -7,19 +7,20 @@ bs4 elasticsearch elasticsearch-dsl +ollama langchain langchain-community -tiktoken langchain_ollama langchain-elasticsearch pydantic + fastapi +gunicorn Werkzeug flask -Flask-Cors Flask-SocketIO flask-openapi3 @@ -34,8 +35,3 @@ neo4j pyttsx3 -aiosmtpd -dnspython -chilkat - - diff --git a/deployment/docker-compose.yml b/deployment/docker-compose.yml index 32c4421..faa512b 100644 --- a/deployment/docker-compose.yml +++ b/deployment/docker-compose.yml @@ -18,7 +18,6 @@ services: minio: container_name: ${APP_PREFIX}_minio - #image: docker.io/bitnami/minio #:2022 image: minio/minio ports: - "29000:9000" @@ -36,7 +35,6 @@ services: elasticsearch: container_name: ${APP_PREFIX}_elasticsearch image: docker.elastic.co/elasticsearch/elasticsearch:8.11.0 - #image: opensearchproject/opensearch restart: always mem_limit: 4024m ports: @@ -48,9 +46,6 @@ services: - xpack.security.enabled=false - xpack.security.http.ssl.enabled=false - #- OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD} # Sets the demo admin user password when using demo configuration, required for OpenSearch 2.12 and later - #- "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" # Set min and max JVM heap sizes to at least 50% of system RAM - #- ES_JAVA_OPTS="-Xms2g -Xmx2g" volumes: - esdata:/usr/share/elasticsearch/data ulimits: @@ -76,22 +71,22 @@ services: - llm_network #command: "ollama pull llama2" - ollama-webui: - container_name: ${APP_PREFIX}_ollama-webui - image: ghcr.io/ollama-webui/ollama-webui:main - volumes: - - ../ollama/ollama-webui:/app/backend/data - depends_on: - - ollama - ports: - - 8888:8080 - environment: - - "/ollama/api=http://ollama:11434/api" - extra_hosts: - - host.docker.internal:host-gateway - restart: unless-stopped - networks: - - llm_network + #ollama-webui: + # container_name: ${APP_PREFIX}_ollama-webui + # image: ghcr.io/ollama-webui/ollama-webui:main + # volumes: + # - ../ollama/ollama-webui:/app/backend/data + # depends_on: + # - ollama + # ports: + # - 8888:8080 + # environment: + # - "/ollama/api=http://ollama:11434/api" + # extra_hosts: + # - host.docker.internal:host-gateway + # restart: unless-stopped + # networks: + # - llm_network #frontend: # container_name: ${APP_PREFIX}_frontend diff --git a/ideas.md b/ideas.md index 1017d8c..6861370 100644 --- a/ideas.md +++ b/ideas.md @@ -19,13 +19,17 @@ https://favtutor.com/articles/meta-llama-3-jailbreak/ * https://medium.com/@lucgagan/understanding-chatgpt-functions-and-how-to-use-them-6643a7d3c01a - - - - - - - +## Use drivers for GPU! + +"ollama recommends running the https://www.amd.com/en/support/linux-drivers: amdgpu version file missing: /sys/module/amdgpu/version stat /sys/module/amdgpu/version: no such file or directory" +"detected amdgpu versions []" +"all detected amdgpus are skipped, falling back to CPU" +"no GPU detected" + + +## Use PyPy for more speed + +...