added lib folder and model download at start... hope it works

2024-08-20 17:47:33 +02:00 · 2024-08-20 17:47:33 +02:00 · 579b76ebd5
commit 579b76ebd5
parent 973821ca4f
15 changed files with 159 additions and 167 deletions
--- a/backend/app.py
+++ b/backend/app.py
@ -57,16 +57,18 @@ from cryptography.hazmat.primitives import hashes
 from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC

 #----------home grown--------------
-#from scraper import WebScraper
-from funcs import group_by
-from elastictools import get_by_id, update_by_id, delete_by_id
-from models import init_indicies, QueryLog, Chatbot, User, Text
+from lib.funcs import group_by
+from lib.elastictools import get_by_id, update_by_id, delete_by_id, wait_for_elasticsearch
+from lib.models import init_indicies, QueryLog, Chatbot, User, Text
+from lib.chatbot import ask_bot, train_text, download_llm
+from lib.speech import text_to_speech
+from lib.mail import send_mail
+from lib.user import hash_password, create_user, create_default_users


-from chatbot import ask_bot, train_text
-from speech import text_to_speech
-
 BOT_ROOT_PATH = os.getenv("BOT_ROOT_PATH")
+assert BOT_ROOT_PATH
+

 # JWT Bearer Sample
 jwt = {
@ -151,35 +153,11 @@ def uses_jwt(required=True):
            kwargs["user"] = user
            return f(*args, **kwargs)

-
        return decorated_route

    return non_param_deco


-def create_key(salt: str, user_email: str) -> Fernet:
-    """
-    Example salt: 9c46f833b3376c5f3b64d8a93951df4b
-    Fernet usage: token = f.encrypt(b"Secret message!")
-    """
-    salt_bstr = bytes(salt, "utf-8")
-    email_bstr = bytes(user_email, "utf-8")
-    #password = b"password"
-    #salt = os.urandom(16)
-    #salt = b"9c46f833b3376c5f3b64d8a93951df4b"
-    kdf = PBKDF2HMAC(
-        algorithm=hashes.SHA256(),
-        length=32,
-        salt=salt_bstr,
-        iterations=48,
-    )
-    key = base64.urlsafe_b64encode(kdf.derive(email_bstr))
-    return Fernet(key)
-
-
-app.config['UPLOAD_FOLDER'] = 'uploads'
-app.config['CORS_HEADERS'] = 'Content-Type'
-app.config['CORS_METHODS'] = ["GET,POST,OPTIONS,DELETE,PUT"]


 env_to_conf = {
@ -200,9 +178,6 @@ for env_key, conf_key in env_to_conf.items():



-#from flask_cors import CORS #falls cross-orgin verwendet werden soll
-#CORS(app)
-
 socket = SocketIO(app, cors_allowed_origins="*")

@socket.on('connect')  
@ -237,11 +212,6 @@ def handle_message(message):
    socket.emit('backend token', {'done': True}, to=room)


-
-def hash_password(s: str) -> str:
-    return hashlib.md5(s.encode('utf-8')).hexdigest()
-
-
 #======================= TAGS =============================

 not_implemented_tag = Tag(name='Not implemented', description='Functionality not yet implemented beyond an empty response')
@ -297,10 +267,6 @@ def login(form: LoginRequest):
                }), 400


-
-
-from mail import send_mail
-
 class RegisterRequest(BaseModel):
    email: str = Field(None, description='The users E-Mail that serves as nick too.')
    password: str = Field(None, description='A short text by the user explaining the rating.')
@ -362,7 +328,6 @@ def register(form: RegisterRequest):
 class GetSpeechRequest(BaseModel):
    text: str = Field(None, description="Some text to convert to mp3")

-
@app.post('/text2speech', summary="", tags=[], security=security)
 def text2speech(form: GetSpeechRequest):
    file_name = text_to_speech(form.text)
@ -538,7 +503,10 @@ def query_bot(query: AskBotRequest, decoded_jwt, user):
        ]
    )

-    embeddings = OllamaEmbeddings(model="llama3", base_url="http://ollama:11434")
+    ollama_url = os.getenv("OLLAMA_URI")
+
+
+    embeddings = OllamaEmbeddings(model="llama3", base_url=ollama_url)

    vector_store = ElasticsearchStore(
            es_url=app.config['elastic_uri'],
@ -551,7 +519,7 @@ def query_bot(query: AskBotRequest, decoded_jwt, user):
    bot = Chatbot.get(id=bot_id)
    llm = Ollama(
        model=bot.llm_model,
-        base_url="http://ollama:11434"
+        base_url=ollama_url
    )

    k = 4
@ -598,7 +566,6 @@ def query_bot(query: AskBotRequest, decoded_jwt, user):


 #-----------------Embedding----------------------
-#ESDocument = namedtuple('Document', ['page_content', 'metadata'])

 class TrainTextRequest(BaseModel):
    bot_id: str = Field(None, description="The bot's id")
@ -627,14 +594,11 @@ def upload(form: TrainTextRequest, decoded_jwt, user):
            'message': 'No data source found'
        }), 400

-
    train_text(bot_id, text)
-
    return jsonify({
        "status": "success"
    })

-
 #-------- non api routes -------------

@app.route("/") #Index Verzeichnis
@ -648,79 +612,34 @@ def catchAll(path):
    return send_from_directory('./public', path)


-
-#def init_indicies():
-#    # create the mappings in elasticsearch
-#    for Index in [QueryLog, Chatbot, User, Text]:
-#        Index.init()
-
-
-def create_user(email, password, role="user", verified=False):
-    user = User(meta={'id': email}, email=email, password_hash=hash_password(password + email), role=role)
-    user.creation_date = datetime.now()
-    user.isEmailVerified = verified
-    user.save()
-    return user
-
-
-def create_default_users():
-    #create default users
-    client = Elasticsearch(app.config['elastic_uri'])
-    default_users = os.getenv("DEFAULT_USERS")
-    if default_users:
-        for (email, pwd, role) in json.loads(default_users):
-            if len(get_by_id(client, index="user", id_field_name="email", id_value=email)) == 0:
-                create_user(email, pwd, role=role, verified=True)
-
-
-
-import requests
-
-
 if __name__ == '__main__':

-    #TODO: implement some kind of logging mechanism
+    LOG_LEVEL = os.getenv("LOG_LEVEL")
+    if LOG_LEVEL:
+        logging.basicConfig(level=eval("logging." + LOG_LEVEL))
+    else:
        logging.basicConfig(level=logging.WARN)

+    #TODO: implement some kind of logging mechanism
+
    """
    USE_LOKI_LOGGER = os.getenv("USE_LOKI_LOGGER")
    if USE_LOKI_LOGGER:
        handler = logging_loki.LokiHandler(
            url="http://loki:3100/loki/api/v1/push", 
-            tags={"application": "Nextsearch"},
+            tags={"application": "CreativeBots"},
            #auth=("username", "password"),
            version="1",
        )
        app.logger.addHandler(handler)
    """

-    #TODO: find a clean way to wait without exceptions!
-    #Wait for elasticsearch to start up!
-    i = 1
-
-    while True:
-        try:
-            #client = Elasticsearch(app.config['elastic_uri'])
-            connections.create_connection(hosts=app.config['elastic_uri'])
-            connections.get_connection().cluster.health(wait_for_status='yellow')
-            init_indicies()
-            print("Elasticsearch found! Run Flask-app!", flush=True)
-            break
-        except:
-            #except ConnectionError:
-            i *= 1.5
-            time.sleep(i)
-            print("Elasticsearch not found! Wait %s seconds!" % i, flush=True)
-
-
+    wait_for_elasticsearch()
+    download_llm()
    connections.create_connection(hosts=app.config['elastic_uri'], request_timeout=60)
-
    init_indicies()
    create_default_users()
-    app.run(debug=True, threaded=True, host='0.0.0.0')
-
-
-
+    app.run(debug=False, threaded=True, host='0.0.0.0')



--- a/backend/backend.env
+++ b/backend/backend.env
@ -1,5 +0,0 @@
-
-BOT_ROOT_PATH=/
-ELASTIC_URI=http://elasticsearch:9200
-OLLAMA_URI=http://ollama:11434
-
--- a/backend/lib/init.py
+++ b/backend/lib/init.py
--- a/backend/lib/chatbot.py
+++ b/backend/lib/chatbot.py
@ -1,6 +1,7 @@
+"""
+All functions around bots

-from models import Chatbot, Text, User
-
+"""
 from uuid import uuid4
 from collections import namedtuple
 import os, hashlib, traceback, logging
@ -24,12 +25,18 @@ from langchain_community.embeddings import OllamaEmbeddings

 from langchain_elasticsearch import ElasticsearchStore

-
 from langchain.chains import create_retrieval_chain
 from langchain.chains.combine_documents import create_stuff_documents_chain
 from langchain_core.prompts import ChatPromptTemplate


+
+from lib.models import Chatbot, Text, User
+
+
+
+
+
 ollama_url = os.getenv("OLLAMA_URI")
 elastic_url = os.getenv("ELASTIC_URI")

@ -145,7 +152,7 @@ def ask_bot2(question, bot_id):
        ]
    )

-    embeddings = OllamaEmbeddings(model="llama3", base_url="http://ollama:11434")
+    embeddings = OllamaEmbeddings(model="llama3", base_url=ollama_url)

    vector_store = ElasticsearchStore(
            es_url=app.config['elastic_uri'],
@ -158,7 +165,7 @@ def ask_bot2(question, bot_id):
    bot = Chatbot.get(id=bot_id)
    llm = Ollama(
        model=bot.llm_model,
-        base_url="http://ollama:11434"
+        base_url=ollama_url
    )

    k = 4
@ -203,10 +210,19 @@ def ask_bot2(question, bot_id):



-
-
-
-
+from ollama import Client as OllamaClient
+
+def download_llm(model="llama3"):
+    #print(ollama_url, flush=True)
+
+    #ollama_client = OllamaClient(host=ollama_url)
+    #x = ollama_client.pull('llama3')
+    #print( type(x), flush=True)
+    #print( x.__dict__, flush=True)
+    #print( x, flush=True)
+
+    s = """curl %s/api/pull -d '{ "name": "%s" }' """ % (ollama_url, model)
+    print( os.system(s.strip()) ,flush=True)



--- a/backend/lib/elastictools.py
+++ b/backend/lib/elastictools.py
@ -1,8 +1,8 @@
 """
 Some helper functions to make querying easier
 """
+import time, json, os
 from typing import Any, Tuple, List, Dict, Any, Callable, Optional
-import json
 from elasticsearch import NotFoundError, Elasticsearch # for normal read/write without vectors
 from elasticsearch_dsl import Search, A, UpdateByQuery, Document, Date, Integer, Keyword, Float, Long, Text, connections

@ -47,7 +47,6 @@ def delete_by_id(client: Elasticsearch, index: str, id_field_name: str, id_value
    response = s.delete()
    #if not response.success():
    #    raise Exception("Unable to delete id '%s' in index '%' !" % (index, id_value))
-
    print(response, flush=True)


@ -78,6 +77,26 @@ def get_type_schema(client: Elasticsearch):



-
+def wait_for_elasticsearch():
+    #TODO: find a clean way to wait without exceptions!
+    #Wait for elasticsearch to start up!
+
+    elastic_url = os.getenv("ELASTIC_URI")
+    assert elastic_url
+
+    i = 1
+    while True:
+        try:
+            client = Elasticsearch(hosts=elastic_url)
+            #connections.create_connection(hosts=app.config['elastic_uri'])
+            #connections.get_connection().cluster.health(wait_for_status='yellow')
+            #init_indicies()
+            print("Elasticsearch found! Run Flask-app!", flush=True)
+            break
+        except:
+            #except ConnectionError:
+            i *= 2 #1.5
+            time.sleep(i)
+            print("Elasticsearch not found! Wait %s seconds!" % i, flush=True)


--- a/backend/lib/funcs.py
+++ b/backend/lib/funcs.py
--- a/backend/lib/mail.py
+++ b/backend/lib/mail.py
--- a/backend/lib/models.py
+++ b/backend/lib/models.py
@ -138,27 +138,7 @@ class QueryLog(Document):



-
 def init_indicies():
    """Create the Elasticsearch mappings by calling init() on each document class."""
    for document_cls in (QueryLog, Chatbot, User, Text):
        document_cls.init()
-
-
-
-
-if __name__ == "__main__":
-    elastic_uri = os.getenv("ELASTIC_URI")
-    #elastic_uri = "http://localhost:9200"
-
-    # create and save and article
-    #article = Article(meta={'id': 42}, title='Hello world!', tags=['test'])
-    #article.body = ''' looong text '''
-    ##article.published_from = datetime.now()
-    #article.save()
-
-    #article = Article.get(id=42)
-    #print(article.is_published())
-
-    # Display cluster health
-    #print(connections.get_connection().cluster.health())
--- a/backend/lib/speech.py
+++ b/backend/lib/speech.py
@ -8,6 +8,8 @@ import pyttsx3
 flite -voice slt -t "This example is useful when there is a need to convert the contents of a file to speech. It can simplify tasks such as reading out the contents of a document or generating voiceovers for specific text files."
 """

+#espeak -v mb-en1 -s 120 "Hello world"
+

 def text_to_speech(text: str) -> str:
    unix_timestamp = datetime.now().timestamp()
--- a/backend/lib/user.py
+++ b/backend/lib/user.py
@ -0,0 +1,42 @@
+"""
+All around managing users
+"""
+import os, json, hashlib, traceback, logging
+from elasticsearch import NotFoundError, Elasticsearch # for normal read/write without vectors
+
+from lib.models import User
+from lib.elastictools import get_by_id, update_by_id, delete_by_id, wait_for_elasticsearch
+
+
+elastic_url = os.getenv("ELASTIC_URI")
+
+assert elastic_url
+
+
+def hash_password(s: str) -> str:
+    return hashlib.md5(s.encode('utf-8')).hexdigest()
+
+
+def create_user(email, password, role="user", verified=False):
+    user = User(meta={'id': email}, email=email, password_hash=hash_password(password + email), role=role)
+    user.creation_date = datetime.now()
+    user.isEmailVerified = verified
+    user.save()
+    return user
+
+
+def create_default_users():
+    #create default users
+    client = Elasticsearch(elastic_url)
+    default_users = os.getenv("DEFAULT_USERS")
+    if default_users:
+        for (email, pwd, role) in json.loads(default_users):
+            if len(get_by_id(client, index="user", id_field_name="email", id_value=email)) == 0:
+                create_user(email, pwd, role=role, verified=True)
+
+
+
+
+
+
+
--- a/backend/lib/webbot.py
+++ b/backend/lib/webbot.py
--- a/backend/public/index.html
+++ b/backend/public/index.html
@ -9,7 +9,6 @@
    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/css/bootstrap.min.css">
    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/js/bootstrap.min.js"></script>

-
    <!--
    load the following async:?
    -->
@ -25,13 +24,10 @@
    <script async="async" type='text/javascript' src='http://www.x3dom.org/download/x3dom.js'> </script> 
    <link async="async" rel='stylesheet' type='text/css' href='http://www.x3dom.org/download/x3dom.css'>

-
-
    <script async="async" src="https://d3js.org/d3.v3.js"></script>
    <script async="async" src="jsnetworkx.js"></script>
    <script async="async" src="widget.js"></script>

-
 </head>
 <body>

@ -189,11 +185,11 @@
            <a class="nav-link active" data-bs-toggle="tab" href="#home">Chat</a>
          </li>
          <li class="nav-item">
-            <a class="nav-link" data-bs-toggle="tab" href="#create_bot_tab">Create bot</a>
+            <a id="tab2" class="nav-link disabled" data-bs-toggle="tab" href="#create_bot_tab">Create bot</a>
          </li>

          <li class="nav-item">
-            <a class="nav-link" data-bs-toggle="tab" href="#tweak_bot_tab">Tweak bot</a>
+            <a id="tab3" class="nav-link disabled" data-bs-toggle="tab" href="#tweak_bot_tab">Tweak bot</a>
          </li>

        </ul>
@ -232,16 +228,16 @@
            <div class="tab-pane container fade" id="create_bot_tab">

                <div style="height: 10px !important;"></div>
-
+               <!-- 
                <i>Creating a new bot requires an account and login via settings!</i>
                <br>
                <br>
-
+                -->
                <form>


                <label for="bot_name" class="form-label">Name:</label>
-                <input type="bot_name" class="form-control" id="bot_name" placeholder="MyNewBot">
+                <input type="bot_name" class="form-control" id="bot_name" placeholder="The displayed name of the bot.">

                <br>

@ -255,7 +251,7 @@
                <br>

                <label for="bot_description">Description:</label>
-                <textarea id="bot_description" class="form-control" rows="8" name="text" placeholder="A bot that cares."></textarea>
+                <textarea id="bot_description" class="form-control" rows="8" name="text" placeholder="A description of the bot and it's purpose."></textarea>

                <br>

@ -267,7 +263,7 @@
                <br>

                <label for="bot_system_prompt">System prompt:</label>
-                <textarea id="bot_system_prompt" class="form-control" rows="8" name="text" placeholder="Answer all questions short and sweet!"></textarea>
+                <textarea id="bot_system_prompt" class="form-control" rows="8" name="text" placeholder="The prompt that defines the bot's main behaviour."></textarea>


                <br>
@ -320,17 +316,17 @@
            <div class="tab-pane container fade" id="tweak_bot_tab">

                <div style="height: 10px !important;"></div>
-
+                               <!-- 
                <i>Tweaking a new bot requires an account and login via settings!</i>
                <br>
                <br>
-
+                -->

                <form>


                <label for="bot_name" class="form-label">Name:</label>
-                <input type="bot_name" class="form-control" id="bot_name" placeholder="MyNewBot">
+                <input type="bot_name" class="form-control" id="bot_name" placeholder="The displayed name of the bot.">

                <br>

@ -344,7 +340,7 @@
                <br>

                <label for="bot_description">Description:</label>
-                <textarea id="bot_description" class="form-control" rows="8" name="text" placeholder="A bot that cares."></textarea>
+                <textarea id="bot_description" class="form-control" rows="8" name="text" placeholder="A description of the bot and it's purpose."></textarea>

                <br>

@ -356,7 +352,7 @@
                <br>

                <label for="bot_system_prompt">System prompt:</label>
-                <textarea id="bot_system_prompt" class="form-control" rows="8" name="text" placeholder="Answer all questions short and sweet!"></textarea>
+                <textarea id="bot_system_prompt" class="form-control" rows="8" name="text" placeholder="The prompt that defines the bot's main behaviour."></textarea>


                <br>
@ -376,10 +372,12 @@
                      <div class="col"></div>
                      <div class="col"></div>
                      <div class="col"></div>
-                      <div class="col"></div>
                      <div class="col">
                        <button id="change_bot_btn" disabled type="button" class="btn btn-primary text-white">Change bot</button>
                      </div>
+                      <div class="col">
+                        <button id="delete_bot_btn" disablxed type="button" class="btn btn-danger text-white">Delete bot</button>
+                     </div>
                </div>

                <br>
--- a/backend/public/main.js
+++ b/backend/public/main.js
@ -316,26 +316,34 @@ window.onload = async ()=>{
        bot_system_prompt.value = "";
    }

-
-
-
    function set_ui_loggedin(b){ // switch the login-dependent UI; b truthy = logged in
        if(b){
+            console.log("User logged in!");
+
            //enable the create/change bot buttons and show logout instead of login
            create_bot_btn.removeAttribute("disabled");
            change_bot_btn.removeAttribute("disabled");
            login_btn.style.display = "none";
            logout_btn.style.display = "block";
+
+            document.getElementById("tab2").classList.remove('disabled'); // "Create bot" tab
+            document.getElementById("tab3").classList.remove('disabled'); // "Tweak bot" tab
        }
        else{
+            console.log("User not logged in!");
+
            //disable the create/change bot buttons and show login instead of logout
            create_bot_btn.setAttribute("disabled", "disabled");
            change_bot_btn.setAttribute("disabled", "disabled");
            logout_btn.style.display = "none";
            login_btn.style.display = "block";
+
+            document.getElementById("tab2").classList.add('disabled'); // "Create bot" tab
+            document.getElementById("tab3").classList.add('disabled'); // "Tweak bot" tab
        }
    }

+
    //init: are we logged in on start?
    let jwt = localStorage.getItem("jwt");
    if(jwt === null){
--- a/deployment/docker-compose.yml
+++ b/deployment/docker-compose.yml
@ -64,7 +64,6 @@ services:
    networks:
      - llm_network

-
  ollama:
    container_name: ${APP_PREFIX}_ollama
    image: ollama/ollama:latest
@ -78,10 +77,9 @@ services:
      - llm_network
    #command: "ollama pull llama2"

-
  ollama-webui:
+    container_name: ${APP_PREFIX}_ollama-webui
    image: ghcr.io/ollama-webui/ollama-webui:main
-    container_name: ollama-webui
    volumes:
      - ../ollama/ollama-webui:/app/backend/data
    depends_on:
@ -112,15 +110,20 @@ services:
    ports:
      - "5000:5000"
    env_file:
-      - ../backend/backend.env
      - .env
+    environment:
+      - LOG_LEVEL=ERROR
+      - BOT_ROOT_PATH=/
+      - ELASTIC_URI=http://elasticsearch:9200
+      - OLLAMA_URI=http://ollama:11434
+
    build: ../backend
    networks:
      - llm_network
    depends_on:
      - elasticsearch
      - ollama
-      #- minio
+      - minio

 volumes:
  esdata:
--- a/prompt_ideas.txt
+++ b/prompt_ideas.txt
@ -4,6 +4,16 @@



+ollama recommends running the https://www.amd.com/en/support/linux-drivers: amdgpu version file missing: 
+
+/sys/module/amdgpu/version stat /sys/module/amdgpu/version: no such file or directory"
+
+
+
+
+
+
+

 system_prompt = "Answer in the language of the question. Give a short answer. Answer all questions without reservations or advice."