bunch more stuff and experimental rag in ask route
All checks were successful
Gitea Docker Redeploy / Redploy-App-on-self-via-SSH (push) Successful in 1m18s
This commit is contained in:
parent
fcbe05a6e8
commit
903ea0366d
@@ -14,18 +14,18 @@ jobs:
          password: ${{ secrets.PASSWORD }}
          port: ${{ secrets.PORT }}
          #script_stop: true
          #requires stored git password: git config credential.helper store
          script: |
            whoami
            ls
            cd Ollama-Bot/deployment
            docker-compose down
            git pull
-           echo "Git pull finishes!"
+           echo "============ Git pull finished! ========="
            docker-compose build
-           echo "Image build finishes!"
+           echo "============= Image build finishes! =============="
            docker-compose push
-           echo "Image push finishes!"
+           echo "============== Image push finishes! ==============="
            docker-compose up -d
-           echo "Docker compose deployment finished!!"
+           echo "========== Docker compose deployment finished! =============="
+           docker rmi $(docker images -f "dangling=true" -q)
+           echo "========== Docker dangling images cleanup finished! =============="
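Note: `docker rmi $(docker images -f "dangling=true" -q)` exits non-zero when there are no dangling images, because `docker rmi` is then called with no arguments; depending on the runner's shell settings this can fail the deploy step. `docker image prune -f` performs the same cleanup and handles the empty case.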
README.md (24 changed lines)
@@ -30,32 +30,8 @@ After deploy:

```bash
docker login registry.tobiasweise.dev
docker-compose push

#sudo docker tag llm-python-backend nucberlin:5123/llm-python-backend
#sudo docker push nucberlin:5123/llm-python-backend
```

-----
-
-## Ideas
-
-### Knowledge graph creation
-
-https://www.linkedin.com/posts/sivas-subramaniyan_microsoft-research-is-bullish-on-the-concept-activity-7194953376470638592-dQ-U/?utm_source=share&utm_medium=member_desktop
-
-clean dangling images
-
-sudo docker rmi $(sudo docker images -f "dangling=true" -q)
-
-Give just the translation of the given input to German and nothing else.
architecture.html (new file, 34 lines)
@@ -0,0 +1,34 @@
<!DOCTYPE html>
<html>
<head>
    <!--
    <script src="https://cdnjs.cloudflare.com/ajax/libs/viz.js/2.1.2/viz.js"></script>
    -->
    <script src="https://cdnjs.cloudflare.com/ajax/libs/viz.js/1.7.1/viz.js"></script>
</head>
<body>
    <div id="graph"></div>
    <script>
        let src = `
        digraph {
            rankdir="LR"
            node [fontsize=10, shape=box, height=0.25]
            edge [fontsize=10]

            frontend -> backend

            backend -> minio
            backend -> elasticsearch
            backend -> ollama
        }
        `;
        document.getElementById("graph").innerHTML = Viz(src, {engine:"dot"});
    </script>
</body>
</html>
@@ -4,6 +4,7 @@ RUN apt-get update
RUN apt-get install -y firefox-esr
RUN apt-get install -y ffmpeg
RUN apt-get install -y espeak
+RUN apt-get install -y flite

#RUN curl https://ollama.ai/install.sh | sh
#RUN ollama run llama2
backend/app.py (246 changed lines)
@@ -28,10 +28,13 @@ from langchain.callbacks.base import BaseCallbackHandler, BaseCallbackManager
from langchain.prompts import PromptTemplate

from langchain_community.llms import Ollama
-from langchain_community.vectorstores.elasticsearch import ElasticsearchStore
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader
from langchain_community.embeddings import OllamaEmbeddings

+#from langchain_community.vectorstores.elasticsearch import ElasticsearchStore #deprecated
+from langchain_elasticsearch import ElasticsearchStore
+from uuid import uuid4

from elasticsearch import NotFoundError, Elasticsearch # for normal read/write without vectors
from elasticsearch_dsl import Search, A, Document, Date, Integer, Keyword, Float, Long, Text, connections
from elasticsearch.exceptions import ConnectionError
@@ -53,13 +56,11 @@ from cryptography.fernet import Fernet
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC

import pyttsx3

#----------home grown--------------
#from scraper import WebScraper
from funcs import group_by
from elastictools import get_by_id, update_by_id, delete_by_id
-from models import QueryLog, Chatbot, User
+from models import QueryLog, Chatbot, User, Text


#LLM_PAYLOAD = int(os.getenv("LLM_PAYLOAD"))
@@ -264,11 +265,6 @@ def handle_message(message):



-def create_embedding():
-    pass


def hash_password(s: str) -> str:
    return hashlib.md5(s.encode('utf-8')).hexdigest()
@@ -333,57 +329,16 @@ def login(form: LoginRequest):

#-----bot routes------

+from speech import text_to_speech


class GetSpeechRequest(BaseModel):
    text: str = Field(None, description="Some text to convert to mp3")


@app.post('/text2speech', summary="", tags=[], security=security)
-@uses_jwt()
-def text2speech(form: GetSpeechRequest, decoded_jwt, user):
-    engine = pyttsx3.init()
-
-    def get_voice(s):
-        for v in engine.getProperty("voices"):
-            if s == v.id:
-                return v
-
-    def set_voice(v):
-        engine.setProperty("voice", v.id)
-
-    def set_volume(n):
-        engine.setProperty('volume', engine.getProperty('volume') + n)
-
-    def set_rate(n):
-        engine.setProperty('rate', engine.getProperty('rate') + n)
-
-    #voices = engine.getProperty('voices')
-    #engine.setProperty('voice', voices[1].id)
-    set_voice(get_voice("english"))
-    set_volume(-5.0)
-    set_rate(-40)
-
-    #espeak -v mb-en1 -s 120 "Hello world"
-    #sudo apt-get install mbrola mbrola-en1
-
-    unix_timestamp = datetime.now().timestamp()
-    file_name = f'speech_{unix_timestamp}.mp3'
-    file_path = f'./public/{file_name}'
-
-    engine.save_to_file(form.text, file_path)
-    engine.runAndWait()
-
-    timeout = 10
-    t = 0
-    step = 0.1
-    while not os.path.isfile(file_path):
-        time.sleep(step)
-        t += step
-        if t > timeout:
-            raise Exception("Timeout(%s s) for creating speech.mp3!" % timeout)
-
-    time.sleep(step)
+def text2speech(form: GetSpeechRequest):
+    file_name = text_to_speech(form.text)

    #return send_file(file_path, mimetype='audio/mpeg') #, attachment_filename= 'Audiofiles.zip', as_attachment = True)
    return jsonify({
@@ -393,9 +348,6 @@ def text2speech(form: GetSpeechRequest, decoded_jwt, user):



class GetBotRequest(BaseModel):
    id: str = Field(None, description="The bot's id")
@@ -514,44 +466,154 @@ def update_bot(form: UpdateBotRequest, decoded_jwt, user):
    return ""


class AskBotRequest(BaseModel):
    bot_id: str = Field(None, description="The bot's id")
    question: str = Field(None, description="The question the bot should answer")


@app.get('/bot/ask', summary="", tags=[bot_tag], security=security)
@uses_jwt()
def query_bot(query: AskBotRequest, decoded_jwt, user):
    """
    Asks a chatbot
    """
-    r = ""
-    for chunk in ask_bot(question=query.question, bot_id=query.bot_id):
-        r += chunk
+    start = datetime.now().timestamp()
+
+    bot_id = query.bot_id
+    prompt = query.question
+
+    history = ""
+
+    system_prompt = "Antworte freundlich, mit einer ausführlichen Erklärung, sofern vorhanden auf Basis der folgenden Informationen. Please answer in the language of the question."
+
+    prompt_template = system_prompt + """
+    <ctx>
+    {context}
+    </ctx>
+    <hs>
+    """ + history + """
+    </hs>
+    Question: {question}
+    """
+
+    chat_prompt = PromptTemplate(
+        template=prompt_template, input_variables=["context", "question"]
+    )
+
+    embeddings = OllamaEmbeddings(model="llama3", base_url="http://ollama:11434")
+
+    vector_store = ElasticsearchStore(
+        es_url=app.config['elastic_uri'],
+        index_name="chatbot_" + bot_id.lower(),
+        distance_strategy="COSINE",
+        embedding=embeddings
+    )
+
+    bot = Chatbot.get(id=bot_id)
+    llm = Ollama(
+        model=bot.llm_model,
+        base_url="http://ollama:11434"
+    )
+    #query = bot.system_prompt + " " + question
+    #for chunk in llm.stream(query):
+    #    yield chunk
+
+    #chunk_size = 1536
+    #chunk_overlap = 200
+    LLM_PAYLOAD = 16384
+    CHUNK_SIZE = 1536
+
+    k = int(LLM_PAYLOAD / CHUNK_SIZE) - 1
+    if k < 2:
+        k = 2
+
+    #scoredocs = vector_store.similarity_search_with_score(prompt, k=k+10)
+    scoredocs = vector_store.similarity_search_with_score(prompt, k=k+10)
+
+    query = RetrievalQA.from_chain_type(
+        llm=llm,
+        chain_type="stuff",
+        verbose=False,
+        return_source_documents=True,
+        retriever=vector_store.as_retriever(search_kwargs={'k': k}),
+        chain_type_kwargs={"prompt": chat_prompt}
+    )
+
+    #inputTokens = 0
+    #outputTokens = 0
+
+    #with get_openai_callback() as cb:
+    qares = query.invoke({'query': prompt})
+    qadocs = qares['source_documents'] # STS: deliver doc names and page numbers in the future
+
+    ls = [dict(x) for x in qadocs]
+
+    for qadoc in qadocs:
+        print(qadoc, flush=True)
+
+    for x in scoredocs:
+        #xs = [x.to_dict() for x in scoredocs]
+        print(x, flush=True)
+
+    r = qares['result']
+
+    #r = ""
+    #for chunk in ask_bot(question=query.question, bot_id=query.bot_id):
+    #    r += chunk
+
+    duration = round(datetime.now().timestamp() - start, 2)
+
+    app.logger.info(duration)

    return jsonify({
-        "answer": r
+        "answer": r,
+        "duration": str(duration),
+        "docs": ls#,
+        #"score_docs": xs
    })
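The retriever depth above comes from a context budget: the number of retrieved chunks k is the LLM context size divided by the chunk size, minus one (presumably leaving room for the prompt template and question), floored at 2. A minimal sketch of the same arithmetic, with the function name invented for illustration:

```python
def retriever_k(llm_payload: int = 16384, chunk_size: int = 1536) -> int:
    """Number of chunks to retrieve so that k chunks plus the prompt fit the context."""
    k = int(llm_payload / chunk_size) - 1  # 16384 / 1536 -> 10, minus 1 -> 9
    return max(k, 2)  # never retrieve fewer than two chunks

assert retriever_k() == 9
```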

#-----------------Embedding----------------------
+ESDocument = namedtuple('Document', ['page_content', 'metadata'])


class TrainTextRequest(BaseModel):
-    chatbot_id: str = Field(None, description="The bot's id")
+    bot_id: str = Field(None, description="The bot's id")
    text: str = Field(None, description="Some text")


#TODO: needs to be reimplemented with another mechanism like celery to manage longer running tasks and give feedback to frontend

-@app.post('/bot/train', summary="", tags=[jwt_tag], security=security)
+@app.post('/bot/train/text', summary="", tags=[jwt_tag], security=security)
@uses_jwt()
-def upload(form: TrainTextRequest, decoded_jwt, nextsearch_user):
+def upload(form: TrainTextRequest, decoded_jwt, user):
    """
    Caution: Long running request!
    """
-    chatbot_id = form.chatbot_id
+    bot_id = form.bot_id
    text = form.text

    # validate body
-    if not chatbot_id:
+    if not bot_id:
        return jsonify({
            'status': 'error',
            'message': 'chatbotId is required'
@@ -564,39 +626,55 @@ def upload(form: TrainTextRequest, decoded_jwt, nextsearch_user):
        }), 400

+    t = Text()
+    t.text = text
+    t.md5 = hashlib.md5(text.encode()).hexdigest()

-    ESDocument = namedtuple('Document', ['page_content', 'metadata'])
-
-    txt_id = hashlib.md5(text.encode()).hexdigest()
+    #add meta data
+    t.creation_date = datetime.now()
+    t.creator_id = user.meta.id
+    t.save()

    #train with given text
-    ls = []
-    for i, s in enumerate(RecursiveCharacterTextSplitter(chunk_size=1536, chunk_overlap=200, length_function=len).split_text(text)):
-        ls.append(ESDocument(
+    chunk_size = 1536
+    chunk_overlap = 200
+
+    documents = []
+    for i, s in enumerate(RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap, length_function=len).split_text(text)):
+        documents.append(ESDocument(
            page_content=s,
            metadata={
-                "chatbot_id": chatbot_id,
-                "text_id": txt_id
+                "segment_nr": i,
+                "text_id": t.meta.id,
+                "chunk_size": chunk_size,
+                "chunk_overlap": chunk_overlap
            }
        ))

+    embeddings = OllamaEmbeddings(model="llama3", base_url="http://ollama:11434")
+
-    def determine_index(chatbot_id: str) -> str:
-        index_prefix = "chatbot"
-        return f"{index_prefix}_{chatbot_id.lower()}"
+    vector_store = ElasticsearchStore(
+        es_url=app.config['elastic_uri'],
+        index_name="chatbot_" + bot_id.lower(),
+        embedding=embeddings
+    )

-    #index = determine_index(chatbot_id)
-
-    embedding = OllamaEmbeddings()
-
-    ElasticsearchStore.from_documents(ls, embedding, index_name="embed_text", es_url=app.config['elastic_uri'])
+    uuids = [str(uuid4()) for _ in range(len(documents))]
+    vector_store.add_documents(documents=documents, ids=uuids)

    return jsonify({
        "status": "success"
    })
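The splitter settings above mean consecutive chunks share up to 200 characters, so a sentence cut at a chunk boundary still appears whole in one of the two chunks. A small standalone sketch of that behavior (assuming the splitter is imported from langchain_text_splitters; older releases expose it as langchain.text_splitter):

```python
from langchain_text_splitters import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=1536, chunk_overlap=200, length_function=len)
chunks = splitter.split_text("lorem ipsum dolor sit amet " * 500)
for i, chunk in enumerate(chunks):
    print(i, len(chunk))  # every chunk is at most 1536 characters long
```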
#======== DEBUG routes ============
@app.get('/debug/schema', summary="", tags=[debug_tag])
@@ -646,7 +724,7 @@ def catchAll(path):

def init_indicies():
    # create the mappings in elasticsearch
-    for Index in [QueryLog, Chatbot, User]:
+    for Index in [QueryLog, Chatbot, User, Text]:
        Index.init()
@@ -2,11 +2,8 @@

ELASTIC_URI=http://elasticsearch:9200

-DEFAULT_USERS=[["tobias_weise@gmx.de", "myEpicPwd123", "admin"]]
+# DEFAULT_USERS is a list of lists; each nested list contains email, password and role
+# e.g. [["user@gmail.com", "1234", "user"], ["admin@gmail.com", "1234", "admin"]]
+# leave empty if you don't wish to seed users
+DEFAULT_USERS=[["tobias_weise@gmx.de", "myEpicPwd123", "admin"]]


#-----------not used yet----------------
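The DEFAULT_USERS value happens to be valid JSON, so the seeding code can read it directly. A sketch of how the backend might consume it (the consuming code is not part of this diff):

```python
import json
import os

default_users = json.loads(os.getenv("DEFAULT_USERS") or "[]")
for email, password, role in default_users:
    print(email, role)  # create each seed user here
```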
@@ -2,15 +2,12 @@ import os
from elasticsearch_dsl import Document, InnerDoc, Nested, Date, Integer, Keyword, Float, Long, Text, connections, Object, Boolean


class User(Document):
    creation_date = Date()
    email = Keyword()
    password_hash = Text(index=False)
    role = Keyword()

    #salt = Text(index=False)
    #profileImage = Text(index=False)
    #profileImage = Keyword()
@@ -68,6 +65,25 @@ class Chatbot(Document):

+class Text(Document):
+    creation_date = Date()
+    creator_id = Keyword()
+    text = Text()
+    md5 = Keyword()
+
+    class Index:
+        name = 'text'
+        settings = {
+            "number_of_shards": 1,
+        }
+
+    def save(self, **kwargs):
+        return super(Text, self).save(**kwargs)
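For reference, persisting one of these documents is a two-step affair: Text.init() (done once in init_indicies) creates the index mapping, then save() writes a record. A minimal usage sketch, assuming an elasticsearch_dsl default connection is configured:

```python
from datetime import datetime
import hashlib

Text.init()  # create the 'text' index mapping (idempotent)

t = Text()
t.text = "some raw training text"
t.md5 = hashlib.md5(t.text.encode()).hexdigest()
t.creation_date = datetime.now()
t.save()
print(t.meta.id)  # Elasticsearch-assigned id; stored as text_id in chunk metadata
```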
#======= Query Log ===========
@@ -32,6 +32,14 @@
<script type='text/javascript' src='http://www.x3dom.org/download/x3dom.js'> </script>
<link rel='stylesheet' type='text/css' href='http://www.x3dom.org/download/x3dom.css'></link>

+<script src="https://d3js.org/d3.v3.js"></script>
+<script src="jsnetworkx.js"></script>
+<script src="widget.js"></script>

</head>
<body>
@@ -324,6 +332,43 @@
<script>
    //idea: generate proxy object via openapi.json api(url).login_now()

+    function API(jwt){
+
+        const handler = {
+            get(obj, prop) {
+                //return prop in obj ? obj[prop] : 37;
+
+                //alert(prop);
+
+                (async function f(){
+
+                    const response = await fetch("/openapi/openapi.json", {
+                        method: "GET",
+                        headers: {
+                            'accept': '*/*'
+                        }
+                    });
+
+                    let s = await response.json();
+
+                    console.log(s);
+
+                })();
+
+            },
+        };
+
+        return new Proxy({}, handler);
+    }
+
+    //API().abc;


    async function login(email, pwd){
        const formData = new FormData();
        formData.append("email", email);
@@ -360,14 +405,14 @@
        }
    }

-    async function text2speech(jwt, txt){
+    async function text2speech(txt){
        const formData = new FormData();
        formData.append("text", txt);
        const response = await fetch("/text2speech", {
            method: "POST",
            headers: {
-                'accept': '*/*',
-                'Authorization': 'Bearer ' + jwt
+                'accept': '*/*'//,
+                //'Authorization': 'Bearer ' + jwt
            },
            body: formData
        });
@@ -454,8 +499,27 @@
    }


+    function parse_html(html){
+        const parser = new DOMParser();
+        return parser.parseFromString(html, 'text/html').documentElement;
+    }
+
+    function parse_xml(xml){
+        const parser = new DOMParser();
+        return parser.parseFromString(xml, 'text/xml').documentElement;
+    }
+
+    function parse_dot_lang(txt){
+        let layout = "dot";
+        return Viz(txt, {engine:layout});
+    }


    window.onload = async ()=>{
-        //document.documentElement.style.setProperty("--bs-primary-rgb", "45, 124, 172");
+        document.documentElement.style.setProperty("--bs-primary-rgb", "45, 124, 172");

        //chat
        let user_input = document.getElementById("user_input");
@@ -650,43 +714,112 @@
        set_bot_list(ls);
    };

-    function map_md_code(f, md_txt){
-        let r = "";
-        let text = true;
-        let parts = md_txt.split("```");
-        console.log(parts);
-
-        for(let s of parts){
-            if(text){
-                r += s;
-                text = false;
-            }
-            else{
-                r += f(s);
-                text = true;
-            }
-        }
-        return r;
-    }
-
-    function render_code(s){
-        console.log(s);
-
-        if(s.startsWith("dot")){
-            s = s.replace(/dot\s/i, "");
-            //let layout = "fdp";
-            let layout = "dot";
-            return `<dot-graph layout="${layout}" style="width:100%; height:100%;">${s}</dot-graph>`;
-        }
-        return s;
-    }
+    function replace_dom_code(f, root_ele){
+        let eles = root_ele.getElementsByTagName("code");
+        for(let i=0; i<eles.length; i++){
+            let ele = eles[i];
+            //let ele2 = parse_html(f(ele));
+            let ele2 = f(ele);
+            if(ele2){
+                ele.parentNode.replaceChild(ele2, ele);
+            }
+        }
+        return root_ele;
+    }
+
+    function is_graph(obj){
+        if("nodes" in obj){
+            if("edges" in obj || "links" in obj){
+                return true;
+            }
+        }
+        return false;
+    }
+
+    function rename_attr(obj, old, s){
+        if(obj[old]){
+            obj[s] = obj[old];
+            delete obj[old];
+        }
+        return obj;
+    }
+
+    function translate_graph(obj){
+        let ret_obj = {
+            nodes: [],
+            edges: []
+        };
+
+        obj = rename_attr(obj, "links", "edges");
+
+        if(obj.nodes){
+            if(Array.isArray(obj.nodes)){
+                for(let node of obj.nodes){
+                    if(typeof node === "object"){
+                        if(node.id){
+                            node = rename_attr(node, "name", "label");
+                            ret_obj.nodes.push([node.id, { "radius": 15, "color": "orange"} ]);
+                        }
+                    }
+                }
+            }
+        }
+
+        if(obj.edges){
+            if(Array.isArray(obj.edges)){
+                for(let edge of obj.edges){
+                    if(typeof edge === "object"){
+                        edge = rename_attr(edge, "source", "from");
+                        edge = rename_attr(edge, "target", "to");
+                        if(edge.from){
+                            if(edge.to){
+                                let e = [edge.from, edge.to, {"color": "black"}];
+                                ret_obj.edges.push(e);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        console.log(ret_obj);
+        return ret_obj;
+    }
+
+    function replace_code(code_ele){
+        let txt = code_ele.innerHTML;
+
+        try{
+            return parse_html(parse_dot_lang(txt));
+        }
+        catch(err){
+            //console.log(err);
+        }
+
+        try{
+            let obj = JSON.parse(txt);
+            if(is_graph(obj)){
+                let s = `<net-graph style="width:400px; height:350px;">${JSON.stringify(translate_graph(obj))}</net-graph>`;
+                return parse_html(s);
+            }
+        }
+        catch(err){
+            //console.log(err);
+        }
+
+        return code_ele;
+    }
@@ -711,8 +844,9 @@
    acc_text += "" + token;
    switch(view_select.value){
        case "md":
            //table_cell.innerHTML = marked.parse(acc_text);
-            table_cell.innerHTML = marked.parse(map_md_code(render_code, acc_text));
+            table_cell.innerHTML = "";
+            let ele = replace_dom_code(replace_code, parse_html(marked.parse(acc_text)));
+            table_cell.appendChild(ele);
            break;

        case "plain":
@@ -727,7 +861,6 @@
    var audio = new Audio('https://interactive-examples.mdn.mozilla.net/media/cc0-audio/t-rex-roar.mp3');
    audio.play();
    }

    */
@@ -736,35 +869,29 @@

    let extra_s = "";
-    let jwt = localStorage.getItem("jwt");
-    if(jwt){
-        let{file} = await text2speech(jwt, final_answer);
+    let{file} = await text2speech(final_answer);

-        //autoplay controls
-        extra_s = `
-        <audio controls>
-            <source src="${file}" type="audio/mpeg">
-        </audio>`;
-
-        console.log(file);
-    }
+    //autoplay controls
+    extra_s = `
+    <audio controls>
+        <source src="${file}" type="audio/mpeg">
+    </audio>`;

+    //console.log(file);

    switch(view_select.value){

        case "md":
            //table_cell.innerHTML = marked.parse(final_answer) + extra_s;
-            table_cell.innerHTML = marked.parse(map_md_code(render_code, acc_text)) + extra_s;
+            table_cell.innerHTML = "";
+            let ele = replace_dom_code(replace_code, parse_html(marked.parse(acc_text) + extra_s));
+            table_cell.appendChild(ele);
            break;

        default:
        case "plain":
            table_cell.innerHTML = `<pre>${final_answer}</pre>`;
            break;
    }

    scroll_down();
}
backend/public/jsnetworkx.js (new file, 8 lines)
File diff suppressed because one or more lines are too long
backend/public/widget.js (new file, executable, 87 lines)
@@ -0,0 +1,87 @@

class NetGraphElement extends HTMLElement {

    static get observedAttributes() {
        return ['G'];
    }

    constructor() {
        super();
        this.attachShadow({mode: 'open'});
        this.content_div = document.createElement('div');
        this.shadowRoot.appendChild(this.content_div);
        this.slot_ele = document.createElement('slot');
        this.shadowRoot.appendChild(this.slot_ele);
        this.G = new jsnx.MultiDiGraph();
    }

    connectedCallback(){
        let style = this.hasAttribute('style') ? this.getAttribute('style') : "";
        let weighted = this.hasAttribute('weighted') ? JSON.parse(this.getAttribute('weighted')) : false;
        let withLabels = this.hasAttribute('withLabels') ? JSON.parse(this.getAttribute('withLabels')) : true;
        let label_color = this.hasAttribute('labelColor') ? this.getAttribute('labelColor') : "black";

        this.content_div.style = style;
        let that = this;

        jsnx.draw(that.G, {
            element: that.content_div,
            weighted,
            withLabels,
            labelStyle: {fill: label_color},
            edgeStyle: {
                'stroke-width': 5,
                fill: d => d.data[0].color
            },
            nodeStyle: {
                fill: d => d.data.color
            },
            nodeAttr: {
                r: d => d.data.radius || 10,  //default radius when none is given
                title: d => d.label
            }
        }, true); //true ensures redrawing

        this.slot_ele.addEventListener('slotchange', e => {
            let text = that.innerText.trim();
            let{nodes, edges} = JSON.parse(text);

            for(let[id, data] of nodes){
                that.G.addNode(id, data);
            }

            for(let[a, b, data] of edges){
                that.G.addEdge(a, b, data);
            }

            jsnx.draw(that.G, {
                element: that.content_div,
                weighted,
                withLabels,
                labelStyle: {fill: label_color},
                edgeStyle: {
                    'stroke-width': 5,
                    fill: d => d.data[0].color
                },
                nodeStyle: {
                    fill: d => d.data.color
                },
                nodeAttr: {
                    r: d => d.data.radius || 10,  //default radius when none is given
                    title: d => d.label
                }
            }, true); //true ensures redrawing

            that.slot_ele.style.display = "none";
            that.content_div.children[0].setAttribute("width", that.content_div.style.width);
            that.content_div.children[0].setAttribute("height", that.content_div.style.height);
        });
    }

    disconnectedCallback() {

    }

}

customElements.define('net-graph', NetGraphElement);
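For reference, the element consumes the same node/edge tuple format that translate_graph in the frontend emits, e.g. `<net-graph style="width:400px; height:350px;">{"nodes": [["a", {"radius": 15, "color": "orange"}], ["b", {"radius": 15, "color": "orange"}]], "edges": [["a", "b", {"color": "black"}]]}</net-graph>`, and it requires d3.v3 and jsnetworkx.js to be loaded first.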
@@ -6,9 +6,12 @@ bs4

elasticsearch
elasticsearch-dsl

langchain
langchain-community
tiktoken
+langchain_ollama
+langchain-elasticsearch

pydantic
backend/speech.py (new file, 88 lines)
@@ -0,0 +1,88 @@
import os, time
import os.path
from datetime import datetime, date
import pyttsx3


"""
flite -voice slt -t "This example is useful when there is a need to convert the contents of a file to speech. It can simplify tasks such as reading out the contents of a document or generating voiceovers for specific text files."
"""


def text_to_speech(text: str) -> str:
    unix_timestamp = datetime.now().timestamp()

    txt_file_path = "./public/speech_%s.txt" % unix_timestamp

    with open(txt_file_path, "w") as f:
        f.write(text)

    file_name = f'speech_{unix_timestamp}.wav'
    file_path = f'./public/{file_name}'
    #os.system('flite -voice slt -o %s -t "%s"' % (file_path, text))

    os.system('flite -voice slt -o %s -f %s' % (file_path, txt_file_path))

    return file_name


def text_to_speech2(text: str) -> str:
    engine = pyttsx3.init()

    def get_voice(s):
        for v in engine.getProperty("voices"):
            if s == v.id:
                return v

    def set_voice(v):
        engine.setProperty("voice", v.id)

    def set_volume(n):
        engine.setProperty('volume', engine.getProperty('volume') + n)

    def set_rate(n):
        engine.setProperty('rate', engine.getProperty('rate') + n)

    #voices = engine.getProperty('voices')
    #engine.setProperty('voice', voices[1].id)
    set_voice(get_voice("english"))
    set_volume(-5.0)
    set_rate(-40)

    #espeak -v mb-en1 -s 120 "Hello world"
    #sudo apt-get install mbrola mbrola-en1

    unix_timestamp = datetime.now().timestamp()
    file_name = f'speech_{unix_timestamp}.mp3'
    file_path = f'./public/{file_name}'

    engine.save_to_file(text, file_path)
    engine.runAndWait()

    timeout = 10
    t = 0
    step = 0.1
    while not os.path.isfile(file_path):
        time.sleep(step)
        t += step
        if t > timeout:
            raise Exception("Timeout(%s s) for creating speech.mp3!" % timeout)

    time.sleep(step)
    return file_name
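One caveat on the os.system call above: the %-interpolated paths are handed to a shell, which breaks if a path ever contains spaces or shell metacharacters. A safer equivalent of the same flite invocation, as a sketch reusing the variables from text_to_speech:

```python
import subprocess

# equivalent to: flite -voice slt -o <file_path> -f <txt_file_path>
subprocess.run(["flite", "-voice", "slt", "-o", file_path, "-f", txt_file_path], check=True)
```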
@@ -44,8 +44,8 @@ services:
      - discovery.type=single-node
      - logger.level=ERROR
      - bootstrap.memory_lock=true # Disable JVM heap memory swapping

+      - xpack.security.enabled=false
+      - xpack.security.http.ssl.enabled=false

      #- OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD} # Sets the demo admin user password when using demo configuration, required for OpenSearch 2.12 and later
      #- "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" # Set min and max JVM heap sizes to at least 50% of system RAM
ideas.md (new file, 44 lines)
@@ -0,0 +1,44 @@

# Ideas

https://favtutor.com/articles/meta-llama-3-jailbreak/


## Knowledge graph creation

* https://www.linkedin.com/posts/sivas-subramaniyan_microsoft-research-is-bullish-on-the-concept-activity-7194953376470638592-dQ-U/?utm_source=share&utm_medium=member_desktop


## GraphRAG

...


## Function calls

* https://medium.com/@lucgagan/understanding-chatgpt-functions-and-how-to-use-them-6643a7d3c01a
@@ -42,3 +42,10 @@ system_prompt = "Write the answer in Japanese."


+Give just the translation of the given input to German and nothing else.