bunch more stuff and experimental rag in ask route
All checks were successful
Gitea Docker Redeploy / Redploy-App-on-self-via-SSH (push) Successful in 1m18s

Tobias Weise 2024-08-12 02:17:56 +02:00
parent fcbe05a6e8
commit 903ea0366d
15 changed files with 636 additions and 170 deletions

View File

@ -14,18 +14,18 @@ jobs:
password: ${{ secrets.PASSWORD }}
port: ${{ secrets.PORT }}
#script_stop: true
#requires stored git password: git config credential.helper store
script: |
whoami
ls
cd Ollama-Bot/deployment
docker-compose down
git pull
-echo "Git pull finishes!"
+echo "============ Git pull finished! ========="
docker-compose build
-echo "Image build finishes!"
+echo "============= Image build finishes! =============="
docker-compose push
-echo "Image push finishes!"
+echo "============== Image push finishes! ==============="
docker-compose up -d
-echo "Docker compose deployment finished!!"
+echo "========== Docker compose deployment finished! =============="
docker rmi $(docker images -f "dangling=true" -q)
echo "========== Docker dangling images cleanup finished! =============="

View File

@ -30,32 +30,8 @@ After deploy:
```bash
docker login registry.tobiasweise.dev
docker-compose push
#sudo docker tag llm-python-backend nucberlin:5123/llm-python-backend
#sudo docker push nucberlin:5123/llm-python-backend
```
----
## Ideas
### Knowledge graph creation
https://www.linkedin.com/posts/sivas-subramaniyan_microsoft-research-is-bullish-on-the-concept-activity-7194953376470638592-dQ-U/?utm_source=share&utm_medium=member_desktop
clean dangling images
sudo docker rmi $(sudo docker images -f "dangling=true" -q)
Give just the translation of the given input to German and nothing else.

architecture.html (new file, 34 lines)

@ -0,0 +1,34 @@
<!DOCTYPE html>
<html>
<head>
<!--
<script src="https://cdnjs.cloudflare.com/ajax/libs/viz.js/2.1.2/viz.js"></script>
-->
<script src="https://cdnjs.cloudflare.com/ajax/libs/viz.js/1.7.1/viz.js"></script>
</head>
<body>
<div id="graph"></div>
<script>
let src = `
digraph {
rankdir="LR"
node [fontsize=10, shape=box, height=0.25]
edge [fontsize=10]
frontend -> backend
backend -> minio
backend -> elasticsearch
backend -> ollama
}
`;
document.getElementById("graph").innerHTML = Viz(src, {engine:"dot"});
</script>
</body>
</html>

View File

@ -4,6 +4,7 @@ RUN apt-get update
RUN apt-get install -y firefox-esr
RUN apt-get install -y ffmpeg
RUN apt-get install -y espeak
RUN apt-get install -y flite
#RUN curl https://ollama.ai/install.sh | sh
#RUN ollama run llama2

View File

@ -28,10 +28,13 @@ from langchain.callbacks.base import BaseCallbackHandler, BaseCallbackManager
from langchain.prompts import PromptTemplate
from langchain_community.llms import Ollama
-from langchain_community.vectorstores.elasticsearch import ElasticsearchStore
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader
from langchain_community.embeddings import OllamaEmbeddings
+#from langchain_community.vectorstores.elasticsearch import ElasticsearchStore #deprecated
+from langchain_elasticsearch import ElasticsearchStore
+from uuid import uuid4
from elasticsearch import NotFoundError, Elasticsearch # for normal read/write without vectors
from elasticsearch_dsl import Search, A, Document, Date, Integer, Keyword, Float, Long, Text, connections
from elasticsearch.exceptions import ConnectionError
@ -53,13 +56,11 @@ from cryptography.fernet import Fernet
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
-import pyttsx3
#----------home grown--------------
#from scraper import WebScraper
from funcs import group_by
from elastictools import get_by_id, update_by_id, delete_by_id
-from models import QueryLog, Chatbot, User
+from models import QueryLog, Chatbot, User, Text
#LLM_PAYLOAD = int(os.getenv("LLM_PAYLOAD"))
@ -264,11 +265,6 @@ def handle_message(message):
-def create_embedding():
-pass
def hash_password(s: str) -> str:
return hashlib.md5(s.encode('utf-8')).hexdigest()
@ -333,57 +329,16 @@ def login(form: LoginRequest):
#-----bot routes------
+from speech import text_to_speech
class GetSpeechRequest(BaseModel):
text: str = Field(None, description="Some text to convert to mp3")
@app.post('/text2speech', summary="", tags=[], security=security)
-@uses_jwt()
-def text2speech(form: GetSpeechRequest, decoded_jwt, user):
+def text2speech(form: GetSpeechRequest):
+file_name = text_to_speech(form.text)
engine = pyttsx3.init()
def get_voice(s):
for v in engine.getProperty("voices"):
if s == v.id:
return v
def set_voice(v):
engine.setProperty("voice", v.id)
def set_volume(n):
engine.setProperty('volume', engine.getProperty('volume') + n)
def set_rate(n):
engine.setProperty('rate', engine.getProperty('rate') + n)
#voices = engine.getProperty('voices')
#engine.setProperty('voice', voices[1].id)
set_voice(get_voice("english"))
set_volume(-5.0)
set_rate(-40)
#espeak -v mb-en1 -s 120 "Hello world"
#sudo apt-get install mbrola mbrola-en1
unix_timestamp = datetime.now().timestamp()
file_name = f'speech_{unix_timestamp}.mp3'
file_path = f'./public/{file_name}'
engine.save_to_file(form.text, file_path)
engine.runAndWait()
timeout = 10
t = 0
step = 0.1
while not os.path.isfile(file_path):
time.sleep(step)
t += step
if t > timeout:
raise Exception("Timeout(%s s) for creating speech.mp3!" % timeout)
time.sleep(step)
#return send_file(file_path, mimetype='audio/mpeg') #, attachment_filename= 'Audiofiles.zip', as_attachment = True)
return jsonify({
@ -393,9 +348,6 @@ def text2speech(form: GetSpeechRequest, decoded_jwt, user):
class GetBotRequest(BaseModel):
id: str = Field(None, description="The bot's id")
@ -514,44 +466,154 @@ def update_bot(form: UpdateBotRequest, decoded_jwt, user):
return "" return ""
class AskBotRequest(BaseModel): class AskBotRequest(BaseModel):
bot_id: str = Field(None, description="The bot's id") bot_id: str = Field(None, description="The bot's id")
question: str = Field(None, description="The question the bot should answer") question: str = Field(None, description="The question the bot should answer")
@app.get('/bot/ask', summary="", tags=[bot_tag], security=security) @app.get('/bot/ask', summary="", tags=[bot_tag], security=security)
@uses_jwt() @uses_jwt()
def query_bot(query: AskBotRequest, decoded_jwt, user): def query_bot(query: AskBotRequest, decoded_jwt, user):
""" """
Asks a chatbot Asks a chatbot
""" """
r = "" start = datetime.now().timestamp()
for chunk in ask_bot(question=query.question, bot_id=query.bot_id):
r += chunk bot_id = query.bot_id
prompt = query.question
history = ""
system_prompt = "Antworte freundlich, mit einer ausführlichen Erklärung, sofern vorhanden auf Basis der folgenden Informationen. Please answer in the language of the question."
prompt_template = system_prompt +"""
<ctx>
{context}
</ctx>
<hs>
"""+ history +"""
</hs>
Question: {question}
"""
chat_prompt = PromptTemplate(
template=prompt_template, input_variables=["context", "question"]
)
embeddings = OllamaEmbeddings(model="llama3", base_url="http://ollama:11434")
vector_store = ElasticsearchStore(
es_url=app.config['elastic_uri'],
index_name= "chatbot_" + bot_id.lower(),
distance_strategy="COSINE",
embedding=embeddings
)
bot = Chatbot.get(id=bot_id)
llm = Ollama(
model=bot.llm_model,
base_url="http://ollama:11434"
)
#query = bot.system_prompt + " " + question
#for chunk in llm.stream(query):
# yield chunk
#chunk_size = 1536
#chunk_overlap = 200
LLM_PAYLOAD=16384
CHUNK_SIZE=1536
k = int(LLM_PAYLOAD / CHUNK_SIZE) - 1
if (k < 2):
k = 2
#scoredocs = vector_store.similarity_search_with_score(prompt, k=k+10)
scoredocs = vector_store.similarity_search_with_score(prompt, k=k+10)
query = RetrievalQA.from_chain_type(
llm=llm,
chain_type="stuff",
verbose=False,
return_source_documents=True,
retriever=vector_store.as_retriever(search_kwargs={'k': k}),
chain_type_kwargs={"prompt": chat_prompt}
)
#inputTokens = 0
#outputTokens = 0
#with get_openai_callback() as cb:
qares = query.invoke({'query': prompt})
qadocs = qares['source_documents'] # STS: deliver doc names and page numbers in the future
ls = [dict(x) for x in qadocs]
for qadoc in qadocs:
print(qadoc, flush=True)
for x in scoredocs:
#xs = [x.to_dict() for x in scoredocs]
print(x, flush=True)
r = qares['result']
#r = ""
#for chunk in ask_bot(question=query.question, bot_id=query.bot_id):
# r += chunk
duration = round(datetime.now().timestamp() - start, 2)
app.logger.info(duration)
return jsonify({
-"answer": r
+"answer": r,
+"duration": str(duration),
+"docs": ls#,
+#"score_docs": xs
})
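For reference, the retrieval flow the reworked /bot/ask route implements condenses to the standalone sketch below. It is illustrative only: the Ollama and Elasticsearch hostnames mirror the compose services used above, the index name chatbot_example and the sample question are placeholders, and the k heuristic repeats the LLM_PAYLOAD / CHUNK_SIZE logic from the route.

```python
# Minimal sketch of the RAG flow in /bot/ask (illustrative, not the committed code).
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.llms import Ollama
from langchain_elasticsearch import ElasticsearchStore

OLLAMA_URL = "http://ollama:11434"          # assumed compose hostname
ELASTIC_URL = "http://elasticsearch:9200"   # assumed compose hostname

embeddings = OllamaEmbeddings(model="llama3", base_url=OLLAMA_URL)
store = ElasticsearchStore(
    es_url=ELASTIC_URL,
    index_name="chatbot_example",           # one index per bot, "chatbot_<bot id>"
    embedding=embeddings,
)
llm = Ollama(model="llama3", base_url=OLLAMA_URL)

prompt = PromptTemplate(
    template=(
        "Answer based on the following context.\n"
        "<ctx>\n{context}\n</ctx>\n"
        "Question: {question}"
    ),
    input_variables=["context", "question"],
)

# Fit roughly LLM_PAYLOAD / CHUNK_SIZE retrieved chunks into the context window, minimum 2.
k = max(int(16384 / 1536) - 1, 2)

qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    return_source_documents=True,
    retriever=store.as_retriever(search_kwargs={"k": k}),
    chain_type_kwargs={"prompt": prompt},
)

result = qa.invoke({"query": "What do the indexed documents say about deployment?"})
print(result["result"])
for doc in result["source_documents"]:
    print(doc.metadata)  # segment_nr, text_id, chunk_size, chunk_overlap
```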
#-----------------Embedding----------------------
+ESDocument = namedtuple('Document', ['page_content', 'metadata'])
class TrainTextRequest(BaseModel):
-chatbot_id: str = Field(None, description="The bot's id")
+bot_id: str = Field(None, description="The bot's id")
text: str = Field(None, description="Some text")
#TODO: needs to be reimplemented with another mechanism like celery to manage longer running tasks and give feedback to frontend
-@app.post('/bot/train', summary="", tags=[jwt_tag], security=security)
+@app.post('/bot/train/text', summary="", tags=[jwt_tag], security=security)
@uses_jwt()
-def upload(form: TrainTextRequest, decoded_jwt, nextsearch_user):
+def upload(form: TrainTextRequest, decoded_jwt, user):
"""
Caution: Long running request!
"""
-chatbot_id = form.chatbot_id
+bot_id = form.bot_id
text = form.text
# validate body
-if not chatbot_id:
+if not bot_id:
return jsonify({
'status': 'error',
'message': 'chatbotId is required'
@ -564,39 +626,55 @@ def upload(form: TrainTextRequest, decoded_jwt, nextsearch_user):
}), 400
+t = Text()
+t.text = text
+t.md5 = hashlib.md5(text.encode()).hexdigest()
-ESDocument = namedtuple('Document', ['page_content', 'metadata'])
+#add meta data
+t.creation_date = datetime.now()
-txt_id = hashlib.md5(text.encode()).hexdigest()
+t.creator_id = user.meta.id
+t.save()
#train with given text
-ls = []
-for i, s in enumerate(RecursiveCharacterTextSplitter(chunk_size=1536, chunk_overlap=200, length_function=len).split_text(text)):
-ls.append(ESDocument(
+chunk_size = 1536
+chunk_overlap = 200
+documents = []
+for i, s in enumerate(RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap, length_function=len).split_text(text)):
+documents.append(ESDocument(
page_content=s,
metadata={
-"chatbot_id": chatbot_id,
-"text_id": txt_id
+"segment_nr": i,
+"text_id": t.meta.id,
+"chunk_size": chunk_size,
+"chunk_overlap": chunk_overlap
}
))
+embeddings = OllamaEmbeddings(model="llama3", base_url="http://ollama:11434")
-def determine_index(chatbot_id: str) -> str:
-index_prefix = "chatbot"
-return f"{index_prefix}_{chatbot_id.lower()}"
+vector_store = ElasticsearchStore(
+es_url=app.config['elastic_uri'],
+index_name= "chatbot_" + bot_id.lower(),
+embedding=embeddings
+)
+uuids = [str(uuid4()) for _ in range(len(documents))]
-#index = determine_index(chatbot_id)
-embedding = OllamaEmbeddings()
-ElasticsearchStore.from_documents(ls, embedding, index_name="embed_text", es_url=app.config['elastic_uri'])
+vector_store.add_documents(documents=documents, ids=uuids)
return jsonify({
"status": "success"
})
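The TODO above suggests moving this long-running training work to a task queue such as Celery. A hypothetical sketch of that rework follows; the Redis broker URL, the task name and the polling pattern are assumptions and not part of this commit, while the chunking parameters mirror the route above.

```python
# Hypothetical Celery rework of /bot/train/text (sketch only, not committed code).
from celery import Celery
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Assumed Redis broker/backend reachable from the backend container.
celery_app = Celery("tasks", broker="redis://redis:6379/0", backend="redis://redis:6379/0")

@celery_app.task
def embed_text_task(bot_id: str, text: str) -> dict:
    # Chunk the text exactly like the route does today ...
    chunks = RecursiveCharacterTextSplitter(
        chunk_size=1536, chunk_overlap=200, length_function=len
    ).split_text(text)
    # ... then embed the chunks into the bot's index as in the route above.
    return {"status": "success", "bot_id": bot_id, "chunks": len(chunks)}

# In the Flask route: enqueue and return immediately so the frontend can poll progress:
#   task = embed_text_task.delay(bot_id, text)
#   return jsonify({"task_id": task.id}), 202
```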
#======== DEBUG routes ============
@app.get('/debug/schema', summary="", tags=[debug_tag])
@ -646,7 +724,7 @@ def catchAll(path):
def init_indicies():
# create the mappings in elasticsearch
-for Index in [QueryLog, Chatbot, User]:
+for Index in [QueryLog, Chatbot, User, Text]:
Index.init()

View File

@ -2,11 +2,8 @@
ELASTIC_URI=http://elasticsearch:9200
-DEFAULT_USERS=[["tobias_weise@gmx.de", "myEpicPwd123", "admin"]]
# DEFAULT_USERS is list of lists, each nested list respectively contains email, password and role
-# e.g. [["user@gmail.com", "1234", "user"], ["admin@gmail.com", "1234", "admin"]]
-# leave empty if you don't wish to seed users
+DEFAULT_USERS=[["tobias_weise@gmx.de", "myEpicPwd123", "admin"]]
#-----------not used yet----------------

View File

@ -2,15 +2,12 @@ import os
from elasticsearch_dsl import Document, InnerDoc, Nested, Date, Integer, Keyword, Float, Long, Text, connections, Object, Boolean
class User(Document):
creation_date = Date()
email = Keyword()
password_hash = Text(index=False)
role = Keyword()
#salt = Text(index=False)
#profileImage = Text(index=False)
#profileImage = Keyword()
@ -68,6 +65,25 @@ class Chatbot(Document):
class Text(Document):
creation_date = Date()
creator_id = Keyword()
text = Text()
md5 = Keyword()
class Index:
name = 'text'
settings = {
"number_of_shards": 1,
}
def save(self, ** kwargs):
return super(Text, self).save(**kwargs)
#======= Query Log ===========

View File

@ -32,6 +32,14 @@
<script type='text/javascript' src='http://www.x3dom.org/download/x3dom.js'> </script>
<link rel='stylesheet' type='text/css' href='http://www.x3dom.org/download/x3dom.css'></link>
+<script src="https://d3js.org/d3.v3.js"></script>
+<script src="jsnetworkx.js"></script>
+<script src="widget.js"></script>
</head>
<body>
@ -324,6 +332,43 @@
<script>
//idea: generate proxy object via openapi.json api(url).login_now()
function API(jwt){
const handler = {
get(obj, prop) {
//return prop in obj ? obj[prop] : 37;
//alert(prop);
(async function f(){
const response = await fetch("/openapi/openapi.json", {
method: "GET",
headers: {
'accept': '*/*'
}
});
let s = await response.json();
console.log(s);
})();
},
};
return new Proxy({}, handler);
}
//API().abc;
async function login(email, pwd){
const formData = new FormData();
formData.append("email", email);
@ -360,14 +405,14 @@
}
}
-async function text2speech(jwt, txt){
+async function text2speech(txt){
const formData = new FormData();
formData.append("text", txt);
const response = await fetch("/text2speech", {
method: "POST",
headers: {
-'accept': '*/*',
-'Authorization': 'Bearer ' + jwt
+'accept': '*/*'//,
+//'Authorization': 'Bearer ' + jwt
},
body: formData
});
@ -454,8 +499,27 @@
}
function parse_html(html){
const parser = new DOMParser();
return parser.parseFromString(html, 'text/html').documentElement;
}
function parse_xml(xml){
const parser = new DOMParser();
return parser.parseFromString(xml, 'text/xml').documentElement;
}
function parse_dot_lang(txt){
let layout = "dot";
return Viz(txt, {engine:layout});
}
window.onload = async ()=>{
-//document.documentElement.style.setProperty("--bs-primary-rgb", "45, 124, 172");
+document.documentElement.style.setProperty("--bs-primary-rgb", "45, 124, 172");
//chat
let user_input = document.getElementById("user_input");
@ -650,43 +714,112 @@
set_bot_list(ls);
};
function replace_dom_code(f, root_ele){
let eles = root_ele.getElementsByTagName("code");
for(let i=0; i<eles.length; i++){
let ele = eles[i];
-function map_md_code(f, md_txt){
-let r = "";
+//let ele2 = parse_html(f(ele));
+let ele2 = f(ele);
let text = true;
let parts = md_txt.split("```");
console.log(parts);
-for(let s of parts){
-if(text){
+if(ele2){
+ele.parentNode.replaceChild(ele2, ele);
r += s;
text = false;
} }
else{
r += f(s);
text = true;
} }
return root_ele;
}
return r;
} }
function render_code(s){
console.log(s);
if(s.startsWith("dot")){
s = s.replace(/dot\\s/i, "");
//let layout = "fdp";
let layout = "dot";
return `<dot-graph layout="${layout}" style="width:100%; height:100%;">${s}</dot-graph>`;
function is_graph(obj){
if("nodes" in obj){
if("edges" in obj || "links" in obj){
return true;
}
}
return false;
} }
-return s;
+function rename_attr(obj, old, s){
if(obj[old]){
obj[s] = obj[old];
delete obj[old];
}
return obj;
}
function translate_graph(obj){
let ret_obj = {
nodes: [],
edges: []
};
obj = rename_attr(obj, "links", "edges");
if(obj.nodes){
if(Array.isArray(obj.nodes)){
for(let node of obj.nodes){
if(typeof node === "object"){
if(node.id){
node = rename_attr(node, "name", "label");
ret_obj.nodes.push([node.id, { "radius": 15, "color": "orange"} ]);
}
}
}
}
}
if(obj.edges){
if(Array.isArray(obj.edges)){
for(let edge of obj.edges){
if(typeof edge === "object"){
edge = rename_attr(edge, "source", "from");
edge = rename_attr(edge, "target", "to");
if(edge.from){
if(edge.to){
let e = [edge.from, edge.to, {"color": "black"}];
ret_obj.edges.push(e);
}
}
}
}
}
}
console.log(ret_obj);
return ret_obj;
}
function replace_code(code_ele){
let txt = code_ele.innerHTML;
try{
return parse_html(parse_dot_lang(txt));
}
catch(err){
//console.log(err);
}
try{
let obj = JSON.parse(txt);
if(is_graph(obj)){
let s = `<net-graph style="width:400px; height:350px;">${JSON.stringify(translate_graph(obj))}</net-graph>`;
return parse_html(s);
}
}
catch(err){
//console.log(err);
}
return code_ele;
} }
@ -711,8 +844,9 @@
acc_text += "" + token; acc_text += "" + token;
switch(view_select.value){ switch(view_select.value){
case "md": case "md":
//table_cell.innerHTML = marked.parse(acc_text); table_cell.innerHTML = "";
table_cell.innerHTML = marked.parse(map_md_code(render_code, acc_text)); let ele = replace_dom_code(replace_code, parse_html(marked.parse(acc_text)));
table_cell.appendChild(ele);
break; break;
case "plain": case "plain":
@ -727,7 +861,6 @@
var audio = new Audio('https://interactive-examples.mdn.mozilla.net/media/cc0-audio/t-rex-roar.mp3');
audio.play();
}
*/
@ -736,9 +869,7 @@
let extra_s = ""; let extra_s = "";
let jwt = localStorage.getItem("jwt"); let{file} = await text2speech(final_answer);
if(jwt){
let{file} = await text2speech(jwt, final_answer);
//autoplay controls //autoplay controls
extra_s = ` extra_s = `
@ -746,25 +877,21 @@
<source src="${file}" type="audio/mpeg">
</audio>`;
-console.log(file);
+//console.log(file);
-}
switch(view_select.value){
case "md":
-//table_cell.innerHTML = marked.parse(final_answer) + extra_s;
-table_cell.innerHTML = marked.parse(map_md_code(render_code, acc_text)) + extra_s;
+table_cell.innerHTML = "";
+let ele = replace_dom_code(replace_code, parse_html(marked.parse(acc_text) + extra_s));
+table_cell.appendChild(ele);
break;
-default:
+case "plain":
table_cell.innerHTML = `<pre>${final_answer}</pre>`;
break;
}
scroll_down();
}

File diff suppressed because one or more lines are too long

backend/public/widget.js (new executable file, 87 lines)

@ -0,0 +1,87 @@
class NetGraphElement extends HTMLElement {
static get observedAttributes() {
return ['G'];
}
constructor() {
super();
this.attachShadow({mode: 'open'});
this.content_div = document.createElement('div');
this.shadowRoot.appendChild(this.content_div);
this.slot_ele = document.createElement('slot');
this.shadowRoot.appendChild(this.slot_ele);
this.G = new jsnx.MultiDiGraph();
}
connectedCallback(){
let style = this.hasAttribute('style') ? this.getAttribute('style') : "";
let weighted = this.hasAttribute('weighted') ? JSON.parse(this.getAttribute('weighted')) : false;
let withLabels = this.hasAttribute('withLabels') ? JSON.parse(this.getAttribute('withLabels')) : true;
let label_color = this.hasAttribute('labelColor') ? this.getAttribute('labelColor') : "black";
this.content_div.style = style;
let that = this;
jsnx.draw(that.G, {
element: that.content_div,
weighted,
withLabels,
labelStyle: {fill: label_color},
edgeStyle: {
'stroke-width': 5,
fill: d => d.data[0].color
},
nodeStyle: {
fill: d => d.data.color
},
nodeAttr: {
r: d => d.data.radius | 10,
title: d => d.label
}
}, true); //true ensures redrawing
this.slot_ele.addEventListener('slotchange', e => {
let text = that.innerText.trim();
let{nodes, edges} = JSON.parse(text);
for(let[id, data] of nodes){
that.G.addNode(id, data);
}
for(let[a, b, data] of edges){
that.G.addEdge(a, b, data);
}
jsnx.draw(that.G, {
element: that.content_div,
weighted,
withLabels,
labelStyle: {fill: label_color},
edgeStyle: {
'stroke-width': 5,
fill: d => d.data[0].color
},
nodeStyle: {
fill: d => d.data.color
},
nodeAttr: {
r: d => d.data.radius | 10,
title: d => d.label
}
}, true); //true ensures redrawing
that.slot_ele.style.display = "none";
that.content_div.children[0].setAttribute("width", that.content_div.style.width);
that.content_div.children[0].setAttribute("height", that.content_div.style.height);
});
}
disconnectedCallback() {
}
}
customElements.define('net-graph', NetGraphElement);

View File

@ -6,9 +6,12 @@ bs4
elasticsearch
elasticsearch-dsl
langchain
langchain-community
tiktoken
+langchain_ollama
+langchain-elasticsearch
pydantic

backend/speech.py (new file, 88 lines)

@ -0,0 +1,88 @@
import os, time
import os.path
from datetime import datetime, date
import pyttsx3
"""
flite -voice slt -t "This example is useful when there is a need to convert the contents of a file to speech. It can simplify tasks such as reading out the contents of a document or generating voiceovers for specific text files."
"""
def text_to_speech(text: str) -> str:
unix_timestamp = datetime.now().timestamp()
txt_file_path = "./public/speech_%s.txt" % unix_timestamp
with open(txt_file_path, "w") as f:
f.write(text)
file_name = f'speech_{unix_timestamp}.wav'
file_path = f'./public/{file_name}'
#os.system('flite -voice slt -o %s -t "%s"' % (file_path, text))
os.system('flite -voice slt -o %s -f %s' % (file_path, txt_file_path))
return file_name
def text_to_speech2(text: str) -> str:
engine = pyttsx3.init()
def get_voice(s):
for v in engine.getProperty("voices"):
if s == v.id:
return v
def set_voice(v):
engine.setProperty("voice", v.id)
def set_volume(n):
engine.setProperty('volume', engine.getProperty('volume') + n)
def set_rate(n):
engine.setProperty('rate', engine.getProperty('rate') + n)
#voices = engine.getProperty('voices')
#engine.setProperty('voice', voices[1].id)
set_voice(get_voice("english"))
set_volume(-5.0)
set_rate(-40)
#espeak -v mb-en1 -s 120 "Hello world"
#sudo apt-get install mbrola mbrola-en1
unix_timestamp = datetime.now().timestamp()
file_name = f'speech_{unix_timestamp}.mp3'
file_path = f'./public/{file_name}'
engine.save_to_file(text, file_path)
engine.runAndWait()
timeout = 10
t = 0
step = 0.1
while not os.path.isfile(file_path):
time.sleep(step)
t += step
if t > timeout:
raise Exception("Timeout(%s s) for creating speech.mp3!" % timeout)
time.sleep(step)
return file_name

View File

@ -44,8 +44,8 @@ services:
- discovery.type=single-node
- logger.level=ERROR
- bootstrap.memory_lock=true # Disable JVM heap memory swapping
- xpack.security.enabled=false
- xpack.security.http.ssl.enabled=false
#- OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD} # Sets the demo admin user password when using demo configuration, required for OpenSearch 2.12 and later
#- "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" # Set min and max JVM heap sizes to at least 50% of system RAM

ideas.md (new file, 44 lines)

@ -0,0 +1,44 @@
# Ideas
https://favtutor.com/articles/meta-llama-3-jailbreak/
## Knowledge graph creation
* https://www.linkedin.com/posts/sivas-subramaniyan_microsoft-research-is-bullish-on-the-concept-activity-7194953376470638592-dQ-U/?utm_source=share&utm_medium=member_desktop
## GraphRAG
...
## Function calls
* https://medium.com/@lucgagan/understanding-chatgpt-functions-and-how-to-use-them-6643a7d3c01a

View File

@ -42,3 +42,10 @@ system_prompt = "Write the answer in Japanese."
Give just the translation of the given input to German and nothing else.