bunch more stuff and experimental rag in ask route
All checks were successful
Gitea Docker Redeploy / Redploy-App-on-self-via-SSH (push) Successful in 1m18s
This commit is contained in:
parent
fcbe05a6e8
commit
903ea0366d
@@ -14,18 +14,18 @@ jobs:
          password: ${{ secrets.PASSWORD }}
          port: ${{ secrets.PORT }}
          #script_stop: true
          #requires stored git password: git config credential.helper store
          script: |
            whoami
            ls
            cd Ollama-Bot/deployment
            docker-compose down
            git pull
-           echo "Git pull finishes!"
+           echo "============ Git pull finished! ========="
            docker-compose build
-           echo "Image build finishes!"
+           echo "============= Image build finishes! =============="
            docker-compose push
-           echo "Image push finishes!"
+           echo "============== Image push finishes! ==============="
            docker-compose up -d
-           echo "Docker compose deployment finished!!"
+           echo "========== Docker compose deployment finished! =============="
+           docker rmi $(docker images -f "dangling=true" -q)
+           echo "========== Docker dangling images cleanup finished! =============="
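Note: `docker rmi $(docker images -f "dangling=true" -q)` exits non-zero when there are no dangling images, because `docker rmi` is then called with no arguments; depending on the runner's shell settings this can fail the deploy step. `docker image prune -f` performs the same cleanup and handles the empty case.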
README.md (24 changed lines)
@@ -30,32 +30,8 @@ After deploy:

```bash
docker login registry.tobiasweise.dev
docker-compose push

#sudo docker tag llm-python-backend nucberlin:5123/llm-python-backend
#sudo docker push nucberlin:5123/llm-python-backend
```

-----
-
-## Ideas
-
-### Knowledge graph creation
-
-https://www.linkedin.com/posts/sivas-subramaniyan_microsoft-research-is-bullish-on-the-concept-activity-7194953376470638592-dQ-U/?utm_source=share&utm_medium=member_desktop
-
-clean dangling images
-
-sudo docker rmi $(sudo docker images -f "dangling=true" -q)
-
-Give just the translation of the given input to German and nothing else.
architecture.html (new file, 34 lines)
@@ -0,0 +1,34 @@
<!DOCTYPE html>
<html>
<head>
    <!--
    <script src="https://cdnjs.cloudflare.com/ajax/libs/viz.js/2.1.2/viz.js"></script>
    -->
    <script src="https://cdnjs.cloudflare.com/ajax/libs/viz.js/1.7.1/viz.js"></script>
</head>
<body>
    <div id="graph"></div>
    <script>
        let src = `
        digraph {
            rankdir="LR"
            node [fontsize=10, shape=box, height=0.25]
            edge [fontsize=10]

            frontend -> backend

            backend -> minio
            backend -> elasticsearch
            backend -> ollama
        }
        `;
        document.getElementById("graph").innerHTML = Viz(src, {engine:"dot"});
    </script>
</body>
</html>
@@ -4,6 +4,7 @@ RUN apt-get update
RUN apt-get install -y firefox-esr
RUN apt-get install -y ffmpeg
RUN apt-get install -y espeak
+RUN apt-get install -y flite

#RUN curl https://ollama.ai/install.sh | sh
#RUN ollama run llama2
backend/app.py (246 changed lines)
@@ -28,10 +28,13 @@ from langchain.callbacks.base import BaseCallbackHandler, BaseCallbackManager
from langchain.prompts import PromptTemplate

from langchain_community.llms import Ollama
-from langchain_community.vectorstores.elasticsearch import ElasticsearchStore
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader
from langchain_community.embeddings import OllamaEmbeddings

+#from langchain_community.vectorstores.elasticsearch import ElasticsearchStore #deprecated
+from langchain_elasticsearch import ElasticsearchStore
+from uuid import uuid4

from elasticsearch import NotFoundError, Elasticsearch # for normal read/write without vectors
from elasticsearch_dsl import Search, A, Document, Date, Integer, Keyword, Float, Long, Text, connections
from elasticsearch.exceptions import ConnectionError
@@ -53,13 +56,11 @@ from cryptography.fernet import Fernet
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC

import pyttsx3

#----------home grown--------------
#from scraper import WebScraper
from funcs import group_by
from elastictools import get_by_id, update_by_id, delete_by_id
-from models import QueryLog, Chatbot, User
+from models import QueryLog, Chatbot, User, Text


#LLM_PAYLOAD = int(os.getenv("LLM_PAYLOAD"))
@@ -264,11 +265,6 @@ def handle_message(message):



-def create_embedding():
-    pass


def hash_password(s: str) -> str:
    return hashlib.md5(s.encode('utf-8')).hexdigest()
@@ -333,57 +329,16 @@ def login(form: LoginRequest):

#-----bot routes------

+from speech import text_to_speech


class GetSpeechRequest(BaseModel):
    text: str = Field(None, description="Some text to convert to mp3")


@app.post('/text2speech', summary="", tags=[], security=security)
-@uses_jwt()
-def text2speech(form: GetSpeechRequest, decoded_jwt, user):
-    engine = pyttsx3.init()
-
-    def get_voice(s):
-        for v in engine.getProperty("voices"):
-            if s == v.id:
-                return v
-
-    def set_voice(v):
-        engine.setProperty("voice", v.id)
-
-    def set_volume(n):
-        engine.setProperty('volume', engine.getProperty('volume') + n)
-
-    def set_rate(n):
-        engine.setProperty('rate', engine.getProperty('rate') + n)
-
-    #voices = engine.getProperty('voices')
-    #engine.setProperty('voice', voices[1].id)
-    set_voice(get_voice("english"))
-    set_volume(-5.0)
-    set_rate(-40)
-
-    #espeak -v mb-en1 -s 120 "Hello world"
-    #sudo apt-get install mbrola mbrola-en1
-
-    unix_timestamp = datetime.now().timestamp()
-    file_name = f'speech_{unix_timestamp}.mp3'
-    file_path = f'./public/{file_name}'
-
-    engine.save_to_file(form.text, file_path)
-    engine.runAndWait()
-
-    timeout = 10
-    t = 0
-    step = 0.1
-    while not os.path.isfile(file_path):
-        time.sleep(step)
-        t += step
-        if t > timeout:
-            raise Exception("Timeout(%s s) for creating speech.mp3!" % timeout)
-
-    time.sleep(step)
+def text2speech(form: GetSpeechRequest):
+    file_name = text_to_speech(form.text)

    #return send_file(file_path, mimetype='audio/mpeg') #, attachment_filename= 'Audiofiles.zip', as_attachment = True)
    return jsonify({
@@ -393,9 +348,6 @@ def text2speech(form: GetSpeechRequest, decoded_jwt, user):



class GetBotRequest(BaseModel):
    id: str = Field(None, description="The bot's id")
@@ -514,44 +466,154 @@ def update_bot(form: UpdateBotRequest, decoded_jwt, user):
    return ""


class AskBotRequest(BaseModel):
    bot_id: str = Field(None, description="The bot's id")
    question: str = Field(None, description="The question the bot should answer")


@app.get('/bot/ask', summary="", tags=[bot_tag], security=security)
@uses_jwt()
def query_bot(query: AskBotRequest, decoded_jwt, user):
    """
    Asks a chatbot
    """
-    r = ""
-    for chunk in ask_bot(question=query.question, bot_id=query.bot_id):
-        r += chunk
+    start = datetime.now().timestamp()
+
+    bot_id = query.bot_id
+    prompt = query.question
+
+    history = ""
+
+    system_prompt = "Antworte freundlich, mit einer ausführlichen Erklärung, sofern vorhanden auf Basis der folgenden Informationen. Please answer in the language of the question."
+
+    prompt_template = system_prompt + """
+    <ctx>
+    {context}
+    </ctx>
+    <hs>
+    """ + history + """
+    </hs>
+    Question: {question}
+    """
+
+    chat_prompt = PromptTemplate(
+        template=prompt_template, input_variables=["context", "question"]
+    )
+
+    embeddings = OllamaEmbeddings(model="llama3", base_url="http://ollama:11434")
+
+    vector_store = ElasticsearchStore(
+        es_url=app.config['elastic_uri'],
+        index_name="chatbot_" + bot_id.lower(),
+        distance_strategy="COSINE",
+        embedding=embeddings
+    )
+
+    bot = Chatbot.get(id=bot_id)
+    llm = Ollama(
+        model=bot.llm_model,
+        base_url="http://ollama:11434"
+    )
+    #query = bot.system_prompt + " " + question
+    #for chunk in llm.stream(query):
+    #    yield chunk
+
+    #chunk_size = 1536
+    #chunk_overlap = 200
+    LLM_PAYLOAD = 16384
+    CHUNK_SIZE = 1536
+
+    k = int(LLM_PAYLOAD / CHUNK_SIZE) - 1
+    if k < 2:
+        k = 2
+
+    #scoredocs = vector_store.similarity_search_with_score(prompt, k=k+10)
+    scoredocs = vector_store.similarity_search_with_score(prompt, k=k+10)
+
+    query = RetrievalQA.from_chain_type(
+        llm=llm,
+        chain_type="stuff",
+        verbose=False,
+        return_source_documents=True,
+        retriever=vector_store.as_retriever(search_kwargs={'k': k}),
+        chain_type_kwargs={"prompt": chat_prompt}
+    )
+
+    #inputTokens = 0
+    #outputTokens = 0
+
+    #with get_openai_callback() as cb:
+    qares = query.invoke({'query': prompt})
+    qadocs = qares['source_documents'] # STS: deliver doc names and page numbers in the future
+
+    ls = [dict(x) for x in qadocs]
+
+    for qadoc in qadocs:
+        print(qadoc, flush=True)
+
+    for x in scoredocs:
+        #xs = [x.to_dict() for x in scoredocs]
+        print(x, flush=True)
+
+    r = qares['result']
+
+    #r = ""
+    #for chunk in ask_bot(question=query.question, bot_id=query.bot_id):
+    #    r += chunk
+
+    duration = round(datetime.now().timestamp() - start, 2)
+
+    app.logger.info(duration)

    return jsonify({
-        "answer": r
+        "answer": r,
+        "duration": str(duration),
+        "docs": ls#,
+        #"score_docs": xs
    })
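The retriever depth above comes from a context budget: the number of retrieved chunks k is the LLM context size divided by the chunk size, minus one (presumably leaving room for the prompt template and question), floored at 2. A minimal sketch of the same arithmetic, with the function name invented for illustration:

```python
def retriever_k(llm_payload: int = 16384, chunk_size: int = 1536) -> int:
    """Number of chunks to retrieve so that k chunks plus the prompt fit the context."""
    k = int(llm_payload / chunk_size) - 1  # 16384 / 1536 -> 10, minus 1 -> 9
    return max(k, 2)  # never retrieve fewer than two chunks

assert retriever_k() == 9
```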

#-----------------Embedding----------------------
+ESDocument = namedtuple('Document', ['page_content', 'metadata'])


class TrainTextRequest(BaseModel):
-    chatbot_id: str = Field(None, description="The bot's id")
+    bot_id: str = Field(None, description="The bot's id")
    text: str = Field(None, description="Some text")


#TODO: needs to be reimplemented with another mechanism like celery to manage longer running tasks and give feedback to frontend

-@app.post('/bot/train', summary="", tags=[jwt_tag], security=security)
+@app.post('/bot/train/text', summary="", tags=[jwt_tag], security=security)
@uses_jwt()
-def upload(form: TrainTextRequest, decoded_jwt, nextsearch_user):
+def upload(form: TrainTextRequest, decoded_jwt, user):
    """
    Caution: Long running request!
    """
-    chatbot_id = form.chatbot_id
+    bot_id = form.bot_id
    text = form.text

    # validate body
-    if not chatbot_id:
+    if not bot_id:
        return jsonify({
            'status': 'error',
            'message': 'chatbotId is required'
@@ -564,39 +626,55 @@ def upload(form: TrainTextRequest, decoded_jwt, nextsearch_user):
        }), 400

+    t = Text()
+    t.text = text
+    t.md5 = hashlib.md5(text.encode()).hexdigest()

-    ESDocument = namedtuple('Document', ['page_content', 'metadata'])
-
-    txt_id = hashlib.md5(text.encode()).hexdigest()
+    #add meta data
+    t.creation_date = datetime.now()
+    t.creator_id = user.meta.id
+    t.save()

    #train with given text
-    ls = []
-    for i, s in enumerate(RecursiveCharacterTextSplitter(chunk_size=1536, chunk_overlap=200, length_function=len).split_text(text)):
-        ls.append(ESDocument(
+    chunk_size = 1536
+    chunk_overlap = 200
+
+    documents = []
+    for i, s in enumerate(RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap, length_function=len).split_text(text)):
+        documents.append(ESDocument(
            page_content=s,
            metadata={
-                "chatbot_id": chatbot_id,
-                "text_id": txt_id
+                "segment_nr": i,
+                "text_id": t.meta.id,
+                "chunk_size": chunk_size,
+                "chunk_overlap": chunk_overlap
            }
        ))

+    embeddings = OllamaEmbeddings(model="llama3", base_url="http://ollama:11434")
+
-    def determine_index(chatbot_id: str) -> str:
-        index_prefix = "chatbot"
-        return f"{index_prefix}_{chatbot_id.lower()}"
+    vector_store = ElasticsearchStore(
+        es_url=app.config['elastic_uri'],
+        index_name="chatbot_" + bot_id.lower(),
+        embedding=embeddings
+    )

-    #index = determine_index(chatbot_id)
-
-    embedding = OllamaEmbeddings()
-
-    ElasticsearchStore.from_documents(ls, embedding, index_name="embed_text", es_url=app.config['elastic_uri'])
+    uuids = [str(uuid4()) for _ in range(len(documents))]
+    vector_store.add_documents(documents=documents, ids=uuids)

    return jsonify({
        "status": "success"
    })
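The splitter settings above mean consecutive chunks share up to 200 characters, so a sentence cut at a chunk boundary still appears whole in one of the two chunks. A small standalone sketch of that behavior (assuming the splitter is imported from langchain_text_splitters; older releases expose it as langchain.text_splitter):

```python
from langchain_text_splitters import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=1536, chunk_overlap=200, length_function=len)
chunks = splitter.split_text("lorem ipsum dolor sit amet " * 500)
for i, chunk in enumerate(chunks):
    print(i, len(chunk))  # every chunk is at most 1536 characters long
```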
#======== DEBUG routes ============
@app.get('/debug/schema', summary="", tags=[debug_tag])
@@ -646,7 +724,7 @@ def catchAll(path):

def init_indicies():
    # create the mappings in elasticsearch
-    for Index in [QueryLog, Chatbot, User]:
+    for Index in [QueryLog, Chatbot, User, Text]:
        Index.init()
@@ -2,11 +2,8 @@

ELASTIC_URI=http://elasticsearch:9200

-DEFAULT_USERS=[["tobias_weise@gmx.de", "myEpicPwd123", "admin"]]
+# DEFAULT_USERS is a list of lists; each nested list contains email, password and role
+# e.g. [["user@gmail.com", "1234", "user"], ["admin@gmail.com", "1234", "admin"]]
+# leave empty if you don't wish to seed users
+DEFAULT_USERS=[["tobias_weise@gmx.de", "myEpicPwd123", "admin"]]


#-----------not used yet----------------
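The DEFAULT_USERS value happens to be valid JSON, so the seeding code can read it directly. A sketch of how the backend might consume it (the consuming code is not part of this diff):

```python
import json
import os

default_users = json.loads(os.getenv("DEFAULT_USERS") or "[]")
for email, password, role in default_users:
    print(email, role)  # create each seed user here
```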
@@ -2,15 +2,12 @@ import os
from elasticsearch_dsl import Document, InnerDoc, Nested, Date, Integer, Keyword, Float, Long, Text, connections, Object, Boolean


class User(Document):
    creation_date = Date()
    email = Keyword()
    password_hash = Text(index=False)
    role = Keyword()

    #salt = Text(index=False)
    #profileImage = Text(index=False)
    #profileImage = Keyword()
@@ -68,6 +65,25 @@ class Chatbot(Document):

+class Text(Document):
+    creation_date = Date()
+    creator_id = Keyword()
+    text = Text()
+    md5 = Keyword()
+
+    class Index:
+        name = 'text'
+        settings = {
+            "number_of_shards": 1,
+        }
+
+    def save(self, **kwargs):
+        return super(Text, self).save(**kwargs)
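For reference, persisting one of these documents is a two-step affair: Text.init() (done once in init_indicies) creates the index mapping, then save() writes a record. A minimal usage sketch, assuming an elasticsearch_dsl default connection is configured:

```python
from datetime import datetime
import hashlib

Text.init()  # create the 'text' index mapping (idempotent)

t = Text()
t.text = "some raw training text"
t.md5 = hashlib.md5(t.text.encode()).hexdigest()
t.creation_date = datetime.now()
t.save()
print(t.meta.id)  # Elasticsearch-assigned id; stored as text_id in chunk metadata
```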
#======= Query Log ===========
@@ -32,6 +32,14 @@
<script type='text/javascript' src='http://www.x3dom.org/download/x3dom.js'> </script>
<link rel='stylesheet' type='text/css' href='http://www.x3dom.org/download/x3dom.css'></link>

+<script src="https://d3js.org/d3.v3.js"></script>
+<script src="jsnetworkx.js"></script>
+<script src="widget.js"></script>

</head>
<body>
@@ -324,6 +332,43 @@
<script>
    //idea: generate proxy object via openapi.json api(url).login_now()

+    function API(jwt){
+
+        const handler = {
+            get(obj, prop) {
+                //return prop in obj ? obj[prop] : 37;
+
+                //alert(prop);
+
+                (async function f(){
+
+                    const response = await fetch("/openapi/openapi.json", {
+                        method: "GET",
+                        headers: {
+                            'accept': '*/*'
+                        }
+                    });
+
+                    let s = await response.json();
+
+                    console.log(s);
+
+                })();
+
+            },
+        };
+
+        return new Proxy({}, handler);
+    }
+
+    //API().abc;


    async function login(email, pwd){
        const formData = new FormData();
        formData.append("email", email);
@@ -360,14 +405,14 @@
        }
    }

-    async function text2speech(jwt, txt){
+    async function text2speech(txt){
        const formData = new FormData();
        formData.append("text", txt);
        const response = await fetch("/text2speech", {
            method: "POST",
            headers: {
-                'accept': '*/*',
-                'Authorization': 'Bearer ' + jwt
+                'accept': '*/*'//,
+                //'Authorization': 'Bearer ' + jwt
            },
            body: formData
        });
@@ -454,8 +499,27 @@
    }


+    function parse_html(html){
+        const parser = new DOMParser();
+        return parser.parseFromString(html, 'text/html').documentElement;
+    }
+
+    function parse_xml(xml){
+        const parser = new DOMParser();
+        return parser.parseFromString(xml, 'text/xml').documentElement;
+    }
+
+    function parse_dot_lang(txt){
+        let layout = "dot";
+        return Viz(txt, {engine:layout});
+    }


    window.onload = async ()=>{
-        //document.documentElement.style.setProperty("--bs-primary-rgb", "45, 124, 172");
+        document.documentElement.style.setProperty("--bs-primary-rgb", "45, 124, 172");

        //chat
        let user_input = document.getElementById("user_input");
@@ -650,43 +714,112 @@
        set_bot_list(ls);
    };

-    function map_md_code(f, md_txt){
-        let r = "";
-        let text = true;
-        let parts = md_txt.split("```");
-        console.log(parts);
-
-        for(let s of parts){
-            if(text){
-                r += s;
-                text = false;
-            }
-            else{
-                r += f(s);
-                text = true;
-            }
-        }
-        return r;
-    }
-
-    function render_code(s){
-        console.log(s);
-
-        if(s.startsWith("dot")){
-            s = s.replace(/dot\s/i, "");
-            //let layout = "fdp";
-            let layout = "dot";
-            return `<dot-graph layout="${layout}" style="width:100%; height:100%;">${s}</dot-graph>`;
-        }
-        return s;
-    }
+    function replace_dom_code(f, root_ele){
+        let eles = root_ele.getElementsByTagName("code");
+        for(let i=0; i<eles.length; i++){
+            let ele = eles[i];
+            //let ele2 = parse_html(f(ele));
+            let ele2 = f(ele);
+            if(ele2){
+                ele.parentNode.replaceChild(ele2, ele);
+            }
+        }
+        return root_ele;
+    }
+
+    function is_graph(obj){
+        if("nodes" in obj){
+            if("edges" in obj || "links" in obj){
+                return true;
+            }
+        }
+        return false;
+    }
+
+    function rename_attr(obj, old, s){
+        if(obj[old]){
+            obj[s] = obj[old];
+            delete obj[old];
+        }
+        return obj;
+    }
+
+    function translate_graph(obj){
+        let ret_obj = {
+            nodes: [],
+            edges: []
+        };
+
+        obj = rename_attr(obj, "links", "edges");
+
+        if(obj.nodes){
+            if(Array.isArray(obj.nodes)){
+                for(let node of obj.nodes){
+                    if(typeof node === "object"){
+                        if(node.id){
+                            node = rename_attr(node, "name", "label");
+                            ret_obj.nodes.push([node.id, { "radius": 15, "color": "orange"} ]);
+                        }
+                    }
+                }
+            }
+        }
+
+        if(obj.edges){
+            if(Array.isArray(obj.edges)){
+                for(let edge of obj.edges){
+                    if(typeof edge === "object"){
+                        edge = rename_attr(edge, "source", "from");
+                        edge = rename_attr(edge, "target", "to");
+                        if(edge.from){
+                            if(edge.to){
+                                let e = [edge.from, edge.to, {"color": "black"}];
+                                ret_obj.edges.push(e);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        console.log(ret_obj);
+        return ret_obj;
+    }
+
+    function replace_code(code_ele){
+        let txt = code_ele.innerHTML;
+
+        try{
+            return parse_html(parse_dot_lang(txt));
+        }
+        catch(err){
+            //console.log(err);
+        }
+
+        try{
+            let obj = JSON.parse(txt);
+            if(is_graph(obj)){
+                let s = `<net-graph style="width:400px; height:350px;">${JSON.stringify(translate_graph(obj))}</net-graph>`;
+                return parse_html(s);
+            }
+        }
+        catch(err){
+            //console.log(err);
+        }
+
+        return code_ele;
+    }
@@ -711,8 +844,9 @@
    acc_text += "" + token;
    switch(view_select.value){
        case "md":
            //table_cell.innerHTML = marked.parse(acc_text);
-            table_cell.innerHTML = marked.parse(map_md_code(render_code, acc_text));
+            table_cell.innerHTML = "";
+            let ele = replace_dom_code(replace_code, parse_html(marked.parse(acc_text)));
+            table_cell.appendChild(ele);
            break;

        case "plain":
@@ -727,7 +861,6 @@
    var audio = new Audio('https://interactive-examples.mdn.mozilla.net/media/cc0-audio/t-rex-roar.mp3');
    audio.play();
    }

    */
@@ -736,35 +869,29 @@

    let extra_s = "";
-    let jwt = localStorage.getItem("jwt");
-    if(jwt){
-        let{file} = await text2speech(jwt, final_answer);
+    let{file} = await text2speech(final_answer);

-        //autoplay controls
-        extra_s = `
-        <audio controls>
-            <source src="${file}" type="audio/mpeg">
-        </audio>`;
-
-        console.log(file);
-    }
+    //autoplay controls
+    extra_s = `
+    <audio controls>
+        <source src="${file}" type="audio/mpeg">
+    </audio>`;

+    //console.log(file);

    switch(view_select.value){

        case "md":
            //table_cell.innerHTML = marked.parse(final_answer) + extra_s;
-            table_cell.innerHTML = marked.parse(map_md_code(render_code, acc_text)) + extra_s;
+            table_cell.innerHTML = "";
+            let ele = replace_dom_code(replace_code, parse_html(marked.parse(acc_text) + extra_s));
+            table_cell.appendChild(ele);
            break;

        default:
        case "plain":
            table_cell.innerHTML = `<pre>${final_answer}</pre>`;
            break;
    }

    scroll_down();
}
backend/public/jsnetworkx.js (new file, 8 lines)
File diff suppressed because one or more lines are too long
backend/public/widget.js (new file, executable, 87 lines)
@@ -0,0 +1,87 @@

class NetGraphElement extends HTMLElement {

    static get observedAttributes() {
        return ['G'];
    }

    constructor() {
        super();
        this.attachShadow({mode: 'open'});
        this.content_div = document.createElement('div');
        this.shadowRoot.appendChild(this.content_div);
        this.slot_ele = document.createElement('slot');
        this.shadowRoot.appendChild(this.slot_ele);
        this.G = new jsnx.MultiDiGraph();
    }

    connectedCallback(){
        let style = this.hasAttribute('style') ? this.getAttribute('style') : "";
        let weighted = this.hasAttribute('weighted') ? JSON.parse(this.getAttribute('weighted')) : false;
        let withLabels = this.hasAttribute('withLabels') ? JSON.parse(this.getAttribute('withLabels')) : true;
        let label_color = this.hasAttribute('labelColor') ? this.getAttribute('labelColor') : "black";

        this.content_div.style = style;
        let that = this;

        jsnx.draw(that.G, {
            element: that.content_div,
            weighted,
            withLabels,
            labelStyle: {fill: label_color},
            edgeStyle: {
                'stroke-width': 5,
                fill: d => d.data[0].color
            },
            nodeStyle: {
                fill: d => d.data.color
            },
            nodeAttr: {
                r: d => d.data.radius || 10,  //default radius when none is given
                title: d => d.label
            }
        }, true); //true ensures redrawing

        this.slot_ele.addEventListener('slotchange', e => {
            let text = that.innerText.trim();
            let{nodes, edges} = JSON.parse(text);

            for(let[id, data] of nodes){
                that.G.addNode(id, data);
            }

            for(let[a, b, data] of edges){
                that.G.addEdge(a, b, data);
            }

            jsnx.draw(that.G, {
                element: that.content_div,
                weighted,
                withLabels,
                labelStyle: {fill: label_color},
                edgeStyle: {
                    'stroke-width': 5,
                    fill: d => d.data[0].color
                },
                nodeStyle: {
                    fill: d => d.data.color
                },
                nodeAttr: {
                    r: d => d.data.radius || 10,  //default radius when none is given
                    title: d => d.label
                }
            }, true); //true ensures redrawing

            that.slot_ele.style.display = "none";
            that.content_div.children[0].setAttribute("width", that.content_div.style.width);
            that.content_div.children[0].setAttribute("height", that.content_div.style.height);
        });
    }

    disconnectedCallback() {

    }

}

customElements.define('net-graph', NetGraphElement);
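For reference, the element consumes the same node/edge tuple format that translate_graph in the frontend emits, e.g. `<net-graph style="width:400px; height:350px;">{"nodes": [["a", {"radius": 15, "color": "orange"}], ["b", {"radius": 15, "color": "orange"}]], "edges": [["a", "b", {"color": "black"}]]}</net-graph>`, and it requires d3.v3 and jsnetworkx.js to be loaded first.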
@@ -6,9 +6,12 @@ bs4

elasticsearch
elasticsearch-dsl

langchain
langchain-community
tiktoken
+langchain_ollama
+langchain-elasticsearch

pydantic
backend/speech.py (new file, 88 lines)
@@ -0,0 +1,88 @@
import os, time
import os.path
from datetime import datetime, date
import pyttsx3


"""
flite -voice slt -t "This example is useful when there is a need to convert the contents of a file to speech. It can simplify tasks such as reading out the contents of a document or generating voiceovers for specific text files."
"""


def text_to_speech(text: str) -> str:
    unix_timestamp = datetime.now().timestamp()

    txt_file_path = "./public/speech_%s.txt" % unix_timestamp

    with open(txt_file_path, "w") as f:
        f.write(text)

    file_name = f'speech_{unix_timestamp}.wav'
    file_path = f'./public/{file_name}'
    #os.system('flite -voice slt -o %s -t "%s"' % (file_path, text))

    os.system('flite -voice slt -o %s -f %s' % (file_path, txt_file_path))

    return file_name


def text_to_speech2(text: str) -> str:
    engine = pyttsx3.init()

    def get_voice(s):
        for v in engine.getProperty("voices"):
            if s == v.id:
                return v

    def set_voice(v):
        engine.setProperty("voice", v.id)

    def set_volume(n):
        engine.setProperty('volume', engine.getProperty('volume') + n)

    def set_rate(n):
        engine.setProperty('rate', engine.getProperty('rate') + n)

    #voices = engine.getProperty('voices')
    #engine.setProperty('voice', voices[1].id)
    set_voice(get_voice("english"))
    set_volume(-5.0)
    set_rate(-40)

    #espeak -v mb-en1 -s 120 "Hello world"
    #sudo apt-get install mbrola mbrola-en1

    unix_timestamp = datetime.now().timestamp()
    file_name = f'speech_{unix_timestamp}.mp3'
    file_path = f'./public/{file_name}'

    engine.save_to_file(text, file_path)
    engine.runAndWait()

    timeout = 10
    t = 0
    step = 0.1
    while not os.path.isfile(file_path):
        time.sleep(step)
        t += step
        if t > timeout:
            raise Exception("Timeout(%s s) for creating speech.mp3!" % timeout)

    time.sleep(step)
    return file_name
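One caveat on the os.system call above: the %-interpolated paths are handed to a shell, which breaks if a path ever contains spaces or shell metacharacters. A safer equivalent of the same flite invocation, as a sketch reusing the variables from text_to_speech:

```python
import subprocess

# equivalent to: flite -voice slt -o <file_path> -f <txt_file_path>
subprocess.run(["flite", "-voice", "slt", "-o", file_path, "-f", txt_file_path], check=True)
```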
@@ -44,8 +44,8 @@ services:
      - discovery.type=single-node
      - logger.level=ERROR
      - bootstrap.memory_lock=true # Disable JVM heap memory swapping

+      - xpack.security.enabled=false
+      - xpack.security.http.ssl.enabled=false

      #- OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD} # Sets the demo admin user password when using demo configuration, required for OpenSearch 2.12 and later
      #- "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" # Set min and max JVM heap sizes to at least 50% of system RAM
ideas.md (new file, 44 lines)
@@ -0,0 +1,44 @@

# Ideas

https://favtutor.com/articles/meta-llama-3-jailbreak/


## Knowledge graph creation

* https://www.linkedin.com/posts/sivas-subramaniyan_microsoft-research-is-bullish-on-the-concept-activity-7194953376470638592-dQ-U/?utm_source=share&utm_medium=member_desktop


## GraphRAG

...


## Function calls

* https://medium.com/@lucgagan/understanding-chatgpt-functions-and-how-to-use-them-6643a7d3c01a
@@ -42,3 +42,10 @@ system_prompt = "Write the answer in Japanese."


+Give just the translation of the given input to German and nothing else.