fixes, login and more
This commit is contained in:
parent
357054b5af
commit
f6f5138f6e
374
backend/app.py
374
backend/app.py
@ -7,28 +7,23 @@ OpenAPI access via http://localhost:5000/openapi/ on local docker-compose deploy
|
|||||||
#warnings.filterwarnings("ignore")
|
#warnings.filterwarnings("ignore")
|
||||||
|
|
||||||
#std lib modules:
|
#std lib modules:
|
||||||
import os, sys, json
|
import os, sys, json, time
|
||||||
from typing import Any, Tuple, List, Dict, Any, Callable, Optional
|
from typing import Any, Tuple, List, Dict, Any, Callable, Optional
|
||||||
from datetime import datetime, date
|
from datetime import datetime, date
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
import hashlib, traceback, logging
|
import hashlib, traceback, logging
|
||||||
|
from functools import wraps
|
||||||
|
|
||||||
#llm
|
#llm
|
||||||
from langchain.callbacks.manager import CallbackManager
|
from langchain.callbacks.manager import CallbackManager
|
||||||
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
||||||
from langchain_community.llms import Ollama
|
from langchain_community.llms import Ollama
|
||||||
|
|
||||||
#import openai #even used?
|
|
||||||
import tiktoken
|
import tiktoken
|
||||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||||
from langchain.chains import RetrievalQA
|
from langchain.chains import RetrievalQA
|
||||||
#from langchain.callbacks import get_openai_callback
|
|
||||||
#from langchain_community.callbacks import get_openai_callback
|
|
||||||
|
|
||||||
#from langchain_openai import ChatOpenAI, AzureChatOpenAI
|
|
||||||
#from langchain_openai import OpenAIEmbeddings, AzureOpenAIEmbeddings
|
|
||||||
from langchain_community.vectorstores.elasticsearch import ElasticsearchStore
|
from langchain_community.vectorstores.elasticsearch import ElasticsearchStore
|
||||||
#from langchain.document_loaders import PyPDFLoader, Docx2txtLoader
|
|
||||||
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader
|
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader
|
||||||
|
|
||||||
from langchain.callbacks.base import BaseCallbackHandler, BaseCallbackManager
|
from langchain.callbacks.base import BaseCallbackHandler, BaseCallbackManager
|
||||||
@ -36,11 +31,15 @@ from langchain.prompts import PromptTemplate
|
|||||||
|
|
||||||
#ext libs
|
#ext libs
|
||||||
from elasticsearch import NotFoundError, Elasticsearch # for normal read/write without vectors
|
from elasticsearch import NotFoundError, Elasticsearch # for normal read/write without vectors
|
||||||
from elasticsearch_dsl import Search, A
|
from elasticsearch_dsl import Search, A, Document, Date, Integer, Keyword, Float, Long, Text, connections
|
||||||
from elasticsearch_dsl import Document, Date, Integer, Keyword, Float, Long, Text, connections
|
from elasticsearch.exceptions import ConnectionError
|
||||||
|
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
import logging_loki
|
||||||
|
import jwt as pyjwt
|
||||||
|
|
||||||
|
|
||||||
#flask, openapi
|
#flask, openapi
|
||||||
from flask import Flask, send_from_directory, Response, request, jsonify
|
from flask import Flask, send_from_directory, Response, request, jsonify
|
||||||
import sys, os
|
import sys, os
|
||||||
@ -50,20 +49,159 @@ from flask_openapi3 import Info, Tag, OpenAPI, Server, FileStorage
|
|||||||
from flask_socketio import SocketIO, join_room, leave_room, rooms, send
|
from flask_socketio import SocketIO, join_room, leave_room, rooms, send
|
||||||
|
|
||||||
|
|
||||||
#home grown
|
import base64
|
||||||
|
import os
|
||||||
|
from cryptography.fernet import Fernet
|
||||||
|
from cryptography.hazmat.primitives import hashes
|
||||||
|
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
|
||||||
|
|
||||||
|
#----------home grown--------------
|
||||||
#from scraper import WebScraper
|
#from scraper import WebScraper
|
||||||
from funcs import group_by
|
from funcs import group_by
|
||||||
#from elastictools import update_by_id, delete_by_id
|
from elastictools import get_by_id, update_by_id, delete_by_id
|
||||||
|
from models import QueryLog, Chatbot, User
|
||||||
|
|
||||||
#TODO: implement some kind of logging mechanism
|
|
||||||
#logging.basicConfig(filename='record.log', level=logging.DEBUG)
|
|
||||||
#logging.basicConfig(level=logging.DEBUG)
|
|
||||||
logging.basicConfig(level=logging.WARN)
|
|
||||||
|
|
||||||
app = Flask(__name__)
|
|
||||||
|
|
||||||
from flask_cors import CORS #falls cross-orgin verwendet werden soll
|
#LLM_PAYLOAD = int(os.getenv("LLM_PAYLOAD"))
|
||||||
CORS(app)
|
#CHUNK_SIZE = int(os.getenv("CHUNK_SIZE"))
|
||||||
|
BOT_ROOT_PATH = os.getenv("BOT_ROOT_PATH")
|
||||||
|
|
||||||
|
|
||||||
|
# JWT Bearer Sample
|
||||||
|
jwt = {
|
||||||
|
"type": "http",
|
||||||
|
"scheme": "bearer",
|
||||||
|
"bearerFormat": "JWT"
|
||||||
|
}
|
||||||
|
security_schemes = {"jwt": jwt}
|
||||||
|
security = [{"jwt": []}]
|
||||||
|
|
||||||
|
|
||||||
|
info = Info(
|
||||||
|
title="Chatbot-API",
|
||||||
|
version="1.0.0",
|
||||||
|
summary="The REST-API",
|
||||||
|
description="Default model: ..."
|
||||||
|
)
|
||||||
|
servers = [
|
||||||
|
Server(url=BOT_ROOT_PATH )
|
||||||
|
]
|
||||||
|
|
||||||
|
class NotFoundResponse(BaseModel):
|
||||||
|
code: int = Field(-1, description="Status Code")
|
||||||
|
message: str = Field("Resource not found!", description="Exception Information")
|
||||||
|
|
||||||
|
app = OpenAPI(
|
||||||
|
__name__,
|
||||||
|
info=info,
|
||||||
|
servers=servers,
|
||||||
|
responses={404: NotFoundResponse},
|
||||||
|
security_schemes=security_schemes
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def jwt_required(f):
|
||||||
|
"""
|
||||||
|
Wraps routes in a jwt-required logic and passes decoded jwt and user from elasticsearch to the route as keyword
|
||||||
|
"""
|
||||||
|
|
||||||
|
@wraps(f)
|
||||||
|
def decorated_route(*args, **kwargs):
|
||||||
|
token = None
|
||||||
|
if "Authorization" in request.headers:
|
||||||
|
token = request.headers["Authorization"].split(" ")[1]
|
||||||
|
if not token:
|
||||||
|
return jsonify({
|
||||||
|
'status': 'error',
|
||||||
|
"message": "Authentication Token is missing!",
|
||||||
|
}), 401
|
||||||
|
|
||||||
|
try:
|
||||||
|
data = pyjwt.decode(token, app.config["jwt_secret"], algorithms=["HS256"])
|
||||||
|
except Exception as e:
|
||||||
|
return jsonify({
|
||||||
|
'status': 'error',
|
||||||
|
"message": "JWT-decryption: " + str(e)
|
||||||
|
}), 401
|
||||||
|
|
||||||
|
try:
|
||||||
|
#user = get_by_id(client, index="user", id_field_name="email", id_value=data["email"])[0]
|
||||||
|
response = Search(using=client, index="user").filter("term", **{"email": data["email"]})[0:5].execute()
|
||||||
|
for hit in response:
|
||||||
|
user = hit
|
||||||
|
break
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
return jsonify({
|
||||||
|
'status': 'error',
|
||||||
|
"message": "Invalid Authentication token!"
|
||||||
|
}), 401
|
||||||
|
|
||||||
|
kwargs["decoded_jwt"] = data
|
||||||
|
kwargs["user"] = user
|
||||||
|
return f(*args, **kwargs)
|
||||||
|
|
||||||
|
return decorated_route
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def create_key(salt: str, user_email: str) -> Fernet:
|
||||||
|
"""
|
||||||
|
Example salt: 9c46f833b3376c5f3b64d8a93951df4b
|
||||||
|
Fernet usage: token = f.encrypt(b"Secret message!")
|
||||||
|
"""
|
||||||
|
salt_bstr = bytes(salt, "utf-8")
|
||||||
|
email_bstr = bytes(user_email, "utf-8")
|
||||||
|
#password = b"password"
|
||||||
|
#salt = os.urandom(16)
|
||||||
|
#salt = b"9c46f833b3376c5f3b64d8a93951df4b"
|
||||||
|
kdf = PBKDF2HMAC(
|
||||||
|
algorithm=hashes.SHA256(),
|
||||||
|
length=32,
|
||||||
|
salt=salt_bstr,
|
||||||
|
iterations=48,
|
||||||
|
)
|
||||||
|
key = base64.urlsafe_b64encode(kdf.derive(email_bstr))
|
||||||
|
return Fernet(key)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#app = Flask(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
app.config['UPLOAD_FOLDER'] = 'uploads'
|
||||||
|
app.config['CORS_HEADERS'] = 'Content-Type'
|
||||||
|
app.config['CORS_METHODS'] = ["GET,POST,OPTIONS,DELETE,PUT"]
|
||||||
|
|
||||||
|
|
||||||
|
env_to_conf = {
|
||||||
|
"BACKEND_INTERNAL_URL": "api_url",
|
||||||
|
"ELASTIC_URI": "elastic_uri",
|
||||||
|
"SECRET": "jwt_secret"
|
||||||
|
}
|
||||||
|
|
||||||
|
#import values from env into flask config and do existence check
|
||||||
|
for env_key, conf_key in env_to_conf.items():
|
||||||
|
x = os.getenv(env_key)
|
||||||
|
if not x:
|
||||||
|
msg = "Environment variable '%s' not set!" % env_key
|
||||||
|
app.logger.fatal(msg)
|
||||||
|
sys.exit(1)
|
||||||
|
else:
|
||||||
|
app.config[conf_key] = x
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#from flask_cors import CORS #falls cross-orgin verwendet werden soll
|
||||||
|
#CORS(app)
|
||||||
|
|
||||||
socket = SocketIO(app, cors_allowed_origins="*")
|
socket = SocketIO(app, cors_allowed_origins="*")
|
||||||
|
|
||||||
@ -74,7 +212,7 @@ def sockcon(data):
|
|||||||
to avoid broadcasting messages
|
to avoid broadcasting messages
|
||||||
answer in callback only to room with sid
|
answer in callback only to room with sid
|
||||||
"""
|
"""
|
||||||
room = request.sid
|
room = request.sid + request.remote_addr
|
||||||
join_room(room)
|
join_room(room)
|
||||||
socket.emit('backend response', {'msg': f'Connected to room {room} !', "room": room}) # looks like iOS needs an answer
|
socket.emit('backend response', {'msg': f'Connected to room {room} !', "room": room}) # looks like iOS needs an answer
|
||||||
|
|
||||||
@ -112,16 +250,129 @@ def handle_message(message):
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def create_embedding():
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def hash_password(s: str) -> str:
|
||||||
|
return hashlib.md5(s.encode('utf-8')).hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
#======================= TAGS =============================
|
||||||
|
|
||||||
|
jwt_tag = Tag(name='JWT', description='Requires a valid JSON Web Token')
|
||||||
|
not_implemented_tag = Tag(name='Not implemented', description='Functionality not yet implemented beyond an empty response')
|
||||||
|
|
||||||
#==============Routes===============
|
#==============Routes===============
|
||||||
|
|
||||||
|
class LoginRequest(BaseModel):
|
||||||
|
email: str = Field(None, description='A short text by the user explaining the rating.')
|
||||||
|
password: str = Field(None, description='A short text by the user explaining the rating.')
|
||||||
|
|
||||||
|
|
||||||
|
@app.post('/login', summary="", tags=[], security=security)
|
||||||
|
def login(form: LoginRequest):
|
||||||
|
"""
|
||||||
|
Get your JWT to verify access rights
|
||||||
|
"""
|
||||||
|
client = Elasticsearch(app.config['elastic_uri'])
|
||||||
|
match get_by_id(client, index="user", id_field_name="email", id_value=form.email):
|
||||||
|
case []:
|
||||||
|
msg = "User with email '%s' doesn't exist!" % form.email
|
||||||
|
app.logger.error(msg)
|
||||||
|
return jsonify({
|
||||||
|
'status': 'error',
|
||||||
|
'message': msg
|
||||||
|
}), 400
|
||||||
|
|
||||||
|
case [user]:
|
||||||
|
if user["password_hash"] == hash_password(form.password):
|
||||||
|
return pyjwt.encode({"email": form.email}, app.config['jwt_secret'], algorithm="HS256")
|
||||||
|
else:
|
||||||
|
msg = "Invalid password!"
|
||||||
|
app.logger.error(msg)
|
||||||
|
return jsonify({
|
||||||
|
'status': 'error',
|
||||||
|
'message': msg
|
||||||
|
}), 400
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class IndexSchemaRequest(BaseModel):
|
||||||
|
#end: datetime = Field("2100-01-31T16:47+00:00", description="""The interval end datetime in <a href="https://en.wikipedia.org/wiki/ISO_8601">ISO 8601</a> format""")
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@app.get('/bot', summary="", tags=[jwt_tag], security=security)
|
||||||
|
@jwt_required
|
||||||
|
def get_all_bots(decoded_jwt, user):
|
||||||
|
"""
|
||||||
|
List all bots for a user identified by the JWT.
|
||||||
|
"""
|
||||||
|
#client = Elasticsearch(app.config['elastic_uri'])
|
||||||
|
#bots = get_by_id(client, index="chatbot", id_field_name="createdBy", id_value=nextsearch_user.meta.id)
|
||||||
|
#return jsonify(bots)
|
||||||
|
return jsonify([])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@app.post('/bot', summary="", tags=[jwt_tag, not_implemented_tag], security=security)
|
||||||
|
@jwt_required
|
||||||
|
def create_bot(query: IndexSchemaRequest):
|
||||||
|
"""
|
||||||
|
Creates a chatbot for the JWT associated user.
|
||||||
|
"""
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#======== DEBUG routes ============
|
||||||
|
|
||||||
|
@app.get('/bot/debug/schema', summary="", tags=[])
|
||||||
|
def get_schema(query: IndexSchemaRequest):
|
||||||
|
"""
|
||||||
|
|
||||||
|
"""
|
||||||
|
#chatbots = query.chatbots
|
||||||
|
#client = Elasticsearch(app.config['elastic_uri'])
|
||||||
|
|
||||||
|
def simplify_properties(d):
|
||||||
|
new_d = {}
|
||||||
|
for field, d3 in d["properties"].items():
|
||||||
|
if "type" in d3:
|
||||||
|
new_d[field] = d3["type"]
|
||||||
|
elif "properties" in d3:
|
||||||
|
new_d[field] = simplify_properties(d3)
|
||||||
|
return new_d
|
||||||
|
|
||||||
|
|
||||||
|
def get_type_schema(client: Elasticsearch):
|
||||||
|
d = client.indices.get(index="*").body
|
||||||
|
new_d = {}
|
||||||
|
for index, d2 in d.items():
|
||||||
|
new_d[index] = simplify_properties(d2["mappings"])
|
||||||
|
return new_d
|
||||||
|
|
||||||
|
return jsonify( get_type_schema(client) )
|
||||||
|
|
||||||
|
|
||||||
|
#TODO: route that takes a schema json and compares to internal structure and returns boolean
|
||||||
|
|
||||||
|
|
||||||
|
#-------- non api routes -------------
|
||||||
|
|
||||||
@app.route("/") #Index Verzeichnis
|
@app.route("/") #Index Verzeichnis
|
||||||
def index():
|
def index():
|
||||||
return send_from_directory('.', "index.html")
|
return send_from_directory('.', "index.html")
|
||||||
|
|
||||||
@app.route("/info") #spezielle Nutzer definierte Route
|
#@app.route("/info") #spezielle Nutzer definierte Route
|
||||||
def info():
|
#def info():
|
||||||
return sys.version+" "+os.getcwd()
|
# return sys.version+" "+os.getcwd()
|
||||||
|
|
||||||
@app.route('/<path:path>') #generische Route (auch Unterordner)
|
@app.route('/<path:path>') #generische Route (auch Unterordner)
|
||||||
def catchAll(path):
|
def catchAll(path):
|
||||||
@ -129,13 +380,78 @@ def catchAll(path):
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def init_indicies():
|
||||||
|
# create the mappings in elasticsearch
|
||||||
|
for Index in [QueryLog, Chatbot, User]:
|
||||||
|
Index.init()
|
||||||
|
|
||||||
|
|
||||||
|
def create_default_users():
|
||||||
|
#create default users
|
||||||
|
client = Elasticsearch(app.config['elastic_uri'])
|
||||||
|
default_users = os.getenv("DEFAULT_USERS")
|
||||||
|
if default_users:
|
||||||
|
for (email, pwd, role) in json.loads(default_users):
|
||||||
|
if len(get_by_id(client, index="user", id_field_name="email", id_value=email)) == 0:
|
||||||
|
user = User(email=email, password_hash=hash_password(pwd), role=role)
|
||||||
|
#user.published_from = datetime.now()
|
||||||
|
user.save()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
#Wenn HTTPS benötigt wird (Pfade für RHEL7/können je OS variieren)
|
|
||||||
#cert = "/etc/pki/tls/certs/cert-payment.pem" #cert
|
#TODO: implement some kind of logging mechanism
|
||||||
#key = "/etc/pki/tls/private/cert-payment-private.pem" #key
|
#logging.basicConfig(filename='record.log', level=logging.DEBUG)
|
||||||
#context = (cert, key)
|
#logging.basicConfig(level=logging.DEBUG)
|
||||||
#app.run(debug=True, host='0.0.0.0', ssl_context=context)
|
logging.basicConfig(level=logging.WARN)
|
||||||
app.run(debug=True, host='0.0.0.0')
|
|
||||||
#app.run(debug=True)
|
"""
|
||||||
|
USE_LOKI_LOGGER = os.getenv("USE_LOKI_LOGGER")
|
||||||
|
if USE_LOKI_LOGGER:
|
||||||
|
handler = logging_loki.LokiHandler(
|
||||||
|
url="http://loki:3100/loki/api/v1/push",
|
||||||
|
tags={"application": "Nextsearch"},
|
||||||
|
#auth=("username", "password"),
|
||||||
|
version="1",
|
||||||
|
)
|
||||||
|
app.logger.addHandler(handler)
|
||||||
|
"""
|
||||||
|
|
||||||
|
connections.create_connection(hosts=app.config['elastic_uri'])
|
||||||
|
|
||||||
|
#client = Elasticsearch(app.config['elastic_uri'])
|
||||||
|
#client = Elasticsearch(hosts=[{"host": "elasticsearch"}], retry_on_timeout=True)
|
||||||
|
client = Elasticsearch(app.config['elastic_uri'], retry_on_timeout=True)
|
||||||
|
|
||||||
|
|
||||||
|
#TODO: find a clean way to wait without exceptions!
|
||||||
|
#Wait for elasticsearch to start up!
|
||||||
|
i = 1
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
#client = Elasticsearch(app.config['elastic_uri'])
|
||||||
|
|
||||||
|
client.cluster.health(wait_for_status='yellow')
|
||||||
|
print("Elasticsearch found! Run Flask-app!", flush=True)
|
||||||
|
break
|
||||||
|
except ConnectionError:
|
||||||
|
i *= 1.5
|
||||||
|
time.sleep(i)
|
||||||
|
print("Elasticsearch not found! Wait %s seconds!" % i, flush=True)
|
||||||
|
|
||||||
|
|
||||||
|
# Display cluster health
|
||||||
|
#app.logger.debug(connections.get_connection().cluster.health())
|
||||||
|
|
||||||
|
|
||||||
|
init_indicies()
|
||||||
|
create_default_users()
|
||||||
|
app.run(debug=True, host='0.0.0.0')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -0,0 +1,32 @@
|
|||||||
|
|
||||||
|
|
||||||
|
ELASTIC_URI=http://elasticsearch:9200
|
||||||
|
|
||||||
|
DEFAULT_USERS=[["user@gmail.com", "1234", "user"], ["admin@gmail.com", "1234", "admin"]]
|
||||||
|
|
||||||
|
# DEFAULT_USERS is list of lists, each nested list respectively contains email, password and role
|
||||||
|
# e.g. [["user@gmail.com", "1234", "user"], ["admin@gmail.com", "1234", "admin"]]
|
||||||
|
# leave empty if you don't wish to seed users
|
||||||
|
|
||||||
|
|
||||||
|
#-----------not used yet----------------
|
||||||
|
|
||||||
|
#JWT encryption secret:
|
||||||
|
SECRET=1234
|
||||||
|
|
||||||
|
|
||||||
|
LLM_PAYLOAD=16384
|
||||||
|
CHUNK_SIZE=1536
|
||||||
|
|
||||||
|
|
||||||
|
#A flag for using Loki for logging. To deactivate comment it out
|
||||||
|
USE_LOKI_LOGGER=1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
BOT_ROOT_PATH=/
|
||||||
|
|
||||||
|
BACKEND_INTERNAL_URL=http://backend:5000
|
||||||
|
|
||||||
|
|
@ -4,8 +4,12 @@ Some helper functions to make querying easier
|
|||||||
from typing import Any, Tuple, List, Dict, Any, Callable, Optional
|
from typing import Any, Tuple, List, Dict, Any, Callable, Optional
|
||||||
import json
|
import json
|
||||||
from elasticsearch import NotFoundError, Elasticsearch # for normal read/write without vectors
|
from elasticsearch import NotFoundError, Elasticsearch # for normal read/write without vectors
|
||||||
from elasticsearch_dsl import Search, A
|
from elasticsearch_dsl import Search, A, UpdateByQuery, Document, Date, Integer, Keyword, Float, Long, Text, connections
|
||||||
from elasticsearch_dsl import Document, Date, Integer, Keyword, Float, Long, Text, connections
|
|
||||||
|
|
||||||
|
def get_by_id(client: Elasticsearch, index: str, id_field_name: str, id_value: str):
|
||||||
|
response = Search(using=client, index=index).filter("term", **{id_field_name: id_value})[0:10000].execute()
|
||||||
|
return [hit.to_dict() for hit in response]
|
||||||
|
|
||||||
|
|
||||||
def update_by_id(client: Elasticsearch, index: str, id_field_name: str, id_value: str, values_to_set: Dict[str, Any]) -> None:
|
def update_by_id(client: Elasticsearch, index: str, id_field_name: str, id_value: str, values_to_set: Dict[str, Any]) -> None:
|
||||||
@ -14,6 +18,7 @@ def update_by_id(client: Elasticsearch, index: str, id_field_name: str, id_value
|
|||||||
for k, v in values_to_set.items():
|
for k, v in values_to_set.items():
|
||||||
source += f"ctx._source.{k} = {json.dumps(v)};"
|
source += f"ctx._source.{k} = {json.dumps(v)};"
|
||||||
|
|
||||||
|
"""
|
||||||
body = {
|
body = {
|
||||||
"query": {
|
"query": {
|
||||||
"term": {
|
"term": {
|
||||||
@ -26,6 +31,14 @@ def update_by_id(client: Elasticsearch, index: str, id_field_name: str, id_value
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
client.update_by_query(index=index, body=body)
|
client.update_by_query(index=index, body=body)
|
||||||
|
"""
|
||||||
|
|
||||||
|
ubq = UpdateByQuery(using=client, index=index) \
|
||||||
|
.query("term", **{id_field_name: id_value}) \
|
||||||
|
.script(source=source, lang="painless")
|
||||||
|
|
||||||
|
response = ubq.execute()
|
||||||
|
return response.success()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,84 +1,127 @@
|
|||||||
import os
|
import os
|
||||||
from elasticsearch_dsl import Document, InnerDoc, Date, Integer, Keyword, Float, Long, Text, connections, Object
|
from elasticsearch_dsl import Document, InnerDoc, Nested, Date, Integer, Keyword, Float, Long, Text, connections, Object, Boolean
|
||||||
|
|
||||||
# Define a default Elasticsearch client
|
|
||||||
connections.create_connection(hosts="http://localhost:9200")
|
|
||||||
|
|
||||||
class Article(Document):
|
|
||||||
title = Text(analyzer='snowball', fields={'raw': Keyword()})
|
|
||||||
body = Text(analyzer='snowball')
|
class User(Document):
|
||||||
tags = Keyword()
|
email = Keyword()
|
||||||
published_from = Date()
|
password_hash = Text(index=False)
|
||||||
lines = Integer()
|
role = Keyword()
|
||||||
|
|
||||||
|
#salt = Text(index=False)
|
||||||
|
#profileImage = Text(index=False)
|
||||||
|
#profileImage = Keyword()
|
||||||
|
|
||||||
|
isEmailVerified = Boolean()
|
||||||
|
#status = Text()
|
||||||
|
|
||||||
|
#otpExpires = Date()
|
||||||
|
#resetPasswordToken = Text(index=False)
|
||||||
|
#mailToken = Text(index=False)
|
||||||
|
|
||||||
class Index:
|
class Index:
|
||||||
name = 'blog'
|
name = 'user'
|
||||||
settings = {
|
settings = {
|
||||||
"number_of_shards": 1,
|
"number_of_shards": 1,
|
||||||
}
|
}
|
||||||
|
|
||||||
def save(self, ** kwargs):
|
def save(self, ** kwargs):
|
||||||
self.lines = len(self.body.split())
|
return super(User, self).save(**kwargs)
|
||||||
return super(Article, self).save(** kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
#======= nextsearch_log ===========
|
|
||||||
|
class Chatbot(Document):
|
||||||
|
name = Text()
|
||||||
|
createdBy = Keyword()
|
||||||
|
description = Text()
|
||||||
|
systemPrompt = Text(index=False)
|
||||||
|
|
||||||
|
#slug = Keyword()
|
||||||
|
files = Nested()
|
||||||
|
text = Text()
|
||||||
|
links = Nested()
|
||||||
|
|
||||||
|
#chatbotImage = Text(index=False)
|
||||||
|
sourceCharacters = Integer()
|
||||||
|
|
||||||
|
#visibility = Keyword()
|
||||||
|
#status = Keyword()
|
||||||
|
|
||||||
|
temperature = Float()
|
||||||
|
llm_model = Keyword()
|
||||||
|
|
||||||
|
|
||||||
|
class Index:
|
||||||
|
name = 'chatbot'
|
||||||
|
settings = {
|
||||||
|
"number_of_shards": 1,
|
||||||
|
}
|
||||||
|
|
||||||
|
def save(self, ** kwargs):
|
||||||
|
return super(Chatbot, self).save(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#======= Query Log ===========
|
||||||
|
|
||||||
|
|
||||||
class Sources(InnerDoc):
|
class Sources(InnerDoc):
|
||||||
score = Float()
|
score = Float()
|
||||||
sourceFileId = Text()
|
#sourceFileId = Text()
|
||||||
sourceType = Text()
|
sourceType = Text()
|
||||||
tags = Text()
|
tags = Text()
|
||||||
|
|
||||||
class NextsearchLog(Document):
|
#new fields
|
||||||
a = Text()
|
sourceFileId = Keyword()
|
||||||
|
filename = Keyword()
|
||||||
|
url = Keyword()
|
||||||
|
txt_id = Keyword()
|
||||||
|
page = Integer()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class QueryLog(Document):
|
||||||
|
answer = Text()
|
||||||
|
question = Text()
|
||||||
|
|
||||||
chatbotid = Keyword()
|
chatbotid = Keyword()
|
||||||
durasecs = Float()
|
durasecs = Float()
|
||||||
inCt = Float()
|
#inCt = Float()
|
||||||
inToks = Long()
|
inToks = Long()
|
||||||
llm = Text()
|
llm = Text()
|
||||||
outCt = Float()
|
#outCt = Float()
|
||||||
outToks = Long()
|
outToks = Long()
|
||||||
q = Text()
|
|
||||||
queryid = Keyword()
|
#queryid = Keyword()
|
||||||
rating = Long()
|
#rating = Long()
|
||||||
reason = Text()
|
#reason = Text()
|
||||||
reasontags = Text()
|
#reasontags = Text()
|
||||||
session = Keyword()
|
session = Keyword()
|
||||||
|
|
||||||
sources = Object(Sources) #Text(analyzer='snowball')
|
sources = Object(Sources)
|
||||||
temperature = Float()
|
temperature = Float()
|
||||||
totalCt = Float()
|
#totalCt = Float()
|
||||||
|
|
||||||
timest = Date() #timestamp
|
timest = Date() #timestamp
|
||||||
date = Date() #iso date
|
date = Date() #iso date
|
||||||
|
|
||||||
class Index:
|
class Index:
|
||||||
#name = 'test_nextsearch_log'
|
name = 'query_log'
|
||||||
name = 'nextsearch_log'
|
|
||||||
settings = {
|
settings = {
|
||||||
"number_of_shards": 1,
|
"number_of_shards": 1,
|
||||||
}
|
}
|
||||||
|
|
||||||
def save(self, ** kwargs):
|
def save(self, ** kwargs):
|
||||||
self.lines = len(self.body.split())
|
return super(QueryLog, self).save(**kwargs)
|
||||||
return super(NextsearchLog, self).save(** kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
elastic_uri = os.getenv("ELASTIC_URI")
|
elastic_uri = os.getenv("ELASTIC_URI")
|
||||||
#elastic_uri = "http://localhost:9200"
|
#elastic_uri = "http://localhost:9200"
|
||||||
assert elastic_uri
|
|
||||||
|
|
||||||
# Define a default Elasticsearch client
|
|
||||||
connections.create_connection(hosts=elastic_uri)
|
|
||||||
#connections.create_connection(hosts)
|
|
||||||
|
|
||||||
# create the mappings in elasticsearch
|
|
||||||
NextsearchLog.init()
|
|
||||||
|
|
||||||
# create the mappings in elasticsearch
|
|
||||||
#Article.init()
|
|
||||||
|
|
||||||
# create and save and article
|
# create and save and article
|
||||||
#article = Article(meta={'id': 42}, title='Hello world!', tags=['test'])
|
#article = Article(meta={'id': 42}, title='Hello world!', tags=['test'])
|
||||||
|
@ -13,3 +13,9 @@ Flask-SocketIO
|
|||||||
flask-openapi3
|
flask-openapi3
|
||||||
|
|
||||||
minio
|
minio
|
||||||
|
|
||||||
|
python-logging-loki
|
||||||
|
pyjwt
|
||||||
|
cryptography
|
||||||
|
|
||||||
|
|
||||||
|
@ -36,12 +36,14 @@ services:
|
|||||||
container_name: ${APP_PREFIX}_elasticsearch
|
container_name: ${APP_PREFIX}_elasticsearch
|
||||||
image: docker.elastic.co/elasticsearch/elasticsearch:8.11.0
|
image: docker.elastic.co/elasticsearch/elasticsearch:8.11.0
|
||||||
restart: always
|
restart: always
|
||||||
|
mem_limit: 4024m
|
||||||
ports:
|
ports:
|
||||||
- "9200:9200"
|
- "9200:9200"
|
||||||
environment:
|
environment:
|
||||||
- discovery.type=single-node
|
- discovery.type=single-node
|
||||||
- xpack.security.enabled=false
|
- xpack.security.enabled=false
|
||||||
- logger.level=ERROR
|
- logger.level=ERROR
|
||||||
|
#- ES_JAVA_OPTS="-Xms2g -Xmx2g"
|
||||||
volumes:
|
volumes:
|
||||||
- esdata:/usr/share/elasticsearch/data
|
- esdata:/usr/share/elasticsearch/data
|
||||||
ulimits:
|
ulimits:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user