from langchain_openai import AzureOpenAIEmbeddings, AzureChatOpenAI
from langchain_ollama import OllamaEmbeddings, OllamaLLM
from langchain.chains import LLMChain
from langchain_core.prompts import PromptTemplate
import io
import os
import json
import transport
import cms
import uuid
from multiprocessing import Process
import pandas as pd
from gtts import gTTS

# USE_OPENAI = os.environ.get('USE_OPENAI',0)

class ILLM :
    #
    # Thin wrapper that routes embedding/completion calls to Azure OpenAI or a
    # local Ollama instance, depending on which key ('openai' or 'ollama') is
    # provided. The key's value is the path to a JSON file holding the keyword
    # arguments for the model constructors.
    #
    def __init__(self,**_args) :
        self.USE_OPENAI = 'openai' in _args
        _path = _args['ollama'] if not self.USE_OPENAI else _args['openai']
        with open(_path) as f :
            self._args = json.loads(f.read())
        self._prompt = _args['prompt'] if 'prompt' in _args else {}
    def embed(self,_question):
        #
        # Turn a question into a vector with the configured embedding model
        #
        _pointer = AzureOpenAIEmbeddings if self.USE_OPENAI else OllamaEmbeddings
        _kwargs = self._args if 'embedding' not in self._args else self._args['embedding']
        _handler = _pointer(**_kwargs)
        return _handler.embed_query(_question)
    def answer(self,_question,_context) :
        #
        # Submit the question and the retrieved context to the completion model
        #
        _pointer = AzureChatOpenAI if self.USE_OPENAI else OllamaLLM
        _kwargs = self._args if 'completion' not in self._args else self._args['completion']
        _llm = _pointer(**_kwargs)
        _prompt = PromptTemplate(**self._prompt)
        chain = LLMChain(llm=_llm,prompt=_prompt)
        _input = {'context':_context,'question':_question}
        _input = json.loads(json.dumps(_input))
        resp = chain.invoke(_input)
        return resp
    def schema(self):
        return 'openai' if self.USE_OPENAI else 'public'
    def documents(self,_vector) :
        #
        # Return the 5 documents nearest to the question vector, using the
        # pgvector '<->' distance operator (lower distance = more similar)
        #
        _schema = 'openai' if self.USE_OPENAI else 'ollama'
        pgr = transport.get.reader(label='llm', schema=_schema)
        sql = f"""SELECT file, name, page, content, embeddings <-> '{json.dumps(_vector)}' similarity
                  FROM {_schema}.documents
                  ORDER BY similarity ASC LIMIT 5"""
        _df = pgr.read(sql=sql)
        pgr.close()
        return _df
    def lookup (self,index:int,token) :
        #
        # Retrieve the (index)-th logged answer for a given session token
        # @TODO: parameterize the query instead of interpolating the token
        #
        _schema = 'openai' if self.USE_OPENAI else 'ollama'
        pgr = transport.get.reader(label='llm', schema=_schema)
        index = int(index) + 1
        _sql = f"SELECT * FROM (select row_number() over(partition by token) as row_index, answer from llm_logs where token='{token}') as _x where row_index = {index}"
        _df = pgr.read(sql=_sql)
        pgr.close()
        return _df.answer.iloc[0] if _df.shape[0] > 0 else None

@cms.Plugin(mimetype="application/json",method="POST")
def answer (**_args):
    _request = _args['request']
    _config = _args['config']['system']['source']['llm']
    _question = _request.json['question']
    token = str(uuid.uuid4()) if 'token' not in _request.json else _request.json['token']
    _index = _request.json['index'] if 'index' in _request.json else 0
    _llmproc = ILLM(**_config)
    #
    # Turn the question into a vector and find the nearest documents
    #
    _vector = _llmproc.embed(_question)
    _df = _llmproc.documents(_vector)
    _pages = _df.content.tolist()
    #
    # debug shortcut (disabled): return the matches without calling the LLM
    # return _df[['name','page','similarity']].to_dict(orient='records')
    #
    # Let us submit the question to the llm-server
    #
    resp = _llmproc.answer(_question, _pages)
    #
    # Log questions, answers and sources to see what things are like
    #
    _context = _args['config']['system']['context'].strip()
    _out = {"token":token,"openai":_llmproc.USE_OPENAI,"answer":resp["text"],"documents": _df[["name","page"]].to_dict(orient='records')}
    try:
        def _logger():
            _log = pd.DataFrame([dict(_out,**{'question':_question})])
            _log.documents = _df[["name","page"]].to_json(orient='records')
            pgw = transport.get.writer(label='llm',table='llm_logs')
            pgw.write(_log)
            pgw.close()
        #
        # log in a separate process so the response is not delayed
        pthread = Process(target=_logger)
        pthread.start()
    except Exception as e:
        print (e)
    #
    _out['stream'] = f'{_context}/api/medix/audio?token={token}&index={_index}'
    return _out

@cms.Plugin(mimetype="text/plain",method="POST")
def info (**_args):
    _config = _args['config']
    return list(_config['system']['source']['llm'].keys())[0]

@cms.Plugin(mimetype="audio/mpeg",method="GET")
def audio (**_args):
    #
    # Text-to-speech endpoint: look up a logged answer and stream it as MP3
    #
    _request = _args['request']
    _config = _args['config']['system']['source']['llm']
    _index = _request.args['index']
    _token = _request.args['token']
    _llmproc = ILLM(**_config)
    _text = _llmproc.lookup(_index,_token)
    g = gTTS(_text,lang='en')
    return g.stream()
    # stream = io.BytesIO()
    # for line in g.stream() :
    #     stream.write(line)
    # stream.seek(0)
    # return stream
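
#
# Usage sketch (illustrative only): driving ILLM outside of the cms plugin
# layer. The config file name ('ollama.json'), its contents, and the prompt
# template below are assumptions, not part of this module. The JSON file is
# expected to hold the constructor keyword arguments, e.g.
# {"embedding": {"model": "mxbai-embed-large"}, "completion": {"model": "llama3"}},
# and documents() additionally assumes an 'llm' database label registered
# with transport.
#
if __name__ == '__main__' :
    _prompt = {'input_variables':['context','question'],
               'template':'Use the context to answer.\nContext: {context}\nQuestion: {question}\nAnswer:'}
    _llmproc = ILLM(ollama='ollama.json', prompt=_prompt)
    _question = 'What are the visiting hours?'
    _vector = _llmproc.embed(_question)          # question -> embedding vector
    _matches = _llmproc.documents(_vector)       # 5 nearest documents (pgvector)
    resp = _llmproc.answer(_question, _matches.content.tolist())
    print (resp['text'])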