diff --git a/.gitignore b/.gitignore
index dd4aac18..fba1cc85 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,3 +20,4 @@ llama-cpp-env/
 *-env/
 build-envs/
 portable-git/
+llm_cache/*
diff --git a/LlamaCPP/llama_adapter.py b/LlamaCPP/llama_adapter.py
index 20e25b10..968f0c73 100644
--- a/LlamaCPP/llama_adapter.py
+++ b/LlamaCPP/llama_adapter.py
@@ -81,7 +81,7 @@ def error_callback(self, ex: Exception):
         elif isinstance(ex, RuntimeError):
             self.put_msg({"type": "error", "err_type": "runtime_error"})
         else:
-            self.put_msg({"type": "error", "err_type": "unknow_exception"})
+            self.put_msg({"type": "error", "err_type": "unknown_exception"})
         print(f"exception:{str(ex)}")
 
     def text_conversation(self, params: LLMParams):
diff --git a/LlamaCPP/llama_params.py b/LlamaCPP/llama_params.py
index 42799f1e..86f18ab8 100644
--- a/LlamaCPP/llama_params.py
+++ b/LlamaCPP/llama_params.py
@@ -1,4 +1,4 @@
-from typing import Dict, List
+from typing import Any, Dict, List
 
 class LLMParams:
     prompt: List[Dict[str, str]]
@@ -6,12 +6,14 @@ class LLMParams:
     enable_rag: bool
     model_repo_id: str
     max_tokens: int
+    generation_parameters: Dict[str, Any]
 
     def __init__(
-        self, prompt: list, device: int, enable_rag: bool, model_repo_id: str, max_tokens: int
+        self, prompt: list, device: int, enable_rag: bool, model_repo_id: str, max_tokens: int, **kwargs
     ) -> None:
         self.prompt = prompt
         self.device = device
         self.enable_rag = enable_rag
         self.model_repo_id = model_repo_id
-        self.max_tokens = max_tokens
\ No newline at end of file
+        self.max_tokens = max_tokens
+        self.generation_parameters = kwargs
\ No newline at end of file
diff --git a/OpenVINO/.gitignore b/OpenVINO/.gitignore
new file mode 100644
index 00000000..6348809c
--- /dev/null
+++ b/OpenVINO/.gitignore
@@ -0,0 +1,16 @@
+.vscode/
+__pycache__/
+models/llm/
+temp/
+test/
+dist/
+build/
+cache/
+test/
+env/
+
+!tools/*.exe
+llm_cache/
+TinyLlama-*
+laion/
+db/
\ No newline at end of file
diff --git a/OpenVINO/openvino_adapter.py b/OpenVINO/openvino_adapter.py
new file mode 100644
index 00000000..8a3902c9
--- /dev/null
+++ b/OpenVINO/openvino_adapter.py
@@ -0,0 +1,169 @@
+import threading
+from queue import Empty, Queue
+import json
+import traceback
+from typing import Dict, List, Callable
+from openvino_interface import LLMInterface
+from openvino_params import LLMParams
+
+RAG_PROMPT_FORMAT = "Answer the questions based on the information below. \n{context}\n\nQuestion: {prompt}"
+
+class LLM_SSE_Adapter:
+    msg_queue: Queue
+    finish: bool
+    signal: threading.Event
+    llm_interface: LLMInterface
+    should_stop: bool
+
+    def __init__(self, llm_interface: LLMInterface):
+        self.msg_queue = Queue(-1)
+        self.finish = False
+        self.signal = threading.Event()
+        self.llm_interface = llm_interface
+        self.should_stop = False
+
+    def put_msg(self, data):
+        self.msg_queue.put_nowait(data)
+        self.signal.set()
+
+    def load_model_callback(self, event: str):
+        data = {"type": "load_model", "event": event}
+        self.put_msg(data)
+
+    def text_in_callback(self, msg: str):
+        data = {"type": "text_in", "value": msg}
+        self.put_msg(data)
+
+    def text_out_callback(self, msg: str, type=1):
+        data = {"type": "text_out", "value": msg, "dtype": type}
+        self.put_msg(data)
+
+    def first_latency_callback(self, first_latency: str):
+        data = {"type": "first_token_latency", "value": first_latency}
+        self.put_msg(data)
+
+    def after_latency_callback(self, after_latency: str):
+        data = {"type": "after_token_latency", "value": after_latency}
+        self.put_msg(data)
+
+    def sr_latency_callback(self, sr_latency: str):
+        data = {"type": "sr_latency", "value": sr_latency}
+        self.put_msg(data)
+
+    def error_callback(self, ex: Exception):
+        if (
+            isinstance(ex, NotImplementedError)
+            and ex.__str__() == "Access to repositories lists is not implemented."
+        ):
+            self.put_msg(
+                {
+                    "type": "error",
+                    "err_type": "repositories_not_found",
+                }
+            )
+        # elif isinstance(ex, NotEnoughDiskSpaceException):
+        #     self.put_msg(
+        #         {
+        #             "type": "error",
+        #             "err_type": "not_enough_disk_space",
+        #             "need": bytes2human(ex.requires_space),
+        #             "free": bytes2human(ex.free_space),
+        #         }
+        #     )
+        # elif isinstance(ex, DownloadException):
+        #     self.put_msg({"type": "error", "err_type": "download_exception"})
+        # # elif isinstance(ex, llm_biz.StopGenerateException):
+        # #     pass
+        elif isinstance(ex, RuntimeError):
+            self.put_msg({"type": "error", "err_type": "runtime_error"})
+        else:
+            self.put_msg({"type": "error", "err_type": "unknown_exception"})
+        self.put_msg(f"exception:{str(ex)}")
+
+    def text_conversation(self, params: LLMParams):
+        thread = threading.Thread(
+            target=self.text_conversation_run,
+            args=[params],
+        )
+        thread.start()
+        return self.generator()
+
+
+    def stream_function(self, output):
+        self.text_out_callback(output)
+        if self.llm_interface.stop_generate:
+            self.put_msg("Stopping generation.")
+            return True  # Stop generation
+
+        return False
+
+
+    def text_conversation_run(
+        self,
+        params: LLMParams,
+    ):
+        try:
+            self.llm_interface.load_model(params, callback=self.load_model_callback)
+
+            prompt = params.prompt
+            full_prompt = convert_prompt(prompt)
+            self.llm_interface.create_chat_completion(full_prompt, self.stream_function, params.max_tokens)
+
+        except Exception as ex:
+            traceback.print_exc()
+            self.error_callback(ex)
+        finally:
+            self.llm_interface.stop_generate = False
+            self.finish = True
+            self.signal.set()
+
+    def generator(self):
+        while True:
+            while not self.msg_queue.empty():
+                try:
+                    data = self.msg_queue.get_nowait()
+                    msg = f"data:{json.dumps(data)}\0"
+                    print(msg)
+                    yield msg
+                except Empty:
+                    break
+            if not self.finish:
+                self.signal.clear()
+                self.signal.wait()
+            else:
+                break
+
+
+_default_prompt = {
+    "role": "system",
+    "content": "You are a helpful digital assistant. Please provide safe, ethical and accurate information to the user. Please keep the output text language the same as the user input.",
+    }
+
+def convert_prompt(prompt: List[Dict[str, str]]):
+    chat_history = [_default_prompt]
+    prompt_len = prompt.__len__()
+    i = 0
+    while i < prompt_len:
+        chat_history.append({"role": "user", "content": prompt[i].get("question")})
+        if i < prompt_len - 1:
+            chat_history.append(
+                {"role": "assistant", "content": prompt[i].get("answer")}
+            )
+        i = i + 1
+    return chat_history
+
+
+def process_rag(
+        prompt: str,
+        device: str,
+        text_out_callback: Callable[[str, int], None] = None,
+    ):
+    import rag
+    rag.to(device)
+    query_success, context, rag_source = rag.query(prompt)
+    if query_success:
+        print("rag query input\r\n{}output:\r\n{}".format(prompt, context))
+        prompt = RAG_PROMPT_FORMAT.format(prompt=prompt, context=context)
+        if text_out_callback is not None:
+            text_out_callback(rag_source, 2)
+    return prompt
\ No newline at end of file
diff --git a/OpenVINO/openvino_backend.py b/OpenVINO/openvino_backend.py
new file mode 100644
index 00000000..5c45f7d4
--- /dev/null
+++ b/OpenVINO/openvino_backend.py
@@ -0,0 +1,52 @@
+from typing import Dict, List, Callable
+from os import path
+from openvino_interface import LLMInterface
+import openvino_genai
+from openvino_params import LLMParams
+import openvino_model_config as model_config
+import gc
+
+class OpenVino(LLMInterface):
+    def __init__(self):
+        self._model = None
+        self.stop_generate = False
+        self._last_repo_id = None
+
+    def load_model(self, params: LLMParams, callback: Callable[[str], None] = None):
+        model_repo_id = params.model_repo_id
+        if self._model is None or self._last_repo_id != model_repo_id:
+            if callback is not None:
+                callback("start")
+            self.unload_model()
+            callback(params.model_repo_id)
+
+            model_base_path = model_config.openVINOConfig.get("openvinoLLM")
+            model_name = model_repo_id.replace("/", "---")
+            model_path = path.abspath(path.join(model_base_path, model_name))
+
+            enable_compile_cache = dict()
+            enable_compile_cache["CACHE_DIR"] = "llm_cache"
+            self._model = openvino_genai.LLMPipeline(model_path, "GPU", **enable_compile_cache)
+            self._tokenizer = self._model.get_tokenizer()
+
+            self._last_repo_id = model_repo_id
+            if callback is not None:
+                callback("finish")
+
+
+    def create_chat_completion(self, messages: List[Dict[str, str]], streamer: Callable[[str], None], max_tokens: int = 1024):
+        config = openvino_genai.GenerationConfig()
+        config.max_new_tokens = max_tokens
+
+        full_prompt = self._tokenizer.apply_chat_template(messages, add_generation_prompt=True)
+        return self._model.generate(full_prompt, config, streamer)
+
+
+    def unload_model(self):
+        if self._model is not None:
+            del self._model
+            gc.collect()
+            self._model = None
+
+    def get_backend_type(self):
+        return "openvino"
\ No newline at end of file
diff --git a/OpenVINO/openvino_interface.py b/OpenVINO/openvino_interface.py
new file mode 100644
index 00000000..1743fe75
--- /dev/null
+++ b/OpenVINO/openvino_interface.py
@@ -0,0 +1,24 @@
+from abc import ABC, abstractmethod
+from typing import Dict, List, Optional
+from openvino_params import LLMParams
+
+class LLMInterface(ABC):
+    stop_generate: bool
+    _model: Optional[object]
+
+    @abstractmethod
+    def load_model(self, params: LLMParams, **kwargs):
+        pass
+
+    @abstractmethod
+    def unload_model(self):
+        pass
+
+    @abstractmethod
+    def create_chat_completion(self, messages: List[Dict[str, str]]):
+        pass
+
+    @abstractmethod
+    def get_backend_type(self):
+        pass
+
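The three new modules above (params, adapter, backend interface) are wired together by the Flask route added further down in openvino_web_api.py. As a rough illustration only, not part of the change set, they compose as sketched below, assuming an OpenVINO-format model directory already exists under the configured models path; the extra keyword argument is merely collected into generation_parameters and is not consumed yet.

# Illustrative sketch (not part of the diff): drives the new OpenVINO backend by hand,
# mirroring what the /api/llm/chat route does.
from openvino_backend import OpenVino
from openvino_adapter import LLM_SSE_Adapter
from openvino_params import LLMParams

llm_backend = OpenVino()
adapter = LLM_SSE_Adapter(llm_backend)

params = LLMParams(
    prompt=[{"question": "What is OpenVINO?"}],
    device=0,
    enable_rag=False,
    model_repo_id="OpenVINO/TinyLlama-1.1B-Chat-v1.0-int4-ov",  # one of the predefined models added later in this diff
    max_tokens=256,
    temperature=0.7,  # extra kwargs land in params.generation_parameters (currently unused)
)

# text_conversation() starts a worker thread and returns the SSE generator.
for sse_chunk in adapter.text_conversation(params):
    print(sse_chunk)  # "data:{...}\0"-framed JSON events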
diff --git a/OpenVINO/openvino_model_config.py b/OpenVINO/openvino_model_config.py
new file mode 100644
index 00000000..2804dc0f
--- /dev/null
+++ b/OpenVINO/openvino_model_config.py
@@ -0,0 +1,4 @@
+openVINOConfig = {
+    "openvinoLLM": "../service/models/llm/openvino",
+}
+
diff --git a/OpenVINO/openvino_params.py b/OpenVINO/openvino_params.py
new file mode 100644
index 00000000..86f18ab8
--- /dev/null
+++ b/OpenVINO/openvino_params.py
@@ -0,0 +1,19 @@
+from typing import Any, Dict, List
+
+class LLMParams:
+    prompt: List[Dict[str, str]]
+    device: int
+    enable_rag: bool
+    model_repo_id: str
+    max_tokens: int
+    generation_parameters: Dict[str, Any]
+
+    def __init__(
+        self, prompt: list, device: int, enable_rag: bool, model_repo_id: str, max_tokens: int, **kwargs
+    ) -> None:
+        self.prompt = prompt
+        self.device = device
+        self.enable_rag = enable_rag
+        self.model_repo_id = model_repo_id
+        self.max_tokens = max_tokens
+        self.generation_parameters = kwargs
\ No newline at end of file
diff --git a/OpenVINO/openvino_rag.py b/OpenVINO/openvino_rag.py
new file mode 100644
index 00000000..8e5918c5
--- /dev/null
+++ b/OpenVINO/openvino_rag.py
@@ -0,0 +1,190 @@
+import gc
+import json
+import os
+import time
+from typing import Any, List, Dict
+
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.embeddings import LlamaCppEmbeddings
+from langchain_community.document_loaders.markdown import UnstructuredMarkdownLoader
+from langchain_community.document_loaders.pdf import PyPDFLoader
+from langchain_community.document_loaders.text import TextLoader
+from langchain_community.document_loaders.word_document import (
+    UnstructuredWordDocumentLoader,
+    Docx2txtLoader,
+)
+from langchain_community.vectorstores.faiss import FAISS, Document
+
+#### CONFIGURATIONS ------------------------------------------------------------------------------------------------------------------------
+INDEX_DATABASE_PATH = "./db/"  # Faiss database folder
+CHUNK_SIZE = 1600  # Chunk size for text splitter
+CHUNK_OVERLAP = 400  # Chunk overlap for text splitter
+INDEX_NUM = 2  # Number of content pieces to retrieve
+MAX_NEW_TOKENS = 320  # Max length of LLM output
+
+
+# Embedding model class - create a wrapper for embedding model
+class EmbeddingWrapper:
+    def __init__(self, model_path: str):
+        start = time.time()
+        print(f"******* loading {model_path} start ")
+        self.model = LlamaCppEmbeddings(model_path=model_path)
+        print(
+            "******* loading {} finish. cost {:3f}s".format(
+                model_path, time.time() - start
+            )
+        )
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        t0 = time.time()
+        embeddings = self.model.embed_documents(texts)
+        t1 = time.time()
+        print("-----------LlamaCpp--embedding cost time(s): ", t1 - t0)
+        return embeddings
+
+    def embed_query(self, text: str) -> List[float]:
+        return self.model.embed_query(text)
+
+
+# Faiss database - manage embeddings and file indexing
+class EmbeddingDatabase:
+    db: FAISS
+    embeddings: EmbeddingWrapper
+    text_splitter: RecursiveCharacterTextSplitter
+    index_list: List[Dict[str, Any]]
+
+    def __init__(self, embeddings: EmbeddingWrapper):
+        self.embeddings = embeddings
+        index_cache = os.path.join(INDEX_DATABASE_PATH, "index.faiss")
+        self.db = (
+            FAISS.load_local(INDEX_DATABASE_PATH, self.embeddings)
+            if os.path.exists(index_cache)
+            else None
+        )
+        index_json = os.path.join(INDEX_DATABASE_PATH, "index.json")
+        self.index_list = (
+            self.__load_exists_index(index_json)
+            if os.path.exists(index_json)
+            else list()
+        )
+        self.text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP, length_function=len
+        )
+
+    def __load_exists_index(self, index_json: str):
+        try:
+            with open(index_json, "r") as f:
+                return json.load(f)
+        except Exception as e:
+            print(f"load index.json error: {e}")
+            return list()
+
+    def __save_index(self, file_base_name: str, md5: str, doc_ids: str):
+        self.index_list.append({"name": file_base_name, "md5": md5, "doc_ids": doc_ids})
+        if not os.path.exists(INDEX_DATABASE_PATH):
+            os.makedirs(INDEX_DATABASE_PATH)
+        index_json = os.path.join(INDEX_DATABASE_PATH, "index.json")
+        with open(index_json, "w") as f:
+            json.dump(self.index_list, f)
+        self.db.save_local(INDEX_DATABASE_PATH)
+
+    def __add_documents(self, file_base_name: str, docs: List[Document], md5: str):
+        if self.db is None:
+            self.db = FAISS.from_documents(docs, self.embeddings)
+        else:
+            self.db.add_documents(docs)
+        print(docs[0].metadata)
+        self.__save_index(file_base_name, md5, [doc.metadata["doc_id"] for doc in docs])
+
+    def __analyze_file_to_db(self, file: str, md5: str):
+        file_base_name = os.path.basename(file)
+        file_ext = os.path.splitext(file_base_name)[1].lower()
+
+        if file_ext == ".txt":
+            raw_documents = TextLoader(file, encoding="utf-8").load()
+        elif file_ext == ".pdf":
+            raw_documents = PyPDFLoader(file).load()
+        elif file_ext == ".doc":
+            raw_documents = UnstructuredWordDocumentLoader(file).load()
+        elif file_ext == ".docx":
+            raw_documents = Docx2txtLoader(file).load()
+        elif file_ext == ".md":
+            raw_documents = UnstructuredMarkdownLoader(file).load()
+        else:
+            raise Exception(f"Unsupported file extension {file_ext}")
+
+        docs = self.text_splitter.split_documents(raw_documents)
+        if docs:
+            print("Analyze {} got {} index files.".format(file_base_name, len(docs)))
+            self.__add_documents(file_base_name, docs, md5)
+        else:
+            raise Exception(f"Cannot analyze {file_base_name}")
+
+    def add_index_file(self, file: str):
+        md5 = self.__calculate_md5(file)
+        for item in self.index_list:
+            if item["md5"] == md5:
+                print(f"{os.path.basename(file)} already indexed.")
+                return 1, md5
+
+        self.__analyze_file_to_db(file, md5)
+        return 0, md5
+
+    def query_database(self, query: str):
+        if not query:
+            raise Exception("Query cannot be None or empty")
+
+        print("******* Querying database...")
+        if self.db is None:
+            return False, None, None
+
+        docs = self.db.similarity_search_with_relevance_scores(
+            query, k=INDEX_NUM, score_threshold=0.4
+        )
+
+        if not docs:
+            return False, None, None
+
+        doc_contents = [doc.page_content for doc, _ in docs]
+        source_files = {doc.metadata["source"] for doc, _ in docs}
+        return True, "\n\n".join(doc_contents), "\n".join(source_files)
+
+    def __calculate_md5(self, file_path: str) -> str:
+        import hashlib
+
+        hasher = hashlib.md5()
+        with open(file_path, "rb") as f:
+            buf = f.read()
+            hasher.update(buf)
+        return hasher.hexdigest()
+
+
+def init(model_path: str):
+    global embedding_database, embedding_wrapper
+    embedding_wrapper = EmbeddingWrapper(model_path=model_path)
+    embedding_database = EmbeddingDatabase(embedding_wrapper)
+
+
+def add_index_file(file: str):
+    return embedding_database.add_index_file(file)
+
+
+def query(query: str):
+    return embedding_database.query_database(query)
+
+
+def dispose():
+    global embedding_database, embedding_wrapper
+    embedding_database = None
+    embedding_wrapper = None
+    gc.collect()
+
+
+if __name__ == "__main__":
+    # Example Usage
+    init(model_path="/Users/daniel/silicon/AI-Playground/LlamaCPP/models/llm/gguf/bge-large-en-v1.5-q8_0.gguf")
+    add_index_file("/Users/daniel/silicon/AI-Playground/hello.txt")
+    success, context, source = query("What is the content about?")
+    print("Query success:", success)
+    print("Context:", context)
+    print("Source Files:", source)
+    dispose()
diff --git a/OpenVINO/openvino_test.py b/OpenVINO/openvino_test.py
new file mode 100644
index 00000000..1c5db2ce
--- /dev/null
+++ b/OpenVINO/openvino_test.py
@@ -0,0 +1,26 @@
+import requests
+
+
+url = "http://127.0.0.1:29000/api/llm/chat"
+params = {
+    "prompt": [{"question": "Your name is Luca", "answer": "My name is Luca."}, {"question": "What is your name?"}],
+    "device": "",
+    "enable_rag": False,
+    "model_repo_id": "meta-llama-3.1-8b-instruct-q5_k_m.gguf",
+}
+response = requests.post(url, json=params, stream=True)
+# Check if the response status code is 200 (OK)
+response.raise_for_status()
+e = 1
+# Iterate over the response lines
+for line in response.iter_lines():
+    e += 1
+    if line:
+        # Decode the line (assuming UTF-8 encoding)
+        decoded_line = line.decode('utf-8')
+
+        # SSE events typically start with "data: "
+        if decoded_line.startswith("data:"):
+            # Extract the data part
+            data = decoded_line[len("data:"):]
+            print(data)  # Process the data as needed
\ No newline at end of file
diff --git a/OpenVINO/openvino_web_api.py b/OpenVINO/openvino_web_api.py
new file mode 100644
index 00000000..cbe6ccf4
--- /dev/null
+++ b/OpenVINO/openvino_web_api.py
@@ -0,0 +1,46 @@
+import os
+os.environ['PATH'] = os.path.abspath('../openvino-env/Library/bin') + os.pathsep + os.environ['PATH']
+from apiflask import APIFlask
+from flask import jsonify, request, Response, stream_with_context
+from openvino_backend import OpenVino
+from openvino_adapter import LLM_SSE_Adapter
+from openvino_params import LLMParams
+
+app = APIFlask(__name__)
+llm_backend = OpenVino()
+
+
+@app.get("/health")
+def health():
+    return jsonify({"code": 0, "message": "success"})
+
+
+@app.post("/api/llm/chat")
+def llm_chat():
+    params = request.get_json()
+    params.pop("print_metrics", None)
+    llm_params = LLMParams(**params)
+    sse_invoker = LLM_SSE_Adapter(llm_backend)
+    it = sse_invoker.text_conversation(llm_params)
+    return Response(stream_with_context(it), content_type="text/event-stream")
+
+
+@app.post("/api/free")
+def free():
+    llm_backend.unload_model()
+    return jsonify({"code": 0, "message": "success"})
+
+
+@app.get("/api/llm/stopGenerate")
+def stop_llm_generate():
+    llm_backend.stop_generate = True
+    return jsonify({"code": 0, "message": "success"})
+
+
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser(description="AI Playground Web service")
+    parser.add_argument("--port", type=int, default=59997, help="Service listen port")
+    args = parser.parse_args()
+    app.run(host="127.0.0.1", port=args.port, use_reloader=False)
diff --git a/OpenVINO/requirements.txt b/OpenVINO/requirements.txt
new file mode 100644
index 00000000..33fa30eb
--- /dev/null
+++ b/OpenVINO/requirements.txt
@@ -0,0 +1,6 @@
+--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
+Flask==3.0.3
+apiflask==2.3.0
+openvino==2025.1.0.dev20250203
+openvino-genai==2025.1.0.0.dev20250203
+openvino-tokenizers==2025.1.0.0.dev20250203
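openvino_test.py above only exercises the /api/llm/chat route. As a hedged aside, not part of the change set, the remaining routes defined in openvino_web_api.py can be poked the same way; this assumes the service is listening on 127.0.0.1:29000, the address used by the test script (the packaged app picks a port from the 29000-29999 range, see apiServiceRegistry.ts below).

# Illustrative sketch: exercise the auxiliary endpoints of openvino_web_api.py.
import requests

base = "http://127.0.0.1:29000"

print(requests.get(f"{base}/health").json())                # {"code": 0, "message": "success"}
print(requests.get(f"{base}/api/llm/stopGenerate").json())  # request a running generation to stop
print(requests.post(f"{base}/api/free").json())             # unload the model and free memory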
diff --git a/WebUI/.gitignore b/WebUI/.gitignore
index 00fddc80..ef23ebb0 100644
--- a/WebUI/.gitignore
+++ b/WebUI/.gitignore
@@ -14,6 +14,7 @@ dist-ssr
 release/
 ComfyUI/
 LlamaCPP/
+OpenVino/
 
 # Editor directories and files
 .vscode/*
diff --git a/WebUI/build/build-config.json b/WebUI/build/build-config.json
index 0df090fa..9fbbb997 100644
--- a/WebUI/build/build-config.json
+++ b/WebUI/build/build-config.json
@@ -26,6 +26,11 @@
       "to": "LlamaCPP",
       "filter": ["!__pycache__/"]
     },
+    {
+      "from": "external/OpenVINO",
+      "to": "OpenVINO",
+      "filter": ["!__pycache__/", "!.cache/", "!db/", "!llm_cache/"]
+    },
     {
       "from": "external/intel_extension_for_pytorch-2.3.110+bmg-cp311-cp311-win_amd64.whl",
       "to": "intel_extension_for_pytorch-2.3.110+bmg-cp311-cp311-win_amd64.whl"
diff --git a/WebUI/build/scripts/prepare-python-env.js b/WebUI/build/scripts/prepare-python-env.js
index e9fd552b..687b815d 100644
--- a/WebUI/build/scripts/prepare-python-env.js
+++ b/WebUI/build/scripts/prepare-python-env.js
@@ -82,6 +82,7 @@ python311.zip
 ../service
 ../ComfyUI
 ../LlamaCPP
+../OpenVINO
 
 # Uncomment to run site.main() automatically
 import site
diff --git a/WebUI/build/scripts/provide-electron-build-resources.js b/WebUI/build/scripts/provide-electron-build-resources.js
index c1e254c0..be1bc003 100644
--- a/WebUI/build/scripts/provide-electron-build-resources.js
+++ b/WebUI/build/scripts/provide-electron-build-resources.js
@@ -9,6 +9,7 @@ const buildResourcesDirArg = argv.build_resources_dir
 const pythonEnvDirArg = argv.python_env_dir
 const aiBackendDirArg = argv.backend_dir
 const llamaCppBackendDirArg = argv.llamacpp_dir
+const openVINOBackendDirArg = argv.openvino_dir
 const targetDirectoryArg = argv.target_dir
 
 if (
@@ -16,10 +17,11 @@ if (
   !pythonEnvDirArg ||
   !aiBackendDirArg ||
   !targetDirectoryArg ||
-  !llamaCppBackendDirArg
+  !llamaCppBackendDirArg ||
+  !openVINOBackendDirArg
 ) {
   console.error(
-    'Usage: node provide-electron-build-resources.js --build_resources_dir=$DIR --python_env_dir=$DIR --backend_dir=$DIR --llamacpp_dir=$DIR --target_dir=$DIR\n',
+    'Usage: node provide-electron-build-resources.js --build_resources_dir=$DIR --python_env_dir=$DIR --backend_dir=$DIR --llamacpp_dir=$DIR --openvino_dir=$DIR --target_dir=$DIR\n',
   )
   process.exit(1)
 }
@@ -28,6 +30,7 @@ const buildResourcesDir = path.resolve(buildResourcesDirArg)
 const pythenEnvDir = path.resolve(pythonEnvDirArg)
 const backendDir = path.resolve(aiBackendDirArg)
 const llamaCppBackendDir = path.resolve(llamaCppBackendDirArg)
+const openVINOBackendDir = path.resolve(openVINOBackendDirArg)
 const targetDir = path.resolve(targetDirectoryArg)
 
 function symlinkDir(dir, target) {
@@ -77,6 +80,7 @@ function main() {
 
   symlinkDir(backendDir, path.join(targetDir, 'service'))
   symlinkDir(llamaCppBackendDir, path.join(targetDir, 'LlamaCpp'))
+  symlinkDir(openVINOBackendDir, path.join(targetDir, 'OpenVINO'))
 
   copyFiles(targetDir, sevenZipExe)
 }
diff --git a/WebUI/electron/main.ts b/WebUI/electron/main.ts
index 2ed2e8d2..d6e6a811 100644
--- a/WebUI/electron/main.ts
+++ b/WebUI/electron/main.ts
@@ -416,6 +416,10 @@
     return pathsManager.scanGGUFLLMModels()
   })
 
+  ipcMain.handle('getDownloadedOpenVINOLLMModels', (_event) => {
+    return pathsManager.scanOpenVINOModels()
+  })
+
   ipcMain.handle('getDownloadedEmbeddingModels', (_event) => {
     return pathsManager.scanEmbedding(false)
   })
diff --git a/WebUI/electron/pathsManager.ts b/WebUI/electron/pathsManager.ts
index f0480c62..a91220df 100644
--- a/WebUI/electron/pathsManager.ts
+++ b/WebUI/electron/pathsManager.ts
@@ -5,6 +5,7 @@ export class PathsManager {
   modelPaths: ModelPaths = {
     llm: '',
     ggufLLM: '',
+    openvinoLLM: '',
     embedding: '',
     stableDiffusion: '',
     inpaint: '',
@@ -120,6 +121,23 @@
     return [...modelsSet]
   }
 
+  scanOpenVINOModels() {
+    const dir = this.modelPaths.openvinoLLM
+    if (!fs.existsSync(dir)) {
+      fs.mkdirSync(dir, { recursive: true })
+    }
+    console.log('getting models', dir)
+    const modelsSet = fs
+      .readdirSync(dir)
+      .filter((subDir) => {
+        const fullpath = path.join(dir, subDir)
+        return fs.statSync(fullpath).isDirectory() && fs.existsSync(path.join(fullpath))
+      })
+      .map((subDir) => subDir.replace('---', '/'))
+      .reduce((set, modelName) => set.add(modelName), new Set<string>())
+
+    return [...modelsSet]
+  }
   scanLora(returnDefaults = true) {
     const models = returnDefaults
       ? ['None', 'latent-consistency/lcm-lora-sdxl', 'latent-consistency/lcm-lora-sdv1-5']
diff --git a/WebUI/electron/preload.ts b/WebUI/electron/preload.ts
index a8b34f01..f4fbd43e 100644
--- a/WebUI/electron/preload.ts
+++ b/WebUI/electron/preload.ts
@@ -59,6 +59,7 @@
   getDownloadedLoras: () => ipcRenderer.invoke('getDownloadedLoras'),
   getDownloadedLLMs: () => ipcRenderer.invoke('getDownloadedLLMs'),
   getDownloadedGGUFLLMs: () => ipcRenderer.invoke('getDownloadedGGUFLLMs'),
+  getDownloadedOpenVINOLLMModels: () => ipcRenderer.invoke('getDownloadedOpenVINOLLMModels'),
   getDownloadedEmbeddingModels: () => ipcRenderer.invoke('getDownloadedEmbeddingModels'),
   openImageWithSystem: (url: string) => ipcRenderer.send('openImageWithSystem', url),
   openImageInFolder: (url: string) => ipcRenderer.send('openImageInFolder', url),
diff --git a/WebUI/electron/subprocesses/apiServiceRegistry.ts b/WebUI/electron/subprocesses/apiServiceRegistry.ts
index e2cfb15c..e5713713 100644
--- a/WebUI/electron/subprocesses/apiServiceRegistry.ts
+++ b/WebUI/electron/subprocesses/apiServiceRegistry.ts
@@ -5,6 +5,7 @@
 import { BrowserWindow } from 'electron'
 import { appLoggerInstance } from '../logging/logger.ts'
 import getPort, { portNumbers } from 'get-port'
 import { LlamaCppBackendService } from './llamaCppBackendService.ts'
+import { OpenVINOBackendService } from './openVINOBackendService.ts'
 
 export type backend = 'ai-backend' | 'comfyui-backend'
@@ -125,6 +126,14 @@
         settings,
       ),
     )
+    instance.register(
+      new OpenVINOBackendService(
+        'openvino-backend',
+        await getPort({ port: portNumbers(29000, 29999) }),
+        win,
+        settings,
+      ),
+    )
   }
   return instance
 }
diff --git a/WebUI/electron/subprocesses/comfyUIBackendService.ts
b/WebUI/electron/subprocesses/comfyUIBackendService.ts index 95cb6a0f..69fbec0b 100644 --- a/WebUI/electron/subprocesses/comfyUIBackendService.ts +++ b/WebUI/electron/subprocesses/comfyUIBackendService.ts @@ -9,15 +9,17 @@ import { LongLivedPythonApiService, aiBackendServiceDir, GitService, + UvPipService, } from './service.ts' +const serviceFolder = 'ComfyUI' export class ComfyUiBackendService extends LongLivedPythonApiService { readonly isRequired = false - readonly serviceDir = path.resolve(path.join(this.baseDir, 'ComfyUI')) + readonly serviceDir = path.resolve(path.join(this.baseDir, serviceFolder)) readonly pythonEnvDir = path.resolve(path.join(this.baseDir, `comfyui-backend-env`)) - readonly lsLevelZeroDir = this.pythonEnvDir + readonly lsLevelZeroDir = path.resolve(path.join(this.baseDir, 'ai-backend-env')) readonly lsLevelZero = new LsLevelZeroService(this.lsLevelZeroDir) - readonly uvPip = this.lsLevelZero.uvPip + readonly uvPip = new UvPipService(this.pythonEnvDir, serviceFolder) readonly git = new GitService() healthEndpointUrl = `${this.baseUrl}/queue` diff --git a/WebUI/electron/subprocesses/llamaCppBackendService.ts b/WebUI/electron/subprocesses/llamaCppBackendService.ts index 850af553..fa5bf989 100644 --- a/WebUI/electron/subprocesses/llamaCppBackendService.ts +++ b/WebUI/electron/subprocesses/llamaCppBackendService.ts @@ -5,8 +5,9 @@ import * as filesystem from 'fs-extra' import { existingFileOrError } from './osProcessHelper.ts' import { LsLevelZeroService, UvPipService, LongLivedPythonApiService } from './service.ts' +const serviceFolder = 'LlamaCPP' export class LlamaCppBackendService extends LongLivedPythonApiService { - readonly serviceDir = path.resolve(path.join(this.baseDir, 'LlamaCPP')) + readonly serviceDir = path.resolve(path.join(this.baseDir, serviceFolder)) readonly pythonEnvDir = path.resolve(path.join(this.baseDir, `llama-cpp-env`)) // using ls_level_zero from default ai-backend env to avoid oneAPI dep conflicts readonly lsLevelZeroDir = path.resolve(path.join(this.baseDir, 'ai-backend-env')) @@ -15,7 +16,7 @@ export class LlamaCppBackendService extends LongLivedPythonApiService { healthEndpointUrl = `${this.baseUrl}/health` readonly lsLevelZero = new LsLevelZeroService(this.lsLevelZeroDir) - readonly uvPip = new UvPipService(this.pythonEnvDir) + readonly uvPip = new UvPipService(this.pythonEnvDir, serviceFolder) readonly python = this.uvPip.python serviceIsSetUp(): boolean { diff --git a/WebUI/electron/subprocesses/openVINOBackendService.ts b/WebUI/electron/subprocesses/openVINOBackendService.ts new file mode 100644 index 00000000..6a061f55 --- /dev/null +++ b/WebUI/electron/subprocesses/openVINOBackendService.ts @@ -0,0 +1,131 @@ +import { ChildProcess, spawn } from 'node:child_process' +import path from 'node:path' +import * as filesystem from 'fs-extra' +import { existingFileOrError } from './osProcessHelper.ts' +import { LsLevelZeroService, UvPipService, LongLivedPythonApiService } from './service.ts' + +const serviceFolder = 'openVINO' +export class OpenVINOBackendService extends LongLivedPythonApiService { + readonly serviceDir = path.resolve(path.join(this.baseDir, serviceFolder)) + readonly pythonEnvDir = path.resolve(path.join(this.baseDir, `openvino-env`)) + // using ls_level_zero from default ai-backend env to avoid oneAPI dep conflicts + readonly lsLevelZeroDir = path.resolve(path.join(this.baseDir, 'ai-backend-env')) + readonly isRequired = false + + healthEndpointUrl = `${this.baseUrl}/health` + + readonly lsLevelZero = new 
LsLevelZeroService(this.lsLevelZeroDir) + readonly uvPip = new UvPipService(this.pythonEnvDir, serviceFolder) + readonly python = this.uvPip.python + + serviceIsSetUp(): boolean { + return filesystem.existsSync(this.python.getExePath()) + } + + isSetUp = this.serviceIsSetUp() + + async *set_up(): AsyncIterable { + this.setStatus('installing') + this.appLogger.info('setting up service', this.name) + const self = this + + try { + yield { + serviceName: self.name, + step: 'start', + status: 'executing', + debugMessage: 'starting to set up python environment', + } + await this.lsLevelZero.ensureInstalled() + await this.uvPip.ensureInstalled() + + const deviceArch = await self.lsLevelZero.detectDevice() + yield { + serviceName: self.name, + step: `Detecting intel device`, + status: 'executing', + debugMessage: `detected intel hardware ${deviceArch}`, + } + + yield { + serviceName: self.name, + step: `install dependencies`, + status: 'executing', + debugMessage: `installing dependencies`, + } + const commonRequirements = existingFileOrError(path.join(self.serviceDir, 'requirements.txt')) + await this.uvPip.run([ + 'install', + '--index-strategy', + 'unsafe-best-match', + '-r', + commonRequirements, + ]) + + yield { + serviceName: self.name, + step: `install dependencies`, + status: 'executing', + debugMessage: `dependencies installed`, + } + + this.setStatus('notYetStarted') + yield { + serviceName: self.name, + step: 'end', + status: 'success', + debugMessage: `service set up completely`, + } + } catch (e) { + self.appLogger.warn(`Set up of service failed due to ${e}`, self.name, true) + self.appLogger.warn(`Aborting set up of ${self.name} service environment`, self.name, true) + this.setStatus('installationFailed') + yield { + serviceName: self.name, + step: 'end', + status: 'failed', + debugMessage: `Failed to setup python environment due to ${e}`, + } + } + } + + async spawnAPIProcess(): Promise<{ + process: ChildProcess + didProcessExitEarlyTracker: Promise + }> { + const additionalEnvVariables = { + SYCL_ENABLE_DEFAULT_CONTEXTS: '1', + SYCL_CACHE_PERSISTENT: '1', + PYTHONIOENCODING: 'utf-8', + ...(await this.lsLevelZero.getDeviceSelectorEnv()), + } + + const apiProcess = spawn( + this.python.getExePath(), + ['openvino_web_api.py', '--port', this.port.toString()], + { + cwd: this.serviceDir, + windowsHide: true, + env: Object.assign(process.env, additionalEnvVariables), + }, + ) + + //must be at the same tick as the spawn function call + //otherwise we cannot really track errors given the nature of spawn() with a longlived process + const didProcessExitEarlyTracker = new Promise((resolve, _reject) => { + apiProcess.on('error', (error) => { + this.appLogger.error(`encountered error of process in ${this.name} : ${error}`, this.name) + resolve(true) + }) + apiProcess.on('exit', () => { + this.appLogger.error(`encountered unexpected exit in ${this.name}.`, this.name) + resolve(true) + }) + }) + + return { + process: apiProcess, + didProcessExitEarlyTracker: didProcessExitEarlyTracker, + } + } +} diff --git a/WebUI/electron/subprocesses/service.ts b/WebUI/electron/subprocesses/service.ts index 7f5d7b16..b846230f 100644 --- a/WebUI/electron/subprocesses/service.ts +++ b/WebUI/electron/subprocesses/service.ts @@ -101,7 +101,10 @@ abstract class ExecutableService extends GenericServiceImpl { } export class PythonService extends ExecutableService { - constructor(readonly dir: string) { + constructor( + readonly dir: string, + readonly serviceDir: string, + ) { super('python', dir) } @@ -120,7 +123,9 
@@ export class PythonService extends ExecutableService { } async install(): Promise { - this.log('start installing') + this.log( + `installing python env at ${this.dir} from ${this.name} for service ${this.serviceDir}`, + ) await this.clonePythonEnv() } @@ -140,14 +145,30 @@ export class PythonService extends ExecutableService { } this.log(`copying prototypical python env to ${this.dir}`) await filesystem.copy(this.prototypicalEnvDir, this.dir) + filesystem.writeFile( + path.join(this.dir, 'python311._pth'), + ` + python311.zip + . + ../${this.serviceDir} + + # Uncomment to run site.main() automatically + import site + `, + ) } } export class PipService extends ExecutableService { - readonly python: PythonService = new PythonService(this.dir) + readonly python: PythonService - constructor(readonly pythonEnvDir: string) { + constructor( + readonly pythonEnvDir: string, + readonly serviceDir: string, + ) { super('pip', pythonEnvDir) + this.log(`setting up pip service at ${this.dir} for service ${this.serviceDir}`) + this.python = new PythonService(this.dir, this.serviceDir) } getExePath(): string { @@ -218,11 +239,17 @@ export class PipService extends ExecutableService { } export class UvPipService extends PipService { - readonly pip: PipService = new PipService(this.dir) - readonly python: PythonService = this.pip.python - - constructor(readonly pythonEnvDir: string) { - super(pythonEnvDir) + readonly pip: PipService + readonly python: PythonService + + constructor( + readonly pythonEnvDir: string, + readonly serviceDir: string, + ) { + super(pythonEnvDir, serviceDir) + this.log(`setting up uv-pip service at ${this.dir} for service ${this.serviceDir}`) + this.pip = new PipService(this.dir, this.serviceDir) + this.python = this.pip.python this.name = 'uvpip' } @@ -258,7 +285,7 @@ export class UvPipService extends PipService { } export class LsLevelZeroService extends ExecutableService { - readonly uvPip: UvPipService = new UvPipService(this.dir) + readonly uvPip: UvPipService = new UvPipService(this.dir, 'service') readonly requirementsTxtPath = path.resolve( path.join(this.baseDir, 'service/requirements-ls_level_zero.txt'), ) @@ -288,8 +315,8 @@ export class LsLevelZeroService extends ExecutableService { this.log('checking') try { await this.uvPip.check() - await this.uvPip.checkRequirementsTxt(this.requirementsTxtPath) - await this.run() + // await this.uvPip.checkRequirementsTxt(this.requirementsTxtPath) + // await this.run() } catch (e) { this.log(`warning: ${e}`) if (e instanceof ServiceCheckError) throw e diff --git a/WebUI/external/model_config.dev.json b/WebUI/external/model_config.dev.json index fc725096..1c3af3c0 100644 --- a/WebUI/external/model_config.dev.json +++ b/WebUI/external/model_config.dev.json @@ -1,6 +1,7 @@ { "llm": "../service/models/llm/checkpoints", "ggufLLM": "../service/models/llm/ggufLLM", + "openvinoLLM": "../service/models/llm/openvino", "embedding": "../service/models/llm/embedding", "stableDiffusion": "../service/models/stable_diffusion/checkpoints", "inpaint": "../service/models/stable_diffusion/inpaint", diff --git a/WebUI/external/model_config.json b/WebUI/external/model_config.json index f0e8d82d..f2bb56b3 100644 --- a/WebUI/external/model_config.json +++ b/WebUI/external/model_config.json @@ -1,6 +1,7 @@ { "llm": "./resources/service/models/llm/checkpoints", "ggufLLM": "./resources/service/models/llm/ggufLLM", + "openvinoLLM": "./resources/service/models/llm/openvino", "embedding": "./resources/service/models/llm/embedding", "stableDiffusion": 
"./resources/service/models/stable_diffusion/checkpoints", "inpaint": "./resources/service/models/stable_diffusion/inpaint", diff --git a/WebUI/package.json b/WebUI/package.json index 18c845ea..4a6d401a 100644 --- a/WebUI/package.json +++ b/WebUI/package.json @@ -8,7 +8,7 @@ "fetch-build-resources": "cross-env node ./build/scripts/fetch-python-package-resources.js --target_dir=../build_resources", "prepare-python-env": "cross-env node ./build/scripts/prepare-python-env.js --build_resources_dir=../build_resources --target_dir=../build-envs/online/prototype-python-env", "install-full-python-env": "cross-env node ./build/scripts/install-full-python-env.js --env_dir=../build-envs/online/prototype-python-env --comfy_ui_dir=../build_resources/ComfyUI", - "provide-electron-build-resources": "cross-env node build/scripts/provide-electron-build-resources.js --build_resources_dir=../build_resources --backend_dir=../service --llamacpp_dir=../LlamaCPP --target_dir=./external", + "provide-electron-build-resources": "cross-env node build/scripts/provide-electron-build-resources.js --build_resources_dir=../build_resources --backend_dir=../service --llamacpp_dir=../LlamaCPP --openvino_dir=../OpenVINO --target_dir=./external", "prepare-build": "cross-env npm run prepare-python-env && npm run provide-electron-build-resources -- --python_env_dir=../build-envs/online/prototype-python-env", "build": "node ./build/scripts/patch-nsis-template.js && cross-env-shell VITE_PLATFORM_TITLE=\"for Intel® Arc™\" \"vue-tsc && vite build && electron-builder --config build/build-config.json --win --x64\"", "lint:eslint": "eslint . --fix", diff --git a/WebUI/src/App.vue b/WebUI/src/App.vue index fc2bb2e7..82f669f4 100644 --- a/WebUI/src/App.vue +++ b/WebUI/src/App.vue @@ -246,12 +246,10 @@ import { useTheme } from './assets/js/store/theme.ts' import AddLLMDialog from '@/components/AddLLMDialog.vue' import WarningDialog from '@/components/WarningDialog.vue' import { useBackendServices } from './assets/js/store/backendServices.ts' -import { useTextInference } from '@/assets/js/store/textInference.ts' const backendServices = useBackendServices() const theme = useTheme() const globalSetup = useGlobalSetup() -const textInference = useTextInference() const enhanceCompt = ref>() const answer = ref>() @@ -368,14 +366,6 @@ function switchTab(index: number) { } } -watch(textInference, (newSetting, _oldSetting) => { - if (newSetting.backend === 'LLAMA.CPP') { - answer.value!.disableRag() - } else { - answer.value!.restoreRagState() - } -}) - function miniWindow() { window.electronAPI.miniWindow() } diff --git a/WebUI/src/assets/js/const.ts b/WebUI/src/assets/js/const.ts index 1e38db54..ee47853b 100644 --- a/WebUI/src/assets/js/const.ts +++ b/WebUI/src/assets/js/const.ts @@ -7,6 +7,7 @@ export const MODEL_TYPE_EMBEDDING = 5 export const MODEL_TYPE_INPAINT = 6 export const MODEL_TYPE_PREVIEW = 7 export const MODEL_TYPE_LLAMA_CPP = 8 +export const MODEL_TYPE_OPENVINO = 9 export const MODEL_TYPE_COMFY_UNET = 100 export const MODEL_TYPE_COMFY_CLIP = 101 export const MODEL_TYPE_COMFY_VAE = 102 diff --git a/WebUI/src/assets/js/store/backendServices.ts b/WebUI/src/assets/js/store/backendServices.ts index b19ffc39..ee68be8e 100644 --- a/WebUI/src/assets/js/store/backendServices.ts +++ b/WebUI/src/assets/js/store/backendServices.ts @@ -1,14 +1,15 @@ import { defineStore } from 'pinia' +const backends = ['ai-backend', 'comfyui-backend', 'llamacpp-backend', 'openvino-backend'] as const +export type BackendServiceName = (typeof backends)[number] + 
export const useBackendServices = defineStore( 'backendServices', () => { const currentServiceInfo = ref([]) - const serviceListeners: Map = new Map([ - ['ai-backend', new BackendServiceSetupProgressListener('ai-backend')], - ['comfyui-backend', new BackendServiceSetupProgressListener('comfyui-backend')], - ['llamacpp-backend', new BackendServiceSetupProgressListener('llamacpp-backend')], - ]) + const serviceListeners = new Map( + backends.map((b) => [b, new BackendServiceSetupProgressListener(b)]), + ) window.electronAPI .getServices() @@ -94,12 +95,38 @@ export const useBackendServices = defineStore( return window.electronAPI.sendStopSignal(serviceName) } + const lastUsedBackend = ref(null) + + function updateLastUsedBackend(currentInferenceBackend: BackendServiceName) { + lastUsedBackend.value = currentInferenceBackend + } + + async function resetLastUsedInferenceBackend(currentInferenceBackend: BackendServiceName) { + const lastUsedBackendSnapshot = lastUsedBackend.value + if (lastUsedBackendSnapshot === null || lastUsedBackendSnapshot === currentInferenceBackend) { + return + } + try { + const stopStatus = await stopService(lastUsedBackendSnapshot) + console.info(`unused service ${lastUsedBackendSnapshot} now in state ${stopStatus}`) + const startStatus = await startService(lastUsedBackendSnapshot) + console.info(`service ${lastUsedBackendSnapshot} now in state ${startStatus}`) + } catch (e) { + console.warn( + `Could not reset last used inference backend ${lastUsedBackendSnapshot} due to ${e}`, + ) + } + } + return { info: currentServiceInfo, serviceInfoUpdateReceived: serviceInfoUpdatePresent, allRequiredSetUp, allRequiredRunning, initalStartupRequestComplete, + lastUsedBackend, + updateLastUsedBackend, + resetLastUsedInferenceBackend, startAllSetUpServices, setUpService, startService, diff --git a/WebUI/src/assets/js/store/globalSetup.ts b/WebUI/src/assets/js/store/globalSetup.ts index 9597883b..ec2bcef2 100644 --- a/WebUI/src/assets/js/store/globalSetup.ts +++ b/WebUI/src/assets/js/store/globalSetup.ts @@ -1,10 +1,8 @@ import { defineStore } from 'pinia' import * as util from '../util' import { useI18N } from './i18n' -import { useBackendServices } from './backendServices' type GlobalSetupState = 'running' | 'verifyBackend' | 'manageInstallations' | 'loading' | 'failed' -type LastUsedBackend = BackendServiceName | 'None' export const useGlobalSetup = defineStore('globalSetup', () => { const state = reactive({ @@ -14,7 +12,6 @@ export const useGlobalSetup = defineStore('globalSetup', () => { }) const defaultBackendBaseUrl = ref('http://127.0.0.1:9999') - const lastUsedBackend = ref('None') const models = ref({ llm: new Array(), @@ -31,8 +28,6 @@ export const useGlobalSetup = defineStore('globalSetup', () => { resolution: 0, quality: 0, enableRag: false, - llm_model: 'microsoft/Phi-3-mini-4k-instruct', - ggufLLM_model: 'bartowski/Llama-3.2-3B-Instruct-GGUF/Llama-3.2-3B-Instruct-Q4_K_S.gguf', sd_model: 'Lykon/dreamshaper-8', inpaint_model: 'Lykon/dreamshaper-8-inpainting', negativePrompt: 'bad hands, nsfw', @@ -73,8 +68,6 @@ export const useGlobalSetup = defineStore('globalSetup', () => { const errorMessage = ref('') const hdPersistentConfirmation = ref(localStorage.getItem('HdPersistentConfirmation') === 'true') - const backendServices = useBackendServices() - watchEffect(() => { localStorage.setItem('HdPersistentConfirmation', hdPersistentConfirmation.value.toString()) }) @@ -221,27 +214,6 @@ export const useGlobalSetup = defineStore('globalSetup', () => { assertSelectExist() } - 
function updateLastUsedBackend(currentInferenceBackend: BackendServiceName) { - lastUsedBackend.value = currentInferenceBackend - } - - async function resetLastUsedInferenceBackend(currentInferenceBackend: BackendServiceName) { - const lastUsedBackendSnapshot = lastUsedBackend.value - if (lastUsedBackendSnapshot === 'None' || lastUsedBackendSnapshot === currentInferenceBackend) { - return - } - try { - const stopStatus = await backendServices.stopService(lastUsedBackendSnapshot) - console.info(`unused service ${lastUsedBackendSnapshot} now in state ${stopStatus}`) - const startStatus = await backendServices.startService(lastUsedBackendSnapshot) - console.info(`service ${lastUsedBackendSnapshot} now in state ${startStatus}`) - } catch (e) { - console.warn( - `Could not reset last used inference backend ${lastUsedBackendSnapshot} due to ${e}`, - ) - } - } - function assertSelectExist() { let changeUserSetup = false if (models.value.llm.length > 0 && !models.value.llm.includes(modelSettings.llm_model)) { @@ -287,28 +259,6 @@ export const useGlobalSetup = defineStore('globalSetup', () => { } } - async function checkModelAlreadyLoaded(params: CheckModelAlreadyLoadedParameters[]) { - const response = await fetch(`${defaultBackendBaseUrl.value}/api/checkModelAlreadyLoaded`, { - method: 'POST', - body: JSON.stringify({ data: params }), - headers: { - 'Content-Type': 'application/json', - }, - }) - const parsedResponse = (await response.json()) as ApiResponse & { - data: CheckModelAlreadyLoadedResult[] - } - return parsedResponse.data - } - - async function checkIfHuggingFaceUrlExists(repo_id: string) { - const response = await fetch( - `${defaultBackendBaseUrl.value}/api/checkHFRepoExists?repo_id=${repo_id}`, - ) - const data = await response.json() - return data.exists - } - return { state, modelSettings, @@ -318,11 +268,8 @@ export const useGlobalSetup = defineStore('globalSetup', () => { apiHost: defaultBackendBaseUrl, graphicsList, loadingState, - lastUsedBackend, errorMessage, hdPersistentConfirmation, - updateLastUsedBackend, - resetLastUsedInferenceBackend, initSetup, applyPathsSettings, applyModelSettings, @@ -330,8 +277,6 @@ export const useGlobalSetup = defineStore('globalSetup', () => { refreshSDModles, refreshInpaintModles, refreshLora, - checkModelAlreadyLoaded: checkModelAlreadyLoaded, - checkIfHuggingFaceUrlExists, applyPresetModelSettings, restorePathsSettings, } diff --git a/WebUI/src/assets/js/store/imageGeneration.ts b/WebUI/src/assets/js/store/imageGeneration.ts index 4c7f5811..1d80bd82 100644 --- a/WebUI/src/assets/js/store/imageGeneration.ts +++ b/WebUI/src/assets/js/store/imageGeneration.ts @@ -4,8 +4,10 @@ import { useComfyUi } from './comfyUi' import { useStableDiffusion } from './stableDiffusion' import { useI18N } from './i18n' import * as Const from '../const' -import { useGlobalSetup } from './globalSetup' import * as toast from '@/assets/js/toast.ts' +import { useModels } from './models' +import { useBackendServices } from './backendServices' +import { useGlobalSetup } from './globalSetup' export type RefImage = { type: string @@ -412,7 +414,9 @@ export const useImageGeneration = defineStore( const comfyUi = useComfyUi() const stableDiffusion = useStableDiffusion() + const backendServices = useBackendServices() const globalSetup = useGlobalSetup() + const models = useModels() const i18nState = useI18N().state const hdWarningDismissed = ref(false) @@ -747,12 +751,12 @@ export const useImageGeneration = defineStore( const checkList: CheckModelAlreadyLoadedParameters[] = 
workflow.comfyUIRequirements.requiredModels.map(extractDownloadModelParamsFromString) const checkedModels: CheckModelAlreadyLoadedResult[] = - await globalSetup.checkModelAlreadyLoaded(checkList) + await models.checkModelAlreadyLoaded(checkList) const modelsToBeLoaded = checkedModels.filter( (checkModelExistsResult) => !checkModelExistsResult.already_loaded, ) for (const item of modelsToBeLoaded) { - if (!(await globalSetup.checkIfHuggingFaceUrlExists(item.repo_id))) { + if (!(await models.checkIfHuggingFaceUrlExists(item.repo_id))) { toast.error(`declared model ${item.repo_id} does not exist. Aborting Generation.`) return [] } @@ -780,7 +784,7 @@ export const useImageGeneration = defineStore( }) } - const result = await globalSetup.checkModelAlreadyLoaded(checkList) + const result = await models.checkModelAlreadyLoaded(checkList) return result.filter((checkModelExistsResult) => !checkModelExistsResult.already_loaded) } @@ -790,8 +794,8 @@ export const useImageGeneration = defineStore( stepText.value = i18nState.COM_GENERATING const inferenceBackendService: BackendServiceName = backend.value === 'comfyui' ? 'comfyui-backend' : 'ai-backend' - await globalSetup.resetLastUsedInferenceBackend(inferenceBackendService) - globalSetup.updateLastUsedBackend(inferenceBackendService) + await backendServices.resetLastUsedInferenceBackend(inferenceBackendService) + backendServices.updateLastUsedBackend(inferenceBackendService) if (activeWorkflow.value.backend === 'default') { await stableDiffusion.generate() } else { diff --git a/WebUI/src/assets/js/store/models.ts b/WebUI/src/assets/js/store/models.ts index 91d914fe..d95cbc72 100644 --- a/WebUI/src/assets/js/store/models.ts +++ b/WebUI/src/assets/js/store/models.ts @@ -1,100 +1,161 @@ import { acceptHMRUpdate, defineStore } from 'pinia' +import { type LlmBackend } from './textInference' +import { useBackendServices } from './backendServices' export type ModelType = - | 'llm' | 'embedding' | 'stableDiffusion' | 'inpaint' | 'lora' | 'vae' | 'undefined' - | 'ggufLLM' + | LlmBackend export type Model = { name: string downloaded: boolean type: ModelType + default: boolean } -const predefinedModels: Model[] = [ - { name: 'Qwen/Qwen2-1.5B-Instruct', type: 'llm', downloaded: false }, - { name: 'microsoft/Phi-3-mini-4k-instruct', type: 'llm', downloaded: false }, - { name: 'mistralai/Mistral-7B-Instruct-v0.3', type: 'llm', downloaded: false }, - { name: 'deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', type: 'llm', downloaded: false }, - { name: 'deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', type: 'llm', downloaded: false }, +const predefinedModels: Omit[] = [ + { name: 'Qwen/Qwen2-1.5B-Instruct', type: 'ipexLLM', default: false }, + { name: 'microsoft/Phi-3-mini-4k-instruct', type: 'ipexLLM', default: true }, + { name: 'mistralai/Mistral-7B-Instruct-v0.3', type: 'ipexLLM', default: false }, + { name: 'deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', type: 'ipexLLM', default: false }, + { name: 'deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', type: 'ipexLLM', default: false }, { name: 'bartowski/Llama-3.2-3B-Instruct-GGUF/Llama-3.2-3B-Instruct-Q4_K_S.gguf', - type: 'ggufLLM', - downloaded: false, + type: 'llamaCPP', + default: true, }, { name: 'bartowski/Llama-3.2-3B-Instruct-GGUF/Llama-3.2-3B-Instruct-Q8_0.gguf', - type: 'ggufLLM', - downloaded: false, + type: 'llamaCPP', + default: false, }, { name: 'bartowski/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct-Q5_K_S.gguf', - type: 'ggufLLM', - downloaded: false, + type: 'llamaCPP', + default: false, }, { name: 
'HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF/smollm2-1.7b-instruct-q4_k_m.gguf', - type: 'ggufLLM', - downloaded: false, + type: 'llamaCPP', + default: false, }, + { name: 'OpenVINO/Phi-3-medium-4k-instruct-int4-ov', type: 'openVINO', default: false }, + { name: 'OpenVINO/mixtral-8x7b-instruct-v0.1-int4-ov', type: 'openVINO', default: false }, + { name: 'OpenVINO/Mistral-7B-Instruct-v0.2-fp16-ov', type: 'openVINO', default: false }, + { name: 'OpenVINO/TinyLlama-1.1B-Chat-v1.0-int4-ov', type: 'openVINO', default: true }, + { name: 'OpenVINO/Phi-3.5-mini-instruct-fp16-ov', type: 'openVINO', default: false }, ] -export const userModels: Model[] = [] - export const useModels = defineStore( 'models', () => { const hfToken = ref(undefined) - const models = ref(predefinedModels) - const llms = computed(() => models.value.filter((m) => m.type === 'llm')) + const models = ref([]) + const backendServices = useBackendServices() const downloadList = ref([]) - const ggufLLMs = computed(() => models.value.filter((m) => m.type === 'ggufLLM')) async function refreshModels() { const sdModels = await window.electronAPI.getDownloadedDiffusionModels() const llmModels = await window.electronAPI.getDownloadedLLMs() const ggufModels = await window.electronAPI.getDownloadedGGUFLLMs() + const openVINOLLMModels = await window.electronAPI.getDownloadedOpenVINOLLMModels() const loraModels = await window.electronAPI.getDownloadedLoras() const inpaintModels = await window.electronAPI.getDownloadedInpaintModels() const embeddingModels = await window.electronAPI.getDownloadedEmbeddingModels() const downloadedModels = [ - ...sdModels.map((name) => ({ name, type: 'stableDiffusion', downloaded: true })), - ...llmModels.map((name) => ({ name, type: 'llm', downloaded: true })), - ...ggufModels.map((name) => ({ name, type: 'ggufLLM', downloaded: true })), - ...loraModels.map((name) => ({ name, type: 'lora', downloaded: true })), - ...inpaintModels.map((name) => ({ name, type: 'inpaint', downloaded: true })), - ...embeddingModels.map((name) => ({ name, type: 'embedding', downloaded: true })), + ...sdModels.map<{ name: string; type: ModelType }>((name) => ({ + name, + type: 'stableDiffusion', + })), + ...llmModels.map<{ name: string; type: ModelType }>((name) => ({ name, type: 'ipexLLM' })), + ...ggufModels.map<{ name: string; type: ModelType }>((name) => ({ + name, + type: 'llamaCPP', + })), + ...openVINOLLMModels.map<{ name: string; type: ModelType }>((name) => ({ + name, + type: 'openVINO', + })), + ...loraModels.map<{ name: string; type: ModelType }>((name) => ({ name, type: 'lora' })), + ...inpaintModels.map<{ name: string; type: ModelType }>((name) => ({ + name, + type: 'inpaint', + })), + ...embeddingModels.map<{ name: string; type: ModelType }>((name) => ({ + name, + type: 'embedding', + })), ] - const notYetDownloaded = (model: Model) => + const notYetDownloaded = (model: { name: string }) => !downloadedModels.map((m) => m.name).includes(model.name) + const notPredefined = (model: { name: string }) => + !predefinedModels.map((m) => m.name).includes(model.name) models.value = [ ...downloadedModels, - ...userModels.filter(notYetDownloaded), ...predefinedModels.filter(notYetDownloaded), - ] + ...models.value.filter(notPredefined).filter(notYetDownloaded), + ].map((m) => ({ + ...m, + downloaded: downloadedModels.map((dm) => dm.name).includes(m.name), + default: predefinedModels.find((pm) => pm.name === m.name)?.default ?? 
false, + })) } async function download(_models: DownloadModelParam[]) {} + async function addModel(model: Model) { + models.value.push(model) + await refreshModels() + } + + const aipgBackendUrl = () => { + const aiBackendUrl = backendServices.info.find( + (item) => item.serviceName === 'ai-backend', + )?.baseUrl + if (!aiBackendUrl) throw new Error('AIPG Backend not running') + return aiBackendUrl + } + + async function checkIfHuggingFaceUrlExists(repo_id: string) { + const response = await fetch(`${aipgBackendUrl()}/api/checkHFRepoExists?repo_id=${repo_id}`) + const data = await response.json() + return data.exists + } + + async function checkModelAlreadyLoaded(params: CheckModelAlreadyLoadedParameters[]) { + const response = await fetch(`${aipgBackendUrl()}/api/checkModelAlreadyLoaded`, { + method: 'POST', + body: JSON.stringify({ data: params }), + headers: { + 'Content-Type': 'application/json', + }, + }) + const parsedResponse = (await response.json()) as ApiResponse & { + data: CheckModelAlreadyLoadedResult[] + } + return parsedResponse.data + } + refreshModels() return { models, - llms, - ggufLLMs, hfToken, hfTokenIsValid: computed(() => hfToken.value?.startsWith('hf_')), downloadList, + addModel, refreshModels, download, + checkIfHuggingFaceUrlExists, + checkModelAlreadyLoaded, } }, { diff --git a/WebUI/src/assets/js/store/stableDiffusion.ts b/WebUI/src/assets/js/store/stableDiffusion.ts index 4bbc8ff5..4f865299 100644 --- a/WebUI/src/assets/js/store/stableDiffusion.ts +++ b/WebUI/src/assets/js/store/stableDiffusion.ts @@ -161,7 +161,7 @@ export const useStableDiffusion = defineStore( backend: 'default', }) } - const result = await globalSetup.checkModelAlreadyLoaded(checkList) + const result = await models.checkModelAlreadyLoaded(checkList) const downloadList: CheckModelAlreadyLoadedParameters[] = [] for (const item of result) { if (!item.already_loaded) { diff --git a/WebUI/src/assets/js/store/textInference.ts b/WebUI/src/assets/js/store/textInference.ts index 2fc17e55..4b5bc6dd 100644 --- a/WebUI/src/assets/js/store/textInference.ts +++ b/WebUI/src/assets/js/store/textInference.ts @@ -1,42 +1,101 @@ import { acceptHMRUpdate, defineStore } from 'pinia' -import { useGlobalSetup } from './globalSetup' import { z } from 'zod' import { useBackendServices } from './backendServices' +import { useModels } from './models' +import * as Const from '@/assets/js/const' -export const backendTypes = ['IPEX-LLM', 'LLAMA.CPP'] as const -const BackendSchema = z.enum(backendTypes) -export type Backend = z.infer +export const llmBackendTypes = ['ipexLLM', 'llamaCPP', 'openVINO'] as const +const LlmBackendSchema = z.enum(llmBackendTypes) +export type LlmBackend = z.infer -const backendModelKey = { - 'IPEX-LLM': 'llm_model', - 'LLAMA.CPP': 'ggufLLM_model', +const backendToService = { + ipexLLM: 'ai-backend', + llamaCPP: 'llamacpp-backend', + openVINO: 'openvino-backend', +} as const + +export type LlmModel = { + name: string + type: LlmBackend + active: boolean + downloaded: boolean } + export const useTextInference = defineStore( 'textInference', () => { - const globalSetup = useGlobalSetup() const backendServices = useBackendServices() - const backend = ref('IPEX-LLM') - const activeModel = ref(null) - const metricsEnabled = ref(false) - const maxTokens = ref(1024) + const models = useModels() + const backend = ref('ipexLLM') - const llamaBackendUrl = computed(() => { - const url = backendServices.info.find( - (item) => item.serviceName === 'llamacpp-backend', - )?.baseUrl - console.log('url', 
url) - return url + const selectedModels = ref<{ [key in LlmBackend]: string | null }>({ + ipexLLM: null, + llamaCPP: null, + openVINO: null, }) - watch([llamaBackendUrl], () => { - console.log('llamaBackendUrl changed', llamaBackendUrl.value) + const llmModels = computed(() => { + const llmTypeModels = models.models.filter((m) => + ['ipexLLM', 'llamaCPP', 'openVINO'].includes(m.type), + ) + const newModels = llmTypeModels.map((m) => { + const selectedModelForType = selectedModels.value[m.type as LlmBackend] + return { + name: m.name, + type: m.type as LlmBackend, + downloaded: m.downloaded, + active: + m.name === selectedModelForType || + (!llmTypeModels.some((m) => m.name === selectedModelForType) && m.default), + } + }) + console.log('llmModels changed', newModels) + return newModels }) - watch([activeModel], () => { - console.log('activeModel changed', activeModel.value) - globalSetup.applyModelSettings({ [backendModelKey[backend.value]]: activeModel.value }) + const selectModel = (backend: LlmBackend, modelName: string) => { + selectedModels.value[backend] = modelName + } + + const backendToAipgBackendName = { + ipexLLM: 'default', + llamaCPP: 'llama_cpp', + openVINO: 'openvino', + } as const + + const backendToAipgModelTypeNumber = { + ipexLLM: Const.MODEL_TYPE_LLM, + llamaCPP: Const.MODEL_TYPE_LLAMA_CPP, + openVINO: Const.MODEL_TYPE_OPENVINO, + } as const + + async function getDownloadParamsForCurrentModelIfRequired() { + if (!activeModel.value) return [] + const checkList = { + repo_id: activeModel.value, + type: backendToAipgModelTypeNumber[backend.value], + backend: backendToAipgBackendName[backend.value], + } + const checkedModels = await models.checkModelAlreadyLoaded([checkList]) + const notYetDownloaded = checkedModels.filter((m) => !m.already_loaded) + return notYetDownloaded + } + + const activeModel = computed(() => { + const newActiveModel = llmModels.value + .filter((m) => m.type === backend.value) + .find((m) => m.active)?.name + console.log('activeModel changed', newActiveModel) + return newActiveModel }) + const metricsEnabled = ref(false) + const maxTokens = ref(1024) + + const currentBackendUrl = computed( + () => + backendServices.info.find((item) => item.serviceName === backendToService[backend.value]) + ?.baseUrl, + ) function toggleMetrics() { metricsEnabled.value = !metricsEnabled.value @@ -94,22 +153,26 @@ export const useTextInference = defineStore( return { backend, activeModel, - llamaBackendUrl, + selectedModels, + llmModels, + currentBackendUrl, metricsEnabled, - toggleMetrics, maxTokens, fontSizeClass, nameSizeClass, iconSizeClass, isMaxSize, isMinSize, + selectModel, + getDownloadParamsForCurrentModelIfRequired, + toggleMetrics, increaseFontSize, decreaseFontSize, } }, { persist: { - pick: ['backend', 'activeModel', 'maxTokens'], + pick: ['backend', 'selectedModels', 'maxTokens'], }, }, ) diff --git a/WebUI/src/components/AddLLMDialog.vue b/WebUI/src/components/AddLLMDialog.vue index 3ca24e86..58da4499 100644 --- a/WebUI/src/components/AddLLMDialog.vue +++ b/WebUI/src/components/AddLLMDialog.vue @@ -51,7 +51,7 @@ import { Input } from '@/components/ui/input' import { useGlobalSetup } from '@/assets/js/store/globalSetup' import { useI18N } from '@/assets/js/store/i18n' -import { useModels, userModels } from '@/assets/js/store/models' +import { useModels } from '@/assets/js/store/models' import { useTextInference } from '@/assets/js/store/textInference' const i18nState = useI18N().state @@ -70,33 +70,25 @@ const emits = defineEmits<{ }>() const 
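The `llmModels` and `activeModel` computeds in textInference.ts implement a per-backend selection with fallback: the user's selected model wins while it still exists, otherwise the backend's predefined default becomes active. A pure-function sketch of that rule under a simplified model shape; the store itself tracks an `active` flag per entry and evaluates the fallback across all LLM-type models, which the sketch collapses to a single backend:

```ts
type LlmBackend = 'ipexLLM' | 'llamaCPP' | 'openVINO'

// Simplified shape: `default` marks the predefined fallback entry for a backend.
interface KnownModel {
  name: string
  type: LlmBackend
  default: boolean
}

// The explicitly selected model is active while it is still known;
// otherwise fall back to the backend's default model, if any.
function resolveActiveModel(
  modelsForBackend: KnownModel[],
  selected: string | null,
): string | undefined {
  if (selected && modelsForBackend.some((m) => m.name === selected)) return selected
  return modelsForBackend.find((m) => m.default)?.name
}
```

Applied once per backend against `selectedModels`, this mirrors how `activeModel` ends up resolving to the selected or default entry for the currently chosen backend.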
exampleModelName = computed(() => - textInference.backend === 'IPEX-LLM' - ? i18nState.REQUEST_LLM_MODEL_EXAMPLE - : i18nState.REQUEST_LLM_SINGLE_EXAMPLE, + textInference.backend === 'llamaCPP' + ? i18nState.REQUEST_LLM_SINGLE_EXAMPLE + : i18nState.REQUEST_LLM_MODEL_EXAMPLE, ) const examplePlaceholder = computed(() => - textInference.backend === 'IPEX-LLM' - ? i18nState.COM_LLM_HF_PROMPT - : i18nState.COM_LLM_HF_PROMPT_GGUF, + textInference.backend === 'llamaCPP' + ? i18nState.COM_LLM_HF_PROMPT_GGUF + : i18nState.COM_LLM_HF_PROMPT, ) -const isValidModelName = (name: string) => { - if (textInference.backend === 'IPEX-LLM') { - return name.split('/').length === 2 - } else { - return name.split('/').length >= 3 - } -} +const isValidModelName = (name: string) => + textInference.backend === 'llamaCPP' ? name.split('/').length >= 3 : name.split('/').length === 2 function onShow() { animate.value = true } async function addModel() { - const previousModel = globalSetup.modelSettings.llm_model - const cancelAndShowWarning = (text: string) => { - globalSetup.modelSettings.llm_model = previousModel addModelErrorMessage.value = text addModelError.value = true } @@ -113,7 +105,7 @@ async function addModel() { return } - const urlExists = await globalSetup.checkIfHuggingFaceUrlExists(modelRequest.value) + const urlExists = await models.checkIfHuggingFaceUrlExists(modelRequest.value) if (!urlExists) { cancelAndShowWarning(i18nState.ERROR_REPO_NOT_EXISTS) return @@ -123,7 +115,13 @@ async function addModel() { const isLlm = await isLLM(modelRequest.value) const downloadNewModel = async () => { - await registerModel() + await models.addModel({ + name: modelRequest.value, + type: textInference.backend, + downloaded: false, + default: false, + }) + textInference.selectModel(textInference.backend, modelRequest.value) emits('callCheckModel') closeAdd() } @@ -135,20 +133,6 @@ async function addModel() { } } -async function registerModel() { - userModels.push({ - name: modelRequest.value, - type: textInference.backend === 'IPEX-LLM' ? 
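The validation in AddLLMDialog.vue now special-cases 'llamaCPP' instead of 'IPEX-LLM': GGUF requests must name a concrete file inside the repo (at least three `/`-separated segments), while IPEX-LLM and OpenVINO requests are plain `org/repo` ids. A small sketch of that check, passing the backend explicitly rather than reading it from the textInference store, with the predefined model ids from this PR as example inputs:

```ts
type LlmBackend = 'ipexLLM' | 'llamaCPP' | 'openVINO'

// llama.cpp ids must point at a concrete .gguf file inside a repo, hence at
// least three '/'-separated segments; the other backends take plain org/repo.
const isValidModelName = (backend: LlmBackend, name: string): boolean =>
  backend === 'llamaCPP' ? name.split('/').length >= 3 : name.split('/').length === 2

console.log(isValidModelName('llamaCPP', 'HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF/smollm2-1.7b-instruct-q4_k_m.gguf')) // true
console.log(isValidModelName('llamaCPP', 'HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF')) // false
console.log(isValidModelName('openVINO', 'OpenVINO/Phi-3.5-mini-instruct-fp16-ov'))   // true
```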
'llm' : 'ggufLLM', - downloaded: false, - }) - await models.refreshModels() - if (textInference.backend === 'IPEX-LLM') { - globalSetup.modelSettings.llm_model = modelRequest.value - } else { - globalSetup.modelSettings.ggufLLM_model = modelRequest.value - } -} - async function isLLM(repo_id: string) { const response = await fetch(`${globalSetup.apiHost}/api/isLLM?repo_id=${repo_id}`) const data = await response.json() diff --git a/WebUI/src/components/DownloadDialog.vue b/WebUI/src/components/DownloadDialog.vue index f24354a4..2a99784c 100644 --- a/WebUI/src/components/DownloadDialog.vue +++ b/WebUI/src/components/DownloadDialog.vue @@ -245,7 +245,7 @@ function dataProcess(line: string) { case 'runtime_error': errorText.value = i18nState.ERROR_RUNTIME_ERROR break - case 'unknow_exception': + case 'unknown_exception': errorText.value = i18nState.ERROR_GENERATE_UNKONW_EXCEPTION break } diff --git a/WebUI/src/components/InstallationManagement.vue b/WebUI/src/components/InstallationManagement.vue index f3b536cb..257bb65a 100644 --- a/WebUI/src/components/InstallationManagement.vue +++ b/WebUI/src/components/InstallationManagement.vue @@ -263,6 +263,8 @@ function getInfoURL(serviceName: string) { return 'https://github.com/comfyanonymous/ComfyUI' case 'llamacpp-backend': return 'https://github.com/abetlen/llama-cpp-python' + case 'openvino-backend': + return 'https://github.com/openvinotoolkit/openvino.genai' default: return undefined } diff --git a/WebUI/src/components/ModelDropDownItem.vue b/WebUI/src/components/ModelDropDownItem.vue index 7e21e4f5..201b720c 100644 --- a/WebUI/src/components/ModelDropDownItem.vue +++ b/WebUI/src/components/ModelDropDownItem.vue @@ -8,7 +8,7 @@
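Taken together, the AddLLMDialog changes replace the old `registerModel`/`userModels` path with store calls: validate the id, confirm the repo exists on Hugging Face, register it via `models.addModel`, and select it for the current backend. A condensed sketch of that flow; the store interfaces below are assumptions reduced to the methods used here, the dialog's warning UI and `isLLM` check are collapsed into thrown errors:

```ts
type LlmBackend = 'ipexLLM' | 'llamaCPP' | 'openVINO'

interface ModelsStore {
  checkIfHuggingFaceUrlExists(repoId: string): Promise<boolean>
  addModel(model: { name: string; type: LlmBackend; downloaded: boolean; default: boolean }): Promise<void>
}

interface TextInferenceStore {
  backend: LlmBackend
  selectModel(backend: LlmBackend, modelName: string): void
}

// Condensed add-model flow after the refactor: validate, verify, register, select.
async function addUserModel(
  models: ModelsStore,
  textInference: TextInferenceStore,
  modelRequest: string,
): Promise<void> {
  const segments = modelRequest.split('/').length
  const valid = textInference.backend === 'llamaCPP' ? segments >= 3 : segments === 2
  if (!valid) throw new Error('Model id does not match the expected format for this backend')

  if (!(await models.checkIfHuggingFaceUrlExists(modelRequest))) {
    throw new Error('Repository does not exist on Hugging Face')
  }

  await models.addModel({
    name: modelRequest,
    type: textInference.backend,
    downloaded: false,
    default: false,
  })
  textInference.selectModel(textInference.backend, modelRequest)
}
```

Registering through the store and then calling `selectModel` keeps the per-backend selection in `selectedModels` as the single source of truth, instead of writing back into `globalSetup.modelSettings` as the removed code did.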