From ec0c311d1e11cdf62d332f1ca53ea9f21b9cb853 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C5=82awomir=20Paszko?=
Date: Sat, 22 Apr 2023 22:29:24 +0200
Subject: [PATCH] Fixes. Added base for LLAMA_index.

---
 temp/README.md     |  0
 vectordb-create.py | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+)
 delete mode 100644 temp/README.md
 create mode 100644 vectordb-create.py

diff --git a/temp/README.md b/temp/README.md
deleted file mode 100644
index e69de29..0000000
diff --git a/vectordb-create.py b/vectordb-create.py
new file mode 100644
index 0000000..85d843e
--- /dev/null
+++ b/vectordb-create.py
@@ -0,0 +1,33 @@
+"""
+This script creates a vector database from texts with
+llama_index.
+"""
+import logging
+import os
+import sys
+
+from llama_index import (
+    GPTSimpleVectorIndex,
+    GPTSimpleKeywordTableIndex,
+    GPTListIndex,
+    SimpleDirectoryReader
+)
+
+# Source documents and the on-disk cache of the index built from them.
+DOCUMENTS_DIR = 'trainingdata/sesa'
+INDEX_PATH = 'trainingdata/sesa.json'
+
+# Log to stdout. basicConfig() already installs a stdout handler on the root
+# logger; adding a second StreamHandler would print every message twice.
+logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+
+
+# Build the index on first run and cache it; reuse the cache afterwards.
+if os.path.exists(INDEX_PATH):
+    logging.info('Loading database...')
+    db_index = GPTSimpleVectorIndex.load_from_disk(INDEX_PATH)
+else:
+    logging.info('Creating database...')
+    db_documents = SimpleDirectoryReader(DOCUMENTS_DIR).load_data()
+    db_index = GPTSimpleVectorIndex.from_documents(db_documents)
+    db_index.save_to_disk(INDEX_PATH)