From aa100011da73e662dcc6ba87569d70466df185df Mon Sep 17 00:00:00 2001 From: Nikos Papailiou <115017354+NikolaosPapailiou@users.noreply.github.com> Date: Wed, 25 Sep 2024 09:28:26 +0300 Subject: [PATCH] Add explicit markdown parser (#532) --- .../tiledb/vector_search/object_readers/directory_reader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/apis/python/src/tiledb/vector_search/object_readers/directory_reader.py b/apis/python/src/tiledb/vector_search/object_readers/directory_reader.py index 1a815468d..2e861d262 100644 --- a/apis/python/src/tiledb/vector_search/object_readers/directory_reader.py +++ b/apis/python/src/tiledb/vector_search/object_readers/directory_reader.py @@ -355,6 +355,7 @@ def __init__( handlers={ "application/pdf": PyMuPDFParser(), "text/plain": TextParser(), + "text/markdown": TextParser(), "text/html": BS4HTMLParser(), "application/msword": MsWordParser(), "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ( @@ -385,7 +386,7 @@ def lazy_load( mime_type = mimetypes.guess_type(self.uri)[0] f = vfs.open(self.uri) - if mime_type is None: + if mime_type is None or mime_type.startswith("text"): mime_type = "text/plain" if mime_type.startswith("image/"):