From 064fa06253b5357883c1ef0fd0aef0428cacc784 Mon Sep 17 00:00:00 2001 From: zilto Date: Fri, 10 Jan 2025 13:37:08 -0500 Subject: [PATCH 1/4] added projection kwarg --- sources/mongodb/__init__.py | 11 ++- sources/mongodb/helpers.py | 104 +++++++++++++++++++++++---- tests/mongodb/test_mongodb_source.py | 100 ++++++++++++++++++++++++++ 3 files changed, 200 insertions(+), 15 deletions(-) diff --git a/sources/mongodb/__init__.py b/sources/mongodb/__init__.py index db6b9d054..351b850e7 100644 --- a/sources/mongodb/__init__.py +++ b/sources/mongodb/__init__.py @@ -1,6 +1,6 @@ """Source that loads collections form any a mongo database, supports incremental loads.""" -from typing import Any, Dict, Iterable, List, Optional +from typing import Any, Dict, Iterable, List, Optional, Union, Mapping import dlt from dlt.common.data_writers import TDataItemFormat @@ -73,6 +73,7 @@ def mongodb( parallel=parallel, limit=limit, filter_=filter_ or {}, + projection=None, ) @@ -90,6 +91,7 @@ def mongodb_collection( chunk_size: Optional[int] = 10000, data_item_format: Optional[TDataItemFormat] = "object", filter_: Optional[Dict[str, Any]] = None, + projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None, ) -> Any: """ A DLT source which loads a collection from a mongo database using PyMongo. @@ -109,6 +111,12 @@ def mongodb_collection( object - Python objects (dicts, lists). arrow - Apache Arrow tables. filter_ (Optional[Dict[str, Any]]): The filter to apply to the collection. + projection: (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select columns + when loading the collection. Supported inputs: + include (list) - ["year", "title"] + include (dict) - {"year": 1, "title": 1} + exclude (dict) - {"released": 0, "runtime": 0} + Note: Can't mix include and exclude statements '{"title": 1, "released": 0}` Returns: Iterable[DltResource]: A list of DLT resources for each collection to be loaded. @@ -136,4 +144,5 @@ def mongodb_collection( chunk_size=chunk_size, data_item_format=data_item_format, filter_=filter_ or {}, + projection=projection, ) diff --git a/sources/mongodb/helpers.py b/sources/mongodb/helpers.py index fe5dcc69c..0c0182ed1 100644 --- a/sources/mongodb/helpers.py +++ b/sources/mongodb/helpers.py @@ -1,7 +1,7 @@ """Mongo database source helpers""" from itertools import islice -from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Tuple +from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Tuple, Union, Iterable, Mapping import dlt from bson.decimal128 import Decimal128 @@ -18,6 +18,7 @@ from pymongo import ASCENDING, DESCENDING, MongoClient from pymongo.collection import Collection from pymongo.cursor import Cursor +from pymongo.helpers import _fields_list_to_dict if TYPE_CHECKING: @@ -106,6 +107,41 @@ def _filter_op(self) -> Dict[str, Any]: filt[self.cursor_field]["$gt"] = self.incremental.end_value return filt + + def _projection_op(self, projection) -> Optional[Dict[str, Any]]: + """Build a projection operator. + + A tuple of fields to include or a dict specifying fields to include or exclude. + The incremental `primary_key` needs to be handle differently for inclusion + and exclusion projections. + + Returns: + Tuple[str, ...] | Dict[str, Any]: A tuple or dictionary with the projection operator. + """ + if projection is None: + return None + + projection_dict = dict(_fields_list_to_dict(projection, "projection")) + + # NOTE we can still filter on primary_key if it's excluded from projection + if self.incremental: + # this is an inclusion projection + if any(v == 1 for v in projection.values()): + # ensure primary_key is included + projection_dict.update({self.incremental.primary_key: 1}) + # this is an exclusion projection + else: + try: + # ensure primary_key isn't excluded + projection_dict.pop(self.incremental.primary_key) + except KeyError: + pass # primary_key was properly not included in exclusion projection + else: + dlt.common.logger.warn( + f"Primary key `{self.incremental.primary_key} was removed from exclusion projection" + ) + + return projection_dict def _limit(self, cursor: Cursor, limit: Optional[int] = None) -> TCursor: # type: ignore """Apply a limit to the cursor, if needed. @@ -128,7 +164,10 @@ def _limit(self, cursor: Cursor, limit: Optional[int] = None) -> TCursor: # typ return cursor def load_documents( - self, filter_: Dict[str, Any], limit: Optional[int] = None + self, + filter_: Dict[str, Any], + limit: Optional[int] = None, + projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None, ) -> Iterator[TDataItem]: """Construct the query and load the documents from the collection. @@ -143,7 +182,9 @@ def load_documents( _raise_if_intersection(filter_op, filter_) filter_op.update(filter_) - cursor = self.collection.find(filter=filter_op) + projection_op = self._projection_op(projection) + + cursor = self.collection.find(filter=filter_op, projection=projection_op) if self._sort_op: cursor = cursor.sort(self._sort_op) @@ -171,7 +212,11 @@ def _create_batches(self, limit: Optional[int] = None) -> List[Dict[str, int]]: return batches - def _get_cursor(self, filter_: Dict[str, Any]) -> TCursor: + def _get_cursor( + self, + filter_: Dict[str, Any], + projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None, + ) -> TCursor: """Get a reading cursor for the collection. Args: @@ -184,7 +229,9 @@ def _get_cursor(self, filter_: Dict[str, Any]) -> TCursor: _raise_if_intersection(filter_op, filter_) filter_op.update(filter_) - cursor = self.collection.find(filter=filter_op) + projection_op = self._projection_op(projection) + + cursor = self.collection.find(filter=filter_op, projection=projection_op) if self._sort_op: cursor = cursor.sort(self._sort_op) @@ -201,7 +248,10 @@ def _run_batch(self, cursor: TCursor, batch: Dict[str, int]) -> TDataItem: return data def _get_all_batches( - self, filter_: Dict[str, Any], limit: Optional[int] = None + self, + filter_: Dict[str, Any], + limit: Optional[int] = None, + projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None, ) -> Iterator[TDataItem]: """Load all documents from the collection in parallel batches. @@ -213,13 +263,16 @@ def _get_all_batches( Iterator[TDataItem]: An iterator of the loaded documents. """ batches = self._create_batches(limit=limit) - cursor = self._get_cursor(filter_=filter_) + cursor = self._get_cursor(filter_=filter_, projection=projection) for batch in batches: yield self._run_batch(cursor=cursor, batch=batch) def load_documents( - self, filter_: Dict[str, Any], limit: Optional[int] = None + self, + filter_: Dict[str, Any], + limit: Optional[int] = None, + projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None, ) -> Iterator[TDataItem]: """Load documents from the collection in parallel. @@ -230,7 +283,9 @@ def load_documents( Yields: Iterator[TDataItem]: An iterator of the loaded documents. """ - for document in self._get_all_batches(limit=limit, filter_=filter_): + for document in self._get_all_batches( + limit=limit, filter_=filter_, projection=projection + ): yield document @@ -241,7 +296,10 @@ class CollectionArrowLoader(CollectionLoader): """ def load_documents( - self, filter_: Dict[str, Any], limit: Optional[int] = None + self, + filter_: Dict[str, Any], + limit: Optional[int] = None, + projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None, ) -> Iterator[Any]: """ Load documents from the collection in Apache Arrow format. @@ -264,7 +322,12 @@ def load_documents( _raise_if_intersection(filter_op, filter_) filter_op.update(filter_) - cursor = self.collection.find_raw_batches(filter_, batch_size=self.chunk_size) + projection_op = self._projection_op(projection) + + # NOTE the `filter_op` isn't passed + cursor = self.collection.find_raw_batches( + filter_, batch_size=self.chunk_size, projection=projection_op + ) if self._sort_op: cursor = cursor.sort(self._sort_op) # type: ignore @@ -283,7 +346,11 @@ class CollectionArrowLoaderParallel(CollectionLoaderParallel): Apache Arrow for data processing. """ - def _get_cursor(self, filter_: Dict[str, Any]) -> TCursor: + def _get_cursor( + self, + filter_: Dict[str, Any], + projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None, + ) -> TCursor: """Get a reading cursor for the collection. Args: @@ -296,8 +363,10 @@ def _get_cursor(self, filter_: Dict[str, Any]) -> TCursor: _raise_if_intersection(filter_op, filter_) filter_op.update(filter_) + projection_op = self._projection_op(projection) + cursor = self.collection.find_raw_batches( - filter=filter_op, batch_size=self.chunk_size + filter=filter_op, batch_size=self.chunk_size, projection=projection_op ) if self._sort_op: cursor = cursor.sort(self._sort_op) # type: ignore @@ -326,6 +395,7 @@ def collection_documents( client: TMongoClient, collection: TCollection, filter_: Dict[str, Any], + projection: Union[Dict[str, Any], List[str]], # TODO kwargs reserved for dlt? incremental: Optional[dlt.sources.incremental[Any]] = None, parallel: bool = False, limit: Optional[int] = None, @@ -348,6 +418,12 @@ def collection_documents( Supported formats: object - Python objects (dicts, lists). arrow - Apache Arrow tables. + projection: (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select columns + when loading the collection. Supported inputs: + include (list) - ["year", "title"] + include (dict) - {"year": 1, "title": 1} + exclude (dict) - {"released": 0, "runtime": 0} + Note: Can't mix include and exclude statements '{"title": 1, "released": 0}` Returns: Iterable[DltResource]: A list of DLT resources for each collection to be loaded. @@ -372,7 +448,7 @@ def collection_documents( loader = LoaderClass( client, collection, incremental=incremental, chunk_size=chunk_size ) - for data in loader.load_documents(limit=limit, filter_=filter_): + for data in loader.load_documents(limit=limit, filter_=filter_, projection=projection): yield data diff --git a/tests/mongodb/test_mongodb_source.py b/tests/mongodb/test_mongodb_source.py index b39ce6997..88f4ba1c5 100644 --- a/tests/mongodb/test_mongodb_source.py +++ b/tests/mongodb/test_mongodb_source.py @@ -409,6 +409,106 @@ def test_filter_intersect(destination_name): pipeline.run(movies) +@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) +def test_projection_list_inclusion(destination_name): + pipeline = dlt.pipeline( + pipeline_name="mongodb_test", + destination=destination_name, + dataset_name="mongodb_test_data", + full_refresh=True, + ) + collection_name = "movies" + projection = ["title", "poster"] + expected_columns = projection + ["_id", "_dlt_id", "_dlt_load_id"] + + movies = mongodb_collection( + collection=collection_name, + projection=projection, + limit=2 + ) + pipeline.run(movies) + loaded_columns = pipeline.default_schema.get_table_columns(collection_name).keys() + + assert set(loaded_columns) == set(expected_columns) + + +@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) +def test_projection_dict_inclusion(destination_name): + pipeline = dlt.pipeline( + pipeline_name="mongodb_test", + destination=destination_name, + dataset_name="mongodb_test_data", + full_refresh=True, + ) + collection_name = "movies" + projection = {"title": 1, "poster": 1} + expected_columns = list(projection.keys()) + ["_id", "_dlt_id", "_dlt_load_id"] + + movies = mongodb_collection( + collection=collection_name, + projection=projection, + limit=2 + ) + pipeline.run(movies) + loaded_columns = pipeline.default_schema.get_table_columns(collection_name).keys() + + assert set(loaded_columns) == set(expected_columns) + + +@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) +def test_projection_dict_exclusion(destination_name): + pipeline = dlt.pipeline( + pipeline_name="mongodb_test", + destination=destination_name, + dataset_name="mongodb_test_data", + full_refresh=True, + ) + collection_name = "movies" + columns_to_exclude = [ + "runtime", "released", "year", "plot", "fullplot", "lastupdated", "type", + "directors", "imdb", "cast", "countries", "genres", "tomatoes", "num_mflix_comments", + "rated", "awards" + ] + projection = {col: 0 for col in columns_to_exclude} + expected_columns = ["title", "poster", "_id", "_dlt_id", "_dlt_load_id"] + + movies = mongodb_collection( + collection=collection_name, + projection=projection, + limit=2 + ) + pipeline.run(movies) + loaded_columns = pipeline.default_schema.get_table_columns(collection_name).keys() + + assert set(loaded_columns) == set(expected_columns) + + +@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) +def test_projection_nested_field(destination_name): + pipeline = dlt.pipeline( + pipeline_name="mongodb_test", + destination=destination_name, + dataset_name="mongodb_test_data", + full_refresh=True, + ) + collection_name = "movies" + projection = ["imdb.votes", "poster"] + expected_columns = ["imdb__votes", "poster", "_id", "_dlt_id", "_dlt_load_id"] + # other documents nested under `imdb` shouldn't be loaded + not_expected_columns = ["imdb__rating", "imdb__id"] + + movies = mongodb_collection( + collection=collection_name, + projection=projection, + limit=2 + ) + pipeline.run(movies) + loaded_columns = pipeline.default_schema.get_table_columns(collection_name).keys() + + assert set(loaded_columns) == set(expected_columns) + assert len(set(loaded_columns).intersection(not_expected_columns)) == 0 + + @pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) @pytest.mark.parametrize("data_item_format", ["object", "arrow"]) def test_mongodb_without_pymongoarrow( From 39a644810b236bc05143a71e712b7f9ccfef7527 Mon Sep 17 00:00:00 2001 From: zilto Date: Wed, 15 Jan 2025 20:50:32 -0500 Subject: [PATCH 2/4] upgrade pymongo and pymongoarrow --- poetry.lock | 276 ++++++++++++++++++++++++++++++++++++- pyproject.toml | 4 +- sources/mongodb/helpers.py | 15 +- 3 files changed, 281 insertions(+), 14 deletions(-) diff --git a/poetry.lock b/poetry.lock index 6c1979521..b2b827e6d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.0.0 and should not be changed by hand. [[package]] name = "adlfs" @@ -6,6 +6,7 @@ version = "2023.10.0" description = "Access Azure Datalake Gen1 with fsspec and dask" optional = false python-versions = ">=3.8" +groups = ["filesystem"] files = [ {file = "adlfs-2023.10.0-py3-none-any.whl", hash = "sha256:dfdc8cc782bd78262435fb1bc2a8cfdbdd80342bb1b1ae9dfff968de912b0b09"}, {file = "adlfs-2023.10.0.tar.gz", hash = "sha256:f5cf06c5b0074d17d43838d4c434791a98420d9e768b36a1a02c7b3930686543"}, @@ -28,6 +29,7 @@ version = "2.5.4" description = "Async client for aws services using botocore and aiohttp" optional = false python-versions = ">=3.7" +groups = ["filesystem"] files = [ {file = "aiobotocore-2.5.4-py3-none-any.whl", hash = "sha256:4b32218728ca3d0be83835b604603a0cd6c329066e884bb78149334267f92440"}, {file = "aiobotocore-2.5.4.tar.gz", hash = "sha256:60341f19eda77e41e1ab11eef171b5a98b5dbdb90804f5334b6f90e560e31fae"}, @@ -49,6 +51,7 @@ version = "3.8.6" description = "Async http client/server framework (asyncio)" optional = false python-versions = ">=3.6" +groups = ["main", "facebook_ads", "filesystem", "unstructured_data", "unstructured_data_lint"] files = [ {file = "aiohttp-3.8.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:41d55fc043954cddbbd82503d9cc3f4814a40bcef30b3569bc7b5e34130718c1"}, {file = "aiohttp-3.8.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1d84166673694841d8953f0a8d0c90e1087739d24632fe86b1a08819168b4566"}, @@ -157,6 +160,7 @@ version = "0.11.0" description = "itertools and builtins for AsyncIO and mixed iterables" optional = false python-versions = ">=3.6" +groups = ["filesystem"] files = [ {file = "aioitertools-0.11.0-py3-none-any.whl", hash = "sha256:04b95e3dab25b449def24d7df809411c10e62aab0cbe31a50ca4e68748c43394"}, {file = "aioitertools-0.11.0.tar.gz", hash = "sha256:42c68b8dd3a69c2bf7f2233bf7df4bb58b557bca5252ac02ed5187bbc67d6831"}, @@ -171,6 +175,7 @@ version = "1.3.1" description = "aiosignal: a list of registered asynchronous callbacks" optional = false python-versions = ">=3.7" +groups = ["main", "facebook_ads", "filesystem", "unstructured_data", "unstructured_data_lint"] files = [ {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"}, {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"}, @@ -185,6 +190,7 @@ version = "4.0.0" description = "High level compatibility layer for multiple asynchronous event loop implementations" optional = false python-versions = ">=3.8" +groups = ["unstructured_data"] files = [ {file = "anyio-4.0.0-py3-none-any.whl", hash = "sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f"}, {file = "anyio-4.0.0.tar.gz", hash = "sha256:f7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a"}, @@ -206,6 +212,7 @@ version = "0.0.1" description = "" optional = false python-versions = "*" +groups = ["unstructured_data"] files = [ {file = "argilla-0.0.1-py3-none-any.whl", hash = "sha256:8bdc3c505bcfb47ba4b91f5658034eae53bf7d4f9317980397605c0c55817396"}, {file = "argilla-0.0.1.tar.gz", hash = "sha256:5017854754e89f573b31af25b25b803f51cea9ca1fa0bcf00505dee1f45cf7c9"}, @@ -217,6 +224,7 @@ version = "3.2.2" description = "Asana API client" optional = false python-versions = "*" +groups = ["asana_dlt"] files = [ {file = "asana-3.2.2-py2.py3-none-any.whl", hash = "sha256:e8426ae5f5cda2c27d29874145acb589b91e673a84e3fbd45404679499d9604a"}, {file = "asana-3.2.2.tar.gz", hash = "sha256:3a0c64ad5baaa8c52465fe400cedbc873b2127a77df135af518fd8da1af8d6b9"}, @@ -232,6 +240,7 @@ version = "0.3.3" description = "Some handy helper functions for Python's AST module." optional = false python-versions = ">=3.6" +groups = ["dev"] files = [ {file = "astatine-0.3.3-py3-none-any.whl", hash = "sha256:6d8c914f01fbea252cb8f31563f2e766a9ab03c02b9bcc37d18f7d9138828401"}, {file = "astatine-0.3.3.tar.gz", hash = "sha256:0c58a7844b5890ff16da07dbfeb187341d8324cb4378940f89d795cbebebce08"}, @@ -247,6 +256,7 @@ version = "2.4.0" description = "Annotate AST trees with source code positions" optional = false python-versions = "*" +groups = ["dev"] files = [ {file = "asttokens-2.4.0-py2.py3-none-any.whl", hash = "sha256:cf8fc9e61a86461aa9fb161a14a0841a03c405fa829ac6b202670b3495d2ce69"}, {file = "asttokens-2.4.0.tar.gz", hash = "sha256:2e0171b991b2c959acc6c49318049236844a5da1d65ba2672c4880c1c894834e"}, @@ -264,6 +274,7 @@ version = "1.6.3" description = "An AST unparser for Python" optional = false python-versions = "*" +groups = ["main", "dltpure"] files = [ {file = "astunparse-1.6.3-py2.py3-none-any.whl", hash = "sha256:c2652417f2c8b5bb325c885ae329bdf3f86424075c4fd1a128674bc6fba4b8e8"}, {file = "astunparse-1.6.3.tar.gz", hash = "sha256:5ad93a8456f0d084c3456d059fd9a92cce667963232cbf763eac3bc5b7940872"}, @@ -279,6 +290,7 @@ version = "4.0.3" description = "Timeout context manager for asyncio programs" optional = false python-versions = ">=3.7" +groups = ["main", "facebook_ads", "filesystem", "unstructured_data", "unstructured_data_lint"] files = [ {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"}, {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, @@ -290,6 +302,7 @@ version = "23.1.0" description = "Classes Without Boilerplate" optional = false python-versions = ">=3.7" +groups = ["main", "dev", "facebook_ads", "filesystem", "salesforce", "scrapy", "unstructured_data", "unstructured_data_lint"] files = [ {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"}, {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"}, @@ -308,6 +321,7 @@ version = "22.10.0" description = "Self-service finite-state machines for the programmer on the go." optional = false python-versions = "*" +groups = ["dev", "scrapy"] files = [ {file = "Automat-22.10.0-py2.py3-none-any.whl", hash = "sha256:c3164f8742b9dc440f3682482d32aaff7bb53f71740dd018533f9de286b64180"}, {file = "Automat-22.10.0.tar.gz", hash = "sha256:e56beb84edad19dcc11d30e8d9b895f75deeb5ef5e96b84a467066b3b84bb04e"}, @@ -326,6 +340,7 @@ version = "1.29.4" description = "Microsoft Azure Core Library for Python" optional = false python-versions = ">=3.7" +groups = ["filesystem"] files = [ {file = "azure-core-1.29.4.tar.gz", hash = "sha256:500b3aa9bf2e90c5ccc88bb105d056114ca0ce7d0ce73afb8bc4d714b2fc7568"}, {file = "azure_core-1.29.4-py3-none-any.whl", hash = "sha256:b03261bcba22c0b9290faf9999cedd23e849ed2577feee90515694cea6bc74bf"}, @@ -345,6 +360,7 @@ version = "0.0.53" description = "Azure Data Lake Store Filesystem Client Library for Python" optional = false python-versions = "*" +groups = ["filesystem"] files = [ {file = "azure-datalake-store-0.0.53.tar.gz", hash = "sha256:05b6de62ee3f2a0a6e6941e6933b792b800c3e7f6ffce2fc324bc19875757393"}, {file = "azure_datalake_store-0.0.53-py2.py3-none-any.whl", hash = "sha256:a30c902a6e360aa47d7f69f086b426729784e71c536f330b691647a51dc42b2b"}, @@ -361,6 +377,7 @@ version = "1.14.1" description = "Microsoft Azure Identity Library for Python" optional = false python-versions = ">=3.7" +groups = ["filesystem"] files = [ {file = "azure-identity-1.14.1.zip", hash = "sha256:48e2a9dbdc59b4f095f841d867d9a8cbe4c1cdbbad8251e055561afd47b4a9b8"}, {file = "azure_identity-1.14.1-py3-none-any.whl", hash = "sha256:3a5bef8e9c3281e864e869739be8d67424bff616cddae96b546ca2a5168d863d"}, @@ -378,6 +395,7 @@ version = "12.18.3" description = "Microsoft Azure Blob Storage Client Library for Python" optional = false python-versions = ">=3.7" +groups = ["filesystem"] files = [ {file = "azure-storage-blob-12.18.3.tar.gz", hash = "sha256:d8ced0deee3367fa3d4f3d1a03cd9edadf4440c0a371f503d623fa6c807554ee"}, {file = "azure_storage_blob-12.18.3-py3-none-any.whl", hash = "sha256:c278dde2ac41857a68d615c9f2b36d894ba877a7e84d62795603c7e79d0bb5e9"}, @@ -398,6 +416,7 @@ version = "2.2.1" description = "Function decoration for backoff and retry" optional = false python-versions = ">=3.7,<4.0" +groups = ["unstructured_data"] files = [ {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"}, {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, @@ -409,6 +428,8 @@ version = "0.2.1" description = "Backport of the standard library zoneinfo module" optional = false python-versions = ">=3.6" +groups = ["main", "dev", "dltpure", "salesforce"] +markers = "python_version < \"3.9\"" files = [ {file = "backports.zoneinfo-0.2.1-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:da6013fd84a690242c310d77ddb8441a559e9cb3d3d59ebac9aca1a57b2e18bc"}, {file = "backports.zoneinfo-0.2.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:89a48c0d158a3cc3f654da4c2de1ceba85263fafb861b98b59040a5086259722"}, @@ -437,6 +458,7 @@ version = "1.7.5" description = "Security oriented static analyser for python code." optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "bandit-1.7.5-py3-none-any.whl", hash = "sha256:75665181dc1e0096369112541a056c59d1c5f66f9bb74a8d686c3c362b83f549"}, {file = "bandit-1.7.5.tar.gz", hash = "sha256:bdfc739baa03b880c2d15d0431b31c658ffc348e907fe197e54e0389dd59e11e"}, @@ -460,6 +482,7 @@ version = "23.9.1" description = "The uncompromising code formatter." optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "black-23.9.1-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:d6bc09188020c9ac2555a498949401ab35bb6bf76d4e0f8ee251694664df6301"}, {file = "black-23.9.1-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:13ef033794029b85dfea8032c9d3b92b42b526f1ff4bf13b2182ce4e917f5100"}, @@ -506,6 +529,7 @@ version = "1.31.17" description = "Low-level, data-driven core of boto 3." optional = false python-versions = ">= 3.7" +groups = ["filesystem"] files = [ {file = "botocore-1.31.17-py3-none-any.whl", hash = "sha256:6ac34a1d34aa3750e78b77b8596617e2bab938964694d651939dba2cbde2c12b"}, {file = "botocore-1.31.17.tar.gz", hash = "sha256:396459065dba4339eb4da4ec8b4e6599728eb89b7caaceea199e26f7d824a41c"}, @@ -525,6 +549,7 @@ version = "5.3.1" description = "Extensible memoizing collections and decorators" optional = false python-versions = ">=3.7" +groups = ["main", "filesystem", "google_ads", "google_analytics", "google_sheets"] files = [ {file = "cachetools-5.3.1-py3-none-any.whl", hash = "sha256:95ef631eeaea14ba2e36f06437f36463aac3a096799e876ee55e5cdccb102590"}, {file = "cachetools-5.3.1.tar.gz", hash = "sha256:dce83f2d9b4e1f732a8cd44af8e8fab2dbe46201467fc98b3ef8f269092bf62b"}, @@ -536,6 +561,7 @@ version = "2023.7.22" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.6" +groups = ["main", "airtable", "asana_dlt", "dev", "dltpure", "facebook_ads", "filesystem", "google_ads", "google_analytics", "google_sheets", "salesforce", "scrapy", "stripe_analytics", "unstructured_data", "unstructured_data_lint"] files = [ {file = "certifi-2023.7.22-py3-none-any.whl", hash = "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9"}, {file = "certifi-2023.7.22.tar.gz", hash = "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082"}, @@ -547,6 +573,7 @@ version = "1.16.0" description = "Foreign Function Interface for Python calling C code." optional = false python-versions = ">=3.8" +groups = ["main", "filesystem", "salesforce", "scrapy", "unstructured_data"] files = [ {file = "cffi-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088"}, {file = "cffi-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9"}, @@ -601,6 +628,7 @@ files = [ {file = "cffi-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8"}, {file = "cffi-1.16.0.tar.gz", hash = "sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0"}, ] +markers = {main = "platform_python_implementation == \"PyPy\""} [package.dependencies] pycparser = "*" @@ -611,6 +639,7 @@ version = "5.2.0" description = "Universal encoding detector for Python 3" optional = false python-versions = ">=3.7" +groups = ["unstructured_data"] files = [ {file = "chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970"}, {file = "chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7"}, @@ -622,6 +651,7 @@ version = "3.3.0" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." optional = false python-versions = ">=3.7.0" +groups = ["main", "airtable", "asana_dlt", "dev", "dltpure", "facebook_ads", "filesystem", "google_ads", "google_analytics", "google_sheets", "salesforce", "scrapy", "stripe_analytics", "unstructured_data", "unstructured_data_lint"] files = [ {file = "charset-normalizer-3.3.0.tar.gz", hash = "sha256:63563193aec44bce707e0c5ca64ff69fa72ed7cf34ce6e11d5127555756fd2f6"}, {file = "charset_normalizer-3.3.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:effe5406c9bd748a871dbcaf3ac69167c38d72db8c9baf3ff954c344f31c4cbe"}, @@ -721,6 +751,7 @@ version = "0.3.29" description = "Chroma." optional = false python-versions = ">=3.7" +groups = ["unstructured_data"] files = [ {file = "chromadb-0.3.29-py3-none-any.whl", hash = "sha256:d681a3e4f3284715dd146774be84cad3d2f8c529bd004ba249e1d3deb70ac68e"}, {file = "chromadb-0.3.29.tar.gz", hash = "sha256:29d47835da494fc1b58da40abb1435689d4ba1c93df6c64664a5d91521cb80e9"}, @@ -751,6 +782,7 @@ version = "8.1.7" description = "Composable command line interface toolkit" optional = false python-versions = ">=3.7" +groups = ["main", "dev", "dltpure", "unstructured_data"] files = [ {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, @@ -765,6 +797,7 @@ version = "0.6.14" description = "ClickHouse Database Core Driver for Python, Pandas, and Superset" optional = false python-versions = "~=3.7" +groups = ["unstructured_data"] files = [ {file = "clickhouse-connect-0.6.14.tar.gz", hash = "sha256:0531bbd5b8bdee616bf1cca5ddcb0af86db12e2b48fd39257a8ecdf32200bd57"}, {file = "clickhouse_connect-0.6.14-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:04affbd255fb8b1e4a882ddc1336c86530976d05578f47bb65e3a53471d291e4"}, @@ -853,10 +886,12 @@ version = "0.4.6" description = "Cross-platform colored terminal text." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +groups = ["main", "dev", "dltpure", "pytest", "unstructured_data", "unstructured_data_lint"] files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +markers = {main = "platform_system == \"Windows\"", dev = "sys_platform == \"win32\" or platform_system == \"Windows\"", dltpure = "platform_system == \"Windows\"", pytest = "sys_platform == \"win32\"", unstructured_data = "sys_platform == \"win32\" or platform_system == \"Windows\"", unstructured_data_lint = "platform_system == \"Windows\""} [[package]] name = "coloredlogs" @@ -864,6 +899,7 @@ version = "15.0.1" description = "Colored terminal output for Python's logging module" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +groups = ["unstructured_data"] files = [ {file = "coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934"}, {file = "coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0"}, @@ -881,6 +917,7 @@ version = "2.3.0" description = "Confluent's Python client for Apache Kafka" optional = false python-versions = "*" +groups = ["dev"] files = [ {file = "confluent-kafka-2.3.0.tar.gz", hash = "sha256:4069e7b56e0baf9db18c053a605213f0ab2d8f23715dca7b3bd97108df446ced"}, {file = "confluent_kafka-2.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5df845755cd3ebb9165ca00fd1d3a7d514c61e84d9fcbe7babb91193fe9b369c"}, @@ -932,6 +969,7 @@ version = "0.3.2" description = "" optional = false python-versions = "*" +groups = ["sql_database"] files = [ {file = "connectorx-0.3.2-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:98274242c64a2831a8b1c86e0fa2c46a557dd8cbcf00c3adcf5a602455fb02d7"}, {file = "connectorx-0.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e2b11ba49efd330a7348bef3ce09c98218eea21d92a12dd75cd8f0ade5c99ffc"}, @@ -957,6 +995,7 @@ version = "23.10.4" description = "Symbolic constants in Python" optional = false python-versions = ">=3.8" +groups = ["dev", "scrapy"] files = [ {file = "constantly-23.10.4-py3-none-any.whl", hash = "sha256:3fd9b4d1c3dc1ec9757f3c52aef7e53ad9323dbe39f51dfd4c43853b68dfa3f9"}, {file = "constantly-23.10.4.tar.gz", hash = "sha256:aa92b70a33e2ac0bb33cd745eb61776594dc48764b06c35e0efd050b7f1c7cbd"}, @@ -968,6 +1007,7 @@ version = "41.0.4" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." optional = false python-versions = ">=3.7" +groups = ["filesystem", "salesforce", "scrapy", "unstructured_data"] files = [ {file = "cryptography-41.0.4-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:80907d3faa55dc5434a16579952ac6da800935cd98d14dbd62f6f042c7f5e839"}, {file = "cryptography-41.0.4-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:35c00f637cd0b9d5b6c6bd11b6c3359194a8eba9c46d4e875a3660e3b400005f"}, @@ -1013,6 +1053,7 @@ version = "1.2.0" description = "cssselect parses CSS3 Selectors and translates them to XPath 1.0" optional = false python-versions = ">=3.7" +groups = ["scrapy"] files = [ {file = "cssselect-1.2.0-py2.py3-none-any.whl", hash = "sha256:da1885f0c10b60c03ed5eccbb6b68d6eff248d91976fcde348f395d54c9fd35e"}, {file = "cssselect-1.2.0.tar.gz", hash = "sha256:666b19839cfaddb9ce9d36bfe4c969132c647b92fc9088c4e23f786b30f1b3dc"}, @@ -1024,6 +1065,7 @@ version = "2.2.1" description = "Library to convert python requests object to curl command." optional = false python-versions = "*" +groups = ["facebook_ads"] files = [ {file = "curlify-2.2.1.tar.gz", hash = "sha256:0d3f02e7235faf952de8ef45ef469845196d30632d5838bcd5aee217726ddd6d"}, ] @@ -1037,6 +1079,7 @@ version = "0.5.9" description = "Easily serialize dataclasses to and from JSON" optional = false python-versions = ">=3.6" +groups = ["unstructured_data", "unstructured_data_lint"] files = [ {file = "dataclasses-json-0.5.9.tar.gz", hash = "sha256:e9ac87b73edc0141aafbce02b44e93553c3123ad574958f0fe52a534b6707e8e"}, {file = "dataclasses_json-0.5.9-py3-none-any.whl", hash = "sha256:1280542631df1c375b7bc92e5b86d39e06c44760d7e3571a537b3b8acabf2f0c"}, @@ -1056,6 +1099,7 @@ version = "1.3.1" description = "Pandas Data Types for SQL systems (BigQuery, Spanner)" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "db_dtypes-1.3.1-py2.py3-none-any.whl", hash = "sha256:fbc9d1740d94aaf2b5ae24601cfc875a69b4635bb9d049e3c3036e9f10203af8"}, {file = "db_dtypes-1.3.1.tar.gz", hash = "sha256:a058f05dab100891f3e76a7a3db9ad0f107f18dd3d1bdd13680749a2f07eae77"}, @@ -1073,6 +1117,7 @@ version = "5.1.1" description = "Decorators for Humans" optional = false python-versions = ">=3.5" +groups = ["main", "filesystem"] files = [ {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, @@ -1084,6 +1129,7 @@ version = "1.3.0" description = "dlt is an open-source python-first scalable data loading library that does not require any backend to run." optional = false python-versions = "<3.13,>=3.8.1" +groups = ["main", "dltpure"] files = [ {file = "dlt-1.3.0-py3-none-any.whl", hash = "sha256:e2583ed0ad4a0d9941b8f9cb0e078f4443bcbeb0e1cf1cce586cf35107ccf266"}, {file = "dlt-1.3.0.tar.gz", hash = "sha256:57eecee99ace25b6d37027a78f59f8c735d1913cc81f1101e1b47bf96fc544b8"}, @@ -1160,6 +1206,7 @@ version = "2.4.2" description = "DNS toolkit" optional = false python-versions = ">=3.8,<4.0" +groups = ["mongodb"] files = [ {file = "dnspython-2.4.2-py3-none-any.whl", hash = "sha256:57c6fbaaeaaf39c891292012060beb141791735dbb4004798328fc2c467402d8"}, {file = "dnspython-2.4.2.tar.gz", hash = "sha256:8dcfae8c7460a2f84b4072e26f1c9f4101ca20c071649cb7c34e8b6a93d58984"}, @@ -1179,6 +1226,7 @@ version = "3.6.1" description = "Helpful functions for Python 🐍 🛠️" optional = false python-versions = ">=3.6" +groups = ["dev"] files = [ {file = "domdf_python_tools-3.6.1-py3-none-any.whl", hash = "sha256:e18158460850957f18e740eb94ede56f580ddb0cb162ab9d9834ed8bbb1b6431"}, {file = "domdf_python_tools-3.6.1.tar.gz", hash = "sha256:acc04563d23bce4d437dd08af6b9bea788328c412772a044d8ca428a7ad861be"}, @@ -1199,6 +1247,7 @@ version = "0.10.3" description = "DuckDB in-process database" optional = false python-versions = ">=3.7.0" +groups = ["main", "unstructured_data"] files = [ {file = "duckdb-0.10.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd25cc8d001c09a19340739ba59d33e12a81ab285b7a6bed37169655e1cefb31"}, {file = "duckdb-0.10.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2f9259c637b917ca0f4c63887e8d9b35ec248f5d987c886dfc4229d66a791009"}, @@ -1255,6 +1304,7 @@ version = "1.1.0" description = "An implementation of lxml.xmlfile for the standard library" optional = false python-versions = ">=3.6" +groups = ["unstructured_data"] files = [ {file = "et_xmlfile-1.1.0-py3-none-any.whl", hash = "sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada"}, {file = "et_xmlfile-1.1.0.tar.gz", hash = "sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c"}, @@ -1266,6 +1316,8 @@ version = "1.1.3" description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" +groups = ["dev", "pytest", "unstructured_data"] +markers = "python_version < \"3.11\"" files = [ {file = "exceptiongroup-1.1.3-py3-none-any.whl", hash = "sha256:343280667a4585d195ca1cf9cef84a4e178c4b6cf2274caef9859782b567d5e3"}, {file = "exceptiongroup-1.1.3.tar.gz", hash = "sha256:097acd85d473d75af5bb98e41b61ff7fe35efe6675e4f9370ec6ec5126d160e9"}, @@ -1280,6 +1332,7 @@ version = "17.0.4" description = "Facebook Business SDK" optional = false python-versions = "*" +groups = ["facebook_ads"] files = [ {file = "facebook_business-17.0.4-py3-none-any.whl", hash = "sha256:c3a4afbe019c1fd2454eeeefb4e895ed3276d506115fbf9a993135f6af1c1a88"}, {file = "facebook_business-17.0.4.tar.gz", hash = "sha256:52b516a237ab4cbf083053d3cc062995ff4732fca487b46543c4eab3bdbbf188"}, @@ -1298,6 +1351,7 @@ version = "0.85.1" description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" optional = false python-versions = ">=3.7" +groups = ["unstructured_data"] files = [ {file = "fastapi-0.85.1-py3-none-any.whl", hash = "sha256:de3166b6b1163dc22da4dc4ebdc3192fcbac7700dd1870a1afa44de636a636b5"}, {file = "fastapi-0.85.1.tar.gz", hash = "sha256:1facd097189682a4ff11cbd01334a992e51b56be663b2bd50c2c09523624f144"}, @@ -1319,6 +1373,7 @@ version = "3.12.4" description = "A platform independent file lock." optional = false python-versions = ">=3.8" +groups = ["scrapy", "unstructured_data"] files = [ {file = "filelock-3.12.4-py3-none-any.whl", hash = "sha256:08c21d87ded6e2b9da6728c3dff51baf1dcecf973b768ef35bcbc3447edb9ad4"}, {file = "filelock-3.12.4.tar.gz", hash = "sha256:2e6f249f1f3654291606e046b09f1fd5eac39b360664c27f5aad072012f8bcbd"}, @@ -1335,6 +1390,7 @@ version = "1.2.0" description = "Infer file type and MIME type of any file/buffer. No external dependencies." optional = false python-versions = "*" +groups = ["unstructured_data"] files = [ {file = "filetype-1.2.0-py2.py3-none-any.whl", hash = "sha256:7ce71b6880181241cf7ac8697a2f1eb6a8bd9b429f7ad6d27b8db9ba5f1c2d25"}, {file = "filetype-1.2.0.tar.gz", hash = "sha256:66b56cd6474bf41d8c54660347d37afcc3f7d1970648de365c102ef77548aadb"}, @@ -1346,6 +1402,7 @@ version = "6.1.0" description = "the modular source code checker: pep8 pyflakes and co" optional = false python-versions = ">=3.8.1" +groups = ["dev"] files = [ {file = "flake8-6.1.0-py2.py3-none-any.whl", hash = "sha256:ffdfce58ea94c6580c77888a86506937f9a1a227dfcd15f245d694ae20a6b6e5"}, {file = "flake8-6.1.0.tar.gz", hash = "sha256:d5b3857f07c030bdb5bf41c7f53799571d75c4491748a3adcd47de929e34cd23"}, @@ -1362,6 +1419,7 @@ version = "22.12.6" description = "A plugin for flake8 finding likely bugs and design problems in your program. Contains warnings that don't belong in pyflakes and pycodestyle." optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "flake8-bugbear-22.12.6.tar.gz", hash = "sha256:4cdb2c06e229971104443ae293e75e64c6107798229202fbe4f4091427a30ac0"}, {file = "flake8_bugbear-22.12.6-py3-none-any.whl", hash = "sha256:b69a510634f8a9c298dfda2b18a8036455e6b19ecac4fe582e4d7a0abfa50a30"}, @@ -1380,6 +1438,7 @@ version = "2.1.0" description = "Check for python builtins being used as variables or parameters." optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "flake8-builtins-2.1.0.tar.gz", hash = "sha256:12ff1ee96dd4e1f3141141ee6c45a5c7d3b3c440d0949e9b8d345c42b39c51d4"}, {file = "flake8_builtins-2.1.0-py3-none-any.whl", hash = "sha256:469e8f03d6d0edf4b1e62b6d5a97dce4598592c8a13ec8f0952e7a185eba50a1"}, @@ -1397,6 +1456,7 @@ version = "0.5.0.post1" description = "A Flake8 plugin to identify incorrect use of encodings." optional = false python-versions = ">=3.6" +groups = ["dev"] files = [ {file = "flake8_encodings-0.5.0.post1-py3-none-any.whl", hash = "sha256:d2fecca0e89ba09c86e5d61cf6bdb1b337f0d74746aac67bbcf0c517b4cb6cba"}, {file = "flake8_encodings-0.5.0.post1.tar.gz", hash = "sha256:082c0163325c85b438a8106e876283b5ed3cbfc53e68d89130d70be8be4c9977"}, @@ -1418,6 +1478,7 @@ version = "0.2.1" description = "A helper library for Flake8 plugins." optional = false python-versions = ">=3.6" +groups = ["dev"] files = [ {file = "flake8_helper-0.2.1-py3-none-any.whl", hash = "sha256:9123cdf351ad32ee8a51b85036052302c478122d62fb512c0773e111b3d05241"}, {file = "flake8_helper-0.2.1.tar.gz", hash = "sha256:479f86d1c52df8e49ff876ecd3873242699f93eeece7e6675cdca9c37c9b0a16"}, @@ -1432,6 +1493,7 @@ version = "4.10.0" description = "A flake8 plugin that helps you write tidier imports." optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "flake8_tidy_imports-4.10.0-py3-none-any.whl", hash = "sha256:b0387fb2ea200441bd142309e716fb7b8f4b0937bdf5f8b7c0c118a5f5e2b8ed"}, {file = "flake8_tidy_imports-4.10.0.tar.gz", hash = "sha256:bd6cf86465402d2b86903009b748d85a628e599e17b76e810c9857e3a2815173"}, @@ -1446,6 +1508,7 @@ version = "23.5.26" description = "The FlatBuffers serialization format for Python" optional = false python-versions = "*" +groups = ["unstructured_data"] files = [ {file = "flatbuffers-23.5.26-py2.py3-none-any.whl", hash = "sha256:c0ff356da363087b915fde4b8b45bdda73432fc17cddb3c8157472eab1422ad1"}, {file = "flatbuffers-23.5.26.tar.gz", hash = "sha256:9ea1144cac05ce5d86e2859f431c6cd5e66cd9c78c558317c7955fb8d4c78d89"}, @@ -1457,6 +1520,7 @@ version = "1.4.0" description = "A list-like structure which implements collections.abc.MutableSequence" optional = false python-versions = ">=3.8" +groups = ["main", "facebook_ads", "filesystem", "unstructured_data", "unstructured_data_lint"] files = [ {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:764226ceef3125e53ea2cb275000e309c0aa5464d43bd72abd661e27fffc26ab"}, {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d6484756b12f40003c6128bfcc3fa9f0d49a687e171186c2d85ec82e3758c559"}, @@ -1527,6 +1591,7 @@ version = "2024.3.1" description = "File-system specification" optional = false python-versions = ">=3.8" +groups = ["main", "dltpure", "filesystem", "unstructured_data"] files = [ {file = "fsspec-2024.3.1-py3-none-any.whl", hash = "sha256:918d18d41bf73f0e2b261824baeb1b124bcf771767e3a26425cd7dec3332f512"}, {file = "fsspec-2024.3.1.tar.gz", hash = "sha256:f39780e282d7d117ffb42bb96992f8a90795e4d0fb0f661a70ca39fe9c43ded9"}, @@ -1562,6 +1627,7 @@ version = "2024.3.1" description = "Convenient Filesystem interface over GCS" optional = false python-versions = ">=3.8" +groups = ["main", "filesystem"] files = [ {file = "gcsfs-2024.3.1-py2.py3-none-any.whl", hash = "sha256:57ec693a25b74637f00e7a834b4f1dcd7a7511217f7f640072d6fb51a7794bac"}, {file = "gcsfs-2024.3.1.tar.gz", hash = "sha256:d34bdb8a1a51e1b2552ae9e47d1933dec41162ba6b6cc8ea470aef693a8a6aa6"}, @@ -1586,6 +1652,7 @@ version = "4.0.10" description = "Git Object Database" optional = false python-versions = ">=3.7" +groups = ["main", "dev", "dltpure"] files = [ {file = "gitdb-4.0.10-py3-none-any.whl", hash = "sha256:c286cf298426064079ed96a9e4a9d39e7f3e9bf15ba60701e95f5492f28415c7"}, {file = "gitdb-4.0.10.tar.gz", hash = "sha256:6eb990b69df4e15bad899ea868dc46572c3f75339735663b81de79b06f17eb9a"}, @@ -1600,6 +1667,7 @@ version = "3.1.37" description = "GitPython is a Python library used to interact with Git repositories" optional = false python-versions = ">=3.7" +groups = ["main", "dev", "dltpure"] files = [ {file = "GitPython-3.1.37-py3-none-any.whl", hash = "sha256:5f4c4187de49616d710a77e98ddf17b4782060a1788df441846bddefbb89ab33"}, {file = "GitPython-3.1.37.tar.gz", hash = "sha256:f9b9ddc0761c125d5780eab2d64be4873fc6817c2899cbcb34b02344bdc7bc54"}, @@ -1617,6 +1685,7 @@ version = "0.12.0" description = "A Git URL parsing module (supports parsing and rewriting)" optional = false python-versions = ">=3.8" +groups = ["main", "dltpure"] files = [ {file = "giturlparse-0.12.0-py2.py3-none-any.whl", hash = "sha256:412b74f2855f1da2fefa89fd8dde62df48476077a72fc19b62039554d27360eb"}, {file = "giturlparse-0.12.0.tar.gz", hash = "sha256:c0fff7c21acc435491b1779566e038757a205c1ffdcb47e4f81ea52ad8c3859a"}, @@ -1628,6 +1697,7 @@ version = "21.3.0" description = "Client library for the Google Ads API" optional = false python-versions = ">=3.7" +groups = ["google_ads"] files = [ {file = "google-ads-21.3.0.tar.gz", hash = "sha256:bd4fcb6bd5e55bace413e889e82012d48578aa28f7b4d726c86e2d594c753c6c"}, {file = "google_ads-21.3.0-py3-none-any.whl", hash = "sha256:961943fc737941a38f1a826681f7974448df7c60e6c8db2ac7168b26d66738a7"}, @@ -1653,6 +1723,7 @@ version = "0.16.3" description = "Google Analytics Data API client library" optional = false python-versions = ">=3.7" +groups = ["google_analytics"] files = [ {file = "google-analytics-data-0.16.3.tar.gz", hash = "sha256:f29431ec63ab462f7a9b42227521d148c877307c629e308c284025ad834aab52"}, {file = "google_analytics_data-0.16.3-py2.py3-none-any.whl", hash = "sha256:bb73f36707a5a2966e87c9439c25cd8004d58305b0ef01c6f2f50128c08feb13"}, @@ -1672,6 +1743,7 @@ version = "2.12.0" description = "Google API client core library" optional = false python-versions = ">=3.7" +groups = ["main", "filesystem", "google_ads", "google_analytics", "google_sheets"] files = [ {file = "google-api-core-2.12.0.tar.gz", hash = "sha256:c22e01b1e3c4dcd90998494879612c38d0a3411d1f7b679eb89e2abe3ce1f553"}, {file = "google_api_core-2.12.0-py3-none-any.whl", hash = "sha256:ec6054f7d64ad13b41e43d96f735acbd763b0f3b695dabaa2d579673f6a6e160"}, @@ -1702,6 +1774,7 @@ version = "2.129.0" description = "Google API Client Library for Python" optional = false python-versions = ">=3.7" +groups = ["google_ads", "google_analytics", "google_sheets"] files = [ {file = "google-api-python-client-2.129.0.tar.gz", hash = "sha256:984cc8cc8eb4923468b1926d2b8effc5b459a4dda3c845896eb87c153b28ef84"}, {file = "google_api_python_client-2.129.0-py2.py3-none-any.whl", hash = "sha256:d50f7e2dfdbb7fc2732f6a0cba1c54d7bb676390679526c6bb628c901e43ec86"}, @@ -1720,6 +1793,7 @@ version = "2.23.3" description = "Google Authentication Library" optional = false python-versions = ">=3.7" +groups = ["main", "filesystem", "google_ads", "google_analytics", "google_sheets"] files = [ {file = "google-auth-2.23.3.tar.gz", hash = "sha256:6864247895eea5d13b9c57c9e03abb49cb94ce2dc7c58e91cba3248c7477c9e3"}, {file = "google_auth-2.23.3-py2.py3-none-any.whl", hash = "sha256:a8f4608e65c244ead9e0538f181a96c6e11199ec114d41f1d7b1bffa96937bda"}, @@ -1743,6 +1817,7 @@ version = "0.2.0" description = "Google Authentication Library: httplib2 transport" optional = false python-versions = "*" +groups = ["google_ads", "google_analytics", "google_sheets"] files = [ {file = "google-auth-httplib2-0.2.0.tar.gz", hash = "sha256:38aa7badf48f974f1eb9861794e9c0cb2a0511a4ec0679b1f886d108f5640e05"}, {file = "google_auth_httplib2-0.2.0-py2.py3-none-any.whl", hash = "sha256:b65a0a2123300dd71281a7bf6e64d65a0759287df52729bdd1ae2e47dc311a3d"}, @@ -1758,6 +1833,7 @@ version = "1.1.0" description = "Google Authentication Library" optional = false python-versions = ">=3.6" +groups = ["main", "filesystem", "google_ads", "google_analytics"] files = [ {file = "google-auth-oauthlib-1.1.0.tar.gz", hash = "sha256:83ea8c3b0881e453790baff4448e8a6112ac8778d1de9da0b68010b843937afb"}, {file = "google_auth_oauthlib-1.1.0-py2.py3-none-any.whl", hash = "sha256:089c6e587d36f4803ac7e0720c045c6a8b1fd1790088b8424975b90d0ee61c12"}, @@ -1776,6 +1852,7 @@ version = "3.25.0" description = "Google BigQuery API client library" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "google-cloud-bigquery-3.25.0.tar.gz", hash = "sha256:5b2aff3205a854481117436836ae1403f11f2594e6810a98886afd57eda28509"}, {file = "google_cloud_bigquery-3.25.0-py2.py3-none-any.whl", hash = "sha256:7f0c371bc74d2a7fb74dacbc00ac0f90c8c2bec2289b51dd6685a275873b1ce9"}, @@ -1807,6 +1884,7 @@ version = "2.3.3" description = "Google Cloud API client core library" optional = false python-versions = ">=3.7" +groups = ["main", "filesystem"] files = [ {file = "google-cloud-core-2.3.3.tar.gz", hash = "sha256:37b80273c8d7eee1ae816b3a20ae43585ea50506cb0e60f3cf5be5f87f1373cb"}, {file = "google_cloud_core-2.3.3-py2.py3-none-any.whl", hash = "sha256:fbd11cad3e98a7e5b0343dc07cb1039a5ffd7a5bb96e1f1e27cee4bda4a90863"}, @@ -1825,6 +1903,7 @@ version = "2.12.0" description = "Google Cloud Storage API client library" optional = false python-versions = ">=3.7" +groups = ["main", "filesystem"] files = [ {file = "google-cloud-storage-2.12.0.tar.gz", hash = "sha256:57c0bcda2f5e11f008a155d8636d8381d5abab46b58e0cae0e46dd5e595e6b46"}, {file = "google_cloud_storage-2.12.0-py2.py3-none-any.whl", hash = "sha256:bc52563439d42981b6e21b071a76da2791672776eda3ba99d13a8061ebbd6e5e"}, @@ -1847,6 +1926,7 @@ version = "1.5.0" description = "A python wrapper of the C library 'Google CRC32C'" optional = false python-versions = ">=3.7" +groups = ["main", "filesystem"] files = [ {file = "google-crc32c-1.5.0.tar.gz", hash = "sha256:89284716bc6a5a415d4eaa11b1726d2d60a0cd12aadf5439828353662ede9dd7"}, {file = "google_crc32c-1.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:596d1f98fc70232fcb6590c439f43b350cb762fb5d61ce7b0e9db4539654cc13"}, @@ -1927,6 +2007,7 @@ version = "2.6.0" description = "Utilities for Google Media Downloads and Resumable Uploads" optional = false python-versions = ">= 3.7" +groups = ["main", "filesystem"] files = [ {file = "google-resumable-media-2.6.0.tar.gz", hash = "sha256:972852f6c65f933e15a4a210c2b96930763b47197cdf4aa5f5bea435efb626e7"}, {file = "google_resumable_media-2.6.0-py2.py3-none-any.whl", hash = "sha256:fc03d344381970f79eebb632a3c18bb1828593a2dc5572b5f90115ef7d11e81b"}, @@ -1945,6 +2026,7 @@ version = "1.61.0" description = "Common protobufs used in Google APIs" optional = false python-versions = ">=3.7" +groups = ["main", "filesystem", "google_ads", "google_analytics", "google_sheets"] files = [ {file = "googleapis-common-protos-1.61.0.tar.gz", hash = "sha256:8a64866a97f6304a7179873a465d6eee97b7a24ec6cfd78e0f575e96b821240b"}, {file = "googleapis_common_protos-1.61.0-py2.py3-none-any.whl", hash = "sha256:22f1915393bb3245343f6efe87f6fe868532efc12aa26b391b15132e1279f1c0"}, @@ -1962,6 +2044,8 @@ version = "1.0.3" description = "Backport of the Python 3.9 graphlib module for Python 3.6+" optional = false python-versions = ">=3.6,<4.0" +groups = ["main", "dltpure", "unstructured_data"] +markers = "python_version < \"3.9\"" files = [ {file = "graphlib_backport-1.0.3-py3-none-any.whl", hash = "sha256:24246967b9e7e6a91550bc770e6169585d35aa32790258579a8a3899a8c18fde"}, {file = "graphlib_backport-1.0.3.tar.gz", hash = "sha256:7bb8fc7757b8ae4e6d8000a26cd49e9232aaa9a3aa57edb478474b8424bfaae2"}, @@ -1973,6 +2057,7 @@ version = "2.0.2" description = "Lightweight in-process concurrent programming" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" +groups = ["dev", "sql_database", "unstructured_data", "unstructured_data_lint"] files = [ {file = "greenlet-2.0.2-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:bdfea8c661e80d3c1c99ad7c3ff74e6e87184895bbaca6ee8cc61209f8b9b85d"}, {file = "greenlet-2.0.2-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:9d14b83fab60d5e8abe587d51c75b252bcc21683f24699ada8fb275d7712f5a9"}, @@ -2050,6 +2135,7 @@ version = "1.59.0" description = "HTTP/2-based RPC framework" optional = false python-versions = ">=3.7" +groups = ["main", "google_ads", "google_analytics"] files = [ {file = "grpcio-1.59.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:225e5fa61c35eeaebb4e7491cd2d768cd8eb6ed00f2664fa83a58f29418b39fd"}, {file = "grpcio-1.59.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:b95ec8ecc4f703f5caaa8d96e93e40c7f589bad299a2617bdb8becbcce525539"}, @@ -2116,6 +2202,7 @@ version = "1.59.0" description = "Status proto mapping for gRPC" optional = false python-versions = ">=3.6" +groups = ["main", "google_ads", "google_analytics"] files = [ {file = "grpcio-status-1.59.0.tar.gz", hash = "sha256:f93b9c33e0a26162ef8431bfcffcc3e1fb217ccd8d7b5b3061b6e9f813e698b5"}, {file = "grpcio_status-1.59.0-py3-none-any.whl", hash = "sha256:cb5a222b14a80ee050bff9676623822e953bff0c50d2d29180de723652fdf10d"}, @@ -2132,6 +2219,7 @@ version = "0.14.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" optional = false python-versions = ">=3.7" +groups = ["unstructured_data"] files = [ {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, @@ -2143,6 +2231,7 @@ version = "0.3.1" description = "hexbytes: Python `bytes` subclass that decodes hex, with a readable console output" optional = false python-versions = ">=3.7, <4" +groups = ["main", "dltpure"] files = [ {file = "hexbytes-0.3.1-py3-none-any.whl", hash = "sha256:383595ad75026cf00abd570f44b368c6cdac0c6becfae5c39ff88829877f8a59"}, {file = "hexbytes-0.3.1.tar.gz", hash = "sha256:a3fe35c6831ee8fafd048c4c086b986075fc14fd46258fa24ecb8d65745f9a9d"}, @@ -2160,6 +2249,7 @@ version = "0.7.0" description = "hnswlib" optional = false python-versions = "*" +groups = ["unstructured_data"] files = [ {file = "hnswlib-0.7.0.tar.gz", hash = "sha256:bc459668e7e44bb7454b256b90c98c5af750653919d9a91698dafcf416cf64c4"}, ] @@ -2173,6 +2263,7 @@ version = "0.22.0" description = "A comprehensive HTTP client library." optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +groups = ["google_ads", "google_analytics", "google_sheets"] files = [ {file = "httplib2-0.22.0-py3-none-any.whl", hash = "sha256:14ae0a53c1ba8f3d37e9e27cf37eabb0fb9980f435ba405d546948b009dd64dc"}, {file = "httplib2-0.22.0.tar.gz", hash = "sha256:d7a10bc5ef5ab08322488bde8c726eeee5c8618723fdb399597ec58f3d82df81"}, @@ -2187,6 +2278,7 @@ version = "0.6.0" description = "A collection of framework independent HTTP protocol utils." optional = false python-versions = ">=3.5.0" +groups = ["unstructured_data"] files = [ {file = "httptools-0.6.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:818325afee467d483bfab1647a72054246d29f9053fd17cc4b86cda09cc60339"}, {file = "httptools-0.6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72205730bf1be875003692ca54a4a7c35fac77b4746008966061d9d41a61b0f5"}, @@ -2234,6 +2326,7 @@ version = "0.17.3" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = false python-versions = ">=3.8.0" +groups = ["unstructured_data"] files = [ {file = "huggingface_hub-0.17.3-py3-none-any.whl", hash = "sha256:545eb3665f6ac587add946e73984148f2ea5c7877eac2e845549730570c1933a"}, {file = "huggingface_hub-0.17.3.tar.gz", hash = "sha256:40439632b211311f788964602bf8b0d9d6b7a2314fba4e8d67b2ce3ecea0e3fd"}, @@ -2267,6 +2360,7 @@ version = "10.0" description = "Human friendly output for text interfaces using Python" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +groups = ["unstructured_data"] files = [ {file = "humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477"}, {file = "humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc"}, @@ -2281,6 +2375,7 @@ version = "4.8.0" description = "Python humanize utilities" optional = false python-versions = ">=3.8" +groups = ["main", "dltpure"] files = [ {file = "humanize-4.8.0-py3-none-any.whl", hash = "sha256:8bc9e2bb9315e61ec06bf690151ae35aeb65651ab091266941edf97c90836404"}, {file = "humanize-4.8.0.tar.gz", hash = "sha256:9783373bf1eec713a770ecaa7c2d7a7902c98398009dfa3d8a2df91eec9311e8"}, @@ -2295,6 +2390,7 @@ version = "21.0.0" description = "A featureful, immutable, and correct URL for Python." optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +groups = ["dev", "scrapy"] files = [ {file = "hyperlink-21.0.0-py2.py3-none-any.whl", hash = "sha256:e6b14c37ecb73e89c77d78cdb4c2cc8f3fb59a885c5b3f819ff4ed80f25af1b4"}, {file = "hyperlink-21.0.0.tar.gz", hash = "sha256:427af957daa58bc909471c6c40f74c5450fa123dd093fc53efd2e91d2705a56b"}, @@ -2309,6 +2405,7 @@ version = "3.4" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.5" +groups = ["main", "airtable", "asana_dlt", "dev", "dltpure", "facebook_ads", "filesystem", "google_ads", "google_analytics", "google_sheets", "salesforce", "scrapy", "stripe_analytics", "unstructured_data", "unstructured_data_lint"] files = [ {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, @@ -2320,10 +2417,12 @@ version = "6.8.0" description = "Read metadata from Python packages" optional = false python-versions = ">=3.8" +groups = ["dev", "unstructured_data"] files = [ {file = "importlib_metadata-6.8.0-py3-none-any.whl", hash = "sha256:3ebb78df84a805d7698245025b975d9d67053cd94c79245ba4b3eb694abe68bb"}, {file = "importlib_metadata-6.8.0.tar.gz", hash = "sha256:dbace7892d8c0c4ac1ad096662232f831d4e64f4c4545bd53016a3e9d4654743"}, ] +markers = {dev = "python_version < \"3.9\"", unstructured_data = "python_version < \"3.10\""} [package.dependencies] zipp = ">=0.5" @@ -2339,6 +2438,8 @@ version = "6.4.0" description = "Read resources from Python packages" optional = false python-versions = ">=3.8" +groups = ["main", "dev", "dltpure", "salesforce"] +markers = "python_version < \"3.9\"" files = [ {file = "importlib_resources-6.4.0-py3-none-any.whl", hash = "sha256:50d10f043df931902d4194ea07ec57960f66a80449ff867bfe782b4c486ba78c"}, {file = "importlib_resources-6.4.0.tar.gz", hash = "sha256:cdb2b453b8046ca4e3798eb1d84f3cce1446a0e8e7b5ef4efb600f19fc398145"}, @@ -2357,6 +2458,7 @@ version = "22.10.0" description = "\"A small library that versions your Python projects.\"" optional = false python-versions = "*" +groups = ["dev", "scrapy"] files = [ {file = "incremental-22.10.0-py2.py3-none-any.whl", hash = "sha256:b864a1f30885ee72c5ac2835a761b8fe8aa9c28b9395cacf27286602688d3e51"}, {file = "incremental-22.10.0.tar.gz", hash = "sha256:912feeb5e0f7e0188e6f42241d2f450002e11bbc0937c65865045854c24c0bd0"}, @@ -2372,6 +2474,7 @@ version = "0.5.1" description = "A port of Ruby on Rails inflector to Python" optional = false python-versions = ">=3.5" +groups = ["airtable"] files = [ {file = "inflection-0.5.1-py2.py3-none-any.whl", hash = "sha256:f38b2b640938a4f35ade69ac3d053042959b62a0f1076a5bbaa1b9526605a8a2"}, {file = "inflection-0.5.1.tar.gz", hash = "sha256:1a29730d366e996aaacffb2f1f1cb9593dc38e2ddd30c91250c6dde09ea9b417"}, @@ -2383,6 +2486,7 @@ version = "2.0.0" description = "brain-dead simple config-ini parsing" optional = false python-versions = ">=3.7" +groups = ["dev", "pytest"] files = [ {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, @@ -2394,6 +2498,7 @@ version = "0.6.1" description = "An ISO 8601 date/time/duration parser and formatter" optional = false python-versions = "*" +groups = ["filesystem", "salesforce"] files = [ {file = "isodate-0.6.1-py2.py3-none-any.whl", hash = "sha256:0751eece944162659049d35f4f549ed815792b38793f07cf73381c1c87cbed96"}, {file = "isodate-0.6.1.tar.gz", hash = "sha256:48c5881de7e8b0a0d648cb024c8062dc84e7b840ed81e864c7614fd3c127bde9"}, @@ -2408,6 +2513,7 @@ version = "0.8.0" description = "Common interface for data container classes" optional = false python-versions = ">=3.7" +groups = ["scrapy"] files = [ {file = "itemadapter-0.8.0-py3-none-any.whl", hash = "sha256:2ac1fbcc363b789a18639935ca322e50a65a0a7dfdd8d973c34e2c468e6c0f94"}, {file = "itemadapter-0.8.0.tar.gz", hash = "sha256:77758485fb0ac10730d4b131363e37d65cb8db2450bfec7a57c3f3271f4a48a9"}, @@ -2419,6 +2525,7 @@ version = "1.1.0" description = "Base library for scrapy's ItemLoader" optional = false python-versions = ">=3.7" +groups = ["scrapy"] files = [ {file = "itemloaders-1.1.0-py3-none-any.whl", hash = "sha256:c8c82fe0c11fc4cdd08ec04df0b3c43f3cb7190002edb517e02d55de8efc2aeb"}, {file = "itemloaders-1.1.0.tar.gz", hash = "sha256:21d81c61da6a08b48e5996288cdf3031c0f92e5d0075920a0242527523e14a48"}, @@ -2436,6 +2543,7 @@ version = "1.0.1" description = "JSON Matching Expressions" optional = false python-versions = ">=3.7" +groups = ["filesystem", "scrapy"] files = [ {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, @@ -2447,6 +2555,7 @@ version = "1.3.2" description = "Lightweight pipelining with Python functions" optional = false python-versions = ">=3.7" +groups = ["unstructured_data"] files = [ {file = "joblib-1.3.2-py3-none-any.whl", hash = "sha256:ef4331c65f239985f3f2220ecc87db222f08fd22097a3dd5698f693875f8cbb9"}, {file = "joblib-1.3.2.tar.gz", hash = "sha256:92f865e621e17784e7955080b6d042489e3b8e294949cc44c6eac304f59772b1"}, @@ -2458,6 +2567,7 @@ version = "1.6.0" description = "A final implementation of JSONPath for Python that aims to be standard compliant, including arithmetic and binary comparison operators and providing clear AST for metaprogramming." optional = false python-versions = "*" +groups = ["main", "dltpure"] files = [ {file = "jsonpath-ng-1.6.0.tar.gz", hash = "sha256:5483f8e9d74c39c9abfab554c070ae783c1c8cbadf5df60d561bc705ac68a07e"}, {file = "jsonpath_ng-1.6.0-py3-none-any.whl", hash = "sha256:6fd04833412c4b3d9299edf369542f5e67095ca84efa17cbb7f06a34958adc9f"}, @@ -2472,6 +2582,7 @@ version = "0.0.219" description = "Building applications with LLMs through composability" optional = false python-versions = ">=3.8.1,<4.0" +groups = ["unstructured_data", "unstructured_data_lint"] files = [ {file = "langchain-0.0.219-py3-none-any.whl", hash = "sha256:1f08a00e622f1c75087d6013f34e82be3f8dd1859266eb583a0fd7bc045090cf"}, {file = "langchain-0.0.219.tar.gz", hash = "sha256:842f8212939e5ac4005906d2215574ffb3e34d2fe28f5bc0f46eb3b28fb29c5d"}, @@ -2511,6 +2622,7 @@ version = "0.0.20" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." optional = false python-versions = ">=3.8.1,<4.0" +groups = ["unstructured_data", "unstructured_data_lint"] files = [ {file = "langchainplus_sdk-0.0.20-py3-none-any.whl", hash = "sha256:07a869d476755803aa04c4986ce78d00c2fe4ff584c0eaa57d7570c9664188db"}, {file = "langchainplus_sdk-0.0.20.tar.gz", hash = "sha256:3d300e2e3290f68cc9d842c059f9458deba60e776c9e790309688cad1bfbb219"}, @@ -2527,6 +2639,7 @@ version = "4.9.3" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*" +groups = ["salesforce", "scrapy", "unstructured_data"] files = [ {file = "lxml-4.9.3-cp27-cp27m-macosx_11_0_x86_64.whl", hash = "sha256:b0a545b46b526d418eb91754565ba5b63b1c0b12f9bd2f808c852d9b4b2f9b5c"}, {file = "lxml-4.9.3-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:075b731ddd9e7f68ad24c635374211376aa05a281673ede86cbe1d1b3455279d"}, @@ -2634,6 +2747,7 @@ version = "4.3.2" description = "LZ4 Bindings for Python" optional = false python-versions = ">=3.7" +groups = ["unstructured_data"] files = [ {file = "lz4-4.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1c4c100d99eed7c08d4e8852dd11e7d1ec47a3340f49e3a96f8dfbba17ffb300"}, {file = "lz4-4.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:edd8987d8415b5dad25e797043936d91535017237f72fa456601be1479386c92"}, @@ -2683,6 +2797,7 @@ version = "1.15.1" description = "Small library to dynamically create python functions." optional = false python-versions = "*" +groups = ["main", "dltpure"] files = [ {file = "makefun-1.15.1-py2.py3-none-any.whl", hash = "sha256:a63cfc7b47a539c76d97bd4fdb833c7d0461e759fd1225f580cb4be6200294d4"}, {file = "makefun-1.15.1.tar.gz", hash = "sha256:40b0f118b6ded0d8d78c78f1eb679b8b6b2462e3c1b3e05fb1b2da8cd46b48a5"}, @@ -2694,6 +2809,7 @@ version = "3.5" description = "Python implementation of John Gruber's Markdown." optional = false python-versions = ">=3.8" +groups = ["unstructured_data"] files = [ {file = "Markdown-3.5-py3-none-any.whl", hash = "sha256:4afb124395ce5fc34e6d9886dab977fd9ae987fc6e85689f08278cf0c69d4bf3"}, {file = "Markdown-3.5.tar.gz", hash = "sha256:a807eb2e4778d9156c8f07876c6e4d50b5494c5665c4834f67b06459dfd877b3"}, @@ -2712,6 +2828,7 @@ version = "3.0.0" description = "Python port of markdown-it. Markdown parsing, done right!" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, @@ -2736,6 +2853,7 @@ version = "3.20.1" description = "A lightweight library for converting complex datatypes to and from native Python datatypes." optional = false python-versions = ">=3.8" +groups = ["unstructured_data", "unstructured_data_lint"] files = [ {file = "marshmallow-3.20.1-py3-none-any.whl", hash = "sha256:684939db93e80ad3561392f47be0230743131560a41c5110684c16e21ade0a5c"}, {file = "marshmallow-3.20.1.tar.gz", hash = "sha256:5d2371bbe42000f2b3fb5eaa065224df7d8f8597bc19a1bbfa5bfe7fba8da889"}, @@ -2756,6 +2874,7 @@ version = "1.5.1" description = "Enum field for Marshmallow" optional = false python-versions = "*" +groups = ["unstructured_data", "unstructured_data_lint"] files = [ {file = "marshmallow-enum-1.5.1.tar.gz", hash = "sha256:38e697e11f45a8e64b4a1e664000897c659b60aa57bfa18d44e226a9920b6e58"}, {file = "marshmallow_enum-1.5.1-py2.py3-none-any.whl", hash = "sha256:57161ab3dbfde4f57adeb12090f39592e992b9c86d206d02f6bd03ebec60f072"}, @@ -2770,6 +2889,7 @@ version = "0.7.0" description = "McCabe checker, plugin for flake8" optional = false python-versions = ">=3.6" +groups = ["dev"] files = [ {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, @@ -2781,6 +2901,7 @@ version = "0.1.2" description = "Markdown URL utilities" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, @@ -2792,6 +2913,7 @@ version = "7.1.0" description = "Mimesis: Fake Data Generator." optional = false python-versions = ">=3.8,<4.0" +groups = ["dev"] files = [ {file = "mimesis-7.1.0-py3-none-any.whl", hash = "sha256:da65bea6d6d5d5d87d5c008e6b23ef5f96a49cce436d9f8708dabb5152da0290"}, {file = "mimesis-7.1.0.tar.gz", hash = "sha256:c83b55d35536d7e9b9700a596b7ccfb639a740e3e1fb5e08062e8ab2a67dcb37"}, @@ -2803,6 +2925,7 @@ version = "1.6" description = "An implementation of time.monotonic() for Python 2 & < 3.3" optional = false python-versions = "*" +groups = ["unstructured_data"] files = [ {file = "monotonic-1.6-py2.py3-none-any.whl", hash = "sha256:68687e19a14f11f26d140dd5c86f3dba4bf5df58003000ed467e0e2a69bca96c"}, {file = "monotonic-1.6.tar.gz", hash = "sha256:3a55207bcfed53ddd5c5bae174524062935efed17792e9de2ad0205ce9ad63f7"}, @@ -2814,6 +2937,7 @@ version = "10.1.0" description = "More routines for operating on iterables, beyond itertools" optional = false python-versions = ">=3.8" +groups = ["salesforce"] files = [ {file = "more-itertools-10.1.0.tar.gz", hash = "sha256:626c369fa0eb37bac0291bce8259b332fd59ac792fa5497b59837309cd5b114a"}, {file = "more_itertools-10.1.0-py3-none-any.whl", hash = "sha256:64e0735fcfdc6f3464ea133afe8ea4483b1c5fe3a3d69852e6503b43a0b222e6"}, @@ -2825,6 +2949,7 @@ version = "1.3.0" description = "Python library for arbitrary-precision floating-point arithmetic" optional = false python-versions = "*" +groups = ["unstructured_data"] files = [ {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, @@ -2842,6 +2967,7 @@ version = "1.24.1" description = "The Microsoft Authentication Library (MSAL) for Python library" optional = false python-versions = ">=2.7" +groups = ["filesystem"] files = [ {file = "msal-1.24.1-py2.py3-none-any.whl", hash = "sha256:ce4320688f95c301ee74a4d0e9dbcfe029a63663a8cc61756f40d0d0d36574ad"}, {file = "msal-1.24.1.tar.gz", hash = "sha256:aa0972884b3c6fdec53d9a0bd15c12e5bd7b71ac1b66d746f54d128709f3f8f8"}, @@ -2861,6 +2987,7 @@ version = "1.0.0" description = "Microsoft Authentication Library extensions (MSAL EX) provides a persistence API that can save your data on disk, encrypted on Windows, macOS and Linux. Concurrent data access will be coordinated by a file lock mechanism." optional = false python-versions = "*" +groups = ["filesystem"] files = [ {file = "msal-extensions-1.0.0.tar.gz", hash = "sha256:c676aba56b0cce3783de1b5c5ecfe828db998167875126ca4b47dc6436451354"}, {file = "msal_extensions-1.0.0-py2.py3-none-any.whl", hash = "sha256:91e3db9620b822d0ed2b4d1850056a0f133cba04455e62f11612e40f5502f2ee"}, @@ -2879,6 +3006,7 @@ version = "1.2.0" description = "This module enables reading, parsing and converting Microsoft Outlook MSG E-Mail files." optional = false python-versions = ">=3.4" +groups = ["unstructured_data"] files = [ {file = "msg_parser-1.2.0-py2.py3-none-any.whl", hash = "sha256:d47a2f0b2a359cb189fad83cc991b63ea781ecc70d91410324273fbf93e95375"}, {file = "msg_parser-1.2.0.tar.gz", hash = "sha256:0de858d4fcebb6c8f6f028da83a17a20fe01cdce67c490779cf43b3b0162aa66"}, @@ -2896,6 +3024,7 @@ version = "6.0.4" description = "multidict implementation" optional = false python-versions = ">=3.7" +groups = ["main", "facebook_ads", "filesystem", "unstructured_data", "unstructured_data_lint"] files = [ {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b1a97283e0c85772d613878028fec909f003993e1007eafa715b24b377cb9b8"}, {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eeb6dcc05e911516ae3d1f207d4b0520d07f54484c49dfc294d6e7d63b734171"}, @@ -2979,6 +3108,7 @@ version = "1.10.0" description = "Optional static typing for Python" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "mypy-1.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:da1cbf08fb3b851ab3b9523a884c232774008267b1f83371ace57f412fe308c2"}, {file = "mypy-1.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:12b6bfc1b1a66095ab413160a6e520e1dc076a28f3e22f7fb25ba3b000b4ef99"}, @@ -3026,6 +3156,7 @@ version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." optional = false python-versions = ">=3.5" +groups = ["dev", "unstructured_data", "unstructured_data_lint"] files = [ {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, @@ -3037,6 +3168,7 @@ version = "8.4.0" description = "Simple yet flexible natural sorting in Python." optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "natsort-8.4.0-py3-none-any.whl", hash = "sha256:4732914fb471f56b5cce04d7bae6f164a592c7712e1c85f9ef585e197299521c"}, {file = "natsort-8.4.0.tar.gz", hash = "sha256:45312c4a0e5507593da193dedd04abb1469253b601ecaf63445ad80f0a1ea581"}, @@ -3052,6 +3184,7 @@ version = "3.8.1" description = "Natural Language Toolkit" optional = false python-versions = ">=3.7" +groups = ["unstructured_data"] files = [ {file = "nltk-3.8.1-py3-none-any.whl", hash = "sha256:fd5c9109f976fa86bcadba8f91e47f5e9293bd034474752e92a520f81c93dda5"}, {file = "nltk-3.8.1.zip", hash = "sha256:1834da3d0682cba4f2cede2f9aad6b0fafb6461ba451db0efb6f9c39798d64d3"}, @@ -3077,6 +3210,7 @@ version = "2.8.6" description = "Fast numerical expression evaluator for NumPy" optional = false python-versions = ">=3.7" +groups = ["unstructured_data", "unstructured_data_lint"] files = [ {file = "numexpr-2.8.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:80acbfefb68bd92e708e09f0a02b29e04d388b9ae72f9fcd57988aca172a7833"}, {file = "numexpr-2.8.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6e884687da8af5955dc9beb6a12d469675c90b8fb38b6c93668c989cfc2cd982"}, @@ -3119,6 +3253,7 @@ version = "1.24.4" description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.8" +groups = ["main", "dev", "mongodb", "stripe_analytics", "unstructured_data", "unstructured_data_lint"] files = [ {file = "numpy-1.24.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0bfb52d2169d58c1cdb8cc1f16989101639b34c7d3ce60ed70b19c63eba0b64"}, {file = "numpy-1.24.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ed094d4f0c177b1b8e7aa9cba7d6ceed51c0e569a5318ac0ca9a090680a6a1b1"}, @@ -3156,6 +3291,7 @@ version = "3.2.2" description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" optional = false python-versions = ">=3.6" +groups = ["main", "asana_dlt", "filesystem", "google_ads", "google_analytics"] files = [ {file = "oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca"}, {file = "oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918"}, @@ -3172,6 +3308,7 @@ version = "0.46" description = "Python package to parse, read and write Microsoft OLE2 files (Structured Storage or Compound Document, Microsoft Office)" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +groups = ["unstructured_data"] files = [ {file = "olefile-0.46.zip", hash = "sha256:133b031eaf8fd2c9399b78b8bc5b8fcbe4c31e85295749bb17a87cba8f3c3964"}, ] @@ -3182,6 +3319,7 @@ version = "1.16.1" description = "ONNX Runtime is a runtime accelerator for Machine Learning models" optional = false python-versions = "*" +groups = ["unstructured_data"] files = [ {file = "onnxruntime-1.16.1-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:28b2c7f444b4119950b69370801cd66067f403d19cbaf2a444735d7c269cce4a"}, {file = "onnxruntime-1.16.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c24e04f33e7899f6aebb03ed51e51d346c1f906b05c5569d58ac9a12d38a2f58"}, @@ -3223,6 +3361,7 @@ version = "0.27.10" description = "Python client library for the OpenAI API" optional = false python-versions = ">=3.7.1" +groups = ["unstructured_data", "unstructured_data_lint"] files = [ {file = "openai-0.27.10-py3-none-any.whl", hash = "sha256:beabd1757e3286fa166dde3b70ebb5ad8081af046876b47c14c41e203ed22a14"}, {file = "openai-0.27.10.tar.gz", hash = "sha256:60e09edf7100080283688748c6803b7b3b52d5a55d21890f3815292a0552d83b"}, @@ -3245,6 +3384,7 @@ version = "1.2.4" description = "OpenAPI (v3) specification schema as pydantic class" optional = false python-versions = ">=3.6.1" +groups = ["unstructured_data", "unstructured_data_lint"] files = [ {file = "openapi-schema-pydantic-1.2.4.tar.gz", hash = "sha256:3e22cf58b74a69f752cc7e5f1537f6e44164282db2700cbbcd3bb99ddd065196"}, {file = "openapi_schema_pydantic-1.2.4-py3-none-any.whl", hash = "sha256:a932ecc5dcbb308950282088956e94dea069c9823c84e507d64f6b622222098c"}, @@ -3259,6 +3399,7 @@ version = "3.1.2" description = "A Python library to read/write Excel 2010 xlsx/xlsm files" optional = false python-versions = ">=3.6" +groups = ["unstructured_data"] files = [ {file = "openpyxl-3.1.2-py2.py3-none-any.whl", hash = "sha256:f91456ead12ab3c6c2e9491cf33ba6d08357d802192379bb482f1033ade496f5"}, {file = "openpyxl-3.1.2.tar.gz", hash = "sha256:a6f5977418eff3b2d5500d54d9db50c8277a368436f4e4f8ddb1be3422870184"}, @@ -3273,6 +3414,8 @@ version = "3.9.9" description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" optional = false python-versions = ">=3.8" +groups = ["main", "dltpure"] +markers = "platform_python_implementation != \"PyPy\"" files = [ {file = "orjson-3.9.9-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:f28090060a31f4d11221f9ba48b2273b0d04b702f4dcaa197c38c64ce639cc51"}, {file = "orjson-3.9.9-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8038ba245d0c0a6337cfb6747ea0c51fe18b0cf1a4bc943d530fd66799fae33d"}, @@ -3332,6 +3475,7 @@ version = "7.4.0" description = "A decorator to automatically detect mismatch when overriding a method." optional = false python-versions = ">=3.6" +groups = ["unstructured_data"] files = [ {file = "overrides-7.4.0-py3-none-any.whl", hash = "sha256:3ad24583f86d6d7a49049695efe9933e67ba62f0c7625d53c59fa832ce4b8b7d"}, {file = "overrides-7.4.0.tar.gz", hash = "sha256:9502a3cca51f4fac40b5feca985b6703a5c1f6ad815588a7ca9e285b9dca6757"}, @@ -3343,6 +3487,7 @@ version = "23.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.7" +groups = ["main", "dev", "dltpure", "mongodb", "pytest", "scrapy", "unstructured_data", "unstructured_data_lint"] files = [ {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, @@ -3354,6 +3499,7 @@ version = "2.0.3" description = "Powerful data structures for data analysis, time series, and statistics" optional = false python-versions = ">=3.8" +groups = ["main", "mongodb", "stripe_analytics", "unstructured_data"] files = [ {file = "pandas-2.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e4c7c9f27a4185304c7caf96dc7d91bc60bc162221152de697c98eb0b2648dd8"}, {file = "pandas-2.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f167beed68918d62bffb6ec64f2e1d8a7d297a038f86d4aed056b9493fca407f"}, @@ -3421,6 +3567,7 @@ version = "2.0.2.230605" description = "Type annotations for pandas" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pandas_stubs-2.0.2.230605-py3-none-any.whl", hash = "sha256:39106b602f3cb6dc5f728b84e1b32bde6ecf41ee34ee714c66228009609fbada"}, {file = "pandas_stubs-2.0.2.230605.tar.gz", hash = "sha256:624c7bb06d38145a44b61be459ccd19b038e0bf20364a025ecaab78fea65e858"}, @@ -3436,6 +3583,7 @@ version = "1.8.1" description = "Parsel is a library to extract data from HTML and XML using XPath and CSS selectors" optional = false python-versions = ">=3.7" +groups = ["scrapy"] files = [ {file = "parsel-1.8.1-py2.py3-none-any.whl", hash = "sha256:2708fc74daeeb4ce471e2c2e9089b650ec940c7a218053e57421e69b5b00f82c"}, {file = "parsel-1.8.1.tar.gz", hash = "sha256:aff28e68c9b3f1a901db2a4e3f158d8480a38724d7328ee751c1a4e1c1801e39"}, @@ -3454,6 +3602,7 @@ version = "0.11.2" description = "Utility library for gitignore style pattern matching of file paths." optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "pathspec-0.11.2-py3-none-any.whl", hash = "sha256:1d6ed233af05e679efb96b1851550ea95bbb64b7c490b0f5aa52996c11e92a20"}, {file = "pathspec-0.11.2.tar.gz", hash = "sha256:e0d8d0ac2f12da61956eb2306b69f9469b42f4deb0f3cb6ed47b9cce9996ced3"}, @@ -3465,6 +3614,7 @@ version = "3.2.0" description = "pathvalidate is a Python library to sanitize/validate a string such as filenames/file-paths/etc." optional = false python-versions = ">=3.7" +groups = ["main", "dltpure"] files = [ {file = "pathvalidate-3.2.0-py3-none-any.whl", hash = "sha256:cc593caa6299b22b37f228148257997e2fa850eea2daf7e4cc9205cef6908dee"}, {file = "pathvalidate-3.2.0.tar.gz", hash = "sha256:5e8378cf6712bff67fbe7a8307d99fa8c1a0cb28aa477056f8fc374f0dff24ad"}, @@ -3480,6 +3630,7 @@ version = "5.11.1" description = "Python Build Reasonableness" optional = false python-versions = ">=2.6" +groups = ["dev"] files = [ {file = "pbr-5.11.1-py2.py3-none-any.whl", hash = "sha256:567f09558bae2b3ab53cb3c1e2e33e726ff3338e7bae3db5dc954b3a44eef12b"}, {file = "pbr-5.11.1.tar.gz", hash = "sha256:aefc51675b0b533d56bb5fd1c8c6c0522fe31896679882e1c4c63d5e4a0fccb3"}, @@ -3491,6 +3642,7 @@ version = "1.16.3" description = "A wrapper around the pdftoppm and pdftocairo command line tools to convert PDF to a PIL Image list." optional = false python-versions = "*" +groups = ["unstructured_data"] files = [ {file = "pdf2image-1.16.3-py3-none-any.whl", hash = "sha256:b6154164af3677211c22cbb38b2bd778b43aca02758e962fe1e231f6d3b0e380"}, {file = "pdf2image-1.16.3.tar.gz", hash = "sha256:74208810c2cef4d9e347769b8e62a52303982ddb4f2dfd744c7ab4b940ae287e"}, @@ -3505,6 +3657,7 @@ version = "20221105" description = "PDF parser and analyzer" optional = false python-versions = ">=3.6" +groups = ["unstructured_data"] files = [ {file = "pdfminer.six-20221105-py3-none-any.whl", hash = "sha256:1eaddd712d5b2732f8ac8486824533514f8ba12a0787b3d5fe1e686cd826532d"}, {file = "pdfminer.six-20221105.tar.gz", hash = "sha256:8448ab7b939d18b64820478ecac5394f482d7a79f5f7eaa7703c6c959c175e1d"}, @@ -3525,6 +3678,7 @@ version = "3.0.0" description = "Python datetimes made easy" optional = false python-versions = ">=3.8" +groups = ["main", "dev", "dltpure", "salesforce"] files = [ {file = "pendulum-3.0.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2cf9e53ef11668e07f73190c805dbdf07a1939c3298b78d5a9203a86775d1bfd"}, {file = "pendulum-3.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fb551b9b5e6059377889d2d878d940fd0bbb80ae4810543db18e6f77b02c5ef6"}, @@ -3626,6 +3780,7 @@ version = "9.5.0" description = "Python Imaging Library (Fork)" optional = false python-versions = ">=3.7" +groups = ["unstructured_data"] files = [ {file = "Pillow-9.5.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:ace6ca218308447b9077c14ea4ef381ba0b67ee78d64046b3f19cf4e1139ad16"}, {file = "Pillow-9.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d3d403753c9d5adc04d4694d35cf0391f0f3d57c8e0030aac09d7678fa8030aa"}, @@ -3705,6 +3860,7 @@ version = "3.11.0" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." optional = false python-versions = ">=3.7" +groups = ["dev", "salesforce"] files = [ {file = "platformdirs-3.11.0-py3-none-any.whl", hash = "sha256:e9d171d00af68be50e9202731309c4e658fd8bc76f55c11c7dd760d023bda68e"}, {file = "platformdirs-3.11.0.tar.gz", hash = "sha256:cf8ee52a3afdb965072dcc652433e0c7e3e40cf5ea1477cd4b3b1d2eb75495b3"}, @@ -3720,6 +3876,7 @@ version = "1.3.0" description = "plugin and hook calling mechanisms for python" optional = false python-versions = ">=3.8" +groups = ["main", "dev", "dltpure", "pytest"] files = [ {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"}, {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"}, @@ -3735,6 +3892,7 @@ version = "3.11" description = "Python Lex & Yacc" optional = false python-versions = "*" +groups = ["main", "dltpure"] files = [ {file = "ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce"}, {file = "ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3"}, @@ -3746,6 +3904,7 @@ version = "2.8.2" description = "Wraps the portalocker recipe for easy usage" optional = false python-versions = ">=3.8" +groups = ["filesystem"] files = [ {file = "portalocker-2.8.2-py3-none-any.whl", hash = "sha256:cfb86acc09b9aa7c3b43594e19be1345b9d16af3feb08bf92f23d4dce513a28e"}, {file = "portalocker-2.8.2.tar.gz", hash = "sha256:2b035aa7828e46c58e9b31390ee1f169b98e1066ab10b9a6a861fe7e25ee4f33"}, @@ -3765,6 +3924,7 @@ version = "3.0.2" description = "Integrate PostHog into any python application." optional = false python-versions = "*" +groups = ["unstructured_data"] files = [ {file = "posthog-3.0.2-py2.py3-none-any.whl", hash = "sha256:a8c0af6f2401fbe50f90e68c4143d0824b54e872de036b1c2f23b5abb39d88ce"}, {file = "posthog-3.0.2.tar.gz", hash = "sha256:701fba6e446a4de687c6e861b587e7b7741955ad624bf34fe013c06a0fec6fb3"}, @@ -3788,6 +3948,7 @@ version = "0.3.0" description = "Pure-Python robots.txt parser with support for modern conventions" optional = false python-versions = ">=3.7" +groups = ["scrapy"] files = [ {file = "Protego-0.3.0-py2.py3-none-any.whl", hash = "sha256:db38f6a945839d8162a4034031a21490469566a2726afb51d668497c457fb0aa"}, {file = "Protego-0.3.0.tar.gz", hash = "sha256:04228bffde4c6bcba31cf6529ba2cfd6e1b70808fdc1d2cb4301be6b28d6c568"}, @@ -3799,6 +3960,7 @@ version = "1.22.3" description = "Beautiful, Pythonic protocol buffers." optional = false python-versions = ">=3.6" +groups = ["google_ads", "google_analytics"] files = [ {file = "proto-plus-1.22.3.tar.gz", hash = "sha256:fdcd09713cbd42480740d2fe29c990f7fbd885a67efc328aa8be6ee3e9f76a6b"}, {file = "proto_plus-1.22.3-py3-none-any.whl", hash = "sha256:a49cd903bc0b6ab41f76bf65510439d56ca76f868adf0274e738bfdd096894df"}, @@ -3816,6 +3978,7 @@ version = "4.24.4" description = "" optional = false python-versions = ">=3.7" +groups = ["main", "filesystem", "google_ads", "google_analytics", "google_sheets", "unstructured_data"] files = [ {file = "protobuf-4.24.4-cp310-abi3-win32.whl", hash = "sha256:ec9912d5cb6714a5710e28e592ee1093d68c5ebfeda61983b3f40331da0b1ebb"}, {file = "protobuf-4.24.4-cp310-abi3-win_amd64.whl", hash = "sha256:1badab72aa8a3a2b812eacfede5020472e16c6b2212d737cefd685884c191085"}, @@ -3838,6 +4001,7 @@ version = "2.9.9" description = "psycopg2 - Python-PostgreSQL Database Adapter" optional = false python-versions = ">=3.7" +groups = ["main", "pg_replication"] files = [ {file = "psycopg2-binary-2.9.9.tar.gz", hash = "sha256:7f01846810177d829c7692f1f5ada8096762d9172af1b1a28d4ab5b77c923c1c"}, {file = "psycopg2_binary-2.9.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c2470da5418b76232f02a2fcd2229537bb2d5a7096674ce61859c3229f2eb202"}, @@ -3919,6 +4083,8 @@ version = "2.9.0" description = ".. image:: https://travis-ci.org/chtd/psycopg2cffi.svg?branch=master" optional = false python-versions = "*" +groups = ["main"] +markers = "platform_python_implementation == \"PyPy\"" files = [ {file = "psycopg2cffi-2.9.0.tar.gz", hash = "sha256:7e272edcd837de3a1d12b62185eb85c45a19feda9e62fa1b120c54f9e8d35c52"}, ] @@ -3933,6 +4099,7 @@ version = "3.3.0" description = "Apache Pulsar Python client library" optional = false python-versions = "*" +groups = ["unstructured_data"] files = [ {file = "pulsar_client-3.3.0-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:c31afd3e67a044ff93177df89e08febf214cc965e95ede097d9fe8755af00e01"}, {file = "pulsar_client-3.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f66982284571674b215324cc26b5c2f7c56c7043113c47a7084cb70d67a8afb"}, @@ -3980,6 +4147,7 @@ version = "1.11.0" description = "library with cross-python path, ini-parsing, io, code, log facilities" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +groups = ["dev", "pytest"] files = [ {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, @@ -3991,6 +4159,7 @@ version = "2.1.0.post1" description = "Python Client for the Airtable API" optional = false python-versions = "*" +groups = ["airtable"] files = [ {file = "pyairtable-2.1.0.post1-py2.py3-none-any.whl", hash = "sha256:a80eb85f7c020bf41679bb00ca57da11aeaa43769afbc73619276798a2ca182e"}, {file = "pyairtable-2.1.0.post1.tar.gz", hash = "sha256:e588249e68cf338dcdca9908537ed16d5a22ae72345ec930022b230ba96e5f84"}, @@ -4009,6 +4178,7 @@ version = "16.0.0" description = "Python library for Apache Arrow" optional = false python-versions = ">=3.8" +groups = ["main", "mongodb"] files = [ {file = "pyarrow-16.0.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:22a1fdb1254e5095d629e29cd1ea98ed04b4bbfd8e42cc670a6b639ccc208b60"}, {file = "pyarrow-16.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:574a00260a4ed9d118a14770edbd440b848fcae5a3024128be9d0274dbcaf858"}, @@ -4057,6 +4227,7 @@ version = "0.5.0" description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +groups = ["main", "filesystem", "google_ads", "google_analytics", "google_sheets", "scrapy"] files = [ {file = "pyasn1-0.5.0-py2.py3-none-any.whl", hash = "sha256:87a2121042a1ac9358cabcaf1d07680ff97ee6404333bacca15f76aa8ad01a57"}, {file = "pyasn1-0.5.0.tar.gz", hash = "sha256:97b7290ca68e62a832558ec3976f15cbf911bf5d7c7039d8b861c2a0ece69fde"}, @@ -4068,6 +4239,7 @@ version = "0.3.0" description = "A collection of ASN.1-based protocols modules" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +groups = ["main", "filesystem", "google_ads", "google_analytics", "google_sheets", "scrapy"] files = [ {file = "pyasn1_modules-0.3.0-py2.py3-none-any.whl", hash = "sha256:d3ccd6ed470d9ffbc716be08bd90efbd44d0734bc9303818f7336070984a162d"}, {file = "pyasn1_modules-0.3.0.tar.gz", hash = "sha256:5bd01446b736eb9d31512a30d46c1ac3395d676c6f3cafa4c03eb54b9925631c"}, @@ -4082,6 +4254,7 @@ version = "2.11.1" description = "Python style guide checker" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pycodestyle-2.11.1-py2.py3-none-any.whl", hash = "sha256:44fe31000b2d866f2e41841b18528a505fbd7fef9017b04eff4e2648a0fadc67"}, {file = "pycodestyle-2.11.1.tar.gz", hash = "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f"}, @@ -4093,6 +4266,7 @@ version = "22.3.5" description = "ISO country, subdivision, language, currency and script definitions and their translations" optional = false python-versions = ">=3.6, <4" +groups = ["facebook_ads"] files = [ {file = "pycountry-22.3.5.tar.gz", hash = "sha256:b2163a246c585894d808f18783e19137cb70a0c18fb36748dc01fc6f109c1646"}, ] @@ -4106,10 +4280,12 @@ version = "2.21" description = "C parser in Python" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +groups = ["main", "filesystem", "salesforce", "scrapy", "unstructured_data"] files = [ {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, ] +markers = {main = "platform_python_implementation == \"PyPy\""} [[package]] name = "pydantic" @@ -4117,6 +4293,7 @@ version = "1.10.13" description = "Data validation and settings management using python type hints" optional = false python-versions = ">=3.7" +groups = ["airtable", "unstructured_data", "unstructured_data_lint"] files = [ {file = "pydantic-1.10.13-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:efff03cc7a4f29d9009d1c96ceb1e7a70a65cfe86e89d34e4a5f2ab1e5693737"}, {file = "pydantic-1.10.13-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3ecea2b9d80e5333303eeb77e180b90e95eea8f765d08c3d278cd56b00345d01"}, @@ -4169,6 +4346,8 @@ version = "2.0.7" description = "Multi-producer multi-consumer in-memory signal dispatch system" optional = false python-versions = "*" +groups = ["scrapy"] +markers = "platform_python_implementation == \"CPython\"" files = [ {file = "PyDispatcher-2.0.7-py3-none-any.whl", hash = "sha256:96543bea04115ffde08f851e1d45cacbfd1ee866ac42127d9b476dc5aefa7de0"}, {file = "PyDispatcher-2.0.7.tar.gz", hash = "sha256:b777c6ad080dc1bad74a4c29d6a46914fa6701ac70f94b0d66fbcfde62f5be31"}, @@ -4183,6 +4362,7 @@ version = "3.1.0" description = "passive checker of Python programs" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pyflakes-3.1.0-py2.py3-none-any.whl", hash = "sha256:4132f6d49cb4dae6819e5379898f2b8cce3c5f23994194c24b77d5da2e36f774"}, {file = "pyflakes-3.1.0.tar.gz", hash = "sha256:a0aae034c444db0071aa077972ba4768d40c830d9539fd45bf4cd3f8f6992efc"}, @@ -4194,6 +4374,7 @@ version = "2.16.1" description = "Pygments is a syntax highlighting package written in Python." optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "Pygments-2.16.1-py3-none-any.whl", hash = "sha256:13fc09fa63bc8d8671a6d247e1eb303c4b343eaee81d861f3404db2935653692"}, {file = "Pygments-2.16.1.tar.gz", hash = "sha256:1daff0494820c69bc8941e407aa20f577374ee88364ee10a98fdbe0aece96e29"}, @@ -4208,6 +4389,7 @@ version = "2.8.0" description = "JSON Web Token implementation in Python" optional = false python-versions = ">=3.7" +groups = ["filesystem", "salesforce"] files = [ {file = "PyJWT-2.8.0-py3-none-any.whl", hash = "sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320"}, {file = "PyJWT-2.8.0.tar.gz", hash = "sha256:57e28d156e3d5c10088e0c68abb90bfac3df82b40a71bd0daa20c65ccd5c23de"}, @@ -4228,6 +4410,7 @@ version = "4.5.0" description = "Python driver for MongoDB " optional = false python-versions = ">=3.7" +groups = ["mongodb"] files = [ {file = "pymongo-4.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2d4fa1b01fa7e5b7bb8d312e3542e211b320eb7a4e3d8dc884327039d93cb9e0"}, {file = "pymongo-4.5.0-cp310-cp310-manylinux1_i686.whl", hash = "sha256:dfcd2b9f510411de615ccedd47462dae80e82fdc09fe9ab0f0f32f11cf57eeb5"}, @@ -4330,6 +4513,7 @@ version = "1.4.0" description = "\"Tools for using NumPy, Pandas, Polars, and PyArrow with MongoDB\"" optional = false python-versions = ">=3.8" +groups = ["mongodb"] files = [ {file = "pymongoarrow-1.4.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:57a438dad3808c10931ffadd6028c8107133d254229996f8260e7c61417d98fe"}, {file = "pymongoarrow-1.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:de545ecfc591288c75b602b4baeb6cd9f7db9ff6890c69d46dcb63eebd5e379d"}, @@ -4373,6 +4557,7 @@ version = "1.1.0" description = "Pure Python MySQL Driver" optional = false python-versions = ">=3.7" +groups = ["sql_database"] files = [ {file = "PyMySQL-1.1.0-py3-none-any.whl", hash = "sha256:8969ec6d763c856f7073c4c64662882675702efcb114b4bcbb955aea3a069fa7"}, {file = "PyMySQL-1.1.0.tar.gz", hash = "sha256:4f13a7df8bf36a51e81dd9f3605fede45a4878fe02f9236349fd82a3f0612f96"}, @@ -4388,6 +4573,7 @@ version = "23.2.0" description = "Python wrapper module around the OpenSSL library" optional = false python-versions = ">=3.6" +groups = ["scrapy"] files = [ {file = "pyOpenSSL-23.2.0-py3-none-any.whl", hash = "sha256:24f0dc5227396b3e831f4c7f602b950a5e9833d292c8e4a2e06b709292806ae2"}, {file = "pyOpenSSL-23.2.0.tar.gz", hash = "sha256:276f931f55a452e7dea69c7173e984eb2a4407ce413c918aa34b55f82f9b8bac"}, @@ -4406,6 +4592,7 @@ version = "1.11" description = "Thin wrapper for pandoc." optional = false python-versions = ">=3.6" +groups = ["unstructured_data"] files = [ {file = "pypandoc-1.11-py3-none-any.whl", hash = "sha256:b260596934e9cfc6513056110a7c8600171d414f90558bf4407e68b209be8007"}, {file = "pypandoc-1.11.tar.gz", hash = "sha256:7f6d68db0e57e0f6961bec2190897118c4d305fc2d31c22cd16037f22ee084a5"}, @@ -4417,6 +4604,7 @@ version = "3.1.1" description = "pyparsing module - Classes and methods to define and execute parsing grammars" optional = false python-versions = ">=3.6.8" +groups = ["google_ads", "google_analytics", "google_sheets"] files = [ {file = "pyparsing-3.1.1-py3-none-any.whl", hash = "sha256:32c7c0b711493c72ff18a981d24f28aaf9c1fb7ed5e9667c9e84e3db623bdbfb"}, {file = "pyparsing-3.1.1.tar.gz", hash = "sha256:ede28a1a32462f5a9705e07aea48001a08f7cf81a021585011deba701581a0db"}, @@ -4431,6 +4619,7 @@ version = "3.0.1" description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files" optional = false python-versions = ">=3.6" +groups = ["dev"] files = [ {file = "PyPDF2-3.0.1.tar.gz", hash = "sha256:a74408f69ba6271f71b9352ef4ed03dc53a31aa404d29b5d31f53bfecfee1440"}, {file = "pypdf2-3.0.1-py3-none-any.whl", hash = "sha256:d16e4205cfee272fbdc0568b68d82be796540b1537508cef59388f839c191928"}, @@ -4452,6 +4641,8 @@ version = "2.1.2" description = "Multi-producer-multi-consumer signal dispatching mechanism" optional = false python-versions = "*" +groups = ["scrapy"] +markers = "platform_python_implementation == \"PyPy\"" files = [ {file = "PyPyDispatcher-2.1.2.tar.gz", hash = "sha256:b6bec5dfcff9d2535bca2b23c80eae367b1ac250a645106948d315fcfa9130f2"}, ] @@ -4462,6 +4653,8 @@ version = "3.4.1" description = "A python implementation of GNU readline." optional = false python-versions = "*" +groups = ["unstructured_data"] +markers = "sys_platform == \"win32\"" files = [ {file = "pyreadline3-3.4.1-py3-none-any.whl", hash = "sha256:b0efb6516fd4fb07b45949053826a62fa4cb353db5be2bbb4a7aa1fdd1e345fb"}, {file = "pyreadline3-3.4.1.tar.gz", hash = "sha256:6f3d1f7b8a31ba32b73917cefc1f28cc660562f39aea8646d30bd6eff21f7bae"}, @@ -4473,6 +4666,7 @@ version = "7.4.2" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.7" +groups = ["dev", "pytest"] files = [ {file = "pytest-7.4.2-py3-none-any.whl", hash = "sha256:1d881c6124e08ff0a1bb75ba3ec0bfd8b5354a01c194ddd5a0a870a48d99b002"}, {file = "pytest-7.4.2.tar.gz", hash = "sha256:a766259cfab564a2ad52cb1aae1b881a75c3eb7e34ca3779697c23ed47c47069"}, @@ -4495,6 +4689,7 @@ version = "1.6.0" description = "run tests in isolated forked subprocesses" optional = false python-versions = ">=3.7" +groups = ["dev", "pytest"] files = [ {file = "pytest-forked-1.6.0.tar.gz", hash = "sha256:4dafd46a9a600f65d822b8f605133ecf5b3e1941ebb3588e943b4e3eb71a5a3f"}, {file = "pytest_forked-1.6.0-py3-none-any.whl", hash = "sha256:810958f66a91afb1a1e2ae83089d8dc1cd2437ac96b12963042fbb9fb4d16af0"}, @@ -4510,6 +4705,7 @@ version = "3.12.0" description = "Thin-wrapper around the mock package for easier use with pytest" optional = false python-versions = ">=3.8" +groups = ["dev", "pytest"] files = [ {file = "pytest-mock-3.12.0.tar.gz", hash = "sha256:31a40f038c22cad32287bb43932054451ff5583ff094bca6f675df2f8bc1a6e9"}, {file = "pytest_mock-3.12.0-py3-none-any.whl", hash = "sha256:0972719a7263072da3a21c7f4773069bcc7486027d7e8e1f81d98a47e701bc4f"}, @@ -4527,6 +4723,7 @@ version = "2.8.2" description = "Extensions to the standard Python datetime module" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main", "dev", "dltpure", "filesystem", "mongodb", "salesforce", "stripe_analytics", "unstructured_data"] files = [ {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, @@ -4541,6 +4738,7 @@ version = "1.0.1" description = "Create, read, and update Microsoft Word .docx files." optional = false python-versions = ">=3.7" +groups = ["unstructured_data"] files = [ {file = "python-docx-1.0.1.tar.gz", hash = "sha256:255148e15a4414244ec75f50e92d19864e52a7416768c65491707a7414659524"}, {file = "python_docx-1.0.1-py3-none-any.whl", hash = "sha256:851340c49b36f917a1838a44c602a5a0702c0c3507b9890969545732dc10d2d1"}, @@ -4556,6 +4754,7 @@ version = "1.0.0" description = "Read key-value pairs from a .env file and set them as environment variables" optional = false python-versions = ">=3.8" +groups = ["unstructured_data"] files = [ {file = "python-dotenv-1.0.0.tar.gz", hash = "sha256:a8df96034aae6d2d50a4ebe8216326c61c3eb64836776504fcca410e5937a3ba"}, {file = "python_dotenv-1.0.0-py3-none-any.whl", hash = "sha256:f5971a9226b701070a4bf2c38c89e5a3f0d64de8debda981d1db98583009122a"}, @@ -4570,6 +4769,7 @@ version = "0.4.27" description = "File type identification using libmagic" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +groups = ["unstructured_data"] files = [ {file = "python-magic-0.4.27.tar.gz", hash = "sha256:c1ba14b08e4a5f5c31a302b7721239695b2f0f058d125bd5ce1ee36b9d9d3c3b"}, {file = "python_magic-0.4.27-py2.py3-none-any.whl", hash = "sha256:c212960ad306f700aa0d01e5d7a325d20548ff97eb9920dcd29513174f0294d3"}, @@ -4581,6 +4781,7 @@ version = "0.6.22" description = "Generate and manipulate Open XML PowerPoint (.pptx) files" optional = false python-versions = "*" +groups = ["unstructured_data"] files = [ {file = "python-pptx-0.6.22.tar.gz", hash = "sha256:38f8ee92dde31d24b4562560e61b0357e5d97ecf75c4352ae6616d5a32978654"}, {file = "python_pptx-0.6.22-py3-none-any.whl", hash = "sha256:3d097c29e08de2da1fc3c6752169087065efa4153216e77fc1b27dff1bcdcb46"}, @@ -4597,6 +4798,7 @@ version = "2023.3.post1" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" +groups = ["main", "dltpure", "mongodb", "salesforce", "stripe_analytics", "unstructured_data"] files = [ {file = "pytz-2023.3.post1-py2.py3-none-any.whl", hash = "sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7"}, {file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"}, @@ -4608,6 +4810,8 @@ version = "306" description = "Python for Window Extensions" optional = false python-versions = "*" +groups = ["filesystem"] +markers = "platform_system == \"Windows\"" files = [ {file = "pywin32-306-cp310-cp310-win32.whl", hash = "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d"}, {file = "pywin32-306-cp310-cp310-win_amd64.whl", hash = "sha256:84f4471dbca1887ea3803d8848a1616429ac94a4a8d05f4bc9c5dcfd42ca99c8"}, @@ -4631,6 +4835,7 @@ version = "6.0.1" description = "YAML parser and emitter for Python" optional = false python-versions = ">=3.6" +groups = ["main", "dev", "dltpure", "google_ads", "unstructured_data", "unstructured_data_lint"] files = [ {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, @@ -4691,6 +4896,7 @@ version = "1.6.2" description = "Collection of persistent (disk-based) and non-persistent (memory-based) queues" optional = false python-versions = ">=3.5" +groups = ["scrapy"] files = [ {file = "queuelib-1.6.2-py2.py3-none-any.whl", hash = "sha256:4b96d48f650a814c6fb2fd11b968f9c46178b683aad96d68f930fe13a8574d19"}, {file = "queuelib-1.6.2.tar.gz", hash = "sha256:4b207267f2642a8699a1f806045c56eb7ad1a85a10c0e249884580d139c2fcd2"}, @@ -4702,6 +4908,7 @@ version = "2023.10.3" description = "Alternative regular expression module, to replace re." optional = false python-versions = ">=3.7" +groups = ["unstructured_data"] files = [ {file = "regex-2023.10.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4c34d4f73ea738223a094d8e0ffd6d2c1a1b4c175da34d6b0de3d8d69bee6bcc"}, {file = "regex-2023.10.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a8f4e49fc3ce020f65411432183e6775f24e02dff617281094ba6ab079ef0915"}, @@ -4799,6 +5006,7 @@ version = "2.31.0" description = "Python HTTP for Humans." optional = false python-versions = ">=3.7" +groups = ["main", "airtable", "asana_dlt", "dev", "dltpure", "facebook_ads", "filesystem", "google_ads", "google_analytics", "google_sheets", "salesforce", "scrapy", "stripe_analytics", "unstructured_data", "unstructured_data_lint"] files = [ {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, @@ -4820,6 +5028,7 @@ version = "1.5.1" description = "File transport adapter for Requests" optional = false python-versions = "*" +groups = ["salesforce", "scrapy"] files = [ {file = "requests-file-1.5.1.tar.gz", hash = "sha256:07d74208d3389d01c38ab89ef403af0cfec63957d53a0081d8eca738d0247d8e"}, {file = "requests_file-1.5.1-py2.py3-none-any.whl", hash = "sha256:dfe5dae75c12481f68ba353183c53a65e6044c923e64c24b2209f6c7570ca953"}, @@ -4835,6 +5044,7 @@ version = "1.11.0" description = "Mock out responses from the requests package" optional = false python-versions = "*" +groups = ["dev"] files = [ {file = "requests-mock-1.11.0.tar.gz", hash = "sha256:ef10b572b489a5f28e09b708697208c4a3b2b89ef80a9f01584340ea357ec3c4"}, {file = "requests_mock-1.11.0-py2.py3-none-any.whl", hash = "sha256:f7fae383f228633f6bececebdab236c478ace2284d6292c6e7e2867b9ab74d15"}, @@ -4854,6 +5064,7 @@ version = "1.3.1" description = "OAuthlib authentication support for Requests." optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +groups = ["main", "asana_dlt", "filesystem", "google_ads", "google_analytics"] files = [ {file = "requests-oauthlib-1.3.1.tar.gz", hash = "sha256:75beac4a47881eeb94d5ea5d6ad31ef88856affe2332b9aafb52c6452ccf0d7a"}, {file = "requests_oauthlib-1.3.1-py2.py3-none-any.whl", hash = "sha256:2577c501a2fb8d05a304c09d090d6e47c306fef15809d102b327cf8364bddab5"}, @@ -4872,6 +5083,7 @@ version = "1.0.0" description = "A utility belt for advanced users of python-requests" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +groups = ["salesforce"] files = [ {file = "requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6"}, {file = "requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06"}, @@ -4886,6 +5098,7 @@ version = "0.5.0" description = "This is a small Python module for parsing Pip requirement files." optional = false python-versions = ">=3.6,<4.0" +groups = ["main", "dltpure"] files = [ {file = "requirements-parser-0.5.0.tar.gz", hash = "sha256:3336f3a3ae23e06d3f0f88595e4052396e3adf91688787f637e5d2ca1a904069"}, {file = "requirements_parser-0.5.0-py3-none-any.whl", hash = "sha256:e7fcdcd04f2049e73a9fb150d8a0f9d51ce4108f5f7cbeac74c484e17b12bcd9"}, @@ -4900,6 +5113,7 @@ version = "13.6.0" description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" optional = false python-versions = ">=3.7.0" +groups = ["dev"] files = [ {file = "rich-13.6.0-py3-none-any.whl", hash = "sha256:2b38e2fe9ca72c9a00170a1a2d20c63c790d0e10ef1fe35eba76e1e7b1d7d245"}, {file = "rich-13.6.0.tar.gz", hash = "sha256:5c14d22737e6d5084ef4771b62d5d4363165b403455a30a1c8ca39dc7b644bef"}, @@ -4919,6 +5133,7 @@ version = "4.9" description = "Pure-Python RSA implementation" optional = false python-versions = ">=3.6,<4" +groups = ["main", "filesystem", "google_ads", "google_analytics", "google_sheets"] files = [ {file = "rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7"}, {file = "rsa-4.9.tar.gz", hash = "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21"}, @@ -4933,6 +5148,7 @@ version = "2024.3.1" description = "Convenient Filesystem interface over S3" optional = false python-versions = ">= 3.8" +groups = ["filesystem"] files = [ {file = "s3fs-2024.3.1-py3-none-any.whl", hash = "sha256:f4566a5446c473740d272ec08e0b4aae8db1aa05f662c42ff0aa2c89bb5060ea"}, {file = "s3fs-2024.3.1.tar.gz", hash = "sha256:1b8bc8dbd65e7b60f5487378f6eeffe1de59aa72caa9efca6dad6ab877405487"}, @@ -4953,6 +5169,7 @@ version = "2.11.1" description = "A high-level Web Crawling and Web Scraping framework" optional = false python-versions = ">=3.8" +groups = ["scrapy"] files = [ {file = "Scrapy-2.11.1-py2.py3-none-any.whl", hash = "sha256:f1edee0cd214512054c01a8d031a8d213dddb53492b02c9e66256e3efe90d175"}, {file = "Scrapy-2.11.1.tar.gz", hash = "sha256:733a039c7423e52b69bf2810b5332093d4e42a848460359c07b02ecff8f73ebe"}, @@ -4984,6 +5201,7 @@ version = "3.0.2" description = "Python helper for Semantic Versioning (https://semver.org)" optional = false python-versions = ">=3.7" +groups = ["main", "dltpure"] files = [ {file = "semver-3.0.2-py3-none-any.whl", hash = "sha256:b1ea4686fe70b981f85359eda33199d60c53964284e0cfb4977d243e37cf4bf4"}, {file = "semver-3.0.2.tar.gz", hash = "sha256:6253adb39c70f6e51afed2fa7152bcd414c411286088fb4b9effb133885ab4cc"}, @@ -4995,6 +5213,7 @@ version = "24.1.0" description = "Service identity verification for pyOpenSSL & cryptography." optional = false python-versions = ">=3.8" +groups = ["scrapy"] files = [ {file = "service_identity-24.1.0-py3-none-any.whl", hash = "sha256:a28caf8130c8a5c1c7a6f5293faaf239bbfb7751e4862436920ee6f2616f568a"}, {file = "service_identity-24.1.0.tar.gz", hash = "sha256:6829c9d62fb832c2e1c435629b0a8c476e1929881f28bee4d20bc24161009221"}, @@ -5019,6 +5238,7 @@ version = "68.2.2" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.8" +groups = ["main", "dev", "dltpure", "facebook_ads", "google_ads", "scrapy"] files = [ {file = "setuptools-68.2.2-py3-none-any.whl", hash = "sha256:b454a35605876da60632df1a60f736524eb73cc47bbc9f3f1ef1b644de74fd2a"}, {file = "setuptools-68.2.2.tar.gz", hash = "sha256:4ac1475276d2f1c48684874089fefcd83bd7162ddaafb81fac866ba0db282a87"}, @@ -5035,6 +5255,7 @@ version = "1.12.5" description = "A basic Salesforce.com REST API client." optional = false python-versions = "*" +groups = ["salesforce"] files = [ {file = "simple-salesforce-1.12.5.tar.gz", hash = "sha256:ef65f72438e3b215619f6835d3d4356e147adf3a7ece6896d239127dd6aefcd1"}, {file = "simple_salesforce-1.12.5-py2.py3-none-any.whl", hash = "sha256:07029575385d04132babfd6e19c1c8068c859d616a45dab07bbf9875bdc5ab93"}, @@ -5054,6 +5275,7 @@ version = "3.19.2" description = "Simple, fast, extensible JSON encoder/decoder for Python" optional = false python-versions = ">=2.5, !=3.0.*, !=3.1.*, !=3.2.*" +groups = ["main", "dltpure"] files = [ {file = "simplejson-3.19.2-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:3471e95110dcaf901db16063b2e40fb394f8a9e99b3fe9ee3acc6f6ef72183a2"}, {file = "simplejson-3.19.2-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:3194cd0d2c959062b94094c0a9f8780ffd38417a5322450a0db0ca1a23e7fbd2"}, @@ -5161,6 +5383,7 @@ version = "1.16.0" description = "Python 2 and 3 compatibility utilities" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +groups = ["main", "dev", "dltpure", "facebook_ads", "filesystem", "mongodb", "salesforce", "scrapy", "stripe_analytics", "unstructured_data"] files = [ {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, @@ -5172,6 +5395,7 @@ version = "5.0.1" description = "A pure Python implementation of a sliding window memory map manager" optional = false python-versions = ">=3.7" +groups = ["main", "dev", "dltpure"] files = [ {file = "smmap-5.0.1-py3-none-any.whl", hash = "sha256:e6d8668fa5f93e706934a62d7b4db19c8d9eb8cf2adbb75ef1b675aa332b69da"}, {file = "smmap-5.0.1.tar.gz", hash = "sha256:dceeb6c0028fdb6734471eb07c0cd2aae706ccaecab45965ee83f11c8d3b1f62"}, @@ -5183,6 +5407,7 @@ version = "1.3.0" description = "Sniff out which async library your code is running under" optional = false python-versions = ">=3.7" +groups = ["unstructured_data"] files = [ {file = "sniffio-1.3.0-py3-none-any.whl", hash = "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"}, {file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"}, @@ -5194,6 +5419,7 @@ version = "2.0.22" description = "Database Abstraction Library" optional = false python-versions = ">=3.7" +groups = ["sql_database", "unstructured_data", "unstructured_data_lint"] files = [ {file = "SQLAlchemy-2.0.22-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f146c61ae128ab43ea3a0955de1af7e1633942c2b2b4985ac51cc292daf33222"}, {file = "SQLAlchemy-2.0.22-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:875de9414393e778b655a3d97d60465eb3fae7c919e88b70cc10b40b9f56042d"}, @@ -5280,6 +5506,7 @@ version = "0.20.4" description = "The little ASGI library that shines." optional = false python-versions = ">=3.7" +groups = ["unstructured_data"] files = [ {file = "starlette-0.20.4-py3-none-any.whl", hash = "sha256:c0414d5a56297d37f3db96a84034d61ce29889b9eaccf65eb98a0b39441fcaa3"}, {file = "starlette-0.20.4.tar.gz", hash = "sha256:42fcf3122f998fefce3e2c5ad7e5edbf0f02cf685d646a83a08d404726af5084"}, @@ -5298,6 +5525,7 @@ version = "5.1.0" description = "Manage dynamic plugins for Python applications" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "stevedore-5.1.0-py3-none-any.whl", hash = "sha256:8cc040628f3cea5d7128f2e76cf486b2251a4e543c7b938f58d9a377f6694a2d"}, {file = "stevedore-5.1.0.tar.gz", hash = "sha256:a54534acf9b89bc7ed264807013b505bf07f74dbe4bcfa37d32bd063870b087c"}, @@ -5312,6 +5540,7 @@ version = "5.5.0" description = "Python bindings for the Stripe API" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +groups = ["stripe_analytics"] files = [ {file = "stripe-5.5.0-py2.py3-none-any.whl", hash = "sha256:b4947da66dbb3de8969004ba6398f9a019c6b1b3ffe6aa88d5b07ac560a52b28"}, {file = "stripe-5.5.0.tar.gz", hash = "sha256:04a9732b37a46228ecf0e496163a3edd93596b0e6200029fbc48911638627e19"}, @@ -5326,6 +5555,7 @@ version = "1.12" description = "Computer algebra system (CAS) in Python" optional = false python-versions = ">=3.8" +groups = ["unstructured_data"] files = [ {file = "sympy-1.12-py3-none-any.whl", hash = "sha256:c3588cd4295d0c0f603d0f2ae780587e64e2efeedb3521e46b9bb1d08d184fa5"}, {file = "sympy-1.12.tar.gz", hash = "sha256:ebf595c8dac3e0fdc4152c51878b498396ec7f30e7a914d6071e674d49420fb8"}, @@ -5340,6 +5570,7 @@ version = "0.9.0" description = "Pretty-print tabular data" optional = false python-versions = ">=3.7" +groups = ["unstructured_data"] files = [ {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"}, {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"}, @@ -5354,6 +5585,7 @@ version = "8.2.3" description = "Retry code until it succeeds" optional = false python-versions = ">=3.7" +groups = ["main", "dltpure", "unstructured_data", "unstructured_data_lint"] files = [ {file = "tenacity-8.2.3-py3-none-any.whl", hash = "sha256:ce510e327a630c9e1beaf17d42e6ffacc88185044ad85cf74c0a8887c6a0f88c"}, {file = "tenacity-8.2.3.tar.gz", hash = "sha256:5398ef0d78e63f40007c1fb4c0bff96e1911394d2fa8d194f77619c05ff6cc8a"}, @@ -5368,6 +5600,7 @@ version = "0.4.0" description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" optional = false python-versions = ">=3.8" +groups = ["unstructured_data"] files = [ {file = "tiktoken-0.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:176cad7f053d2cc82ce7e2a7c883ccc6971840a4b5276740d0b732a2b2011f8a"}, {file = "tiktoken-0.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:450d504892b3ac80207700266ee87c932df8efea54e05cefe8613edc963c1285"}, @@ -5413,6 +5646,7 @@ version = "5.1.1" description = "Accurately separates a URL's subdomain, domain, and public suffix, using the Public Suffix List (PSL). By default, this includes the public ICANN TLDs and their exceptions. You can optionally support the Public Suffix List's private domains as well." optional = false python-versions = ">=3.8" +groups = ["scrapy"] files = [ {file = "tldextract-5.1.1-py3-none-any.whl", hash = "sha256:b9c4510a8766d377033b6bace7e9f1f17a891383ced3c5d50c150f181e9e1cc2"}, {file = "tldextract-5.1.1.tar.gz", hash = "sha256:9b6dbf803cb5636397f0203d48541c0da8ba53babaf0e8a6feda2d88746813d4"}, @@ -5433,6 +5667,7 @@ version = "0.14.1" description = "" optional = false python-versions = ">=3.7" +groups = ["unstructured_data"] files = [ {file = "tokenizers-0.14.1-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:04ec1134a18ede355a05641cdc7700f17280e01f69f2f315769f02f7e295cf1e"}, {file = "tokenizers-0.14.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:638abedb39375f0ddce2de536fc9c976639b2d1b7202d715c2e7a25f0ebfd091"}, @@ -5548,6 +5783,8 @@ version = "2.0.1" description = "A lil' TOML parser" optional = false python-versions = ">=3.7" +groups = ["dev", "pytest"] +markers = "python_version < \"3.11\"" files = [ {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, @@ -5559,6 +5796,7 @@ version = "0.12.1" description = "Style preserving TOML library" optional = false python-versions = ">=3.7" +groups = ["main", "dltpure"] files = [ {file = "tomlkit-0.12.1-py3-none-any.whl", hash = "sha256:712cbd236609acc6a3e2e97253dfc52d4c2082982a88f61b640ecf0817eab899"}, {file = "tomlkit-0.12.1.tar.gz", hash = "sha256:38e1ff8edb991273ec9f6181244a6a391ac30e9f5098e7535640ea6be97a7c86"}, @@ -5570,6 +5808,7 @@ version = "4.66.1" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" +groups = ["unstructured_data", "unstructured_data_lint"] files = [ {file = "tqdm-4.66.1-py3-none-any.whl", hash = "sha256:d302b3c5b53d47bce91fea46679d9c3c6508cf6332229aa1e7d8653723793386"}, {file = "tqdm-4.66.1.tar.gz", hash = "sha256:d88e651f9db8d8551a62556d3cff9e3034274ca5d66e93197cf2490e2dcb69c7"}, @@ -5590,6 +5829,7 @@ version = "22.10.0" description = "An asynchronous networking framework written in Python" optional = false python-versions = ">=3.7.1" +groups = ["dev", "scrapy"] files = [ {file = "Twisted-22.10.0-py3-none-any.whl", hash = "sha256:86c55f712cc5ab6f6d64e02503352464f0400f66d4f079096d744080afcccbd0"}, {file = "Twisted-22.10.0.tar.gz", hash = "sha256:32acbd40a94f5f46e7b42c109bfae2b302250945561783a8b7a059048f2d4d31"}, @@ -5628,6 +5868,8 @@ version = "1.0.4" description = "An extension for use in the twisted I/O Completion Ports reactor." optional = false python-versions = "*" +groups = ["dev", "scrapy"] +markers = "platform_system == \"Windows\"" files = [ {file = "twisted-iocpsupport-1.0.4.tar.gz", hash = "sha256:858096c0d15e33f15ac157f455d8f86f2f2cdd223963e58c0f682a3af8362d89"}, {file = "twisted_iocpsupport-1.0.4-cp310-cp310-win32.whl", hash = "sha256:afa2b630797f9ed2f27f3d9f55e3f72b4244911e45a8c82756f44babbf0b243e"}, @@ -5656,6 +5898,7 @@ version = "2.9.21.20240218" description = "Typing stubs for psycopg2" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "types-psycopg2-2.9.21.20240218.tar.gz", hash = "sha256:3084cd807038a62c80fb5be78b41d855b48a060316101ea59fd85c302efb57d4"}, {file = "types_psycopg2-2.9.21.20240218-py3-none-any.whl", hash = "sha256:cac96264e063cbce28dee337a973d39e6df4ca671252343cb4f8e5ef6db5e67d"}, @@ -5667,6 +5910,7 @@ version = "2023.3.1.1" description = "Typing stubs for pytz" optional = false python-versions = "*" +groups = ["dev"] files = [ {file = "types-pytz-2023.3.1.1.tar.gz", hash = "sha256:cc23d0192cd49c8f6bba44ee0c81e4586a8f30204970fc0894d209a6b08dab9a"}, {file = "types_pytz-2023.3.1.1-py3-none-any.whl", hash = "sha256:1999a123a3dc0e39a2ef6d19f3f8584211de9e6a77fe7a0259f04a524e90a5cf"}, @@ -5678,6 +5922,7 @@ version = "2.31.0.6" description = "Typing stubs for requests" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "types-requests-2.31.0.6.tar.gz", hash = "sha256:cd74ce3b53c461f1228a9b783929ac73a666658f223e28ed29753771477b3bd0"}, {file = "types_requests-2.31.0.6-py3-none-any.whl", hash = "sha256:a2db9cb228a81da8348b49ad6db3f5519452dd20a9c1e1a868c83c5fe88fd1a9"}, @@ -5692,6 +5937,7 @@ version = "68.2.0.0" description = "Typing stubs for setuptools" optional = false python-versions = "*" +groups = ["main", "dltpure"] files = [ {file = "types-setuptools-68.2.0.0.tar.gz", hash = "sha256:a4216f1e2ef29d089877b3af3ab2acf489eb869ccaf905125c69d2dc3932fd85"}, {file = "types_setuptools-68.2.0.0-py3-none-any.whl", hash = "sha256:77edcc843e53f8fc83bb1a840684841f3dc804ec94562623bfa2ea70d5a2ba1b"}, @@ -5703,6 +5949,7 @@ version = "3.5.2.14" description = "Typing stubs for stripe" optional = false python-versions = "*" +groups = ["stripe_analytics"] files = [ {file = "types-stripe-3.5.2.14.tar.gz", hash = "sha256:bcc020aa5ba9acd796b9f2ac21f044c8e377ce2c0f570057f0f64c4b4637bbe7"}, {file = "types_stripe-3.5.2.14-py3-none-any.whl", hash = "sha256:f5f1249f72a35ada1db95523edc7e8f7b543dc8434b2ff23eaa9ec2e251c2e59"}, @@ -5714,6 +5961,7 @@ version = "1.26.25.14" description = "Typing stubs for urllib3" optional = false python-versions = "*" +groups = ["dev"] files = [ {file = "types-urllib3-1.26.25.14.tar.gz", hash = "sha256:229b7f577c951b8c1b92c1bc2b2fdb0b49847bd2af6d1cc2a2e3dd340f3bda8f"}, {file = "types_urllib3-1.26.25.14-py3-none-any.whl", hash = "sha256:9683bbb7fb72e32bfe9d2be6e04875fbe1b3eeec3cbb4ea231435aa7fd6b4f0e"}, @@ -5725,6 +5973,7 @@ version = "4.8.0" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" +groups = ["main", "airtable", "dev", "dltpure", "filesystem", "scrapy", "sql_database", "unstructured_data", "unstructured_data_lint"] files = [ {file = "typing_extensions-4.8.0-py3-none-any.whl", hash = "sha256:8f92fc8806f9a6b641eaa5318da32b44d401efaac0f6678c9bc448ba3605faa0"}, {file = "typing_extensions-4.8.0.tar.gz", hash = "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef"}, @@ -5736,6 +5985,7 @@ version = "0.9.0" description = "Runtime inspection utilities for typing module." optional = false python-versions = "*" +groups = ["unstructured_data", "unstructured_data_lint"] files = [ {file = "typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f"}, {file = "typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78"}, @@ -5751,6 +6001,7 @@ version = "2023.3" description = "Provider of IANA time zone data" optional = false python-versions = ">=2" +groups = ["main", "dev", "dltpure", "mongodb", "salesforce", "stripe_analytics", "unstructured_data"] files = [ {file = "tzdata-2023.3-py2.py3-none-any.whl", hash = "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"}, {file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"}, @@ -5762,6 +6013,7 @@ version = "0.7.12" description = "A library that prepares raw documents for downstream ML tasks." optional = false python-versions = ">=3.7.0" +groups = ["unstructured_data"] files = [ {file = "unstructured-0.7.12-py3-none-any.whl", hash = "sha256:6dec4f23574e213f30bccb680a4fb84c95617092ce4abf5d8955cc71af402fef"}, {file = "unstructured-0.7.12.tar.gz", hash = "sha256:3dcddea34f52e1070f38fd10063b3b0f64bc4cbe5b778d6b86b5d33262d625cd"}, @@ -5809,6 +6061,7 @@ version = "4.1.1" description = "Implementation of RFC 6570 URI Templates" optional = false python-versions = ">=3.6" +groups = ["google_ads", "google_analytics", "google_sheets"] files = [ {file = "uritemplate-4.1.1-py2.py3-none-any.whl", hash = "sha256:830c08b8d99bdd312ea4ead05994a38e8936266f84b9a7878232db50b044e02e"}, {file = "uritemplate-4.1.1.tar.gz", hash = "sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0"}, @@ -5820,6 +6073,7 @@ version = "1.26.17" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +groups = ["main", "airtable", "asana_dlt", "dev", "dltpure", "facebook_ads", "filesystem", "google_ads", "google_analytics", "google_sheets", "salesforce", "scrapy", "stripe_analytics", "unstructured_data", "unstructured_data_lint"] files = [ {file = "urllib3-1.26.17-py2.py3-none-any.whl", hash = "sha256:94a757d178c9be92ef5539b8840d48dc9cf1b2709c9d6b588232a055c524458b"}, {file = "urllib3-1.26.17.tar.gz", hash = "sha256:24d6a242c28d29af46c3fae832c36db3bbebcc533dd1bb549172cd739c82df21"}, @@ -5836,6 +6090,7 @@ version = "0.23.2" description = "The lightning-fast ASGI server." optional = false python-versions = ">=3.8" +groups = ["unstructured_data"] files = [ {file = "uvicorn-0.23.2-py3-none-any.whl", hash = "sha256:1f9be6558f01239d4fdf22ef8126c39cb1ad0addf76c40e760549d2c2f43ab53"}, {file = "uvicorn-0.23.2.tar.gz", hash = "sha256:4d3cc12d7727ba72b64d12d3cc7743124074c0a69f7b201512fc50c3e3f1569a"}, @@ -5862,6 +6117,8 @@ version = "0.18.0" description = "Fast implementation of asyncio event loop on top of libuv" optional = false python-versions = ">=3.7.0" +groups = ["unstructured_data"] +markers = "(sys_platform != \"win32\" and sys_platform != \"cygwin\") and platform_python_implementation != \"PyPy\"" files = [ {file = "uvloop-0.18.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1f354d669586fca96a9a688c585b6257706d216177ac457c92e15709acaece10"}, {file = "uvloop-0.18.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:280904236a5b333a273292b3bcdcbfe173690f69901365b973fa35be302d7781"}, @@ -5911,6 +6168,7 @@ version = "2.1.2" description = "Library of web-related functions" optional = false python-versions = ">=3.7" +groups = ["scrapy"] files = [ {file = "w3lib-2.1.2-py3-none-any.whl", hash = "sha256:c4432926e739caa8e3f49f5de783f336df563d9490416aebd5d39fb896d264e7"}, {file = "w3lib-2.1.2.tar.gz", hash = "sha256:ed5b74e997eea2abe3c1321f916e344144ee8e9072a6f33463ee8e57f858a4b1"}, @@ -5922,6 +6180,7 @@ version = "0.21.0" description = "Simple, modern and high performance file watching and code reload in python." optional = false python-versions = ">=3.8" +groups = ["unstructured_data"] files = [ {file = "watchfiles-0.21.0-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:27b4035013f1ea49c6c0b42d983133b136637a527e48c132d368eb19bf1ac6aa"}, {file = "watchfiles-0.21.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c81818595eff6e92535ff32825f31c116f867f64ff8cdf6562cd1d6b2e1e8f3e"}, @@ -6009,6 +6268,7 @@ version = "11.0.3" description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" optional = false python-versions = ">=3.7" +groups = ["unstructured_data"] files = [ {file = "websockets-11.0.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3ccc8a0c387629aec40f2fc9fdcb4b9d5431954f934da3eaf16cdc94f67dbfac"}, {file = "websockets-11.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d67ac60a307f760c6e65dad586f556dde58e683fab03323221a4e530ead6f74d"}, @@ -6088,6 +6348,7 @@ version = "0.41.2" description = "A built-package format for Python" optional = false python-versions = ">=3.7" +groups = ["main", "dltpure"] files = [ {file = "wheel-0.41.2-py3-none-any.whl", hash = "sha256:75909db2664838d015e3d9139004ee16711748a52c8f336b52882266540215d8"}, {file = "wheel-0.41.2.tar.gz", hash = "sha256:0c5ac5ff2afb79ac23ab82bab027a0be7b5dbcf2e54dc50efe4bf507de1f7985"}, @@ -6102,6 +6363,8 @@ version = "1.4.2" description = "" optional = false python-versions = ">=3.7" +groups = ["main", "dltpure"] +markers = "os_name == \"nt\"" files = [ {file = "win-precise-time-1.4.2.tar.gz", hash = "sha256:89274785cbc5f2997e01675206da3203835a442c60fd97798415c6b3c179c0b9"}, {file = "win_precise_time-1.4.2-cp310-cp310-win32.whl", hash = "sha256:7fa13a2247c2ef41cd5e9b930f40716eacc7fc1f079ea72853bd5613fe087a1a"}, @@ -6124,6 +6387,7 @@ version = "1.15.0" description = "Module for decorators, wrappers and monkey patching." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" +groups = ["filesystem"] files = [ {file = "wrapt-1.15.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:ca1cccf838cd28d5a0883b342474c630ac48cac5df0ee6eacc9c7290f76b11c1"}, {file = "wrapt-1.15.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:e826aadda3cae59295b95343db8f3d965fb31059da7de01ee8d1c40a60398b29"}, @@ -6208,6 +6472,7 @@ version = "2.0.1" description = "Library for developers to extract data from Microsoft Excel (tm) .xls spreadsheet files" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +groups = ["unstructured_data"] files = [ {file = "xlrd-2.0.1-py2.py3-none-any.whl", hash = "sha256:6a33ee89877bd9abc1158129f6e94be74e2679636b8a205b43b85206c3f0bbdd"}, {file = "xlrd-2.0.1.tar.gz", hash = "sha256:f72f148f54442c6b056bf931dbc34f986fd0c3b0b6b5a58d013c9aef274d0c88"}, @@ -6224,6 +6489,7 @@ version = "3.1.7" description = "A Python module for creating Excel XLSX files." optional = false python-versions = ">=3.6" +groups = ["unstructured_data"] files = [ {file = "XlsxWriter-3.1.7-py3-none-any.whl", hash = "sha256:8c730c4beb468696c4160aa1d6d168fb4c1a20dd972b212cd8cc1e74ddeab1b6"}, {file = "XlsxWriter-3.1.7.tar.gz", hash = "sha256:353042efb0f8551ce72baa087e98228f3394fcb380e8b96313edf1eec8d50823"}, @@ -6235,6 +6501,7 @@ version = "1.9.2" description = "Yet another URL library" optional = false python-versions = ">=3.7" +groups = ["main", "facebook_ads", "filesystem", "unstructured_data", "unstructured_data_lint"] files = [ {file = "yarl-1.9.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8c2ad583743d16ddbdf6bb14b5cd76bf43b0d0006e918809d5d4ddf7bde8dd82"}, {file = "yarl-1.9.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:82aa6264b36c50acfb2424ad5ca537a2060ab6de158a5bd2a72a032cc75b9eb8"}, @@ -6322,6 +6589,7 @@ version = "4.2.1" description = "A Python SOAP client" optional = false python-versions = ">=3.7" +groups = ["salesforce"] files = [ {file = "zeep-4.2.1-py3-none-any.whl", hash = "sha256:6754feb4c34a4b6d65fbc359252bf6654dcce3937bf1d95aae4402a60a8f5939"}, {file = "zeep-4.2.1.tar.gz", hash = "sha256:72093acfdb1d8360ed400869b73fbf1882b95c4287f798084c42ee0c1ff0e425"}, @@ -6349,10 +6617,12 @@ version = "3.17.0" description = "Backport of pathlib-compatible object wrapper for zip files" optional = false python-versions = ">=3.8" +groups = ["main", "dev", "dltpure", "salesforce", "unstructured_data"] files = [ {file = "zipp-3.17.0-py3-none-any.whl", hash = "sha256:0e923e726174922dce09c53c59ad483ff7bbb8e572e00c7f7c46b88556409f31"}, {file = "zipp-3.17.0.tar.gz", hash = "sha256:84e64a1c28cf7e91ed2078bb8cc8c259cb19b76942096c8d7b84947690cabaf0"}, ] +markers = {main = "python_version < \"3.9\"", dev = "python_version < \"3.9\"", dltpure = "python_version < \"3.9\"", salesforce = "python_version < \"3.9\"", unstructured_data = "python_version < \"3.10\""} [package.extras] docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"] @@ -6364,6 +6634,7 @@ version = "6.2" description = "Interfaces for Python" optional = false python-versions = ">=3.7" +groups = ["dev", "scrapy"] files = [ {file = "zope.interface-6.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:506f5410b36e5ba494136d9fa04c548eaf1a0d9c442b0b0e7a0944db7620e0ab"}, {file = "zope.interface-6.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b386b8b9d2b6a5e1e4eadd4e62335571244cb9193b7328c2b6e38b64cfda4f0e"}, @@ -6417,6 +6688,7 @@ version = "0.21.0" description = "Zstandard bindings for Python" optional = false python-versions = ">=3.7" +groups = ["unstructured_data"] files = [ {file = "zstandard-0.21.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:649a67643257e3b2cff1c0a73130609679a5673bf389564bc6d4b164d822a7ce"}, {file = "zstandard-0.21.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:144a4fe4be2e747bf9c646deab212666e39048faa4372abb6a250dab0f347a29"}, @@ -6470,6 +6742,6 @@ cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\ cffi = ["cffi (>=1.11)"] [metadata] -lock-version = "2.0" +lock-version = "2.1" python-versions = ">=3.8.1,<3.13" content-hash = "e216234bd35e71ef0c8e5a498c2cc616df417c5b14658b00aed9d935ba5a782e" diff --git a/pyproject.toml b/pyproject.toml index a1a431d54..7fe872f3b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -93,8 +93,8 @@ chromadb = "^0.3.26" tiktoken = "^0.4.0" [tool.poetry.group.mongodb.dependencies] -pymongo = "^4.3.3" -pymongoarrow = ">=1.3.0" +pymongo = "^4.10.0" +pymongoarrow = "^1.5.0" [tool.poetry.group.airtable.dependencies] pyairtable = "^2.1.0.post1" diff --git a/sources/mongodb/helpers.py b/sources/mongodb/helpers.py index 0c0182ed1..a9f2b5eba 100644 --- a/sources/mongodb/helpers.py +++ b/sources/mongodb/helpers.py @@ -18,7 +18,7 @@ from pymongo import ASCENDING, DESCENDING, MongoClient from pymongo.collection import Collection from pymongo.cursor import Cursor -from pymongo.helpers import _fields_list_to_dict +from pymongo.helpers_shared import _fields_list_to_dict if TYPE_CHECKING: @@ -312,11 +312,7 @@ def load_documents( Iterator[Any]: An iterator of the loaded documents. """ from pymongoarrow.context import PyMongoArrowContext # type: ignore - from pymongoarrow.lib import process_bson_stream # type: ignore - - context = PyMongoArrowContext.from_schema( - None, codec_options=self.collection.codec_options - ) + from pymongoarrow.lib import process_bson_stream filter_op = self._filter_op _raise_if_intersection(filter_op, filter_) @@ -333,9 +329,11 @@ def load_documents( cursor = self._limit(cursor, limit) # type: ignore + context = PyMongoArrowContext.from_schema( + None, codec_options=self.collection.codec_options + ) for batch in cursor: process_bson_stream(batch, context) - table = context.finish() yield convert_arrow_columns(table) @@ -345,7 +343,6 @@ class CollectionArrowLoaderParallel(CollectionLoaderParallel): Mongo DB collection parallel loader, which uses Apache Arrow for data processing. """ - def _get_cursor( self, filter_: Dict[str, Any], @@ -383,10 +380,8 @@ def _run_batch(self, cursor: TCursor, batch: Dict[str, int]) -> TDataItem: context = PyMongoArrowContext.from_schema( None, codec_options=self.collection.codec_options ) - for chunk in cursor.skip(batch["skip"]).limit(batch["limit"]): process_bson_stream(chunk, context) - table = context.finish() yield convert_arrow_columns(table) From 33c80634abbc700fda1df8480d83c41869f1c8a9 Mon Sep 17 00:00:00 2001 From: zilto Date: Thu, 16 Jan 2025 17:23:22 -0500 Subject: [PATCH 3/4] added pymongoarrow_schema; linting --- sources/mongodb/__init__.py | 31 ++++-- sources/mongodb/helpers.py | 136 +++++++++++++++++++++++---- tests/mongodb/test_mongodb_source.py | 31 +++--- 3 files changed, 160 insertions(+), 38 deletions(-) diff --git a/sources/mongodb/__init__.py b/sources/mongodb/__init__.py index 351b850e7..cea713a3e 100644 --- a/sources/mongodb/__init__.py +++ b/sources/mongodb/__init__.py @@ -24,6 +24,8 @@ def mongodb( parallel: Optional[bool] = dlt.config.value, limit: Optional[int] = None, filter_: Optional[Dict[str, Any]] = None, + projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None, + pymongoarrow_schema: Optional[Any] = None ) -> Iterable[DltResource]: """ A DLT source which loads data from a mongo database using PyMongo. @@ -41,6 +43,13 @@ def mongodb( The maximum number of documents to load. The limit is applied to each requested collection separately. filter_ (Optional[Dict[str, Any]]): The filter to apply to the collection. + projection: (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields of a collection + when loading the collection. Supported inputs: + include (list) - ["year", "title"] + include (dict) - {"year": True, "title": True} + exclude (dict) - {"released": False, "runtime": False} + Note: Can't mix include and exclude statements '{"title": True, "released": False}` + pymongoarrow_schema (pymongoarrow.schema.Schema): Mapping of expected field types of a collection to convert BSON to Arrow Returns: Iterable[DltResource]: A list of DLT resources for each collection to be loaded. @@ -73,12 +82,15 @@ def mongodb( parallel=parallel, limit=limit, filter_=filter_ or {}, - projection=None, + projection=projection, + pymongoarrow_schema=pymongoarrow_schema, ) -@dlt.common.configuration.with_config( - sections=("sources", "mongodb"), spec=MongoDbCollectionResourceConfiguration +@dlt.resource( + name=lambda args: args["collection"], + standalone=True, + spec=MongoDbCollectionResourceConfiguration, ) def mongodb_collection( connection_url: str = dlt.secrets.value, @@ -91,7 +103,8 @@ def mongodb_collection( chunk_size: Optional[int] = 10000, data_item_format: Optional[TDataItemFormat] = "object", filter_: Optional[Dict[str, Any]] = None, - projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None, + projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = dlt.config.value, + pymongoarrow_schema: Optional[Any] = None ) -> Any: """ A DLT source which loads a collection from a mongo database using PyMongo. @@ -111,12 +124,13 @@ def mongodb_collection( object - Python objects (dicts, lists). arrow - Apache Arrow tables. filter_ (Optional[Dict[str, Any]]): The filter to apply to the collection. - projection: (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select columns + projection: (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields when loading the collection. Supported inputs: include (list) - ["year", "title"] - include (dict) - {"year": 1, "title": 1} - exclude (dict) - {"released": 0, "runtime": 0} - Note: Can't mix include and exclude statements '{"title": 1, "released": 0}` + include (dict) - {"year": True, "title": True} + exclude (dict) - {"released": False, "runtime": False} + Note: Can't mix include and exclude statements '{"title": True, "released": False}` + pymongoarrow_schema (pymongoarrow.schema.Schema): Mapping of expected field types to convert BSON to Arrow Returns: Iterable[DltResource]: A list of DLT resources for each collection to be loaded. @@ -145,4 +159,5 @@ def mongodb_collection( data_item_format=data_item_format, filter_=filter_ or {}, projection=projection, + pymongoarrow_schema=pymongoarrow_schema, ) diff --git a/sources/mongodb/helpers.py b/sources/mongodb/helpers.py index a9f2b5eba..817a58fdd 100644 --- a/sources/mongodb/helpers.py +++ b/sources/mongodb/helpers.py @@ -107,10 +107,10 @@ def _filter_op(self) -> Dict[str, Any]: filt[self.cursor_field]["$gt"] = self.incremental.end_value return filt - - def _projection_op(self, projection) -> Optional[Dict[str, Any]]: + + def _projection_op(self, projection:Optional[Union[Mapping[str, Any], Iterable[str]]]) -> Optional[Dict[str, Any]]: """Build a projection operator. - + A tuple of fields to include or a dict specifying fields to include or exclude. The incremental `primary_key` needs to be handle differently for inclusion and exclusion projections. @@ -123,17 +123,16 @@ def _projection_op(self, projection) -> Optional[Dict[str, Any]]: projection_dict = dict(_fields_list_to_dict(projection, "projection")) - # NOTE we can still filter on primary_key if it's excluded from projection if self.incremental: # this is an inclusion projection - if any(v == 1 for v in projection.values()): + if any(v == 1 for v in projection_dict.values()): # ensure primary_key is included - projection_dict.update({self.incremental.primary_key: 1}) + projection_dict.update(m={self.incremental.primary_key: 1}) # this is an exclusion projection else: try: # ensure primary_key isn't excluded - projection_dict.pop(self.incremental.primary_key) + projection_dict.pop(self.incremental.primary_key) # type: ignore except KeyError: pass # primary_key was properly not included in exclusion projection else: @@ -174,6 +173,7 @@ def load_documents( Args: filter_ (Dict[str, Any]): The filter to apply to the collection. limit (Optional[int]): The number of documents to load. + projection: selection of fields to create Cursor Yields: Iterator[TDataItem]: An iterator of the loaded documents. @@ -279,6 +279,7 @@ def load_documents( Args: filter_ (Dict[str, Any]): The filter to apply to the collection. limit (Optional[int]): The number of documents to load. + projection: selection of fields to create Cursor Yields: Iterator[TDataItem]: An iterator of the loaded documents. @@ -300,6 +301,7 @@ def load_documents( filter_: Dict[str, Any], limit: Optional[int] = None, projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None, + pymongoarrow_schema: Any = None, ) -> Iterator[Any]: """ Load documents from the collection in Apache Arrow format. @@ -307,12 +309,14 @@ def load_documents( Args: filter_ (Dict[str, Any]): The filter to apply to the collection. limit (Optional[int]): The number of documents to load. + projection: selection of fields to create Cursor + pymongoarrow_schema: mapping of field types to convert BSON to Arrow Yields: Iterator[Any]: An iterator of the loaded documents. """ from pymongoarrow.context import PyMongoArrowContext # type: ignore - from pymongoarrow.lib import process_bson_stream + from pymongoarrow.lib import process_bson_stream # type: ignore filter_op = self._filter_op _raise_if_intersection(filter_op, filter_) @@ -330,7 +334,8 @@ def load_documents( cursor = self._limit(cursor, limit) # type: ignore context = PyMongoArrowContext.from_schema( - None, codec_options=self.collection.codec_options + schema=pymongoarrow_schema, + codec_options=self.collection.codec_options ) for batch in cursor: process_bson_stream(batch, context) @@ -343,6 +348,58 @@ class CollectionArrowLoaderParallel(CollectionLoaderParallel): Mongo DB collection parallel loader, which uses Apache Arrow for data processing. """ + def load_documents( + self, + filter_: Dict[str, Any], + limit: Optional[int] = None, + projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None, + pymongoarrow_schema: Any = None, + ) -> Iterator[TDataItem]: + """Load documents from the collection in parallel. + + Args: + filter_ (Dict[str, Any]): The filter to apply to the collection. + limit (Optional[int]): The number of documents to load. + projection: selection of fields to create Cursor + pymongoarrow_schema: mapping of field types to convert BSON to Arrow + + Yields: + Iterator[TDataItem]: An iterator of the loaded documents. + """ + yield from self._get_all_batches( + limit=limit, + filter_=filter_, + projection=projection, + pymongoarrow_schema=pymongoarrow_schema + ) + + def _get_all_batches( + self, + filter_: Dict[str, Any], + limit: Optional[int] = None, + projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None, + pymongoarrow_schema: Any = None, + ) -> Iterator[TDataItem]: + """Load all documents from the collection in parallel batches. + + Args: + filter_ (Dict[str, Any]): The filter to apply to the collection. + limit (Optional[int]): The maximum number of documents to load. + projection: selection of fields to create Cursor + pymongoarrow_schema: mapping of field types to convert BSON to Arrow + + Yields: + Iterator[TDataItem]: An iterator of the loaded documents. + """ + batches = self._create_batches(limit=limit) + cursor = self._get_cursor(filter_=filter_, projection=projection) + for batch in batches: + yield self._run_batch( + cursor=cursor, + batch=batch, + pymongoarrow_schema=pymongoarrow_schema, + ) + def _get_cursor( self, filter_: Dict[str, Any], @@ -352,6 +409,7 @@ def _get_cursor( Args: filter_ (Dict[str, Any]): The filter to apply to the collection. + projection: selection of fields to create Cursor Returns: Cursor: The cursor for the collection. @@ -371,14 +429,20 @@ def _get_cursor( return cursor @dlt.defer - def _run_batch(self, cursor: TCursor, batch: Dict[str, int]) -> TDataItem: + def _run_batch( + self, + cursor: TCursor, + batch: Dict[str, int], + pymongoarrow_schema: Any = None, + ) -> TDataItem: from pymongoarrow.context import PyMongoArrowContext from pymongoarrow.lib import process_bson_stream cursor = cursor.clone() context = PyMongoArrowContext.from_schema( - None, codec_options=self.collection.codec_options + schema=pymongoarrow_schema, + codec_options=self.collection.codec_options ) for chunk in cursor.skip(batch["skip"]).limit(batch["limit"]): process_bson_stream(chunk, context) @@ -390,7 +454,8 @@ def collection_documents( client: TMongoClient, collection: TCollection, filter_: Dict[str, Any], - projection: Union[Dict[str, Any], List[str]], # TODO kwargs reserved for dlt? + projection: Union[Dict[str, Any], List[str]], + pymongoarrow_schema: "pymongoarrow.schema.Schema", incremental: Optional[dlt.sources.incremental[Any]] = None, parallel: bool = False, limit: Optional[int] = None, @@ -413,12 +478,13 @@ def collection_documents( Supported formats: object - Python objects (dicts, lists). arrow - Apache Arrow tables. - projection: (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select columns + projection: (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields when loading the collection. Supported inputs: include (list) - ["year", "title"] - include (dict) - {"year": 1, "title": 1} - exclude (dict) - {"released": 0, "runtime": 0} - Note: Can't mix include and exclude statements '{"title": 1, "released": 0}` + include (dict) - {"year": True, "title": True} + exclude (dict) - {"released": False, "runtime": False} + Note: Can't mix include and exclude statements '{"title": True, "released": False}` + pymongoarrow_schema (pymongoarrow.schema.Schema): Mapping of expected field types of a collection to convert BSON to Arrow Returns: Iterable[DltResource]: A list of DLT resources for each collection to be loaded. @@ -429,6 +495,19 @@ def collection_documents( ) data_item_format = "object" + if data_item_format != "arrow" and pymongoarrow_schema: + dlt.common.logger.warn( + "Received value for `pymongoarrow_schema`, but `data_item_format=='object'` " + "Use `data_item_format=='arrow'` to enforce schema." + ) + + if data_item_format == "arrow" and pymongoarrow_schema and projection: + dlt.common.logger.warn( + "Received values for both `pymongoarrow_schema` and `projection`. Since both " + "create a projection to select fields, `projection` will be ignored." + ) + + if parallel: if data_item_format == "arrow": LoaderClass = CollectionArrowLoaderParallel @@ -443,11 +522,24 @@ def collection_documents( loader = LoaderClass( client, collection, incremental=incremental, chunk_size=chunk_size ) - for data in loader.load_documents(limit=limit, filter_=filter_, projection=projection): - yield data + if isinstance(loader, (CollectionArrowLoader, CollectionArrowLoaderParallel)): + yield from loader.load_documents( + limit=limit, + filter_=filter_, + projection=projection, + pymongoarrow_schema=pymongoarrow_schema, + ) + else: + yield from loader.load_documents(limit=limit, filter_=filter_, projection=projection) def convert_mongo_objs(value: Any) -> Any: + """MongoDB to dlt type conversion when using Python loaders. + + Notes: + The method `ObjectId.__str__()` creates an hexstring using `binascii.hexlify(__id).decode()` + + """ if isinstance(value, (ObjectId, Decimal128)): return str(value) if isinstance(value, _datetime.datetime): @@ -464,6 +556,13 @@ def convert_mongo_objs(value: Any) -> Any: def convert_arrow_columns(table: Any) -> Any: """Convert the given table columns to Python types. + Notes: + Calling str() matches the `convert_mongo_obs()` used in non-arrow code. + Pymongoarrow converts ObjectId to `fixed_size_binary[12]`, which can't be + converted to a string as a vectorized operation because it contains ASCII characters. + + Instead, you need to loop over values using: `value.as_buffer().hex().decode()` + Args: table (pyarrow.lib.Table): The table to convert. @@ -539,6 +638,7 @@ class MongoDbCollectionResourceConfiguration(BaseConfiguration): incremental: Optional[dlt.sources.incremental] = None # type: ignore[type-arg] write_disposition: Optional[str] = dlt.config.value parallel: Optional[bool] = False + projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = dlt.config.value __source_name__ = "mongodb" diff --git a/tests/mongodb/test_mongodb_source.py b/tests/mongodb/test_mongodb_source.py index 88f4ba1c5..ec75a6dea 100644 --- a/tests/mongodb/test_mongodb_source.py +++ b/tests/mongodb/test_mongodb_source.py @@ -422,9 +422,7 @@ def test_projection_list_inclusion(destination_name): expected_columns = projection + ["_id", "_dlt_id", "_dlt_load_id"] movies = mongodb_collection( - collection=collection_name, - projection=projection, - limit=2 + collection=collection_name, projection=projection, limit=2 ) pipeline.run(movies) loaded_columns = pipeline.default_schema.get_table_columns(collection_name).keys() @@ -445,9 +443,7 @@ def test_projection_dict_inclusion(destination_name): expected_columns = list(projection.keys()) + ["_id", "_dlt_id", "_dlt_load_id"] movies = mongodb_collection( - collection=collection_name, - projection=projection, - limit=2 + collection=collection_name, projection=projection, limit=2 ) pipeline.run(movies) loaded_columns = pipeline.default_schema.get_table_columns(collection_name).keys() @@ -465,17 +461,28 @@ def test_projection_dict_exclusion(destination_name): ) collection_name = "movies" columns_to_exclude = [ - "runtime", "released", "year", "plot", "fullplot", "lastupdated", "type", - "directors", "imdb", "cast", "countries", "genres", "tomatoes", "num_mflix_comments", - "rated", "awards" + "runtime", + "released", + "year", + "plot", + "fullplot", + "lastupdated", + "type", + "directors", + "imdb", + "cast", + "countries", + "genres", + "tomatoes", + "num_mflix_comments", + "rated", + "awards", ] projection = {col: 0 for col in columns_to_exclude} expected_columns = ["title", "poster", "_id", "_dlt_id", "_dlt_load_id"] movies = mongodb_collection( - collection=collection_name, - projection=projection, - limit=2 + collection=collection_name, projection=projection, limit=2 ) pipeline.run(movies) loaded_columns = pipeline.default_schema.get_table_columns(collection_name).keys() From 393d2752129aa26666570f25d3a220ae5e86aaf8 Mon Sep 17 00:00:00 2001 From: zilto Date: Fri, 28 Feb 2025 15:17:16 -0500 Subject: [PATCH 4/4] fix linting and formatting --- sources/mongodb/__init__.py | 4 ++-- sources/mongodb/helpers.py | 40 +++++++++++++++++++++++++++---------- 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/sources/mongodb/__init__.py b/sources/mongodb/__init__.py index cea713a3e..f659f0c21 100644 --- a/sources/mongodb/__init__.py +++ b/sources/mongodb/__init__.py @@ -25,7 +25,7 @@ def mongodb( limit: Optional[int] = None, filter_: Optional[Dict[str, Any]] = None, projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None, - pymongoarrow_schema: Optional[Any] = None + pymongoarrow_schema: Optional[Any] = None, ) -> Iterable[DltResource]: """ A DLT source which loads data from a mongo database using PyMongo. @@ -104,7 +104,7 @@ def mongodb_collection( data_item_format: Optional[TDataItemFormat] = "object", filter_: Optional[Dict[str, Any]] = None, projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = dlt.config.value, - pymongoarrow_schema: Optional[Any] = None + pymongoarrow_schema: Optional[Any] = None, ) -> Any: """ A DLT source which loads a collection from a mongo database using PyMongo. diff --git a/sources/mongodb/helpers.py b/sources/mongodb/helpers.py index 817a58fdd..5584750bb 100644 --- a/sources/mongodb/helpers.py +++ b/sources/mongodb/helpers.py @@ -1,7 +1,18 @@ """Mongo database source helpers""" from itertools import islice -from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Tuple, Union, Iterable, Mapping +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Iterator, + List, + Optional, + Tuple, + Union, + Iterable, + Mapping, +) import dlt from bson.decimal128 import Decimal128 @@ -23,7 +34,7 @@ if TYPE_CHECKING: TMongoClient = MongoClient[Any] - TCollection = Collection[Any] # type: ignore + TCollection = Collection[Any] TCursor = Cursor[Any] else: TMongoClient = Any @@ -108,7 +119,9 @@ def _filter_op(self) -> Dict[str, Any]: return filt - def _projection_op(self, projection:Optional[Union[Mapping[str, Any], Iterable[str]]]) -> Optional[Dict[str, Any]]: + def _projection_op( + self, projection: Optional[Union[Mapping[str, Any], Iterable[str]]] + ) -> Optional[Dict[str, Any]]: """Build a projection operator. A tuple of fields to include or a dict specifying fields to include or exclude. @@ -334,8 +347,7 @@ def load_documents( cursor = self._limit(cursor, limit) # type: ignore context = PyMongoArrowContext.from_schema( - schema=pymongoarrow_schema, - codec_options=self.collection.codec_options + schema=pymongoarrow_schema, codec_options=self.collection.codec_options ) for batch in cursor: process_bson_stream(batch, context) @@ -348,6 +360,7 @@ class CollectionArrowLoaderParallel(CollectionLoaderParallel): Mongo DB collection parallel loader, which uses Apache Arrow for data processing. """ + def load_documents( self, filter_: Dict[str, Any], @@ -370,7 +383,7 @@ def load_documents( limit=limit, filter_=filter_, projection=projection, - pymongoarrow_schema=pymongoarrow_schema + pymongoarrow_schema=pymongoarrow_schema, ) def _get_all_batches( @@ -441,8 +454,7 @@ def _run_batch( cursor = cursor.clone() context = PyMongoArrowContext.from_schema( - schema=pymongoarrow_schema, - codec_options=self.collection.codec_options + schema=pymongoarrow_schema, codec_options=self.collection.codec_options ) for chunk in cursor.skip(batch["skip"]).limit(batch["limit"]): process_bson_stream(chunk, context) @@ -507,7 +519,6 @@ def collection_documents( "create a projection to select fields, `projection` will be ignored." ) - if parallel: if data_item_format == "arrow": LoaderClass = CollectionArrowLoaderParallel @@ -530,7 +541,9 @@ def collection_documents( pymongoarrow_schema=pymongoarrow_schema, ) else: - yield from loader.load_documents(limit=limit, filter_=filter_, projection=projection) + yield from loader.load_documents( + limit=limit, filter_=filter_, projection=projection + ) def convert_mongo_objs(value: Any) -> Any: @@ -561,7 +574,12 @@ def convert_arrow_columns(table: Any) -> Any: Pymongoarrow converts ObjectId to `fixed_size_binary[12]`, which can't be converted to a string as a vectorized operation because it contains ASCII characters. - Instead, you need to loop over values using: `value.as_buffer().hex().decode()` + Instead, you need to loop over values using: + ```python + pyarrow.array([v.as_buffer().hex() for v in object_id_array], type=pyarrow.string()) + # pymongoarrow simplifies this by allowing this syntax + [str(v) for v in object_id_array] + ``` Args: table (pyarrow.lib.Table): The table to convert.