diff --git a/CHANGELOG.md b/CHANGELOG.md index 474ec27f..f315c6b5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,31 @@ and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - --> +------ +## [v7.1.1](https://github.com/asfadmin/Discovery-asf_search/compare/v7.1.0...v7.1.1) +### Changed +- Uses `ciso8601.parse_datetime()` in baseline calculations, speeds up calculations on larger stacks +### Added +- Adds `ASF_LOGGER` logging in `search_generator()` and related methods +### Fixed +- `ASFProduct.get_sort_keys()` will no longer return `None` if missing sort key, defaults to empty string + +------ +## [v7.1.0](https://github.com/asfadmin/Discovery-asf_search/compare/v7.0.9...v7.1.0) +### Added +- Improved logging in `ASFSession` authentication methods +### Changed +- Uses `ciso8601` module for parsing dates from CMR response, significant performance improvement post-query +- `ASFSession` now allows for authorized user access to hidden/restricted CMR datasets via `auth_with_creds()` or `auth_with_cookiejar()` authentication methods (previously only supported via `auth_with_token()` method) +- `ASFSession.auth_with_token()` now authenticates directly against EDL endpoint +- UMM Platform ShortName used as final fallback criteria for product subclass assignment + +------ +## [v7.0.9](https://github.com/asfadmin/Discovery-asf_search/compare/v7.0.8...v7.0.9) +### Changed +- collection "ARIA_S1_GUNW" added to `ARIA_S1_GUNW` dataset, V3 products now loaded as `ARIAS1GUNWProduct` subclass +- `ARIAS1GUNWProduct` now exposes `ariaVersion` and (for V3 products) `inputGranules` in `ARIAS1GUNWProduct.properties` + ------ ## [v7.0.8](https://github.com/asfadmin/Discovery-asf_search/compare/v7.0.7...v7.0.8) ### Added diff --git a/asf_search/ASFProduct.py b/asf_search/ASFProduct.py index 09005fb1..662fe4d3 100644 --- a/asf_search/ASFProduct.py +++ b/asf_search/ASFProduct.py @@ -306,12 +306,32 @@ def get_property_paths() -> Dict: """ return 
ASFProduct._base_properties - def get_sort_keys(self) -> Tuple: + def get_sort_keys(self) -> Tuple[str, str]: """ Returns tuple of primary and secondary date values used for sorting final search results + Any subclasses must return string for final `sort()` to work """ - return (self.properties.get('stopTime'), self.properties.get('fileID', 'sceneName')) - + # `sort()` will raise an error when comparing `NoneType`, + # using self._read_property() to wrap standard `dict.get()` for possible `None` values + primary_key = self._read_property(key='stopTime', default='') + secondary_key = self._read_property( + key='fileID', + default=self._read_property('sceneName', '') + ) + + return (primary_key, secondary_key) + + def _read_property(self, key: str, default: Any = None) -> Any: + """ + Helper method wraps `properties.get()`. + Since a property can be `None`, if the key exists `dict.get('key', 'default')` will never return the default + """ + output = default + if (value:=self.properties.get(key)) is not None: + output = value + + return output + @final @staticmethod def umm_get(item: Dict, *args): diff --git a/asf_search/ASFSearchOptions/ASFSearchOptions.py b/asf_search/ASFSearchOptions/ASFSearchOptions.py index 8b1103e7..a2d3d9d3 100644 --- a/asf_search/ASFSearchOptions/ASFSearchOptions.py +++ b/asf_search/ASFSearchOptions/ASFSearchOptions.py @@ -69,7 +69,7 @@ def __str__(self): """ What to display if `print(opts)` is called. 
""" - return json.dumps(dict(self), indent=4) + return json.dumps(dict(self), indent=4, default=str) # Default is set to '...', since 'None' is a very valid value here def pop(self, key, default=...): diff --git a/asf_search/ASFSession.py b/asf_search/ASFSession.py index cf1a65ae..33762739 100644 --- a/asf_search/ASFSession.py +++ b/asf_search/ASFSession.py @@ -3,8 +3,10 @@ import requests from requests.utils import get_netrc_auth import http.cookiejar -from asf_search import __name__ as asf_name, __version__ as asf_version + +from asf_search import ASF_LOGGER, __name__ as asf_name, __version__ as asf_version from asf_search.exceptions import ASFAuthenticationError +from warnings import warn class ASFSession(requests.Session): def __init__(self, @@ -28,7 +30,7 @@ def __init__(self, `edl_host`: the Earthdata login endpoint used by auth_with_creds(). Defaults to `asf_search.constants.INTERNAL.EDL_HOST` `edl_client_id`: The Earthdata Login client ID for this package. Defaults to `asf_search.constants.INTERNAL.EDL_CLIENT_ID` `asf_auth_host`: the ASF auth endpoint . Defaults to `asf_search.constants.INTERNAL.ASF_AUTH_HOST` - `cmr_host`: the base CMR endpoint to test EDL login tokens against. Defaults to `asf_search.constants.INTERNAL.CMR_HOST` + `cmr_host (DEPRECATED V7.0.9)`: the base CMR endpoint to test EDL login tokens against. Defaults to `asf_search.constants.INTERNAL.CMR_HOST` `cmr_collections`: the CMR endpoint path login tokens will be tested against. Defaults to `asf_search.constants.INTERNAL.CMR_COLLECTIONS` `auth_domains`: the list of authorized endpoints that are allowed to pass auth credentials. Defaults to `asf_search.constants.INTERNAL.AUTH_DOMAINS`. Authorization headers WILL NOT be stripped from the session object when redirected through these domains. 
`auth_cookie_names`: the list of cookie names to use when verifying with `auth_with_creds()` & `auth_with_cookiejar()` @@ -49,11 +51,18 @@ def __init__(self, self.edl_host = INTERNAL.EDL_HOST if edl_host is None else edl_host self.edl_client_id = INTERNAL.EDL_CLIENT_ID if edl_client_id is None else edl_client_id self.asf_auth_host = INTERNAL.ASF_AUTH_HOST if asf_auth_host is None else asf_auth_host - self.cmr_host = INTERNAL.CMR_HOST if cmr_host is None else cmr_host self.cmr_collections = INTERNAL.CMR_COLLECTIONS if cmr_collections is None else cmr_collections self.auth_domains = INTERNAL.AUTH_DOMAINS if auth_domains is None else auth_domains self.auth_cookie_names = INTERNAL.AUTH_COOKIES if auth_cookie_names is None else auth_cookie_names + self.cmr_host = INTERNAL.CMR_HOST + + if cmr_host is not None: + warn(f'Use of `cmr_host` keyword with `ASFSession` is deprecated for asf-search versions >= 7.0.9, and will be removed with the next major version. \ + \nTo authenticate an EDL token for a non-prod deployment of CMR, set the `edl_host` keyword instead. 
\ + \n(ex: session arugments for authenticating against uat: `ASFSession(edl_host="uat.urs.earthdata.nasa.gov")`)', category=DeprecationWarning, stacklevel=2) + self.cmr_host = cmr_host + def __eq__(self, other): return self.auth == other.auth \ and self.headers == other.headers \ @@ -72,11 +81,25 @@ def auth_with_creds(self, username: str, password: str): login_url = f'https://{self.edl_host}/oauth/authorize?client_id={self.edl_client_id}&response_type=code&redirect_uri=https://{self.asf_auth_host}/login' self.auth = (username, password) + + ASF_LOGGER.info(f'Attempting to login via "{login_url}"') self.get(login_url) if not self._check_auth_cookies(self.cookies.get_dict()): raise ASFAuthenticationError("Username or password is incorrect") + ASF_LOGGER.info(f'Login successful') + + token = self.cookies.get_dict().get('urs-access-token') + + if token is None: + ASF_LOGGER.warning(f'Provided asf_auth_host "{self.asf_auth_host}" returned no EDL token during ASFSession validation. EDL Token expected in "urs-access-token" cookie, required for hidden/restricted dataset access. 
The current session will use basic authorization.') + else: + ASF_LOGGER.info(f'Found "urs-access-token" cookie in response from auth host, using token for downloads and cmr queries.') + self.auth = None + self._update_edl_token(token=token) + + return self def auth_with_token(self, token: str): @@ -87,17 +110,42 @@ def auth_with_token(self, token: str): :return ASFSession: returns self for convenience """ - self.headers.update({'Authorization': 'Bearer {0}'.format(token)}) - - url = f"https://{self.cmr_host}{self.cmr_collections}" - response = self.get(url) + oauth_authorization = f"https://{self.edl_host}/oauth/tokens/user?client_id={self.edl_client_id}" + + ASF_LOGGER.info(f"Authenticating EDL token against {oauth_authorization}") + response = self.post(url=oauth_authorization, data={ + 'token': token + }) if not 200 <= response.status_code <= 299: - raise ASFAuthenticationError("Invalid/Expired token passed") + if not self._try_legacy_token_auth(token=token): + raise ASFAuthenticationError("Invalid/Expired token passed") + + ASF_LOGGER.info(f"EDL token authentication successful") + self._update_edl_token(token=token) return self - def auth_with_cookiejar(self, cookies: http.cookiejar.CookieJar): + def _try_legacy_token_auth(self, token: str) -> False: + """ + Checks `cmr_host` search endpoint directly with provided token using method used in previous versions of asf-search (<7.0.9). 
+ This is to prevent breaking changes until next major release + """ + from asf_search.constants import INTERNAL + + if self.cmr_host != INTERNAL.CMR_HOST: + self.headers.update({'Authorization': 'Bearer {0}'.format(token)}) + legacy_auth_url = f"https://{self.cmr_host}{self.cmr_collections}" + response = self.get(legacy_auth_url) + self.headers.pop('Authorization') + return 200 <= response.status_code <= 299 + + return False + + def _update_edl_token(self, token: str): + self.headers.update({'Authorization': 'Bearer {0}'.format(token)}) + + def auth_with_cookiejar(self, cookies: Union[http.cookiejar.CookieJar, requests.cookies.RequestsCookieJar]): """ Authenticates the session using a pre-existing cookiejar @@ -105,7 +153,6 @@ def auth_with_cookiejar(self, cookies: http.cookiejar.CookieJar): :return ASFSession: returns self for convenience """ - if not self._check_auth_cookies(cookies): raise ASFAuthenticationError("Cookiejar does not contain login cookies") @@ -113,11 +160,24 @@ def auth_with_cookiejar(self, cookies: http.cookiejar.CookieJar): if cookie.is_expired(): raise ASFAuthenticationError("Cookiejar contains expired cookies") + token = cookies.get_dict().get('urs-access-token') + if token is None: + ASF_LOGGER.warning(f'Failed to find EDL Token in cookiejar. EDL Token expected in "urs-access-token" cookie, required for hidden/restricted dataset access.') + else: + ASF_LOGGER.info(f'Authenticating EDL token found in "urs-access-token" cookie') + try: + self.auth_with_token(token) + except ASFAuthenticationError: + ASF_LOGGER.warning(f'Failed to authenticate with found EDL token found. 
Access to hidden/restricted cmr data may be limited.') + self.cookies = cookies return self - def _check_auth_cookies(self, cookies: Union[http.cookiejar.CookieJar, Dict]) -> bool: + def _check_auth_cookies(self, cookies: Union[http.cookiejar.CookieJar, requests.cookies.RequestsCookieJar]) -> bool: + if isinstance(cookies, requests.cookies.RequestsCookieJar): + cookies = dict(cookies) + return any(cookie in self.auth_cookie_names for cookie in cookies) def rebuild_auth(self, prepared_request: requests.Request, response: requests.Response): diff --git a/asf_search/CMR/datasets.py b/asf_search/CMR/datasets.py index 288f9cd5..7d49e089 100644 --- a/asf_search/CMR/datasets.py +++ b/asf_search/CMR/datasets.py @@ -433,6 +433,7 @@ "C1595765183-ASF", "C1225776659-ASF", ], + "ARIA_S1_GUNW": ["C2859376221-ASF", "C1261881077-ASF"] }, "SMAP": { "SPL1A_RO_METADATA_003": ["C1243122884-ASF", "C1233103964-ASF"], @@ -539,6 +540,8 @@ "C1214470533-ASF", "C1214470576-ASF", "C1595422627-ASF", + "C2859376221-ASF", + "C1261881077-ASF", "C1214470496-ASF", "C1214470532-ASF", "C1214472977-ASF", diff --git a/asf_search/CMR/translate.py b/asf_search/CMR/translate.py index 1c57de58..d564d9c7 100644 --- a/asf_search/CMR/translate.py +++ b/asf_search/CMR/translate.py @@ -9,7 +9,7 @@ from shapely.geometry.base import BaseGeometry from .field_map import field_map from .datasets import collections_per_platform -import dateparser +import ciso8601 import logging @@ -157,8 +157,11 @@ def try_parse_date(value: str) -> Optional[str]: if value is None: return None - date = dateparser.parse(value) - + try: + date = ciso8601.parse_datetime(value) + except ValueError: + return None + if date is None: return value diff --git a/asf_search/Products/ARIAS1GUNWProduct.py b/asf_search/Products/ARIAS1GUNWProduct.py index 2d88419a..34be38ce 100644 --- a/asf_search/Products/ARIAS1GUNWProduct.py +++ b/asf_search/Products/ARIAS1GUNWProduct.py @@ -1,5 +1,6 @@ from typing import Dict from asf_search import ASFSession 
+from asf_search.ASFProduct import ASFProduct from asf_search.ASFSearchOptions import ASFSearchOptions from asf_search.Products import S1Product from asf_search.CMR.translate import try_parse_float @@ -13,7 +14,9 @@ class ARIAS1GUNWProduct(S1Product): """ _base_properties = { 'perpendicularBaseline': {'path': ['AdditionalAttributes', ('Name', 'PERPENDICULAR_BASELINE'), 'Values', 0], 'cast': try_parse_float}, - 'orbit': {'path': ['OrbitCalculatedSpatialDomains']} + 'orbit': {'path': ['OrbitCalculatedSpatialDomains']}, + 'inputGranules': {'path': ['InputGranules']}, + 'ariaVersion': {'path': ['AdditionalAttributes', ('Name', 'VERSION'), 'Values', 0]} } def __init__(self, args: Dict = {}, session: ASFSession = ASFSession()): @@ -21,10 +24,12 @@ def __init__(self, args: Dict = {}, session: ASFSession = ASFSession()): self.properties['orbit'] = [orbit['OrbitNumber'] for orbit in self.properties['orbit']] urls = self.umm_get(self.umm, 'RelatedUrls', ('Type', [('USE SERVICE API', 'URL')]), 0) + + self.properties['additionalUrls'] = [] if urls is not None: self.properties['url'] = urls[0] self.properties['fileName'] = self.properties['fileID'] + '.' + urls[0].split('.')[-1] - self.properties['additionalUrls'] = [urls[1]] + self.properties['additionalUrls'] = urls[1:] @staticmethod def get_property_paths() -> Dict: @@ -50,4 +55,13 @@ def get_default_baseline_product_type() -> None: """ Returns the product type to search for when building a baseline stack. 
""" - return None \ No newline at end of file + return None + + @staticmethod + def is_ARIAS1GUNWProduct(item: Dict) -> bool: + platform = ASFProduct.umm_get(item['umm'], 'Platforms', 0, 'ShortName') + if platform in ['SENTINEL-1A', 'SENTINEL-1B']: + asf_platform = ASFProduct.umm_get(item['umm'], 'AdditionalAttributes', ('Name', 'ASF_PLATFORM'), 'Values', 0) + return 'Sentinel-1 Interferogram' in asf_platform + + return False diff --git a/asf_search/Products/NISARProduct.py b/asf_search/Products/NISARProduct.py index 279e014a..819e1eb8 100644 --- a/asf_search/Products/NISARProduct.py +++ b/asf_search/Products/NISARProduct.py @@ -1,4 +1,4 @@ -from typing import Dict, Union +from typing import Dict, Tuple, Union from asf_search import ASFSearchOptions, ASFSession, ASFStackableProduct from asf_search.CMR.translate import try_parse_float, try_parse_int, try_round_float from asf_search.constants import PRODUCT_TYPE @@ -48,10 +48,10 @@ def get_property_paths() -> Dict: **NISARProduct._base_properties } - def get_sort_keys(self): + def get_sort_keys(self) -> Tuple[str, str]: keys = super().get_sort_keys() - - if keys[0] is None: - return (self.properties.get('processingDate', ''), keys[1]) + + if keys[0] == '': + return (self._read_property('processingDate', ''), keys[1]) return keys diff --git a/asf_search/Products/OPERAS1Product.py b/asf_search/Products/OPERAS1Product.py index d205b840..86af7bbe 100644 --- a/asf_search/Products/OPERAS1Product.py +++ b/asf_search/Products/OPERAS1Product.py @@ -1,4 +1,4 @@ -from typing import Dict +from typing import Dict, Tuple from asf_search import ASFSearchOptions, ASFSession from asf_search.CMR.translate import try_parse_date from asf_search.Products import S1Product @@ -71,10 +71,10 @@ def get_stack_opts(self, opts: ASFSearchOptions = None) -> ASFSearchOptions: """ return None - def get_sort_keys(self): + def get_sort_keys(self) -> Tuple[str, str]: keys = super().get_sort_keys() - if keys[0] is None: - keys = 
self.properties.get('validityStartDate'), keys[1] + if keys[0] == '': + return (self._read_property('validityStartDate', ''), keys[1]) return keys diff --git a/asf_search/WKT/RepairEntry.py b/asf_search/WKT/RepairEntry.py index f8d413b4..b0f5d6f2 100644 --- a/asf_search/WKT/RepairEntry.py +++ b/asf_search/WKT/RepairEntry.py @@ -4,4 +4,4 @@ def __init__(self, report_type: str, report: str) -> None: self.report = report def __str__(self) -> str: - return f'{self.report_type}\n\t{self.report}' + return f"{self.report_type}: {self.report}" diff --git a/asf_search/WKT/validate_wkt.py b/asf_search/WKT/validate_wkt.py index 6a8f7681..a22911be 100644 --- a/asf_search/WKT/validate_wkt.py +++ b/asf_search/WKT/validate_wkt.py @@ -11,7 +11,7 @@ from asf_search.exceptions import ASFWKTError -def validate_wkt(aoi: Union[str, BaseGeometry]) -> Tuple[BaseGeometry, List[RepairEntry]]: +def validate_wkt(aoi: Union[str, BaseGeometry]) -> Tuple[BaseGeometry, BaseGeometry, List[RepairEntry]]: """ Param aoi: the WKT string or Shapely Geometry to validate and prepare for the CMR query Validates the given area of interest, and returns a validated and simplified WKT string @@ -52,7 +52,7 @@ def _search_wkt_prep(shape: BaseGeometry): if isinstance(shape, Polygon): return orient(Polygon(shape.exterior), sign=1.0) -def _simplify_geometry(geometry: BaseGeometry) -> Tuple[BaseGeometry, List[RepairEntry]]: +def _simplify_geometry(geometry: BaseGeometry) -> Tuple[BaseGeometry, BaseGeometry, List[RepairEntry]]: """ param geometry: AOI Shapely Geometry to be prepped for CMR prepares geometry for CMR by: @@ -165,7 +165,7 @@ def _counter_clockwise_reorientation(geometry: Union[Point, LineString, Polygon] return reoriented, None -def _get_clamped_and_wrapped_geometry(shape: BaseGeometry) -> Tuple[BaseGeometry, List[RepairEntry]]: +def _get_clamped_and_wrapped_geometry(shape: BaseGeometry) -> Tuple[BaseGeometry, BaseGeometry, List[RepairEntry]]: """ param geometry: Shapely geometry to clamp Clamps 
geometry to +/-90 latitude and wraps longitude +/-180 diff --git a/asf_search/baseline/calc.py b/asf_search/baseline/calc.py index efe76ae4..faa6442e 100644 --- a/asf_search/baseline/calc.py +++ b/asf_search/baseline/calc.py @@ -2,7 +2,7 @@ from typing import List import numpy as np -from dateutil.parser import parse +from ciso8601 import parse_datetime from asf_search import ASFProduct # WGS84 constants @@ -23,17 +23,17 @@ def calculate_perpendicular_baselines(reference: str, stack: List[ASFProduct]): baselineProperties['noStateVectors'] = True continue - asc_node_time = parse(baselineProperties['ascendingNodeTime']).timestamp() + asc_node_time = parse_datetime(baselineProperties['ascendingNodeTime']).timestamp() - start = parse(product.properties['startTime']).timestamp() - end = parse(product.properties['stopTime']).timestamp() + start = parse_datetime(product.properties['startTime']).timestamp() + end = parse_datetime(product.properties['stopTime']).timestamp() center = start + ((end - start) / 2) baselineProperties['relative_start_time'] = start - asc_node_time baselineProperties['relative_center_time'] = center - asc_node_time baselineProperties['relative_end_time'] = end - asc_node_time - t_pre = parse(positionProperties['prePositionTime']).timestamp() - t_post = parse(positionProperties['postPositionTime']).timestamp() + t_pre = parse_datetime(positionProperties['prePositionTime']).timestamp() + t_post = parse_datetime(positionProperties['postPositionTime']).timestamp() product.baseline['relative_sv_pre_time'] = t_pre - asc_node_time product.baseline['relative_sv_post_time'] = t_post - asc_node_time diff --git a/asf_search/baseline/stack.py b/asf_search/baseline/stack.py index 69b66f85..c443adae 100644 --- a/asf_search/baseline/stack.py +++ b/asf_search/baseline/stack.py @@ -1,5 +1,5 @@ from typing import Tuple, List -from dateutil.parser import parse +from ciso8601 import parse_datetime import pytz from .calc import calculate_perpendicular_baselines @@ 
-66,12 +66,12 @@ def calculate_temporal_baselines(reference: ASFProduct, stack: ASFSearchResults) :param stack: The stack to operate on. :return: None, as the operation occurs in-place on the stack provided. """ - reference_time = parse(reference.properties['startTime']) + reference_time = parse_datetime(reference.properties['startTime']) if reference_time.tzinfo is None: reference_time = pytz.utc.localize(reference_time) for secondary in stack: - secondary_time = parse(secondary.properties['startTime']) + secondary_time = parse_datetime(secondary.properties['startTime']) if secondary_time.tzinfo is None: secondary_time = pytz.utc.localize(secondary_time) secondary.properties['temporalBaseline'] = (secondary_time.date() - reference_time.date()).days diff --git a/asf_search/search/search.py b/asf_search/search/search.py index 95bcc146..37360736 100644 --- a/asf_search/search/search.py +++ b/asf_search/search/search.py @@ -2,7 +2,7 @@ from copy import copy import datetime -from asf_search import ASFSearchResults +from asf_search import ASF_LOGGER, ASFSearchResults from asf_search.ASFSearchOptions import ASFSearchOptions from asf_search.search.search_generator import search_generator @@ -99,6 +99,9 @@ def search( results.searchComplete = page.searchComplete results.searchOptions = page.searchOptions - results.sort(key=lambda p: p.get_sort_keys(), reverse=True) + try: + results.sort(key=lambda p: p.get_sort_keys(), reverse=True) + except TypeError as exc: + ASF_LOGGER.warning(f"Failed to sort final results, leaving results unsorted. 
Reason: {exc}") return results diff --git a/asf_search/search/search_generator.py b/asf_search/search/search_generator.py index d99cc653..f2a9b772 100644 --- a/asf_search/search/search_generator.py +++ b/asf_search/search/search_generator.py @@ -1,4 +1,3 @@ -import logging from typing import Dict, Generator, Union, Sequence, Tuple, List from copy import copy from requests.exceptions import HTTPError @@ -6,9 +5,8 @@ from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential, wait_fixed import datetime import dateparser -import warnings -from asf_search import __version__ +from asf_search import ASF_LOGGER, __version__ from asf_search.ASFSearchResults import ASFSearchResults from asf_search.ASFSearchOptions import ASFSearchOptions @@ -22,7 +20,7 @@ from asf_search.WKT.validate_wkt import validate_wkt from asf_search.search.error_reporting import report_search_error import asf_search.Products as ASFProductType - +from shapely.geometry.base import BaseGeometry def search_generator( absoluteOrbit: Union[int, Tuple[int, int], range, Sequence[Union[int, Tuple[int, int], range]]] = None, @@ -82,27 +80,42 @@ def search_generator( (getattr(opts, 'granule_list', False) or getattr(opts, 'product_list', False)): raise ValueError("Cannot use maxResults along with product_list/granule_list.") + ASF_LOGGER.debug(f'SEARCH: preprocessing opts: {opts}') preprocess_opts(opts) + ASF_LOGGER.debug(f'SEARCH: preprocessed opts: {opts}') + + ASF_LOGGER.info(f'SEARCH: Using search opts {opts}') url = '/'.join(s.strip('/') for s in [f'https://{opts.host}', f'{INTERNAL.CMR_GRANULE_PATH}']) total = 0 queries = build_subqueries(opts) - for query in queries: + + ASF_LOGGER.info(f'SEARCH: Using cmr endpoint: "{url}"') + ASF_LOGGER.debug(f'SEARCH: Built {len(queries)} subqueries') + + for subquery_idx, query in enumerate(queries): + ASF_LOGGER.info(f'SUBQUERY {subquery_idx + 1}: Beginning subquery with opts: {query}') + + ASF_LOGGER.debug(f'TRANSLATION: Translating 
subquery:\n{query}') translated_opts = translate_opts(query) + ASF_LOGGER.debug(f'TRANSLATION: Subquery translated to cmr keywords:\n{translated_opts}') cmr_search_after_header = "" subquery_count = 0 + page_number = 1 while(cmr_search_after_header is not None): try: + ASF_LOGGER.debug(f'SUBQUERY {subquery_idx + 1}: Fetching page {page_number}') items, subquery_max_results, cmr_search_after_header = query_cmr(opts.session, url, translated_opts, subquery_count) except (ASFSearchError, CMRIncompleteError) as e: message = str(e) - logging.error(message) + ASF_LOGGER.error(message) report_search_error(query, message) opts.session.headers.pop('CMR-Search-After', None) return + ASF_LOGGER.debug(f'SUBQUERY {subquery_idx + 1}: Page {page_number} fetched, returned {len(items)} items.') opts.session.headers.update({'CMR-Search-After': cmr_search_after_header}) last_page = process_page(items, maxResults, subquery_max_results, total, subquery_count, opts) subquery_count += len(last_page) @@ -112,13 +125,18 @@ def search_generator( if last_page.searchComplete: if total == maxResults: # the user has as many results as they wanted + ASF_LOGGER.info(f'SEARCH COMPLETE: MaxResults ({maxResults}) reached') opts.session.headers.pop('CMR-Search-After', None) return else: # or we've gotten all possible results for this subquery + ASF_LOGGER.info(f'SUBQUERY {subquery_idx + 1} COMPLETE: results exhausted for subquery') cmr_search_after_header = None + + page_number += 1 opts.session.headers.pop('CMR-Search-After', None) + ASF_LOGGER.info(f'SEARCH COMPLETE: results exhausted for search opts {opts}') @retry(reraise=True, retry=retry_if_exception_type(CMRIncompleteError), @@ -180,8 +198,10 @@ def preprocess_opts(opts: ASFSearchOptions): def wrap_wkt(opts: ASFSearchOptions): if opts.intersectsWith is not None: - wrapped, _, __ = validate_wkt(opts.intersectsWith) + wrapped, _, repairs = validate_wkt(opts.intersectsWith) opts.intersectsWith = wrapped.wkt + if len(repairs): + 
ASF_LOGGER.warning(f"WKT REPAIR/VALIDATION: The following repairs were performed on the provided AOI:\n{[str(repair) for repair in repairs]}") def set_default_dates(opts: ASFSearchOptions): @@ -192,7 +212,7 @@ def set_default_dates(opts: ASFSearchOptions): # If both are used, make sure they're in the right order: if opts.start is not None and opts.end is not None: if opts.start > opts.end: - warnings.warn(f"Start date ({opts.start}) is after end date ({opts.end}). Switching the two.") + ASF_LOGGER.warning(f"Start date ({opts.start}) is after end date ({opts.end}). Switching the two.") opts.start, opts.end = opts.end, opts.start # Can't do this sooner, since you need to compare start vs end: if opts.start is not None: @@ -253,7 +273,7 @@ def as_ASFProduct(item: Dict, session: ASFSession) -> ASFProduct: if subclass is not None: return subclass(item, session=session) - # or if the key matches one of the shortnames in any of our datasets + # if the key matches one of the shortnames in any of our datasets for dataset, collections in dataset_collections.items(): if collections.get(product_type_key) is not None: subclass = dataset_to_product_types.get(dataset) @@ -261,7 +281,21 @@ def as_ASFProduct(item: Dict, session: ASFSession) -> ASFProduct: return subclass(item, session=session) break # dataset exists, but is not in dataset_to_product_types yet - return ASFProduct(item, session=session) + # If the platform exists, try to match it + platform = _get_platform(item=item) + if ASFProductType.ARIAS1GUNWProduct.is_ARIAS1GUNWProduct(item=item): + return dataset_to_product_types.get('ARIA S1 GUNW')(item, session=session) + elif (subclass := dataset_to_product_types.get(platform)) is not None: + return subclass(item, session=session) + + output = ASFProduct(item, session=session) + + granule_concept_id = output.meta.get('concept-id', 'Missing Granule Concept ID') + fileID = output.properties.get('fileID', output.properties.get('sceneName', 'fileID and sceneName Missing')) + + 
ASF_LOGGER.warning(f'Failed to find corresponding ASFProduct subclass for \ + Product: "{fileID}", Granule Concept ID: "{granule_concept_id}", default to "ASFProduct"') + return output def _get_product_type_key(item: Dict) -> str: """Match the umm response to the right ASFProduct subclass by returning one of the following: @@ -272,16 +306,17 @@ def _get_product_type_key(item: Dict) -> str: collection_shortName = ASFProduct.umm_get(item['umm'], 'CollectionReference', 'ShortName') if collection_shortName is None: - platform_shortname = ASFProduct.umm_get(item['umm'], 'Platforms', 0, 'ShortName') - if platform_shortname in ['SENTINEL-1A', 'SENTINEL-1B']: - asf_platform = ASFProduct.umm_get(item['umm'], 'AdditionalAttributes', ('Name', 'ASF_PLATFORM'), 'Values', 0) - if 'Sentinel-1 Interferogram' in asf_platform: - return 'ARIA S1 GUNW' + platform = _get_platform(item=item) + if ASFProductType.ARIAS1GUNWProduct.is_ARIAS1GUNWProduct(item=item): + return 'ARIA S1 GUNW' - return platform_shortname + return platform return collection_shortName +def _get_platform(item: Dict): + return ASFProduct.umm_get(item['umm'], 'Platforms', 0, 'ShortName') + # Maps datasets from DATASET.py and collection/platform shortnames to ASFProduct subclasses dataset_to_product_types = { 'SENTINEL-1': ASFProductType.S1Product, diff --git a/setup.py b/setup.py index d88008cf..36d31fc8 100644 --- a/setup.py +++ b/setup.py @@ -4,17 +4,17 @@ requirements = [ "requests", "shapely", - "python-dateutil", "pytz", "importlib_metadata", "numpy", "dateparser", - "tenacity == 8.2.2" + "tenacity == 8.2.2", + "ciso8601" ] test_requirements = [ - "pytest < 7.2.0", - "pytest-automation", + "pytest==8.1.1", + "pytest-automation==3.0.0", "pytest-cov", "pytest-xdist", "coverage", @@ -53,7 +53,7 @@ license='BSD', license_files=('LICENSE',), classifiers=[ - "Development Status :: 2 - Pre-Alpha", + "Development Status :: 5 - Production/Stable", "License :: OSI Approved :: BSD License", "Operating System :: OS 
Independent", "Intended Audience :: Developers", @@ -62,6 +62,9 @@ "Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Topic :: Software Development", "Topic :: Scientific/Engineering :: Atmospheric Science", "Topic :: Scientific/Engineering :: GIS", diff --git a/tests/ASFSearchResults/test_ASFSearchResults.py b/tests/ASFSearchResults/test_ASFSearchResults.py index 68e55ec2..7f80a124 100644 --- a/tests/ASFSearchResults/test_ASFSearchResults.py +++ b/tests/ASFSearchResults/test_ASFSearchResults.py @@ -1,6 +1,5 @@ from typing import Dict, List -import dateparser import asf_search as asf from asf_search import ASFSearchResults import defusedxml.ElementTree as DefusedETree diff --git a/tests/ASFSession/test_ASFSession.py b/tests/ASFSession/test_ASFSession.py index 21382372..4c560281 100644 --- a/tests/ASFSession/test_ASFSession.py +++ b/tests/ASFSession/test_ASFSession.py @@ -16,7 +16,7 @@ def run_auth_with_creds(username: str, password: str): def run_auth_with_token(token: str): session = ASFSession() - with patch('asf_search.ASFSession.get') as mock_token_session: + with patch('asf_search.ASFSession.post') as mock_token_session: if not token.startswith('Bearer EDL'): mock_token_session.return_value.status_code = 400 session.auth_with_token(token) @@ -28,8 +28,13 @@ def run_auth_with_cookiejar(cookies: List): cookiejar = http.cookiejar.CookieJar() for cookie in cookies: cookiejar.set_cookie(create_cookie(name=cookie.pop('name'), **cookie)) + + # requests.cookies.RequestsCookieJar, which has slightly different behaviour session = ASFSession() - session.auth_with_cookiejar(cookies) + session.auth_with_cookiejar(cookiejar) + + request_cookiejar_session = ASFSession() + request_cookiejar_session.auth_with_cookiejar(session.cookies) def 
run_test_asf_session_rebuild_auth( original_domain: str, @@ -43,7 +48,7 @@ def run_test_asf_session_rebuild_auth( session = ASFSession() - with patch('asf_search.ASFSession.get') as mock_token_session: + with patch('asf_search.ASFSession.post') as mock_token_session: mock_token_session.return_value.status_code = 200 session.auth_with_token("bad_token") diff --git a/tests/yml_tests/test_search.yml b/tests/yml_tests/test_search.yml index f18370c1..ab896207 100644 --- a/tests/yml_tests/test_search.yml +++ b/tests/yml_tests/test_search.yml @@ -1,6 +1,8 @@ sentinel-1_collections: &sentinel-1_collections - "C2803501758-ASF" - "C2803501097-ASF" + - "C1261881077-ASF" + - "C2859376221-ASF" - "C1214470488-ASF" # S1A - "C1214470533-ASF" - "C1214470576-ASF" @@ -355,6 +357,8 @@ tests: collections: [ "C1214353986-ASF", "C1214336045-ASF", + "C1261881077-ASF", + "C2859376221-ASF", "C1214336717-ASF", "C1214335430-ASF", "C1214335471-ASF",