Merge branch 'master' into topic-baseline-speedup
SpicyGarlicAlbacoreRoll authored Apr 24, 2024
2 parents ec134ef + a7ca6bc commit 0167999
Showing 10 changed files with 109 additions and 37 deletions.
6 changes: 5 additions & 1 deletion CHANGELOG.md
@@ -29,16 +29,20 @@ and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [v7.1.1](https://github.com/asfadmin/Discovery-asf_search/compare/v7.1.0...v7.1.1)
### Changed
- Uses `ciso8601.parse_datetime()` in baseline calculations, speeds up calculations on larger stacks
### Added
- Adds `ASF_LOGGER` logging in `search_generator()` and related methods
### Fixed
- `ASFProduct.get_sort_keys()` no longer returns `None` when a sort key is missing; it now defaults to an empty string
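
An illustrative sketch of the `ciso8601.parse_datetime()` swap mentioned above (not part of the changelog or this diff; the `timestamps` list is made-up sample data):

```python
# ciso8601 parses ISO 8601 timestamps in C, which is why swapping it in
# speeds up baseline calculations over large stacks.
import ciso8601

timestamps = ["2024-04-24T12:00:00Z"] * 100_000

# One fast C call per timestamp; returns timezone-aware datetimes for the "Z" suffix.
parsed = [ciso8601.parse_datetime(t) for t in timestamps]
```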

------
## [v7.1.0](https://github.com/asfadmin/Discovery-asf_search/compare/v7.0.9...v7.1.0)
### Added
- Improved logging in `ASFSession` authentication methods

### Changed
- Uses `ciso8601` module for parsing dates from CMR response, significant performance improvement post-query
- `ASFSession` now allows for authorized user access to hidden/restricted CMR datasets via `auth_with_creds()` or `auth_with_cookiejar()` authentication methods (previously only supported via `auth_with_token()` method)
- `ASFSession.auth_with_token()` now authenticates directly against EDL endpoint
- UMM Platform ShortName used as final fallback criteria for product subclass assignment

------
## [v7.0.9](https://github.com/asfadmin/Discovery-asf_search/compare/v7.0.8...v7.0.9)
26 changes: 23 additions & 3 deletions asf_search/ASFProduct.py
@@ -284,12 +284,32 @@ def get_property_paths() -> Dict:
"""
return ASFProduct._base_properties

def get_sort_keys(self) -> Tuple:
def get_sort_keys(self) -> Tuple[str, str]:
"""
Returns a tuple of the primary and secondary date values used for sorting final search results.
Any subclass must return strings for the final `sort()` to work.
"""
return (self.properties.get('stopTime'), self.properties.get('fileID', 'sceneName'))

# `sort()` will raise an error when comparing `NoneType`,
# using self._read_property() to wrap standard `dict.get()` for possible `None` values
primary_key = self._read_property(key='stopTime', default='')
secondary_key = self._read_property(
key='fileID',
default=self._read_property('sceneName', '')
)

return (primary_key, secondary_key)

def _read_property(self, key: str, default: Any = None) -> Any:
"""
Helper method wraps `properties.get()`.
Since a property's value can be `None`, `dict.get('key', 'default')` will never return the default when the key exists
"""
output = default
if (value:=self.properties.get(key)) is not None:
output = value

return output

@final
@staticmethod
def umm_get(item: Dict, *args):
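The subtlety `_read_property` addresses: `dict.get()` only falls back to its default when the key is *absent*, not when the stored value is `None`. A minimal sketch of the pitfall (the `props` dict is hypothetical):

```python
# A property can exist with a None value, so dict.get's default never fires.
props = {"stopTime": None}

print(props.get("stopTime", ""))  # None: the key exists, so the default is skipped

# The helper's pattern: fall back only when the stored value is actually None.
value = props.get("stopTime")
print(value if value is not None else "")  # ''
```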
2 changes: 1 addition & 1 deletion asf_search/ASFSearchOptions/ASFSearchOptions.py
@@ -69,7 +69,7 @@ def __str__(self):
"""
What to display if `print(opts)` is called.
"""
return json.dumps(dict(self), indent=4)
return json.dumps(dict(self), indent=4, default=str)

# Default is set to '...', since 'None' is a very valid value here
def pop(self, key, default=...):
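For context on the one-line `__str__` change above: `json.dumps` raises a `TypeError` on values it cannot serialize (such as `datetime` objects held by search options), and `default=str` tells it to stringify them instead. A hedged sketch with a made-up options dict:

```python
import datetime
import json

# Hypothetical stand-in for dict(opts); real ASFSearchOptions may hold datetimes.
opts = {"start": datetime.datetime(2024, 4, 24)}

# json.dumps(opts, indent=4)  # TypeError: Object of type datetime is not JSON serializable
print(json.dumps(opts, indent=4, default=str))
# {
#     "start": "2024-04-24 00:00:00"
# }
```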
12 changes: 11 additions & 1 deletion asf_search/Products/ARIAS1GUNWProduct.py
@@ -1,5 +1,6 @@
from typing import Dict
from asf_search import ASFSession
from asf_search.ASFProduct import ASFProduct
from asf_search.ASFSearchOptions import ASFSearchOptions
from asf_search.Products import S1Product
from asf_search.CMR.translate import try_parse_float
@@ -54,4 +54,13 @@ def get_default_baseline_product_type() -> None:
"""
Returns the product type to search for when building a baseline stack.
"""
return None
return None

@staticmethod
def is_ARIAS1GUNWProduct(item: Dict) -> bool:
platform = ASFProduct.umm_get(item['umm'], 'Platforms', 0, 'ShortName')
if platform in ['SENTINEL-1A', 'SENTINEL-1B']:
asf_platform = ASFProduct.umm_get(item['umm'], 'AdditionalAttributes', ('Name', 'ASF_PLATFORM'), 'Values', 0)
return 'Sentinel-1 Interferogram' in asf_platform

return False
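
A sketch of what the new class method matches against, assuming the class is re-exported from `asf_search.Products` like its siblings; the CMR umm item below is hypothetical and heavily trimmed:

```python
from asf_search.Products import ARIAS1GUNWProduct

# Minimal fake umm record: Sentinel-1 platform plus the ARIA interferogram attribute.
item = {
    "umm": {
        "Platforms": [{"ShortName": "SENTINEL-1A"}],
        "AdditionalAttributes": [
            {"Name": "ASF_PLATFORM", "Values": ["Sentinel-1 Interferogram (BETA)"]}
        ],
    }
}

print(ARIAS1GUNWProduct.is_ARIAS1GUNWProduct(item))  # True
```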
10 changes: 5 additions & 5 deletions asf_search/Products/NISARProduct.py
@@ -1,4 +1,4 @@
from typing import Dict, Union
from typing import Dict, Tuple, Union
from asf_search import ASFSearchOptions, ASFSession, ASFStackableProduct
from asf_search.CMR.translate import try_parse_float, try_parse_int, try_round_float
from asf_search.constants import PRODUCT_TYPE
@@ -48,10 +48,10 @@ def get_property_paths() -> Dict:
**NISARProduct._base_properties
}

def get_sort_keys(self):
def get_sort_keys(self) -> Tuple[str, str]:
keys = super().get_sort_keys()

if keys[0] is None:
return (self.properties.get('processingDate', ''), keys[1])
if keys[0] == '':
return (self._read_property('processingDate', ''), keys[1])

return keys
8 changes: 4 additions & 4 deletions asf_search/Products/OPERAS1Product.py
@@ -1,4 +1,4 @@
from typing import Dict
from typing import Dict, Tuple
from asf_search import ASFSearchOptions, ASFSession
from asf_search.CMR.translate import try_parse_date
from asf_search.Products import S1Product
@@ -71,10 +71,10 @@ def get_stack_opts(self, opts: ASFSearchOptions = None) -> ASFSearchOptions:
"""
return None

def get_sort_keys(self):
def get_sort_keys(self) -> Tuple[str, str]:
keys = super().get_sort_keys()

if keys[0] is None:
keys = self.properties.get('validityStartDate'), keys[1]
if keys[0] == '':
return (self._read_property('validityStartDate', ''), keys[1])

return keys
2 changes: 1 addition & 1 deletion asf_search/WKT/RepairEntry.py
@@ -4,4 +4,4 @@ def __init__(self, report_type: str, report: str) -> None:
self.report = report

def __str__(self) -> str:
return f'{self.report_type}\n\t{self.report}'
return f"{self.report_type}: {self.report}"
6 changes: 3 additions & 3 deletions asf_search/WKT/validate_wkt.py
@@ -11,7 +11,7 @@
from asf_search.exceptions import ASFWKTError


def validate_wkt(aoi: Union[str, BaseGeometry]) -> Tuple[BaseGeometry, List[RepairEntry]]:
def validate_wkt(aoi: Union[str, BaseGeometry]) -> Tuple[BaseGeometry, BaseGeometry, List[RepairEntry]]:
"""
Param aoi: the WKT string or Shapely Geometry to validate and prepare for the CMR query
Validates the given area of interest, and returns a validated and simplified WKT string
@@ -52,7 +52,7 @@ def _search_wkt_prep(shape: BaseGeometry):
if isinstance(shape, Polygon):
return orient(Polygon(shape.exterior), sign=1.0)

def _simplify_geometry(geometry: BaseGeometry) -> Tuple[BaseGeometry, List[RepairEntry]]:
def _simplify_geometry(geometry: BaseGeometry) -> Tuple[BaseGeometry, BaseGeometry, List[RepairEntry]]:
"""
param geometry: AOI Shapely Geometry to be prepped for CMR
prepares geometry for CMR by:
@@ -165,7 +165,7 @@ def _counter_clockwise_reorientation(geometry: Union[Point, LineString, Polygon]
return reoriented, None


def _get_clamped_and_wrapped_geometry(shape: BaseGeometry) -> Tuple[BaseGeometry, List[RepairEntry]]:
def _get_clamped_and_wrapped_geometry(shape: BaseGeometry) -> Tuple[BaseGeometry, BaseGeometry, List[RepairEntry]]:
"""
param geometry: Shapely geometry to clamp
Clamps geometry to +/-90 latitude and wraps longitude +/-180
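Since `validate_wkt` now returns a three-element tuple, callers unpack an extra geometry. A sketch under the assumption that the second element is the unwrapped geometry, as the `wrapped, _, repairs` unpacking in `search_generator.py` below suggests:

```python
from asf_search.WKT.validate_wkt import validate_wkt

aoi = "POLYGON((170 60, 190 60, 190 70, 170 70, 170 60))"

# New signature: (wrapped geometry, unwrapped geometry, repair reports).
wrapped, unwrapped, repairs = validate_wkt(aoi)
for repair in repairs:
    print(repair)  # formatted via the new "type: report" __str__
```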
7 changes: 5 additions & 2 deletions asf_search/search/search.py
@@ -2,7 +2,7 @@
from copy import copy
import datetime

from asf_search import ASFSearchResults
from asf_search import ASF_LOGGER, ASFSearchResults
from asf_search.ASFSearchOptions import ASFSearchOptions
from asf_search.search.search_generator import search_generator

@@ -99,6 +99,9 @@ def search(
results.searchComplete = page.searchComplete
results.searchOptions = page.searchOptions

results.sort(key=lambda p: p.get_sort_keys(), reverse=True)
try:
results.sort(key=lambda p: p.get_sort_keys(), reverse=True)
except TypeError as exc:
ASF_LOGGER.warning(f"Failed to sort final results, leaving results unsorted. Reason: {exc}")

return results
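
The guard around the final sort exists because Python 3 refuses to order `None` against `str`; before the empty-string defaults above, one product with a missing `stopTime` could crash the whole sort. An illustrative reproduction:

```python
# Tuples are compared element-wise, so a None in the first slot is fatal.
keys = [("2024-01-02T00:00:00Z", "b"), (None, "a")]
try:
    keys.sort(reverse=True)
except TypeError as exc:
    print(f"Failed to sort: {exc}")
    # '<' not supported between instances of 'NoneType' and 'str'
```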
67 changes: 51 additions & 16 deletions asf_search/search/search_generator.py
@@ -1,14 +1,12 @@
import logging
from typing import Dict, Generator, Union, Sequence, Tuple, List
from copy import copy
from requests.exceptions import HTTPError
from requests import ReadTimeout, Response
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential, wait_fixed
import datetime
import dateparser
import warnings

from asf_search import __version__
from asf_search import ASF_LOGGER, __version__

from asf_search.ASFSearchResults import ASFSearchResults
from asf_search.ASFSearchOptions import ASFSearchOptions
@@ -22,7 +20,7 @@
from asf_search.WKT.validate_wkt import validate_wkt
from asf_search.search.error_reporting import report_search_error
import asf_search.Products as ASFProductType

from shapely.geometry.base import BaseGeometry

def search_generator(
absoluteOrbit: Union[int, Tuple[int, int], range, Sequence[Union[int, Tuple[int, int], range]]] = None,
@@ -82,27 +80,42 @@ def search_generator(
(getattr(opts, 'granule_list', False) or getattr(opts, 'product_list', False)):
raise ValueError("Cannot use maxResults along with product_list/granule_list.")

ASF_LOGGER.debug(f'SEARCH: preprocessing opts: {opts}')
preprocess_opts(opts)
ASF_LOGGER.debug(f'SEARCH: preprocessed opts: {opts}')

ASF_LOGGER.info(f'SEARCH: Using search opts {opts}')

url = '/'.join(s.strip('/') for s in [f'https://{opts.host}', f'{INTERNAL.CMR_GRANULE_PATH}'])
total = 0

queries = build_subqueries(opts)
for query in queries:

ASF_LOGGER.info(f'SEARCH: Using cmr endpoint: "{url}"')
ASF_LOGGER.debug(f'SEARCH: Built {len(queries)} subqueries')

for subquery_idx, query in enumerate(queries):
ASF_LOGGER.info(f'SUBQUERY {subquery_idx + 1}: Beginning subquery with opts: {query}')

ASF_LOGGER.debug(f'TRANSLATION: Translating subquery:\n{query}')
translated_opts = translate_opts(query)
ASF_LOGGER.debug(f'TRANSLATION: Subquery translated to cmr keywords:\n{translated_opts}')
cmr_search_after_header = ""
subquery_count = 0

page_number = 1
while(cmr_search_after_header is not None):
try:
ASF_LOGGER.debug(f'SUBQUERY {subquery_idx + 1}: Fetching page {page_number}')
items, subquery_max_results, cmr_search_after_header = query_cmr(opts.session, url, translated_opts, subquery_count)
except (ASFSearchError, CMRIncompleteError) as e:
message = str(e)
logging.error(message)
ASF_LOGGER.error(message)
report_search_error(query, message)
opts.session.headers.pop('CMR-Search-After', None)
return

ASF_LOGGER.debug(f'SUBQUERY {subquery_idx + 1}: Page {page_number} fetched, returned {len(items)} items.')
opts.session.headers.update({'CMR-Search-After': cmr_search_after_header})
last_page = process_page(items, maxResults, subquery_max_results, total, subquery_count, opts)
subquery_count += len(last_page)
@@ -112,13 +125,18 @@

if last_page.searchComplete:
if total == maxResults: # the user has as many results as they wanted
ASF_LOGGER.info(f'SEARCH COMPLETE: MaxResults ({maxResults}) reached')
opts.session.headers.pop('CMR-Search-After', None)
return
else: # or we've gotten all possible results for this subquery
ASF_LOGGER.info(f'SUBQUERY {subquery_idx + 1} COMPLETE: results exhausted for subquery')
cmr_search_after_header = None

page_number += 1

opts.session.headers.pop('CMR-Search-After', None)

ASF_LOGGER.info(f'SEARCH COMPLETE: results exhausted for search opts {opts}')

@retry(reraise=True,
retry=retry_if_exception_type(CMRIncompleteError),
@@ -180,8 +198,10 @@ def preprocess_opts(opts: ASFSearchOptions):

def wrap_wkt(opts: ASFSearchOptions):
if opts.intersectsWith is not None:
wrapped, _, __ = validate_wkt(opts.intersectsWith)
wrapped, _, repairs = validate_wkt(opts.intersectsWith)
opts.intersectsWith = wrapped.wkt
if len(repairs):
ASF_LOGGER.warning(f"WKT REPAIR/VALIDATION: The following repairs were performed on the provided AOI:\n{[str(repair) for repair in repairs]}")


def set_default_dates(opts: ASFSearchOptions):
@@ -192,7 +212,7 @@ def set_default_dates(opts: ASFSearchOptions):
# If both are used, make sure they're in the right order:
if opts.start is not None and opts.end is not None:
if opts.start > opts.end:
warnings.warn(f"Start date ({opts.start}) is after end date ({opts.end}). Switching the two.")
ASF_LOGGER.warning(f"Start date ({opts.start}) is after end date ({opts.end}). Switching the two.")
opts.start, opts.end = opts.end, opts.start
# Can't do this sooner, since you need to compare start vs end:
if opts.start is not None:
@@ -253,15 +273,29 @@ def as_ASFProduct(item: Dict, session: ASFSession) -> ASFProduct:
if subclass is not None:
return subclass(item, session=session)

# or if the key matches one of the shortnames in any of our datasets
# if the key matches one of the shortnames in any of our datasets
for dataset, collections in dataset_collections.items():
if collections.get(product_type_key) is not None:
subclass = dataset_to_product_types.get(dataset)
if subclass is not None:
return subclass(item, session=session)
break # dataset exists, but is not in dataset_to_product_types yet

return ASFProduct(item, session=session)
# If the platform exists, try to match it
platform = _get_platform(item=item)
if ASFProductType.ARIAS1GUNWProduct.is_ARIAS1GUNWProduct(item=item):
return dataset_to_product_types.get('ARIA S1 GUNW')(item, session=session)
elif (subclass := dataset_to_product_types.get(platform)) is not None:
return subclass(item, session=session)

output = ASFProduct(item, session=session)

granule_concept_id = output.meta.get('concept-id', 'Missing Granule Concept ID')
fileID = output.properties.get('fileID', output.properties.get('sceneName', 'fileID and sceneName Missing'))

ASF_LOGGER.warning(f'Failed to find corresponding ASFProduct subclass for \
Product: "{fileID}", Granule Concept ID: "{granule_concept_id}", default to "ASFProduct"')
return output

def _get_product_type_key(item: Dict) -> str:
"""Match the umm response to the right ASFProduct subclass by returning one of the following:
@@ -272,16 +306,17 @@ def _get_product_type_key(item: Dict) -> str:
collection_shortName = ASFProduct.umm_get(item['umm'], 'CollectionReference', 'ShortName')

if collection_shortName is None:
platform_shortname = ASFProduct.umm_get(item['umm'], 'Platforms', 0, 'ShortName')
if platform_shortname in ['SENTINEL-1A', 'SENTINEL-1B']:
asf_platform = ASFProduct.umm_get(item['umm'], 'AdditionalAttributes', ('Name', 'ASF_PLATFORM'), 'Values', 0)
if 'Sentinel-1 Interferogram' in asf_platform:
return 'ARIA S1 GUNW'
platform = _get_platform(item=item)
if ASFProductType.ARIAS1GUNWProduct.is_ARIAS1GUNWProduct(item=item):
return 'ARIA S1 GUNW'

return platform_shortname
return platform

return collection_shortName

def _get_platform(item: Dict):
return ASFProduct.umm_get(item['umm'], 'Platforms', 0, 'ShortName')

# Maps datasets from DATASET.py and collection/platform shortnames to ASFProduct subclasses
dataset_to_product_types = {
'SENTINEL-1': ASFProductType.S1Product,
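To actually see the new `SEARCH`/`SUBQUERY` messages, attach a handler to `ASF_LOGGER`; a minimal sketch using the standard `logging` module (the handler and format choices here are assumptions, not asf_search API):

```python
import logging
from asf_search import ASF_LOGGER

handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter("%(levelname)s %(message)s"))
ASF_LOGGER.addHandler(handler)
ASF_LOGGER.setLevel(logging.DEBUG)  # DEBUG surfaces the per-page fetch messages
```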
