
Commit 50548a8
chore: clean up get_urls() method, update changelog, setup.py
kim committed Jan 13, 2025
1 parent 55556af commit 50548a8
Showing 5 changed files with 27 additions and 17 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -28,7 +28,7 @@ and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 ------
 ## [v8.1.0](https://github.com/asfadmin/Discovery-asf_search/compare/v8.0.1...v8.1.0)
 ### Added
-- Adds ability to gather urls from results by extension and regex patterns
+- Adds `ASFSearchResults.find_urls()` and `ASFProduct.find_urls()` to gather urls/uris from results by extension and/or regex pattern
 ### Changed
 - Changed log level from warning to debug/info for search timing log messages
 - Raised minimum Python version to 3.9 from 3.8, which reached EOL last year (see the official [Status of Python versions](https://devguide.python.org/versions/) for the Python version release cycle)
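The new methods named in this entry share one signature across `ASFSearchResults` and `ASFProduct`. A minimal usage sketch, assuming a Sentinel-1 search (the search parameters here are illustrative, not part of this commit):

```python
import asf_search as asf

results = asf.search(platform=asf.PLATFORM.SENTINEL1, maxResults=5)

# All .tiff urls across the whole result set
tiff_urls = results.find_urls(extension='.tiff')

# Urls matching a regex pattern on a single product
qa_urls = results[0].find_urls(pattern=r'(QA_)+')

# Search s3 direct-access uris instead of https urls
s3_uris = results.find_urls(extension='.h5', directAccess=True)
```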
36 changes: 23 additions & 13 deletions asf_search/ASFProduct.py
@@ -265,7 +265,8 @@ def get_stack_opts(self, opts: ASFSearchOptions = None) -> ASFSearchOptions:
         return None
 
     def _get_access_urls(
-        self, url_types: List[str] = ['GET DATA', 'EXTENDED METADATA']
+        self,
+        url_types: List[str] = ['GET DATA', 'EXTENDED METADATA']
     ) -> List[str]:
         accessUrls = []
 
@@ -275,16 +276,31 @@
 
         return sorted(list(set(accessUrls)))
 
+    def _get_urls(self) -> List[str]:
+        """Finds and returns all umm urls"""
+        urls = self._get_access_urls(
+            ['GET DATA', 'EXTENDED METADATA', 'GET DATA VIA DIRECT ACCESS', 'VIEW RELATED INFORMATION']
+        )
+        return [
+            url for url in urls if not url.startswith('s3://')
+        ]
+
+    def _get_s3_uris(self) -> List[str]:
+        """Finds and returns all umm S3 direct access uris"""
+        s3_urls = self._get_access_urls(
+            ['GET DATA', 'EXTENDED METADATA', 'GET DATA VIA DIRECT ACCESS']
+        )
+        return [url for url in s3_urls if url.startswith('s3://')]
+
     def _get_additional_urls(self) -> List[str]:
-        accessUrls = self._get_access_urls(['GET DATA', 'EXTENDED METADATA'])
+        """Finds and returns all non-md5/image urls and filters out the existing `url` property"""
+        access_urls = self._get_urls()
         return [
-            url
-            for url in accessUrls
+            url for url in access_urls
             if not url.endswith('.md5')
-            and not url.startswith('s3://')
-            and 's3credentials' not in url
             and not url.endswith('.png')
             and url != self.properties['url']
+            and 's3credentials' not in url
         ]
 
     def find_urls(self, extension: str = None, pattern: str = r'.*', directAccess: bool = False) -> List[str]:
@@ -296,7 +312,7 @@ def find_urls(self, extension: str = None, pattern: str = r'.*', directAccess: b
             - Example: `r'(QA_)+'` to find urls with 'QA_' at least once
         param directAccess: should search in s3 bucket urls (Defaults to `False`)
         """
-        search_list = self._get_s3_urls() if directAccess else self._get_additional_urls()
+        search_list = self._get_s3_uris() if directAccess else self._get_urls()
 
         def _get_extension(file_url: str):
             path = parse.urlparse(file_url).path
@@ -308,12 +324,6 @@ def _get_extension(file_url: str):
         regexp = re.compile(pattern=pattern)
 
         return [url for url in search_list if regexp.search(url) is not None]
-
-    def _get_s3_urls(self) -> List[str]:
-        s3_urls = self._get_access_urls(
-            ['GET DATA', 'EXTENDED METADATA', 'GET DATA VIA DIRECT ACCESS']
-        )
-        return [url for url in s3_urls if url.startswith('s3://')]
 
     def centroid(self) -> Point:
         """
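Taken together, `find_urls()` now filters the plain https urls (or the s3 uris when `directAccess=True`) first by extension, then by regex. A standalone sketch of that filtering logic follows; the extension parsing is an approximation, since `_get_extension`'s body is collapsed in this view:

```python
import re
from typing import List, Optional
from urllib import parse


def filter_urls(urls: List[str], extension: Optional[str] = None, pattern: str = r'.*') -> List[str]:
    """Approximate the extension + regex filtering that find_urls() performs."""

    def _get_extension(file_url: str) -> str:
        # Parse out the path component so query strings don't affect the match
        path = parse.urlparse(file_url).path
        return '.' + path.split('.')[-1] if '.' in path else ''

    if extension is not None:
        urls = [url for url in urls if _get_extension(url) == extension]

    regexp = re.compile(pattern=pattern)
    return [url for url in urls if regexp.search(url) is not None]


urls = [
    's3://bucket/scene/QA_quicklook.png',
    'https://example.com/scene/product.md5',
    'https://example.com/scene/product.h5',
]
print(filter_urls(urls, extension='.png', pattern=r'QA_'))
# ['s3://bucket/scene/QA_quicklook.png']
```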
2 changes: 1 addition & 1 deletion asf_search/ASFSearchResults.py
@@ -56,7 +56,7 @@ def find_urls(self, extension: str = None, pattern: str = r'.*', directAccess: b
         for product in self:
             urls.extend(product.find_urls(extension=extension, pattern=pattern, directAccess=directAccess))
 
-        return urls
+        return list(set(urls))
 
     def __str__(self):
         return json.dumps(self.geojson(), indent=2, sort_keys=True)
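One behavioral note on `return list(set(urls))`: the set round-trip drops urls duplicated across products, but Python sets do not preserve insertion order, so the returned list's ordering is arbitrary. A minimal illustration:

```python
urls = ['a.tif', 'b.tif', 'a.tif']

print(list(set(urls)))            # duplicates removed, order not guaranteed
print(list(dict.fromkeys(urls)))  # order-preserving alternative: ['a.tif', 'b.tif']
```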
2 changes: 1 addition & 1 deletion asf_search/Products/NISARProduct.py
@@ -18,7 +18,7 @@ def __init__(self, args: Dict = {}, session: ASFSession = ASFSession()):
         super().__init__(args, session)
 
         self.properties['additionalUrls'] = self._get_additional_urls()
-        self.properties['s3Urls'] = self._get_s3_urls()
+        self.properties['s3Urls'] = self._get_s3_uris()
 
         if self.properties.get('groupID') is None:
             self.properties['groupID'] = self.properties['sceneName']
2 changes: 1 addition & 1 deletion asf_search/Products/S1Product.py
@@ -35,7 +35,7 @@ class S1Product(ASFStackableProduct):
     def __init__(self, args: Dict = {}, session: ASFSession = ASFSession()):
         super().__init__(args, session)
 
-        self.properties['s3Urls'] = self._get_s3_urls()
+        self.properties['s3Urls'] = self._get_s3_uris()
 
         if self.has_baseline():
             self.baseline = self.get_baseline_calc_properties()
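In both product classes the rename is internal only; the public `s3Urls` property keeps its name and contents. Continuing the earlier sketch (the uri value is hypothetical):

```python
product = results[0]
for uri in product.properties['s3Urls']:
    print(uri)  # e.g. 's3://example-bucket/scene/product.h5' (hypothetical)
```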
