Merge pull request #18 from OceanNetworksCanada/exceptions
Improve exceptions emitted and handled
eliferguson authored Dec 8, 2023
2 parents dbc3d98 + 5e22016 commit 33e1b50
Showing 41 changed files with 451 additions and 475 deletions.
5 changes: 4 additions & 1 deletion src/onc/__init__.py
@@ -1 +1,4 @@
-from onc.onc import ONC  # noqa F401
+from .modules._DataProductFile import MaxRetriesException
+from .onc import ONC
+
+__all__ = ["ONC", "MaxRetriesException"]
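
With MaxRetriesException re-exported at the package root, callers no longer need to import it from the private modules package. A minimal sketch of the intended usage, assuming orderDataProduct accepts a maxRetries keyword as elsewhere in this library; the token and filters below are illustrative, not part of this diff:

    from onc import ONC, MaxRetriesException

    onc = ONC("YOUR_TOKEN")  # hypothetical token
    filters = {"dataProductCode": "TSSD", "extension": "csv"}  # illustrative filters

    try:
        onc.orderDataProduct(filters, maxRetries=100)
    except MaxRetriesException as err:
        print(err)  # Maximum number of retries (100) exceeded
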
3 changes: 0 additions & 3 deletions src/onc/modules/Exceptions.py

This file was deleted.

130 changes: 54 additions & 76 deletions src/onc/modules/_DataProductFile.py
@@ -1,10 +1,15 @@
 from time import sleep, time
+from warnings import warn
 
 import requests
 
 from ._PollLog import _PollLog
-from ._util import _printErrorMessage, saveAsFile
-from .Exceptions import MaxRetriesException
+from ._util import _createErrorMessage, saveAsFile
 
 
+class MaxRetriesException(RuntimeError):
+    def __init__(self, max_retries):
+        super().__init__(f"Maximum number of retries ({max_retries}) exceeded")
+
+
 class _DataProductFile:
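
The relocated exception now subclasses RuntimeError and formats its own message from the retry limit, so any existing handler that catches RuntimeError keeps working. A quick interactive check of the new class, using the package re-export shown above:

    >>> from onc import MaxRetriesException
    >>> err = MaxRetriesException(3)
    >>> isinstance(err, RuntimeError)
    True
    >>> str(err)
    'Maximum number of retries (3) exceeded'
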
@@ -48,80 +53,53 @@ def download(
         log = _PollLog(True)
         self._status = 202
         while self._status == 202:
-            try:
-                # Run timed request
-                start = time()
-                response = requests.get(self._baseUrl, self._filters, timeout=timeout)
-                duration = time() - start
-
-                self._downloadUrl = response.url
-                self._status = response.status_code
-                self._retries += 1
-
-                # print('request got {:d}'.format(response.status_code))
-                if maxRetries > 0 and self._retries > maxRetries:
-                    raise MaxRetriesException(
-                        f" Maximum number of retries ({maxRetries}) exceeded"
-                    )
-
-                # Status 200: file downloaded
-                # Status 202: processing
-                # Status 204: no data
-                # Status 400: error
-                # Status 404: index out of bounds
-                # Status 410: gone (file deleted from FTP)
-                if self._status == 200:
-                    # File downloaded, get filename from header and save
-                    self._downloaded = True
-                    self._downloadingTime = round(duration, 3)
-                    filename = self.extractNameFromHeader(response)
-                    self._filePath = filename
-                    self._fileSize = len(response.content)
-                    saved = saveAsFile(response, outPath, filename, overwrite)
-                    if saved == 0:
-                        pass
-                    elif saved == -2:
-                        if self._retries > 1:
-                            print("")  # new line if required
-                        print(f' Skipping "{self._filePath}": File already exists.')
-                        self._status = 777
-                    else:
-                        raise Exception(
-                            f'An error ocurred when saving the file "{filename}"'
-                        )
-
-                elif self._status == 202:
-                    # Still processing, wait and retry
-                    log.logMessage(response.json())
-                    sleep(pollPeriod)
-
-                elif self._status == 204:
-                    # No data found
-                    print(" No data found.")
-
-                elif self._status == 400:
-                    # API Error
-                    _printErrorMessage(response)
-                    raise Exception(
-                        f"The request failed with HTTP status {self._status}.",
-                        response.json(),
-                    )
-
-                elif self._status == 404:
-                    # Index too high, no more files to download
-                    log.printNewLine()
-                    pass
-
-                else:
-                    # Gone
-                    print(
-                        " FTP Error: File not found. If the product order is recent,",
-                        "retry downloading using the method downloadProduct",
-                        f"with the runId: {self._filters['dpRunId']}",
-                    )
-                    _printErrorMessage(response)
-            except Exception:
-                raise
+            # Run timed request
+            start = time()
+            response = requests.get(self._baseUrl, self._filters, timeout=timeout)
+            duration = time() - start
+
+            self._downloadUrl = response.url
+            self._status = response.status_code
+            self._retries += 1
+
+            if maxRetries > 0 and self._retries > maxRetries:
+                raise MaxRetriesException(maxRetries)
+
+            if self._status == 200:
+                self._downloaded = True
+                self._downloadingTime = round(duration, 3)
+                filename = self.extractNameFromHeader(response)
+                self._filePath = filename
+                self._fileSize = len(response.content)
+                try:
+                    saveAsFile(response, outPath, filename, overwrite)
+                except FileExistsError:
+                    if self._retries > 1:
+                        print("")
+                    print(f' Skipping "{self._filePath}": File already exists.')
+                    self._status = 777
+
+            elif self._status == 202:  # Still processing, wait and retry
+                log.logMessage(response.json())
+                sleep(pollPeriod)
+
+            elif self._status == 204:  # No data found
+                print(" No data found.")
+
+            elif self._status == 400:
+                raise requests.HTTPError(_createErrorMessage(response))
+
+            elif self._status == 404:  # Index too high, no more files to download
+                log.printNewLine()
+
+            elif self._status == 410:  # Gone (file deleted from FTP)
+                warn(
+                    " FTP Error: File not found. If the product order is recent, "
+                    "retry downloading using the method downloadProduct "
+                    f"with the runId: {self._filters['dpRunId']}",
+                    stacklevel=2,
+                )
+
         return self._status
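
The rewritten loop changes the failure contract of download(): file-name collisions are absorbed internally by catching FileExistsError from saveAsFile, HTTP 400 now surfaces as requests.HTTPError, and exhausted polling raises the relocated MaxRetriesException. A rough sketch of what a caller might now handle, assuming a _DataProductFile instance dp_file constructed elsewhere and keyword names matching the body above:

    import requests

    from onc import MaxRetriesException

    try:
        status = dp_file.download(
            timeout=60, pollPeriod=2.0, outPath="output", maxRetries=10, overwrite=False
        )
    except MaxRetriesException:
        status = None  # polling exceeded maxRetries; retry later or raise the limit
    except requests.HTTPError as err:
        status = None  # HTTP 400 from the API; message built by _createErrorMessage
        print(err)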

90 changes: 42 additions & 48 deletions src/onc/modules/_MultiPage.py
@@ -20,59 +20,53 @@ def getAllPages(self, service: str, url: str, filters: dict):
         Multiple pages will be downloaded until completed
         @return: Service response with concatenated data for all pages obtained
         """
-        try:
-            # pop archivefiles extension
-            extension = None
-            if service == "archivefiles" and "extension" in filters:
-                extension = filters["extension"]
-                del filters["extension"]
-
-            # download first page
-            start = time()
-            response, responseTime = self._doPageRequest(
-                url, filters, service, extension
-            )
-            rNext = response["next"]
-
-            if rNext is not None:
-                print(
-                    "Data quantity is greater than the row limit and",
-                    "will be downloaded in multiple pages.",
-                )
-
-                pageCount = 1
-                pageEstimate = self._estimatePages(response, service, responseTime)
-                if pageEstimate > 0:
-                    timeEstimate = _formatDuration(pageEstimate * responseTime)
-                    print(f"Estimated approx. {pageEstimate} pages")
-                    print(f"Estimated approx. {timeEstimate} to complete")
-
-                # keep downloading pages until next is None
-                print("")
-                while rNext is not None:
-                    pageCount += 1
-                    rowCount = self._rowCount(response, service)
-
-                    print(f" ({rowCount} samples) Downloading page {pageCount}...")
-                    nextResponse, nextTime = self._doPageRequest(
-                        url, rNext["parameters"], service, extension
-                    )
-                    rNext = nextResponse["next"]
-
-                    # concatenate new data obtained
-                    self._catenateData(response, nextResponse, service)
-
-                totalTime = _formatDuration(time() - start)
-                print(
-                    " ({:d} samples) Completed in {:s}.".format(
-                        self._rowCount(response, service), totalTime
-                    )
-                )
-                response["next"] = None
-
-            return response
-        except Exception:
-            raise
+        # pop archivefiles extension
+        extension = None
+        if service == "archivefiles" and "extension" in filters:
+            extension = filters["extension"]
+            del filters["extension"]
+
+        # download first page
+        start = time()
+        response, responseTime = self._doPageRequest(url, filters, service, extension)
+        rNext = response["next"]
+
+        if rNext is not None:
+            print(
+                "Data quantity is greater than the row limit and",
+                "will be downloaded in multiple pages.",
+            )
+
+            pageCount = 1
+            pageEstimate = self._estimatePages(response, service, responseTime)
+            if pageEstimate > 0:
+                timeEstimate = _formatDuration(pageEstimate * responseTime)
+                print(f"Estimated approx. {pageEstimate} pages")
+                print(f"Estimated approx. {timeEstimate} to complete")
+
+            # keep downloading pages until next is None
+            print("")
+            while rNext is not None:
+                pageCount += 1
+                rowCount = self._rowCount(response, service)
+
+                print(f" ({rowCount} samples) Downloading page {pageCount}...")
+                nextResponse, nextTime = self._doPageRequest(
+                    url, rNext["parameters"], service, extension
+                )
+                rNext = nextResponse["next"]
+
+                # concatenate new data obtained
+                self._catenateData(response, nextResponse, service)
+
+            totalTime = _formatDuration(time() - start)
+            print(
+                f" ({self._rowCount(response, service):d} samples)"
+                f" Completed in {totalTime}."
+            )
+            response["next"] = None
+
+        return response
 
     def _doPageRequest(
         self, url: str, filters: dict, service: str, extension: str = None
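
Both rewrites in this commit delete the same wrapper pattern: a bare raise inside except Exception re-raises the active exception with its traceback intact, so the try block added nothing. A small self-contained demonstration of the equivalence:

    def with_wrapper():
        try:
            return 1 / 0
        except Exception:
            raise  # re-raises the same ZeroDivisionError, traceback preserved

    def without_wrapper():
        return 1 / 0

    # Both functions fail identically from the caller's point of view,
    # which is why deleting the wrapper changes no behavior.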
