Skip to content

Commit

Permalink
s2748 - Alopeke/Zorro support (#16)
Browse files Browse the repository at this point in the history
* s2627 - handle a few more naming patterns during validation.

* t76247 - initial testing with proposed Gemini incremental harvesting URL.

* s2646 - regression testing.

* s2646 - refactor Gemini to make use of the run_composable, name_builder_composable, and data_source_composable abstractions introduced into caom2pipe.

* s2647 - regression testing cleanup.

* Remove spurious directory commit.

* s2648 - regression testing for OMM.

* s2648 - regression testing for OMM.

* s2648 - regression testing for OMM.

* s2648 - regression testing for OMM.

* s2648 - regression testing for OMM.

* s2468 - SITELLE regression testing.

* s2650 - SPIRou regression testing.

* s2650 - SPIRou regression testing.

* s2650 - SPIRou regression testing - add importlib-metadata to Dockerfile.

* Issue #4 - add CachingObsFileRelationship class to do a lookup of the observationID<=>file name relationship that is ordered by distance from execution.

* Issue #3 - fix provenance handling.

* Issue #3 - integration testing.

* Issue #6 - replace execute_composable invocations with run_composable invocations. Refactor how file name to data label lookup is handled.

* Issue #6 - replace execute_composable invocations with run_composable invocations.

* Issue #6 - use all caom2tools from github.

* Issue #6 - fix how recently public query handles netrc file authentication and authorization.

* Issue #6 - got side-tracked and fixed (?) the WCS validation issue with the caom2.4 validator.

* Issue #6 - header information from archive.gemini.edu may contain '--- PHU' OR '--- HDU 0'.

* s2748 - do an initial test observation for ZORRO/ALOPEKE, and rename the expected xml files.

* s2748 + #5 - some fixes.

* s2748 - setting the cache for a file lookup.

* Issue #5 - rethinking the way command-line arguments are issued.

* Issue #5 - remove a lot of code that was used when processing was by observationID/data label. It's no longer required now that processing is by file name.

* Issue #5 - more cleanup of code that's no longer required.

* Issue #5 - scrape help.

* s2766 - OBJECT file spatial WCS refactoring.

* s2766 - make changes for s2770 available sooner.

* Update setup.cfg

* s2770 - GEMINI.

* Issue #12 - GMOS keywords should be associated with instrument, not provenance.

* Issue #5 - files with no prefix do not have 'fixed' data labels.

* s2748 - Add support for ALOPEKE/ZORRO observationID handling to GemName.

* s2748 - initial attempt at ALOPEKE observation.

* s2748 - Alopeke - 2 planes per set of files.

* s2748 - Add ZORRO test records.

* s2766 - side-effects of consistent keyword handling in fits2caom2.

* s2748 - add calibration files to Alopeke/Zorro test set.

Co-authored-by: Sharon Goliath <goliaths.cadc@gmail.com>
  • Loading branch information
SharonGoliath and Sharon Goliath authored Sep 3, 2020
1 parent 7a9f10b commit 0197fca
Show file tree
Hide file tree
Showing 100 changed files with 4,811 additions and 461 deletions.
60 changes: 51 additions & 9 deletions gem2caom2/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,15 @@
#

import logging
import traceback

from caom2pipe import astro_composable as ac
from caom2pipe import manage_composable as mc
from caom2pipe import name_builder_composable as nbc
from gem2caom2 import gem_name, external_metadata


__all__ = ['EduQueryBuilder', 'GemObsIDBuilder']
__all__ = ['EduQueryBuilder', 'GemObsIDBuilder', 'get_instrument']


class EduQueryBuilder(nbc.Builder):
Expand Down Expand Up @@ -112,7 +114,8 @@ def build(self, entry):

external_metadata.get_obs_metadata(
gem_name.GemName.remove_extensions(entry))
storage_name = gem_name.GemName(file_name=entry)
instrument = get_instrument()
storage_name = gem_name.GemName(file_name=entry, instrument=instrument)
return storage_name


Expand All @@ -133,10 +136,49 @@ def build(self, entry):
:return: an instance of StorageName for use in execute_composable.
"""
self._logger.debug(f'Build a StorageName instance for {entry}.')
if (mc.TaskType.INGEST_OBS in self._config.task_types and
'.fits' not in entry):
result = gem_name.GemName(obs_id=entry)
else:
result = gem_name.GemName(file_name=entry)
self._logger.debug('Done build.')
return result
try:
if (mc.TaskType.INGEST_OBS in self._config.task_types and
'.fits' not in entry):
# anything that is NOT ALOPEKE/ZORRO, which are the only
# two instruments that change the behaviour of the
# GemName constructor - and yeah, that abstraction is leaking
# like a sieve.
self._logger.debug('INGEST_OBS, hard-coded instrument.')
instrument = external_metadata.Inst.CIRPASS
result = gem_name.GemName(obs_id=entry, instrument=instrument)
elif (mc.TaskType.SCRAPE in self._config.task_types or
self._config.use_local_files):
self._logger.debug(
'Use a local file to read instrument from the headers.')
headers = ac.read_fits_headers(
f'{self._config.working_dir}/{entry}')
instrument = external_metadata.Inst(headers[0].get('INSTRUME'))
result = gem_name.GemName(
file_name=entry, instrument=instrument)
elif self._config.features.use_file_names:
self._logger.debug('Read instrument from archive.gemini.edu.')
file_id = gem_name.GemName.remove_extensions(entry)
external_metadata.get_obs_metadata(file_id)
instrument = get_instrument()
result = gem_name.GemName(
file_name=entry, instrument=instrument)
else:
raise mc.CadcException('The need has not been encountered '
'in the real world yet.')
self._logger.debug('Done build.')
return result
except Exception as e:
self._logger.error(e)
self._logger.debug(traceback.format_exc())
raise mc.CadcException(e)


def get_instrument():
    """Return the Inst member for the instrument named in the metadata.

    The name is read from the 'instrument' entry of external_metadata.om.
    For ALOPEKE and ZORRO that entry is upper-case, while the FITS header
    uses a capitalized spelling, so those two names are re-spelled before
    the enumeration lookup.

    :return: the external_metadata.Inst member matching the instrument
        name.
    """
    reported = external_metadata.om.get('instrument')
    # the value in JSON is a different case than the value in the FITS
    # header, so translate to the FITS header spelling
    if reported == 'ALOPEKE':
        fits_spelling = 'Alopeke'
    elif reported == 'ZORRO':
        fits_spelling = 'Zorro'
    else:
        fits_spelling = reported
    return external_metadata.Inst(fits_spelling)
18 changes: 16 additions & 2 deletions gem2caom2/external_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ class Inst(Enum):
PHOENIX = 'PHOENIX'
TEXES = 'TEXES'
TRECS = 'TReCS'
ZORRO = 'Zorro'


def get_obs_metadata(file_id):
Expand Down Expand Up @@ -300,6 +301,17 @@ def _repair_filter_name_for_svo(instrument, filter_names):
separated by '+'
:return filter_name the SVO version
"""
# Alopeke/ZORRO == FOX in Hawaiian and Spanish
FILTER_REPAIR_FOX = {'Red-832': 'EO_832',
'Blue-u': 'u_sdss',
'Blue-466': 'EO_466',
'Blue-g': 'g_sdss',
'Blue-562': 'EO_562',
'Blue-r': 'r_sdss',
'Blue-Halpha': 'Halpha',
'Red-716': 'EO_716',
'Red-i': 'i_sdss',
'Red-z': 'z_sdss'}
FILTER_REPAIR_NICI = {'CH4-H4S': 'ED451',
'CH4-H4L': 'ED449',
'CH4-H1S': 'ED286',
Expand Down Expand Up @@ -416,6 +428,8 @@ def _repair_filter_name_for_svo(instrument, filter_names):
elif instrument is Inst.GSAOI:
if temp in FILTER_REPAIR_GSAOI:
temp = FILTER_REPAIR_GSAOI[temp]
elif instrument in [Inst.ALOPEKE, Inst.ZORRO]:
temp = FILTER_REPAIR_FOX.get(temp)
elif instrument is Inst.F2:
if temp == 'J-lo':
temp = 'Jlow'
Expand Down Expand Up @@ -473,7 +487,7 @@ def tap_client(self, value):
self._tap_client = value

def get_obs_id(self, file_id):
self._logger.error(f'Entering get_obs_id for {file_id}.')
self._logger.debug(f'Entering get_obs_id for {file_id}.')
result = super(CachingObsFileRelationship, self).get_obs_id(file_id)
if result is None:
if self._use_local_files:
Expand All @@ -498,7 +512,7 @@ def _get_obs_id_from_gemini(self, file_id):
# using the global om structure to look up and store
# metadata will modify the internal index of the class - maintain
# that index here with a save/restore
self._logger.error(f'Begin _get_obs_id_from_gemini for {file_id}')
self._logger.debug(f'Begin _get_obs_id_from_gemini for {file_id}')
global om
current_file_id = om.current
get_obs_metadata(file_id)
Expand Down
130 changes: 94 additions & 36 deletions gem2caom2/gem_name.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,50 +89,108 @@ class GemName(mc.StorageName):
mixed-case obs id values - the case the inputs are provided in are
assumed to be correct.
- support uncompressed files in storage
ALOPEKE/ZORRO::
DB 31-08-20
DATALAB can NOT be used for the CAOM2 Observation ID since it appears that
the DATALAB value is identical for all files obtained for a single
program. e.g. if the program ID is GN-2020A-DD-115 then the DATALAB value
is always GN-2020A-DD-115-0-0.
Instead, use the root of the filename as the observation ID. e.g.
N20200819A0003r.fits and N20200819A0003b.fits are two files generated from
a single observation (r = red channel, b = blue channel). Use
N20200819A0003 as the observation ID with two planes given by the two
colours of data.
DB 01-09-20
Gemini has kludged the headers so that every observation for a single
program has the same DATALAB in the header. DATALAB is what we usually use
for the observation ID. Each single 'observation' actually produces two
files (not a single MEF file) for the red and blue channels, so to me it
would make the most sense to group these two files as a single observation
with two artifacts given by URIs pointing to the two files. And this is
a single plane, correct?
PD 01-09-20
What is the meaning of red and blue channels? different energy bands?
DB 02-09-20
Yes. There's a dichroic that directs light shortward of 675nm to one
detector (through one of several possible filters) and light longward of
675nm to a second detector (through another filter). But instead of
generating a single MEF file, they generate two files, e.g.
N20191219A0004b.fits and N20191219A0004r.fits.
PD 02-09-20
This seems very much like MACHO... if those two files are images in the
normal sense then it could make sense to create separate planes with
dataProductType = image that end up with the correct (distinct) energy
metadata. It is OK for an observation to create two sibling products and
two planes probably captures the goal of this instrument/observing mode
more directly.
"""

GEM_NAME_PATTERN = '*'

def __init__(self, fname_on_disk=None, file_name=None, obs_id=None,
file_id=None):
file_id=None, instrument=None):
logging.debug('parameters fname_on_disk {} file_name {}'
' obs id {} file id {}'.format(fname_on_disk,
file_name,
obs_id,
file_id))
# try to set the file name, if that information is available

# file_name is assumed to be the file name in ad
# because the GEM files are stored uncompressed,
# while the files available from Gemini are bz2.
self._file_name = None
self._file_id = None
if file_name is not None:
self._file_id = GemName.get_file_id(file_name)
self.file_name = file_name
if fname_on_disk is not None:
self._file_id = GemName.get_file_id(fname_on_disk)
self.file_name = fname_on_disk
if obs_id is not None:
self._obs_id = obs_id
super(GemName, self).__init__(
obs_id=obs_id, collection=ARCHIVE,
collection_pattern=GemName.GEM_NAME_PATTERN,
fname_on_disk=self.file_name,
scheme=SCHEME)
if self._obs_id is None:
temp = em.get_gofr().get_obs_id(self._file_id)
if temp is not None:
self._obs_id = GemName.remove_extensions(temp)
if (self._fname_on_disk is None and self._file_name is None and
self._obs_id is None):
raise mc.CadcException('Require a name.')
if (self._file_id is None and self._obs_id is None and
file_id is not None):
self._file_id = file_id
self._obs_id = file_id
if file_id is not None:
self._file_id = file_id
if instrument in [em.Inst.ALOPEKE, em.Inst.ZORRO]:
if file_name is None and fname_on_disk is None:
raise mc.CadcException(f'Need a file name of some sort.')
if file_name is None:
self._file_name = fname_on_disk
else:
self._file_name = file_name
self._file_id = GemName.remove_extensions(self._file_name)
self._obs_id = self._file_id[:-1]
self._product_id = self._file_id
super(GemName, self).__init__(
obs_id=self._obs_id, collection=ARCHIVE,
collection_pattern=GemName.GEM_NAME_PATTERN,
fname_on_disk=self._file_name,
scheme=SCHEME)
else:
# try to set the file name, if that information is available

# file_name is assumed to be the file name in ad
# because the GEM files are stored uncompressed,
# while the files available from Gemini are bz2.
self._file_name = None
self._file_id = None
if file_name is not None:
self._file_id = GemName.get_file_id(file_name)
self.file_name = file_name
if fname_on_disk is not None:
self._file_id = GemName.get_file_id(fname_on_disk)
self.file_name = fname_on_disk
if obs_id is not None:
self._obs_id = obs_id
super(GemName, self).__init__(
obs_id=obs_id, collection=ARCHIVE,
collection_pattern=GemName.GEM_NAME_PATTERN,
fname_on_disk=self.file_name,
scheme=SCHEME)
if self._obs_id is None:
temp = em.get_gofr().get_obs_id(self._file_id)
if temp is not None:
self._obs_id = GemName.remove_extensions(temp)
if (self._fname_on_disk is None and self._file_name is None and
self._obs_id is None):
raise mc.CadcException('Require a name.')
if (self._file_id is None and self._obs_id is None and
file_id is not None):
self._file_id = file_id
self._obs_id = file_id
if file_id is not None:
self._file_id = file_id
self._product_id = self._file_id
self._logger = logging.getLogger(__name__)
self._logger.debug(self)

Expand Down Expand Up @@ -175,7 +233,7 @@ def file_id(self, value):

@property
def lineage(self):
return mc.get_lineage(ARCHIVE, self._file_id, self._file_name,
return mc.get_lineage(ARCHIVE, self.product_id, self._file_name,
self.scheme)

@property
Expand All @@ -184,7 +242,7 @@ def external_urls(self):

@property
def product_id(self):
return self._file_id
return self._product_id

@property
def thumb_uri(self):
Expand Down
Loading

0 comments on commit 0197fca

Please sign in to comment.