Skip to content

Commit

Permalink
Checksum pass
Browse files Browse the repository at this point in the history
  • Loading branch information
Adrian Damian authored and Adrian Damian committed Feb 6, 2025
1 parent 8e0ac39 commit 4550f38
Show file tree
Hide file tree
Showing 12 changed files with 326 additions and 263 deletions.
3 changes: 2 additions & 1 deletion caom2/caom2/artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,8 @@ def content_checksum(self, value):
self._content_checksum = None
else:
caom_util.type_check(value, ChecksumURI, "checksum_uri", False)
self._content_checksum = value
# TODO necessary?
self._content_checksum = value.uri

@property
def content_release(self):
Expand Down
66 changes: 49 additions & 17 deletions caom2/caom2/checksum.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,8 +267,8 @@ def update_checksum(checksum, value, attribute=''):
if isinstance(value, ObservationURI) or isinstance(value, ChecksumURI):
b = value.uri.encode('utf-8')
elif isinstance(value, CaomObject):
logger.debug('Process object {}'.format(attribute))
update_caom_checksum(checksum, value, attribute)
#logger.debug('Process object {}'.format(attribute))
return update_caom_checksum(checksum, value, attribute)
elif isinstance(value, bytes):
b = value
elif isinstance(value, bool):
Expand All @@ -284,42 +284,59 @@ def update_checksum(checksum, value, attribute=''):
b = value.strip().encode('utf-8')
elif isinstance(value, datetime):
b = struct.pack('!q', int(
(value - datetime(1970, 1, 1)).total_seconds()))
(value - datetime(1970, 1, 1)).total_seconds()*1000))
elif isinstance(value, set) or \
(isinstance(value, TypedSet) and not
isinstance(value.key_type, AbstractCaomEntity)):
updated = False
for i in sorted(value):
update_checksum(checksum, i, attribute)
updated |= update_checksum(checksum, i, attribute)
return updated
elif isinstance(value, list) or isinstance(value, TypedList):
updated = False
for i in value:
if not isinstance(i, AbstractCaomEntity):
update_checksum(checksum, i, attribute)
updated |= update_checksum(checksum, i, attribute)
return updated
elif isinstance(value, Enum):
update_checksum(checksum, value.value, attribute)
return update_checksum(checksum, value.value, attribute)
elif isinstance(value, uuid.UUID):
b = value.bytes
elif isinstance(value, TypedOrderedDict):
# calculate the checksum of each component and add them in
# alphabetical order of their ids
# Note: ignore dictionaries of AbstractCaomEntity types
checksums = []
updated = False
for i in value:
if not isinstance(value[i], AbstractCaomEntity):
checksums.append(value[i]._id)
for i in sorted(checksums):
update_checksum(checksum, checksum[i], attribute)
updated &= update_checksum(checksum, checksum[i], attribute)
return updated
else:
raise ValueError(
'Cannot transform in bytes: {}({})'.format(value, type(value)))

if b is not None:
checksum.update(b)
if logger.isEnabledFor(logging.DEBUG):
md5 = hashlib.md5()
md5.update(b)
logger.debug('Encoded attribute ({}) {} = {} -- {}'.
format(type(value), attribute,
value, md5.hexdigest()))
logger.debug("Encoded attribute value - {} = {} {} bytes".format(attribute, value, len(b)))
return True
return False


def to_model_name(attribute):
    """
    Converts the attribute name to the corresponding model name

    The snake_case attribute name is turned into lower camel case, e.g.
    ``meta_producer`` becomes ``metaProducer``. Leading underscores (as
    used by private attributes such as ``_meta_producer``) are ignored so
    that the first word always stays as is instead of being capitalized.

    :param attribute: name of attribute
    :return: camel case name of the attribute in the model
    """
    # Drop leading underscores first: otherwise split() yields an empty
    # first component and the first real word would get capitalized.
    components = attribute.lstrip('_').split('_')
    # The first component is kept unchanged; every following word is
    # capitalized (empty words from doubled underscores contribute nothing).
    return components[0] + ''.join(
        word.capitalize() for word in components[1:])


def update_caom_checksum(checksum, entity, parent=None):
Expand All @@ -334,10 +351,16 @@ def update_caom_checksum(checksum, entity, parent=None):
if not isinstance(entity, CaomObject):
raise AttributeError('CaomObject expected')
# get the id first
updated = False
if isinstance(entity, AbstractCaomEntity):
update_checksum(checksum, entity._id)
update_checksum(checksum, entity._id, "Entity.id")
if entity._meta_producer:
update_checksum(checksum, entity._meta_producer)
if update_checksum(checksum, entity._meta_producer, "Entity.metaProducer"):
updated = True
model_name = "Entity.metaProducer"
checksum.update(model_name.encode('utf-8'))
logger.debug('Encoded attribute name {} = {}'.
format('_meta_producer', model_name))

# determine the excluded fields if necessary
checksum_excluded_fields = []
Expand All @@ -352,9 +375,18 @@ def update_caom_checksum(checksum, entity, parent=None):
for i in sorted(dir(entity)):
if not callable(getattr(entity, i)) and not i.startswith('_') and \
i not in checksum_excluded_fields:
if getattr(entity, i) is not None:
val = getattr(entity, i)
if val is not None:
atrib = '{}.{}'.format(parent, i) if parent is not None else i
update_checksum(checksum, getattr(entity, i), atrib)
if update_checksum(checksum, val, atrib):
updated = True
type_name = type(entity).__name__
if type_name in ['DerivedObservation', 'SimpleObservation'] and to_model_name(i) != 'members':
type_name = 'Observation'
model_name = (type_name + "." + to_model_name(i)).lower()
checksum.update(model_name.encode('utf-8'))
logger.debug('Encoded attribute name - {} = {}'.format(atrib, model_name))
return updated


def checksum_diff():
Expand Down Expand Up @@ -430,7 +462,7 @@ def _print_diff(orig, actual):
mistmatches += 1

if elem_type != 'chunk':
# do the accummulated checksums
# do the accumulated checksums
if orig.acc_meta_checksum == actual.acc_meta_checksum:
print('{}: {} {} == {}'.
format(elem_type, orig._id, orig.acc_meta_checksum.checksum,
Expand Down
2 changes: 1 addition & 1 deletion caom2/caom2/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ def meta_producer(self, value):
self._meta_producer = value


class VocabularyTerm(object):
class VocabularyTerm(CaomObject):
""" VocabularyTerm """

def __init__(self, namespace, term, base=False):
Expand Down
53 changes: 27 additions & 26 deletions caom2/caom2/obs_reader_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ def _get_proposal(self, element_tag, parent, ns, required):
else:
proposal = observation.Proposal(
self._get_child_text("id", el, ns, True))
proposal.pi_name = self._get_child_text("pi", el, ns, False)
proposal.pi = self._get_child_text("pi", el, ns, False)
proposal.project = self._get_child_text("project", el, ns, False)
proposal.title = self._get_child_text("title", el, ns, False)
self._add_keywords(proposal.keywords, el, ns, False)
Expand All @@ -400,7 +400,7 @@ def _get_target(self, element_tag, parent, ns, required):
self._get_child_text("name", el, ns, True))
target_type = self._get_child_text("type", el, ns, False)
if target_type:
target.target_type = observation.TargetType(target_type)
target.type = observation.TargetType(target_type)
target_standard = self._get_child_text("standard", el, ns, False)
if target_standard is not None:
target.standard = ("true" == target_standard)
Expand Down Expand Up @@ -568,23 +568,23 @@ def _add_members(self, members, parent, ns):
members.add(observation.ObservationURI(member_element.text))

def _add_inputs(self, inputs, parent, ns):
"""Create PlaneURI objects from an XML representation of the planeURI
elements and add them to the set of PlaneURIs.
"""Create URI objects from an XML representation of the planeURI
elements and add them to the set of plane URIs.
Arguments:
inputs : set of PlaneURI from the Provenance
parent : element containing the PlaneURI elements
inputs : set of plane URIs from the Provenance
parent : element containing the plane uri elements
ns : namespace of the document
raise : ObservationParsingException
"""
el = self._get_child_element("inputs", parent, ns, False)
if el is not None:
if self.version < 25:
for uri_element in el.iterchildren("{" + ns + "}planeURI"):
inputs.add(plane.PlaneURI(str(uri_element.text)))
inputs.add(str(uri_element.text))
else:
for uri_element in el.iterchildren("{" + ns + "}input"):
inputs.add(plane.PlaneURI(str(uri_element.text)))
inputs.add(str(uri_element.text))

if not inputs:
error = "No planeURI element found in members"
Expand Down Expand Up @@ -1491,12 +1491,12 @@ def _get_polarization(self, element_tag, parent, ns, required):
polarization = plane.Polarization()
_pstates_el = self._get_child_element("states", el, ns, False)
if _pstates_el is not None:
_polarization_states = list()
_states = list()
for _pstate_el in _pstates_el.iterchildren("{" + ns + "}state"):
_pstate = _pstate_el.text
_polarization_state = plane.PolarizationState(_pstate)
_polarization_states.append(_polarization_state)
polarization.polarization_states = _polarization_states
_state = plane.PolarizationState(_pstate)
_states.append(_state)
polarization.states = _states
polarization.dimension = self._get_child_text_as_int("dimension", el,
ns, False)

Expand Down Expand Up @@ -1796,13 +1796,14 @@ def _add_planes(self, obs, parent, ns):
else:
for plane_element in el.iterchildren("{" + ns + "}plane"):
if self.version < 25:
_uri = plane.PlaneURI.get_plane_uri(
# TODO
_uri = "{}/{}".format(
obs.uri,
self._get_child_text("productID",
plane_element, ns, True))
else:
_uri = plane.PlaneURI(self._get_child_text("uri", plane_element, ns, True))
_plane = plane.Plane(_uri.uri)
_uri = self._get_child_text("uri", plane_element, ns, True)
_plane = plane.Plane(_uri)
_plane.meta_release = caom_util.str2ivoa(
self._get_child_text("metaRelease", plane_element, ns,
False))
Expand Down Expand Up @@ -2082,7 +2083,7 @@ def _add_proposal_element(self, proposal, parent):

element = self._get_caom_element("proposal", parent)
self._add_element("id", proposal.id, element)
self._add_element("pi", proposal.pi_name, element)
self._add_element("pi", proposal.pi, element)
self._add_element("project", proposal.project, element)
self._add_element("title", proposal.title, element)
self._add_element("reference", proposal.reference, element)
Expand All @@ -2101,8 +2102,8 @@ def _add_target_element(self, target, parent):
raise AttributeError(
"Attempt to write CAOM2.4 element (target.targetID) "
"as CAOM2.3 Observation")
if target.target_type is not None:
self._add_element("type", target.target_type.value, element)
if target.type is not None:
self._add_element("type", target.type.value, element)
self._add_boolean_element("standard", target.standard, element)
self._add_element("redshift", target.redshift, element)
self._add_boolean_element("moving", target.moving, element)
Expand Down Expand Up @@ -2195,13 +2196,13 @@ def _add_planes_element(self, planes, parent):
plane_element = self._get_caom_element("plane", element)
self._add_entity_attributes(_plane, plane_element)
if self._output_version < 25:
_comp = _plane.uri.uri.split('/')
_comp = _plane.uri.split('/')
if len(_comp) != 3:
raise ValueError("Attempt to write CAOM2.4 but can't deduce "
"Plane.productID in Plane.uri=" + _plane.uri)
self._add_element("productID", _comp[-1], plane_element)
else:
self._add_element("uri", _plane.uri.uri, plane_element)
self._add_element("uri", _plane.uri, plane_element)
self._add_datetime_element("metaRelease", _plane.meta_release,
plane_element)
if self._output_version < 24 and _plane.meta_read_groups:
Expand Down Expand Up @@ -2417,9 +2418,9 @@ def _add_polarization_element(self, polarization, parent):
if polarization is None:
return
element = self._get_caom_element("polarization", parent)
if polarization.polarization_states:
if polarization.states:
_pstates_el = self._get_caom_element("states", element)
for _state in polarization.polarization_states:
for _state in polarization.states:
self._add_element("state", _state.value, _pstates_el)
self._add_element("dimension", polarization.dimension, element)

Expand Down Expand Up @@ -2536,7 +2537,7 @@ def _add_observable_element(self, observable, parent):
return

element = self._get_caom_element("observable", parent)
self._add_element("ucd", observable.ucd.value, element)
self._add_element("ucd", observable.ucd, element)
if observable.calibration:
if self._output_version < 25:
raise AttributeError(
Expand Down Expand Up @@ -2588,7 +2589,7 @@ def _add_artifacts_element(self, artifacts, parent):
artifact_element)
if _artifact.content_checksum:
self._add_element("contentChecksum",
_artifact.content_checksum.uri,
_artifact.content_checksum,
artifact_element)
if _artifact.description_id is not None:
if self._output_version < 25:
Expand Down Expand Up @@ -2986,9 +2987,9 @@ def _add_inputs_element(self, name, collection, parent):
element = self._get_caom_element(name, parent)
for plane_uri in collection:
if self._output_version < 25:
self._add_element("planeURI", plane_uri.uri, element)
self._add_element("planeURI", plane_uri, element)
else:
self._add_element("input", plane_uri.uri, element)
self._add_element("input", plane_uri, element)

def _get_caom_element(self, tag, parent):
return etree.SubElement(parent, self._caom2_namespace + tag)
Expand Down
28 changes: 14 additions & 14 deletions caom2/caom2/observation.py
Original file line number Diff line number Diff line change
Expand Up @@ -879,7 +879,7 @@ class Proposal(CaomObject):

def __init__(self,
id,
pi_name=None,
pi=None,
project=None,
title=None,
reference=None):
Expand All @@ -891,7 +891,7 @@ def __init__(self,
"""

self.id = id
self.pi_name = pi_name
self.pi = pi
self.project = project
self.title = title
self.keywords = set()
Expand Down Expand Up @@ -931,18 +931,18 @@ def keywords(self, value):
self._keywords = value

@property
def pi_name(self):
def pi(self):
"""The name (First Last) of the Principle Investigator of the
Proposal.
type: unicode string
"""
return self._pi_name
return self._pi

@pi_name.setter
def pi_name(self, value):
caom_util.type_check(value, str, 'pi_name')
self._pi_name = value
@pi.setter
def pi(self, value):
caom_util.type_check(value, str, 'pi')
self._pi = value

@property
def project(self):
Expand Down Expand Up @@ -1011,7 +1011,7 @@ class Target(CaomObject):
""" Target """

def __init__(self, name,
target_type=None,
type=None,
standard=None,
redshift=None,
keywords=None,
Expand All @@ -1026,7 +1026,7 @@ def __init__(self, name,
"""

self.name = name
self.target_type = target_type
self.type = type
self.standard = standard
self.redshift = redshift
if keywords is None:
Expand All @@ -1053,7 +1053,7 @@ def name(self, value):
self._name = value

@property
def target_type(self):
def type(self):
"""A keyword describing the type of target.
must be from the list
""" + str(list(TargetType)) + """
Expand All @@ -1062,11 +1062,11 @@ def target_type(self):
"""
return self._type

@target_type.setter
def target_type(self, value):
@type.setter
def type(self, value):
if isinstance(value, str):
value = TargetType(value)
caom_util.type_check(value, TargetType, "target_type")
caom_util.type_check(value, TargetType, "type")
self._type = value

@property
Expand Down
Loading

0 comments on commit 4550f38

Please sign in to comment.