diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 000000000..8dada3eda --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/d1_client_cli/src/d1_client_cli/ore2txt b/d1_client_cli/src/d1_client_cli/ore2txt new file mode 100755 index 000000000..7deab2ed7 --- /dev/null +++ b/d1_client_cli/src/d1_client_cli/ore2txt @@ -0,0 +1,184 @@ +#!/usr/bin/env python + +# This work was created by participants in the DataONE project, and is +# jointly copyrighted by participating institutions in DataONE. For +# more information on DataONE, see our web site at http://dataone.org. +# +# Copyright 2009-2016 DataONE +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Script to do a simple text dump of an ORE document."""
+
+import sys
+import codecs
+import logging
+import argparse
+import requests
+import d1_pyore
+
+
+def downloadDocument(url):
+    """Return the text of the ORE document identified by ``url``.
+
+    ``url`` is one of:
+
+      "-"               read the whole document from stdin
+      http(s)://...     fetch the document with ``requests``
+      anything else     path of a local file, read as UTF-8
+
+    Raises ``requests.HTTPError`` when the server answers with an error
+    status, so that an error page is never handed on as if it were the
+    ORE document.
+    """
+    if url == "-":
+        return sys.stdin.read()
+    if url.startswith(("https://", "http://")):
+        response = requests.get(url)
+        response.raise_for_status()
+        return response.text
+    with codecs.open(url, encoding="utf-8") as f:
+        return f.read()
+
+
+def packageToObject(pkg):
+    """Summarize the resource map ``pkg`` as a plain dictionary.
+
+    ``pkg`` must provide getResourceMapPid(), getSubjectObjectsByPredicate(),
+    getAggregatedScienceMetadataPids() and getAggregatedScienceDataPids().
+    Every entry returned by getSubjectObjectsByPredicate() is indexed as a
+    pair; "first" and "second" below refer to entry[0] and entry[1].
+
+    Keys of the returned dictionary:
+
+      pid             pkg.getResourceMapPid()
+      id              first element of the dcterms:identifier entry whose
+                      second element equals ``pid``, or '' if there is none
+      aggregations    {second: [first, ...]} for ore:isAggregatedBy entries
+      objects         {first: {'pid': second}} for dcterms:identifier entries
+      metadata        pkg.getAggregatedScienceMetadataPids()
+      data            pkg.getAggregatedScienceDataPids()
+      documents       {first: [second, ...]} for cito:documents entries
+      isDocumentedBy  {first: [second, ...]} for cito:isDocumentedBy entries
+    """
+    obj = {
+        'pid': pkg.getResourceMapPid(),
+        'aggregations': {},
+        'objects': {},
+        'metadata': [],
+        'data': [],
+        'documents': {},
+        'isDocumentedBy': {},
+    }
+
+    # Load the aggregations and identifiers from the package.
+    ags = pkg.getSubjectObjectsByPredicate(d1_pyore.ORE.isAggregatedBy)
+    # The identifier entries are walked twice below, so hold them in a list.
+    ids = list(pkg.getSubjectObjectsByPredicate(d1_pyore.DCTERMS.identifier))
+
+    obj['id'] = next(
+        (entry[0] for entry in ids if entry[1] == obj['pid']), ''
+    )
+
+    for ag in ags:
+        obj['aggregations'].setdefault(ag[1], []).append(ag[0])
+    for oid in ids:
+        obj['objects'][oid[0]] = {'pid': oid[1]}
+
+    obj['metadata'] = pkg.getAggregatedScienceMetadataPids()
+    obj['data'] = pkg.getAggregatedScienceDataPids()
+
+    # Both CITO relations are grouped the same way: first element -> seconds.
+    relations = (
+        ('isDocumentedBy', d1_pyore.CITO.isDocumentedBy),
+        ('documents', d1_pyore.CITO.documents),
+    )
+    for key, predicate in relations:
+        for oid in pkg.getSubjectObjectsByPredicate(predicate):
+            obj[key].setdefault(oid[0], []).append(oid[1])
+
+    return obj
+
+
+def _pidForId(obj, oid):
+    """Return the pid recorded for ``oid`` in obj['objects'], or '' if none."""
+    return obj['objects'].get(oid, {}).get('pid', '')
+
+
+def _writeCitoSection(fto, obj, key, heading, relation):
+    """Write the "CITO:<key>" section of the text dump to ``fto``.
+
+    ``key`` is 'documents' or 'isDocumentedBy'; ``heading`` introduces each
+    entry of obj[key] and ``relation`` introduces the list of ids it maps to.
+    """
+    fto.write("\nCITO:{0}\n".format(key))
+    for i, doc in enumerate(obj[key], 1):
+        fto.write("\n{0}:\n".format(heading))
+        fto.write("{0}: id: {1}\n".format(i, doc))
+        fto.write(" pid: {0}\n\n".format(_pidForId(obj, doc)))
+        fto.write(" {0}:\n".format(relation))
+        for j, oid in enumerate(obj[key][doc], 1):
+            fto.write(" {0}: id: {1}\n".format(j, oid))
+            fto.write(" pid: {0}\n".format(_pidForId(obj, oid)))
+
+
+def packageToText(pkg, fto=sys.stdout):
+    """Write a plain-text description of the resource map ``pkg`` to ``fto``.
+
+    The dump lists the resource map pid and id, every aggregation with its
+    contents, and then the cito:documents and cito:isDocumentedBy relations.
+    An id that has no recorded pid is printed with an empty pid instead of
+    aborting the dump.
+    """
+    obj = packageToObject(pkg)
+
+    fto.write("OAI-ORE Description\n\n")
+    fto.write("Resource Map Document PID: {0}\n".format(obj['pid']))
+    fto.write(" ID: {0}\n\n".format(obj['id']))
+    fto.write("Aggregations\n\n")
+    for i, ag in enumerate(obj['aggregations'], 1):
+        fto.write("{0}: {1}\n".format(i, ag))
+        fto.write(" Contents:\n")
+        for j, oid in enumerate(obj['aggregations'][ag], 1):
+            fto.write(" {0}: id: {1}\n".format(j, oid))
+            fto.write(" pid: {0}\n".format(_pidForId(obj, oid)))
+
+    _writeCitoSection(fto, obj, 'documents', "The document", "describes")
+    _writeCitoSection(fto, obj, 'isDocumentedBy', "The data",
+                      "is described by")
+
+
+def main(url, format):
+    """Load the ORE document at ``url`` and print it to stdout.
+
+    ``format`` "text" selects the dump written by packageToText(); any other
+    value is passed to pkg.serialize() as the serialization format.
+    """
+    ore_doc = downloadDocument(url)
+    pkg = d1_pyore.ResourceMap()
+    pkg.parse(data=ore_doc)
+    # Decide on the ``format`` argument rather than the script-level ``args``:
+    # the caller passes the lower-cased value, and main() must also work when
+    # no ``args`` global exists.
+    if format == "text":
+        packageToText(pkg)
+        return
+    print(pkg.serialize(format=format))
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Show OAI-ORE object as text or serialized as json-ld, n3, xml')
+
+    parser.add_argument('-l', '--log',
+                        default=logging.WARN,
+                        type=int,
+                        help="Set the logging level (debug=10, error=40)")
+
+    parser.add_argument('url',
+                        help="Full URL to ORE object, path to ORE file, or '-' to read from stdin")
+
+    parser.add_argument('-f', '--format',
+                        default="text",
+                        help="Format for output. Can be text or any of the standard serialization formats.")
+
+    args = parser.parse_args()
+
+    # Only the five standard numeric logging levels are accepted.
+    if args.log not in (10, 20, 30, 40, 50):
+        logging.basicConfig(level=logging.INFO)
+        logging.warning("Invalid value %s for log level. Using 20 (INFO).", args.log)
+    else:
+        logging.basicConfig(level=args.log)
+
+    main(args.url, args.format.lower())
+
diff --git a/d1_client_cli/src/d1_client_cli/pids2ore b/d1_client_cli/src/d1_client_cli/pids2ore
new file mode 100755
index 000000000..e75341e2f
--- /dev/null
+++ b/d1_client_cli/src/d1_client_cli/pids2ore
@@ -0,0 +1,143 @@
+#!/usr/bin/env python
+
+# This work was created by participants in the DataONE project, and is
+# jointly copyrighted by participating institutions in DataONE. For
+# more information on DataONE, see our web site at http://dataone.org.
+#
+# Copyright 2009-2016 DataONE
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Generate an OAI-ORE document from stdin.
+
+This script will take a list of identifiers, one per line, and create a simple,
+single metadata OAI-ORE document and output the result on stdout.
+
+The first non-blank line will contain the identifier for the resource map
+document.
+
+The second non-blank line will contain the identifier for the metadata document.
+
+Remaining rows are read until the stream is consumed, and these rows contain
+identifiers for data objects described by the metadata document.
+
+example:
+
+$ cat pids.txt
+a
+b
+c
+
+$ cat pids.txt | pids2ore
+
+writes the resource map in the default serialization (--format xml); the
+sample output is omitted here.
+
+Different serializations are supported with the -f or --format parameter. e.g.
+(all IRIs are elided as <...>):
+
+$ cat pids.txt | pids2ore --format n3
+
+@prefix cito: <...> .
+@prefix dc: <...> .
+@prefix dcterms: <...> .
+@prefix foaf: <...> .
+@prefix ore: <...> .
+@prefix rdf: <...> .
+@prefix rdfs: <...> .
+@prefix xml: <...> .
+@prefix xsd: <...> .
+
+<...> a ore:ResourceMap ;
+    dcterms:creator "d1_pyore DataONE Python library" ;
+    dcterms:identifier "a" ;
+    ore:describes <...> .
+
+ore:Aggregation rdfs:label "Aggregation" ;
+    rdfs:isDefinedBy ore: .
+
+<...> dcterms:identifier "b" ;
+    cito:documents <...> ;
+    ore:isAggregatedBy <...> .
+
+<...> a ore:Aggregation ;
+    ore:aggregates <...>,
+        <...> .
+
+<...> dcterms:identifier "c" ;
+    cito:isDocumentedBy <...> ;
+    ore:isAggregatedBy <...> .
+"""
+
+import sys
+import logging
+import argparse
+import d1_pyore
+
+
+def main():
+    """Parse the command line and print the resource map built from stdin.
+
+    stdin is handed to d1_pyore.pids2ore() together with the --format and
+    --base_url options, and the value it returns is printed to stdout.
+    """
+    parser = argparse.ArgumentParser(description="stdin to OAI-ORE")
+
+    parser.add_argument('-l', '--log',
+                        default=logging.WARN,
+                        type=int,
+                        help="Set the logging level (debug=10, error=40)")
+
+    parser.add_argument('-f', '--format',
+                        default='xml',
+                        help='Specify the serialization format (xml)')
+
+    parser.add_argument('-b', '--base_url',
+                        default=u'https://cn.dataone.org/cn',
+                        help='Specify the base URL for the resource map entity identifiers')
+
+    args = parser.parse_args()
+
+    # Only the five standard numeric logging levels are accepted.
+    if args.log not in (10, 20, 30, 40, 50):
+        logging.basicConfig(level=logging.INFO)
+        logging.warning("Invalid value %s for log level. Using 20 (INFO).", args.log)
+    else:
+        logging.basicConfig(level=args.log)
+
+    print(d1_pyore.pids2ore(sys.stdin,
+                            fmt=args.format,
+                            base_url=args.base_url))
+
+
+# Run only when executed as a script, matching the layout of ore2txt.
+if __name__ == "__main__":
+    main()