Skip to content

Commit

Permalink
Merge pull request #6 from sdss/curl
Browse files Browse the repository at this point in the history
add curl as an access method
  • Loading branch information
havok2063 authored Nov 12, 2019
2 parents 22de665 + 6da1708 commit 5d7b6cb
Show file tree
Hide file tree
Showing 29 changed files with 732 additions and 231 deletions.
3 changes: 3 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,12 @@ python:

os:
- linux
- windows

matrix:
fast_finish: true
allow_failures:
- os: windows

notifications:
email: false
Expand Down
5 changes: 5 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,15 @@ This document records the main changes to the sdss_access code.

0.2.8 (unreleased)
------------------

Added
^^^^^
- new extract method to return extracted keywords from a given filename
- new tests for sdss_access.path
- methods to extract and look up source code given a method name
- sdss_access now has a CurlAccess class to enable use on Windows OS
- implemented new BaseAccess class to abstract out commonalities between RsyncAccess and CurlAccess
- added a general Access class which handles the choice between Rsync/CurlAccess
- added public access for HttpAccess

Changed
Expand All @@ -22,6 +26,7 @@ Changed
- replaced template envvar substitution with os.path.expandvars
- updating yaml.load to use FullLoader in compliance with pyyaml 5.1
- changing distutils.strictversion to parse_versions
- moved methods from RsyncAccess and CurlAccess into common BaseAccess
- refactored the test suite to add tests on DR data, and simplify new path entries

Fixed
Expand Down
12 changes: 12 additions & 0 deletions bin/sdss_access_drpall
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/usr/bin/env python
from sdss_access import Access, AccessError
access = Access(label='mangamap',verbose=True)

#works if you have the sdss username in your ~/.netrc (or you will be prompted)
access.remote()
#the alternative to setting the password in ~/.netrc is not recommended!
#access.remote(username='sdss',password='***-******')

access.add('drpall', drpver='v1_5_1')
access.set_stream()
access.commit()
36 changes: 18 additions & 18 deletions bin/sdss_access_rsync_example → bin/sdss_access_example
Original file line number Diff line number Diff line change
Expand Up @@ -2,55 +2,55 @@
from __future__ import absolute_import, division, print_function, unicode_literals
# The line above will help with 2to3 support.

from sdss_access import RsyncAccess, AccessError
rsync_access = RsyncAccess(label='spZbest',mirror=True,public=True,verbose=True)
from sdss_access import Access, AccessError
access = Access(label='mangacube',mirror=True,public=False,verbose=True)

#works if you have the sdss username in your ~/.netrc (or you will be prompted)
rsync_access.remote()
access.remote()
#the alternative to setting the password in ~/.netrc is not recommended!
#rsync_access.remote(username='sdss',password='***-******')
#access.remote(username='sdss',password='***-******')

#rsync_access.add('mangacube', drpver='v1_5_1', plate=8485, ifu='*')
rsync_access.add('spZbest', run2d='v5_7_2', run1d='v5_7_2', plateid=7339, mjd='*')
try: rsync_access.set_stream()
access.add('mangacube', drpver='v1_5_1', plate=8485, ifu='*')
#access.add('spZbest', run2d='v5_7_2', run1d='v5_7_2', plateid=7339, mjd='*')
try: access.set_stream()
except: print("error")

print("="*80)
print("example i): expand wildcard")
locations = rsync_access.get_locations()
locations = access.get_locations()
for location in locations: print("- %s" % location)

print("="*80)
print("example ii): first 5 (limit=5)")
locations = rsync_access.get_locations(limit=5)
locations = access.get_locations(limit=5)
if locations:
for location in locations: print("- %s" % location)

print("="*80)
print("example iii): next 5 (offset=5,limit=5)"
rsync_access.get_locations(offset=5,limit=5)
print("example iii): next 5 (offset=5,limit=5)")
locations = access.get_locations(offset=5,limit=5)
if locations:
for location in locations: print("- %s" % location)

print("="*80)
print("example iv): random 5 = shuffle then (limit=5)")
rsync_access.shuffle()
locations = rsync_access.get_locations(limit=5)
access.shuffle()
locations = access.get_locations(limit=5)
if locations:
for location in locations: print("- %s" % location)

print("="*80)
print("example v): random 5 as paths")
rsync_access.shuffle()
paths = rsync_access.get_paths(limit=5)
access.shuffle()
paths = access.get_paths(limit=5)
if paths:
for path in paths: print("- %s" % path)

print("="*80)
print("example vi): random 5 as urls")
rsync_access.shuffle()
urls = rsync_access.get_urls(limit=5)
access.shuffle()
urls = access.get_urls(limit=5)
if urls:
for url in urls: print("- %s" % url)

#rsync_access.commit(dryrun=True)
#access.commit(dryrun=True)
9 changes: 9 additions & 0 deletions bin/sdss_access_mangacube_dr14
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/usr/bin/env python3
from sdss_access import Access, AccessError
access = Access(label='mangacube_dr14',public=True,release='dr14',verbose=True)
access.remote()
access.add('mangacube', drpver='v2_1_2', plate=8485, ifu='19*')
access.set_stream()

access.commit()

13 changes: 13 additions & 0 deletions bin/sdss_access_mangacube_mpl7
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/usr/bin/env python3
from sdss_access import Access, AccessError
access = Access(label='mangacube_mpl7',verbose=True)

#works if you have the sdss username in your ~/.netrc (or you will be prompted)
access.remote()
#the alternative to setting the password in ~/.netrc is not recommended!
#access.remote(username='sdss',password='***-******')

access.add('mangacube', drpver='v2_4_3', plate=8485, ifu='19*')
access.set_stream()

access.commit()
13 changes: 13 additions & 0 deletions bin/sdss_access_mangamap_mpl4
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/usr/bin/env python
from sdss_access import Access, AccessError
access = Access(label='mangamap',verbose=True)

#works if you have the sdss username in your ~/.netrc (or you will be prompted)
access.remote()
#the alternative to setting the password in ~/.netrc is not recommended!
#access.remote(username='sdss',password='***-******')
#mangamap = $MANGA_SPECTRO_ANALYSIS/{drpver}/{dapver}/full/{plate}/{ifu}/manga-{plate}-{ifu}-LOGCUBE_MAPS-{bintype}-{n:0>3}.fits.gz

access.add('mangamap', drpver = 'v1_5_1', dapver='1.1.1', plate=8485, ifu=1901, bintype='NONE', n='*')
access.set_stream()
access.commit()
13 changes: 13 additions & 0 deletions bin/sdss_access_mangamap_mpl5
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/usr/bin/env python
from sdss_access import Access, AccessError
access = Access(label='mangamaps',verbose=True)

#works if you have the sdss username in your ~/.netrc (or you will be prompted)
access.remote()
#the alternative to setting the password in ~/.netrc is not recommended!
#access.remote(username='sdss',password='***-******')

access.add('mangadap5', drpver='v2_0_1', dapver='2.0.2', plate=8485, ifu=1901, mode='*', daptype='*')
access.set_stream()

access.commit()
12 changes: 0 additions & 12 deletions bin/sdss_access_rsync_drpall

This file was deleted.

8 changes: 0 additions & 8 deletions bin/sdss_access_rsync_mangacube_dr14

This file was deleted.

13 changes: 0 additions & 13 deletions bin/sdss_access_rsync_mangacube_mpl7

This file was deleted.

12 changes: 0 additions & 12 deletions bin/sdss_access_rsync_mangamap_mpl4

This file was deleted.

13 changes: 0 additions & 13 deletions bin/sdss_access_rsync_mangamap_mpl5

This file was deleted.

14 changes: 14 additions & 0 deletions docs/sphinx/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@ Path
Sync
----

Access
^^^^^^
.. automodule:: sdss_access.sync.access
:members:
:undoc-members:
:show-inheritance:

Auth
^^^^
.. automodule:: sdss_access.sync.auth
Expand All @@ -28,6 +35,13 @@ Client
:undoc-members:
:show-inheritance:

Curl
^^^^^
.. automodule:: sdss_access.sync.curl
:members:
:undoc-members:
:show-inheritance:

Http
^^^^
.. automodule:: sdss_access.sync.http
Expand Down
58 changes: 50 additions & 8 deletions docs/sphinx/intro.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ You can generate full paths to files easily with `Path.full`::
path.full('mangacube', drpver='v2_3_1', plate='8485', ifu='1901')
'/Users/Brian/Work/sdss/sas/mangawork/manga/spectro/redux/v2_3_1/8485/stack/manga-8485-1901-LOGCUBE.fits.gz'

Note that this only generates a path. The file may not actually exist locally. If you want to generate a URL path to the file at Utah, you can use `Path.url`::
Note that this only generates a path. The file may not actually exist locally. If you want to generate a URL path to the file at
Utah, you can use `Path.url`::

# generate a http path to the file
path.url('mangacube', drpver='v2_3_1', plate='8485', ifu='1901')
Expand All @@ -30,8 +31,11 @@ Note that this only generates a path. The file may not actually exist locally.
Path Names
^^^^^^^^^^

The syntax for all paths defined in `sdss_access`, for most methods, is ``(name, **kwargs)``. Each path is defined by a **name** and several **keyword arguments**, indicated in the template filepath by **{keyword_name}**. For example, the path to a MaNGA data cube has **name** ``mangacube`` and path keywords, **plate**, **drpver**, and **ifu**, defined in the path ``$MANGA_SPECTRO_REDUX/{drpver}/{plate}/stack/manga-{plate}-{ifu}-LOGCUBE.fits.gz``. All paths are defined inside the SDSS `tree` product, within the `sdss_paths.ini` file, and
available to you as a dictionary, ``path.templates``::
The syntax for all paths defined in `sdss_access`, for most methods, is ``(name, **kwargs)``. Each path is defined by a **name** and
several **keyword arguments**, indicated in the template filepath by **{keyword_name}**. For example, the path to a MaNGA data cube
has **name** ``mangacube`` and path keywords, **plate**, **drpver**, and **ifu**, defined in the path
``$MANGA_SPECTRO_REDUX/{drpver}/{plate}/stack/manga-{plate}-{ifu}-LOGCUBE.fits.gz``. All paths are defined inside the SDSS
`tree` product, within the `sdss_paths.ini` file, and available to you as a dictionary, ``path.templates``::

from sdss_access import SDSSPath
path = SDSSPath()
Expand Down Expand Up @@ -60,7 +64,14 @@ You can download files from the SAS and place them in your local SAS. `sdss_acc
the real SAS at Utah. If you do not already have a `SAS_BASE_DIR` set, one will be defined in your home directory, as a new `sas`
directory.

Using the `HttpAccess` package.
sdss_access has four classes designed to facilitate access to SAS data.

- Access - class that automatically decides between `RsyncAccess` and `CurlAccess` based on the operating system.
- HttpAccess - uses the `urllib` package to download data using a direct http request
- RsyncAccess - uses `rsync` to download data. Available for Linux and MacOS.
- CurlAccess - uses `curl` to download data. This is the only available method for use on Windows machines.

Using the `HttpAccess` class.

::

Expand All @@ -73,12 +84,12 @@ Using the `HttpAccess` package.
# get the file
http_access.get('mangacube', drpver='v2_3_1', plate='8485', ifu='1901')

Using the `RsyncAccess` package. `RsyncAccess` is generally much faster then `HttpAccess` as it spreads multiple file downloads
across multiple continuous rsync download streams.
Using the `RsyncAccess` class. `RsyncAccess` is generally much faster than `HttpAccess` as it spreads multiple file downloads
across multiple continuous rsync download streams.

::

# import the rsync package
# import the rsync class
from sdss_access import RsyncAccess
rsync = RsyncAccess()

Expand All @@ -95,13 +106,42 @@ across multiple continuous rsync download streams.
# start the download(s)
rsync.commit()

The default mode of `RsyncAccess` is for collaboration access. You can also access data from publicly available SDSS data releases, by specifying the `public` and `release` keyword arguments on init.
The default mode of `RsyncAccess` is for collaboration access. You can also access data from publicly available SDSS data releases,
by specifying the `public` and `release` keyword arguments on init.

::

# setup rsync access to download public data from DR14
rsync = RsyncAccess(public=True, release='dr14')

Using the `CurlAccess` class. `CurlAccess` behaves exactly the same way as `RsyncAccess`. After importing and instantiating
a `CurlAccess` object, all methods and behavior are the same as in the `RsyncAccess` class.
::

# import the curl class
from sdss_access import CurlAccess
curl = CurlAccess()

Using the `Access` class. Depending on your operating system, `posix` or not, Access will either create itself using `RsyncAccess`
or `CurlAccess`, and behave as either object. Via `Access`, Windows machines will always use `CurlAccess`, while Linux or Macs
will automatically utilize `RsyncAccess`.
::

# import the access class
from sdss_access import Access
access = Access()

# the access mode is automatically set to rsync.
print(access)
>>> <Access(access_mode="rsync", using="data.sdss.org")>

# the class now behaves exactly like RsyncAccess.
# download a MaNGA cube
access.remote()
access.add('mangacube', drpver='v2_3_1', plate='8485', ifu='1901')
access.set_stream()
access.commit()

.. _sdss-access-api:

Reference/API
Expand All @@ -110,8 +150,10 @@ Reference/API
.. rubric:: Class

.. autosummary:: sdss_access.path.Path
.. autosummary:: sdss_access.Access
.. autosummary:: sdss_access.HttpAccess
.. autosummary:: sdss_access.RsyncAccess
.. autosummary:: sdss_access.CurlAccess

.. rubric:: Methods

Expand Down
6 changes: 5 additions & 1 deletion python/sdss_access/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,18 @@
# File logging can be started by calling log.start_file_logger(name).
from .misc import log

# check if posix-based operating system
from os import name
is_posix = ( name == "posix" )

# set up the TREE, but match the TREE_VER if it is already there
from tree import Tree
config = os.environ.get('TREE_VER', 'sdsswork')
tree = Tree(config=config)
log.debug("SDSS_ACCESS> Using %r" % tree)

from .path import Path as SDSSPath, AccessError
from .sync import HttpAccess, RsyncAccess
from .sync import HttpAccess, Access, BaseAccess, RsyncAccess, CurlAccess


NAME = 'sdss_access'
Expand Down
Loading

0 comments on commit 5d7b6cb

Please sign in to comment.