Skip to content

Commit

Permalink
Fix most linting errors
Browse files Browse the repository at this point in the history
  • Loading branch information
wfondrie committed Apr 15, 2024
1 parent e638191 commit 56024f5
Show file tree
Hide file tree
Showing 21 changed files with 110 additions and 105 deletions.
3 changes: 1 addition & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
#
# Configuration file for the Sphinx documentation builder.
#
Expand All @@ -13,8 +12,8 @@
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
import subprocess
import sys

sys.path.insert(0, os.path.abspath(".."))

Expand Down
11 changes: 5 additions & 6 deletions ppx/__init__.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,22 @@
"""See the README for detailed documentation and examples."""
try:
from importlib.metadata import version, PackageNotFoundError
from importlib.metadata import PackageNotFoundError, version

try:
__version__ = version(__name__)
except PackageNotFoundError:
pass

except ImportError:
from pkg_resources import get_distribution, DistributionNotFound
from pkg_resources import DistributionNotFound, get_distribution

try:
__version__ = get_distribution(__name__).version
except DistributionNotFound:
pass

from . import pride
from . import massive
from . import massive, pride
from .config import get_data_dir, set_data_dir
from .factory import find_project
from .pride import PrideProject
from .massive import MassiveProject
from .config import get_data_dir, set_data_dir
from .pride import PrideProject
8 changes: 5 additions & 3 deletions ppx/config.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""This module contains the configuration details for ppx"""
import os
import logging
import os
from pathlib import Path

from cloudpathlib import AnyPath
Expand All @@ -14,6 +14,7 @@ class PPXConfig:
Attributes
----------
path : pathlib.Path object
"""

def __init__(self):
Expand Down Expand Up @@ -42,8 +43,7 @@ def path(self, path):
path = self._resolve_path(path)
if not path.exists():
raise FileNotFoundError(
f"The specified directory or bucket ({path}) "
"does not exist."
f"The specified directory or bucket ({path}) " "does not exist."
)

self._path = path
Expand All @@ -61,6 +61,7 @@ def _resolve_path(path):
-------
Path or CloudPath
The resolved path.
"""
path = AnyPath(path)
try:
Expand All @@ -83,6 +84,7 @@ def set_data_dir(path=None):
----------
path : str or pathlib.Path object, optional
The path for ppx to use as its data directory.
"""
config.path = path

Expand Down
12 changes: 7 additions & 5 deletions ppx/factory.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
"""The PXDataset class and its associated methods.
This is the foundation of the ppx package.
"""
This module contains the PXDataset class and its associated methods,
which are the foundation of the ppx package.
"""
import re
import logging
import re
from urllib.parse import urlparse

import requests

from .pride import PrideProject
from .massive import MassiveProject
from .pride import PrideProject

LOGGER = logging.getLogger(__name__)

Expand All @@ -29,6 +29,7 @@ class PXDFactory:
Should ppx check the remote repository for updated metadata?
timeout : float, optional
The maximum amount of time to wait for a server response.
"""

rest = "http://proteomecentral.proteomexchange.org/cgi/GetDataset"
Expand Down Expand Up @@ -156,6 +157,7 @@ def find_project(identifier, local=None, repo=None, fetch=False, timeout=10.0):
-------
:py:class:`~ppx.PrideProject` or :py:class:`~ppx.MassiveProject`
An object to interact with the project data in the repository.
"""
identifier = str(identifier).upper()
if repo is not None:
Expand Down
15 changes: 11 additions & 4 deletions ppx/ftp.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging
import re
import socket
from ftplib import FTP, error_temp, error_perm
from ftplib import FTP, error_perm, error_temp
from functools import partial

from cloudpathlib import CloudPath
Expand Down Expand Up @@ -51,6 +51,7 @@ class FTPParser:
The maximum number of reconnects to attempt during downloads.
timeout : float, optional
The maximum amount of time to wait for a response from the server.
"""

def __init__(self, url, max_depth=4, max_reconnects=10, timeout=10.0):
Expand Down Expand Up @@ -97,9 +98,9 @@ def _with_reconnects(self, func, *args, **kwargs):
return func(*args, **kwargs)

except (
TimeoutError,
ConnectionRefusedError,
ConnectionResetError,
socket.timeout,
socket.gaierror,
socket.herror,
error_temp,
Expand Down Expand Up @@ -131,6 +132,7 @@ def _download_file(self, remote_file, out_file, force_, silent):
Disable the progress bar?
force_ : bool
Force the file to be redownloaded, even if it exists.
"""
self.connect()
size = self.connection.size(remote_file)
Expand Down Expand Up @@ -182,6 +184,7 @@ def open_(out_file, force_):
-------
file object
The opened file object.
"""
open_kwargs = {"mode": "wb+" if force_ else "ab+"}
if isinstance(out_file, CloudPath):
Expand All @@ -200,6 +203,7 @@ def _transfer_file(self, fname, fhandle, pbar):
The opened file object where the data will be written.
pbar : tqdm.tqdm
The tqdm progress bar to update.
"""
write = partial(write_file, fhandle=fhandle, pbar=pbar)
self.connection.retrbinary(f"RETR {fname}", write, rest=fhandle.tell())
Expand Down Expand Up @@ -243,6 +247,7 @@ def download(self, files, dest_dir, force_=False, silent=False):
Force the files to be redownloaded, even if they already exist.
silent : bool
Disable the progress bar?
"""
files = listify(files)
out_files = []
Expand Down Expand Up @@ -302,13 +307,14 @@ def parse_response(conn):
Parameters
----------
url : str
The url of the FTP server.
conn : Connection
The FTP server connection.
Returns
-------
files : list of str
directories : list of str
"""
lines = []
conn.dir(lines.append)
Expand Down Expand Up @@ -341,6 +347,7 @@ def parse_line(line):
The file or directory name.
date : date
The modification date.
"""
match = UNIX.fullmatch(line)
is_dir = match[1] == "d" or match[1] == "l"
Expand Down
19 changes: 10 additions & 9 deletions ppx/massive.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""MassIVE datasets."""
import logging
import re
import socket
import logging
import xml.etree.ElementTree as ET
import xml.etree.ElementTree as ET # noqa: N817
from pathlib import Path

import requests
Expand Down Expand Up @@ -41,6 +41,7 @@ class MassiveProject(BaseProject):
metadata : dict
fetch : bool
timeout : float
"""

_api = "https://gnps-datasetcache.ucsd.edu/datasette/database/filename.csv"
Expand Down Expand Up @@ -68,6 +69,7 @@ def _validate_id(self, identifier):
-------
str
The validated identifier.
"""
identifier = str(identifier).upper()
if not re.match("(MSV|RMSV)[0-9]{9}", identifier):
Expand All @@ -82,8 +84,6 @@ def url(self):
return self._url

res = requests.get(self._proxy_api + self.id, timeout=self.timeout)
print(res)
print(res.json())
for link in res.json()["datasetLink"]:
if link["accession"] == "MS:1002852":
self._url = link["value"]
Expand Down Expand Up @@ -138,18 +138,17 @@ def remote_files(self, glob=None):
-------
list of str
The remote files available for this project.
"""
if self.fetch or self._remote_files is None:
try:
info = self.file_info().splitlines()[1:]
self._remote_files = [
r.split(",")[0].split("/", 1)[1] for r in info
]
self._remote_files = [r.split(",")[0].split("/", 1)[1] for r in info]
assert self._remote_files
except (
TimeoutError,
ConnectionRefusedError,
ConnectionResetError,
socket.timeout,
socket.gaierror,
socket.herror,
EOFError,
Expand All @@ -173,6 +172,7 @@ def file_info(self):
-------
str
Information about the files in a CSV format.
"""
file_info_path = self.local / ".file_info.csv"
if file_info_path.exists() and not self.fetch:
Expand Down Expand Up @@ -208,6 +208,7 @@ def list_projects(timeout=10.0):
-------
list of str
A list of MassIVE identifiers.
"""
url = "https://gnps-datasetcache.ucsd.edu/datasette/database.csv"
params = dict(sql="select distinct dataset from filename", _size="max")
Expand All @@ -217,9 +218,9 @@ def list_projects(timeout=10.0):
return res

except (
TimeoutError,
ConnectionRefusedError,
ConnectionResetError,
socket.timeout,
socket.gaierror,
socket.herror,
EOFError,
Expand Down
31 changes: 10 additions & 21 deletions ppx/ppx.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,17 @@
"""The command line entry point for ppx"""
import sys
import logging
from pathlib import Path
import sys
from argparse import ArgumentParser
from pathlib import Path

from . import __version__
from . import find_project
from . import __version__, find_project

LOGGER = logging.getLogger(__name__)


def get_parser():
"""Parse the command line arguments"""
desc = f"""Use this command line utility to download files from the PRIDE and MassIVE
desc = """Use this command line utility to download files from the PRIDE and MassIVE
proteomics repositories. The paths to the downloaded files are written to
stdout."""

Expand All @@ -22,10 +21,7 @@ def get_parser():
parser.add_argument(
"identifier",
type=str,
help=(
"The ProteomeXchange, PRIDE, or MassIVE identifier for the "
"project."
),
help=("The ProteomeXchange, PRIDE, or MassIVE identifier for the " "project."),
)

parser.add_argument(
Expand Down Expand Up @@ -83,9 +79,7 @@ def get_parser():

def main():
"""Run ppx"""
logging.basicConfig(
level=logging.INFO, format="[%(levelname)s]: %(message)s"
)
logging.basicConfig(level=logging.INFO, format="[%(levelname)s]: %(message)s")

parser = get_parser()
args = parser.parse_args()
Expand All @@ -96,26 +90,21 @@ def main():
matches = set()
passed = []
for pat in args.files:
pat_match = set(f for f in remote_files if Path(f).match(pat))
pat_match = {f for f in remote_files if Path(f).match(pat)}
passed.append(bool(pat_match))
matches.update(pat_match)

if not all(passed):
failed = " \n".join(
[f for f, p in zip(args.files, passed) if not p]
)
failed = " \n".join([f for f, p in zip(args.files, passed) if not p])

raise FileNotFoundError(
"Unable to find one or more of the files or patterns:"
f"\n {failed}"
"Unable to find one or more of the files or patterns:" f"\n {failed}"
)

else:
matches = remote_files

LOGGER.info(
"Downloading %i files from %s...", len(matches), args.identifier
)
LOGGER.info("Downloading %i files from %s...", len(matches), args.identifier)
downloaded = proj.download(matches)

for local_file in downloaded:
Expand Down
Loading

0 comments on commit 56024f5

Please sign in to comment.