-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #9 from UoA-eResearch/IDS-929-generate-manifest-ba…
…sed-on-rd-content Create inital mocked manifests for Get Requests
- Loading branch information
Showing
13 changed files
with
610 additions
and
455 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
"""Scripts for generating file manifests | ||
""" | ||
|
||
import multiprocessing | ||
import os | ||
from pathlib import Path | ||
from typing import Dict, Generator | ||
|
||
import bagit | ||
|
||
from models.manifest import Manifest | ||
|
||
PROCESSES = max(multiprocessing.cpu_count() - 2, 1) | ||
DEFAULT_CHECKSUM = "sha512" | ||
|
||
|
||
def _sorted_walk(data_dir: str) -> Generator[str, None, None]: | ||
for dirpath, dirnames, filenames in os.walk(data_dir): | ||
filenames.sort() | ||
dirnames.sort() | ||
for fn in filenames: | ||
path = os.path.join(dirpath, fn) | ||
yield path | ||
|
||
|
||
def _encode_filename(s: str) -> str: | ||
s = s.replace("\r", "%0D") | ||
s = s.replace("\n", "%0A") | ||
return s | ||
|
||
|
||
def genertate_filelist(drive_path: Path) -> str:
    """Return every file path under ``drive_path`` as newline-separated text.

    Paths come from a walk that sorts directory and file names, mirroring
    the ordering bagit uses. Carriage returns and line feeds inside names
    are percent-encoded so each entry occupies exactly one line.

    Args:
        drive_path: Root directory to list.

    Returns:
        The encoded file paths joined by newlines; an empty string when no
        files are found.
    """
    # Encoding is a pair of cheap string substitutions, so it is done
    # in-process. Shipping every path to a worker pool and back costs far
    # more than the work itself, and creating a pool fails when this runs
    # inside a daemonic worker process.
    return "\n".join(
        _encode_filename(path) for path in _sorted_walk(drive_path.as_posix())
    )
|
||
|
||
def generate_manifest(drive_id: str) -> Manifest:
    """Build a file manifest for the research drive identified by ``drive_id``.

    This is currently a mock: the drive is not mounted, and the listing is
    generated from a fixed test directory. Mounting the research drive via
    a service account is left for future work.
    """
    # Network location the drive will eventually be mounted from; unused
    # until mounting is implemented.
    _ = f"//files.auckland.ac.nz/research/{drive_id}"
    # Stand-in for the real mountpoint.
    listing = genertate_filelist(Path("tests/restst000000001-testing"))
    return Manifest(manifest=listing)
|
||
|
||
def bag_directory(drive_path: Path, bag_info: Dict[str, str]) -> None:
    """Create a bagit bag from a given directory.

    Args:
        drive_path (Path): the path to the directory to bag
        bag_info (Dict[str, str]): a dictionary documenting ownership of the bag
    """
    bagit.make_bag(
        bag_dir=drive_path.as_posix(),
        bag_info=bag_info,
        processes=PROCESSES,
        # bagit iterates ``checksums`` as a collection of algorithm names;
        # a bare string would be read one character at a time and no valid
        # hash algorithm would be found.
        checksums=[DEFAULT_CHECKSUM],
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
"""sql models for storing maninfests | ||
""" | ||
|
||
from typing import TYPE_CHECKING | ||
|
||
from sqlmodel import Field, Relationship, SQLModel | ||
|
||
if TYPE_CHECKING: | ||
from models.services import ResearchDriveService | ||
|
||
|
||
class ManifestDriveLink(SQLModel, table=True):
    """Association table joining research drive services to file manifests."""

    # Both columns are foreign keys and are each flagged as primary key,
    # so together they identify one manifest/drive pairing.
    manifest_id: int | None = Field(
        primary_key=True, foreign_key="manifest.id", default=None
    )
    research_drive_id: int | None = Field(
        primary_key=True, foreign_key="researchdriveservice.id", default=None
    )
|
||
|
||
class Manifest(SQLModel, table=True):
    """SQL model for storing simple file manifests."""

    # Optional with a ``None`` default so a manifest can be constructed
    # before it has been persisted (e.g. ``Manifest(manifest=...)``); this
    # matches the key declarations on ManifestDriveLink.
    id: int | None = Field(default=None, primary_key=True)
    # The manifest text itself.
    manifest: str
    # Related research drive service, joined through ManifestDriveLink.
    # NOTE(review): relationships that use a link_model are normally
    # many-to-many and annotated as a list - confirm the scalar annotation
    # is intended.
    research_drive: "ResearchDriveService" = Relationship(
        link_model=ManifestDriveLink, back_populates="manifest"
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.