Skip to content

Commit

Permalink
Merge pull request #9 from UoA-eResearch/IDS-929-generate-manifest-ba…
Browse files Browse the repository at this point in the history
…sed-on-rd-content

Create initial mocked manifests for Get Requests
  • Loading branch information
JLoveUOA authored Nov 26, 2024
2 parents dc07567 + 37dd942 commit 92bcbca
Show file tree
Hide file tree
Showing 13 changed files with 610 additions and 455 deletions.
918 changes: 465 additions & 453 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ python = "^3.11"
fastapi = {extras = ["standard"], version = "^0.115.0"}
sqlmodel = "^0.0.22"
sqlalchemy = "^2.0.36"
bagit = "^1.8.1"


[tool.poetry.group.dev.dependencies]
Expand Down
40 changes: 38 additions & 2 deletions src/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from sqlalchemy.exc import IntegrityError
from sqlmodel import Session, SQLModel, create_engine, select

from api.manifests import generate_manifest
from api.security import ApiKey, validate_api_key, validate_permissions
from models.member import Member
from models.person import Person
Expand Down Expand Up @@ -107,6 +108,9 @@ async def set_drive_info(
for drive in input_project.services.research_drive
]
project.research_drives = drives
for drive in drives:
drive.manifest = generate_manifest(drive.name)
# Add the validated services and members into the project
project.members = members
# Upsert the project.
session.merge(project)
Expand Down Expand Up @@ -139,24 +143,56 @@ async def get_drive_info(
"""Retrieve information about the specified Research Drive."""

validate_permissions("GET", api_key)

code_query = select(ResearchDriveService).where(
ResearchDriveService.name == drive_id
)
drive_found = session.exec(code_query).first()

if drive_found is None:
raise HTTPException(
status_code=404,
detail=f"Research Drive ID {drive_id} not found in local database.",
)

projects = drive_found.projects
if len(projects) == 0:
raise HTTPException(
status_code=404,
detail=f"No Projects associated with {drive_id} in local database",
)

return {
"drive_id": drive_id,
"ro_crate": "TODO: Make RO-Crate from: " + str(projects),
"manifest": "TODO: Make Manifest",
}


@app.get(ENDPOINT_PREFIX + "/resdrivemanifest")
async def get_drive_manifest(
    drive_id: ResearchDriveID,
    session: SessionDep,
    api_key: ApiKey = Security(validate_api_key),
) -> dict[str, str]:
    """Retrieve the stored file manifest for a Research Drive.

    Args:
        drive_id: Name of the Research Drive to look up.
        session: Database session used for the lookup.
        api_key: API key of the caller; must be permitted to perform "GET".

    Returns:
        A dictionary with a single "manifest" key holding the manifest text.

    Raises:
        HTTPException: 404 if no drive with that name is stored, or if the
            drive has no manifest available.
    """
    validate_permissions("GET", api_key)
    code_query = select(ResearchDriveService).where(
        ResearchDriveService.name == drive_id
    )
    drive_found = session.exec(code_query).first()

    if drive_found is None:
        raise HTTPException(
            status_code=404,
            detail=f"Research Drive ID {drive_id} not found in local database.",
        )

    # The related Manifest row may be absent (e.g. a drive stored without a
    # linked manifest), so check the relationship itself before reading the
    # manifest text; otherwise the attribute access fails before the 404 below.
    manifest_record = drive_found.manifest
    manifest = manifest_record.manifest if manifest_record is not None else None

    if manifest is None:
        raise HTTPException(
            status_code=404,
            detail=f"Manifest not available for {drive_id}",
        )

    return {
        "manifest": manifest,
    }
71 changes: 71 additions & 0 deletions src/api/manifests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""Scripts for generating file manifests
"""

import multiprocessing
import os
from pathlib import Path
from typing import Dict, Generator

import bagit

from models.manifest import Manifest

PROCESSES = max(multiprocessing.cpu_count() - 2, 1)
DEFAULT_CHECKSUM = "sha512"


def _sorted_walk(data_dir: str) -> Generator[str, None, None]:
    """Yield the path of every file beneath *data_dir* in sorted order.

    Within each directory the file names are yielded in sorted order, and
    sub-directories are visited in sorted order.
    """
    for root, subdirs, files in os.walk(data_dir):
        # Sorting the lists in place: os.walk reads `subdirs` again when it
        # resumes, so this also fixes the order in which it descends.
        subdirs.sort()
        files.sort()
        yield from (os.path.join(root, name) for name in files)


def _encode_filename(s: str) -> str:
    """Return *s* with carriage returns and line feeds percent-encoded.

    "\\r" becomes "%0D" and "\\n" becomes "%0A"; every other character is
    left untouched.
    """
    # Single pass over the string instead of two chained replacements.
    return s.translate({ord("\r"): "%0D", ord("\n"): "%0A"})


def genertate_filelist(drive_path: Path) -> str:
    """Generate a newline-separated list of all the files under a path.

    Paths are produced by ``_sorted_walk`` (sorted on file names and directory
    names, to mirror the bagit process) and each one is passed through
    ``_encode_filename`` so that an embedded newline in a file name cannot
    break the one-path-per-line layout.

    Args:
        drive_path: Directory to list.

    Returns:
        The encoded file paths joined with "\\n"; an empty string when no
        files are found.
    """
    # Encoding is two cheap string replacements per path, so it is done
    # in-process: handing it to a process pool costs more in worker start-up
    # and pickling than the work itself, and yields the same result.
    return "\n".join(
        _encode_filename(path) for path in _sorted_walk(drive_path.as_posix())
    )


def generate_manifest(drive_id: str) -> Manifest:
    """Build a Manifest record for the given drive ID.

    This is currently a mock-up: the file listing is always taken from a
    fixed test directory. In future a service account is expected to mount
    the research drive identified by *drive_id* and list that instead.
    """
    # Location the drive would be mounted from; computed as a placeholder
    # only and not used yet.
    _ = f"//files.auckland.ac.nz/research/{drive_id}"
    # Fixed test directory standing in for the mounted drive.
    mock_root = Path("tests/restst000000001-testing")
    return Manifest(manifest=genertate_filelist(mock_root))


def bag_directory(drive_path: Path, bag_info: Dict[str, str]) -> None:
    """Create a bagit bag from a given directory.

    Args:
        drive_path (Path): the path to the directory to bag
        bag_info (Dict[str,str]): a dictionary documenting ownership of the bag
    """
    bagit.make_bag(
        bag_dir=drive_path.as_posix(),
        bag_info=bag_info,
        processes=PROCESSES,
        # bagit iterates `checksums` as a collection of algorithm names, so a
        # bare string would be read one character at a time; wrap it in a list.
        checksums=[DEFAULT_CHECKSUM],
    )
30 changes: 30 additions & 0 deletions src/models/manifest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""sql models for storing maninfests
"""

from typing import TYPE_CHECKING

from sqlmodel import Field, Relationship, SQLModel

if TYPE_CHECKING:
from models.services import ResearchDriveService


class ManifestDriveLink(SQLModel, table=True):
    """Association table pairing a file manifest with a research drive service.

    Both columns are foreign keys and are both flagged as primary key.
    """

    # References manifest.id.
    manifest_id: int | None = Field(
        primary_key=True,
        foreign_key="manifest.id",
        default=None,
    )
    # References researchdriveservice.id.
    research_drive_id: int | None = Field(
        primary_key=True,
        foreign_key="researchdriveservice.id",
        default=None,
    )


class Manifest(SQLModel, table=True):
    """SQL model for storing simple file manifests."""

    # Optional with a None default so a Manifest can be constructed before it
    # has been stored (callers build it from the manifest text alone); the
    # key is expected to be filled in once the row is inserted.
    id: int | None = Field(default=None, primary_key=True)
    # The manifest text itself.
    manifest: str
    # The research drive this manifest belongs to, joined through the
    # ManifestDriveLink table.
    research_drive: "ResearchDriveService" = Relationship(
        link_model=ManifestDriveLink, back_populates="manifest"
    )
5 changes: 5 additions & 0 deletions src/models/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

from sqlmodel import Field, Relationship, SQLModel

from models.manifest import Manifest, ManifestDriveLink

if TYPE_CHECKING:
from models.project import Project

Expand Down Expand Up @@ -35,6 +37,9 @@ class ResearchDriveService(SQLModel, table=True):
projects: list["Project"] = Relationship(
link_model=ResearchDriveProjectLink, back_populates="research_drives"
)
manifest: Manifest = Relationship(
link_model=ManifestDriveLink, back_populates="research_drive"
)


class InputServices(SQLModel):
Expand Down
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.

0 comments on commit 92bcbca

Please sign in to comment.