Skip to content

Commit

Permalink
[Clean-up] Remove references to terminusdb (#542)
Browse files Browse the repository at this point in the history
* remove terminus db from docker-compose.yml

* remove nmdc_runtime/site/terminusdb/

* remove tdb from ~/api/db and ~/site/resources.py

* cleanup site/graphs.py

* cleanup site/repository.py and tdb script

* cleanup import

* cleanup setup, ops.py

* cleanup scripts

* run black

* cleanup readme

* remove `update_schema`

* Revert "run black"

This reverts commit 6f01714.

* style: reformat

---------

Co-authored-by: Jing <jing@polyneme.xyz>
Co-authored-by: eecavanna <eecavanna@users.noreply.github.com>
Co-authored-by: github-actions <github-actions@github.com>
  • Loading branch information
4 people authored Jun 7, 2024
1 parent fdc52b2 commit f17fa64
Show file tree
Hide file tree
Showing 20 changed files with 4 additions and 7,519 deletions.
9 changes: 3 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,18 +63,15 @@ The runtime features:
- `schedules` trigger recurring pipeline runs based on time
- `sensors` trigger pipeline runs based on external state
- Each `pipeline` can declare dependencies on any runtime `resources` or additional
configuration. There are TerminusDB and MongoDB `resources` defined, as well as `preset`
configuration. There are MongoDB `resources` defined, as well as `preset`
configuration definitions for both "dev" and "prod" `modes`. The `preset`s tell Dagster to
look to a set of known environment variables to load resources configurations, depending on
the `mode`.

2. A [TerminusDB](https://terminusdb.com/) database supporting revision control of schema-validated
data.

3. A MongoDB database supporting write-once, high-throughput internal
2. A MongoDB database supporting write-once, high-throughput internal
data storage by the nmdc-runtime FastAPI instance.

4. A [FastAPI](https://fastapi.tiangolo.com/) service to interface with the orchestrator and
3. A [FastAPI](https://fastapi.tiangolo.com/) service to interface with the orchestrator and
database, as a hub for data management and workflow automation.

## Local Development
Expand Down
17 changes: 1 addition & 16 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,28 +88,13 @@ services:
MONGO_INITDB_ROOT_USERNAME: admin
MONGO_INITDB_ROOT_PASSWORD: root

terminus:
image: terminusdb/terminusdb-server:v11.0.6
container_name: terminus
ports:
- "6364:6363"
tty: true
volumes:
- nmdc_runtime_terminus_data:/app/terminusdb/storage
restart: unless-stopped
environment:
TERMINUSDB_SERVER_PORT: 6363
TERMINUSDB_ADMIN_PASS: root
TERMINUSDB_AUTOLOGIN_ENABLED: "true"
TERMINUSDB_HTTPS_ENABLED: "false"

volumes:
nmdc_runtime_postgres_data:
driver: local
nmdc_runtime_mongo_data:
driver: local
nmdc_runtime_terminus_data:
driver: local


secrets:
mongoKeyFile:
Expand Down
24 changes: 0 additions & 24 deletions nmdc_runtime/api/db/terminus.py

This file was deleted.

1 change: 0 additions & 1 deletion nmdc_runtime/site/entrypoint-daemon.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ file_env() {
}

file_env "MONGO_PASSWORD"
file_env "TERMINUS_KEY"
file_env "DAGSTER_POSTGRES_PASSWORD"

exec dagster-daemon run
1 change: 0 additions & 1 deletion nmdc_runtime/site/entrypoint-dagit-readonly.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ file_env() {
}

file_env "MONGO_PASSWORD"
file_env "TERMINUS_KEY"
file_env "DAGSTER_POSTGRES_PASSWORD"

exec dagit -h 0.0.0.0 -p 3000 -w workspace.yaml --read-only
1 change: 0 additions & 1 deletion nmdc_runtime/site/entrypoint-dagit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ file_env() {
}

file_env "MONGO_PASSWORD"
file_env "TERMINUS_KEY"
file_env "DAGSTER_POSTGRES_PASSWORD"

exec dagit -h 0.0.0.0 -p 3000 -w workspace.yaml
12 changes: 0 additions & 12 deletions nmdc_runtime/site/graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
hello,
mongo_stats,
submit_metadata_to_db,
update_schema,
filter_ops_undone_expired,
construct_jobs,
maybe_post_jobs,
Expand Down Expand Up @@ -87,17 +86,6 @@ def hello_mongo():
mongo_stats()


@graph
def update_terminus():
"""
A pipeline definition. This example pipeline has a single solid.
For more hints on writing Dagster pipelines, see our documentation overview on Pipelines:
https://docs.dagster.io/overview/solids-pipelines/pipelines
"""
update_schema()


@graph
def housekeeping():
delete_operations(list_operations(filter_ops_undone_expired()))
Expand Down
44 changes: 0 additions & 44 deletions nmdc_runtime/site/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@
from pydantic import BaseModel
from pymongo.database import Database as MongoDatabase
from starlette import status
from terminusdb_client.woqlquery import WOQLQuery as WQ
from toolz import assoc, dissoc, get_in, valfilter, identity


Expand All @@ -111,14 +110,6 @@ def log_env(context):
context.log.info("\n".join(out))


@op(required_resource_keys={"terminus"})
def list_databases(context) -> List[String]:
client = context.resources.terminus.client
list_ = client.list_databases()
context.log.info(f"databases: {list_}")
return list_


@op(required_resource_keys={"mongo"})
def mongo_stats(context) -> List[str]:
db = context.resources.mongo.db
Expand All @@ -127,41 +118,6 @@ def mongo_stats(context) -> List[str]:
return collection_names


@op(required_resource_keys={"terminus"})
def update_schema(context):
with tempfile.TemporaryDirectory() as tmpdirname:
try:
context.log.info("shallow-cloning nmdc-schema repo")
subprocess.check_output(
"git clone https://github.com/microbiomedata/nmdc-schema.git"
f" --branch main --single-branch {tmpdirname}/nmdc-schema",
shell=True,
)
context.log.info("generating TerminusDB JSON-LD from NMDC LinkML")
subprocess.check_output(
f"gen-terminusdb {tmpdirname}/nmdc-schema/src/schema/nmdc.yaml"
f" > {tmpdirname}/nmdc.terminus.json",
shell=True,
)
except subprocess.CalledProcessError as e:
if e.stdout:
context.log.debug(e.stdout.decode())
if e.stderr:
context.log.error(e.stderr.decode())
context.log.debug(str(e.returncode))
raise e

with open(f"{tmpdirname}/nmdc.terminus.json") as f:
woql_dict = json.load(f)

context.log.info("Updating terminus schema via WOQLQuery")
rv = WQ(query=woql_dict).execute(
context.resources.terminus.client, "update schema via WOQL"
)
context.log.info(str(rv))
return rv


@op(
required_resource_keys={"mongo", "runtime_api_site_client"},
retry_policy=RetryPolicy(max_retries=2),
Expand Down
2 changes: 0 additions & 2 deletions nmdc_runtime/site/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@
nmdc_portal_api_client_resource,
gold_api_client_resource,
neon_api_client_resource,
terminus_resource,
mongo_resource,
)
from nmdc_runtime.site.resources import (
Expand All @@ -68,7 +67,6 @@
"nmdc_portal_api_client": nmdc_portal_api_client_resource,
"gold_api_client": gold_api_client_resource,
"neon_api_client": neon_api_client_resource,
"terminus": terminus_resource,
"mongo": mongo_resource,
}

Expand Down
31 changes: 0 additions & 31 deletions nmdc_runtime/site/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
from linkml_runtime.dumpers import json_dumper
from pydantic import BaseModel, AnyUrl
from pymongo import MongoClient, ReplaceOne, InsertOne
from terminusdb_client import WOQLClient
from toolz import get_in
from toolz import merge

Expand Down Expand Up @@ -512,33 +511,3 @@ def get_mongo(run_config: frozendict):
)
)
return mongo_resource(resource_context)


class TerminusDB:
def __init__(self, server_url, user, key, account, dbid):
self.client = WOQLClient(server_url=server_url)
self.client.connect(user=user, key=key, account=account)
db_info = self.client.get_database(dbid=dbid, account=account)
if db_info is None:
self.client.create_database(dbid=dbid, accountid=account, label=dbid)
self.client.create_graph(graph_type="inference", graph_id="main")
self.client.connect(user=user, key=key, account=account, db=dbid)


@resource(
config_schema={
"server_url": StringSource,
"user": StringSource,
"key": StringSource,
"account": StringSource,
"dbid": StringSource,
}
)
def terminus_resource(context):
return TerminusDB(
server_url=context.resource_config["server_url"],
user=context.resource_config["user"],
key=context.resource_config["key"],
account=context.resource_config["account"],
dbid=context.resource_config["dbid"],
)
1 change: 0 additions & 1 deletion nmdc_runtime/site/terminusdb/.TDB

This file was deleted.

Empty file.
6 changes: 0 additions & 6 deletions nmdc_runtime/site/terminusdb/config.json

This file was deleted.

Loading

0 comments on commit f17fa64

Please sign in to comment.