Commit

add. logging and cli
gosow9 committed Jul 30, 2024
1 parent 28d3ce3 commit 9f02db9
Showing 12 changed files with 209 additions and 120 deletions.
4 changes: 4 additions & 0 deletions docs/reference/reference.md
@@ -1,2 +1,6 @@
# API Reference

::: mkdocs-click
    :module: fits2db.cli.cli
    :command: cli
    :prog_name: fits2db
94 changes: 87 additions & 7 deletions fits2db/adapters/__init__.py
@@ -1,10 +1,90 @@
import logging
from typing import Optional
from ..config.config_model import ConfigType
from ..fits import FitsFile
from .mysql import MySQL


# Use the configured logger
log = logging.getLogger('fits2db')

class DBWriter:
    def __init__(self, config: ConfigType, file: FitsFile):
"""
Initializes the DBWriter class.
Args:
config (ConfigType): Configuration settings for the database.
file (FitsFile): FITS file to be processed.
"""
log.debug("Initializing DBWriter.")
self.file: FitsFile = file
self.config: ConfigType = config
self.db_type: Optional[str] = None
self.loader = self._load_db()
log.info("DBWriter initialized successfully.")

def _get_loader(self) -> Optional[MySQL]:
"""
Returns the database loader based on the configuration.
Returns:
Optional[MySQL]: An instance of the MySQL loader if the database type is MySQL, otherwise None.
"""
log.debug("Getting database loader for type: %s", self.db_type)
if self.db_type and self.db_type.lower() == 'mysql':
log.info("MySQL loader created.")
return MySQL(self.config, self.file)
log.debug("No loader created. Database type is not MySQL.")
return None

def _load_db(self) -> Optional[MySQL]:
"""
Loads the database type from the configuration and initializes the loader.
Returns:
Optional[MySQL]: An instance of the loader based on the database type.
"""
try:
self.db_type = self.config["database"]["type"]
log.debug("Database type loaded: %s", self.db_type)
loader = self._get_loader()
if loader:
log.info("Loader initialized successfully.")
else:
log.warning("Loader initialization failed.")
return loader
except KeyError as e:
log.error(f"Configuration key error: {e}")
return None
except Exception as e:
log.error(f"Unexpected error while loading database: {e}")
return None

def upsert(self) -> None:
"""
Inserts or updates data in the database.
"""
log.debug("Starting upsert operation.")
try:
if self.loader:
self.loader.upsert_data()
log.info("Upsert operation completed successfully.")
else:
log.error("Loader is not initialized.")
except Exception as e:
log.error(f"Error during upsert operation: {e}")

def update(self) -> None:
"""
Updates data in the database.
"""
log.debug("Starting update operation.")
try:
if self.loader:
self.loader.update_data()
log.info("Update operation completed successfully.")
else:
log.error("Loader is not initialized.")
except Exception as e:
log.error(f"Error during update operation: {e}")
23 changes: 17 additions & 6 deletions fits2db/adapters/base.py
@@ -1,19 +1,30 @@
from abc import ABC, abstractmethod
from sqlalchemy import create_engine, MetaData
import pandas as pd

from ..config.config_model import ConfigType
from ..fits.fits import FitsFile

class BaseLoader(ABC):
    """A baseclass for writing data in a database

    Attributes:
    """

    def __init__(self, db_url:str, config:ConfigType, file:FitsFile):
self.engine = create_engine(db_url)
self.metadata = MetaData()
self.metadata.reflect(bind=self.engine)
self.config = config

@abstractmethod
def create_table_if_not_exists(self, table_name, df: pd.DataFrame):
pass

@abstractmethod
    def upsert_table(self, table_name:str, df: pd.DataFrame, unique_key)->None:
pass

def upsert_data(self):
table_configs = self.config["fits_files"]["tables"]
print("Start upserting data")
self.upsert_table()
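
The abstract base makes it easy to plug in another database. Below is a hypothetical SQLite adapter, sketched only to illustrate the pattern mysql.py follows; it is not part of this commit and the upsert is deliberately naive:

import pandas as pd
from fits2db.adapters.base import BaseLoader
from fits2db.config.config_model import ConfigType
from fits2db.fits.fits import FitsFile

class SQLite(BaseLoader):
    def __init__(self, config: ConfigType, file: FitsFile):
        self.config = config
        db_url = f"sqlite:///{config['database']['db_name']}"
        super().__init__(db_url, config, file)

    def create_table_if_not_exists(self, table_name, df: pd.DataFrame):
        # Create an empty table with the DataFrame's columns if it does not exist yet.
        df.head(0).to_sql(table_name, self.engine, if_exists="append", index=False)

    def upsert_table(self, table_name: str, df: pd.DataFrame, unique_key) -> None:
        # Naive sketch: append rows; a real adapter would honour unique_key.
        df.to_sql(table_name, self.engine, if_exists="append", index=False)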



14 changes: 11 additions & 3 deletions fits2db/adapters/mysql.py
@@ -1,11 +1,12 @@
from .base import BaseLoader
from ..config.config_model import ConfigType
from ..fits.fits import FitsFile

class MySQL(BaseLoader):
    def __init__(self, config:ConfigType, file:FitsFile):
        self.config = config
        db_url = self.create_db_url()
        super().__init__(db_url, config, file)

def create_db_url(self):
user = self.config["database"]["user"]
@@ -15,4 +16,11 @@ def create_db_url(self):
db_name = self.config["database"]["db_name"]
return f'mysql+mysqlconnector://{user}:{password}@{host}:{port}/{db_name}'



def upsert_table(self):
print("MySQL upsert")
#return super().upsert_data(table_name, df, unique_key)
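
For reference, the connection URL that create_db_url assembles looks like this (sample values only, no real server implied):

user, password, host, port, db_name = "fits", "secret", "localhost", 3306, "fits2db"
db_url = f"mysql+mysqlconnector://{user}:{password}@{host}:{port}/{db_name}"
print(db_url)  # mysql+mysqlconnector://fits:secret@localhost:3306/fits2db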




106 changes: 20 additions & 86 deletions fits2db/cli/cli.py
@@ -1,94 +1,28 @@
import click
from ..core import Fits2db, get_all_fits


def validate_output_filename(ctx, param, value):
if ctx.params.get("csv") and not value.endswith(".csv"):
raise click.BadParameter(
"CSV filename must have a .csv extension."
)
if ctx.params.get("excel") and not value.endswith(".xlsx"):
raise click.BadParameter(
"Excel filename must have a .xlsx extension."
)
if ctx.params.get("csv") or ctx.params.get("excel"):
if not value:
raise click.BadParameter(
"Output filename is required when --csv or --excel is specified."
)
return value


@click.group()
def cli():
"""Fits2DB CLI"""
pass


@click.command()
@click.argument("config_path", type=click.Path(exists=True))
@click.option(
"-f",
"--folder",
default=False,
is_flag=True,
help="Show all fits files in given folder",
)
def files(folder, config_path):
"""Prints all files from given config.yaml file"""
if folder:
files = get_all_fits([config_path])
else:
fits = Fits2db(config_path)
files = fits.get_files()
for f in files:
click.echo(f)


@click.command()
@click.argument("config_path", default=".", type=click.Path(exists=True))
@click.option(
"-m",
"--matrix",
default=False,
is_flag=True,
help="Show all tables and files as matrix",
)
@click.option(
"--csv", default=False, is_flag=True, help="Save the output as csv"
)
@click.option(
    "--excel", default=False, is_flag=True, help="Save the output as excel"
)
@click.option(
"--filename",
default="output.csv",
callback=validate_output_filename,
help="The filename for the output (required if --csv or --excel is specified).",
)
def tables(config_path, matrix, csv, excel, filename):
"""Prints all table names from all fits files from given config.yaml file"""
fits = Fits2db(config_path)
format = None
if csv:
format = "csv"
elif excel:
format = "excel"

if matrix:
m = fits.create_table_matrix(
output_format=format, output_file=filename
)
if format is None:
click.echo(m.to_string())
else:
names, _ = fits.get_table_names()
for f in names:
        click.echo(f)


from .helper_func import tables, files, upsert
from .utils import set_verbosity


@click.version_option("0.0.1b", "--version")
@click.group(
    help="""
    Fits2DB CLI can be used to extract data from fits files and load them into a Database.
    For this, the CLI has various helper functions to inspect the content of fits files and run some
    checks to see if the expected content is available.

    ??? tips
        - Check files before loading them into the database to have fewer worries once loaded.
        - You can also set a fail flag to fail the ingestion if some columns or data points are missing.
    """
)
@click.option('-v', '--verbosity', count=True, callback=set_verbosity, expose_value=False, is_eager=True,
help="Increase verbosity of the log output. Use -v for WARNING, -vv for INFO, -vvv for DEBUG.")
@click.pass_context
def cli(ctx):
ctx.obj['logger'].info("Logger configured with verbosity level.")

cli.add_command(files)
cli.add_command(tables)
cli.add_command(upsert)

if __name__ == "__main__":
cli()
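
One way to exercise the reworked group without a shell is Click's test runner. This sketch assumes a valid config.yml exists in the working directory and that ctx.obj carries the logger the group callback expects:

import logging
from click.testing import CliRunner
from fits2db.cli.cli import cli

runner = CliRunner()
result = runner.invoke(
    cli,
    ["-vv", "files", "config.yml"],                 # -vv raises log verbosity to INFO
    obj={"logger": logging.getLogger("fits2db")},   # cli() reads ctx.obj['logger']
)
print(result.exit_code, result.output)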
1 change: 1 addition & 0 deletions fits2db/config/config_model.py
@@ -20,6 +20,7 @@ class DatabaseConfig(BaseModel):
password: Optional[StrictStr] = None
token: Optional[StrictStr] = None
port: Optional[int] = None
db_name: Optional[StrictStr] = None

@model_validator(mode="after")
def validate_database(self) -> Self:
53 changes: 49 additions & 4 deletions fits2db/core/core.py
@@ -5,10 +5,23 @@
import os
from pathlib import Path
from tqdm import tqdm
from hashlib import sha256
import pandas as pd
from ..adapters import DBWriter
import logging

# Use the configured logger
log = logging.getLogger('fits2db')

def get_all_fits(paths: list):
def get_all_fits(paths: list)->list:
"""Searches recursive throught all folders of given list of paths for
fits files and gives them back.
Args:
paths (list): A list of paths to search recursivly for fits files
Returns:
list: Returns list of absolute paths of all fits files
"""
all_fits_files = []
for path in paths:
if os.path.isdir(path):
@@ -42,10 +55,18 @@ class Fits2db:
def __init__(self, config_path):
self.config_path = Path(config_path)
self.configs = get_configs(config_path)
        self.fits_file_paths = self.get_file_names()

    def get_file_names(self) -> list:
        """Return a list of all absolute file paths found from the sources
        given in the config file.

        Returns:
            list: List of absolute paths
        """
paths = self.configs["fits_files"]["paths"]
log.debug(f"paths {paths}")
log.info("run function")
return list(dict.fromkeys(get_all_fits(paths)))

def get_table_names(self):
@@ -58,7 +79,7 @@ def get_table_names(self):
self.all_table_names.append(file.table_names)
self.file_table_dict[path] = file.table_names
except ValueError as err:
log.error(err)

self.all_table_names = flatten_and_deduplicate(self.all_table_names)
return self.all_table_names, self.file_table_dict
@@ -83,3 +104,27 @@ def create_table_matrix(self, output_format=None, output_file=None):
df.to_excel(full_file_path, index=True)

return df

def sha256sum(filename, bufsize=128 * 1024):
h = sha256()
buffer = bytearray(bufsize)
# using a memoryview so that we can slice the buffer without copying it
buffer_view = memoryview(buffer)
with open(filename, "rb", buffering=0) as f:
while True:
n = f.readinto(buffer_view)
if not n:
break
h.update(buffer_view[:n])
return h.hexdigest()

def upsert_to_db(self):
for path in tqdm(self.fits_file_paths):
path = Path(path)
try:
file = FitsFile(path)
writer = DBWriter(self.configs, file)
writer.upsert()

except ValueError as err:
log.error(err)
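
Putting the core pieces together, a hedged sketch of the high-level flow this commit enables; file names are hypothetical and the sha256sum import assumes it is a module-level helper:

from fits2db.core import Fits2db
from fits2db.core.core import sha256sum   # assumption: exposed at module level

fits = Fits2db("config.yml")            # reads fits_files.paths and database settings
print(fits.get_file_names())            # deduplicated absolute paths of all FITS files
print(sha256sum("data/example.fits"))   # content hash, e.g. for change detection
fits.upsert_to_db()                     # runs DBWriter.upsert() for every file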