diff --git a/data_rentgen/db/scripts/create_analytic_views.py b/data_rentgen/db/scripts/create_analytic_views.py index 68bb9599..36999355 100755 --- a/data_rentgen/db/scripts/create_analytic_views.py +++ b/data_rentgen/db/scripts/create_analytic_views.py @@ -49,13 +49,6 @@ def get_parser() -> ArgumentParser: nargs="?", help="Depth of matherialized view data (created_at filter). Default is day", ) - parser.add_argument( - "-r", - "--refresh", - action="store_true", - default=False, - help="If provide will update views", - ) return parser @@ -103,11 +96,13 @@ async def create_views(depths: Depths, session: AsyncSession): statement = get_statement(base_table, depths) logger.debug("Executing statement: %s", statement) await session.execute(text(statement)) + await session.commit() + await refresh_view(base_table + view_sufix_map[depths], session) async def refresh_view(view_name: str, session: AsyncSession): + logger.info("Refresh view: %s", view_name) statement = f"REFRESH MATERIALIZED VIEW {view_name}" - logger.debug("Executing statement: %s", statement) await session.execute(text(statement)) @@ -117,7 +112,6 @@ async def main(args: list[str]) -> None: parser = get_parser() params = parser.parse_args(args) depths = params.depths - refresh = params.refresh db_settings = DatabaseSettings() session_factory = create_session_factory(db_settings) @@ -126,12 +120,11 @@ async def main(args: list[str]) -> None: depths = Depths(depths) logger.info("Create views with depths: %s", depths) await create_views(depths, session) - await session.commit() - if refresh: - logger.info("Refresh views") - for suffix in view_sufix_map.values(): - for base_name in ("output", "input"): - await refresh_view(base_name + suffix, session) + else: + logger.info("Create all views") + for depth in Depths: + logger.info("Create views with depths: %s", depth) + await create_views(depth, session) if __name__ == "__main__": diff --git a/docker-compose.yml b/docker-compose.yml index 9a54849d..c04a72a8 
100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -74,6 +74,15 @@ services: db: condition: service_healthy + db-views: + image: mtsrus/data-rentgen:develop + command: | + python -m data_rentgen.db.scripts.create_analytic_views + env_file: .env.docker + depends_on: + db-migration: + condition: service_completed_successfully + frontend: image: mtsrus/data-rentgen-ui:develop restart: unless-stopped diff --git a/docs/reference/database/index.rst b/docs/reference/database/index.rst index 280c3814..fbf41d2a 100644 --- a/docs/reference/database/index.rst +++ b/docs/reference/database/index.rst @@ -13,6 +13,25 @@ After migrations are performed, it is required to run script which creates parti By default, it creates monthly partitions, for current and next month. This can be changed by overriding command args. This script should run on schedule, for example by adding a dedicated entry to `crontab `_. +Additionally, after migrations you can run a script which creates analytics views. +Views are based on data in the ``output`` and ``input`` tables and have the following structure: + + .. code:: text + + dataset_name - Name of dataset. + location_name - Name of dataset location (e.g. cluster name). + user_id - Internal user id. + user_name - Internal user name (e.g. name of the user who runs the Spark job). + last_interaction - Time when the user last interacted with the dataset. Read or write depends on the base table. + num_of_interactions - Number of interactions in given interval. + s_bytes - Sum of bytes in given interval. ``num_bytes`` - column. + s_rows - Sum of rows in given interval. ``num_rows`` - column. + s_files - Sum of files in given interval. ``num_files`` - column. + +We provide three types of views: ``day``, ``week`` and ``month``, based on the time period in which the aggregation occurs. +By default, the script creates a pair of views for each interval. +You can specify which views to create with ``depth`` parameter. Options are: ``day``, ``week``, ``month``. 
+ Requirements ------------ @@ -60,6 +79,23 @@ With Docker 0 0 * * * docker exec data-rentgen-server-1 "python -m data_rentgen.db.scripts.create_partitions" +* Create analytic views: + + .. code:: console + + $ docker exec data-rentgen-server-1 "python -m data_rentgen.db.scripts.create_analytic_views" + +* Add analytic views refresh script to crontab, to run every day: + + .. code:: console + + $ crontab -e + + .. code:: text + + 0 0 * * * docker exec data-rentgen-server-1 "python -m data_rentgen.db.scripts.create_analytic_views" + + Without Docker ~~~~~~~~~~~~~~ @@ -121,6 +157,25 @@ Without Docker # read settings from .env file, and run script using a specific venv with all required dependencies 0 0 * * * /bin/bash -c "source /some/.env && /some/.venv/bin/python -m data_rentgen.db.scripts.create_partitions" +* Create analytic views: + + .. code:: console + + $ python -m data_rentgen.db.scripts.create_analytic_views + +* Add analytic views refresh script to crontab, to run every day: + + .. code:: console + + $ crontab -e + + .. code:: text + + # read settings from .env file, and run script using a specific venv with all required dependencies + 0 0 * * * /bin/bash -c "source /some/.env && /some/.venv/bin/python -m data_rentgen.db.scripts.create_analytic_views" + + + See also -------- @@ -129,4 +184,5 @@ See also configuration partitions_cli + views_cli structure diff --git a/docs/reference/database/views_cli.rst b/docs/reference/database/views_cli.rst new file mode 100644 index 00000000..fe481d1a --- /dev/null +++ b/docs/reference/database/views_cli.rst @@ -0,0 +1,9 @@ +.. _create-views-cli: + +CLI for creating views +=========================== + +.. argparse:: + :module: data_rentgen.db.scripts.create_analytic_views + :func: get_parser + :prog: python -m data_rentgen.db.scripts.create_analytic_views