Skip to content

Commit

Permalink
[DOP-22585] Move views creation to migration
Browse files Browse the repository at this point in the history
  • Loading branch information
TiGrib committed Feb 3, 2025
1 parent 483fdcd commit 477421f
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 60 deletions.
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@ repos:
- id: chmod
args: ['644']
exclude_types: [shell]
exclude: ^(.*__main__\.py|data_rentgen/server/scripts/export_openapi_schema\.py|data_rentgen/db/scripts/create_partitions\.py|data_rentgen/db/scripts/create_analytic_views\.py)$
exclude: ^(.*__main__\.py|data_rentgen/server/scripts/export_openapi_schema\.py|data_rentgen/db/scripts/create_partitions\.py|data_rentgen/db/scripts/refresh_analytic_views\.py)$
- id: chmod
args: ['755']
files: ^(.*__main__\.py|data_rentgen/server/scripts/export_openapi_schema\.py|data_rentgen/db/scripts/create_partitions\.py|data_rentgen/db/scripts/create_analytic_views\.py)$
files: ^(.*__main__\.py|data_rentgen/server/scripts/export_openapi_schema\.py|data_rentgen/db/scripts/create_partitions\.py|data_rentgen/db/scripts/refresh_analytic_views\.py)$
- id: insert-license
files: .*\.py$
exclude: ^(data_rentgen/dependencies/stub.py|docs/.*\.py|tests/.*\.py)$
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# SPDX-FileCopyrightText: 2024-2025 MTS PJSC
# SPDX-License-Identifier: Apache-2.0
"""create_analytics_views
Revision ID: f017d4c58658
Revises: 56f5a3f9442a
Create Date: 2025-02-03 16:44:27.746785
"""

import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = "f017d4c58658"
down_revision = "56f5a3f9442a"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Create the analytics materialized views (empty, ``WITH NO DATA``).

    Any pre-existing views with the same names (previously created by the
    ``create_analytic_views`` script) are dropped first so the migration is
    idempotent. ``DROP MATERIALIZED VIEW`` is required here: the old script
    created *materialized* views, and PostgreSQL's plain ``DROP VIEW``
    raises an error when pointed at a materialized view.
    """
    for base_table in ("output", "input"):
        for depth, suffix in view_sufix_map.items():
            view_name = base_table + suffix
            # IF EXISTS keeps this a no-op on fresh databases.
            op.execute(sa.text(f"DROP MATERIALIZED VIEW IF EXISTS {view_name}"))
            op.execute(sa.text(get_statement(base_table, depth, suffix)))


def downgrade() -> None:
    """Drop the analytics materialized views created by :func:`upgrade`.

    The views are created with ``CREATE MATERIALIZED VIEW``, so they must be
    dropped with ``DROP MATERIALIZED VIEW`` — PostgreSQL's plain ``DROP VIEW``
    fails with "is not a view" on a materialized view.
    """
    for base_table in ("output", "input"):
        for suffix in view_sufix_map.values():
            view_name = base_table + suffix
            op.execute(sa.text(f"DROP MATERIALIZED VIEW IF EXISTS {view_name}"))


# Maps an aggregation window (PostgreSQL interval unit) to the suffix of the
# materialized view built over that window, e.g. "output" + "_daily_stats".
# NOTE(review): name keeps the historical "sufix" spelling — other modules
# (the refresh script) import it, so renaming would break them.
view_sufix_map = {
    "day": "_daily_stats",
    "week": "_weekly_stats",
    "month": "_monthly_stats",
}


def get_statement(base_table: str, depth: str, suffix: str) -> str:
    """Build the ``CREATE MATERIALIZED VIEW`` DDL for one analytics view.

    The view aggregates per-dataset, per-user interaction statistics
    (row/byte/file sums, interaction count, last interaction time) over the
    trailing ``1 <depth>`` interval, joining runs to the users who started
    them. It is created ``WITH NO DATA`` and must be populated later by the
    ``refresh_analytic_views`` script.

    Args:
        base_table: Source table name, ``"output"`` or ``"input"``.
        depth: PostgreSQL interval unit: ``"day"``, ``"week"`` or ``"month"``.
        suffix: View-name suffix appended to ``base_table``
            (e.g. ``"_daily_stats"``).

    Returns:
        The DDL statement as a string.

    Raises:
        ValueError: If ``base_table`` or ``depth`` is not in the allowlist.
            Both values are interpolated into raw SQL (f-string, S608), so
            they are restricted to known-safe identifiers.
    """
    if base_table not in ("output", "input"):
        raise ValueError(f"Unexpected base_table: {base_table!r}")
    if depth not in ("day", "week", "month"):
        raise ValueError(f"Unexpected depth: {depth!r}")

    view_name = base_table + suffix
    return f"""
    CREATE MATERIALIZED VIEW {view_name}
    AS (
        WITH aggregates AS (
            SELECT
                {base_table}.dataset_id as dataset_id
                , u.id as user_id
                , u.name as user_name
                , max({base_table}.created_at) as last_interaction_dt
                , count(*) as num_of_interactions
                , sum(num_bytes) as sum_bytes
                , sum(num_rows) as sum_rows
                , sum(num_files) as sum_files
            FROM {base_table}
            JOIN run r ON {base_table}.run_id = r.id
            JOIN public.user u ON r.started_by_user_id = u.id
            WHERE {base_table}.created_at >= now() - interval '1 {depth}'
            GROUP BY {base_table}.dataset_id, u.id
        )
        SELECT
            d.name as dataset_name
            , l.name as dataset_location
            , l.type as dataset_location_type
            , agr.user_id
            , agr.user_name
            , agr.last_interaction_dt
            , agr.num_of_interactions
            , agr.sum_bytes
            , agr.sum_rows
            , agr.sum_files
        FROM aggregates agr
        JOIN dataset d ON agr.dataset_id = d.id
        LEFT JOIN location l ON d.location_id = l.id
    )
    WITH NO DATA
    """
Original file line number Diff line number Diff line change
Expand Up @@ -52,52 +52,6 @@ def get_parser() -> ArgumentParser:
return parser


def get_statement(base_table: str, type: str) -> str:
view_name = base_table + view_sufix_map[type]
return f"""
CREATE MATERIALIZED VIEW IF NOT EXISTS {view_name}
AS (
WITH aggregates AS (
SELECT
{base_table}.dataset_id as dataset_id
, u.id as user_id
, u.name as user_name
, max({base_table}.created_at) as last_interaction_dt
, count(*) as num_of_interactions
, sum(num_bytes) as sum_bytes
, sum(num_rows) as sum_rows
, sum(num_files) as sum_files
FROM {base_table}
JOIN run r ON {base_table}.run_id = r.id
JOIN public.user u ON r.started_by_user_id = u.id
WHERE {base_table}.created_at >= now() - interval '1 {type}'
GROUP BY {base_table}.dataset_id, u.id
)
SELECT
d.name as dataset_name
, l.name as dataset_location
, l.type as dataset_location_type
, agr.user_id
, agr.user_name
, agr.last_interaction_dt
, agr.num_of_interactions
, agr.sum_bytes
, agr.sum_rows
, agr.sum_files
FROM aggregates agr
JOIN dataset d ON agr.dataset_id = d.id
LEFT JOIN location l ON d.location_id = l.id
)
"""


async def create_view(depth: Depth, session: AsyncSession):
for base_table in ("output", "input"):
statement = get_statement(base_table, depth)
logger.debug("Executing statement: %s", statement)
await session.execute(text(statement))


async def refresh_view(depth: Depth, session: AsyncSession):
for base_table in ("output", "input"):
view_name = base_table + view_sufix_map[depth]
Expand All @@ -122,7 +76,6 @@ async def main(args: list[str]) -> None:
session_factory = create_session_factory(db_settings)
async with session_factory() as session:
for depth in depths:
await create_view(depth, session)
await refresh_view(depth, session)
await session.commit()

Expand Down
17 changes: 9 additions & 8 deletions docs/reference/database/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ After migrations are performed, it is required to run script which creates parti
By default, it creates monthly partitions, for current and next month. This can be changed by overriding command args.
This script should run on schedule, for example by adding a dedicated entry to `crontab <https://help.ubuntu.com/community/CronHowto>`_.

Additionally after migrations you can run a script which creates analytics views.
Along with migrations, analytics views are created. By default these materialized views are empty (``WITH NO DATA``).
In order to fill them with data you need to run the refresh script; the command for this is shown below.
Views are based on data in the ``output`` and ``input`` tables and have the following structure:

.. code:: text
Expand All @@ -30,8 +31,8 @@ Views based on data in ``output`` and ``input`` tables and has such structure:
sum_files - Sum of files in given interval. ``num_files`` - column.
We provide three types of views: ``day``, ``week`` and ``month``, based on the time period in which the aggregation occurs.
By default, script creates pair views for all intervals.
You can specify which views to create with ``depth`` parameter. Options are: ``day``, ``week``, ``month``.
Views are created for all intervals and, by default, the script refreshes all of them.
You can specify which views to refresh with the ``depth`` parameter. Options are: ``day``, ``week``, ``month``.

Requirements
------------
Expand Down Expand Up @@ -80,11 +81,11 @@ With Docker
0 0 * * * docker exec data-rentgen-server-1 "python -m data_rentgen.db.scripts.create_partitions"
* Create analytic views:
* Refresh analytic views:

.. code:: console
$ docker exec data-rentgen-server-1 "python -m data_rentgen.db.scripts.create_analytic_views"
$ docker exec data-rentgen-server-1 "python -m data_rentgen.db.scripts.refresh_analytic_views"
* Add analytic views refresh script to crontab, to run every day:

Expand All @@ -94,7 +95,7 @@ With Docker
.. code:: text
0 0 * * * docker exec data-rentgen-server-1 "python -m data_rentgen.db.scripts.create_analytic_views"
0 0 * * * docker exec data-rentgen-server-1 "python -m data_rentgen.db.scripts.refresh_analytic_views"
Without Docker
Expand Down Expand Up @@ -162,7 +163,7 @@ Without Docker

.. code:: console
$ python -m data_rentgen.db.scripts.create_analytic_views
$ python -m data_rentgen.db.scripts.refresh_analytic_views
* Add analytic views refresh script to crontab, to run every day:

Expand All @@ -173,7 +174,7 @@ Without Docker
.. code:: text
# read settings from .env file, and run script using a specific venv with all required dependencies
0 0 * * * /bin/bash -c "source /some/.env && /some/.venv/bin/python -m data_rentgen.db.scripts.create_analytic_views"
0 0 * * * /bin/bash -c "source /some/.env && /some/.venv/bin/python -m data_rentgen.db.scripts.refresh_analytic_views"
Expand Down
4 changes: 2 additions & 2 deletions docs/reference/database/views_cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@ CLI for creating views
===========================

.. argparse::
:module: data_rentgen.db.scripts.create_analytic_views
:module: data_rentgen.db.scripts.refresh_analytic_views
:func: get_parser
:prog: python -m data_rentgen.db.scripts.create_analytic_views
:prog: python -m data_rentgen.db.scripts.refresh_analytic_views
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ per-file-ignores = [
# WPS432 Found magic number: 255
"data_rentgen/db/*.py:WPS102,WPS432",
# S608 Possible SQL injection
"data_rentgen/db/scripts/create_analytic_views.py:S608",
"data_rentgen/db/migrations/versions/2025-02-03_f017d4c58658_create_analytic_view.py:S608,WPS102",
# WPS237 Found a too complex `f` string
"data_rentgen/server/exceptions/*.py:WPS237",
# WPS441 Found control variable used after block: input
Expand Down

0 comments on commit 477421f

Please sign in to comment.