Skip to content

Commit

Permalink
removes wrong argument 'flat' from operations._transform() generator
Browse files Browse the repository at this point in the history
  • Loading branch information
Johannes Hentschel committed Aug 13, 2023
1 parent 5b397f4 commit 3800a11
Showing 1 changed file with 45 additions and 3 deletions.
48 changes: 45 additions & 3 deletions src/ms3/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
"""
import logging
import os
from typing import Dict, List, Literal, Optional, Tuple, Union
from typing import Dict, Iterator, List, Literal, Optional, Tuple, Union

import pandas as pd
from ms3 import (
Corpus,
Parse,
Expand Down Expand Up @@ -342,7 +343,48 @@ def _transform(
zipped: bool = False,
overwrite: bool = True,
log_level=None,
):
) -> Iterator[Tuple[pd.DataFrame, TSVtype, str, str, str]]:
"""Helper generator to iterate over each selected facets--if a DataFrame is to be written for it either as
standalone TSV or into a ZIP file--concatenated for the entire Parse or Corpus object.
Args:
ms3_object: The object to iterate over.
facets: Selected facets. Determines the number of iterations.
filename:
If non-empty, this will be used for filenames as in f"{filename.{facet}.tsv" and, if ``zipped``,
f"{filename}.zip". Otherwise, filename will be set to :attr:`Corpus.name` for corpus objects, and for
Parse objects to the name of the ``output_folder``, if specified, or to "ms3_parse" otherwise.
output_folder:
The path where the file is to be created. The generator itself does not create any files, but the argument
is needed to determine if a file is actually to be written, or not, depending on ``overwrite``. The
argument is yielded as the third element.
choose:
Whether to automatically pick a single facet DataFrame per piece based on the shortest relative path (
default), or all available, or ask for user input when multiple versions of the same facet have been
detected.
interval_index:
Set to True if you want to replace the RangeIndex level with an IntervalIndex indicating the temporal
dimensions of each row in terms of quarterbeats.
unfold:
Set to True if repeats are to be unfolded prior to concatenation of the facet DataFrames.
test: Set to True to log what would have happened without yielding anything.
zipped:
If False (default), each concatenated facet is to be written to a separate TSV file and these are the
paths that generator checks for existence, skipping existing files if ``overwrite`` has been set to False.
If True, all facets are to be written into the same ZIP file; if it exists and ``overwrite`` has been set
to False, the generator aborts (returns None).
overwrite:
True by default, meaning that existing files are to be overwritten. If ``zipped`` has been set to True and
the target ZIP file exists, it will be removed before beginning the iteration.
log_level:
Yields:
The DataFrame holding the concatenated facet for the entire Corpus or Parse object.
The facet name.
``output_folder``.
The filename prefix depending on the ``filename`` argument as explained above.
The log message to emit when writing the file (depends on ``test`` and ``zipped``).
"""
logger = get_logger("ms3.transform", level=log_level)
if len(facets) == 0:
print(
Expand All @@ -362,6 +404,7 @@ def _transform(
prefix = "ms3_parse"
prefix = make_valid_frictionless_name(prefix)
if zipped:
# The variable 'path' will be the ZIP file to write to, or, if not zipped, one TSV path per facet (loop below).
zip_name = f"{prefix}.zip"
path = (
zip_name if output_folder is None else os.path.join(output_folder, zip_name)
Expand Down Expand Up @@ -416,7 +459,6 @@ def _transform(
df = ms3_object.get_facet(
facet,
choose=choose,
flat=True,
interval_index=interval_index,
unfold=unfold,
)
Expand Down

0 comments on commit 3800a11

Please sign in to comment.