From 3800a11955eb8565388d57a37a2890ea363dc8a4 Mon Sep 17 00:00:00 2001 From: Johannes Hentschel Date: Sun, 13 Aug 2023 22:11:57 +0200 Subject: [PATCH] removes wrong argument 'flat' from operations._transform() generator --- src/ms3/operations.py | 48 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/src/ms3/operations.py b/src/ms3/operations.py index ccd64b9f..3085191a 100644 --- a/src/ms3/operations.py +++ b/src/ms3/operations.py @@ -3,8 +3,9 @@ """ import logging import os -from typing import Dict, List, Literal, Optional, Tuple, Union +from typing import Dict, Iterator, List, Literal, Optional, Tuple, Union +import pandas as pd from ms3 import ( Corpus, Parse, @@ -342,7 +343,48 @@ def _transform( zipped: bool = False, overwrite: bool = True, log_level=None, -): +) -> Iterator[Tuple[pd.DataFrame, TSVtype, str, str, str]]: + """Helper generator to iterate over each selected facets--if a DataFrame is to be written for it either as + standalone TSV or into a ZIP file--concatenated for the entire Parse or Corpus object. + + Args: + ms3_object: The object to iterate over. + facets: Selected facets. Determines the number of iterations. + filename: + If non-empty, this will be used for filenames as in f"{filename.{facet}.tsv" and, if ``zipped``, + f"{filename}.zip". Otherwise, filename will be set to :attr:`Corpus.name` for corpus objects, and for + Parse objects to the name of the ``output_folder``, if specified, or to "ms3_parse" otherwise. + output_folder: + The path where the file is to be created. The generator itself does not create any files, but the argument + is needed to determine if a file is actually to be written, or not, depending on ``overwrite``. The + argument is yielded as the third element. + choose: + Whether to automatically pick a single facet DataFrame per piece based on the shortest relative path ( + default), or all available, or ask for user input when multiple versions of the same facet have been + detected. + interval_index: + Set to True if you want to replace the RangeIndex level with an IntervalIndex indicating the temporal + dimensions of each row in terms of quarterbeats. + unfold: + Set to True if repeats are to be unfolded prior to concatenation of the facet DataFrames. + test: Set to True to log what would have happened without yielding anything. + zipped: + If False (default), each concatenated facet is to be written to a separate TSV file and these are the + paths that generator checks for existence, skipping existing files if ``overwrite`` has been set to False. + If True, all facets are to be written into the same ZIP file; if it exists and ``overwrite`` has been set + to False, the generator aborts (returns None). + overwrite: + True by default, meaning that existing files are to be overwritten. If ``zipped`` has been set to True and + the target ZIP file exists, it will be removed before beginning the iteration. + log_level: + + Yields: + The DataFrame holding the concatenated facet for the entire Corpus or Parse object. + The facet name. + ``output_folder``. + The filename prefix depending on the ``filename`` argument as explained above. + The log message to emit when writing the file (depends on ``test`` and ``zipped``). + """ logger = get_logger("ms3.transform", level=log_level) if len(facets) == 0: print( @@ -362,6 +404,7 @@ def _transform( prefix = "ms3_parse" prefix = make_valid_frictionless_name(prefix) if zipped: + # The variable 'path' will be the ZIP file to write to, or, if not zipped, one TSV path per facet (loop below). zip_name = f"{prefix}.zip" path = ( zip_name if output_folder is None else os.path.join(output_folder, zip_name) @@ -416,7 +459,6 @@ def _transform( df = ms3_object.get_facet( facet, choose=choose, - flat=True, interval_index=interval_index, unfold=unfold, )