From fe96d9471fd591c164b107625cd585117ec2e8c8 Mon Sep 17 00:00:00 2001 From: jessiezhang24 Date: Thu, 9 Jan 2025 15:38:21 -0800 Subject: [PATCH] update README and add summary stats func --- README.md | 1 + src/pyeda/data_summary.py | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 src/pyeda/data_summary.py diff --git a/README.md b/README.md index 45bd770..a02ddfd 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ Catherine Meng, Jessie Zhang, Zheng He - **`missing_value_summary`**\ This function is to provide a summary of missing values in the dataset. - **`get_summary_statistics`** + Generate summary statistics for the specified columns, or for all columns if none are provided. ## Installation diff --git a/src/pyeda/data_summary.py b/src/pyeda/data_summary.py new file mode 100644 index 0000000..2ce0c98 --- /dev/null +++ b/src/pyeda/data_summary.py @@ -0,0 +1,21 @@ +def get_summary_statistics(df, col = None): + """ + Generate summary statistics for the specified columns, or for all columns if none are provided. + + This function will return the important statistics (e.g. mean, min, std) for numeric columns, as well as + some key metrics (e.g. count, unique) for non-numeric columns. + + Parameters + ---------- + df : pd.DataFrame + The DataFrame containing the data for analysis. + col : list or None + A list of column names for which to get statistics. + Defaults to None, in which case statistics are generated for all columns. + + Returns + ------- + pd.DataFrame + A DataFrame with summary statistics for the specified columns. + """ + pass \ No newline at end of file