Skip to content

Commit

Permalink
fix: Feedback addressed by renaming df to data throughout the package
Browse files (browse the repository at this point in the history)
  • Loading branch information
xximing committed Feb 2, 2025
1 parent 5b7a220 commit cec8888
Showing 1 changed file with 7 additions and 7 deletions.
14 changes: 7 additions & 7 deletions src/datamop/column_encoder.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import pandas as pd


def column_encoder(df, columns, method='one-hot', order=None):
def column_encoder(data, columns, method='one-hot', order=None):
"""
Encodes categorical columns using one-hot or ordinal encoding based on user input.
Parameters:
-----------
df : pandas.DataFrame
data : pandas.DataFrame
The input DataFrame containing the dataset.
columns : list
The names of the columns to be encoded.
Expand Down Expand Up @@ -40,20 +40,20 @@ def column_encoder(df, columns, method='one-hot', order=None):
Examples:
---------
>>> import pandas as pd
>>> df = pd.DataFrame({
>>> data = pd.DataFrame({
... 'Sport': ['Tennis', 'Basketball', 'Football', 'Badminton'],
... 'Level': ['A', 'B', 'C', 'D']
... })
>>> encoded_df_onehot = column_encoder(df, columns=['Sport'], method='one-hot')
>>> encoded_df_onehot = column_encoder(data, columns=['Sport'], method='one-hot')
>>> print(encoded_df_onehot)
Level Sport_Badminton Sport_Basketball Sport_Football Sport_Tennis
A 0 0 0 1
B 0 1 0 0
C 0 0 1 0
D 1 0 0 0
>>> encoded_df_ordinal = column_encoder(df, columns=['Level'], method='ordinal', order={'Level': ['A', 'B', 'C', 'D']})
>>> encoded_df_ordinal = column_encoder(data, columns=['Level'], method='ordinal', order={'Level': ['A', 'B', 'C', 'D']})
>>> print(encoded_df_ordinal)
Sport Level
Tennis 0
Expand All @@ -63,15 +63,15 @@ def column_encoder(df, columns, method='one-hot', order=None):
"""
#check input type
if not isinstance(df, pd.DataFrame):
if not isinstance(data, pd.DataFrame):
raise TypeError("Input must be a pandas DataFrame")
if not isinstance(columns, list) or not all(isinstance(col, str) for col in columns):
raise TypeError("Columns parameter must be a list of strings")
if not isinstance(method, str):
raise TypeError("Method parameter must be a string")
if method == 'ordinal' and order is not None and not isinstance(order, dict):
raise TypeError("Order parameter must be a dictionary")
encoded_df = df.copy()
encoded_df = data.copy()

if method == 'one-hot':
#check if order is input
Expand Down

0 comments on commit cec8888

Please sign in to comment.