From 7aff1ce9f59169b14befa52b516c129b58c40e24 Mon Sep 17 00:00:00 2001 From: Jeong-Yoon Lee Date: Sat, 12 Oct 2024 22:10:31 -0700 Subject: [PATCH] update tests --- tests/const.py | 4 +- tests/test_causal_trees.py | 6 +- tests/test_ivlearner.py | 22 ++-- tests/test_meta_learners.py | 243 ++++++++++++++++++++---------------- tests/test_uplift_trees.py | 34 ++--- 5 files changed, 168 insertions(+), 141 deletions(-) diff --git a/tests/const.py b/tests/const.py index e94df526..db4023f3 100644 --- a/tests/const.py +++ b/tests/const.py @@ -1,5 +1,5 @@ RANDOM_SEED = 42 -N_SAMPLE = 1000 +N_SAMPLE = 2000 ERROR_THRESHOLD = 0.5 NUM_FEATURES = 6 @@ -14,4 +14,4 @@ DELTA_UPLIFT_INCREASE_DICT = { "treatment1": 0.1, } -N_UPLIFT_INCREASE_DICT = {"treatment1": 5} +N_UPLIFT_INCREASE_DICT = {"treatment1": 2} diff --git a/tests/test_causal_trees.py b/tests/test_causal_trees.py index d05c99d6..7c18e198 100644 --- a/tests/test_causal_trees.py +++ b/tests/test_causal_trees.py @@ -13,7 +13,7 @@ class CausalTreeBase: test_size: float = 0.2 - control_name: int or str = 0 + control_name: int = 0 @abstractmethod def prepare_model(self, *args, **kwargs): @@ -84,7 +84,7 @@ def test_fit(self, generate_regression_data): treatment_col="is_treated", treatment_effect_col="treatment_effect", ) - assert df_qini["ctree_ite_pred"] > df_qini["Random"] + assert df_qini["ctree_ite_pred"] > 0.0 @pytest.mark.parametrize("return_ci", (False, True)) @pytest.mark.parametrize("bootstrap_size", (500, 800)) @@ -165,7 +165,7 @@ def test_fit(self, generate_regression_data, n_estimators): treatment_col="is_treated", treatment_effect_col="treatment_effect", ) - assert df_qini["crforest_ite_pred"] > df_qini["Random"] + assert df_qini["crforest_ite_pred"] > 0.0 @pytest.mark.parametrize("n_estimators", (5,)) def test_predict(self, generate_regression_data, n_estimators): diff --git a/tests/test_ivlearner.py b/tests/test_ivlearner.py index cbf25d43..0278e14e 100644 --- a/tests/test_ivlearner.py +++ b/tests/test_ivlearner.py @@ -1,15 +1,12 @@ import pandas as pd import numpy as np from sklearn.linear_model import LinearRegression -from sklearn.model_selection import train_test_split -import statsmodels.api as sm from xgboost import XGBRegressor -import warnings from causalml.inference.iv import BaseDRIVLearner -from causalml.metrics import ape, get_cumgain +from causalml.metrics import ape, auuc_score -from .const import RANDOM_SEED, N_SAMPLE, ERROR_THRESHOLD, CONTROL_NAME, CONVERSION +from .const import RANDOM_SEED, ERROR_THRESHOLD def test_drivlearner(): @@ -34,7 +31,6 @@ def test_drivlearner(): e = e_raw.copy() e[assignment == 0] = 0 tau = (X[:, 0] + X[:, 1]) / 2 - X_obs = X[:, [i for i in range(8) if i != 1]] w = np.random.binomial(1, e, size=n) treatment = w @@ -75,10 +71,12 @@ def test_drivlearner(): } ) - cumgain = get_cumgain( - auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau" + # Check if the normalized AUUC score of model's prediction is higher than random (0.5). 
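+ # NOTE: auuc_score() condenses the curve the old get_cumgain() check
+ # walked point by point: it averages the normalized cumulative-gain
+ # curve and returns a pandas Series indexed by prediction column, so
+ # under normalize=True a random ordering scores about 0.5.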
+ auuc = auuc_score( + auuc_metrics, + outcome_col="y", + treatment_col="W", + treatment_effect_col="tau", + normalize=True, ) - - # Check if the cumulative gain when using the model's prediction is - # higher than it would be under random targeting - assert cumgain["cate_p"].sum() > cumgain["Random"].sum() + assert auuc["cate_p"] > 0.5 diff --git a/tests/test_meta_learners.py b/tests/test_meta_learners.py index c07622ff..47dd1009 100644 --- a/tests/test_meta_learners.py +++ b/tests/test_meta_learners.py @@ -31,7 +31,7 @@ ) from causalml.inference.meta import TMLELearner from causalml.inference.meta import BaseDRLearner -from causalml.metrics import ape, get_cumgain +from causalml.metrics import ape, auuc_score from .const import RANDOM_SEED, N_SAMPLE, ERROR_THRESHOLD, CONTROL_NAME, CONVERSION @@ -127,13 +127,15 @@ def test_BaseSRegressor(generate_regression_data): } ) - cumgain = get_cumgain( - auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau" + # Check if the normalized AUUC score of model's prediction is higher than random (0.5). + auuc = auuc_score( + auuc_metrics, + outcome_col="y", + treatment_col="W", + treatment_effect_col="tau", + normalize=True, ) - - # Check if the cumulative gain when using the model's prediction is - # higher than it would be under random targeting - assert cumgain["cate_p"].sum() > cumgain["Random"].sum() + assert auuc["cate_p"] > 0.5 def test_LRSRegressor(generate_regression_data): @@ -183,13 +185,15 @@ def test_BaseTLearner(generate_regression_data): } ) - cumgain = get_cumgain( - auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau" + # Check if the normalized AUUC score of model's prediction is higher than random (0.5). + auuc = auuc_score( + auuc_metrics, + outcome_col="y", + treatment_col="W", + treatment_effect_col="tau", + normalize=True, ) - - # Check if the cumulative gain when using the model's prediction is - # higher than it would be under random targeting - assert cumgain["cate_p"].sum() > cumgain["Random"].sum() + assert auuc["cate_p"] > 0.5 # test of using control_learner and treatment_learner learner = BaseTLearner( @@ -233,13 +237,15 @@ def test_BaseTRegressor(generate_regression_data): } ) - cumgain = get_cumgain( - auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau" + # Check if the normalized AUUC score of model's prediction is higher than random (0.5). + auuc = auuc_score( + auuc_metrics, + outcome_col="y", + treatment_col="W", + treatment_effect_col="tau", + normalize=True, ) - - # Check if the cumulative gain when using the model's prediction is - # higher than it would be under random targeting - assert cumgain["cate_p"].sum() > cumgain["Random"].sum() + assert auuc["cate_p"] > 0.5 def test_MLPTRegressor(generate_regression_data): @@ -272,13 +278,15 @@ def test_MLPTRegressor(generate_regression_data): } ) - cumgain = get_cumgain( - auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau" + # Check if the normalized AUUC score of model's prediction is higher than random (0.5). 
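+ # NOTE: since the true effect ("tau") is passed as treatment_effect_col,
+ # the gain curve accumulates the known effect of the units as ranked by
+ # the model instead of re-estimating it from outcomes, so each prediction
+ # column's score depends only on how the model orders the units.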
+ auuc = auuc_score( + auuc_metrics, + outcome_col="y", + treatment_col="W", + treatment_effect_col="tau", + normalize=True, ) - - # Check if the cumulative gain when using the model's prediction is - # higher than it would be under random targeting - assert cumgain["cate_p"].sum() > cumgain["Random"].sum() + assert auuc["cate_p"] > 0.5 def test_XGBTRegressor(generate_regression_data): @@ -311,13 +319,15 @@ def test_XGBTRegressor(generate_regression_data): } ) - cumgain = get_cumgain( - auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau" + # Check if the normalized AUUC score of model's prediction is higher than random (0.5). + auuc = auuc_score( + auuc_metrics, + outcome_col="y", + treatment_col="W", + treatment_effect_col="tau", + normalize=True, ) - - # Check if the cumulative gain when using the model's prediction is - # higher than it would be under random targeting - assert cumgain["cate_p"].sum() > cumgain["Random"].sum() + assert auuc["cate_p"] > 0.5 def test_BaseXLearner(generate_regression_data): @@ -350,13 +360,15 @@ def test_BaseXLearner(generate_regression_data): } ) - cumgain = get_cumgain( - auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau" + # Check if the normalized AUUC score of model's prediction is higher than random (0.5). + auuc = auuc_score( + auuc_metrics, + outcome_col="y", + treatment_col="W", + treatment_effect_col="tau", + normalize=True, ) - - # Check if the cumulative gain when using the model's prediction is - # higher than it would be under random targeting - assert cumgain["cate_p"].sum() > cumgain["Random"].sum() + assert auuc["cate_p"] > 0.5 # basic test of using outcome_learner and effect_learner learner = BaseXLearner( @@ -402,13 +414,15 @@ def test_BaseXRegressor(generate_regression_data): } ) - cumgain = get_cumgain( - auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau" + # Check if the normalized AUUC score of model's prediction is higher than random (0.5). + auuc = auuc_score( + auuc_metrics, + outcome_col="y", + treatment_col="W", + treatment_effect_col="tau", + normalize=True, ) - - # Check if the cumulative gain when using the model's prediction is - # higher than it would be under random targeting - assert cumgain["cate_p"].sum() > cumgain["Random"].sum() + assert auuc["cate_p"] > 0.5 def test_BaseXLearner_without_p(generate_regression_data): @@ -443,13 +457,15 @@ def test_BaseXLearner_without_p(generate_regression_data): } ) - cumgain = get_cumgain( - auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau" + # Check if the normalized AUUC score of model's prediction is higher than random (0.5). + auuc = auuc_score( + auuc_metrics, + outcome_col="y", + treatment_col="W", + treatment_effect_col="tau", + normalize=True, ) - - # Check if the cumulative gain when using the model's prediction is - # higher than it would be under random targeting - assert cumgain["cate_p"].sum() > cumgain["Random"].sum() + assert auuc["cate_p"] > 0.5 def test_BaseXRegressor_without_p(generate_regression_data): @@ -482,13 +498,15 @@ def test_BaseXRegressor_without_p(generate_regression_data): } ) - cumgain = get_cumgain( - auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau" + # Check if the normalized AUUC score of model's prediction is higher than random (0.5). 
+ auuc = auuc_score( + auuc_metrics, + outcome_col="y", + treatment_col="W", + treatment_effect_col="tau", + normalize=True, ) - - # Check if the cumulative gain when using the model's prediction is - # higher than it would be under random targeting - assert cumgain["cate_p"].sum() > cumgain["Random"].sum() + assert auuc["cate_p"] > 0.5 def test_BaseRLearner(generate_regression_data): @@ -521,13 +539,15 @@ def test_BaseRLearner(generate_regression_data): } ) - cumgain = get_cumgain( - auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau" + # Check if the normalized AUUC score of model's prediction is higher than random (0.5). + auuc = auuc_score( + auuc_metrics, + outcome_col="y", + treatment_col="W", + treatment_effect_col="tau", + normalize=True, ) - - # Check if the cumulative gain when using the model's prediction is - # higher than it would be under random targeting - assert cumgain["cate_p"].sum() > cumgain["Random"].sum() + assert auuc["cate_p"] > 0.5 # basic test of using outcome_learner and effect_learner learner = BaseRLearner( @@ -572,13 +592,15 @@ def test_BaseRRegressor(generate_regression_data): } ) - cumgain = get_cumgain( - auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau" + # Check if the normalized AUUC score of model's prediction is higher than random (0.5). + auuc = auuc_score( + auuc_metrics, + outcome_col="y", + treatment_col="W", + treatment_effect_col="tau", + normalize=True, ) - - # Check if the cumulative gain when using the model's prediction is - # higher than it would be under random targeting - assert cumgain["cate_p"].sum() > cumgain["Random"].sum() + assert auuc["cate_p"] > 0.5 def test_BaseRLearner_without_p(generate_regression_data): @@ -611,13 +633,15 @@ def test_BaseRLearner_without_p(generate_regression_data): } ) - cumgain = get_cumgain( - auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau" + # Check if the normalized AUUC score of model's prediction is higher than random (0.5). + auuc = auuc_score( + auuc_metrics, + outcome_col="y", + treatment_col="W", + treatment_effect_col="tau", + normalize=True, ) - - # Check if the cumulative gain when using the model's prediction is - # higher than it would be under random targeting - assert cumgain["cate_p"].sum() > cumgain["Random"].sum() + assert auuc["cate_p"] > 0.5 def test_BaseRRegressor_without_p(generate_regression_data): @@ -650,13 +674,15 @@ def test_BaseRRegressor_without_p(generate_regression_data): } ) - cumgain = get_cumgain( - auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau" + # Check if the normalized AUUC score of model's prediction is higher than random (0.5). + auuc = auuc_score( + auuc_metrics, + outcome_col="y", + treatment_col="W", + treatment_effect_col="tau", + normalize=True, ) - - # Check if the cumulative gain when using the model's prediction is - # higher than it would be under random targeting - assert cumgain["cate_p"].sum() > cumgain["Random"].sum() + assert auuc["cate_p"] > 0.5 def test_TMLELearner(generate_regression_data): @@ -702,16 +728,15 @@ def test_BaseSClassifier(generate_classification_data): } ) - cumgain = get_cumgain( + # Check if the normalized AUUC score of model's prediction is higher than random (0.5). 
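+ # NOTE: same check for the classifier learners; the outcome is the binary
+ # CONVERSION flag, which auuc_score handles unchanged because the gain
+ # curve only needs an outcome, a treatment flag, and a ranking column.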
+ auuc = auuc_score( auuc_metrics, outcome_col=CONVERSION, treatment_col="W", treatment_effect_col="treatment_effect_col", + normalize=True, ) - - # Check if the cumulative gain when using the model's prediction is - # higher than it would be under random targeting - assert cumgain["tau_pred"].sum() > cumgain["Random"].sum() + assert auuc["tau_pred"] > 0.5 def test_BaseTClassifier(generate_classification_data): @@ -746,16 +771,15 @@ def test_BaseTClassifier(generate_classification_data): } ) - cumgain = get_cumgain( + # Check if the normalized AUUC score of model's prediction is higher than random (0.5). + auuc = auuc_score( auuc_metrics, outcome_col=CONVERSION, treatment_col="W", treatment_effect_col="treatment_effect_col", + normalize=True, ) - - # Check if the cumulative gain when using the model's prediction is - # higher than it would be under random targeting - assert cumgain["tau_pred"].sum() > cumgain["Random"].sum() + assert auuc["tau_pred"] > 0.5 def test_BaseXClassifier(generate_classification_data): @@ -816,16 +840,15 @@ def test_BaseXClassifier(generate_classification_data): } ) - cumgain = get_cumgain( + # Check if the normalized AUUC score of model's prediction is higher than random (0.5). + auuc = auuc_score( auuc_metrics, outcome_col=CONVERSION, treatment_col="W", treatment_effect_col="treatment_effect_col", + normalize=True, ) - - # Check if the cumulative gain when using the model's prediction is - # higher than it would be under random targeting - assert cumgain["tau_pred"].sum() > cumgain["Random"].sum() + assert auuc["tau_pred"] > 0.5 def test_BaseRClassifier(generate_classification_data): @@ -865,16 +888,15 @@ def test_BaseRClassifier(generate_classification_data): } ) - cumgain = get_cumgain( + # Check if the normalized AUUC score of model's prediction is higher than random (0.5). + auuc = auuc_score( auuc_metrics, outcome_col=CONVERSION, treatment_col="W", treatment_effect_col="treatment_effect_col", + normalize=True, ) - - # Check if the cumulative gain when using the model's prediction is - # higher than it would be under random targeting - assert cumgain["tau_pred"].sum() > cumgain["Random"].sum() + assert auuc["tau_pred"] > 0.5 def test_BaseRClassifier_with_sample_weights(generate_classification_data): @@ -916,16 +938,15 @@ def test_BaseRClassifier_with_sample_weights(generate_classification_data): } ) - cumgain = get_cumgain( + # Check if the normalized AUUC score of model's prediction is higher than random (0.5). + auuc = auuc_score( auuc_metrics, outcome_col=CONVERSION, treatment_col="W", treatment_effect_col="treatment_effect_col", + normalize=True, ) - - # Check if the cumulative gain when using the model's prediction is - # higher than it would be under random targeting - assert cumgain["tau_pred"].sum() > cumgain["Random"].sum() + assert auuc["tau_pred"] > 0.5 def test_XGBRegressor_with_sample_weights(generate_regression_data): @@ -1009,10 +1030,12 @@ def test_BaseDRLearner(generate_regression_data): } ) - cumgain = get_cumgain( - auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau" + # Check if the normalized AUUC score of model's prediction is higher than random (0.5). 
+ auuc = auuc_score( + auuc_metrics, + outcome_col="y", + treatment_col="W", + treatment_effect_col="tau", + normalize=True, ) - - # Check if the cumulative gain when using the model's prediction is - # higher than it would be under random targeting - assert cumgain["cate_p"].sum() > cumgain["Random"].sum() + assert auuc["cate_p"] > 0.5 diff --git a/tests/test_uplift_trees.py b/tests/test_uplift_trees.py index 8da4eda4..9b1a1453 100644 --- a/tests/test_uplift_trees.py +++ b/tests/test_uplift_trees.py @@ -7,9 +7,9 @@ from sklearn.model_selection import train_test_split from causalml.inference.tree import UpliftTreeClassifier, UpliftRandomForestClassifier -from causalml.metrics import get_cumgain +from causalml.metrics import auuc_score from causalml.dataset import make_uplift_classification -from causalml.inference.tree import uplift_tree_string, uplift_tree_plot +from causalml.inference.tree import uplift_tree_plot from .const import RANDOM_SEED, N_SAMPLE, CONTROL_NAME, TREATMENT_NAMES, CONVERSION @@ -104,16 +104,19 @@ def test_UpliftRandomForestClassifier( auuc_metrics = synth.assign( is_treated=1 - actual_is_control[synthetic], conversion=df_test.loc[synthetic, CONVERSION].values, + treatment_effect=df_test.loc[synthetic, "treatment_effect"].values, uplift_tree=synth.max(axis=1), ).drop(columns=list(uplift_model.classes_[1:])) - cumgain = get_cumgain( - auuc_metrics, outcome_col=CONVERSION, treatment_col="is_treated" + # Check if the normalized AUUC score of model's prediction is higher than random (0.5). + auuc = auuc_score( + auuc_metrics, + outcome_col=CONVERSION, + treatment_col="is_treated", + treatment_effect_col="treatment_effect", + normalize=True, ) - - # Check if the cumulative gain of UpLift Random Forest is higher than - # random - assert cumgain["uplift_tree"].sum() > cumgain["Random"].sum() + assert auuc["uplift_tree"] > 0.5 @pytest.mark.parametrize("evaluation_function", ["DDP", "IT", "CIT", "IDDP"]) @@ -179,16 +182,19 @@ def UpliftTreeClassifierTesting(df, x_names, evaluation_function): auuc_metrics = synth.assign( is_treated=1 - actual_is_control[synthetic], conversion=df_test.loc[synthetic, CONVERSION].values, + treatment_effect=df_test.loc[synthetic, "treatment_effect"].values, uplift_tree=synth.max(axis=1), ).drop(columns=result.columns) - cumgain = get_cumgain( - auuc_metrics, outcome_col=CONVERSION, treatment_col="is_treated" + # Check if the normalized AUUC score of model's prediction is higher than random (0.5). + auuc = auuc_score( + auuc_metrics, + outcome_col=CONVERSION, + treatment_col="is_treated", + treatment_effect_col="treatment_effect", + normalize=True, ) - - # Check if the cumulative gain of UpLift Random Forest is higher than - # random (sometimes IT and IDDP are not better than random) - assert cumgain["uplift_tree"].sum() > cumgain["Random"].sum() + assert auuc["uplift_tree"] > 0.5 # Check if the total count is split correctly, at least for control group in the first level def validate_cnt(cur_tree):
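
For reference, the shared assertion pattern this patch standardizes on, as a minimal self-contained sketch (not part of the patch itself); the synthetic data and the "cate_p" column are illustrative stand-ins rather than code from the repo:

    import numpy as np
    import pandas as pd

    from causalml.metrics import auuc_score

    rng = np.random.default_rng(42)
    n = 1000
    tau = rng.normal(1.0, 0.5, n)          # true treatment effect
    w = rng.binomial(1, 0.5, n)            # random assignment
    y = rng.normal(0.0, 1.0, n) + w * tau  # outcome with an additive effect

    # A noisy copy of tau stands in for a fitted learner's CATE predictions.
    auuc_metrics = pd.DataFrame(
        {"y": y, "W": w, "tau": tau, "cate_p": tau + rng.normal(0.0, 0.1, n)}
    )

    # auuc_score returns a pandas Series indexed by prediction column; with
    # normalize=True, random targeting scores about 0.5, so anything above
    # 0.5 beats random.
    auuc = auuc_score(
        auuc_metrics,
        outcome_col="y",
        treatment_col="W",
        treatment_effect_col="tau",
        normalize=True,
    )
    assert auuc["cate_p"] > 0.5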