Skip to content

Commit

Permalink
Marked MLK Day and Presidents' Day as holidays
Browse files Browse the repository at this point in the history
  • Loading branch information
Shakleen committed Oct 4, 2024
1 parent 3ca32d2 commit f34cd55
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 28 deletions.
8 changes: 4 additions & 4 deletions src/components/bronze_to_silver_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,10 +94,10 @@ def holiday_MLK_and_presidents_day(self, df: DataFrame) -> DataFrame:
return df.withColumn(
"is_holiday",
F.when(
(F.month(F.col("date")).isin(1, 2)) # January or February
& (F.weekday(F.col("date")) == 0) # Monday
& (F.dayofmonth(F.col("date")) >= 15) # 3rd week
& (F.dayofmonth(F.col("date")) <= 21), # 3rd week
((F.col("month")).isin(1, 2)) # January or February
& ((F.col("weekday")) == 0) # Monday
& ((F.col("dayofmonth")) >= 15) # 3rd week
& ((F.col("dayofmonth")) <= 21), # 3rd week
F.lit(True),
).otherwise(F.col("is_holiday")),
)
65 changes: 41 additions & 24 deletions test/components/bronze_to_silver_transformer_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,31 +283,48 @@ def test_combine_on_station_id_and_time(
]


@pytest.mark.parametrize(
("weekday", "is_holiday"),
[
(0, False),
(1, False),
(2, False),
(3, False),
(4, False),
(5, True),
(6, True),
],
)
def test_holiday_weekend(
transformer: BronzeToSilverTransformer,
spark: SparkSession,
weekday: int,
is_holiday: bool,
):
df = spark.createDataFrame(
[[1, 100, 100, 2024, 6, 19, weekday, 25, 171, 19, False]],
output_schema,
)
def test_holiday_weekend(transformer: BronzeToSilverTransformer, spark: SparkSession):
data = [
[1, 100, 100, 2024, month, dayofmonth, weekday, 25, 171, 19, False]
for month in range(1, 13, 1)
for weekday in range(7)
for dayofmonth in range(1, 32, 1)
]
expected = [
weekday > 4
for _ in range(1, 13, 1)
for weekday in range(7)
for _ in range(1, 32, 1)
]
df = spark.createDataFrame(data, output_schema)

output = transformer.holiday_weekend(df)

assert output.select("is_holiday").toPandas().to_numpy().flatten().tolist() == [
is_holiday
assert (
output.select("is_holiday").toPandas().to_numpy().flatten().tolist() == expected
)


def test_holiday_MLK_and_presidents_day(
    transformer: BronzeToSilverTransformer, spark: SparkSession
):
    """Exercise ``holiday_MLK_and_presidents_day`` over a full value grid.

    One row is built for every (month, weekday, dayofmonth) combination with
    month in 1..12, weekday in 0..6 and dayofmonth in 1..31; every row starts
    with its last field set to ``False``.  A row is expected to come back
    flagged only when month is 1 or 2, weekday is 0 and dayofmonth lies in
    15..21 inclusive.
    """
    # Single source of truth for the iteration order, so the input rows and
    # the expected flags cannot drift apart.
    grid = [
        (month, weekday, dayofmonth)
        for month in range(1, 13)
        for weekday in range(7)
        for dayofmonth in range(1, 32)
    ]

    # NOTE(review): field order is presumed to match ``output_schema``
    # (defined elsewhere in this module) — confirm against the schema.
    rows = [
        [1, 100, 100, 2024, month, dayofmonth, weekday, 25, 171, 19, False]
        for month, weekday, dayofmonth in grid
    ]
    expected = [
        month < 3 and weekday == 0 and 15 <= dayofmonth <= 21
        for month, weekday, dayofmonth in grid
    ]

    df = spark.createDataFrame(rows, output_schema)
    output = transformer.holiday_MLK_and_presidents_day(df)

    actual = output.select("is_holiday").toPandas().to_numpy().flatten().tolist()
    assert actual == expected

0 comments on commit f34cd55

Please sign in to comment.