Skip to content

Commit

Permalink
Added more tests for data ingestor
Browse files Browse the repository at this point in the history
  • Loading branch information
Shakleen committed Sep 29, 2024
1 parent 9bbbe04 commit 27ae8b2
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 3 deletions.
7 changes: 7 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"python.testing.pytestArgs": [
"."
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
}
38 changes: 35 additions & 3 deletions test/components/data_ingestor_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,12 @@
@pytest.fixture(scope="session")
def mock_dataframe():
    """Build one shared Mock standing in for a DataFrame.

    Every method configured below returns this same mock when called, so
    chained calls in the code under test keep resolving to one object and
    tests can check results by identity.

    The fixture is session-scoped: a single instance, together with its
    recorded calls, is shared by every test that requests it.
    """
    df = Mock(name="dataframe")

    # Configure only ``return_value``.  Rebinding a method to ``df`` itself
    # (e.g. ``df.withColumn = df``) would make a later
    # ``df.withColumn.return_value = df`` set ``df.return_value`` instead and
    # would drop the per-method call tracking.
    df.withColumnRenamed.return_value = df
    df.withColumn.return_value = df
    df.drop.return_value = df
    df.union.return_value = df

    # NOTE(review): these two lines configure *calls* (``df.write()`` and
    # ``df.save()``).  If the ingestor reads ``write`` as an attribute, as
    # PySpark's ``DataFrame.write`` property is used, they have no effect.
    # Confirm against the code under test.
    df.write.return_value = df
    df.save.return_value = df
    return df


Expand Down Expand Up @@ -47,5 +50,34 @@ def test_read_csv(spark_mock: SparkSession, ingestor: DataIngestor):

def test_read_pre_and_post_csv_dir(spark_mock: SparkSession, ingestor: DataIngestor):
    """Check that reading both CSV directories yields two frames.

    Both entries must be the very object the mocked ``spark.read.csv``
    returns.
    """
    frames = ingestor.read_pre_and_post_csv_dir()

    assert len(frames) == 2

    expected_frame = spark_mock.read.csv.return_value
    first, second = frames[0], frames[1]
    assert first is expected_frame
    assert second is expected_frame


def test_combine_dataframes(mocker, mock_dataframe, ingestor: DataIngestor):
    """Check that ``combine_dataframes`` unions its inputs exactly once.

    ``monotonically_increasing_id`` and ``input_file_name`` are patched in
    ``src.components.data_ingestor`` so that calling them returns a Mock
    (presumably so that no live Spark context is needed; confirm against
    the ingestor).  The same mock frame is passed for both arguments, and
    the result must be that mock.
    """
    mocker.patch(
        "src.components.data_ingestor.monotonically_increasing_id",
        return_value=Mock(),
    )
    mocker.patch(
        "src.components.data_ingestor.input_file_name",
        return_value=Mock(),
    )

    # ``mock_dataframe`` is session-scoped, so calls recorded by earlier
    # tests would otherwise count towards ``assert_called_once`` below.
    # ``reset_mock()`` clears call history only; configured return values
    # are kept by default.
    mock_dataframe.union.reset_mock()

    output = ingestor.combine_dataframes(mock_dataframe, mock_dataframe)

    mock_dataframe.union.assert_called_once()
    assert output is mock_dataframe


def test_fix_column_names_and_dtypes(mocker, mock_dataframe, ingestor: DataIngestor):
    """Check that ``fix_column_names_and_dtypes`` returns both frames.

    ``col``, ``IntegerType`` and ``when`` are patched in
    ``src.components.data_ingestor`` so that calling each returns a Mock.
    The same mock frame is passed twice, and both entries of the result
    must be that mock.
    """
    patch_targets = (
        "src.components.data_ingestor.col",
        "src.components.data_ingestor.IntegerType",
        "src.components.data_ingestor.when",
    )
    for target in patch_targets:
        # A fresh Mock per target, patched in the original order.
        mocker.patch(target, return_value=Mock())

    result = ingestor.fix_column_names_and_dtypes(mock_dataframe, mock_dataframe)

    assert len(result) == 2
    assert all(result[index] is mock_dataframe for index in (0, 1))

0 comments on commit 27ae8b2

Please sign in to comment.