@@ -37,7 +37,7 @@ def is_not_null_and_not_empty(col_name: str, trim_strings: bool = False) -> Colu
37
37
column = F .col (col_name )
38
38
if trim_strings :
39
39
column = F .trim (column ).alias (col_name )
40
- condition = column .isNull () | (column .try_cast ("string" ) == F .lit ("" ))
40
+ condition = column .isNull () | (column .cast ("string" ). isNull () | ( column . cast ( "string" ) == F .lit ("" ) ))
41
41
return make_condition (condition , f"Column { col_name } is null or empty" , f"{ col_name } _is_null_or_empty" )
42
42
43
43
@@ -48,8 +48,8 @@ def is_not_empty(col_name: str) -> Column:
48
48
:return: Column object for condition
49
49
"""
50
50
column = F .col (col_name )
51
- column = column .try_cast ("string" )
52
- return make_condition (( column == "" ) , f"Column { col_name } is empty" , f"{ col_name } _is_empty" )
51
+ condition = column .cast ("string" ) == F . lit ( " " )
52
+ return make_condition (condition , f"Column { col_name } is empty" , f"{ col_name } _is_empty" )
53
53
54
54
55
55
def is_not_null (col_name : str ) -> Column :
@@ -77,7 +77,7 @@ def value_is_not_null_and_is_in_list(col_name: str, allowed: list) -> Column:
77
77
F .concat_ws (
78
78
"" ,
79
79
F .lit ("Value " ),
80
- F .when (column .isNull (), F .lit ("null" )).otherwise (column .try_cast ("string" )),
80
+ F .when (column .isNull (), F .lit ("null" )).otherwise (column .cast ("string" )),
81
81
F .lit (" is not in the allowed list: [" ),
82
82
F .concat_ws (", " , * allowed_cols ),
83
83
F .lit ("]" ),
@@ -381,15 +381,15 @@ def is_valid_date(col_name: str, date_format: str | None = None) -> Column:
381
381
:param date_format: date format (e.g. 'yyyy-MM-dd')
382
382
:return: Column object for condition
383
383
"""
384
- str_col = F .col (col_name )
385
- date_col = str_col . try_cast ( "date" ) if date_format is None else F .try_to_timestamp (str_col , F .lit (date_format ))
386
- condition = F .when (str_col .isNull (), F .lit (None )).otherwise (date_col .isNull ())
384
+ column = F .col (col_name )
385
+ date_col = F . try_to_timestamp ( column ) if date_format is None else F .try_to_timestamp (column , F .lit (date_format ))
386
+ condition = F .when (column .isNull (), F .lit (None )).otherwise (date_col .isNull ())
387
387
condition_str = "' is not a valid date"
388
388
if date_format is not None :
389
389
condition_str += f" with format '{ date_format } '"
390
390
return make_condition (
391
391
condition ,
392
- F .concat_ws ("" , F .lit ("Value '" ), str_col , F .lit (condition_str )),
392
+ F .concat_ws ("" , F .lit ("Value '" ), column , F .lit (condition_str )),
393
393
f"{ col_name } _is_not_valid_date" ,
394
394
)
395
395
@@ -401,18 +401,16 @@ def is_valid_timestamp(col_name: str, timestamp_format: str | None = None) -> Co
401
401
:param timestamp_format: timestamp format (e.g. 'yyyy-MM-dd HH:mm:ss')
402
402
:return: Column object for condition
403
403
"""
404
- str_col = F .col (col_name )
404
+ column = F .col (col_name )
405
405
ts_col = (
406
- str_col .try_cast ("timestamp" )
407
- if timestamp_format is None
408
- else F .try_to_timestamp (str_col , F .lit (timestamp_format ))
406
+ F .try_to_timestamp (column ) if timestamp_format is None else F .try_to_timestamp (column , F .lit (timestamp_format ))
409
407
)
410
- condition = F .when (str_col .isNull (), F .lit (None )).otherwise (ts_col .isNull ())
408
+ condition = F .when (column .isNull (), F .lit (None )).otherwise (ts_col .isNull ())
411
409
condition_str = "' is not a valid timestamp"
412
410
if timestamp_format is not None :
413
411
condition_str += f" with format '{ timestamp_format } '"
414
412
return make_condition (
415
413
condition ,
416
- F .concat_ws ("" , F .lit ("Value '" ), str_col , F .lit (condition_str )),
414
+ F .concat_ws ("" , F .lit ("Value '" ), column , F .lit (condition_str )),
417
415
f"{ col_name } _is_not_valid_timestamp" ,
418
416
)
0 commit comments