Skip to content

Commit

Permalink
IDLabFunctions: use correct date pattern for normalizeDate
Browse files Browse the repository at this point in the history
normalizeDate outputs the minutes of a date instead of the months.
Use 'MMMM' instead of 'mm' when normalizing dates.
  • Loading branch information
ghsnd authored and DylanVanAssche committed Jan 25, 2022
1 parent 5ff33f8 commit 1809b54
Show file tree
Hide file tree
Showing 17 changed files with 450 additions and 15 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
### Fixed
- Bump buildnumber during release (see [issue 227](https://gitlab.ilabt.imec.be/rml/proc/rmlmapper-java/-/issues/227))
- Always include xml: prefix (see [issue 144](https://github.com/RMLio/rmlmapper-java/issues/144))
- `normalizeDate` function uses wrong pattern (see [issue 228](https://gitlab.ilabt.imec.be/rml/proc/rmlmapper-java/-/issues/228))

### Changed
- CSVRecordFactory: Replace Apache CSV library with OpenCSV to be able to differentiate between empty string and null (see [issue 140](https://github.com/RMLio/rmlmapper-java/issues/140))
- CI: use Gitlab's own mirror feature instead of mirroring in a CI job.
- CI: enforce CHANGELOG updates.
- Added functions to normalize dates and datetimes.

### Added
- JSONRecordFactory: support JSONL files (see [issue 221](https://gitlab.ilabt.imec.be/rml/proc/rmlmapper-java/-/issues/221)).
Expand Down
92 changes: 77 additions & 15 deletions src/main/java/be/ugent/rml/functions/lib/IDLabFunctions.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.*;

public class IDLabFunctions {
Expand Down Expand Up @@ -205,27 +206,88 @@ public static boolean booleanMatch(String valueParameter, String regexParameter)
return valueParameter.matches(regexParameter);
}

///////////////////////////////
// Date formatting functions //
///////////////////////////////
// TODO check whether this is the right place for this

/**
* Returns `s` as a normalized xsd:date string, using `f` as current date form.
* Returns a given date(time) string as an ISO-8601 formatted date(time) string.
*
* @param s string
* @param f format
* @return a normalized xsd:date string
* @param dateStr Input string representing a parsable date or dateTime, e.g. "01 April 22"
* @param pattern DateTime format pattern used to parse the given dateStr as defined in {@link DateTimeFormatter}, e.g. "dd LLLL uu"
* @param language The language of dateStr, as defined in {@link Locale}
* @param includeTime If <code>true</code>, include the time part in the output.
* @return A normalized date string in the ISO-8601 format uuuu-MM-dd (xs:date) or uuuu-MM-ddTHH:mm:ss,
* or null if parsing the input fails.
*/
public static String normalizeDate(String s, String f) {
DateFormat format = new SimpleDateFormat(f);
Date date = null;
private static String normalizeDateTimeStr(String dateStr, String pattern, String language, boolean includeTime) {
try {
date = format.parse(s);
} catch (ParseException e) {
e.printStackTrace();
return s;
Locale locale = new Locale(language);
DateTimeFormatter formatter = DateTimeFormatter.ofPattern(pattern, locale);
if (includeTime) {
LocalDateTime dateTime = LocalDateTime.parse(dateStr, formatter);
return dateTime.format(DateTimeFormatter.ISO_DATE_TIME);
} else {
LocalDate date = LocalDate.parse(dateStr, formatter);
return date.toString();
}
} catch (Throwable e) {
logger.error("{}; format pattern: \"{}\", input: \"{}\", language: \"{}\"", e.getMessage(), pattern, dateStr, language);
return null;
}
DateFormat xsdDateFormat = new SimpleDateFormat("yyyy-mm-dd");
return xsdDateFormat.format(date);
}

/**
* Returns `dateStr` as a normalized xs:date string, using `pattern` as current date form.
* This is the date-only variant: it delegates to normalizeDateTimeStr with the time part disabled.
*
* @param dateStr Input string representing a parsable date, e.g. "01 April 22"
* @param pattern Date format pattern used to parse the given dateStr as defined in {@link DateTimeFormatter}, e.g. "dd MMMM uu"
* @param language The language of dateStr, as defined in {@link Locale}, e.g. "en" or "fr"
* @return A normalized date string in the ISO-8601 format uuuu-MM-dd (xs:date), or null if parsing the input fails.
*/
public static String normalizeDateWithLang(String dateStr, String pattern, String language) {
return normalizeDateTimeStr(dateStr, pattern, language, false);
}

/**
* Returns `dateStr` as a normalized xs:date string, using `pattern` as current date form. It uses the language of
* the current locale of the JVM to parse certain input strings, like names of months.
* Only the language part of the default locale is passed on; the result therefore depends on the JVM's settings
* whenever the input contains language-specific text.
*
* @param dateStr Input string representing a parsable date, e.g. "01 April 22"
* @param pattern Date format pattern used to parse the given dateStr as defined in {@link DateTimeFormatter}, e.g. "dd MMMM uu"
* @return A normalized date string in the ISO-8601 format uuuu-MM-dd (xs:date), or null if parsing the input fails.
*/
public static String normalizeDate(String dateStr, String pattern) {
return normalizeDateWithLang(dateStr, pattern, Locale.getDefault().getLanguage());
}

/**
* Returns `dateTimeStr` as a normalized xs:dateTime string, using `pattern` as current datetime form.
* This is the date-and-time variant: it delegates to normalizeDateTimeStr with the time part enabled.
*
* @param dateTimeStr Input string representing a parsable datetime, e.g. "01 April 22T11:44:00"
* @param pattern DateTime format pattern used to parse the given dateTimeStr as defined in {@link DateTimeFormatter},
*                e.g. "dd MMMM uu'T'HH:mm:ss" (literal text such as the 'T' separator must be quoted)
* @param language The language of dateTimeStr, as defined in {@link Locale}, e.g. "en" or "fr"
* @return A normalized datetime string in the ISO-8601 format uuuu-MM-ddTHH:mm:ss (xs:dateTime), or null if parsing the input fails.
*/
public static String normalizeDateTimeWithLang(String dateTimeStr, String pattern, String language) {
return normalizeDateTimeStr(dateTimeStr, pattern, language, true);
}

/**
* Returns `dateTimeStr` as a normalized xs:dateTime string, using `pattern` as current datetime form.
* It uses the language of the current locale of the JVM to parse certain input strings, like names of months.
* Only the language part of the default locale is passed on; the result therefore depends on the JVM's settings
* whenever the input contains language-specific text.
*
* @param dateTimeStr Input string representing a parsable datetime, e.g. "01 April 22T11:44:00"
* @param pattern DateTime format pattern used to parse the given dateTimeStr as defined in {@link DateTimeFormatter},
*                e.g. "dd MMMM uu'T'HH:mm:ss" (literal text such as the 'T' separator must be quoted)
* @return A normalized datetime string in the ISO-8601 format uuuu-MM-ddTHH:mm:ss (xs:dateTime), or null if parsing the input fails.
*/
public static String normalizeDateTime(String dateTimeStr, String pattern) {
return normalizeDateTimeWithLang(dateTimeStr, pattern, Locale.getDefault().getLanguage());
}



// TODO check whether this is the right place for this
public static String jsonize(Object s) throws JsonProcessingException {
ObjectWriter ow = new ObjectMapper().writer().withDefaultPrettyPrinter();
Expand Down
65 changes: 65 additions & 0 deletions src/main/resources/functions_idlab.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,50 @@ idlab-fn:slugify
lib:class "IDLabFunctions" ;
lib:method "slugify" ] .

idlab-fn:normalizeDate
a fno:Function ;
fno:name "normalizeDate" ;
rdfs:label "normalizeDate" ;
dcterms:description "Parses the input as a date and returns it as a string in the ISO-8601 format uuuu-MM-dd. It uses the current language to parse month or day names." ;
fno:expects ( idlab-fn:_strDate idlab-fn:_pattern ) ;
fno:returns ( idlab-fn:_stringOut ) ;
lib:providedBy [ lib:localLibrary "IDLabFunctions.jar" ;
lib:class "IDLabFunctions" ;
lib:method "normalizeDate" ] .

idlab-fn:normalizeDateWithLang
a fno:Function ;
fno:name "normalizeDateWithLang" ;
rdfs:label "normalizeDateWithLang" ;
dcterms:description "Parses the input as a date and returns it as a string in the ISO-8601 format uuuu-MM-dd. It uses the given language to parse month or day names." ;
fno:expects ( idlab-fn:_strDate idlab-fn:_pattern idlab-fn:_lang ) ;
fno:returns ( idlab-fn:_stringOut ) ;
lib:providedBy [ lib:localLibrary "IDLabFunctions.jar" ;
lib:class "IDLabFunctions" ;
lib:method "normalizeDateWithLang" ] .

idlab-fn:normalizeDateTime
a fno:Function ;
fno:name "normalizeDateTime" ;
rdfs:label "normalizeDateTime" ;
dcterms:description "Parses the input as a datetime and returns it as a string in the ISO-8601 format uuuu-MM-ddTHH:mm:ss. It uses the current language to parse month or day names." ;
fno:expects ( idlab-fn:_strDate idlab-fn:_pattern ) ;
fno:returns ( idlab-fn:_stringOut ) ;
lib:providedBy [ lib:localLibrary "IDLabFunctions.jar" ;
lib:class "IDLabFunctions" ;
lib:method "normalizeDateTime" ] .

idlab-fn:normalizeDateTimeWithLang
a fno:Function ;
fno:name "normalizeDateTimeWithLang" ;
rdfs:label "normalizeDateTimeWithLang" ;
dcterms:description "Parses the input as a datetime and returns it as a string in the ISO-8601 format uuuu-MM-ddTHH:mm:ss. It uses the given language to parse month or day names." ;
fno:expects ( idlab-fn:_strDate idlab-fn:_pattern idlab-fn:_lang ) ;
fno:returns ( idlab-fn:_stringOut ) ;
lib:providedBy [ lib:localLibrary "IDLabFunctions.jar" ;
lib:class "IDLabFunctions" ;
lib:method "normalizeDateTimeWithLang" ] .


grel:valueParam
a fno:Parameter ;
Expand Down Expand Up @@ -293,3 +337,24 @@ idlab-fn:_boolOut
rdfs:label "output boolean" ;
fno:type xsd:boolean ;
fno:predicate idlab-fn:o_boolOut .

idlab-fn:_strDate
a fno:Parameter ;
fno:name "string parsable to a date" ;
rdfs:label "string parsable to a date" ;
fno:type xsd:string ;
fno:predicate idlab-fn:strDate .

idlab-fn:_pattern
a fno:Parameter ;
fno:name "string representing a date pattern" ;
rdfs:label "string representing a date pattern" ;
fno:type xsd:string ;
fno:predicate idlab-fn:pattern .

idlab-fn:_lang
a fno:Parameter ;
fno:name "string representing a BCP 47 language tag" ;
rdfs:label "string representing a BCP 47 language tag" ;
fno:type xsd:string ;
fno:predicate idlab-fn:lang .
32 changes: 32 additions & 0 deletions src/test/java/be/ugent/rml/Custom_RML_FnO_Mapper_Test.java
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,38 @@ public void evaluate_idlab_F008() {
doMapping("./rml-fno-test-cases/RMLFNOTCF008/mapping.rml.ttl", "./rml-fno-test-cases/RMLFNOTCF008/output.nq");
}

/**
* Tests whether the function idlab-fn:normalizeDate is supported correctly by the mapper.
* Passes the RMLFNOTCF009 mapping file and its expected output file to doMapping.
* NOTE(review): the fixture's data.csv contains the English month abbreviation "01-Apr-20", and
* normalizeDate parses with the language of the JVM default locale, so this test presumably only
* passes when that language is English - confirm.
*/
@Test
public void evaluate_idlab_F009() {
doMapping("./rml-fno-test-cases/RMLFNOTCF009/mapping.ttl", "./rml-fno-test-cases/RMLFNOTCF009/output.ttl");
}

/**
* Tests whether the function idlab-fn:normalizeDateWithLang is supported correctly by the mapper.
* Passes the RMLFNOTCF010 mapping file and its expected output file to doMapping.
*/
@Test
public void evaluate_idlab_F010() {
doMapping("./rml-fno-test-cases/RMLFNOTCF010/mapping.ttl", "./rml-fno-test-cases/RMLFNOTCF010/output.ttl");
}

/**
* Tests whether the function idlab-fn:normalizeDateTime is supported correctly by the mapper.
* Passes the RMLFNOTCF011 mapping file and its expected output file to doMapping.
*/
@Test
public void evaluate_idlab_F011() {
doMapping("./rml-fno-test-cases/RMLFNOTCF011/mapping.ttl", "./rml-fno-test-cases/RMLFNOTCF011/output.ttl");
}

/**
* Tests whether the function idlab-fn:normalizeDateTimeWithLang is supported correctly by the mapper.
* Passes the RMLFNOTCF012 mapping file and its expected output file to doMapping.
*/
@Test
public void evaluate_idlab_F012() {
doMapping("./rml-fno-test-cases/RMLFNOTCF012/mapping.ttl", "./rml-fno-test-cases/RMLFNOTCF012/output.ttl");
}

/**
* Tests whether the function millisecondsToInstant can be loaded and is supported correctly by the mapper
*/
Expand Down
55 changes: 55 additions & 0 deletions src/test/java/be/ugent/rml/functions/lib/IDLabFunctionsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -164,4 +164,59 @@ public void slugify() {
String result = IDLabFunctions.slugify("Ben De Mééster");
assertEquals("ben-de-meester", result);
}

@Test
public void normalizeDateWithLang() {
    // Purely numeric date: no language-dependent fields are involved.
    assertEquals("2022-01-21", IDLabFunctions.normalizeDateWithLang("20220121", "yyyyMMdd", "en"));

    // Full month name with a two-digit year.
    // The pattern "dd LLLL uu" does not work on Java 8, hence "MMMM".
    final String englishDate = "01 April 22";
    final String fullMonthPattern = "dd MMMM uu";
    assertEquals("2022-04-01", IDLabFunctions.normalizeDateWithLang(englishDate, fullMonthPattern, "en"));

    // Unparsable input with an invalid pattern yields null.
    assertNull(IDLabFunctions.normalizeDateWithLang("rubbish", "yodelahiti", "en"));

    // Yields null because "April" is not French.
    assertNull(IDLabFunctions.normalizeDateWithLang(englishDate, fullMonthPattern, "fr"));

    // French abbreviated month names need a trailing '.'.
    assertEquals("2022-04-01", IDLabFunctions.normalizeDateWithLang("01-avr.-22", "dd-MMM-yy", "fr"));
}

@Test
public void normalizeDate() {
    // Numeric-only input, so the JVM default language does not influence parsing.
    final String normalized = IDLabFunctions.normalizeDate("20220121", "yyyyMMdd");
    assertEquals("2022-01-21", normalized);

    // Unparsable input with an invalid pattern yields null.
    final String unparsable = IDLabFunctions.normalizeDate("rubbish", "yodelahiti");
    assertNull(unparsable);
}

@Test
public void normalizeDateTimeWithLang() {
    // A single-digit hour in the input comes back zero-padded in the normalized output.
    final String actual = IDLabFunctions.normalizeDateTimeWithLang("20220121 7 14 33", "yyyyMMdd H m s", "en");
    assertEquals("2022-01-21T07:14:33", actual);
}

@Test
public void normalizeDateTime() {
    // normalizeDateTime parses with the language of the JVM default locale. Pin it to English
    // so that the "Apr" case below does not depend on the machine running the test, and
    // restore the previous default afterwards so other tests are unaffected.
    final java.util.Locale previousDefault = java.util.Locale.getDefault();
    java.util.Locale.setDefault(java.util.Locale.ENGLISH);
    try {
        // Space-separated, non-padded time fields.
        String input1 = "20200521 17 14 33";
        String format1 = "yyyyMMdd H m s";
        assertEquals("2020-05-21T17:14:33", IDLabFunctions.normalizeDateTime(input1, format1));

        // Literal 'T' separator: it must be quoted in the pattern.
        String input2 = "20220124T09:36:04";
        String format2 = "yyyyMMdd'T'HH:mm:ss";
        assertEquals("2022-01-24T09:36:04", IDLabFunctions.normalizeDateTime(input2, format2));

        // Abbreviated English month name and a quoted literal 'u' between hour and minute;
        // the pattern has no seconds field, and the expected output carries ":00".
        String input3 = "01-Apr-20 9u4";
        String format3 = "dd-MMM-yy H'u'm";
        assertEquals("2020-04-01T09:04:00", IDLabFunctions.normalizeDateTime(input3, format3));
    } finally {
        java.util.Locale.setDefault(previousDefault);
    }
}
}
3 changes: 3 additions & 0 deletions src/test/resources/rml-fno-test-cases/RMLFNOTCF009/data.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
ID,INPUT,PATTERN
0,20220124,yyyyMMdd
1,01-Apr-20,dd-MMM-yy
47 changes: 47 additions & 0 deletions src/test/resources/rml-fno-test-cases/RMLFNOTCF009/mapping.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
@prefix rr: <http://www.w3.org/ns/r2rml#> .
@prefix ex: <http://example.com/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix rml: <http://semweb.mmlab.be/ns/rml#> .
@prefix ql: <http://semweb.mmlab.be/ns/ql#> .
@prefix fnml: <http://semweb.mmlab.be/ns/fnml#> .
@prefix fno: <https://w3id.org/function/ontology#> .
@prefix grel: <http://users.ugent.be/~bjdmeest/function/grel.ttl#> .
@prefix idlab-fn: <http://example.com/idlab/function/> .

@base <http://example.com/base/> .

<TriplesMap1>
a rr:TriplesMap;

rml:logicalSource [
rml:source "data.csv";
rml:referenceFormulation ql:CSV
];

rr:subjectMap [
rr:template "http://example.com/{ID}"
];

rr:predicateObjectMap [

rr:predicate ex:createdAt;

rr:objectMap [
fnml:functionValue [
rr:predicateObjectMap [
rr:predicate fno:executes ;
rr:objectMap [ rr:constant idlab-fn:normalizeDate ]
];

rr:predicateObjectMap [
rr:predicate idlab-fn:strDate ;
rr:objectMap [ rml:reference "INPUT" ]
];

rr:predicateObjectMap [
rr:predicate idlab-fn:pattern ;
rr:objectMap [ rml:reference "PATTERN" ]
];
]
]
].
2 changes: 2 additions & 0 deletions src/test/resources/rml-fno-test-cases/RMLFNOTCF009/output.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<http://example.com/0> <http://example.com/createdAt> "2022-01-24" .
<http://example.com/1> <http://example.com/createdAt> "2020-04-01" .
4 changes: 4 additions & 0 deletions src/test/resources/rml-fno-test-cases/RMLFNOTCF010/data.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
ID,INPUT,PATTERN,LANG
0,20220124,yyyyMMdd,en
1,01-Apr-20,dd-MMM-yy,en
2,01-avr.-20,dd-MMM-yy,fr
Loading

0 comments on commit 1809b54

Please sign in to comment.