-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathCivet.java
191 lines (164 loc) · 5.5 KB
/
Civet.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
package org.dice_research.opal.civet;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.ResourceFactory;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dice_research.opal.civet.metrics.CategorizationMetric;
import org.dice_research.opal.civet.metrics.DataFormatMetric;
import org.dice_research.opal.civet.metrics.DateFormatMetric;
import org.dice_research.opal.civet.metrics.LicenseAvailabilityMetric;
import org.dice_research.opal.civet.metrics.MetadataQualityMetric;
import org.dice_research.opal.civet.metrics.MultipleSerializationsMetric;
import org.dice_research.opal.civet.metrics.ProviderIdentityMetric;
import org.dice_research.opal.civet.metrics.ReadabilityMetric;
import org.dice_research.opal.civet.metrics.RetrievabilityMetric;
import org.dice_research.opal.civet.metrics.TimelinessMetric;
import org.dice_research.opal.civet.metrics.UpdateRateMetric;
import org.dice_research.opal.common.interfaces.JenaModelProcessor;
import org.dice_research.opal.common.interfaces.ModelProcessor;
/**
 * Civet - OPAL quality metric component.
 *
 * <p>
 * This component calculates scores (measurements) of metadata quality metrics.
 * The Data Quality Vocabulary (DQV) is used to describe the resulting data.
 *
 * <p>
 * Long running metrics are excluded by default. To change that, use
 * {@link #setIncludeLongRunning(boolean)}.
 *
 * <p>
 * If a measurement of a metric could not be computed, an info is logged. To
 * change that, use {@link #setLogIfNotComputed(boolean)}.
 *
 * <p>
 * Existing measurements will be removed before computing new measurements by
 * default. To change that, use {@link #setRemoveMeasurements(boolean)}.
 *
 * @see <a href="https://www.w3.org/TR/vocab-dqv/">Data Quality Vocabulary (DQV)</a>
 *
 * @author Adrian Wilke
 */
@SuppressWarnings("deprecation")
public class Civet implements ModelProcessor, JenaModelProcessor {

    private static final Logger LOGGER = LogManager.getLogger();

    /** Number of metrics {@link #getMetrics()} can return at most; used to presize the list. */
    private static final int MAX_METRICS = 11;

    private boolean includeLongRunning = false;
    private boolean logIfNotComputed = true;
    private boolean removeMeasurements = true;

    /**
     * Computes quality metric scores (measurements) and adds them to the model.
     *
     * <p>
     * Existing measurements will be removed before computing new measurements by
     * default. To change that, use {@link #setRemoveMeasurements(boolean)}.
     *
     * <p>
     * An exception thrown while computing a single metric is not propagated: it is
     * logged as an error and that metric is skipped. A metric returning
     * {@code null} adds nothing to the model and is logged on info level if
     * {@link #isLoggingIfNotComputed()} is set.
     *
     * @param model      the model to read from and to add the measurements to
     * @param datasetUri the URI of the dataset to compute measurements for
     */
    @Override
    public void processModel(Model model, String datasetUri) throws Exception {
        Resource dataset = ResourceFactory.createResource(datasetUri);
        LOGGER.info("Processing dataset {}", datasetUri);

        // Remove existing measurements
        if (removeMeasurements) {
            Utils.removeAllMeasurements(model, dataset);
        }

        // Compute and add new measurements
        for (Metric metric : getMetrics()) {
            Integer score;
            try {
                score = metric.compute(model, datasetUri);
            } catch (Exception e) {
                // A failing metric must not prevent the remaining metrics from running.
                // The trailing throwable is not bound to a placeholder and is logged as
                // the exception of the event.
                LOGGER.error("Exception on computing {} for {}", metric.getUri(), datasetUri, e);
                continue;
            }

            if (score == null) {
                if (logIfNotComputed) {
                    LOGGER.info("No result for metric {} and dataset {}", metric.getUri(), datasetUri);
                }
            } else {
                model.add(
                        Utils.createMetricStatements(dataset, ResourceFactory.createResource(metric.getUri()), score));
            }
        }
    }

    /**
     * Computes quality metric scores by delegating to
     * {@link #processModel(Model, String)}.
     *
     * @param model      the model to process
     * @param datasetUri the URI of the dataset to compute measurements for
     * @return the same model instance that was passed in
     *
     * @deprecated Replaced by {@link #processModel(Model, String)}.
     */
    @Deprecated
    @Override
    public Model process(Model model, String datasetUri) throws Exception {
        processModel(model, datasetUri);
        return model;
    }

    /**
     * Gets list of available metrics.
     *
     * <p>
     * Every call creates a new list with new metric instances. The retrievability
     * metric is only contained if {@link #isIncludingLongRunning()} is set.
     *
     * @return the metrics in execution order; the aggregating metadata quality
     *         metric is the last element
     */
    public List<Metric> getMetrics() {
        List<Metric> metrics = new ArrayList<Metric>(MAX_METRICS);
        metrics.add(new CategorizationMetric());
        metrics.add(new DataFormatMetric());
        metrics.add(new DateFormatMetric());
        metrics.add(new LicenseAvailabilityMetric());
        metrics.add(new MultipleSerializationsMetric());
        metrics.add(new ProviderIdentityMetric());
        metrics.add(new ReadabilityMetric());
        if (includeLongRunning) {
            metrics.add(new RetrievabilityMetric());
        }
        metrics.add(new TimelinessMetric());
        metrics.add(new UpdateRateMetric());

        // Has to be last metric as it aggregates
        metrics.add(new MetadataQualityMetric());
        return metrics;
    }

    /**
     * If set, long running metrics are executed.
     *
     * @return {@code true} if long running metrics are included
     */
    public boolean isIncludingLongRunning() {
        return includeLongRunning;
    }

    /**
     * Sets if long running metrics should be executed.
     *
     * @param includeLongRunning {@code true} to include long running metrics
     * @return this instance, to allow chaining of calls
     */
    public Civet setIncludeLongRunning(boolean includeLongRunning) {
        this.includeLongRunning = includeLongRunning;
        return this;
    }

    /**
     * If set, logs when a measurement could not be computed.
     *
     * @return {@code true} if missing measurements are logged
     */
    public boolean isLoggingIfNotComputed() {
        return logIfNotComputed;
    }

    /**
     * Sets, if it should be logged, when a measurement could not be computed.
     *
     * @param logNotComputed {@code true} to log missing measurements
     * @return this instance, to allow chaining of calls
     */
    public Civet setLogIfNotComputed(boolean logNotComputed) {
        this.logIfNotComputed = logNotComputed;
        return this;
    }

    /**
     * If set, all existing measurements will be removed before computing new ones.
     *
     * @return {@code true} if existing measurements are removed first
     */
    public boolean isRemovingMeasurements() {
        return removeMeasurements;
    }

    /**
     * Sets if all existing measurements will be removed before computing new ones.
     *
     * @param removeMeasurements {@code true} to remove existing measurements first
     * @return this instance, to allow chaining of calls
     */
    public Civet setRemoveMeasurements(boolean removeMeasurements) {
        this.removeMeasurements = removeMeasurements;
        return this;
    }

    /**
     * @return {@code true} if missing measurements are logged
     *
     * @deprecated Replaced by {@link #isLoggingIfNotComputed()}.
     */
    @Deprecated
    public boolean isLogNotComputed() {
        return isLoggingIfNotComputed();
    }

    /**
     * @param logNotComputed {@code true} to log missing measurements
     *
     * @deprecated Replaced by {@link #setLogIfNotComputed(boolean)}.
     */
    @Deprecated
    public void setLogNotComputed(boolean logNotComputed) {
        setLogIfNotComputed(logNotComputed);
    }
}