Skip to content

Commit

Permalink
Merge pull request #28 from RepositorioNacionalCultura/dev
Browse files (browse the repository at this point in the history)
Merge dev changes. Closes #21 Closes #26
  • Loading branch information
haxdai authored Feb 20, 2018
2 parents bd8ae94 + de6b411 commit aab84ed
Show file tree
Hide file tree
Showing 9 changed files with 173 additions and 908 deletions.
2 changes: 1 addition & 1 deletion .idea/artifacts/webapp_war_exploded.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions harvester-core/harvester-core.iml
Original file line number | Diff line number | Diff line change
Expand Up @@ -16,6 +16,7 @@
<orderEntry type="library" name="Maven: com.github.SWBForms:SWBDataManager:master-1.0-SNAPSHOT-g35c84b1-9" level="project" />
<orderEntry type="library" name="Maven: com.github.SWBForms:SWBDataManager:master-1.0-SNAPSHOT-g68e51f2-11" level="project" />
<orderEntry type="library" name="Maven: com.github.RepositorioNacionalCultura:Commons:master-13e5d2264b-1" level="project" />
<orderEntry type="library" name="Maven: com.github.RepositorioNacionalCultura:Commons:master-e04f28f85b-1" level="project" />
<orderEntry type="library" name="Maven: org.freemarker:freemarker:2.3.23" level="project" />
<orderEntry type="library" name="Maven: com.github.SWBForms:SWBDataManager:master-1.0-SNAPSHOT-g48bb1fc-12" level="project" />
<orderEntry type="library" name="Maven: log4j:log4j:1.2.17" level="project" />
Expand Down
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,12 @@
package mx.gob.cultura.extractor;

import com.mongodb.*;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.model.IndexOptions;
import com.mongodb.client.model.Indexes;
import mx.gob.cultura.indexer.SimpleESIndexer;
import mx.gob.cultura.transformer.DataObjectScriptEngineMapper;
import mx.gob.cultura.util.Util;
import mx.gob.cultura.commons.Util;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;
import org.apache.log4j.Logger;
Expand Down Expand Up @@ -174,7 +177,7 @@ public void extract() throws Exception {
extractorDef.put("lastExecution", sdf.format(new Date()));

dsExtract.updateObj(extractorDef);
HashMap<String, String> hm = Util.loadOccurrences(engine);
HashMap<String, String> hm = Util.SWBForms.loadOccurrences(engine);
int r = 0;
ArrayList<String> arr = new ArrayList();
for (CSVRecord record : CSVFormat.DEFAULT.parse(in)) {
Expand All @@ -183,7 +186,6 @@ public void extract() throws Exception {
extracting = true;
dsExtract.updateObj(extractorDef);


//arreglo con el nombre de las columnas
if (r == 0) {
int c = 0;
Expand All @@ -193,8 +195,8 @@ public void extract() throws Exception {
arr.add(c, "oaiid");
} else {
field = field.toLowerCase().trim();
field = Util.replaceSpecialCharacters(field, true);
field = Util.replaceOccurrences(hm, field.trim());
field = Util.TEXT.replaceSpecialCharacters(field, true);
field = Util.TEXT.replaceOccurrences(hm, field.trim());

arr.add(c, field);
}
Expand All @@ -216,7 +218,7 @@ public void extract() throws Exception {
}
c++;
}
BasicDBObject bjson = Util.toBasicDBObject(rec);
BasicDBObject bjson = Util.SWBForms.toBasicDBObject(rec);
objects.insert(bjson);
}
r++;
Expand Down Expand Up @@ -257,8 +259,14 @@ public void extract() throws Exception {

//Generar el nuevo DataObject combinado por cada prefix
try {
//Create index in spanish language
IndexOptions opts = new IndexOptions();
opts.defaultLanguage("spanish");

DB db = ExtractorManager.client.getDB(extractorDef.getString("name").toUpperCase());
DBCollection objects = db.getCollection("fullobjects");
MongoCollection mcoll = Util.MONGODB.getMongoClient().getDatabase(extractorDef.getString("name").toUpperCase()).getCollection("TransObject");
mcoll.createIndex(Indexes.compoundIndex(Indexes.text("identifier.value"),Indexes.text("resourcetitle"),Indexes.text("resourcedescription")), opts);
SWBDataSource transobjs = engine.getDataSource("TransObject", extractorDef.getString("name").toUpperCase());

// System.out.println("encontro DB y colección...");
Expand All @@ -281,12 +289,12 @@ public void extract() throws Exception {
dobj = (DataObject) DataObject.parseJSON(next.toString());
try {
DataObject result = mapper.map(dobj);
HashMap<String, String> hmmaptable = Util.loadExtractorMapTable(engine, extractorDef);
HashMap<String, String> hmmaptable = Util.SWBForms.loadExtractorMapTable(engine, extractorDef);

// Mapeo de propiedades definidas en la tabla con los a encontrar en los catálogos
// Se actualizan las propiedades del DataObject
if (!hmmaptable.isEmpty()) {
Util.findProps(result, hmmaptable, engine);
Util.SWBForms.findProps(result, hmmaptable, engine);
}
//System.out.println("Antes de agregar el objeto");
result.put("forIndex", true);
Expand All @@ -307,8 +315,6 @@ public void extract() throws Exception {

//eliminando colección fullobjects
objects.drop();


} catch (Exception e) {
log.error("Error al indexar\n", e);
}
Expand Down
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,12 @@
package mx.gob.cultura.extractor;

import com.mongodb.*;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.model.IndexOptions;
import com.mongodb.client.model.Indexes;
import mx.gob.cultura.indexer.SimpleESIndexer;
import mx.gob.cultura.transformer.DataObjectScriptEngineMapper;
import mx.gob.cultura.util.Util;
import mx.gob.cultura.commons.Util;
import org.apache.log4j.Logger;
import org.json.JSONArray;
import org.json.JSONException;
Expand Down Expand Up @@ -195,7 +198,7 @@ public void extract() throws Exception {
ext_lastExec = ext_lastExec.replace(" ", "T");
}

HashMap<String, String> hm = Util.loadOccurrences(engine);
HashMap<String, String> hm = Util.SWBForms.loadOccurrences(engine);
boolean isResumeExtract = false;
if (ext_pfxActual != null) {
isResumeExtract = true;
Expand Down Expand Up @@ -253,7 +256,7 @@ public void extract() throws Exception {
log.debug(jsonstr.substring(1, jsonstr.length() - 1));
break;
}
jsonstr = Util.replaceOccurrences(hm, jsonstr);
jsonstr = Util.TEXT.replaceOccurrences(hm, jsonstr);

if (jsonstr.contains("resumptionToken")) {
tknFound = true;
Expand Down Expand Up @@ -342,7 +345,7 @@ public void extract() throws Exception {
DataObject rec = new DataObject();
rec.put("oaiid", nid);
rec.put("body", DataObject.parseJSON(nodeAsString));
BasicDBObject bjson = Util.toBasicDBObject(rec);
BasicDBObject bjson = Util.SWBForms.toBasicDBObject(rec);
objects.insert(bjson);
itemsExtracted++;
} else {
Expand Down Expand Up @@ -562,7 +565,7 @@ public void process() throws Exception {
//hmfull.put(key.trim(), dobj);

// Esta parte sólo es para verificar como forma los objetos completos.
BasicDBObject bjson = Util.toBasicDBObject(dobj);
BasicDBObject bjson = Util.SWBForms.toBasicDBObject(dobj);
if (add2DB) {
objects.insert(bjson);
} else {
Expand Down Expand Up @@ -613,10 +616,16 @@ public void process() throws Exception {
List<String> list = null;
//Generar el nuevo DataObject combinado por cada prefix
try {
//Create index in spanish language
IndexOptions opts = new IndexOptions();
opts.defaultLanguage("spanish");

DB db = ExtractorManager.client.getDB(extractorDef.getString("name").toUpperCase());
DBCollection objects = db.getCollection("fullobjects");
MongoCollection mcoll = Util.MONGODB.getMongoClient().getDatabase(extractorDef.getString("name").toUpperCase()).getCollection("TransObject");
mcoll.createIndex(Indexes.compoundIndex(Indexes.text("identifier"),Indexes.text("resourcetitle"),Indexes.text("resourcedescription")), opts);
SWBDataSource transobjs = engine.getDataSource("TransObject", extractorDef.getString("name").toUpperCase());

DataObject dobj = null;

try {
Expand All @@ -641,11 +650,11 @@ public void process() throws Exception {

//Transformación del DataObject
DataObject result = mapper.map(dobj);
HashMap<String, String> hmmaptable = Util.loadExtractorMapTable(engine, extractorDef);
HashMap<String, String> hmmaptable = Util.SWBForms.loadExtractorMapTable(engine, extractorDef);
// Mapeo de propiedades definidas en la tabla con los a encontrar en los catálogos
// Se actualizan las propiedades del DataObject
if (!hmmaptable.isEmpty()) {
Util.findProps(result, hmmaptable, engine);
Util.SWBForms.findProps(result, hmmaptable, engine);
}
//System.out.println("Antes de agregar el objeto");
result.put("forIndex", true);
Expand All @@ -670,7 +679,6 @@ public void process() throws Exception {
extractorDef.put("status", STATUS.FINISHED.name());
extractorDef.put("transformed", numItemsIndexed);
dsExtract.updateObj(extractorDef);

//eliminando colección fullobjects
objects.drop();

Expand Down
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,11 @@
package mx.gob.cultura.indexer.elastic;

import mx.gob.cultura.commons.Util;
import mx.gob.cultura.datasource.Cursor;
import mx.gob.cultura.datasource.DataSourceObject;
import mx.gob.cultura.exception.IndexException;
import mx.gob.cultura.indexer.IndexerBase;
import mx.gob.cultura.util.Util;
import mx.gob.cultura.commons.Util;
import org.apache.log4j.Logger;
import org.bson.Document;
import org.elasticsearch.action.index.IndexRequest;
Expand Down Expand Up @@ -47,7 +48,7 @@ public ESIndexer (String host, int port, String[] indexNames) {

@Override
public void index() throws IndexException {
RestHighLevelClient client = Util.DB.getElasticClient(null==host?"localhost":host, port>0?port:9200);
RestHighLevelClient client = Util.ELASTICSEARCH.getElasticClient(null==host?"localhost":host, port>0?port:9200);
Cursor cur = getDataSource().fetch(null);

while (cur.hasNext()) {
Expand Down
Loading

0 comments on commit aab84ed

Please sign in to comment.