Skip to content

Commit

Permalink
Fix for issue #18. Provided access to capture group info after compil…
Browse files Browse the repository at this point in the history
…e using get_name_table() method.
  • Loading branch information
Tiihott authored and kortemik committed Dec 8, 2023
1 parent a5bfcd5 commit 673d1e4
Show file tree
Hide file tree
Showing 3 changed files with 148 additions and 8 deletions.
93 changes: 93 additions & 0 deletions src/main/c/JavaPcre.c
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,12 @@ typedef struct CompileData_TAG {
int erroroffset;
} CompileData;

// Named capture group information of a compiled pattern, as filled in by
// pcre2_get_info_group(). The field order is mirrored by the JNA GroupData
// structure on the Java side, so it must not be changed on one side only.
typedef struct GroupData_TAG {
char** names;     // array of zero-terminated group names, one per name-table entry
int* namesnum;    // capture group number paired with names[i]
int namescount;   // number of entries in names and namesnum
} GroupData;

// regex struct/array implementation starts here
typedef struct RegexStruct_TAG {
int numVals;
Expand Down Expand Up @@ -380,6 +386,93 @@ int pcre2_check_utf8(char temp){
}
}

// Provides access to capture group info after compile but before matching.
// The name table of the compiled pattern is read via pcre2_pattern_info()
// (PCRE2_INFO_NAMECOUNT, PCRE2_INFO_NAMETABLE, PCRE2_INFO_NAMEENTRYSIZE) and
// copied into a GroupData value.
//
// re: the compiled pattern to inspect.
//
// Returns a GroupData where namescount is the number of named groups and
// names[i] / namesnum[i] hold the zero-terminated name and the capture group
// number of the i-th name-table entry. When the pattern has no named groups,
// or the pattern information cannot be read (for example re is NULL),
// namescount is 0 and both pointers are NULL, so nothing needs to be freed.
// A non-empty result is released with free_group_data().
//
// On allocation failure an error is printed and the process exits.
GroupData pcre2_get_info_group(pcre2_code *re){
    PCRE2_SPTR name_table = NULL;
    uint32_t namecount = 0;
    uint32_t name_entry_size = 0;
    GroupData temp;
    uint32_t i;

    // Start from an empty result so every early return hands back defined values.
    temp.names = NULL;
    temp.namesnum = NULL;
    temp.namescount = 0;

    // pcre2_pattern_info() yields 0 on success; on failure the output variable
    // is not written, so the result must not be used.
    if (pcre2_pattern_info(
            re,                     /* the compiled pattern */
            PCRE2_INFO_NAMECOUNT,   /* get the number of named substrings */
            &namecount) != 0        /* where to put the answer */
        || namecount == 0) {
        return temp;
    }

    /* Before we can access the substrings, we must extract the table for
       translating names to numbers, and the size of each entry in the table. */
    if (pcre2_pattern_info(
            re,                         /* the compiled pattern */
            PCRE2_INFO_NAMETABLE,       /* address of the table */
            &name_table) != 0           /* where to put the answer */
        || pcre2_pattern_info(
            re,                         /* the compiled pattern */
            PCRE2_INFO_NAMEENTRYSIZE,   /* size of each entry in the table */
            &name_entry_size) != 0      /* where to put the answer */
        || name_table == NULL
        || name_entry_size < 3) {       /* 2 number bytes + at least the terminator */
        return temp;
    }

    temp.namesnum = (int*)calloc(namecount, sizeof(int));
    temp.names = (char**)calloc(namecount, sizeof(char*));
    if (temp.names == NULL || temp.namesnum == NULL) {
        printf("Error: Out of memory\r\n");
        exit(-1);
    }
    temp.namescount = (int)namecount;

    // name table is stored in this format:
    // 00 01 d a t e 00 ??
    // 00 05 d a y 00 ?? ??
    // etc.
    // first two bytes (00 and 01) are the number of the capturing parenthesis,
    // followed by the zero-terminated name; ?? is an undefined padding byte.
    for (i = 0; i < namecount; i++) {
        PCRE2_SPTR entry = name_table + (size_t)i * name_entry_size;
        size_t name_capacity = (size_t)name_entry_size - 2; // name bytes incl. terminator

        // Group number is stored big-endian in the first two bytes of the entry.
        temp.namesnum[i] = (entry[0] << 8) | entry[1];

        // calloc zero-fills the buffer, so the last byte stays a terminator even
        // when the copied bytes contain undefined padding after a shorter name.
        temp.names[i] = (char*)calloc(name_capacity, sizeof(char));
        if (temp.names[i] == NULL) {
            printf("Error: Out of memory\r\n");
            exit(-1);
        }
        memcpy(temp.names[i], (const char *)(entry + 2), name_capacity - 1);
    }
    return temp;
}

// Releases the memory owned by a GroupData value: every name string, then the
// names and namesnum arrays themselves.
//
// sVal: the GroupData to release; its pointers must not be used afterwards.
//
// The two arrays are freed regardless of namescount, because a producer may
// have allocated them even for an empty name table; skipping them in that
// case would leak. free(NULL) is a no-op, so NULL arrays are handled as well.
void free_group_data(GroupData sVal) {
    int loop;

    // Guard the element loop so a NULL names array is never dereferenced.
    if (sVal.names != NULL) {
        for (loop = 0; loop < sVal.namescount; loop++) {
            free(sVal.names[loop]);
        }
    }
    free(sVal.names);
    free(sVal.namesnum);
}


// this function contains matching for a single match
RegexStruct pcre2_single_jmatch(char *b, pcre2_code *re, int offset, MatchOptionsStruct *temp, pcre2_match_context *mcontext){
pcre2_match_data *match_data;
Expand Down
46 changes: 38 additions & 8 deletions src/main/java/com/teragrep/jpr_01/JavaPcre.java
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,21 @@ public static class ByValue extends RegexStruct implements Structure.ByValue {}
public int rc;
}

// JNA mapping of the native GroupData struct that carries the named capture
// groups of a compiled pattern. The field order must match the C struct:
// names, namesnum, namescount.
@FieldOrder({ "names", "namesnum", "namescount" })
class GroupData extends Structure {
// By-value variant, used where the native functions pass the struct by value.
public static class ByValue extends GroupData implements Structure.ByValue {}
public Pointer names; // char**: native array of group name strings
public Pointer namesnum; // int*: native array of group numbers, paired by index with names
public int namescount; // number of entries in names and namesnum
}

void RegexStruct_cleanup(RegexStruct.ByValue sVal);

CompileData.ByValue pcre2_jcompile(String pattern, int i, OptionsStruct options, Pointer ccontext); // returns struct containing compiled pattern re

// Reads the named capture group table of the compiled pattern `re`.
GroupData.ByValue pcre2_get_info_group(Pointer re); // returns struct containing group info
// Must be called once the contents of the struct have been copied to the Java side.
void free_group_data(GroupData.ByValue sVal); // releases the memory allocated to the group info struct.

RegexStruct.ByValue pcre2_single_jmatch(String subject, Pointer re, int offset, MatchOptionsStruct match_options, Pointer mcontext); // returns pointer to a single match data.
ErrorStruct.ByValue pcre2_translate_error_code(int errorcode);

Expand Down Expand Up @@ -183,9 +194,27 @@ public JavaPcre(){
mcontext = null; // default value for when context is not used in match
matchfound = false;
JPCRE2_ERROR_NOMATCH = false;
name_table = new HashMap<>();
}
// Make another constructor if/when memory management is implemented to the context functions.

/**
 * Replaces the contents of {@code name_table} with the name-to-number pairs
 * carried by the given native group data.
 *
 * <p>Existing entries are always discarded; new entries are added only when
 * {@code groupData.namescount} is greater than zero.
 *
 * @param groupData group info struct obtained from the native library
 */
public void set_name_table(LibJavaPcre.GroupData.ByValue groupData) {
    final int groupCount = groupData.namescount;

    // Entries of a previously compiled pattern never survive a new table.
    if (!name_table.isEmpty()) {
        name_table.clear();
    }
    if (groupCount <= 0) {
        return;
    }

    final String[] groupNames = groupData.names.getStringArray(0, groupCount);
    final int[] groupNumbers = groupData.namesnum.getIntArray(0, groupCount);
    for (int idx = 0; idx < groupCount; idx++) {
        name_table.put(groupNames[idx], groupNumbers[idx]);
    }
}

/**
 * Returns the capture group name table.
 *
 * @return the internal map itself (not a copy), keyed by group name with the
 *     group number as value
 */
public Map<String, Integer> get_name_table() {
    return this.name_table;
}
Expand Down Expand Up @@ -340,6 +369,12 @@ public void compile_java(String pat){
LibJavaPcre.INSTANCE.errorcleanup(p);
throw new PatternSyntaxException(val, pattern, comp_val.erroroffset);
}
else{
// initialize the groupData and translate it to name_table format.
LibJavaPcre.GroupData.ByValue groupData = LibJavaPcre.INSTANCE.pcre2_get_info_group(re);
set_name_table(groupData);
LibJavaPcre.INSTANCE.free_group_data(groupData);
}
}

public boolean checkmatchoptionzero(){
Expand All @@ -354,7 +389,6 @@ public void singlematch_java(String a, int b){
if (a == null) {
throw new IllegalStateException("Subject is null");
}
name_table = new LinkedHashMap<>();
subject = a;
offset = b;
match_table = new LinkedHashMap<>();
Expand Down Expand Up @@ -395,13 +429,6 @@ public void singlematch_java(String a, int b){
matchfound = true;
final String[] regex_vals = regex_val.vals.getStringArray(0, regex_val.numVals);
final int[] regex_ovector = regex_val.ovector.getIntArray(0, (regex_val.numVals + 2));
if (regex_val.namescount > 0) {
final String[] regex_names = regex_val.names.getStringArray(0, regex_val.namescount);
final int[] namesnum = regex_val.namesnum.getIntArray(0, regex_val.namescount);
for (int namesloop = 0; namesloop < regex_val.namescount; namesloop++) {
name_table.put(regex_names[namesloop], namesnum[namesloop]);
}
}
for (int regexloop = 0; regexloop < regex_val.numVals; regexloop++) {
match_table.put(ind++, regex_vals[regexloop]);
}
Expand All @@ -421,6 +448,9 @@ public void jcompile_free(){
if (re != null){
LibJavaPcre.INSTANCE.pcre2_jcompile_free(re);
re = null;
if (!name_table.isEmpty()) {
name_table.clear(); // clear name_table that is constructed using the compiled pattern.
}
}else{
throw new IllegalStateException("No data to free");
}
Expand Down
17 changes: 17 additions & 0 deletions src/test/java/com/teragrep/jpr_01/JavaPcreIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.Assertions;

import java.util.HashMap;
import java.util.Map;

import org.slf4j.Logger;
Expand Down Expand Up @@ -164,6 +165,22 @@ void pcre2_compile_test() {
Assertions.assertEquals(null, s1.get_re());
}

@Test
void pcre2_compile_test_with_group_data() {
    // Name -> group number pairs the pattern below is expected to produce.
    final Map<String, Integer> expectedGroups = new HashMap<>();
    expectedGroups.put("nimi", 1);
    expectedGroups.put("sposti", 2);

    // The name table must be available right after compile, before any match.
    final JavaPcre pcre = new JavaPcre();
    pcre.compile_java("From:(?<nimi>[^@]+)@(?<sposti>[^\r]+)");
    Assertions.assertNotEquals(null, pcre.get_re());

    final Map<String, Integer> actualGroups = pcre.get_name_table();
    Assertions.assertEquals(2, actualGroups.size());
    Assertions.assertEquals(expectedGroups, actualGroups);

    // Freeing the compiled pattern also empties the table obtained above.
    pcre.jcompile_free();
    Assertions.assertEquals(null, pcre.get_re());
    Assertions.assertEquals(0, actualGroups.size());
}

@Test
void pcre2_translator_test() {
// broken
Expand Down

0 comments on commit 673d1e4

Please sign in to comment.