Skip to content

Commit

Permalink
Add an ID to acceptance nodes to differentiate between accepted tokens
Browse files Browse the repository at this point in the history
  • Loading branch information
skordal committed Jun 25, 2020
1 parent 8c9620b commit d132331
Show file tree
Hide file tree
Showing 10 changed files with 78 additions and 9 deletions.
12 changes: 11 additions & 1 deletion src/regex-matchers.adb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ with Regex.State_Machines; use Regex.State_Machines;

package body Regex.Matchers is

function Matches (Input : in Regular_Expression; Query : in String) return Boolean is
function Matches (Input : in Regular_Expression; Query : in String; Match_Id : out Natural) return Boolean is
Current_State : State_Machine_State_Access := Input.Get_Start_State;
begin
for Symbol of Query loop
Expand Down Expand Up @@ -35,9 +35,19 @@ package body Regex.Matchers is
end;
end loop;

if Current_State.Accepting then
Match_Id := Current_State.Acceptance_id;
end if;

return Current_State.Accepting;
end Matches;

function Matches (Input : in Regular_Expression; Query : in String) return Boolean is
Id : Natural;
begin
return Matches (Input, Query, Id);
end Matches;


function Get_Match (Input : in Regular_Expression; Query : in String; Complete_Match : out Boolean) return String
is
Expand Down
6 changes: 5 additions & 1 deletion src/regex-matchers.ads
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@ with Regex.Regular_Expressions; use Regex.Regular_Expressions;

package Regex.Matchers is

-- Checks of a string matches a regular expression:
-- Checks if a string matches a regular expression:
function Matches (Input : in Regular_Expression; Query : in String; Match_Id : out Natural)
return Boolean;

-- Checks if a string matches a regular expression:
function Matches (Input : in Regular_Expression; Query : in String) return Boolean;

-- Gets the first part of a string that matches a regular expression:
Expand Down
1 change: 1 addition & 0 deletions src/regex-regular_expressions-compile.adb
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ begin
Input_Symbols.Add (new Input_Symbol'(Symbol_Type => Any_Character));
elsif Syntax_Node.Node_Type = Acceptance then
Unmarked_State.Accepting := True;
Unmarked_State.Acceptance_Id := Syntax_Node.Acceptance_Id;
end if;
end loop;

Expand Down
5 changes: 3 additions & 2 deletions src/regex-state_machines.adb
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,9 @@ package body Regex.State_Machines is
function Create_State (Syntax_Tree_Nodes : in Syntax_Tree_Node_Sets.Sorted_Set) return State_Machine_State_Access is
Retval : constant State_Machine_State_Access := new State_Machine_State'(
Syntax_Tree_Nodes => Syntax_Tree_Nodes,
Transitions => State_Machine_Transition_Vectors.Empty_Vector,
others => False);
Transitions => State_Machine_Transition_Vectors.Empty_Vector,
Acceptance_Id => 0,
others => False);
begin
return Retval;
end Create_State;
Expand Down
3 changes: 3 additions & 0 deletions src/regex-state_machines.ads
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ package Regex.State_Machines is
-- Outgoing transitions from this state:
Transitions : State_Machine_Transition_Vectors.Vector;

-- Acceptance ID (can be used by lexer IDs to recognize different tokens):
Acceptance_Id : Natural;

Marked, Accepting : Boolean := False;
end record;
package State_Machine_State_Vectors is new Ada.Containers.Vectors (
Expand Down
11 changes: 8 additions & 3 deletions src/regex-syntax_trees.adb
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,14 @@ package body Regex.Syntax_Trees is
begin
Next_Id := Next_Id + 1;

if Retval.Node_Type = Single_Character then
Retval.Char := Root.Char;
end if;
case Retval.Node_Type is
when Single_Character =>
Retval.Char := Root.Char;
when Acceptance =>
Retval.Acceptance_Id := Root.Acceptance_Id;
when others =>
null;
end case;

if Root.Left_Child /= null then
Retval.Left_Child := Clone_Tree (Root.Left_Child, Next_Id);
Expand Down
2 changes: 2 additions & 0 deletions src/regex-syntax_trees.ads
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ package Regex.Syntax_Trees is
case Node_Type is
when Single_Character =>
Char : Character;
when Acceptance =>
Acceptance_Id : Natural;
when others =>
null;
end case;
Expand Down
40 changes: 39 additions & 1 deletion tests/regex_test_cases.adb
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,36 @@ package body Regex_Test_Cases is
Matches (Test_Expr, "ef");
end Test_Syntax_Tree_Compile;

procedure Test_Multiple_Accept (T : in out Test_Fixture) is
pragma Unreferenced (T);

First_Expression : constant Regular_Expression := Create ("abc");
Second_Expression : constant Regular_Expression := Create ("def");
begin

Get_Acceptance_Node (First_Expression.Get_Syntax_Tree).Acceptance_Id := 1;
Get_Acceptance_Node (Second_Expression.Get_Syntax_Tree).Acceptance_Id := 2;

declare
Id_Counter : Natural := 1;
Combined_Tree : Syntax_Tree_Node_Access := Create_Node (
Node_Type => Alternation,
Id => 0,
Left_Child => Clone_Tree (First_Expression.Get_Syntax_Tree, Id_Counter),
Right_Child => Clone_Tree (Second_Expression.Get_Syntax_Tree, Id_Counter));
Test_Expr : constant Regular_Expression := Create (Combined_Tree);

Match_Id : Natural;
begin
Free_Recursively (Combined_Tree);

Does_Not_Match_Empty_Strings (Test_Expr);
Matches (Test_Expr, "abc", 1);
Matches (Test_Expr, "def", 2);
end;

end Test_Multiple_Accept;

------ Test utility functions -----

procedure Matches_Empty_Strings (Regex : in Regular_Expression) is
Expand All @@ -288,7 +318,15 @@ package body Regex_Test_Cases is
begin
Assert (Matches (Regex, Matching), "regex does not match correct input string '"
& Matching & "'");
null;
end Matches;

procedure Matches (Regex : in Regular_Expression; Matching : in String; Expected_Id : in Natural) is
Match_Id : Natural;
begin
Assert (Matches (Regex, Matching, Match_Id), "regex does not match correct input string '"
& Matching & "'");
Assert (Match_Id = Expected_Id, "expected match ID ( " & Natural'Image (Expected_Id)
& ") does not match actual match ID (" & Natural'Image (Match_Id) & ")");
end Matches;

procedure Does_Not_Match (Regex : in Regular_Expression; Not_Matching : in String) is
Expand Down
5 changes: 4 additions & 1 deletion tests/regex_test_cases.ads
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,17 @@ package Regex_Test_Cases is
procedure Test_Partial_Matching (T : in out Test_Fixture);
procedure Test_Newlines (T : in out Test_Fixture);
procedure Test_Syntax_Tree_Compile (T : in out Test_Fixture);
procedure Test_Multiple_Accept (T : in out Test_Fixture);

private
use Regex.Regular_Expressions;

procedure Matches_Empty_Strings (Regex : in Regular_Expression) with Inline;
procedure Does_Not_Match_Empty_Strings (Regex : in Regular_Expression) with Inline;

procedure Matches (Regex : in Regular_Expression; Matching : in String) with Inline;
procedure Matches (Regex : in Regular_Expression; Matching : in String) with Inline;
procedure Matches (Regex : in Regular_Expression; Matching : in String; Expected_Id : in Natural) with Inline;

procedure Does_Not_Match (Regex : in Regular_Expression; Not_Matching : in String) with Inline;

end Regex_Test_Cases;
Expand Down
2 changes: 2 additions & 0 deletions tests/regex_test_suite.adb
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ package body Regex_Test_Suite is
Regex_Test_Cases.Test_Newlines'Access));
Retval.Add_Test (Regex_Test_Caller.Create ("syntax-tree-compile",
Regex_Test_Cases.Test_Syntax_Tree_Compile'Access));
Retval.Add_Test (Regex_Test_Caller.Create ("multiple-accept",
Regex_Test_Cases.Test_Multiple_Accept'Access));

return Retval;
end Test_Suite;
Expand Down

0 comments on commit d132331

Please sign in to comment.