Skip to content

Commit

Permalink
Move from Boost regexes to standard library regexes
Browse files Browse the repository at this point in the history
Regular expressions are currently only used for query
pipelines. Comments:

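- The regular expression should be precompiled once instead of being constructed at every call site.
- We do not really need regular expressions for this application; the separator is the fixed string "+|+".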
  • Loading branch information
danieldk authored and Daniël de Kok committed Apr 6, 2020
1 parent 9c25b5f commit 7f32f9c
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 12 deletions.
4 changes: 3 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ endif()

include(GNUInstallDirs REQUIRED)

find_package(Boost 1.50 COMPONENTS system filesystem regex REQUIRED)
find_package(Boost 1.50 COMPONENTS system filesystem REQUIRED)

if(Boost_FOUND)
include_directories(${Boost_INCLUDE_DIRS})
Expand Down Expand Up @@ -135,6 +135,7 @@ set(HEADERS
src/util/base64.hh
src/util/bufutil.hh
src/util/parseString.hh
src/util/split.hh
src/util/textfile.hh
src/util/url.hh
)
Expand Down Expand Up @@ -163,6 +164,7 @@ set(SOURCES
src/macros.cpp
src/parseMacros.cpp
src/util/NameCompare.cpp
src/util/split.cpp
src/util/textfile.cpp
src/util/url.cpp
)
Expand Down
17 changes: 6 additions & 11 deletions src/CorpusReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,12 @@
#include <cstring>
#include <list>
#include <memory>
#include <regex>
#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>

#include <boost/algorithm/string/regex.hpp>
#include <boost/regex.hpp>

#include <AlpinoCorpus/CorpusInfo.hh>
#include <AlpinoCorpus/CorpusReader.hh>
#include <AlpinoCorpus/Error.hh>
Expand All @@ -35,6 +33,7 @@
#include "FilterIter.hh"
#include "StylesheetIter.hh"
#include "util/parseString.hh"
#include "util/split.hh"

namespace xerces = XERCES_CPP_NAMESPACE;

Expand Down Expand Up @@ -241,8 +240,7 @@ namespace alpinocorpus {
std::string const &defaultValue,
CorpusInfo const &corpusInfo) const
{
std::vector<std::string> queries;
boost::split_regex(queries, query, boost::regex("\\+\\|\\+"));
auto queries = split_string(query, std::regex("\\+\\|\\+"));
assert(queries.size() > 0);

// Discard pre-filters
Expand Down Expand Up @@ -307,8 +305,7 @@ namespace alpinocorpus {

Either<std::string, Empty> CorpusReader::isValidQuery(QueryDialect d, bool variables, std::string const &q) const
{
std::vector<std::string> queries;
boost::split_regex(queries, q, boost::regex("\\+\\|\\+"));
auto queries = split_string(q, std::regex("\\+\\|\\+"));
assert(queries.size() > 0);

for (std::vector<std::string>::const_iterator iter = queries.begin();
Expand Down Expand Up @@ -338,8 +335,7 @@ namespace alpinocorpus {
for (std::list<MarkerQuery>::iterator iter = effectiveQueries.begin();
iter != effectiveQueries.end(); ++iter)
{
std::vector<std::string> queries;
boost::split_regex(queries, iter->query, boost::regex("\\+\\|\\+"));
auto queries = split_string(iter->query, std::regex("\\+\\|\\+"));
assert(queries.size() > 0);
iter->query = queries.back();
}
Expand Down Expand Up @@ -545,8 +541,7 @@ namespace alpinocorpus {
{
if (d == XPATH)
{
std::vector<std::string> queries;
boost::split_regex(queries, q, boost::regex("\\+\\|\\+"));
auto queries = split_string(q, std::regex("\\+\\|\\+"));
assert(queries.size() > 0);

EntryIterator qIter = runXPath(queries[0], sortOrder);
Expand Down
14 changes: 14 additions & 0 deletions src/util/split.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#include <algorithm>
#include <regex>
#include <string>
#include <vector>

// Split a string into the fields that are separated by matches of a
// regular expression.
//
// Every match of `re` in `s` acts as a separator. The text before the first
// match, between consecutive matches, and after the last match is returned
// in order of occurrence; the separators themselves are not part of the
// result. A separator at the very start or the very end of `s` produces an
// empty first or last field, so n matches always yield n + 1 fields.
//
// s:  the string to split.
// re: the expression whose matches separate the fields.
//
// Returns the fields. The result is never empty: when `re` does not match
// at all (which includes an empty `s`), it holds `s` as its only element.
std::vector<std::string> split_string(std::string const &s, std::regex const &re) {
  std::vector<std::string> parts;

  // Start of the field that is currently being collected.
  auto fieldBegin = s.cbegin();

  for (std::sregex_iterator match(s.cbegin(), s.cend(), re), last;
       match != last; ++match) {
    auto const &separator = (*match)[0];
    parts.emplace_back(fieldBegin, separator.first);
    fieldBegin = separator.second;
  }

  // The remainder after the last separator is a field as well, even when it
  // is empty. std::sregex_token_iterator with submatch -1 skips an empty
  // remainder, which made a trailing separator vanish without a trace while
  // a leading separator still produced an empty field.
  parts.emplace_back(fieldBegin, s.cend());

  return parts;
}
5 changes: 5 additions & 0 deletions src/util/split.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#ifndef ALPINOCORPUS_UTIL_SPLIT_HH
#define ALPINOCORPUS_UTIL_SPLIT_HH

#include <regex>
#include <string>
#include <vector>

// Split `s` into the substrings that lie between matches of `re`.
//
// The matches of `re` act as separators and are not part of the result;
// the pieces of `s` around them are returned in order of occurrence.
//
// s:  the string to split.
// re: the expression whose matches separate the pieces.
std::vector<std::string> split_string(std::string const &s, std::regex const &re);

#endif // ALPINOCORPUS_UTIL_SPLIT_HH

0 comments on commit 7f32f9c

Please sign in to comment.