Skip to content

Commit

Permalink
move preg_quote to runtime-common (#1192)
Browse files Browse the repository at this point in the history
  • Loading branch information
apolyakov authored Dec 18, 2024
1 parent 11c5362 commit 6f81360
Show file tree
Hide file tree
Showing 9 changed files with 110 additions and 96 deletions.
13 changes: 5 additions & 8 deletions builtin-functions/kphp-light/regex.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,24 +17,21 @@ define('PREG_SPLIT_DELIM_CAPTURE', 16);
define('PREG_SPLIT_OFFSET_CAPTURE', 32);
define('PREG_UNMATCHED_AS_NULL', 64);

function preg_match ($regex ::: string, $subject ::: string, &$matches ::: mixed = TODO, $flags ::: int = 0, $offset ::: int = 0) ::: int | false;
function preg_match ($regex ::: string, $subject ::: string, &$matches ::: mixed = TODO, $flags ::: int = 0, $offset ::: int = 0): int | false;

function preg_match_all ($regex ::: string, $subject ::: string, &$matches ::: mixed = TODO, $flags ::: int = 0, $offset ::: int = 0) ::: int | false;
function preg_match_all ($regex ::: string, $subject ::: string, &$matches ::: mixed = TODO, $flags ::: int = 0, $offset ::: int = 0): int | false;

function preg_replace ($regex ::: string | array, $replace_val ::: string | array, $subject ::: string | array, $limit ::: int = -1, &$replace_count ::: int = TODO) ::: string | ^3 | false | null;
function preg_quote ($str ::: string, $delimiter ::: string = ''): string;

function preg_replace ($regex ::: string | array, $replace_val ::: string | array, $subject ::: string | array, $limit ::: int = -1, &$replace_count ::: int = TODO): string | ^3 | false | null;

// ===== UNSUPPORTED =====


function preg_replace_callback ($regex ::: regexp, callable(string[] $x):string $callback, $subject, $limit ::: int = -1, &$replace_count ::: int = TODO) ::: ^3|string|null;

/** @kphp-extern-func-info generate-stub */
function preg_quote ($str ::: string, $delimiter ::: string = '') ::: string;

/** @kphp-extern-func-info generate-stub */
function preg_last_error() ::: int;

function preg_split ($pattern ::: regexp, $subject ::: string, $limit ::: int = -1, $flags ::: int = 0) ::: mixed[] | false;



2 changes: 1 addition & 1 deletion runtime-common/stdlib/stdlib.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ prepend(STDLIB_ARRAY stdlib/array/ array-functions.cpp)
prepend(STDLIB_MATH stdlib/math/ math-functions.cpp)
prepend(STDLIB_SERIALIZATION stdlib/serialization/ json-functions.cpp
json-writer.cpp serialize-functions.cpp)
prepend(STDLIB_STRING stdlib/string/ mbstring-functions.cpp
prepend(STDLIB_STRING stdlib/string/ mbstring-functions.cpp regex-functions.cpp
string-functions.cpp)
prepend(STDLIB_SERVER stdlib/server/ url-functions.cpp)

Expand Down
56 changes: 56 additions & 0 deletions runtime-common/stdlib/string/regex-functions.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Compiler for PHP (aka KPHP)
// Copyright (c) 2024 LLC «V Kontakte»
// Distributed under the GPL v3 License, see LICENSE.notice.txt

#include "runtime-common/stdlib/string/regex-functions.h"

#include "runtime-common/core/runtime-core.h"

// Escapes regular-expression metacharacters in `str` so the result can be embedded
// literally into a pattern.
//
// - Every character from the set . \ + * ? [ ^ ] $ ( ) { } = ! > < | : - # is prefixed
//   with a backslash.
// - A NUL byte is replaced by the four characters \000.
// - If `delimiter` is non-empty, occurrences of its FIRST character are prefixed with a
//   backslash as well (the remaining delimiter characters are ignored).
//
// The result is assembled in the runtime context's shared `static_SB` buffer, which is
// cleaned at the start of every call.
string f$preg_quote(const string &str, const string &delimiter) noexcept {
  // Tells whether `c` belongs to the fixed set of characters that always get escaped.
  const auto is_regex_special = [](char c) noexcept -> bool {
    switch (c) {
      case '.':
      case '\\':
      case '+':
      case '*':
      case '?':
      case '[':
      case '^':
      case ']':
      case '$':
      case '(':
      case ')':
      case '{':
      case '}':
      case '=':
      case '!':
      case '>':
      case '<':
      case '|':
      case ':':
      case '-':
      case '#':
        return true;
      default:
        return false;
    }
  };

  auto &buffer{RuntimeContext::get().static_SB};
  const string::size_type length{str.size()};
  // a single input character expands to at most 4 output characters (NUL -> \000)
  buffer.clean().reserve(4 * length);

  for (string::size_type pos = 0; pos < length; ++pos) {
    const char current{str[pos]};

    if (current == '\0') {
      buffer.append_char('\\');
      buffer.append_char('0');
      buffer.append_char('0');
      buffer.append_char('0');
      continue;
    }

    if (is_regex_special(current) || (!delimiter.empty() && current == delimiter[0])) {
      buffer.append_char('\\');
    }
    buffer.append_char(current);
  }

  return buffer.str();
}
9 changes: 9 additions & 0 deletions runtime-common/stdlib/string/regex-functions.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// Compiler for PHP (aka KPHP)
// Copyright (c) 2024 LLC «V Kontakte»
// Distributed under the GPL v3 License, see LICENSE.notice.txt

#pragma once

#include "runtime-common/core/runtime-core.h"

// Returns a copy of `str` in which regular-expression metacharacters are backslash-escaped
// and NUL bytes are written as \000. When `delimiter` is non-empty, its first character is
// escaped too; by default no extra delimiter character is escaped.
string f$preg_quote(const string &str, const string &delimiter = {}) noexcept;
31 changes: 14 additions & 17 deletions runtime-light/stdlib/string/regex-functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,11 @@ bool valid_preg_replace_mixed(const mixed &param) noexcept {

// Checks that `flags` has no bits set outside the bitwise OR of `supported_flags`
// (at least one supported flag of type int64_t must be passed).
// NOTE(review): this span comes from a rendered diff. The first `return` line looks like the
// pre-change body and the statements after it like the post-change body, which computes the
// same check against regex::PREG_NO_FLAGS and additionally emits a php_warning with the
// offending flags value before returning the result — confirm against the actual file.
template<typename... Args>
requires((std::is_same_v<Args, int64_t> && ...) && sizeof...(Args) > 0) bool valid_regex_flags(int64_t flags, Args... supported_flags) noexcept {
return (flags & ~(supported_flags | ...)) == PREG_NO_FLAGS;
const bool valid{(flags & ~(supported_flags | ...)) == regex::PREG_NO_FLAGS};
if (!valid) [[unlikely]] {
php_warning("invalid flags: %" PRIi64, flags);
}
return valid;
}

bool correct_offset(int64_t &offset, std::string_view subject) noexcept {
Expand Down Expand Up @@ -331,8 +335,8 @@ PCRE2_SIZE set_matches(const RegexInfo &regex_info, int64_t flags, mixed &matche

const auto &regex_state{RegexInstanceState::get()};

const auto offset_capture{static_cast<bool>(flags & PREG_OFFSET_CAPTURE)};
const auto unmatched_as_null{static_cast<bool>(flags & PREG_UNMATCHED_AS_NULL)};
const auto offset_capture{static_cast<bool>(flags & regex::PREG_OFFSET_CAPTURE)};
const auto unmatched_as_null{static_cast<bool>(flags & regex::PREG_UNMATCHED_AS_NULL)};
// get the output vector from the match data
const auto *ovector{pcre2_get_ovector_pointer_8(regex_state.regex_pcre2_match_data.get())};
// calculate last matched group
Expand Down Expand Up @@ -386,7 +390,7 @@ PCRE2_SIZE set_matches(const RegexInfo &regex_info, int64_t flags, mixed &matche
// *** important ***
// in case of a pattern order all_matches must already contain all groups as empty arrays before the first call to set_all_matches
PCRE2_SIZE set_all_matches(const RegexInfo &regex_info, int64_t flags, mixed &all_matches) noexcept {
const auto pattern_order{!static_cast<bool>(flags & PREG_SET_ORDER)};
const auto pattern_order{!static_cast<bool>(flags & regex::PREG_SET_ORDER)};

mixed matches;
PCRE2_SIZE offset{set_matches(regex_info, flags, matches, pattern_order ? trailing_unmatch::include : trailing_unmatch::skip)};
Expand Down Expand Up @@ -487,11 +491,7 @@ Optional<int64_t> f$preg_match(const string &pattern, const string &subject, mix
matches = array<mixed>{};
RegexInfo regex_info{{pattern.c_str(), pattern.size()}, {subject.c_str(), subject.size()}, {}, RuntimeAllocator::get().memory_resource};

bool success{valid_regex_flags(flags, PREG_NO_FLAGS, PREG_OFFSET_CAPTURE, PREG_UNMATCHED_AS_NULL)};
if (!success) [[unlikely]] {
php_warning("invalid preg_match flags %" PRIi64, flags);
return false;
}
bool success{valid_regex_flags(flags, regex::PREG_NO_FLAGS, regex::PREG_OFFSET_CAPTURE, regex::PREG_UNMATCHED_AS_NULL)};
success &= correct_offset(offset, regex_info.subject);
success &= parse_regex(regex_info);
success &= compile_regex(regex_info);
Expand All @@ -510,18 +510,15 @@ Optional<int64_t> f$preg_match_all(const string &pattern, const string &subject,
int64_t entire_match_count{};
RegexInfo regex_info{{pattern.c_str(), pattern.size()}, {subject.c_str(), subject.size()}, {}, RuntimeAllocator::get().memory_resource};

bool success{valid_regex_flags(flags, PREG_NO_FLAGS, PREG_PATTERN_ORDER, PREG_SET_ORDER, PREG_OFFSET_CAPTURE, PREG_UNMATCHED_AS_NULL)};
if (!success) [[unlikely]] {
php_warning("invalid preg_match_all flags %" PRIi64, flags);
return false;
}
bool success{valid_regex_flags(flags, regex::PREG_NO_FLAGS, regex::PREG_PATTERN_ORDER, regex::PREG_SET_ORDER, regex::PREG_OFFSET_CAPTURE,
regex::PREG_UNMATCHED_AS_NULL)};
success &= correct_offset(offset, regex_info.subject);
success &= parse_regex(regex_info);
success &= compile_regex(regex_info);
success &= collect_group_names(regex_info);

// pre-init matches in case of pattern order
if (success && !static_cast<bool>(flags & PREG_SET_ORDER)) [[likely]] {
if (success && !static_cast<bool>(flags & regex::PREG_SET_ORDER)) [[likely]] {
const array<mixed> init_val{};
for (const auto *group_name : regex_info.group_names) {
if (group_name != nullptr) {
Expand Down Expand Up @@ -556,7 +553,7 @@ Optional<int64_t> f$preg_match_all(const string &pattern, const string &subject,

Optional<string> f$preg_replace(const string &pattern, const string &replacement, const string &subject, int64_t limit, int64_t &count) noexcept {
count = 0;
if (limit < 0 && limit != PREG_REPLACE_NOLIMIT) [[unlikely]] {
if (limit < 0 && limit != regex::PREG_REPLACE_NOLIMIT) [[unlikely]] {
php_warning("invalid limit %" PRIi64 " in preg_replace", limit);
return {};
}
Expand Down Expand Up @@ -586,7 +583,7 @@ Optional<string> f$preg_replace(const string &pattern, const string &replacement

bool success{parse_regex(regex_info)};
success &= compile_regex(regex_info);
success &= replace_regex(regex_info, limit == PREG_REPLACE_NOLIMIT ? std::numeric_limits<uint64_t>::max() : static_cast<uint64_t>(limit));
success &= replace_regex(regex_info, limit == regex::PREG_REPLACE_NOLIMIT ? std::numeric_limits<uint64_t>::max() : static_cast<uint64_t>(limit));
if (!success) [[unlikely]] {
return {};
}
Expand Down
40 changes: 23 additions & 17 deletions runtime-light/stdlib/string/regex-functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
#include "runtime-common/core/runtime-core.h"
#include "runtime-light/stdlib/string/regex-state.h"

namespace regex {

inline constexpr int64_t PREG_NO_ERROR = 0;
inline constexpr int64_t PREG_INTERNAL_ERROR = 1;
inline constexpr int64_t PREG_BACKTRACK_LIMIT_ERROR = 2;
Expand All @@ -27,35 +29,37 @@ inline constexpr auto PREG_UNMATCHED_AS_NULL = static_cast<int64_t>(1U << 6U);

inline constexpr int64_t PREG_REPLACE_NOLIMIT = -1;

} // namespace regex

using regexp = string;

Optional<int64_t> f$preg_match(const string &pattern, const string &subject, mixed &matches = RegexInstanceState::get().default_matches,
int64_t flags = PREG_NO_FLAGS, int64_t offset = 0) noexcept;
int64_t flags = regex::PREG_NO_FLAGS, int64_t offset = 0) noexcept;

Optional<int64_t> f$preg_match_all(const string &pattern, const string &subject, mixed &matches = RegexInstanceState::get().default_matches,
int64_t flags = PREG_NO_FLAGS, int64_t offset = 0) noexcept;
int64_t flags = regex::PREG_NO_FLAGS, int64_t offset = 0) noexcept;

/*
* PHP's implementation of preg_replace doesn't replace some errors. For example, consider replacement containing
* back reference $123. It cannot be found in the pattern, but PHP doesn't report it as error, it just treats such
* back reference as an empty string. Our implementation warns user about such error and returns null.
*/
Optional<string> f$preg_replace(const string &pattern, const string &replacement, const string &subject, int64_t limit = PREG_REPLACE_NOLIMIT,
Optional<string> f$preg_replace(const string &pattern, const string &replacement, const string &subject, int64_t limit = regex::PREG_REPLACE_NOLIMIT,
int64_t &count = RegexInstanceState::get().default_preg_replace_count) noexcept;

Optional<string> f$preg_replace(const mixed &pattern, const string &replacement, const string &subject, int64_t limit = PREG_REPLACE_NOLIMIT,
Optional<string> f$preg_replace(const mixed &pattern, const string &replacement, const string &subject, int64_t limit = regex::PREG_REPLACE_NOLIMIT,
int64_t &count = RegexInstanceState::get().default_preg_replace_count) noexcept;

Optional<string> f$preg_replace(const mixed &pattern, const mixed &replacement, const string &subject, int64_t limit = PREG_REPLACE_NOLIMIT,
Optional<string> f$preg_replace(const mixed &pattern, const mixed &replacement, const string &subject, int64_t limit = regex::PREG_REPLACE_NOLIMIT,
int64_t &count = RegexInstanceState::get().default_preg_replace_count) noexcept;

mixed f$preg_replace(const mixed &pattern, const mixed &replacement, const mixed &subject, int64_t limit = PREG_REPLACE_NOLIMIT,
mixed f$preg_replace(const mixed &pattern, const mixed &replacement, const mixed &subject, int64_t limit = regex::PREG_REPLACE_NOLIMIT,
int64_t &count = RegexInstanceState::get().default_preg_replace_count) noexcept;

template<class T1, class T2, class T3, class = enable_if_t_is_optional<T3>>
auto f$preg_replace(const T1 &regex, const T2 &replace_val, const T3 &subject, int64_t limit = PREG_REPLACE_NOLIMIT,
int64_t &replace_count = RegexInstanceState::get().default_preg_replace_count) noexcept {
return f$preg_replace(regex, replace_val, subject.val(), limit, replace_count);
auto f$preg_replace(const T1 &regex, const T2 &replace_val, const T3 &subject, int64_t limit = regex::PREG_REPLACE_NOLIMIT,
int64_t &count = RegexInstanceState::get().default_preg_replace_count) noexcept {
return f$preg_replace(regex, replace_val, subject.val(), limit, count);
}

template<class T1, class T2, class T3, class = enable_if_t_is_optional<T3>>
Expand All @@ -65,31 +69,33 @@ auto f$preg_replace_callback(const T1 &regex, const T2 &replace_val, const T3 &s
}

template<class T>
Optional<string> f$preg_replace_callback(const regexp &, const T &, const string &, int64_t = -1,
int64_t & = RegexInstanceState::get().default_preg_replace_count) {
Optional<string> f$preg_replace_callback(const regexp & /*unused*/, const T & /*unused*/, const string & /*unused*/, int64_t /*unused*/ = -1,
int64_t & /*unused*/ = RegexInstanceState::get().default_preg_replace_count) {
php_critical_error("call to unsupported function");
}

template<class T>
mixed f$preg_replace_callback(const regexp &, const T &, const mixed &, int64_t = -1, int64_t & = RegexInstanceState::get().default_preg_replace_count) {
mixed f$preg_replace_callback(const regexp & /*unused*/, const T & /*unused*/, const mixed & /*unused*/, int64_t /*unused*/ = -1,
int64_t & /*unused*/ = RegexInstanceState::get().default_preg_replace_count) {
php_critical_error("call to unsupported function");
}

template<class T>
Optional<string> f$preg_replace_callback(const mixed &, const T &, const string &, int64_t = -1,
int64_t & = RegexInstanceState::get().default_preg_replace_count) {
Optional<string> f$preg_replace_callback(const mixed & /*unused*/, const T & /*unused*/, const string & /*unused*/, int64_t /*unused*/ = -1,
int64_t & /*unused*/ = RegexInstanceState::get().default_preg_replace_count) {
php_critical_error("call to unsupported function");
}

template<class T>
mixed f$preg_replace_callback(const mixed &, const T &, const mixed &, int64_t = -1, int64_t & = RegexInstanceState::get().default_preg_replace_count) {
mixed f$preg_replace_callback(const mixed & /*unused*/, const T & /*unused*/, const mixed & /*unused*/, int64_t /*unused*/ = -1,
int64_t & /*unused*/ = RegexInstanceState::get().default_preg_replace_count) {
php_critical_error("call to unsupported function");
}

inline Optional<array<mixed>> f$preg_split(const string &, const string &, int64_t = -1, int64_t = 0) {
inline Optional<array<mixed>> f$preg_split(const string & /*unused*/, const string & /*unused*/, int64_t /*unused*/ = -1, int64_t /*unused*/ = 0) {
php_critical_error("call to unsupported function");
}

inline Optional<array<mixed>> f$preg_split(const mixed &, const string &, int64_t = -1, int64_t = 0) {
inline Optional<array<mixed>> f$preg_split(const mixed & /*unused*/, const string & /*unused*/, int64_t /*unused*/ = -1, int64_t /*unused*/ = 0) {
php_critical_error("call to unsupported function");
}
50 changes: 0 additions & 50 deletions runtime/regexp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1074,56 +1074,6 @@ int64_t regexp::last_error() {
}
}


// Escapes regular-expression metacharacters in `str`.
//
// Characters from the set . \ + * ? [ ^ ] $ ( ) { } = ! > < | : - # are prefixed with a
// backslash, a NUL byte becomes \000, and — when `delimiter` is non-empty — its first
// character is prefixed with a backslash too. The output is built in
// `kphp_runtime_context.static_SB`, which is cleaned at the start of the call.
string f$preg_quote(const string &str, const string &delimiter) {
  auto &out = kphp_runtime_context.static_SB;
  const string::size_type total = str.size();

  // worst case is a NUL byte, which expands to 4 characters
  out.clean().reserve(4 * total);

  for (string::size_type idx = 0; idx < total; ++idx) {
    const char ch = str[idx];
    bool needs_backslash = false;

    switch (ch) {
      case '\0':
        // NUL has its own replacement sequence and is never copied verbatim
        out.append_char('\\');
        out.append_char('0');
        out.append_char('0');
        out.append_char('0');
        continue;
      case '.':
      case '\\':
      case '+':
      case '*':
      case '?':
      case '[':
      case '^':
      case ']':
      case '$':
      case '(':
      case ')':
      case '{':
      case '}':
      case '=':
      case '!':
      case '>':
      case '<':
      case '|':
      case ':':
      case '-':
      case '#':
        needs_backslash = true;
        break;
      default:
        // only the first delimiter character is taken into account
        needs_backslash = !delimiter.empty() && ch == delimiter[0];
        break;
    }

    if (needs_backslash) {
      out.append_char('\\');
    }
    out.append_char(ch);
  }

  return out.str();
}

void regexp::global_init() {
extra.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
extra.match_limit = PCRE_BACKTRACK_LIMIT;
Expand Down
3 changes: 1 addition & 2 deletions runtime/regexp.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include "runtime-common/core/runtime-core.h"
#include "runtime-common/stdlib/string/mbstring-functions.h"
#include "runtime-common/stdlib/string/regex-functions.h"
#include "runtime-common/stdlib/tracing/tracing.h"
#include "runtime/context/runtime-context.h"

Expand Down Expand Up @@ -209,8 +210,6 @@ inline Optional<array<mixed>> f$preg_split(const string &regex, const string &su

// Forward declaration of the preg_split overload that takes the pattern as `mixed`.
// `limit` defaults to -1 and `flags` to 0.
inline Optional<array<mixed>> f$preg_split(const mixed &regex, const string &subject, int64_t limit = -1, int64_t flags = 0);

string f$preg_quote(const string &str, const string &delimiter = string());

inline int64_t f$preg_last_error();

/*
Expand Down
2 changes: 1 addition & 1 deletion tests/phpt/regexp/006_preg_quote.php
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
@ok k2_skip
@ok
<?php

// Starting with PHP 7.3, preg_quote does escape '#' sign.
Expand Down

0 comments on commit 6f81360

Please sign in to comment.