diff --git a/README.md b/README.md index cf9139b..d51f56b 100644 --- a/README.md +++ b/README.md @@ -86,6 +86,8 @@ The options are described thoroughly in the file itself. By default, the package and the [`PorterStemmer`](src/Stemmer/PorterStemmer.php) which is suitable for the English language. The search adds a trailing wildcard to the last token and not all search terms need to be found in order for a document to show up in the results (there must be at least one match though). +You may also add a wildcard to each search token by enabling `wildcard_all_tokens` in the config file although this is not recommended for performance reasons. + _A basic installation most likely does not require you to change any of these settings. Just to make sure, you should have a look at the `connection` option though. If you want to change this, do so before running the migrations or the tables will be created using the wrong database connection._ diff --git a/config/scout-database.php b/config/scout-database.php index 740bdf0..0d97633 100644 --- a/config/scout-database.php +++ b/config/scout-database.php @@ -167,6 +167,42 @@ 'wildcard_last_token' => true, + /* + |-------------------------------------------------------------------------- + | Use Wildcard for all Search Tokens + |-------------------------------------------------------------------------- + | + | This setting controls whether all tokens of a search query should be + | handled using a wildcard instead of an exact match. This basically + | means that for a search input of "hell wor", the query will match + | documents containing "hell%" or "wor%" where % is the SQL wildcard of + | a "like" condition. The wildcard will only be applied for + | search tokens that have a minimum length of `wildcard_min_length`. + | + | Setting this to `true` will add a wildcard to the end of each search + | token. 
You may also set this to "both" to add wildcards to the beginning + | and the end of each search token, for example: "%hell%" and "%wor%". + | + | Note: Please note that changing this setting may negatively impact the + | performance of search queries. Also you might want to make sure that + | `require_match_for_all_tokens` is set to `false` when using this. + | + */ + + 'wildcard_all_tokens' => false, + + /* + |-------------------------------------------------------------------------- + | Minimum Token Length to apply Wildcards + |-------------------------------------------------------------------------- + | + | If `wildcard_all_tokens` is enabled this setting defines the minimum + | length search tokens must have before wildcards are applied. + | + */ + + 'wildcard_min_length' => 3, + /* |-------------------------------------------------------------------------- | Require a Match for all Tokens diff --git a/phpunit.xml b/phpunit.xml index 1e22368..70ba9e6 100644 --- a/phpunit.xml +++ b/phpunit.xml @@ -1,7 +1,7 @@ tests - + src src/Stemmer - + diff --git a/src/DatabaseSeeker.php b/src/DatabaseSeeker.php index f89386f..f19e536 100644 --- a/src/DatabaseSeeker.php +++ b/src/DatabaseSeeker.php @@ -11,6 +11,7 @@ use Namoshek\Scout\Database\Contracts\Stemmer; use Namoshek\Scout\Database\Contracts\Tokenizer; use Namoshek\Scout\Database\Support\DatabaseHelper; +use Illuminate\Support\Str; /** * The database seeker searches the database for collection items of a specific model, @@ -75,10 +76,7 @@ public function search(Builder $builder, int $page = 1, int $pageSize = null): S */ private function performSearch(Builder $builder, array $keywords, int $page, ?int $limit): SearchResult { - // Add a wildcard to the last search token if it is configured. - if ($this->searchConfiguration->lastTokenShouldUseWildcard()) { - $keywords[count($keywords) - 1] .= '%'; - } + $keywords = $this->addWildcards($keywords); // First, we retrieve the paginated results. 
$results = $this->createSearchQuery($builder, $keywords) @@ -104,6 +102,33 @@ private function performSearch(Builder $builder, array $keywords, int $page, ?in return new SearchResult($builder, $results, $totalHits); } + /** + * Add wildcards to the given keywords if configured. + * + * @param string[] $keywords + */ + private function addWildcards(array $keywords): array + { + // Add a wildcard to each search token if it is configured. + if ($this->searchConfiguration->allTokensShouldUseWildcard()) { + $pattern = $this->searchConfiguration->allTokensShouldUseWildcard() === 'both' ? '%%%s%%' : '%s%%'; + + $keywords = array_map( + fn ($token) => mb_strlen($token) >= $this->searchConfiguration->minimumLengthForWildcard() + ? sprintf($pattern, $token) + : $token, + $keywords + ); + } + + // Add a wildcard to the last search token if it is configured. + if ($this->searchConfiguration->lastTokenShouldUseWildcard()) { + $keywords[count($keywords) - 1] = Str::finish($keywords[count($keywords) - 1], '%'); + } + + return $keywords; + } + /** * Creates a new search query using the given builder. The query can be used to retrieve paginated results * and also to count the total number of potential hits. 
diff --git a/src/ScoutDatabaseServiceProvider.php b/src/ScoutDatabaseServiceProvider.php index 116b6bd..b0fb285 100644 --- a/src/ScoutDatabaseServiceProvider.php +++ b/src/ScoutDatabaseServiceProvider.php @@ -68,7 +68,9 @@ public function register(): void $config->get('scout-database.search.term_frequency_weight', 1), $config->get('scout-database.search.term_deviation_weight', 1), $config->get('scout-database.search.wildcard_last_token', true), - $config->get('scout-database.search.require_match_for_all_tokens', false) + $config->get('scout-database.search.require_match_for_all_tokens', false), + $config->get('scout-database.search.wildcard_all_tokens', false), + $config->get('scout-database.search.wildcard_min_length', 3), ); }); } diff --git a/src/SearchConfiguration.php b/src/SearchConfiguration.php index ebfb435..8ca51bf 100644 --- a/src/SearchConfiguration.php +++ b/src/SearchConfiguration.php @@ -19,7 +19,9 @@ public function __construct( private float $termFrequencyWeight, private float $termDeviationWeight, private bool $wildcardLastToken, - private bool $requireMatchForAllTokens + private bool $requireMatchForAllTokens, + private bool|string $wildcardAllTokens = false, + private int $wildcardMinLength = 3, ) { } @@ -56,6 +58,22 @@ public function lastTokenShouldUseWildcard(): bool return $this->wildcardLastToken; } + /** + * Returns whether all tokens of a search query shall use a wildcard. + */ + public function allTokensShouldUseWildcard(): string|bool + { + return $this->wildcardAllTokens; + } + + /** + * Returns the minimum token length for using wildcards. + */ + public function minimumLengthForWildcard(): int + { + return $this->wildcardMinLength; + } + /** * Returns whether search shall only return documents containing all searched tokens. 
*/ diff --git a/tests/ScopedDatabaseSeekerTest.php b/tests/ScopedDatabaseSeekerTest.php index a289063..d81a808 100644 --- a/tests/ScopedDatabaseSeekerTest.php +++ b/tests/ScopedDatabaseSeekerTest.php @@ -9,7 +9,6 @@ use Illuminate\Contracts\Config\Repository as ConfigRepository; use Illuminate\Database\ConnectionInterface; use Illuminate\Foundation\Testing\DatabaseMigrations; -use Namoshek\Scout\Database\DatabaseSeeker; use Namoshek\Scout\Database\SearchResult; use Namoshek\Scout\Database\Tests\Stubs\User; @@ -285,4 +284,62 @@ public function test_builder_returned_by_raw_results_is_the_one_used_for_searchi $this->assertEquals($builder, $result->getBuilder()); } + + public function test_does_not_find_documents_if_wildcard_all_tokens_is_disabled_and_no_exact_match_is_given(): void + { + $result = User::search('eur ent')->where('tenant_id', self::TENANT_ID_1)->keys(); + + $this->assertEmpty($result); + } + + public function test_finds_documents_if_wildcard_all_tokens_is_enabled_and_no_exact_match_is_given(): void + { + $this->app->make('config')->set('scout-database.search.wildcard_all_tokens', true); + + $result = User::search('eur ent')->where('tenant_id', self::TENANT_ID_1)->keys(); + + $this->assertEquals([6, 7], $result->toArray()); + } + + public function test_finds_documents_if_wildcard_all_tokens_is_set_to_both(): void + { + $this->app->make('config')->set('scout-database.search.wildcard_all_tokens', 'both'); + + $result = User::search('eur ent')->where('tenant_id', self::TENANT_ID_1)->keys(); + + $this->assertEquals([8, 6, 7], $result->toArray()); + } + + public function test_does_not_find_documents_by_wildcard_if_minimum_token_length_is_not_reached(): void + { + $this->app->make('config')->set('scout-database.search.wildcard_all_tokens', 'both'); + $this->app->make('config')->set('scout-database.search.wildcard_min_length', 4); + + $result = User::search('eur ent')->where('tenant_id', self::TENANT_ID_1)->keys(); + + $this->assertEmpty($result); + } + + 
public function test_finds_documents_by_wildcard_if_minimum_token_length_is_reached(): void + { + $this->app->make('config')->set('scout-database.search.wildcard_all_tokens', 'both'); + $this->app->make('config')->set('scout-database.search.wildcard_min_length', 4); + + $result = User::search('ello abc xamp')->where('tenant_id', self::TENANT_ID_1)->keys(); + $this->assertEquals([2, 11, 1, 10], $result->toArray()); + + $result = User::search('ello abc xamp')->where('tenant_id', self::TENANT_ID_2)->keys(); + $this->assertEquals([3], $result->toArray()); + } + + public function test_adds_wildcard_to_last_token_even_if_minimum_length_is_not_reached(): void + { + $this->app->make('config')->set('scout-database.search.wildcard_all_tokens', 'both'); + $this->app->make('config')->set('scout-database.search.wildcard_min_length', 7); + $this->app->make('config')->set('scout-database.search.wildcard_last_token', true); + + $result = User::search('ello examp')->where('tenant_id', self::TENANT_ID_2)->keys(); + + $this->assertEquals([3], $result->toArray()); + } }