diff --git a/.ruby-version b/.ruby-version index d4bcea9..f6ab44e 100644 --- a/.ruby-version +++ b/.ruby-version @@ -1 +1 @@ -ruby-2.5.3 +ruby-2.6.6 diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..117aa33 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,48 @@ +# Apium Changelog + +All notable changes to Apium will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic +Versioning](https://semver.org/spec/v2.0.0.html). + + + +## [v1.0.2](https://github.com/CDRH/api/compare/v1.0.1...v1.0.2) - escapes and sorting + +### Fixed +- question mark and asterisk behavior in queries +- order of expected, actual in tests +- sort behavior for relevancy + +### Added +- support for multivalued and nested field sorting +- documentation moved back into apium from henbit location in order to version it with software + +### Changed +- ruby, rails, and other gem versions + +## [v1.0.1](https://github.com/CDRH/api/compare/v1.00...v1.0.1) - version 1.0.1 + +### Changed +- ruby, rails, and other gem versions +- version moved to initializer + +## [v1.0.0](https://github.com/CDRH/api/tree/v1.0.0) - Initial Launch diff --git a/Gemfile.lock b/Gemfile.lock index b5c83fc..4e1e715 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,150 +1,149 @@ GEM remote: https://rubygems.org/ specs: - actioncable (5.2.1) - actionpack (= 5.2.1) + actioncable (5.2.4.3) + actionpack (= 5.2.4.3) nio4r (~> 2.0) websocket-driver (>= 0.6.1) - actionmailer (5.2.1) - actionpack (= 5.2.1) - actionview (= 5.2.1) - activejob (= 5.2.1) + actionmailer (5.2.4.3) + actionpack (= 5.2.4.3) + actionview (= 5.2.4.3) + activejob (= 5.2.4.3) mail (~> 2.5, >= 2.5.4) rails-dom-testing (~> 2.0) - actionpack (5.2.1) - actionview (= 5.2.1) - activesupport (= 5.2.1) - rack (~> 2.0) + actionpack (5.2.4.3) + actionview (= 5.2.4.3) + activesupport (= 5.2.4.3) + rack (~> 2.0, >= 2.0.8) rack-test (>= 0.6.3) 
rails-dom-testing (~> 2.0) rails-html-sanitizer (~> 1.0, >= 1.0.2) - actionview (5.2.1) - activesupport (= 5.2.1) + actionview (5.2.4.3) + activesupport (= 5.2.4.3) builder (~> 3.1) erubi (~> 1.4) rails-dom-testing (~> 2.0) rails-html-sanitizer (~> 1.0, >= 1.0.3) - activejob (5.2.1) - activesupport (= 5.2.1) + activejob (5.2.4.3) + activesupport (= 5.2.4.3) globalid (>= 0.3.6) - activemodel (5.2.1) - activesupport (= 5.2.1) - activerecord (5.2.1) - activemodel (= 5.2.1) - activesupport (= 5.2.1) + activemodel (5.2.4.3) + activesupport (= 5.2.4.3) + activerecord (5.2.4.3) + activemodel (= 5.2.4.3) + activesupport (= 5.2.4.3) arel (>= 9.0) - activestorage (5.2.1) - actionpack (= 5.2.1) - activerecord (= 5.2.1) + activestorage (5.2.4.3) + actionpack (= 5.2.4.3) + activerecord (= 5.2.4.3) marcel (~> 0.3.1) - activesupport (5.2.1) + activesupport (5.2.4.3) concurrent-ruby (~> 1.0, >= 1.0.2) i18n (>= 0.7, < 2) minitest (~> 5.1) tzinfo (~> 1.1) arel (9.0.0) - bootsnap (1.3.2) + bootsnap (1.4.6) msgpack (~> 1.0) - builder (3.2.3) - byebug (10.0.2) - concurrent-ruby (1.1.3) - crass (1.0.4) - domain_name (0.5.20180417) + builder (3.2.4) + byebug (11.1.3) + concurrent-ruby (1.1.6) + crass (1.0.6) + domain_name (0.5.20190701) unf (>= 0.0.5, < 1.0.0) - erubi (1.7.1) - ffi (1.9.25) - globalid (0.4.1) + erubi (1.9.0) + ffi (1.13.1) + globalid (0.4.2) activesupport (>= 4.2.0) http-accept (1.7.0) http-cookie (1.0.3) domain_name (~> 0.5) - i18n (1.1.1) + i18n (1.8.3) concurrent-ruby (~> 1.0) listen (3.1.5) rb-fsevent (~> 0.9, >= 0.9.4) rb-inotify (~> 0.9, >= 0.9.7) ruby_dep (~> 1.2) - loofah (2.2.3) + loofah (2.6.0) crass (~> 1.0.2) nokogiri (>= 1.5.9) mail (2.7.1) mini_mime (>= 0.1.1) marcel (0.3.3) mimemagic (~> 0.3.2) - method_source (0.9.2) - mime-types (3.2.2) + method_source (1.0.0) + mime-types (3.3.1) mime-types-data (~> 3.2015) - mime-types-data (3.2018.0812) - mimemagic (0.3.2) - mini_mime (1.0.1) - mini_portile2 (2.3.0) - minitest (5.11.3) - msgpack (1.2.4) + 
mime-types-data (3.2020.0512) + mimemagic (0.3.5) + mini_mime (1.0.2) + mini_portile2 (2.4.0) + minitest (5.14.1) + msgpack (1.3.3) netrc (0.11.0) - nio4r (2.3.1) - nokogiri (1.8.5) - mini_portile2 (~> 2.3.0) - puma (3.12.0) - rack (2.0.6) + nio4r (2.5.2) + nokogiri (1.10.10) + mini_portile2 (~> 2.4.0) + puma (3.12.6) + rack (2.2.3) rack-test (1.1.0) rack (>= 1.0, < 3) - rails (5.2.1) - actioncable (= 5.2.1) - actionmailer (= 5.2.1) - actionpack (= 5.2.1) - actionview (= 5.2.1) - activejob (= 5.2.1) - activemodel (= 5.2.1) - activerecord (= 5.2.1) - activestorage (= 5.2.1) - activesupport (= 5.2.1) + rails (5.2.4.3) + actioncable (= 5.2.4.3) + actionmailer (= 5.2.4.3) + actionpack (= 5.2.4.3) + actionview (= 5.2.4.3) + activejob (= 5.2.4.3) + activemodel (= 5.2.4.3) + activerecord (= 5.2.4.3) + activestorage (= 5.2.4.3) + activesupport (= 5.2.4.3) bundler (>= 1.3.0) - railties (= 5.2.1) + railties (= 5.2.4.3) sprockets-rails (>= 2.0.0) rails-dom-testing (2.0.3) activesupport (>= 4.2.0) nokogiri (>= 1.6) - rails-html-sanitizer (1.0.4) - loofah (~> 2.2, >= 2.2.2) - railties (5.2.1) - actionpack (= 5.2.1) - activesupport (= 5.2.1) + rails-html-sanitizer (1.3.0) + loofah (~> 2.3) + railties (5.2.4.3) + actionpack (= 5.2.4.3) + activesupport (= 5.2.4.3) method_source rake (>= 0.8.7) thor (>= 0.19.0, < 2.0) - rake (12.3.1) - rb-fsevent (0.10.3) - rb-inotify (0.9.10) - ffi (>= 0.5.0, < 2) - rest-client (2.1.0.rc1) + rake (13.0.1) + rb-fsevent (0.10.4) + rb-inotify (0.10.1) + ffi (~> 1.0) + rest-client (2.1.0) http-accept (>= 1.7.0, < 2.0) http-cookie (>= 1.0.2, < 2.0) mime-types (>= 1.16, < 4.0) netrc (~> 0.8) ruby_dep (1.5.0) - spring (2.0.2) - activesupport (>= 4.2) + spring (2.1.0) spring-watcher-listen (2.0.1) listen (>= 2.7, < 4.0) spring (>= 1.2, < 3.0) - sprockets (3.7.2) + sprockets (4.0.2) concurrent-ruby (~> 1.0) rack (> 1, < 3) sprockets-rails (3.2.1) actionpack (>= 4.0) activesupport (>= 4.0) sprockets (>= 3.0.0) - sqlite3 (1.3.13) - thor (0.20.3) + sqlite3 
(1.4.2) + thor (1.0.1) thread_safe (0.3.6) - tzinfo (1.2.5) + tzinfo (1.2.7) thread_safe (~> 0.1) unf (0.1.4) unf_ext - unf_ext (0.0.7.5) - websocket-driver (0.7.0) + unf_ext (0.0.7.7) + websocket-driver (0.7.3) websocket-extensions (>= 0.1.0) - websocket-extensions (0.1.3) + websocket-extensions (0.1.5) PLATFORMS ruby @@ -162,4 +161,4 @@ DEPENDENCIES tzinfo-data BUNDLED WITH - 1.16.2 + 2.1.4 diff --git a/README.md b/README.md index 4c41d37..bfa29a5 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,5 @@ -# Henbit +# Apium -Henbit is an API to access all public Center for Digital Research in the Humanities resources. It is also an invasive weed in Nebraska. +Apium is an API to access all public Center for Digital Research in the Humanities resources. It is also an invasive weed in Nebraska. -**Henbit Documentation can be found [in our documentation repository](https://github.com/CDRH/sneezewort/blob/master/docs/api/README.md)** - -**Documentation for entire publishing system can be found here: ([https://github.com/CDRH/sneezewort](https://github.com/CDRH/sneezewort))** +**[Apium Documentation](docs/README.md)** diff --git a/app/services/search_item_req.rb b/app/services/search_item_req.rb index 9b90ed5..117462d 100644 --- a/app/services/search_item_req.rb +++ b/app/services/search_item_req.rb @@ -59,7 +59,9 @@ def self.escape_chars(query) # for the lucene escaping code below # Note: removed () and : from list, because escaping # those characters interfered with elasticsearch multifield searching - escaped_characters = Regexp.escape('\\+-&|!{}[]^~*?\/') + # Also removed * and ? from the list because escaping those + # characters meant queries with uncertainty couldn't be done + escaped_characters = Regexp.escape('\\+-&|!{}[]^~\/') query.gsub(/([#{escaped_characters}])/, '\\\\\1') end @@ -234,7 +236,7 @@ def sort sort_param = nil if @params["sort"].blank? if @params["q"].present? 
- sort_param = ["_score|desc"] + sort_param = ["_score"] else sort_param = [SETTINGS["sort_fl"]] end @@ -244,11 +246,33 @@ def sort sort_param.each do |sort| term, dir = sort.split(@@filter_separator) - term = "_score" if term == "relevancy" - if dir.blank? - dir = term == "relevancy" ? "desc" : "asc" + if term == "relevancy" || term == "_score" + sort_obj << "_score" + else + dir = "asc" if dir.blank? + # instructions for multivalued field sorting + # ex: desc [D], [A, F] -> [A, F], [D] because A is max from set + mode = dir == "desc" ? "max" : "min" + # default to sorting missing values last, this may + # be added as a configurable parameter later + missing = "_last" + + sort_setting = { + term => { + "order" => dir, + "mode" => mode, + "missing" => missing + } + } + # nested fields require different sorting setup + # note: does not support nested fields inside of nested fields + if term.include?(".") + path = term.split(".").first + sort_setting[term]["nested"] = { "path" => path } + end + sort_obj << sort_setting end - sort_obj << { term => dir } + end return sort_obj diff --git a/config/application.rb b/config/application.rb index ddb6f89..c6d9b6c 100644 --- a/config/application.rb +++ b/config/application.rb @@ -32,5 +32,8 @@ class Application < Rails::Application # Middleware like session, flash, cookies can be added back manually. # Skip views, helpers and assets when generating a new resource. config.api_only = true + + # sets to avoid deprecation warning in terminal + Rails.application.config.active_record.sqlite3.represent_boolean_as_integer = true end end diff --git a/config/environments/development.rb b/config/environments/development.rb index d52ec9e..4bdb9c3 100644 --- a/config/environments/development.rb +++ b/config/environments/development.rb @@ -51,4 +51,15 @@ # Use an evented file watcher to asynchronously detect changes in source code, # routes, locales, etc. This feature depends on the listen gem. 
config.file_watcher = ActiveSupport::EventedFileUpdateChecker + + + + # LOCAL + # Custom dev env logger to empty log more frequently + config.logger = ActiveSupport::TaggedLogging.new( + ActiveSupport::Logger.new(File.join(Rails.root.to_s, "log", "development.log"), + # Keep one old log file, rotate after size reaches 32 MB + 1, 32 * 1024 * 1024 + ) + ) end diff --git a/config/initializers/version.rb b/config/initializers/version.rb index a5017e3..cdc4aed 100644 --- a/config/initializers/version.rb +++ b/config/initializers/version.rb @@ -1,5 +1,5 @@ module Api class Application < Rails::Application - VERSION = "1.0.1" + VERSION = "1.0.2" end end diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..026368f --- /dev/null +++ b/docs/README.md @@ -0,0 +1,260 @@ +# Apium + +Apium is an API to access all public Center for Digital Research in the Humanities resources. It is also an invasive weed in Nebraska. + +## item query + +- [Facets](#facets) +- [Field List](#field-list) +- [Filters](#filters) +- [Highlighting](#highlighting) +- [Sorting](#sorting) +- [Start and Rows](#start-and-rows) +- [Text Searches](#text-search) + +### facets + +_Lists number of documents matching keyword fields_ + +Defaults: + +- no defaults + +__Standard fields__ + +`facet[]=keyword_field` + +``` +facet[]=category +facet[]=category&facet[]=title +``` + +__Nested fields__ + +`facet[]=nested_field.keyword_field` + +``` +facet[]=creator.name +facet[]=creator.name&facet[]=creator.role +``` + +__Date ranges__ (currently supports days or years) + +`facet[]=date_field.range` + +``` +facet[]=date.year + #=> { 1889 : 10, 1890 : 20 } + +facet[]=date + #=> { 01-02-1889 : 2, 03-04-1889 : 8 } +``` + +Number of facets returned and sorting alphabetically (by default sorts by count) + +`facet_num=number&facet_sort=term|direction` + +``` +facet_num=100 +facet_sort=term|asc + +facet_num=30&facet_sort=term|desc +``` + +__Sorting facets__ + +Defaults: + +- no selection: score|desc +- 
term selection, no order: term|desc + +Always defaults to score descending. If you wish to sort alphabetically, add "term" and a direction. If you wish to sort score ascending, use "score" and a direction. Multiple sorts for single facets, and distinct sorts for separate facets are not supported at this time. + +`facet_sort=type|direction` + +``` +facet_sort=term|desc +facet_sort=score|asc +``` + +### field list + +_The fields returned by a query_ + +Defaults: + +- returns all possible fields + +Restrict the fields displayed per document in the response. Use `!` to exclude a field. Wildcards in fieldnames supported. + +`fl=yes,!no` + +``` +fl=title,!date*,date_written +``` + +### filters + +_Filters by keyword field across the possible documents_ + +Defaults: + +- no filters applied except `_type` for collection + +__Standard fields__ + +`f[]=field|type` + +``` +f[]=category|Writings +f[]=category|Writings&f[]=format|manuscript +``` + +__Nested fields__ + +`f[]=nested.keyword|type` + +``` +f[]=creator.name|Cather, Willa +f[]=contributor.role|Editor +``` + +__Date field__ + +If given one date, will use it as both start and end. + +Can give year range or specify date range + +`f[]=field|range_start|(range_end)` + +```bash +f[]=date|1884 + #=> 01-01-1884 to 12-31-1884 +f[]=date|1884|1887 + #=> 01-01-1884 to 12-31-1887 + +f[]=date|1884-02-01|1887-03-01 + #=> 02-01-1884 to 03-01-1887 +``` + +### highlighting + +_Returns context of text match results_ + +Defaults: + +- `hl=true` +- `hl_chars=100` +- `hl_fl=text` +- `hl_num=3` + +__Disabling Highlighting__ + +If you wish to turn highlighting off: + +`hl=false` + +__Characters__ + +This sets the number of characters that will be returned around a highlight match + +`hl_chars=number` + +``` +hl_chars=100 +``` + +__Field List__ + +Highlights will always be returned for the `text` field, but if you are searching multiple fields, you may wish to see highlights on those fields, also.
You do not need to send `text` when specifying additional fields. + +`hl_fl=field1,field2,field3` + +``` +hl_fl=annotations +hl_fl=annotations,catherwords +``` + +__Number__ + +The number of highlights returned per field. If you set `hl_num=3` for `text` and `annotations` you could receive up to 6 highlights, 3 from each field. + +`hl_num=number` + +``` +hl_num=1 +hl_num=5 +``` + +### sorting + +_Specify the order of results_ + +Defaults: + +When no sort or partial sort is supplied + +- query present: sort by "relevancy" descending +- given term is "relevancy", no order provided: sort descending +- given term is not "relevancy", no order provided: sort ascending + +You may pass multiple fields to be sorted. The first one appearing in the URL parameters will take precedence over the other(s). + +`sort[]=field|direction` + +``` +sort[]=date|desc&sort[]=title|asc +``` + +__Sorting facets__ + +Please refer to the section on [facets](#facets) for information about how to sort facets, specifically. + +### start and rows + +_Manual pagination of results_ + +Defaults: + +- start=0 +- num=50 + +Note: Zero indexed + +`start=number`
+`num=number` + +``` +start=0&num=50 # returns first 50 results +start=50&num=50 # returns second 50 results +start=10&num=10 # returns second 10 results +``` + +### text search + +Please refer to [the Elasticsearch query string syntax](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax) for a list of all possibilities for text searching. + +__Basic search__ + +`q=word` + +``` +q=multiple words +q=word +``` + +__Multiple fields__ + +By default, this will search the "text" field; you can specify a different one to use or multiple fields. If adding fields, you will want to make sure that your [highlights](#highlighting) include fields beyond "text". + +`q=field:word`
+`q=field:word AND otherfield:other`
+`q=field:word OR otherfield:other` + +__Advanced search__ + +`q="phrase of words"`
+`q=wildcard*`
+`q=word OR other`
+`q=word AND other`
+`q=(word OR other) OR -nothanks` diff --git a/test/services/search_item_req_test.rb b/test/services/search_item_req_test.rb index fa13aed..2a2c785 100644 --- a/test/services/search_item_req_test.rb +++ b/test/services/search_item_req_test.rb @@ -6,21 +6,25 @@ def test_escape_chars # phrase search (quotation marks) query = '"fire in the fireplace"' - assert_equal SearchItemReq.escape_chars(query), "\"fire in the fireplace\"" + assert_equal "\"fire in the fireplace\"", SearchItemReq.escape_chars(query) # make sure that (text:searches) are not destroyed query = '(text:water) OR (annotations_text:Cather)' - assert_equal SearchItemReq.escape_chars(query), "(text:water) OR (annotations_text:Cather)" + assert_equal "(text:water) OR (annotations_text:Cather)", SearchItemReq.escape_chars(query) + + # do not escape ? and * + query = 'wat?r OR cat*' + assert_equal "wat?r OR cat*", SearchItemReq.escape_chars(query) # escape odd numbered quotation marks query = '"something' - assert_equal SearchItemReq.escape_chars(query), "\"something" + assert_equal "\"something", SearchItemReq.escape_chars(query) query = '"phrase" plus "' - assert_equal SearchItemReq.escape_chars(query), "\"phrase\" plus \"" + assert_equal "\"phrase\" plus \"", SearchItemReq.escape_chars(query) # escape brackets, etc query = '{\\+~' - assert_equal SearchItemReq.escape_chars(query), "\\{\\\\\\+\\~" + assert_equal "\\{\\\\\\+\\~", SearchItemReq.escape_chars(query) end @@ -28,7 +32,10 @@ def test_facets # normal with no pagination overrides facets = SearchItemReq.new({ "facet" => [ "title" ] }).facets - assert_equal facets, {"title"=>{"terms"=>{"field"=>"title", "order"=>{"_count"=>"desc"}, "size"=>20}}} + assert_equal( + {"title"=>{"terms"=>{"field"=>"title", "order"=>{"_count"=>"desc"}, "size"=>20}}}, + facets + ) # normal with pagination overrides, multiple facets facets = SearchItemReq.new({ @@ -36,7 +43,10 @@ def test_facets "facet_sort" => "term|asc", "facet" => [ "title", "subcategory" ] }).facets - 
assert_equal facets, {"title"=>{"terms"=>{"field"=>"title", "order"=>{"_term"=>"asc"}, "size"=>10}}, "subcategory"=>{"terms"=>{"field"=>"subcategory", "order"=>{"_term"=>"asc"}, "size"=>10}}} + assert_equal( + {"title"=>{"terms"=>{"field"=>"title", "order"=>{"_term"=>"asc"}, "size"=>10}}, "subcategory"=>{"terms"=>{"field"=>"subcategory", "order"=>{"_term"=>"asc"}, "size"=>10}}}, + facets + ) # should be blank if there are no facets provided facets = SearchItemReq.new({ @@ -44,34 +54,52 @@ def test_facets "facet_sort" => "nonterm|asc", "facet" => [] }).facets - assert_equal facets, {} + assert_equal({}, facets) # getting dates involved facets = SearchItemReq.new({ "facet" => [ "date.year", "date"] }).facets - assert_equal facets, {"date.year"=>{"date_histogram"=>{"field"=>"date", "interval"=>"year", "format"=>"yyyy", "min_doc_count"=>1, "order"=>{"_count"=>"desc"}}}, "date"=>{"date_histogram"=>{"field"=>"date", "interval"=>"day", "format"=>"yyyy-MM-dd", "min_doc_count"=>1, "order"=>{"_count"=>"desc"}}}} + assert_equal( + {"date.year"=>{"date_histogram"=>{"field"=>"date", "interval"=>"year", "format"=>"yyyy", "min_doc_count"=>1, "order"=>{"_count"=>"desc"}}}, "date"=>{"date_histogram"=>{"field"=>"date", "interval"=>"day", "format"=>"yyyy-MM-dd", "min_doc_count"=>1, "order"=>{"_count"=>"desc"}}}}, + facets + ) # nested field facets = SearchItemReq.new({ "facet_sort" => "term|desc", "facet" => [ "creator.name" ] }).facets - assert_equal facets, {"creator.name"=>{"nested"=>{"path"=>"creator"}, "aggs"=>{"creator.name"=>{"terms"=>{"field"=>"creator.name", "order"=>{"_term"=>"desc"}, "size"=>20}}}}} + assert_equal( + {"creator.name"=>{"nested"=>{"path"=>"creator"}, "aggs"=>{"creator.name"=>{"terms"=>{"field"=>"creator.name", "order"=>{"_term"=>"desc"}, "size"=>20}}}}}, + facets + ) # with non-array facets = SearchItemReq.new({ "facet" => "title" }).facets - assert_equal facets, {"title"=>{"terms"=>{"field"=>"title", "order"=>{"_count"=>"desc"}, "size"=>20}}} + 
assert_equal( + {"title"=>{"terms"=>{"field"=>"title", "order"=>{"_count"=>"desc"}, "size"=>20}}}, + facets + ) # sort term order specified facets = SearchItemReq.new({ "facet" => ["title", "format"], "facet_sort" => "term|desc" }).facets - assert_equal facets, {"title"=>{"terms"=>{"field"=>"title", "order"=>{"_term"=>"desc"}, "size"=>20}}, "format"=>{"terms"=>{"field"=>"format", "order"=>{"_term"=>"desc"}, "size"=>20}}} + assert_equal( + {"title"=>{"terms"=>{"field"=>"title", "order"=>{"_term"=>"desc"}, "size"=>20}}, "format"=>{"terms"=>{"field"=>"format", "order"=>{"_term"=>"desc"}, "size"=>20}}}, + facets + ) # sort term no order specified facets = SearchItemReq.new({ "facet" => ["title", "format"], "facet_sort" => "term" }).facets - assert_equal facets, {"title"=>{"terms"=>{"field"=>"title", "order"=>{"_term"=>"desc"}, "size"=>20}}, "format"=>{"terms"=>{"field"=>"format", "order"=>{"_term"=>"desc"}, "size"=>20}}} + assert_equal( + {"title"=>{"terms"=>{"field"=>"title", "order"=>{"_term"=>"desc"}, "size"=>20}}, "format"=>{"terms"=>{"field"=>"format", "order"=>{"_term"=>"desc"}, "size"=>20}}}, + facets + ) # sort count, no order specified facets = SearchItemReq.new({ "facet" => ["title"], "facet_sort" => "count" }).facets - assert_equal facets, {"title"=>{"terms"=>{"field"=>"title", "order"=>{"_count"=>"desc"}, "size"=>20}}} + assert_equal( + {"title"=>{"terms"=>{"field"=>"title", "order"=>{"_count"=>"desc"}, "size"=>20}}}, + facets + ) end @@ -79,47 +107,74 @@ def test_filters # single filter filters = SearchItemReq.new({ "f" => ["category|Writings"] }).filters - assert_equal filters, [{"term"=>{"category"=>"Writings"}}] + assert_equal( + [{"term"=>{"category"=>"Writings"}}], + filters + ) # multiple filters filters = SearchItemReq.new({ "f" => ["category|Writings", "author.name|Herriot, James"] }).filters - assert_equal filters, [{"term"=>{"category"=>"Writings"}}, {"nested"=>{"path"=>"author", "query"=>{"term"=>{"author.name"=>"Herriot, James"}}}}] + 
assert_equal( + [{"term"=>{"category"=>"Writings"}}, {"nested"=>{"path"=>"author", "query"=>{"term"=>{"author.name"=>"Herriot, James"}}}}], + filters + ) # multiple filters, including one with CR present filters = SearchItemReq.new({ "f" => ["category|Writings", "places_written_k|Jaffrey, New Hampshire, United\r\n States"] }).filters - assert_equal filters, [{"term"=>{"category"=>"Writings"}}, {"term"=>{"places_written_k"=>"Jaffrey, New Hampshire, United\n States"}}] + assert_equal( + [{"term"=>{"category"=>"Writings"}}, {"term"=>{"places_written_k"=>"Jaffrey, New Hampshire, United\n States"}}], + filters + ) # single year filters = SearchItemReq.new({ "f" => ["date|1900"] }).filters - assert_equal filters, [{"range"=>{"date"=>{"gte"=>"1900-01-01", "lte"=>"1900-12-31", "format"=>"yyyy-MM-dd"}}}] + assert_equal( + [{"range"=>{"date"=>{"gte"=>"1900-01-01", "lte"=>"1900-12-31", "format"=>"yyyy-MM-dd"}}}], + filters + ) # double year filters = SearchItemReq.new({ "f" => ["date|1900|1904"] }).filters - assert_equal filters, [{"range"=>{"date"=>{"gte"=>"1900-01-01", "lte"=>"1904-12-31", "format"=>"yyyy-MM-dd"}}}] + assert_equal( + [{"range"=>{"date"=>{"gte"=>"1900-01-01", "lte"=>"1904-12-31", "format"=>"yyyy-MM-dd"}}}], + filters + ) # double day range filters = SearchItemReq.new({ "f" => ["date|1904-01-03|1908-12-10"] }).filters - assert_equal filters, [{"range"=>{"date"=>{"gte"=>"1904-01-03", "lte"=>"1908-12-10", "format"=>"yyyy-MM-dd"}}}] + assert_equal( + [{"range"=>{"date"=>{"gte"=>"1904-01-03", "lte"=>"1908-12-10", "format"=>"yyyy-MM-dd"}}}], + filters + ) # nested field filters = SearchItemReq.new({ "f" => ["creator.name|Willa, Cather"] }).filters - assert_equal filters, [{"nested"=>{"path"=>"creator", "query"=>{"term"=>{"creator.name"=>"Willa, Cather"}}}}] + assert_equal( + [{"nested"=>{"path"=>"creator", "query"=>{"term"=>{"creator.name"=>"Willa, Cather"}}}}], + filters + ) # multiple filters, including a nested field with CR present filters = 
SearchItemReq.new({ "f" => ["category|Writings", "author.name|Herriot,\r\nJames"] }).filters - assert_equal filters, [{"term"=>{"category"=>"Writings"}}, {"nested"=>{"path"=>"author", "query"=>{"term"=>{"author.name"=>"Herriot,\nJames"}}}}] + assert_equal( + [{"term"=>{"category"=>"Writings"}}, {"nested"=>{"path"=>"author", "query"=>{"term"=>{"author.name"=>"Herriot,\nJames"}}}}], + filters + ) # dynamic field filters = SearchItemReq.new({ "f" => ["publication_d|1900"] }).filters - assert_equal filters, [{"range"=>{"publication_d"=>{"gte"=>"1900-01-01", "lte"=>"1900-12-31", "format"=>"yyyy-MM-dd"}}}] + assert_equal( + [{"range"=>{"publication_d"=>{"gte"=>"1900-01-01", "lte"=>"1900-12-31", "format"=>"yyyy-MM-dd"}}}], + filters + ) # with non-array filters = SearchItemReq.new({ "f" => "category|Writings" }).filters - assert_equal filters, [{"term"=>{"category"=>"Writings"}}] + assert_equal([{"term"=>{"category"=>"Writings"}}], filters) # where empty filters = SearchItemReq.new({ "f" => "places|" }).filters - assert_equal filters, ["term"=>{"places"=>""}] + assert_equal(["term"=>{"places"=>""}], filters) end @@ -127,23 +182,35 @@ def test_highlights # no parameters hl = SearchItemReq.new({}).highlights - assert_equal hl, {"fields"=>{"text"=>{"fragment_size"=>100, "number_of_fragments"=>3}}} + assert_equal( + {"fields"=>{"text"=>{"fragment_size"=>100, "number_of_fragments"=>3}}}, + hl + ) # specifying fragment size and number hl = SearchItemReq.new({ "hl_chars" => 20, "hl_num" => 1 }).highlights - assert_equal hl, {"fields"=>{"text"=>{"fragment_size"=>20, "number_of_fragments"=>1}}} + assert_equal( + {"fields"=>{"text"=>{"fragment_size"=>20, "number_of_fragments"=>1}}}, + hl + ) # fragment size and number multiple fields hl = SearchItemReq.new({ "hl_chars" => 20, "hl_num" => 1, "hl_fl" => "annotations,extra" }).highlights - assert_equal hl, {"fields"=>{"text"=>{"fragment_size"=>20, "number_of_fragments"=>1}, "annotations"=>{"fragment_size"=>20, 
"number_of_fragments"=>1}, "extra"=>{"fragment_size"=>20, "number_of_fragments"=>1}}} + assert_equal( + {"fields"=>{"text"=>{"fragment_size"=>20, "number_of_fragments"=>1}, "annotations"=>{"fragment_size"=>20, "number_of_fragments"=>1}, "extra"=>{"fragment_size"=>20, "number_of_fragments"=>1}}}, + hl + ) # no highlights despite params hl = SearchItemReq.new({ "hl_fl" => "annotations", "hl" => "false" }).highlights - assert_equal hl, {} + assert_equal({}, hl) # highlight field list hl = SearchItemReq.new({ "hl_fl" => "annotations, text" }).highlights - assert_equal hl, {"fields"=>{"text"=>{"fragment_size"=>100, "number_of_fragments"=>3}, "annotations"=>{"fragment_size"=>100, "number_of_fragments"=>3}}} + assert_equal( + {"fields"=>{"text"=>{"fragment_size"=>100, "number_of_fragments"=>3}, "annotations"=>{"fragment_size"=>100, "number_of_fragments"=>3}}}, + hl + ) end @@ -151,74 +218,119 @@ def test_sort # single sort sort = SearchItemReq.new({ "sort" => ["title|asc"] }).sort - assert_equal sort, [{"title"=>"asc"}] + assert_equal( + [{"title"=>{"order"=>"asc", "mode"=>"min", "missing"=>"_last"}}], + sort + ) # multiple sorts and subfield sort = SearchItemReq.new({ "sort" => ["title|desc", "author.name|asc"] }).sort - assert_equal sort, [{"title"=>"desc"}, {"author.name"=>"asc"}] + assert_equal( + [{"title"=>{"order"=>"desc", "mode"=>"max", "missing"=>"_last"}}, {"author.name"=>{"order"=>"asc", "mode"=>"min", "missing"=>"_last", "nested"=>{"path"=>"author"}}}], + sort + ) # with non-array sort = SearchItemReq.new({ "sort" => "title|asc" }).sort - assert_equal sort, [{"title"=>"asc"}] + assert_equal( + [{"title"=>{"order"=>"asc", "mode"=>"min", "missing"=>"_last"}}], + sort + ) # no sort specified, query present sort = SearchItemReq.new({ "q" => "water" }).sort - assert_equal sort, [{"_score"=>"desc"}] + assert_equal(["_score"], sort) # no sort direction specified, query present sort = SearchItemReq.new({ "q" => "water", "sort" => "date" }).sort - assert_equal sort, 
[{"date"=>"asc"}] + assert_equal( + [{"date"=>{"order"=>"asc", "mode"=>"min", "missing"=>"_last"}}], + sort + ) # sort specified, query present sort = SearchItemReq.new({ "q" => "water", "sort" => "date|desc" }).sort - assert_equal sort, [{"date"=>"desc"}] + assert_equal( + [{"date"=>{"order"=>"desc", "mode"=>"max", "missing"=>"_last"}}], + sort + ) # no sort specified, no query sort = SearchItemReq.new({}).sort - assert_equal sort, [{"identifier" => "asc"}] + assert_equal( + [{"identifier"=>{"order"=>"asc", "mode"=>"min", "missing"=>"_last"}}], + sort + ) # no sort direction specified, no query sort = SearchItemReq.new({ "sort" => "title" }).sort - assert_equal sort, [{"title" => "asc"}] + assert_equal( + [{"title"=>{"order"=>"asc", "mode"=>"min", "missing"=>"_last"}}], + sort + ) end def test_text_search # simple text = SearchItemReq.new({ "q" => "water" }).text_search - assert_equal text, {"query_string"=>{"default_field"=>"text", "query"=>"water"}} + assert_equal( + {"query_string"=>{"default_field"=>"text", "query"=>"water"}}, + text + ) # boolean text = SearchItemReq.new({ "q" => "water AND college" }).text_search - assert_equal text, {"query_string"=>{"default_field"=>"text", "query"=>"water AND college"}} + assert_equal( + {"query_string"=>{"default_field"=>"text", "query"=>"water AND college"}}, + text + ) # multiple fields text = SearchItemReq.new({ "q" => "(text:water) AND (annotations:water)" }).text_search - assert_equal text, {"query_string"=>{"query"=>"(text:water) AND (annotations:water)"}} + assert_equal( + {"query_string"=>{"query"=>"(text:water) AND (annotations:water)"}}, + text + ) # multiple fields different input text = SearchItemReq.new({ "q" => "(text:water) OR (annotations:balcony)" }).text_search - assert_equal text, {"query_string"=>{"query"=>"(text:water) OR (annotations:balcony)"}} + assert_equal( + {"query_string"=>{"query"=>"(text:water) OR (annotations:balcony)"}}, + text + ) # multiple fields with grouped inputs text = 
SearchItemReq.new({ "q" => '(text:water OR "fire in the fireplace") OR (annotations:water AND "fire in the fireplace")'}).text_search - assert_equal text, {"query_string"=>{"query"=>"(text:water OR \"fire in the fireplace\") OR (annotations:water AND \"fire in the fireplace\")"}} + assert_equal( + {"query_string"=>{"query"=>"(text:water OR \"fire in the fireplace\") OR (annotations:water AND \"fire in the fireplace\")"}}, + text + ) # non-text field search text = SearchItemReq.new({ "q" => "transcriptions_t:wouldnt" }).text_search - assert_equal text, {"query_string"=>{"query"=>"transcriptions_t:wouldnt"}} + assert_equal( + {"query_string"=>{"query"=>"transcriptions_t:wouldnt"}}, + text + ) # text field search beginning with what looks like text field text = SearchItemReq.new({ "q" => "yosemite: cool place to visit" }).text_search - assert_equal text, {"query_string"=>{"default_field"=>"text", "query"=>"yosemite: cool place to visit"}} + assert_equal( + {"query_string"=>{"default_field"=>"text", "query"=>"yosemite: cool place to visit"}}, + text + ) # text field search beginning with what really looks like a text field text = SearchItemReq.new({ "q" => "Exploring the Text: Cather's Hand" }).text_search - assert_equal text, {"query_string"=>{"default_field"=>"text", "query"=>"Exploring the Text: Cather's Hand"}} + assert_equal( + {"query_string"=>{"default_field"=>"text", "query"=>"Exploring the Text: Cather's Hand"}}, + text + ) # none text = SearchItemReq.new({}).text_search - assert_equal text, { "match_all" => {} } + assert_equal({ "match_all" => {} }, text) end @@ -226,15 +338,15 @@ def test_source # spaces, whitelist only source = SearchItemReq.new({ "fl" => "title, creator.name" }).source - assert_equal source, {"includes"=>["title", "creator.name"]} + assert_equal({"includes"=>["title", "creator.name"]}, source) # blacklist only source = SearchItemReq.new({ "fl" => "!title,!creator.name" }).source - assert_equal source, {"excludes"=>["title", 
"creator.name"]} + assert_equal({"excludes"=>["title", "creator.name"]}, source) # both source = SearchItemReq.new({ "fl" => "id, title, date, !dat*" }).source - assert_equal source, {"includes"=>["id", "title", "date"], "excludes"=>["dat*"]} + assert_equal({"includes"=>["id", "title", "date"], "excludes"=>["dat*"]}, source) end