Skip to content

Commit

Permalink
Merge pull request #32 from sul-dlss/earlier-ranges
Browse files Browse the repository at this point in the history
handle earlier dates correctly (negative 4 digits;  ranges of 3 digits)
  • Loading branch information
justinlittman authored Oct 24, 2019
2 parents e4217fb + bca5e6b commit 1594666
Show file tree
Hide file tree
Showing 6 changed files with 36 additions and 10 deletions.
2 changes: 1 addition & 1 deletion .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Metrics/LineLength:
Max: 120

Metrics/MethodLength:
Max: 22
Max: 25

Style/NumericLiterals:
Enabled: false
Expand Down
6 changes: 3 additions & 3 deletions .rubocop_todo.yml
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
# This configuration was generated by
# `rubocop --auto-gen-config`
# on 2019-10-22 09:04:50 -0700 using RuboCop version 0.74.0.
# on 2019-10-24 13:45:33 -0700 using RuboCop version 0.74.0.
# The point is for the user to remove these configuration records
# one by one as the offenses are removed from the code base.
# Note that changes in the inspected code, or installation of new
# versions of RuboCop, may require this file to be generated again.

# Offense count: 5
Metrics/AbcSize:
Max: 37
Max: 39

# Offense count: 4
Metrics/CyclomaticComplexity:
Expand All @@ -17,7 +17,7 @@ Metrics/CyclomaticComplexity:
# Offense count: 1
# Configuration parameters: CountComments.
Metrics/ModuleLength:
Max: 168
Max: 176

# Offense count: 4
Metrics/PerceivedComplexity:
Expand Down
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ ParseDate.parse_range('ca. 5th–6th century A.D.') # (400..599).to_a
ParseDate.parse_range('ca. 9th–8th century B.C.') # (-999..-800).to_a
ParseDate.parse_range('ca. 13th–12th century B.C.') # (-1399..-1200).to_a
ParseDate.parse_range('5th century B.C.') # (-599..-500).to_a
ParseDate.parse_range('502-504') # [502, 503, 504]
ParseDate.parse_range('-2100 - -2000') # (-2100..-2000).to_a
ParseDate.parse_range('1975 - 1905') # first year > last year, raises error
ParseDate.parse_range('-100 - -150') # first year > last year, raises error
ParseDate.parse_range('1975 or 1905') # first year > last year, raises error
Expand Down Expand Up @@ -90,6 +92,8 @@ ParseDate.earliest_year('ca. 5th–6th century A.D.') # 400
ParseDate.earliest_year('ca. 9th–8th century B.C.') # -999
ParseDate.earliest_year('ca. 13th–12th century B.C.') # -1399
ParseDate.earliest_year('5th century B.C.') # -599
ParseDate.earliest_year('502-504') # 502
ParseDate.earliest_year('-2100 - -2000') # -2100
ParseDate.latest_year('20000222') # 2000
ParseDate.latest_year('195-') # 1959
Expand All @@ -113,7 +117,9 @@ ParseDate.latest_year('ca. 5th–6th century A.D.') # 599
ParseDate.latest_year('ca. 9th–8th century B.C.') # -800
ParseDate.latest_year('ca. 13th–12th century B.C.') # -1200
ParseDate.latest_year('5th century B.C.') # -500
ParseDate.latest_year('-5 - 3') # 3,
ParseDate.latest_year('-5 - 3') # 3
ParseDate.latest_year('502-504') # 504
ParseDate.latest_year('-2100 - -2000') # -2000
ParseDate.range_array('1993', '1995') # [1993, 1994, 1995]
ParseDate.range_array(1993, 1995) # [1993, 1994, 1995]
Expand Down
22 changes: 18 additions & 4 deletions lib/parse_date/int_from_string.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def self.earliest_year(date_str)
return ParseDate.send(:year_int_for_bc, date_str) if date_str.match(YEAR_BC_REGEX)

result ||= ParseDate.send(:between_earliest_year, date_str)
result ||= ParseDate.send(:negative_first_four_digits, date_str)
result ||= ParseDate.send(:first_four_digits, date_str)
result ||= ParseDate.send(:year_from_mm_dd_yy, date_str)
result ||= ParseDate.send(:first_year_for_decade, date_str) # 198x or 201x
Expand Down Expand Up @@ -61,6 +62,7 @@ def self.latest_year(date_str)
result ||= ParseDate.send(:hyphen_2digit_latest_year, date_str)
result ||= ParseDate.send(:yyuu_after_hyphen, date_str)
result ||= ParseDate.send(:year_after_or, date_str)
result ||= ParseDate.send(:negative_4digits_after_hyphen, date_str)
result ||= ParseDate.send(:first_four_digits, date_str)
result ||= ParseDate.send(:year_from_mm_dd_yy, date_str)
result ||= ParseDate.send(:last_year_for_decade, date_str) # 198x or 201x
Expand All @@ -75,12 +77,12 @@ def self.latest_year(date_str)
result.to_i if result && year_int_valid?(result.to_i)
end

# true if the year is between -999 and (current year + 1), inclusive
# true if the year is between -9999 and (current year + 1), inclusive
# @return [Boolean] true if the year is between -9999 and (current year + 1); false otherwise
def self.year_int_valid?(year)
# anything that is not an Integer (e.g. a String or nil) is never a valid year
return false unless year.is_a? Integer

# NOTE(review): the two statements below appear to be the removed and added sides of the same line
# in this diff rendering. Read as plain Ruby, the first value is discarded and only the last
# expression is returned: both bounds are exclusive, i.e. -10000 < year < (current year + 2).
(-1000 < year.to_i) && (year < Date.today.year + 2)
(-10000 < year.to_i) && (year < Date.today.year + 2)
end

protected
Expand Down Expand Up @@ -169,6 +171,18 @@ def last_year_mult_centuries_bc(date_str)
nth * -100
end

# finds a negative four digit year (-yyyy) at the start of a line in date_str
# @param date_str [String] the raw date string to inspect
# @return [String, nil] the matched text including the minus sign (e.g. "-1865"), nil when there is no match
def negative_first_four_digits(date_str)
  matched = date_str.match(/^(\-\d{4})/)
  matched[1] if matched
end

# finds the second negative four digit year in a "-yyyy - -yyyy" style range
# @param date_str [String] the raw date string to inspect
# @return [String, nil] the year after the separating hyphen, minus sign included (e.g. "-1865"),
#   nil when date_str has no such range
def negative_4digits_after_hyphen(date_str)
  matched = date_str.match(/\-\d{4}\s*\-\s*(\-\d{4})/)
  matched[1] if matched
end

# looks for 4 consecutive digits in date_str and returns first occurrence if found
# @return [String, nil] 4 digit year (e.g. 1865, 0950) if date_str has yyyy, nil otherwise
def first_four_digits(date_str)
Expand All @@ -195,7 +209,7 @@ def year_from_mm_dd_yy(date_str)
nil # explicitly want nil if date won't parse
end

DECADE_4CHAR_REGEX = Regexp.new('(^|\D)\d{3}[u\-?x]', REGEX_OPTS)
DECADE_4CHAR_REGEX = Regexp.new('(^|\D)\d{3}[u\-?x]($|\D)', REGEX_OPTS)

# first year of decade (as String) if we have: yyyu, yyy-, yyy? or yyyx pattern
# note that these are the only decade patterns found in our actual date strings in MODS records
Expand Down Expand Up @@ -279,7 +293,7 @@ def between_bc_latest_year(date_str)
"-#{Regexp.last_match(:last)}".to_i if date_str.match(BETWEEN_Yn_AND_Yn_BC_REGEX)
end

EARLY_NUMERIC_REGEX = Regexp.new('^\-?\d{1,3}([^\du\-\[]|$)', REGEX_OPTS)
EARLY_NUMERIC_REGEX = Regexp.new('^\-?\d{1,3}([^\du\[]|$)', REGEX_OPTS)

# year if date_str contains yyy, yy, y, -y, -yy, -yyy, -yyyy
# @return [Integer, nil] year if date_str matches pattern; nil otherwise
Expand Down
6 changes: 5 additions & 1 deletion spec/parse_date/int_from_string_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,8 @@
'ca. 5th–6th century A.D.' => 400,
'ca. 9th–8th century B.C.' => -999,
'ca. 13th–12th century B.C.' => -1399,
'502-504' => 502,
'-2100 - -2000' => -2100,
# '2nd millennium B.C. or ca. 8th century B.C.' => -899, # millennium not yet implemented
}.each do |example, expected|
it "#{expected} for #{example}" do
Expand Down Expand Up @@ -534,6 +536,8 @@
'ca. 5th–6th century A.D.' => 599,
'ca. 9th–8th century B.C.' => -800,
'ca. 13th–12th century B.C.' => -1200,
'502-504' => 504,
'-2100 - -2000' => -2000,
# '2nd millennium B.C. or ca. 8th century B.C.' => -800, # millennium not yet implemented
}.each do |example, expected|
it "#{expected} for #{example}" do
Expand Down Expand Up @@ -644,7 +648,7 @@

describe '.year_int_valid?' do
{ # example int as key, expected result as value
-1666 => false,
-1666 => true,
-999 => true,
-35 => true,
-3 => true,
Expand Down
2 changes: 2 additions & 0 deletions spec/parse_date_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
'ca. 9th–8th century B.C.' => (-999..-800).to_a,
'ca. 13th–12th century B.C.' => (-1399..-1200).to_a,
'5th century B.C.' => (-599..-500).to_a,
'502-504' => [502, 503, 504],
'-2100 - -2000' => (-2100..-2000).to_a,
}.each do |example, expected|
it "#{example} returns array from earliest (#{expected.first}) to latest (#{expected.last})" do
expect(ParseDate.parse_range(example)).to eq expected
Expand Down

0 comments on commit 1594666

Please sign in to comment.