Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make citations #2

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .ruby-gemset
Original file line number Diff line number Diff line change
@@ -1 +1 @@
tibetanmonasteries
datura-new
2 changes: 1 addition & 1 deletion .ruby-version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
ruby-3.1.2
ruby-3.1.6
5 changes: 3 additions & 2 deletions Gemfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
source "https://rubygems.org"
gem 'byebug'
gem 'datura', git: 'https://github.com/CDRH/datura', branch: "release/v1.0.0"
gem 'fileutils'
gem 'datura', git: 'https://github.com/CDRH/datura', branch: "dev"
gem 'fileutils'
gem "pdf-reader"
48 changes: 33 additions & 15 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,48 +1,66 @@
GIT
remote: https://github.com/CDRH/datura
revision: f286ab4d4a3510fe88dafa5f759f2cb5f0290ef2
branch: release/v1.0.0
revision: e1498c77c058d63c3e09dcaba99ec786bb262352
branch: dev
specs:
datura (0.2.0.pre.beta)
datura (0.2.0)
byebug (~> 11.0)
colorize (~> 0.8.1)
nokogiri (~> 1.10)
pdf-reader (~> 2.12)
rest-client (~> 2.1)

GEM
remote: https://rubygems.org/
specs:
Ascii85 (1.1.1)
afm (0.2.2)
bigdecimal (3.1.8)
byebug (11.1.3)
colorize (0.8.1)
domain_name (0.5.20190701)
unf (>= 0.0.5, < 1.0.0)
fileutils (1.7.0)
domain_name (0.6.20240107)
fileutils (1.7.2)
hashery (2.1.2)
http-accept (1.7.0)
http-cookie (1.0.5)
http-cookie (1.0.7)
domain_name (~> 0.5)
mime-types (3.4.1)
mime-types (3.5.2)
mime-types-data (~> 3.2015)
mime-types-data (3.2023.0218.1)
mime-types-data (3.2024.0903)
netrc (0.11.0)
nokogiri (1.14.3-x86_64-darwin)
nokogiri (1.16.7-arm64-darwin)
racc (~> 1.4)
racc (1.6.2)
nokogiri (1.16.7-x86_64-darwin)
racc (~> 1.4)
nokogiri (1.16.7-x86_64-linux)
racc (~> 1.4)
pdf-reader (2.12.0)
Ascii85 (~> 1.0)
afm (~> 0.2.1)
hashery (~> 2.0)
ruby-rc4
ttfunk
racc (1.8.1)
rest-client (2.1.0)
http-accept (>= 1.7.0, < 2.0)
http-cookie (>= 1.0.2, < 2.0)
mime-types (>= 1.16, < 4.0)
netrc (~> 0.8)
unf (0.1.4)
unf_ext
unf_ext (0.0.8.2)
ruby-rc4 (0.1.5)
ttfunk (1.8.0)
bigdecimal (~> 3.1)

PLATFORMS
arm64-darwin-23
x86_64-darwin-19
x86_64-darwin-22
x86_64-linux

DEPENDENCIES
byebug
datura!
fileutils
pdf-reader

BUNDLED WITH
2.4.3
2.3.7
81 changes: 69 additions & 12 deletions scripts/overrides/csv_to_es.rb
Original file line number Diff line number Diff line change
@@ -1,22 +1,33 @@
class CsvToEs

# Adds the collection-specific fields to @json:
# - "count_k": how many rdf entries have a predicate other than "sameAs",
#   stored as a string
# - "date_accessed_k": the row's "Accessed" value as YYYY-MM-DD when
#   Date.parse accepts it, otherwise the raw value; left unset when the row
#   has no "Accessed" value
# NOTE (carried over from the original): should be changed for baserow.
def assemble_collection_specific
  relationships = rdf.reject { |entry| entry["predicate"] == "sameAs" }
  @json["count_k"] = relationships.count.to_s

  accessed = @row["Accessed"]
  return unless accessed

  # Keep the unparsed text rather than dropping the field on a bad date.
  @json["date_accessed_k"] =
    begin
      Date.parse(accessed).strftime("%Y-%m-%d")
    rescue Date::Error
      accessed
    end
end

# Returns the identifier for this row, read from the "id 2" column.
#
# A leftover `"fig_" + @row["id"]` expression used to run first; its result
# was discarded, and it raised TypeError whenever the row had no "id" value.
# It has been removed so only the "id 2" lookup remains.
# TODO (carried over): test to make sure this works with baserow.
def get_id
  @row["id 2"]
end

# Returns the constant category string for this record type.
def category
"Religious figures"
end

# Returns the row's "name" value (nil when the row has none).
def title
# should work for baserow
@row["name"]
end

def date_not_before
#should work with baserow
if @row["birth_date"] && !@row["birth_date"].empty?
Datura::Helpers.date_standardize(@row["birth_date"], false)
else
Expand All @@ -25,6 +36,7 @@ def date_not_before
end

def date_not_after
#should work with baserow
if @row["death_date"] && !@row["death_date"].empty?
Datura::Helpers.date_standardize(@row["death_date"], false)
else
Expand All @@ -39,6 +51,7 @@ def date_display
end

# Returns the row's "religious_tradition" value (nil when the row has none).
def type
#should work with baserow
@row["religious_tradition"]
end

Expand All @@ -47,33 +60,36 @@ def type
# end

def rdf
#I think this needs to be constructed for baserow
items = []
if @row["monasteries"]
if @row["Associated Monasteries"]
# each monastery should be in the format id|role|associated_teaching|story
JSON.parse(@row["monasteries"]).each do |monastery|
monastery_data = monastery.split("|")
@row["Associated Monasteries"].split("\",\"").each do |monastery|
monastery_data = monastery.tr("\"", "").split("|")
items << {
"subject" => title, #name of the current figure
"predicate" => monastery_data[1], #role
"object" => monastery_data[0], #monastery id and name
"source" => monastery_data[2], #associated teaching
"note" => monastery_data[3] #story
"predicate" => monastery_data[2], #role
"object" => monastery_data[1], #monastery id and name
"source" => monastery_data[3], #associated teaching
"note" => monastery_data[4] #story
}
end
end
if relation
#this part should still work, although need to add the uri
items << {
"subject" => uri,
"predicate" => "sameAs",
"object" => "https://library.bdrc.io/show/bdr:#{relation}",
"object" => "https://library.bdrc.io/show/bdr:#{has_relation}",
"source" => "Buddhist Digital Resource Center",
"note" => "link"
}
#
#TODO Treasury of Lives
items << {
"subject" => uri,
"predicate" => "sameAs",
"object" => "https://treasuryoflives.org/search/by_name/#{relation}",
"object" => "https://treasuryoflives.org/search/by_name/#{has_relation}",
"source" => "Treasury of Lives",
"note" => "link"
}
Expand All @@ -82,11 +98,15 @@ def rdf
end

# Returns the row's "description" value (nil when the row has none).
def description
#same as baserow
@row["description"]
end

def relation
@row["BDRC number"]
# Returns a one-entry hash whose "id" is the row's "BDRC number" value.
# The hash is returned even when that value is nil.
# (same as baserow)
def has_relation
  bdrc_number = @row["BDRC number"]
  { "id" => bdrc_number }
end

def spatial
Expand All @@ -95,5 +115,42 @@ def spatial
}
end

# Returns two citation hashes for this figure, in this order:
# 1. Treasury of Lives: named by the title, dated from the row's
#    "Treasury date" value
# 2. BDRC: named "<title> (<BDRC number>)", dated with the fixed year "2024"
# Both dates go through Datura::Helpers.date_standardize with false as the
# second argument (presumably selecting the end-of-range form; confirm
# against Datura).
def citation
  treasury = {
    "name" => title,
    "date" => Datura::Helpers.date_standardize(@row["Treasury date"], false),
    "publisher" => "Treasury of Lives"
  }
  bdrc = {
    "name" => title + " (#{@row["BDRC number"]})",
    "date" => Datura::Helpers.date_standardize("2024", false),
    "publisher" => "BDRC"
  }
  [treasury, bdrc]
end

# Returns a one-entry hash whose "name" is the row's "Treasury author" value.
def creator
  { "name" => @row["Treasury author"] }
end

# Returns the row's "Accessed" value passed through
# Datura::Helpers.date_standardize, with false as the second argument.
def date_updated
  accessed = @row["Accessed"]
  Datura::Helpers.date_standardize(accessed, false)
end

# Returns the source links for this figure: a Treasury of Lives name search
# and the BDRC page, both built from the "id" in has_relation.
# Returns nil when there is no id to link to.
#
# has_relation returns a Hash, which is truthy even when its "id" is nil, so
# guarding on the Hash alone emitted links ending in "by_name/" and "bdr:".
# The guard now checks the id itself, and treats a blank id as missing.
#
# NOTE(review): the BDRC link uses http:// here; confirm whether https:// is
# wanted.
# TODO is there a way to make a canonical link like
# https://treasuryoflives.org/biographies/view/Tsongkhapa-Lobzang-Drakpa/8986
# or else to webscrape the cite this page link
def rights_uri
  relation = has_relation
  id = relation && relation["id"]
  return if id.nil? || id.to_s.strip.empty?

  [
    "https://treasuryoflives.org/search/by_name/#{id}",
    "http://library.bdrc.io/show/bdr:#{id}"
  ]
end


end
42 changes: 33 additions & 9 deletions scripts/overrides/csv_to_es_monasteries.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@ class CsvToEsMonasteries < CsvToEs

# Adds the collection-specific fields to @json:
# - "count_k": how many rdf entries have a predicate other than "sameAs",
#   stored as a string
# - "date_accessed_k": the row's "Accessed" value passed through
#   Datura::Helpers.date_standardize, with false as the second argument
def assemble_collection_specific
  relationships = rdf.reject { |entry| entry["predicate"] == "sameAs" }
  @json["count_k"] = relationships.count.to_s

  accessed = @row["Accessed"]
  @json["date_accessed_k"] = Datura::Helpers.date_standardize(accessed, false)
end

# Returns the identifier for this row, read from the "id 2" column.
#
# A leftover `"mon_" + @row["id"]` expression used to run first; its result
# was discarded, and it raised TypeError whenever the row had no "id" value.
# It has been removed so only the "id 2" lookup remains.
# (should work with baserow)
def get_id
  @row["id 2"]
end

def category
Expand All @@ -22,9 +24,12 @@ def person
# how to get the associated figures back in to here?
# two-way relationships in Orchid and Elasticsearch
# it should at least
# how to change for baserow? I'm not sure it is really different from the rdf field
# could record the figures somewhere
end

def date_not_before
#should work with baserow
if @row["founding date"] && !@row["founding date"].empty?
Datura::Helpers.date_standardize(@row["founding date"], false)
end
Expand All @@ -38,24 +43,26 @@ def date_display


def rdf
# need to construct a markdown type field
items = []
if @row["figures"]
if @row["Associated Figures"]
# each figure should be in the format id|role|associated_teaching|story
JSON.parse(@row["figures"]).each do |figure|
figure_data = figure.split("|")
@row["Associated Figures"].split("\",\"").each do |figure|
figure_data = figure.tr("\"", "").split("|")
if figure_data[2] == "nan"
figure_data[2] = nil
end
items << {
"subject" => figure_data[0], #figure id and name
"predicate" => figure_data[1], #role
"predicate" => figure_data[2], #role
"object" => title, #name of current monastery
"source" => figure_data[2], #associated teaching
"note" => figure_data[3] #story
"source" => figure_data[3], #associated teaching
"note" => figure_data[4] #story
}
end
end
if relation
#this should work in baserow but I need to figure out the uri part
items << {
"subject" => uri,
"predicate" => "sameAs",
Expand All @@ -75,7 +82,24 @@ def rdf
items
end

def relation
@row["BDRC number"]
# Returns a one-entry hash whose "id" is the row's "BDRC number" value.
# The hash is returned even when that value is nil.
def has_relation
  bdrc_number = @row["BDRC number"]
  { "id" => bdrc_number }
end

# Returns a single BDRC citation hash for this monastery: named by the title
# and dated with the fixed year "2024", which is passed through
# Datura::Helpers.date_standardize with false as the second argument.
def citation
  {
    "name" => title,
    "date" => Datura::Helpers.date_standardize("2024", false),
    "publisher" => "BDRC"
  }
end

# Returns a one-element array holding the BDRC page link built from the "id"
# in has_relation. Returns nil when there is no id to link to.
#
# has_relation returns a Hash, which is truthy even when its "id" is nil, so
# guarding on the Hash alone emitted a link ending in "bdr:". The guard now
# checks the id itself, and treats a blank id as missing.
def rights_uri
  relation = has_relation
  id = relation && relation["id"]
  return if id.nil? || id.to_s.strip.empty?

  ["https://library.bdrc.io/show/bdr:#{id}"]
end
end
2 changes: 1 addition & 1 deletion scripts/overrides/file_csv.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def read_csv(file_location, encoding="utf-8")

def row_to_es(headers, row, table)
# process the cases and people tables with different overrides
puts "processing " + row["id"]
puts "processing " + row["id 2"]
if table == "figures"
CsvToEs.new(row, options, @csv, self.filename(false)).json
elsif table == "monasteries"
Expand Down
Loading