Skip to content

Commit 65c3dca

Browse files
committed
add language processing to ldpath service
1 parent ed35214 commit 65c3dca

File tree

5 files changed

+241
-55
lines changed

5 files changed

+241
-55
lines changed

app/services/qa/linked_data/ldpath_service.rb

+74-28
Original file line numberDiff line numberDiff line change
@@ -4,39 +4,85 @@
44
module Qa
55
module LinkedData
66
class LdpathService
7-
VALUE_ON_ERROR = [].freeze
7+
LANGUAGE_PATTERN = "*LANG*".freeze
8+
PROPERTY_NAME = "property".freeze
89

910
class_attribute :predefined_prefixes
1011
self.predefined_prefixes = Ldpath::Transform.default_prefixes.with_indifferent_access
1112

12-
# Create the ldpath program for a given ldpath.
13-
# @param ldpath [String] ldpath to follow to get a value from a graph (documation: http://marmotta.apache.org/ldpath/language.html)
14-
# @param prefixes [Hash] shortcut names for URI prefixes with key = part of predicate that is the same for all terms (e.g. { "madsrdf": "http://www.loc.gov/mads/rdf/v1#" })
15-
# @return [Ldpath::Program] an executable program that will extract a value from a graph
16-
def self.ldpath_program(ldpath:, prefixes: {})
17-
program_code = ""
18-
prefixes.each { |key, url| program_code << "@prefix #{key} : <#{url}> \;\n" }
19-
program_code << "property = #{ldpath} \;"
20-
Ldpath::Program.parse program_code
21-
rescue => e
22-
Rails.logger.warn("WARNING: #{I18n.t('qa.linked_data.ldpath.parse_logger_error')}... cause: #{e.message}\n ldpath_program=\n#{program_code}")
23-
raise StandardError, I18n.t("qa.linked_data.ldpath.parse_error") + "... cause: #{e.message}"
24-
end
13+
class << self
14+
# Create the ldpath program for a given ldpath.
15+
# @param ldpath [String] ldpath to follow to get a value from a graph (documentation: http://marmotta.apache.org/ldpath/language.html)
16+
# @param prefixes [Hash] shortcut names for URI prefixes with key = part of predicate that is the same for all terms (e.g. { "madsrdf": "http://www.loc.gov/mads/rdf/v1#" })
17+
# @param languages [Array<Symbol>] limit results to these languages and anything not tagged (applies to ldpaths with *LANG* marker)
18+
# @return [Ldpath::Program] an executable program that will extract a value from a graph
# @raise [StandardError] when building or parsing the program code fails; the failure is logged as a warning
#   together with the program code, then re-raised with the translated parse_error text plus the original cause
19+
def ldpath_program(ldpath:, prefixes: {}, languages: [])
20+
program_code = ldpath_program_code(ldpath: ldpath, prefixes: prefixes, languages: languages)
21+
Ldpath::Program.parse program_code
22+
rescue => e
23+
Rails.logger.warn("WARNING: #{I18n.t('qa.linked_data.ldpath.parse_logger_error')}... cause: #{e.message}\n ldpath_program=\n#{program_code}")
24+
raise StandardError, I18n.t("qa.linked_data.ldpath.parse_error") + "... cause: #{e.message}"
25+
end
26+
27+
# Create the program code for a given ldpath.
# The code is one `@prefix key : <url> ;` line per prefix followed by the property line(s) appended by property_explode.
28+
# @param ldpath [String] ldpath to follow to get a value from a graph (documentation: http://marmotta.apache.org/ldpath/language.html)
29+
# @param prefixes [Hash] shortcut names for URI prefixes with key = part of predicate that is the same for all terms (e.g. { "madsrdf": "http://www.loc.gov/mads/rdf/v1#" })
30+
# @param languages [Array<Symbol>] limit results to these languages and anything not tagged (applies to ldpaths with *LANG* marker)
31+
# @return [String] the program code string used with Ldpath::Program.parse
32+
def ldpath_program_code(ldpath:, prefixes: {}, languages: [])
33+
program_code = ""
34+
prefixes.each { |key, url| program_code << "@prefix #{key} : <#{url}> \;\n" }
35+
property_explode(program_code, ldpath, languages)
36+
end
37+
38+
# Evaluate an ldpath for a specific subject uri in the context of a graph and return the extracted values.
39+
# @param program [Ldpath::Program] an executable program that will extract a value from a graph
40+
# @param graph [RDF::Graph] the graph from which the values will be extracted
41+
# @param subject_uri [RDF::URI] retrieved values will be limited to those with the subject uri
42+
# @param limit_to_context [Boolean] if true, the evaluation process will not make any outside network calls.
43+
# It will limit results to those found in the context graph.
44+
# @return [Array, nil] the unique values flattened across every property of the program; String values are
#   returned as RDF::Literal (language-tagged when they come from a language-specific property), any other
#   value is returned as evaluated; nil when the evaluation output is blank
# @raise [ArgumentError] if program is blank
# @raise [ParseError] if evaluation raises ParseError; it is logged as a warning and re-raised with the
#   translated evaluate_error text plus the original cause. Other errors propagate unchanged.
# NOTE(review): the logged text opens "(cause:" without a closing parenthesis; the spec matches this exact
#   text, so change both together if it is corrected.
45+
def ldpath_evaluate(program:, graph:, subject_uri:, limit_to_context: Qa.config.limit_ldpath_to_context?)
46+
raise ArgumentError, "You must specify a program when calling ldpath_evaluate" if program.blank?
47+
output = program.evaluate(subject_uri, context: graph, limit_to_context: limit_to_context)
48+
property_implode(output)
49+
rescue ParseError => e
50+
Rails.logger.warn("WARNING: #{I18n.t('qa.linked_data.ldpath.evaluate_logger_error')} (cause: #{e.message}")
51+
raise ParseError, I18n.t("qa.linked_data.ldpath.evaluate_error") + "... cause: #{e.message}"
52+
end
53+
54+
private
55+
56+
# create program code with a property per language + untagged
57+
def property_explode(program_code, ldpath, languages)
58+
return program_code << "#{PROPERTY_NAME} = #{ldpath} \;\n" unless ldpath.index(LANGUAGE_PATTERN)
59+
return program_code << "#{PROPERTY_NAME} = #{ldpath.gsub(LANGUAGE_PATTERN, '')} \;\n" unless languages.present?
60+
languages.map { |language| program_code << "#{property_name_for(language)} = #{ldpath.gsub(LANGUAGE_PATTERN, "[@#{language}]")} \;\n" }
61+
program_code << "#{PROPERTY_NAME} = #{ldpath.gsub(LANGUAGE_PATTERN, '[@none]')} \;\n"
62+
end
63+
64+
# flatten all properties and turn into RDF::Literals with language tagging if appropriate
# @param output [Hash] evaluation output keyed by property name, each entry holding that property's values
# @return [Array, nil] unique values across all properties; nil when output is blank
65+
def property_implode(output)
66+
return nil if output.blank?
67+
output.each do |property_name, values|
68+
# only properties whose first value is a String are converted; the entry is replaced in the passed-in hash
output[property_name] = remap_string_values(property_name, values) if values.first.is_a? String
69+
end
70+
output.values.flatten.uniq
71+
end
72+
73+
# Build the name of the language-specific property, i.e. the language followed by "_" and PROPERTY_NAME.
# @param language [Symbol, String] language the property is limited to
# @return [String] the property name for that language
def property_name_for(language)
74+
"#{language}_#{PROPERTY_NAME}"
75+
end
76+
77+
# Recover the language from a property name produced by property_name_for.
# @param property_name [String] name of a property in the evaluation output
# @return [String, nil] the name with the trailing "_" + PROPERTY_NAME removed; nil when the name is
#   PROPERTY_NAME itself (compared ignoring case), i.e. the property that carries no language
def language_from(property_name)
78+
return nil if property_name.casecmp?(PROPERTY_NAME)
79+
property_name.chomp("_#{PROPERTY_NAME}")
80+
end
2581

26-
# Evaluate an ldpath for a specific subject uri in the context of a graph and return the extracted values.
27-
# @param program [Ldpath::Program] an executable program that will extract a value from a graph
28-
# @param graph [RDF::Graph] the graph from which the values will be extracted
29-
# @param subject_uri [RDF::URI] retrieved values will be limited to those with the subject uri
30-
# @param limit_to_context [Boolean] if true, the evaluation process will not make any outside network calls.
31-
# It will limit results to those found in the context graph.
32-
## @return [Array<String>] the extracted values based on the ldpath
33-
def self.ldpath_evaluate(program:, graph:, subject_uri:, limit_to_context: Qa.config.limit_ldpath_to_context?)
34-
return VALUE_ON_ERROR if program.blank?
35-
output = program.evaluate(subject_uri, context: graph, limit_to_context: limit_to_context)
36-
output.present? ? output['property'].uniq : nil
37-
rescue => e
38-
Rails.logger.warn("WARNING: #{I18n.t('qa.linked_data.ldpath.evaluate_logger_error')} (cause: #{e.message}")
39-
raise StandardError, I18n.t("qa.linked_data.ldpath.evaluate_error") + "... cause: #{e.message}"
82+
# Wrap each value in an RDF::Literal, passing the language derived from the property name (nil for the
# property without a language).
# @param property_name [String] name of the property the values were extracted for
# @param values [Array<String>] values to convert
# @return [Array<RDF::Literal>] one literal per value
def remap_string_values(property_name, values)
83+
language = language_from(property_name)
84+
values.map { |v| RDF::Literal.new(v, language: language) }
85+
end
4086
end
4187
end
4288
end

app/services/qa/linked_data/mapper/graph_ldpath_mapper_service.rb

+2-1
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,10 @@ class GraphLdpathMapperService
1919
# @example ldpath map
2020
# {
2121
# uri: :subject_uri,
22-
# id: 'locid:lccn :: xsd::string',
22+
# id: 'locid:lccn',
2323
# label: 'skos:prefLabel :: xsd:string',
2424
# altlabel: 'skos:altLabel :: xsd:string',
25+
# sameas: 'skos:sameAs :: xsd:anyURI',
2526
# sort: 'vivo:rank :: xsd:integer'
2627
# }
2728
# @param subject_uri [RDF::URI] the subject within the graph for which the values are being extracted

spec/models/linked_data/config/context_property_map_spec.rb

+3-3
Original file line numberDiff line numberDiff line change
@@ -277,9 +277,9 @@
277277
let(:expanded_id) { '123' }
278278

279279
before do
280-
allow(Ldpath::Program).to receive(:parse).with('property = madsrdf:identifiesRWO/madsrdf:birthDate/schema:label ;').and_return(basic_program)
281-
allow(Ldpath::Program).to receive(:parse).with('property = skos:prefLabel ::xsd:string ;').and_return(expanded_label_program)
282-
allow(Ldpath::Program).to receive(:parse).with('property = loc:lccn ::xsd:string ;').and_return(expanded_id_program)
280+
allow(Ldpath::Program).to receive(:parse).with("property = madsrdf:identifiesRWO/madsrdf:birthDate/schema:label ;\n").and_return(basic_program)
281+
allow(Ldpath::Program).to receive(:parse).with("property = skos:prefLabel ::xsd:string ;\n").and_return(expanded_label_program)
282+
allow(Ldpath::Program).to receive(:parse).with("property = loc:lccn ::xsd:string ;\n").and_return(expanded_id_program)
283283
allow(basic_program).to receive(:evaluate).with(subject_uri, context: graph, limit_to_context: true).and_return('property' => [expanded_uri])
284284
allow(expanded_label_program).to receive(:evaluate).with(RDF::URI.new(subject_uri), context: graph, limit_to_context: true).and_return('property' => [expanded_label])
285285
allow(expanded_id_program).to receive(:evaluate).with(RDF::URI.new(subject_uri), context: graph, limit_to_context: true).and_return('property' => [expanded_id])

spec/services/linked_data/ldpath_service_spec.rb

+148-9
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,13 @@
1717
context 'when ldpath_program gets parse error' do
1818
let(:cause) { "undefined method `ascii_tree' for nil:NilClass" }
1919
let(:warning) { I18n.t('qa.linked_data.ldpath.parse_logger_error') }
20-
let(:program_code) { "@prefix skos : <http://www.w3.org/2004/02/skos/core#> ;\nproperty = skos:prefLabel ::xsd:string ;" }
20+
let(:program_code) { "BAD_PROGRAM ;" }
2121
let(:log_message) { "WARNING: #{warning}... cause: #{cause}\n ldpath_program=\n#{program_code}" }
2222

23-
before { allow(Ldpath::Program).to receive(:parse).with(anything).and_raise(cause) }
23+
before do
24+
allow(described_class).to receive(:ldpath_program_code).with(anything).and_return(program_code)
25+
allow(Ldpath::Program).to receive(:parse).with(anything).and_raise(cause)
26+
end
2427

2528
it 'logs error and returns PARSE ERROR as the value' do
2629
expect(Rails.logger).to receive(:warn).with(log_message)
@@ -29,32 +32,168 @@
2932
end
3033
end
3134

35+
# Checks the exact program text generated for an ldpath: prefix lines first, then the property line(s).
describe '.ldpath_program_code' do
36+
subject { described_class.ldpath_program_code(ldpath: ldpath, prefixes: prefixes, languages: languages) }
37+
38+
# without the *LANG* marker the requested languages have no effect on the generated code
context 'for a ldpath without language pattern' do
39+
let(:ldpath) { 'dcterms:identifier' }
40+
let(:languages) { [:fr] }
41+
let(:prefixes) { { "dcterms" => "http://purl.org/dc/terms/" } }
42+
it 'generates the simple program code' do
43+
expected_program = <<-PROGRAM
44+
@prefix dcterms : <http://purl.org/dc/terms/> \;
45+
property = dcterms:identifier \;
46+
PROGRAM
47+
expect(subject).to eq expected_program
48+
end
49+
end
50+
51+
# with the *LANG* marker the code gets one property per language plus the property using [@none]
context 'for a ldpath with language pattern' do
52+
let(:ldpath) { 'madsrdf:authoritativeLabel*LANG* ::xsd:string' }
53+
let(:prefixes) { { "madsrdf" => "http://www.loc.gov/mads/rdf/v1#" } }
54+
context 'and no languages specified' do
55+
let(:languages) { nil }
56+
it 'generates the simple program code' do
57+
expected_program = <<-PROGRAM
58+
@prefix madsrdf : <http://www.loc.gov/mads/rdf/v1#> \;
59+
property = madsrdf:authoritativeLabel ::xsd:string \;
60+
PROGRAM
61+
expect(subject).to eq expected_program
62+
end
63+
end
64+
65+
context 'and one language specified' do
66+
let(:languages) { [:en] }
67+
it 'generates a program with the language' do
68+
expected_program = <<-PROGRAM
69+
@prefix madsrdf : <http://www.loc.gov/mads/rdf/v1#> \;
70+
en_property = madsrdf:authoritativeLabel[@en] ::xsd:string \;
71+
property = madsrdf:authoritativeLabel[@none] ::xsd:string \;
72+
PROGRAM
73+
expect(subject).to eq expected_program
74+
end
75+
end
76+
77+
context 'and multiple languages specified' do
78+
let(:languages) { [:fr, :de] }
79+
it 'generates a program with languages' do
80+
expected_program = <<-PROGRAM
81+
@prefix madsrdf : <http://www.loc.gov/mads/rdf/v1#> \;
82+
fr_property = madsrdf:authoritativeLabel[@fr] ::xsd:string \;
83+
de_property = madsrdf:authoritativeLabel[@de] ::xsd:string \;
84+
property = madsrdf:authoritativeLabel[@none] ::xsd:string \;
85+
PROGRAM
86+
expect(subject).to eq expected_program
87+
end
88+
end
89+
end
90+
end
91+
3292
describe '.ldpath_evaluate' do
3393
subject { described_class.ldpath_evaluate(program: program, graph: graph, subject_uri: subject_uri) }
3494

3595
let(:program) { instance_double(Ldpath::Program) }
3696
let(:graph) { instance_double(RDF::Graph) }
3797
let(:subject_uri) { instance_double(RDF::URI) }
38-
let(:values) { ['Expanded Label'] }
3998

4099
before do
41-
allow(Ldpath::Program).to receive(:parse).with('property = skos:prefLabel ::xsd:string ;').and_return(program)
42-
allow(program).to receive(:evaluate).with(subject_uri, context: graph, limit_to_context: true).and_return('property' => values)
100+
allow(Ldpath::Program).to receive(:parse).with(anything).and_return(program)
43101
end
44-
it 'returns the extracted label' do
45-
expect(subject).to match_array values
102+
103+
context 'when program does not contain languages' do
104+
context 'and value is a string' do
105+
let(:values) { ['value'] }
106+
before do
107+
allow(program).to receive(:evaluate)
108+
.with(subject_uri, context: graph, limit_to_context: true)
109+
.and_return('property' => values)
110+
end
111+
it 'returns the string values as is' do
112+
expected_values = values.map { |v| RDF::Literal.new(v) }
113+
expect(subject).to match_array expected_values
114+
end
115+
end
116+
117+
context 'and value is a URI' do
118+
let(:values) { [RDF::URI.new('http://example.com/1'), RDF::URI.new('http://example.com/2')] }
119+
before do
120+
allow(program).to receive(:evaluate)
121+
.with(subject_uri, context: graph, limit_to_context: true)
122+
.and_return('property' => values)
123+
end
124+
it 'returns the URIs' do
125+
expected_values = values
126+
expect(subject).to match_array expected_values
127+
end
128+
end
129+
130+
context 'and value is numeric' do
131+
let(:values) { [23, 14, 55] }
132+
before do
133+
allow(program).to receive(:evaluate)
134+
.with(subject_uri, context: graph, limit_to_context: true)
135+
.and_return('property' => values)
136+
end
137+
it 'returns the URIs' do
138+
expected_values = values
139+
expect(subject).to match_array expected_values
140+
end
141+
end
142+
end
143+
144+
context 'when program has languages' do
145+
context 'and one language specified' do
146+
let(:en_values) { ['en_value'] }
147+
let(:untagged_values) { ['untagged_value'] }
148+
before do
149+
allow(program).to receive(:evaluate)
150+
.with(subject_uri, context: graph, limit_to_context: true)
151+
.and_return('en_property' => en_values, 'property' => untagged_values)
152+
end
153+
it 'generates a program with the language' do
154+
expected_values =
155+
en_values.map { |v| RDF::Literal.new(v, language: :en) } +
156+
untagged_values.map { |v| RDF::Literal.new(v) }
157+
expect(subject).to match_array expected_values
158+
end
159+
end
160+
161+
context 'and multiple languages specified' do
162+
let(:fr_values) { ['fr_value1', 'fr_value2', 'fr_value1'] }
163+
let(:de_values) { ['de_value'] }
164+
let(:untagged_values) { ['untagged_value'] }
165+
before do
166+
allow(program).to receive(:evaluate)
167+
.with(subject_uri, context: graph, limit_to_context: true)
168+
.and_return('fr_property' => fr_values, 'de_property' => de_values, 'property' => untagged_values)
169+
end
170+
it 'returns the extracted label' do
171+
expected_values =
172+
(fr_values.uniq.map { |v| RDF::Literal.new(v, language: :fr) } +
173+
de_values.map { |v| RDF::Literal.new(v, language: :de) } +
174+
untagged_values.map { |v| RDF::Literal.new(v) }).uniq
175+
expect(subject).to match_array expected_values
176+
end
177+
end
46178
end
47179

48180
context 'when ldpath_evaluate gets parse error' do
49181
let(:cause) { "unknown cause" }
50182
let(:warning) { I18n.t('qa.linked_data.ldpath.evaluate_logger_error') }
51183
let(:log_message) { "WARNING: #{warning} (cause: #{cause}" }
52184

53-
before { allow(program).to receive(:evaluate).with(subject_uri, context: graph, limit_to_context: true).and_raise(cause) }
185+
before { allow(program).to receive(:evaluate).with(subject_uri, context: graph, limit_to_context: true).and_raise(ParseError, cause) }
54186

55187
it 'logs error and returns PARSE ERROR as the value' do
56188
expect(Rails.logger).to receive(:warn).with(log_message)
57-
expect { subject.values(graph, subject_uri) }.to raise_error StandardError, I18n.t('qa.linked_data.ldpath.evaluate_error') + "... cause: #{cause}"
189+
expect { subject }.to raise_error ParseError, I18n.t('qa.linked_data.ldpath.evaluate_error') + "... cause: #{cause}"
190+
end
191+
end
192+
193+
context 'when program is empty' do
194+
let(:program) { nil }
195+
it 'returns empty array' do
196+
expect { subject }.to raise_error ArgumentError, "You must specify a program when calling ldpath_evaluate"
58197
end
59198
end
60199
end

0 commit comments

Comments
 (0)