Skip to content

Commit

Permalink
Merge pull request #609 from IU-Libraries-Joint-Development/cleanup_job_updates
Browse files Browse the repository at this point in the history

Cleanup job tweaks
  • Loading branch information
dlpierce authored May 14, 2024
2 parents cde2805 + b3dfd03 commit 4ce02a5
Showing 1 changed file with 24 additions and 17 deletions.
41 changes: 24 additions & 17 deletions app/jobs/clean_list_source_job.rb
Original file line number Diff line number Diff line change
@@ -1,25 +1,32 @@
class CleanListSourceJob < ApplicationJob
queue_as Hyrax.config.ingest_queue_name
queue_as 'important'

# Cleans up extraneous list_source resources that were left behind when adding a file to an existing work
# when using the OrderedMembersActor.
def perform(work)
sparql = ""
# Cleans up extraneous list_source resources that were left behind when adding a file to an existing work when using OrderedMembersActor.
def perform(work, batch_size = 100)
# raise ArgumentError, "batch size must be positive" unless batch_size > 0
list_source = work.list_source
# Build list of resources that need to be cleaned up, leaving out the active resources.
rm_uris = (list_source.ldp_source.graph.subjects - list_source.nodes_ids.to_a) - [list_source.uri]
rm_uris.each do |uri|
# Build SQARQL delete commands for each predicate of each resource to be cleaned.
rm_statements = list_source.ldp_source.graph.statements.filter { |s| s.subject == uri }
rm_statements.each do |s|
pattern = "<#{s.subject}> <#{s.predicate}> ?change ."
sparql << "DELETE { #{pattern} }\n WHERE { #{pattern} } ;\n"
graph = list_source.ldp_source.graph
rm_uris = (graph.subjects - list_source.nodes_ids.to_a) - [list_source.uri]
ls_statements = graph.statements
until rm_uris.empty?
Rails.logger.info { "Cleaning up #{rm_uris.length} leftover list_source parts for #{list_source.uri.to_s}." }
sparql = ""
rm_uris.slice!(0, batch_size).each do |uri|
# Build SPARQL delete commands for each predicate of each resource to be cleaned.
rm_statements = ls_statements.filter { |s| s.subject == uri }
rm_statements.each do |s|
pattern = "<#{s.subject}> <#{s.predicate}> ?change ."
sparql << "DELETE { #{pattern} }\n WHERE { #{pattern} } ;\n"
end
end
Rails.logger.debug { "SPARQL UPDATE:\n#{sparql}" }
# Send directly to Fedora. See ActiveFedora::SparqlInsert
result = ActiveFedora.fedora.connection.patch(list_source.uri, sparql, "Content-Type" => "application/sparql-update") do |req|
req.options.timeout = 600
end
raise "Problem updating #{list_source.uri.to_s} #{result.status} #{result.body}" unless result.status == 204
end
Rails.logger.debug { "Cleaning up leftover list_source parts for #{list_source.uri.to_s}. SPARQL UPDATE:\n#{sparql}" }
# Send directly to Fedora. See ActiveFedora::SparqlInsert
result = ActiveFedora.fedora.connection.patch(list_source.uri, sparql, "Content-Type" => "application/sparql-update")
return true if result.status == 204
raise "Problem updating #{result.status} #{result.body}"
return true
end
end

0 comments on commit 4ce02a5

Please sign in to comment.