Skip to content

Commit

Permalink
Support s3 file storage (#2338)
Browse files Browse the repository at this point in the history
* further refactor migrate resource job to make it more versatile

* support shrine based s3 storage in Hyku

* add some docs
  • Loading branch information
orangewolf authored Sep 20, 2024
1 parent ee48d6f commit b193a39
Show file tree
Hide file tree
Showing 8 changed files with 145 additions and 45 deletions.
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ gem 'tether-rails'
gem 'turbolinks', '~> 5'
gem 'twitter-typeahead-rails', '0.11.1.pre.corejavascript'
gem 'valkyrie', '~> 3.0', '>= 3.1.4' # There's a patch in 3.1.4 that we want
gem 'valkyrie-shrine'
gem 'web-console', '>= 3.3.0', group: %i[development] # <%= console %> in views
gem 'webdrivers', '~> 4.7.0', group: %i[test]
gem 'webmock', group: %i[test]
Expand Down
11 changes: 11 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,7 @@ GEM
colorize (0.8.1)
concurrent-ruby (1.3.4)
connection_pool (2.4.1)
content_disposition (1.0.0)
crack (0.4.5)
rexml
crass (1.0.6)
Expand Down Expand Up @@ -489,6 +490,8 @@ GEM
representable (>= 3.1.1, < 4)
docile (1.4.0)
docopt (0.5.0)
down (5.4.2)
addressable (~> 2.8)
draper (4.0.2)
actionpack (>= 5.0)
activemodel (>= 5.0)
Expand Down Expand Up @@ -1286,6 +1289,9 @@ GEM
sxp (~> 1.2)
shoulda-matchers (4.5.1)
activesupport (>= 4.2.0)
shrine (3.6.0)
content_disposition (~> 1.0)
down (~> 5.1)
sidekiq (6.5.12)
connection_pool (>= 2.2.5, < 3)
rack (~> 2.0)
Expand Down Expand Up @@ -1417,6 +1423,10 @@ GEM
rdf-vocab
reform (~> 2.2)
reform-rails
valkyrie-shrine (1.0.0)
aws-sdk-s3 (~> 1)
shrine (>= 2.0, < 4.0)
valkyrie (> 1.0)
version_gem (1.1.4)
view_component (2.74.1)
activesupport (>= 5.0.0, < 8.0)
Expand Down Expand Up @@ -1565,6 +1575,7 @@ DEPENDENCIES
turbolinks (~> 5)
twitter-typeahead-rails (= 0.11.1.pre.corejavascript)
valkyrie (~> 3.0, >= 3.1.4)
valkyrie-shrine
web-console (>= 3.3.0)
webdrivers (~> 4.7.0)
webmock
Expand Down
54 changes: 17 additions & 37 deletions app/jobs/migrate_resources_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,46 +2,26 @@

# migrates models from AF to valkyrie
class MigrateResourcesJob < ApplicationJob
attr_accessor :errors
# input [Array<String>] Array of ActiveFedora model names to migrate to valkyrie objects
# defaults to AdminSet & Collection models if empty
def perform(models: [])
models = collection_models_list if models.empty?

models.each do |model|
model.constantize.find_each do |item|
res = Hyrax.query_service.find_by(id: item.id)
# start with a form for the resource
fm = form_for(model:).constantize.new(resource: res)
# save the form
result = Hyrax::Transactions::Container[collection_model_event_mapping[model]]
.with_step_args(**collection_model_steps_mapping[model]).call(fm)
result.value!
def perform(models: ['AdminSet', 'Collection'], ids: [])
errors = []
if ids.blank?
models.each do |model|
model.constantize.find_each do |item|
resource = Hyrax.query_service.find_by(id: item.id)
result = MigrateResourceService.new(resource: resource).call
errors << result unless result.success?
end
end
else
ids.each do |id|
resource = Hyrax.query_service.find_by(id: id)
result = MigrateResourceService.new(resource: resource).call
errors << result unless result.success?
end
end
end

def form_for(model:)
model.to_s + 'ResourceForm'
end

def collection_models_list
%w[AdminSet Collection]
end

def collection_model_event_mapping
{
'AdminSet' => 'admin_set_resource.update',
'Collection' => 'change_set.update_collection'
}
end

def collection_model_steps_mapping
{
'AdminSet' => {},
'Collection' => {
'collection_resource.save_collection_banner' => { banner_unchanged_indicator: true },
'collection_resource.save_collection_logo' => { logo_unchanged_indicator: true }
}
}
raise errors.inspect if errors.present?
end
end
51 changes: 51 additions & 0 deletions app/services/migrate_resource_service.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# frozen_string_literal: true

# Migrates a single resource from ActiveFedora to Valkyrie by running it
# through the update transaction mapped to its model name.
#
# Usage: MigrateResourceService.new(resource: resource).call
class MigrateResourceService
  attr_reader :resource

  # @param resource the resource to migrate; its class is what gets looked up
  #   in Wings::ModelRegistry to decide which transaction to run
  def initialize(resource:)
    @resource = resource
  end

  # Swaps the resource being migrated and clears everything memoized from the
  # previous one, so #model and #resource_form never describe a stale resource.
  def resource=(new_resource)
    @model = nil
    @resource_form = nil
    @resource = new_resource
  end

  # String form of whatever Wings::ModelRegistry.lookup returns for the
  # resource's class. Memoized: it is read several times per #call and the
  # registry lookup only needs to happen once per resource.
  #
  # @return [String]
  def model
    @model ||= Wings::ModelRegistry.lookup(resource.class).to_s
  end

  # Prepares the resource, then runs the transaction registered for its model
  # with that model's step arguments, passing in the resource's form.
  #
  # @return the value returned by the transaction's #call
  def call
    prep_resource
    Hyrax::Transactions::Container[collection_model_event_mapping[model]]
      .with_step_args(**collection_model_steps_mapping[model]).call(resource_form)
  end

  # Per-model fix-ups applied to the resource before it is saved.
  def prep_resource
    case model
    when 'FileSet'
      # A file set with no creator gets the batch user's email as its creator.
      resource.creator << ::User.batch_user.email if resource.creator.blank?
    end
  end

  # Form wrapping the resource, built once per resource.
  def resource_form
    @resource_form ||= Hyrax::Forms::ResourceForm.for(resource: resource)
  end

  # Model name => key of the transaction in Hyrax::Transactions::Container.
  #
  # @return [Hash{String => String}]
  def collection_model_event_mapping
    {
      'AdminSet' => 'admin_set_resource.update',
      'Collection' => 'change_set.update_collection',
      'FileSet' => 'change_set.update_file_set'
    }
  end

  # Model name => step arguments handed to the transaction's with_step_args.
  # A fresh hash is built on every call so nothing downstream can mutate
  # shared state.
  #
  # @return [Hash{String => Hash}]
  def collection_model_steps_mapping
    {
      'AdminSet' => {},
      'Collection' => {
        'collection_resource.save_collection_banner' => { banner_unchanged_indicator: true },
        'collection_resource.save_collection_logo' => { logo_unchanged_indicator: true }
      },
      'FileSet' => {
        'file_set.save_acl' => {}
      }
    }
  end
end
40 changes: 34 additions & 6 deletions config/initializers/wings.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
# frozen_string_literal: true

# Pull in the Shrine-backed S3 storage classes only when the
# REPOSITORY_S3_STORAGE environment variable casts to true.
if ActiveModel::Type::Boolean.new.cast(ENV.fetch("REPOSITORY_S3_STORAGE", false))
  %w[
    shrine/storage/s3
    valkyrie/storage/shrine
    valkyrie/shrine/checksum/s3
  ].each { |library| require library }
end

# rubocop:disable Metrics/BlockLength
Rails.application.config.after_initialize do
[
Expand Down Expand Up @@ -31,12 +38,33 @@
Hyrax.config.query_index_from_valkyrie = true
Hyrax.config.index_adapter = :solr_index

Valkyrie::StorageAdapter.register(
Valkyrie::Storage::Disk.new(base_path: Rails.root.join("storage", "files"),
file_mover: FileUtils.method(:cp)),
:disk
)
Valkyrie.config.storage_adapter = :disk
# Register the primary Valkyrie storage adapter: Shrine-backed S3 when
# REPOSITORY_S3_STORAGE casts to true, otherwise files on local disk.
if ActiveModel::Type::Boolean.new.cast(ENV.fetch("REPOSITORY_S3_STORAGE", false))
  shrine_s3_options = {
    bucket: ENV.fetch("REPOSITORY_S3_BUCKET") { "nurax_pg#{Rails.env}" },
    region: ENV.fetch("REPOSITORY_S3_REGION", "us-east-1"),
    access_key_id: ENV["REPOSITORY_S3_ACCESS_KEY"],
    secret_access_key: ENV["REPOSITORY_S3_SECRET_KEY"]
  }

  # A custom endpoint host switches the client to http://HOST:PORT with
  # path-style addressing.
  s3_endpoint_host = ENV["REPOSITORY_S3_ENDPOINT"]
  if s3_endpoint_host.present?
    shrine_s3_options.merge!(
      endpoint: "http://#{s3_endpoint_host}:#{ENV.fetch('REPOSITORY_S3_PORT', 9000)}",
      force_path_style: true
    )
  end

  s3_storage = Shrine::Storage::S3.new(**shrine_s3_options)
  Valkyrie::StorageAdapter.register(Valkyrie::Storage::Shrine.new(s3_storage), :repository_s3)
  Valkyrie.config.storage_adapter = :repository_s3
else
  disk_storage = Valkyrie::Storage::Disk.new(
    base_path: Rails.root.join("storage", "files"),
    file_mover: FileUtils.method(:cp)
  )
  Valkyrie::StorageAdapter.register(disk_storage, :disk)
  Valkyrie.config.storage_adapter = :disk
end
Valkyrie.config.indexing_adapter = :solr_index

# load all the sql based custom queries
Expand Down
13 changes: 12 additions & 1 deletion docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,13 @@ Hyku is primarily configured using environment variables. The default configurat
| REDIS_HOST | Host location of redis | redis | no |
| REDIS_PASSWORD | Password for redis, optional | - | no |
| REDIS_URL | Optional explicit redis url, built from host/password if not specified | redis://:staging@redis:6397/ | no |
| REPOSITORY_S3_STORAGE | Whether to turn on S3 or S3 like storage for Valkyrie or not | false | no |
| REPOSITORY_S3_BUCKET | If storing file uploads in S3, what bucket should they be put in | nurax_pg followed by the Rails environment name (e.g. nurax_pgproduction) | no |
| REPOSITORY_S3_REGION | Region code for S3 like storage | us-east-1 | no |
| REPOSITORY_S3_ACCESS_KEY | Access key for S3 like storage | - | no |
| REPOSITORY_S3_SECRET_KEY | The secret key for S3 like storage | - | no |
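| REPOSITORY_S3_ENDPOINT | Host name (without scheme or port) of a custom S3 endpoint such as Minio; when set, files are accessed at `http://<endpoint>:<port>` using path-style addressing | - | no |
| REPOSITORY_S3_PORT | Port used together with REPOSITORY_S3_ENDPOINT; only needed for S3 like storage such as Minio | 9000 | no |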
| SECRET_KEY_BASE | Used by Rails to secure sessions, should be a 128 character hex | - | no |
| SMTP_ADDRESS | Address of the smtp endpoint for sending email | - | no |
| SMTP_DOMAIN | Domain for sending email | - | no |
Expand Down Expand Up @@ -197,4 +204,8 @@ You can log all of the I18n lookups to the Rails logger by setting the I18N_DEBU

```console
$ I18N_DEBUG=true bin/rails server
```
```

## S3 Like Storage

You can store your primary work files in S3 in Valkyrie mode by setting `REPOSITORY_S3_STORAGE` to true and setting the accompanying bucket and credential variables. This supports both AWS S3 and other S3-like storage engines such as Minio. As of this writing, this only affects Valkyrie resources and only the primary storage; derivatives, uploads, and branding assets all still go to the shared storage directories.
7 changes: 7 additions & 0 deletions lib/reprocessor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,13 @@ def lambda_save
}
end

# Lambda that takes one input line (plus an unused progress argument), strips
# surrounding whitespace to get an id, and enqueues a MigrateResourcesJob for
# that single id. The lambda takes no configuration, so it is built once and
# reused on later calls.
def lambda_migrate_resources
  @lambda_migrate_resources ||= lambda { |line, _progress|
    id = line.strip
    MigrateResourcesJob.perform_later(ids: [id])
  }
end

# because this takes an arg, we dont memoize
def lambda_job(_job_klass)
@lambda_job = lambda { |line, _progress, job_klass|
Expand Down
13 changes: 12 additions & 1 deletion spec/jobs/migrate_resources_job_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
clear_enqueued_jobs
end

let(:account) { create(:account_with_public_schema) }
let(:account) { create(:account_with_public_schema) }
let(:af_file_set) { create(:file_set, title: ['TestFS']) }

let!(:af_admin_set) do
as = AdminSet.new(title: ['AF Admin Set'])
Expand All @@ -29,5 +30,15 @@

expect(Valkyrie::Persistence::Postgres::ORM::Resource.find_by(id: af_admin_set.id.to_s)).to be_present
end

# Exercises the `ids:` keyword of MigrateResourcesJob with a single file set id.
it "migrates a file set by its id", active_fedora_to_valkyrie: true do
# Precondition: no Postgres-backed Valkyrie record exists for this id yet.
expect(Valkyrie::Persistence::Postgres::ORM::Resource.find_by(id: af_file_set.id.to_s)).to be_nil

# Presumably lets jobs enqueued during the migration run immediately — TODO confirm against the queue adapter in use.
ActiveJob::Base.queue_adapter.perform_enqueued_jobs = true
switch!(account)
MigrateResourcesJob.perform_now(ids: [af_file_set.id])

# After the job has run, a record with the same id must be present.
expect(Valkyrie::Persistence::Postgres::ORM::Resource.find_by(id: af_file_set.id.to_s)).to be_present
end
end
end

0 comments on commit b193a39

Please sign in to comment.