Skip to content

Commit b193a39

Browse files
authored
Support s3 file storage (#2338)
* further refactor migrate resource job to make it more versatile * support shrine based s3 storage in Hyku * add some docs
1 parent ee48d6f commit b193a39

File tree

8 files changed

+145
-45
lines changed

8 files changed

+145
-45
lines changed

Gemfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ gem 'tether-rails'
117117
gem 'turbolinks', '~> 5'
118118
gem 'twitter-typeahead-rails', '0.11.1.pre.corejavascript'
119119
gem 'valkyrie', '~> 3.0', '>= 3.1.4' # There's a patch in 3.1.4 that we want
120+
gem 'valkyrie-shrine'
120121
gem 'web-console', '>= 3.3.0', group: %i[development] # <%= console %> in views
121122
gem 'webdrivers', '~> 4.7.0', group: %i[test]
122123
gem 'webmock', group: %i[test]

Gemfile.lock

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,7 @@ GEM
440440
colorize (0.8.1)
441441
concurrent-ruby (1.3.4)
442442
connection_pool (2.4.1)
443+
content_disposition (1.0.0)
443444
crack (0.4.5)
444445
rexml
445446
crass (1.0.6)
@@ -489,6 +490,8 @@ GEM
489490
representable (>= 3.1.1, < 4)
490491
docile (1.4.0)
491492
docopt (0.5.0)
493+
down (5.4.2)
494+
addressable (~> 2.8)
492495
draper (4.0.2)
493496
actionpack (>= 5.0)
494497
activemodel (>= 5.0)
@@ -1286,6 +1289,9 @@ GEM
12861289
sxp (~> 1.2)
12871290
shoulda-matchers (4.5.1)
12881291
activesupport (>= 4.2.0)
1292+
shrine (3.6.0)
1293+
content_disposition (~> 1.0)
1294+
down (~> 5.1)
12891295
sidekiq (6.5.12)
12901296
connection_pool (>= 2.2.5, < 3)
12911297
rack (~> 2.0)
@@ -1417,6 +1423,10 @@ GEM
14171423
rdf-vocab
14181424
reform (~> 2.2)
14191425
reform-rails
1426+
valkyrie-shrine (1.0.0)
1427+
aws-sdk-s3 (~> 1)
1428+
shrine (>= 2.0, < 4.0)
1429+
valkyrie (> 1.0)
14201430
version_gem (1.1.4)
14211431
view_component (2.74.1)
14221432
activesupport (>= 5.0.0, < 8.0)
@@ -1565,6 +1575,7 @@ DEPENDENCIES
15651575
turbolinks (~> 5)
15661576
twitter-typeahead-rails (= 0.11.1.pre.corejavascript)
15671577
valkyrie (~> 3.0, >= 3.1.4)
1578+
valkyrie-shrine
15681579
web-console (>= 3.3.0)
15691580
webdrivers (~> 4.7.0)
15701581
webmock

app/jobs/migrate_resources_job.rb

Lines changed: 17 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -2,46 +2,26 @@
22

33
# migrates models from AF to valkyrie
44
class MigrateResourcesJob < ApplicationJob
5+
attr_accessor :errors
56
# input [Array<String>] Array of ActiveFedora model names to migrate to valkyrie objects
67
# defaults to AdminSet & Collection models if empty
7-
def perform(models: [])
8-
models = collection_models_list if models.empty?
9-
10-
models.each do |model|
11-
model.constantize.find_each do |item|
12-
res = Hyrax.query_service.find_by(id: item.id)
13-
# start with a form for the resource
14-
fm = form_for(model:).constantize.new(resource: res)
15-
# save the form
16-
result = Hyrax::Transactions::Container[collection_model_event_mapping[model]]
17-
.with_step_args(**collection_model_steps_mapping[model]).call(fm)
18-
result.value!
8+
def perform(models: ['AdminSet', 'Collection'], ids: [])
9+
errors = []
10+
if ids.blank?
11+
models.each do |model|
12+
model.constantize.find_each do |item|
13+
resource = Hyrax.query_service.find_by(id: item.id)
14+
result = MigrateResourceService.new(resource: resource).call
15+
errors << result unless result.success?
16+
end
17+
end
18+
else
19+
ids.each do |id|
20+
resource = Hyrax.query_service.find_by(id: id)
21+
result = MigrateResourceService.new(resource: resource).call
22+
errors << result unless result.success?
1923
end
2024
end
21-
end
22-
23-
def form_for(model:)
24-
model.to_s + 'ResourceForm'
25-
end
26-
27-
def collection_models_list
28-
%w[AdminSet Collection]
29-
end
30-
31-
def collection_model_event_mapping
32-
{
33-
'AdminSet' => 'admin_set_resource.update',
34-
'Collection' => 'change_set.update_collection'
35-
}
36-
end
37-
38-
def collection_model_steps_mapping
39-
{
40-
'AdminSet' => {},
41-
'Collection' => {
42-
'collection_resource.save_collection_banner' => { banner_unchanged_indicator: true },
43-
'collection_resource.save_collection_logo' => { logo_unchanged_indicator: true }
44-
}
45-
}
25+
raise errors.inspect if errors.present?
4626
end
4727
end
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# frozen_string_literal: true
2+
3+
# migrates models from AF to valkyrie
4+
class MigrateResourceService
5+
attr_accessor :resource
6+
def initialize(resource:)
7+
@resource = resource
8+
end
9+
10+
def model
11+
@model || Wings::ModelRegistry.lookup(resource.class).to_s
12+
end
13+
14+
def call
15+
prep_resource
16+
Hyrax::Transactions::Container[collection_model_event_mapping[model]]
17+
.with_step_args(**collection_model_steps_mapping[model]).call(resource_form)
18+
end
19+
20+
def prep_resource
21+
case model
22+
when 'FileSet'
23+
resource.creator << ::User.batch_user.email if resource.creator.blank?
24+
end
25+
end
26+
27+
def resource_form
28+
@resource_form ||= Hyrax::Forms::ResourceForm.for(resource: resource)
29+
end
30+
31+
def collection_model_event_mapping
32+
{
33+
'AdminSet' => 'admin_set_resource.update',
34+
'Collection' => 'change_set.update_collection',
35+
'FileSet' => 'change_set.update_file_set'
36+
}
37+
end
38+
39+
def collection_model_steps_mapping
40+
{
41+
'AdminSet' => {},
42+
'Collection' => {
43+
'collection_resource.save_collection_banner' => { banner_unchanged_indicator: true },
44+
'collection_resource.save_collection_logo' => { logo_unchanged_indicator: true }
45+
},
46+
'FileSet' => {
47+
'file_set.save_acl' => {}
48+
}
49+
}
50+
end
51+
end

config/initializers/wings.rb

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,11 @@
11
# frozen_string_literal: true
2+
3+
if ActiveModel::Type::Boolean.new.cast(ENV.fetch("REPOSITORY_S3_STORAGE", false))
4+
require "shrine/storage/s3"
5+
require "valkyrie/storage/shrine"
6+
require "valkyrie/shrine/checksum/s3"
7+
end
8+
29
# rubocop:disable Metrics/BlockLength
310
Rails.application.config.after_initialize do
411
[
@@ -31,12 +38,33 @@
3138
Hyrax.config.query_index_from_valkyrie = true
3239
Hyrax.config.index_adapter = :solr_index
3340

34-
Valkyrie::StorageAdapter.register(
35-
Valkyrie::Storage::Disk.new(base_path: Rails.root.join("storage", "files"),
36-
file_mover: FileUtils.method(:cp)),
37-
:disk
38-
)
39-
Valkyrie.config.storage_adapter = :disk
41+
if ActiveModel::Type::Boolean.new.cast(ENV.fetch("REPOSITORY_S3_STORAGE", false))
42+
shrine_s3_options = {
43+
bucket: ENV.fetch("REPOSITORY_S3_BUCKET") { "nurax_pg#{Rails.env}" },
44+
region: ENV.fetch("REPOSITORY_S3_REGION", "us-east-1"),
45+
access_key_id: ENV["REPOSITORY_S3_ACCESS_KEY"],
46+
secret_access_key: ENV["REPOSITORY_S3_SECRET_KEY"]
47+
}
48+
49+
if ENV["REPOSITORY_S3_ENDPOINT"].present?
50+
shrine_s3_options[:endpoint] = "http://#{ENV['REPOSITORY_S3_ENDPOINT']}:#{ENV.fetch('REPOSITORY_S3_PORT', 9000)}"
51+
shrine_s3_options[:force_path_style] = true
52+
end
53+
54+
Valkyrie::StorageAdapter.register(
55+
Valkyrie::Storage::Shrine.new(Shrine::Storage::S3.new(**shrine_s3_options)),
56+
:repository_s3
57+
)
58+
59+
Valkyrie.config.storage_adapter = :repository_s3
60+
else
61+
Valkyrie::StorageAdapter.register(
62+
Valkyrie::Storage::Disk.new(base_path: Rails.root.join("storage", "files"),
63+
file_mover: FileUtils.method(:cp)),
64+
:disk
65+
)
66+
Valkyrie.config.storage_adapter = :disk
67+
end
4068
Valkyrie.config.indexing_adapter = :solr_index
4169

4270
# load all the sql based custom queries

docs/configuration.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,13 @@ Hyku is primarily configured using environment variables. The default configurat
8282
| REDIS_HOST | Host location of redis | redis | no |
8383
| REDIS_PASSWORD | Password for redis, optional | - | no |
8484
| REDIS_URL | Optional explicit redis url, built from host/password if not specified | redis://:staging@redis:6397/ | no |
85+
| REPOSITORY_S3_STORAGE | Whether to enable S3 or S3-like storage for Valkyrie | false | no |
86+
| REPOSITORY_S3_BUCKET | If storing file uploads in S3, what bucket should they be put in | - | no |
87+
| REPOSITORY_S3_REGION | Region code for S3-like storage (falls back to us-east-1 when unset) | - | no |
88+
| REPOSITORY_S3_ACCESS_KEY | Access key for S3 like storage | - | no |
89+
| REPOSITORY_S3_SECRET_KEY | The secret key for S3 like storage | - | no |
90+
| REPOSITORY_S3_ENDPOINT | Needed for S3 like storage such as Minio or custom S3 endpoints | - | no |
91+
| REPOSITORY_S3_PORT | Only needed for S3-like storage such as Minio; used together with REPOSITORY_S3_ENDPOINT (falls back to 9000 when unset) | - | no |
8592
| SECRET_KEY_BASE | Used by Rails to secure sessions, should be a 128 character hex | - | no |
8693
| SMTP_ADDRESS | Address of the smtp endpoint for sending email | - | no |
8794
| SMTP_DOMAIN | Domain for sending email | - | no |
@@ -197,4 +204,8 @@ You can log all of the I18n lookups to the Rails logger by setting the I18N_DEBU
197204

198205
```console
199206
$ I18N_DEBUG=true bin/rails server
200-
```
207+
```
208+
209+
## S3 Like Storage
210+
211+
You can store your primary work files in S3 in Valkyrie mode by turning on `REPOSITORY_S3_STORAGE` and setting the accompanying bucket and credentials variables. This enables both AWS S3 and other S3-like storage engines such as Minio. As of this writing, this only affects Valkyrie resources and only the primary storage. Derivatives, uploads, and branding assets all still go to the shared storage directories.

lib/reprocessor.rb

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,13 @@ def lambda_save
206206
}
207207
end
208208

209+
def lambda_migrate_resources
210+
@lambda_migrate_resources = lambda { |line, _progress|
211+
id = line.strip
212+
MigrateResourcesJob.perform_later(ids: [id])
213+
}
214+
end
215+
209216
# because this takes an arg, we don't memoize
210217
def lambda_job(_job_klass)
211218
@lambda_job = lambda { |line, _progress, job_klass|

spec/jobs/migrate_resources_job_spec.rb

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@
1111
clear_enqueued_jobs
1212
end
1313

14-
let(:account) { create(:account_with_public_schema) }
14+
let(:account) { create(:account_with_public_schema) }
15+
let(:af_file_set) { create(:file_set, title: ['TestFS']) }
1516

1617
let!(:af_admin_set) do
1718
as = AdminSet.new(title: ['AF Admin Set'])
@@ -29,5 +30,15 @@
2930

3031
expect(Valkyrie::Persistence::Postgres::ORM::Resource.find_by(id: af_admin_set.id.to_s)).to be_present
3132
end
33+
34+
it "migrates a file set by its id", active_fedora_to_valkyrie: true do
35+
expect(Valkyrie::Persistence::Postgres::ORM::Resource.find_by(id: af_file_set.id.to_s)).to be_nil
36+
37+
ActiveJob::Base.queue_adapter.perform_enqueued_jobs = true
38+
switch!(account)
39+
MigrateResourcesJob.perform_now(ids: [af_file_set.id])
40+
41+
expect(Valkyrie::Persistence::Postgres::ORM::Resource.find_by(id: af_file_set.id.to_s)).to be_present
42+
end
3243
end
3344
end

0 commit comments

Comments
 (0)