Skip to content

Commit

Permalink
Merge pull request #2541 from pulibrary/scsb_update_investigation
Browse files Browse the repository at this point in the history
List all files in bucket when over 1,000 objects
  • Loading branch information
christinach authored Oct 25, 2024
2 parents 2181ce8 + b14f2f4 commit de79742
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 8 deletions.
10 changes: 8 additions & 2 deletions app/models/scsb/s3_bucket.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,14 @@ def initialize(s3_client:, s3_bucket_name:)
end

# Lists every object stored under +prefix+ in the configured bucket.
#
# A single list_objects_v2 response can be truncated (the commit this belongs
# to targets buckets holding more than 1,000 objects), so pages are requested
# until the response no longer carries a continuation token.
#
# @param prefix [String] key prefix used to filter the listing
# @return [Array] the objects from every page, in the order they were returned
def list_files(prefix:)
  request = { bucket: s3_bucket_name, prefix:, delimiter: '' }
  files = []

  loop do
    response = s3_client.list_objects_v2(**request)
    # to_a copies the page into a plain Array and turns a nil page into []
    files.concat(response.contents.to_a)

    token = response.next_continuation_token
    break unless token

    # Ask for the page that follows the one just received
    request = request.merge(continuation_token: token)
  end

  files
end

def download_file(key:)
Expand Down
13 changes: 7 additions & 6 deletions spec/models/scsb/s3_bucket_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,10 @@

describe "list_files" do
  it "returns the objects" do
    first_object = Aws::S3::Types::Object.new(key: 'prefix/first.zip')
    second_object = Aws::S3::Types::Object.new(key: 'prefix/second.zip')

    # The first page carries a continuation token, so list_files has to request a second page with it.
    first_page = Aws::S3::Types::ListObjectsV2Output.new(contents: Aws::Xml::DefaultList.new([first_object]), next_continuation_token: 'xyz123')
    # The last page has no token, which ends the pagination.
    last_page = Aws::S3::Types::ListObjectsV2Output.new(contents: Aws::Xml::DefaultList.new([second_object]))

    allow(s3_client).to receive(:list_objects_v2).with(bucket: 'test', prefix: 'prefix', delimiter: '').and_return(first_page)
    allow(s3_client).to receive(:list_objects_v2).with(bucket: 'test', prefix: 'prefix', delimiter: '', continuation_token: 'xyz123').and_return(last_page)

    results = s3.list_files(prefix: 'prefix')

    expect(results).to be_an(Array)
    expect(results.map(&:key)).to eq(['prefix/first.zip', 'prefix/second.zip'])
  end
end
Expand Down Expand Up @@ -88,9 +89,9 @@
Aws::S3::Types::Object.new(key: "exports/ABC/MARCXml/Full/CUL_2.zip", last_modified: Time.new(2.days.ago.to_i)),
Aws::S3::Types::Object.new(key: "exports/ABC/MARCXml/Full/CUL_3.zip", last_modified: Time.new(1.week.ago.to_i))
]
aws_list = Aws::S3::Types::ListObjectsOutput.new(contents: Aws::Xml::DefaultList.new(files))
aws_list = Aws::S3::Types::ListObjectsV2Output.new(contents: Aws::Xml::DefaultList.new(files))

allow(s3_client).to receive(:list_objects).with(bucket: 'test', prefix: 'prefix', delimiter: '').and_return(aws_list)
allow(s3_client).to receive(:list_objects_v2).with(bucket: 'test', prefix: 'prefix', delimiter: '').and_return(aws_list)

output1 = Aws::S3::Types::GetObjectOutput.new(body: StringIO.new("abc123"))
allow(s3_client).to receive(:get_object).with(bucket: 'test', key: 'exports/ABC/MARCXml/Full/CUL_1.zip').and_return(output1)
Expand All @@ -107,9 +108,9 @@
files = [
Aws::S3::Types::Object.new(key: "exports/ABC/MARCXml/Full/NYPL_1.zip", last_modified: Time.new(1.day.ago.to_i))
]
aws_list = Aws::S3::Types::ListObjectsOutput.new(contents: Aws::Xml::DefaultList.new(files))
aws_list = Aws::S3::Types::ListObjectsV2Output.new(contents: Aws::Xml::DefaultList.new(files))

allow(s3_client).to receive(:list_objects).with(bucket: 'test', prefix: 'prefix', delimiter: '').and_return(aws_list)
allow(s3_client).to receive(:list_objects_v2).with(bucket: 'test', prefix: 'prefix', delimiter: '').and_return(aws_list)

path = Rails.root.join('tmp', 's3_bucket_test')
FileUtils.rm_rf(path)
Expand Down

0 comments on commit de79742

Please sign in to comment.