Skip to content

Commit 3c5dc93

Browse files
joelim41comandeo-mongo
andauthoredJul 4, 2024··
[RUBY-3496] Fix legacy read pool retry error (#2878)
Co-authored-by: Dmitry Rybakov <160598371+comandeo-mongo@users.noreply.github.com> Co-authored-by: Dmitry Rybakov <dmitry.rybakov@mongodb.com>
1 parent de60b9e commit 3c5dc93

File tree

4 files changed

+75
-38
lines changed

4 files changed

+75
-38
lines changed
 

‎lib/mongo/retryable/base_worker.rb

+28-3
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ def initialize(retryable)
4949

5050
private
5151

52-
# Indicate which exception classes that are generally retryable.
52+
# Indicate which exception classes that are generally retryable
53+
# when using modern retries mechanism.
5354
#
5455
# @return [ Array<Mongo:Error> ] Array of exception classes that are
5556
# considered retryable.
@@ -58,18 +59,42 @@ def retryable_exceptions
5859
Error::ConnectionPerished,
5960
Error::ServerNotUsable,
6061
Error::SocketError,
61-
Error::SocketTimeoutError
62+
Error::SocketTimeoutError,
6263
].freeze
6364
end
6465

66+
# Indicate which exception classes that are generally retryable
67+
# when using legacy retries mechanism.
68+
#
69+
# @return [ Array<Mongo:Error> ] Array of exception classes that are
70+
# considered retryable.
71+
def legacy_retryable_exceptions
72+
[
73+
Error::ConnectionPerished,
74+
Error::ServerNotUsable,
75+
Error::SocketError,
76+
Error::SocketTimeoutError,
77+
Error::PoolClearedError,
78+
Error::PoolPausedError,
79+
].freeze
80+
end
81+
82+
6583
# Tests to see if the given exception instance is of a type that can
66-
# be retried.
84+
# be retried with modern retry mechanism.
6785
#
6886
# @return [ true | false ] true if the exception is retryable.
6987
def is_retryable_exception?(e)
7088
retryable_exceptions.any? { |klass| klass === e }
7189
end
7290

91+
# Tests to see if the given exception instance is of a type that can
92+
# be retried with legacy retry mechanism.
93+
#
94+
# @return [ true | false ] true if the exception is retryable.
95+
def is_legacy_retryable_exception?(e)
96+
legacy_retryable_exceptions.any? { |klass| klass === e }
97+
end
7398
# Logs the given deprecation warning the first time it is called for a
7499
# given key; after that, it does nothing when given the same key.
75100
def deprecation_warning(key, warning)

‎lib/mongo/retryable/read_worker.rb

+9-8
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ def modern_read_with_retry(session, server_selector, &block)
198198
raise e if !is_retryable_exception?(e) && !e.write_retryable?
199199
retry_read(e, session, server_selector, failed_server: server, &block)
200200
end
201-
201+
202202
# Attempts to do a "legacy" read with retry. The operation will be
203203
# attempted multiple times, up to the client's `max_read_retries`
204204
# setting.
@@ -213,17 +213,18 @@ def modern_read_with_retry(session, server_selector, &block)
213213
def legacy_read_with_retry(session, server_selector, &block)
214214
attempt = attempt ? attempt + 1 : 1
215215
yield select_server(cluster, server_selector, session)
216-
rescue *retryable_exceptions, Error::OperationFailure, Error::PoolError => e
216+
rescue *legacy_retryable_exceptions, Error::OperationFailure => e
217217
e.add_notes('legacy retry', "attempt #{attempt}")
218-
219-
if is_retryable_exception?(e)
218+
219+
if is_legacy_retryable_exception?(e)
220+
220221
raise e if attempt > client.max_read_retries || session&.in_transaction?
221222
elsif e.retryable? && !session&.in_transaction?
222223
raise e if attempt > client.max_read_retries
223224
else
224225
raise e
225226
end
226-
227+
227228
log_retry(e, message: 'Legacy read retry')
228229
sleep(client.read_retry_interval) unless is_retryable_exception?(e)
229230
retry
@@ -261,7 +262,7 @@ def read_without_retry(session, server_selector, &block)
261262
# @param [ Mongo::Server ] failed_server The server on which the original
262263
# operation failed.
263264
# @param [ Proc ] block The block to execute.
264-
#
265+
#
265266
# @return [ Result ] The result of the operation.
266267
def retry_read(original_error, session, server_selector, failed_server: nil, &block)
267268
begin
@@ -270,9 +271,9 @@ def retry_read(original_error, session, server_selector, failed_server: nil, &bl
270271
original_error.add_note("later retry failed: #{e.class}: #{e}")
271272
raise original_error
272273
end
273-
274+
274275
log_retry(original_error, message: 'Read retry')
275-
276+
276277
begin
277278
yield server, true
278279
rescue *retryable_exceptions => e

‎lib/mongo/retryable/write_worker.rb

+4-4
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def nro_write_with_retry(write_concern, context:, &block)
104104
session = context.session
105105
server = select_server(cluster, ServerSelector.primary, session)
106106
options = session&.client&.options || {}
107-
107+
108108
if options[:retry_writes]
109109
begin
110110
server.with_connection(connection_global_id: context.connection_global_id) do |connection|
@@ -219,7 +219,7 @@ def legacy_write_with_retry(server = nil, context:)
219219
def modern_write_with_retry(session, server, context, &block)
220220
txn_num = nil
221221
connection_succeeded = false
222-
222+
223223
server.with_connection(connection_global_id: context.connection_global_id) do |connection|
224224
connection_succeeded = true
225225

@@ -264,7 +264,7 @@ def retry_write(original_error, txn_num, context:, failed_server: nil, &block)
264264
# a socket error or a not master error should have marked the respective
265265
# server unknown). Here we just need to wait for server selection.
266266
server = select_server(cluster, ServerSelector.primary, session, failed_server)
267-
267+
268268
unless server.retry_writes?
269269
# Do not need to add "modern retry" here, it should already be on
270270
# the first exception.
@@ -278,7 +278,7 @@ def retry_write(original_error, txn_num, context:, failed_server: nil, &block)
278278
# special marker class to bypass the ordinarily applicable rescues.
279279
raise Error::RaiseOriginalError
280280
end
281-
281+
282282
log_retry(original_error, message: 'Write retry')
283283
server.with_connection(connection_global_id: context.connection_global_id) do |connection|
284284
yield(connection, txn_num, context)

‎spec/integration/retryable_reads_errors_spec.rb

+34-23
Original file line numberDiff line numberDiff line change
@@ -74,31 +74,42 @@
7474
client.subscribe(Mongo::Monitoring::CONNECTION_POOL, subscriber)
7575
end
7676

77-
it "retries on PoolClearedError" do
78-
# After the first find fails, the pool is paused and retry is triggered.
79-
# Now, a race is started between the second find acquiring a connection,
80-
# and the first retrying the read. Now, retry reads cause the cluster to
81-
# be rescanned and the pool to be unpaused, allowing the second checkout
82-
# to succeed (when it should fail). Therefore we want the second find's
83-
# check out to win the race. This gives the check out a little head start.
84-
allow_any_instance_of(Mongo::Server::ConnectionPool).to receive(:ready).and_wrap_original do |m, *args, &block|
85-
::Utils.wait_for_condition(5) do
86-
# check_out_results should contain:
87-
# - find1 connection check out successful
88-
# - pool cleared
89-
# - find2 connection check out failed
90-
# We wait here for the third event to happen before we ready the pool.
91-
cmap_events.select do |e|
92-
event_types.include?(e.class)
93-
end.length >= 3
77+
shared_examples_for 'retries on PoolClearedError' do
78+
it "retries on PoolClearedError" do
79+
# After the first find fails, the pool is paused and retry is triggered.
80+
# Now, a race is started between the second find acquiring a connection,
81+
# and the first retrying the read. Now, retry reads cause the cluster to
82+
# be rescanned and the pool to be unpaused, allowing the second checkout
83+
# to succeed (when it should fail). Therefore we want the second find's
84+
# check out to win the race. This gives the check out a little head start.
85+
allow_any_instance_of(Mongo::Server::ConnectionPool).to receive(:ready).and_wrap_original do |m, *args, &block|
86+
::Utils.wait_for_condition(5) do
87+
# check_out_results should contain:
88+
# - find1 connection check out successful
89+
# - pool cleared
90+
# - find2 connection check out failed
91+
# We wait here for the third event to happen before we ready the pool.
92+
cmap_events.select do |e|
93+
event_types.include?(e.class)
94+
end.length >= 3
95+
end
96+
m.call(*args, &block)
9497
end
95-
m.call(*args, &block)
98+
threads.map(&:join)
99+
expect(check_out_results[0]).to be_a(Mongo::Monitoring::Event::Cmap::ConnectionCheckedOut)
100+
expect(check_out_results[1]).to be_a(Mongo::Monitoring::Event::Cmap::PoolCleared)
101+
expect(check_out_results[2]).to be_a(Mongo::Monitoring::Event::Cmap::ConnectionCheckOutFailed)
102+
expect(find_events.length).to eq(3)
96103
end
97-
threads.map(&:join)
98-
expect(check_out_results[0]).to be_a(Mongo::Monitoring::Event::Cmap::ConnectionCheckedOut)
99-
expect(check_out_results[1]).to be_a(Mongo::Monitoring::Event::Cmap::PoolCleared)
100-
expect(check_out_results[2]).to be_a(Mongo::Monitoring::Event::Cmap::ConnectionCheckOutFailed)
101-
expect(find_events.length).to eq(3)
104+
end
105+
106+
it_behaves_like 'retries on PoolClearedError'
107+
108+
context 'legacy read retries' do
109+
110+
let(:client) { authorized_client.with(options.merge(retry_reads: false, max_read_retries: 1)) }
111+
112+
it_behaves_like 'retries on PoolClearedError'
102113
end
103114

104115
after do

0 commit comments

Comments
 (0)
Please sign in to comment.