Skip to content

Commit 8eb8850

Browse files
Fix for specific time scheduled crawls skipping if too many crawlers (#597)
* Bump 8.16 version (#595) * New diffs * Make tests run for now until we better understand the tests around sync_triggered? and custom_sync_triggered? * Added additional tests in crawler_scheduler_spec.rb + wrapped Time.now() calls in Timecop.freeze() + fixed minor typo in :monthly_interval cron schedule * Added a new test to ensure debug logs are presented when a schedule is skipped + added an expect() to an existing test to check if debug logs are presented there as well + revised debug log wording to include the poll_interval value * Added regex to expect() for debug messages to ensure we are getting the correct messages + moved those expect() lines out of the before(:each) blocks and into it blocks --------- Co-authored-by: Chenhui Wang <[email protected]>
1 parent 7fef713 commit 8eb8850

File tree

5 files changed

+107
-33
lines changed

5 files changed

+107
-33
lines changed

Gemfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ group :test do
3939
gem 'ruby-debug-ide'
4040
gem 'pry-remote'
4141
gem 'pry-nav'
42-
gem 'debase', '0.2.5.beta2'
42+
gem 'debase', '0.2.8'
4343
gem 'timecop'
4444
gem 'simplecov', require: false
4545
gem 'simplecov-material'

Gemfile.lock

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@ GEM
2020
dry-validation (~> 1.0, >= 1.0.0)
2121
crack (0.4.5)
2222
rexml
23-
debase (0.2.5.beta2)
24-
debase-ruby_core_source (>= 0.10.12)
25-
debase-ruby_core_source (0.10.14)
23+
debase (0.2.8)
24+
debase-ruby_core_source (>= 3.3.6)
25+
debase-ruby_core_source (3.3.6)
2626
deep_merge (1.2.2)
2727
diff-lcs (1.5.0)
2828
docile (1.4.0)
@@ -94,7 +94,6 @@ GEM
9494
faraday-retry (1.0.3)
9595
faraday_middleware (1.0.0)
9696
faraday (~> 1.0)
97-
ffi (1.15.5-java)
9897
forwardable (1.3.2)
9998
fugit (1.11.1)
10099
et-orbi (~> 1, >= 1.2.11)
@@ -122,10 +121,6 @@ GEM
122121
pry (0.14.1)
123122
coderay (~> 1.1)
124123
method_source (~> 1.0)
125-
pry (0.14.1-java)
126-
coderay (~> 1.1)
127-
method_source (~> 1.0)
128-
spoon (~> 0.0)
129124
pry-nav (1.0.0)
130125
pry (>= 0.9.10, < 0.15)
131126
pry-remote (0.1.8)
@@ -186,8 +181,6 @@ GEM
186181
simplecov (>= 0.16.0)
187182
simplecov_json_formatter (0.1.4)
188183
slop (3.6.0)
189-
spoon (0.0.6)
190-
ffi
191184
timecop (0.9.4)
192185
tzinfo (2.0.6)
193186
concurrent-ruby (~> 1.0)
@@ -215,7 +208,7 @@ DEPENDENCIES
215208
bundler (= 2.3.15)
216209
concurrent-ruby (~> 1.1.9)
217210
config (~> 4.0.0)
218-
debase (= 0.2.5.beta2)
211+
debase (= 0.2.8)
219212
dry-configurable (= 0.13.0)
220213
dry-container (= 0.9.0)
221214
dry-core (= 0.7.1)

lib/connectors/crawler/scheduler.rb

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,15 @@ def connector_settings
2424

2525
def when_triggered
2626
loop do
27+
time_at_poll_start = Time.now # grab the time right before we iterate over all connectors
2728
connector_settings.each do |cs|
2829
# crawler only supports :sync
29-
if sync_triggered?(cs)
30+
if sync_triggered?(cs, time_at_poll_start)
3031
yield cs, :sync, nil
3132
next
3233
end
3334

34-
schedule_key = custom_schedule_triggered(cs)
35+
schedule_key = custom_schedule_triggered(cs, time_at_poll_start)
3536
yield cs, :sync, schedule_key if schedule_key
3637
end
3738
rescue *Utility::AUTHORIZATION_ERRORS => e
@@ -53,10 +54,10 @@ def connector_registered?(service_type)
5354
end
5455

5556
# custom scheduling has no ordering, so the first-found schedule is returned
56-
def custom_schedule_triggered(cs)
57+
def custom_schedule_triggered(cs, time_at_poll_start)
5758
cs.custom_scheduling_settings.each do |key, custom_scheduling|
5859
identifier = "#{cs.formatted} - #{custom_scheduling[:name]}"
59-
if schedule_triggered?(custom_scheduling, identifier)
60+
if schedule_triggered?(custom_scheduling, identifier, time_at_poll_start)
6061
return key
6162
end
6263
end

lib/core/scheduler.rb

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ def shutdown
6262

6363
private
6464

65-
def sync_triggered?(connector_settings)
65+
def sync_triggered?(connector_settings, time_at_poll_start = Time.now)
6666
unless connector_settings.valid_index_name?
6767
Utility::Logger.warn("The index name of #{connector_settings.formatted} is invalid.")
6868
return false
@@ -80,7 +80,7 @@ def sync_triggered?(connector_settings)
8080
return true
8181
end
8282

83-
schedule_triggered?(connector_settings.full_sync_scheduling, connector_settings.formatted)
83+
schedule_triggered?(connector_settings.full_sync_scheduling, connector_settings.formatted, time_at_poll_start)
8484
end
8585

8686
def heartbeat_triggered?(connector_settings)
@@ -149,7 +149,7 @@ def connector_registered?(service_type)
149149
end
150150
end
151151

152-
def schedule_triggered?(scheduling_settings, identifier)
152+
def schedule_triggered?(scheduling_settings, identifier, time_at_poll_start = Time.now)
153153
# Don't sync if sync is explicitly disabled
154154
unless scheduling_settings.present? && scheduling_settings[:enabled] == true
155155
Utility::Logger.debug("#{identifier.capitalize} scheduling is disabled.")
@@ -179,12 +179,15 @@ def schedule_triggered?(scheduling_settings, identifier)
179179
return false
180180
end
181181

182-
next_trigger_time = cron_parser.next_time(Time.now)
183-
182+
next_trigger_time = cron_parser.next_time(time_at_poll_start)
184183
# Sync if next trigger happens before the next poll
185-
if next_trigger_time <= Time.now + @poll_interval
184+
poll_window = time_at_poll_start + @poll_interval
185+
if next_trigger_time <= poll_window
186186
Utility::Logger.info("#{identifier.capitalize} sync is triggered by cron schedule #{current_schedule}.")
187187
return true
188+
else
189+
# log that a sync was not triggered, share the next trigger time and when poll interval was meant to end
190+
Utility::Logger.debug("Sync for #{identifier.capitalize} not triggered as #{next_trigger_time} occurs after the poll window #{poll_window}. Poll window began at #{time_at_poll_start}, poll interval is #{@poll_interval} seconds.")
188191
end
189192

190193
false

spec/connectors/crawler/crawler_scheduler_spec.rb

Lines changed: 88 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
require 'core/connector_settings'
22
require 'connectors/crawler/scheduler'
3+
require 'timecop'
34

45
describe Connectors::Crawler::Scheduler do
56
subject { described_class.new(poll_interval, heartbeat_interval) }
@@ -67,7 +68,7 @@
6768
let(:weekly_enabled) { false }
6869
let(:weekly_interval) { '0 0 * * 1 ?' }
6970
let(:monthly_enabled) { false }
70-
let(:monthly_interval) { '0 0 * 1 * ?' }
71+
let(:monthly_interval) { '0 0 1 * * ?' }
7172
let(:custom_scheduling_settings) do
7273
{
7374
:weekly_key => {
@@ -88,14 +89,15 @@
8889
let(:weekly_next_trigger_time) { 1.day.from_now }
8990
let(:monthly_next_trigger_time) { 1.day.from_now }
9091

92+
let(:time_at_poll_start) { Timecop.freeze(Time.now) }
93+
9194
let(:cron_parser) { instance_double(Fugit::Cron) }
9295

9396
before(:each) do
9497
allow(Core::ConnectorSettings).to receive(:fetch_crawler_connectors).and_return(connector_settings)
9598

96-
allow(subject).to receive(:sync_triggered?).with(connector_settings).and_call_original
97-
allow(subject).to receive(:custom_sync_triggered?).with(connector_settings).and_call_original
98-
99+
allow(subject).to receive(:sync_triggered?).with(connector_settings, time_at_poll_start).and_call_original
100+
allow(subject).to receive(:custom_sync_triggered?).with(connector_settings, time_at_poll_start).and_call_original
99101
allow(connector_settings).to receive(:connector_status_allows_sync?).and_return(true)
100102
allow(connector_settings).to receive(:sync_now?).and_return(sync_now)
101103
allow(connector_settings).to receive(:full_sync_scheduling).and_return(full_sync_scheduling)
@@ -109,13 +111,17 @@
109111
allow(Fugit::Cron).to receive(:parse).and_return(cron_parser)
110112
end
111113

114+
after(:each) do
115+
Timecop.return
116+
end
117+
112118
context 'when none are enabled' do
113119
it_behaves_like 'does not trigger', :sync
114120
end
115121

116122
context 'when one custom scheduling is enabled and ready to sync' do
117123
let(:monthly_enabled) { true }
118-
let(:monthly_next_trigger_time) { Time.now + poll_interval - 10 }
124+
let(:monthly_next_trigger_time) { time_at_poll_start + poll_interval - 10 }
119125

120126
before(:each) do
121127
allow(Utility::Cron).to receive(:quartz_to_crontab).with(monthly_interval)
@@ -125,12 +131,12 @@
125131
it_behaves_like 'triggers', :monthly_key
126132
end
127133

128-
context 'when all custom schedulings are enabled and ready to sync' do
134+
context 'when all custom scheduling is enabled and ready to sync' do
129135
let(:weekly_enabled) { true }
130136
let(:monthly_enabled) { true }
131137

132-
let(:weekly_next_trigger_time) { Time.now + poll_interval - 10 }
133-
let(:monthly_next_trigger_time) { Time.now + poll_interval - 10 }
138+
let(:weekly_next_trigger_time) { time_at_poll_start + poll_interval - 10 }
139+
let(:monthly_next_trigger_time) { time_at_poll_start + poll_interval - 10 }
134140

135141
before(:each) do
136142
allow(cron_parser).to receive(:next_time).and_return(weekly_next_trigger_time, monthly_next_trigger_time)
@@ -145,9 +151,9 @@
145151
let(:weekly_enabled) { true }
146152
let(:monthly_enabled) { true }
147153

148-
let(:next_trigger_time) { Time.now + poll_interval - 10 }
149-
let(:weekly_next_trigger_time) { Time.now + poll_interval - 10 }
150-
let(:monthly_next_trigger_time) { Time.now + poll_interval - 10 }
154+
let(:next_trigger_time) { time_at_poll_start + poll_interval - 10 }
155+
let(:weekly_next_trigger_time) { time_at_poll_start + poll_interval - 10 }
156+
let(:monthly_next_trigger_time) { time_at_poll_start + poll_interval - 10 }
151157

152158
before(:each) do
153159
allow(cron_parser).to receive(:next_time).and_return(next_trigger_time, weekly_next_trigger_time, monthly_next_trigger_time)
@@ -156,6 +162,77 @@
156162
# it will return the base scheduling
157163
it_behaves_like 'triggers', nil
158164
end
165+
166+
context 'when base and custom scheduling are enabled and are scheduled after the poll interval' do
167+
let(:sync_enabled) { true }
168+
let(:weekly_enabled) { true }
169+
let(:monthly_enabled) { true }
170+
171+
let(:next_trigger_time) { time_at_poll_start + poll_interval + 10 }
172+
let(:weekly_next_trigger_time) { time_at_poll_start + poll_interval + 10 }
173+
let(:monthly_next_trigger_time) { time_at_poll_start + poll_interval + 10 }
174+
175+
before(:each) do
176+
allow(cron_parser).to receive(:next_time).with(time_at_poll_start).and_return(next_trigger_time, weekly_next_trigger_time, monthly_next_trigger_time)
177+
end
178+
179+
# functionally the same as shared test 'does not trigger' but with an extra expect() to check for debug messages
180+
it 'does not yield task' do
181+
# expect three debug messages because three schedules are not being triggered
182+
expect(Utility::Logger).to receive(:debug).exactly(3).times.with(match(/^Sync for (\w+.*)|( - \w+) not triggered as .*/))
183+
expect { |b| subject.when_triggered(&b) }.to_not yield_control
184+
end
185+
end
186+
187+
context 'when base and custom scheduling are enabled, but one is scheduled after the poll interval' do
188+
let(:sync_enabled) { true }
189+
let(:weekly_enabled) { true }
190+
191+
let(:next_trigger_time) { time_at_poll_start + poll_interval + 10 }
192+
let(:weekly_next_trigger_time) { time_at_poll_start + poll_interval - 10 }
193+
194+
before(:each) do
195+
allow(cron_parser).to receive(:next_time).with(time_at_poll_start).and_return(next_trigger_time, weekly_next_trigger_time)
196+
end
197+
198+
# functionally the same as shared test 'triggers', but with an extra expect() to check for a debug message
199+
it 'yields :sync task with an optional scheduling_key value' do
200+
expect(Utility::Logger).to receive(:debug).exactly(1).times.with(match(/^Sync for (\w+.*)|( - \w+) not triggered as .*/))
201+
expect { |b| subject.when_triggered(&b) }.to yield_with_args(connector_settings, :sync, :weekly_key)
202+
end
203+
end
204+
205+
context 'when base and custom scheduling are enabled and require sync and are scheduled at the start of the poll interval' do
206+
let(:sync_enabled) { true }
207+
let(:weekly_enabled) { true }
208+
let(:monthly_enabled) { true }
209+
210+
let(:next_trigger_time) { time_at_poll_start }
211+
let(:weekly_next_trigger_time) { time_at_poll_start }
212+
let(:monthly_next_trigger_time) { time_at_poll_start }
213+
214+
before(:each) do
215+
allow(cron_parser).to receive(:next_time).with(time_at_poll_start).and_return(next_trigger_time, weekly_next_trigger_time, monthly_next_trigger_time)
216+
end
217+
218+
it_behaves_like 'triggers', nil
219+
end
220+
221+
context 'when base and custom scheduling are enabled and require sync and are scheduled at end of the poll interval' do
222+
let(:sync_enabled) { true }
223+
let(:weekly_enabled) { true }
224+
let(:monthly_enabled) { true }
225+
226+
let(:next_trigger_time) { time_at_poll_start + poll_interval }
227+
let(:weekly_next_trigger_time) { time_at_poll_start + poll_interval }
228+
let(:monthly_next_trigger_time) { time_at_poll_start + poll_interval }
229+
230+
before(:each) do
231+
allow(cron_parser).to receive(:next_time).with(time_at_poll_start).and_return(next_trigger_time, weekly_next_trigger_time, monthly_next_trigger_time)
232+
end
233+
234+
it_behaves_like 'triggers', nil
235+
end
159236
end
160237
end
161238
end

0 commit comments

Comments
 (0)