@@ -125,9 +125,11 @@ PREREQS_ERROR_MSGS = {
125
125
' please free the port and restart the node.' ,
126
126
'ycql_metric_port' : 'YCQL metrics port {} is already in use. For accessing the YCQL metrics,' \
127
127
' please free the port and restart the node.' ,
128
+ 'clockbound_fail' : 'Failed to validate system configuration for clockbound. Please run ' \
129
+ 'bin/configure_clockbound.sh script to install and configure clockbound.' ,
128
130
'clockbound' : 'Clockbound is recommended on AWS/Azure/GCP clusters.' \
129
- ' It can reduce read restart errors significantly in concurrent workloads.' \
130
- ' Relevant flag: --enhance_time_sync_via_clockbound .' ,
131
+ ' It can reduce read restart errors significantly in concurrent workloads. Please run ' \
132
+ 'bin/configure_clockbound.sh script to install and configure clockbound .' ,
131
133
}
132
134
QUICK_START_LINKS = {
133
135
'mac' : 'https://docs.yugabyte.com/preview/quick-start/' ,
@@ -683,7 +685,7 @@ def using_time_sync_service():
683
685
'aws.com' , 'google.com' ]
684
686
685
687
cmd = ['chronyc' , 'sources' ]
686
- out , err , ret_code = run_process (cmd , timeout = 1 , log_cmd = True )
688
+ out , _ , ret_code = run_process (cmd , timeout = 1 , log_cmd = True )
687
689
if ret_code == 0 :
688
690
for source in allow_list :
689
691
if source in out :
@@ -693,7 +695,7 @@ def using_time_sync_service():
693
695
694
696
def is_phc_configured ():
695
697
cmd = ['systemctl' , 'status' , 'clockbound' ]
696
- out , err , retcode = run_process (cmd , timeout = 1 , log_cmd = True )
698
+ out , _ , retcode = run_process (cmd , timeout = 1 , log_cmd = True )
697
699
return retcode == 0 and 'PHC' in out
698
700
699
701
# Check if ip is ipv6
@@ -741,9 +743,6 @@ class ControlScript(object):
741
743
atexit .register (self .kill_children )
742
744
Output .script_exit_func = self .kill_children
743
745
744
- if self .configs .temp_data .get ("enhance_time_sync_via_clockbound" ):
745
- self .assert_system_configured_for_clockbound ()
746
-
747
746
if self .configs .saved_data .get ("read_replica" ):
748
747
self .start_rr_process ()
749
748
else :
@@ -2819,11 +2818,16 @@ class ControlScript(object):
2819
2818
prereqs_warn_flag = True
2820
2819
2821
2820
# TODO: Uncomment this block when clockbound becomes GA.
2822
- # # Configuring clockbound is strongly recommended for AWS clusters.
2823
- # if using_time_sync_service() and not self.configs.temp_data[
2824
- # "enhance_time_sync_via_clockbound"]:
2825
- # prereqs_warn.add('clockbound')
2826
- # prereqs_warn_flag = True
2821
+ # Configuring clockbound is strongly recommended for AWS clusters.
2822
+ if not self .assert_system_configured_for_clockbound ():
2823
+ if self .configs .temp_data ["enhance_time_sync_via_clockbound" ]:
2824
+ prereqs_failed .add ('clockbound_fail' )
2825
+ prereqs_failed_flag = True
2826
+ elif using_time_sync_service ():
2827
+ prereqs_warn .add ('clockbound' )
2828
+ prereqs_warn_flag = True
2829
+ else :
2830
+ self .configs .temp_data ["is_clockbound_configured" ] = True
2827
2831
2828
2832
(failed_ports , warning_ports , mandatory_port_available ,
2829
2833
recommended_port_available ) = self .check_ports ()
@@ -2904,13 +2908,13 @@ class ControlScript(object):
2904
2908
# Get pre-req failures and warnings
2905
2909
prereqs_failed_flag , prereqs_failed , prereqs_warn_flag , prereqs_warn , \
2906
2910
mandatory_port_available , recommended_port_available = check
2907
- if prereqs_warn_flag :
2908
- if OS_NAME == "Linux" :
2909
- help_links .append ("- Quick start for Linux: " +
2910
- Output .make_underline (QUICK_START_LINKS ['linux' ]))
2911
- else :
2912
- help_links .append ("- Quick start for macOS: " +
2913
- Output .make_underline (QUICK_START_LINKS ['mac' ]))
2911
+ # if prereqs_warn_flag:
2912
+ if OS_NAME == "Linux" :
2913
+ help_links .append ("- Quick start for Linux: " +
2914
+ Output .make_underline (QUICK_START_LINKS ['linux' ]))
2915
+ else :
2916
+ help_links .append ("- Quick start for macOS: " +
2917
+ Output .make_underline (QUICK_START_LINKS ['mac' ]))
2914
2918
2915
2919
if not mandatory_port_available or not recommended_port_available :
2916
2920
help_links .append ("- Default ports: " + Output .make_underline (DEFAULT_PORTS_LINK ))
@@ -3100,6 +3104,38 @@ class ControlScript(object):
3100
3104
master_rpc_port , master_addresses )
3101
3105
was_already_setup = self .configs .saved_data .get ("cluster_member" , False )
3102
3106
3107
+ warnings = []
3108
+ warnings_for_ui = []
3109
+ warning_help_msg = ""
3110
+ is_first_run = True
3111
+
3112
+ # Do the pre-req check before forming master and tserver commands
3113
+ if is_first_run :
3114
+ ulimits_failed = self .script .set_rlimits (print_info = True )
3115
+ if ulimits_failed :
3116
+ msg = "Failed to meet recommended settings. Ulimits too low - {}.\n " .format (
3117
+ ", " .join (ulimits_failed ))
3118
+ ulimit_warn_msg = msg + "Note {} will still run, although it may fail for " \
3119
+ "larger workloads. For more info, see {}" .format (SCRIPT_NAME , CONFIG_LINK )
3120
+ self .alerts .append ((ALERT_WARNING , ULIMIT_ERR_CODE , ulimit_warn_msg ))
3121
+
3122
+ prereqs_check_result = self .prereqs_check (ulimits = ulimits_failed )
3123
+
3124
+ if prereqs_check_result ['status' ]== Output .ANIMATION_SUCCESS :
3125
+ Output .print_out (prereqs_check_result ['msg' ])
3126
+ elif prereqs_check_result ['status' ]== Output .ANIMATION_WARNING :
3127
+
3128
+ warnings .extend (list (prereqs_check_result ['msg' ].values ())[:- 1 ])
3129
+ warning_help_msg = prereqs_check_result ['msg' ]["help_msg" ]
3130
+
3131
+ prereqs_check_result ['msg' ].pop ("help_msg" )
3132
+ warnings_for_ui = []
3133
+ for k in prereqs_check_result ['msg' ].keys ():
3134
+ warnings_for_ui .extend ([k ])
3135
+ elif prereqs_check_result ['status' ]== Output .ANIMATION_FAIL :
3136
+ Output .print_and_log (prereqs_check_result ['msg' ])
3137
+ sys .exit (1 )
3138
+
3103
3139
common_gflags = self .get_common_flags ()
3104
3140
3105
3141
yb_master_cmd = self .get_master_cmd (common_gflags )
@@ -3125,7 +3161,6 @@ class ControlScript(object):
3125
3161
self .processes = {}
3126
3162
return
3127
3163
3128
- is_first_run = True
3129
3164
callhome_thread = None
3130
3165
masters_list_update_thread = None
3131
3166
#Start the different thread for extracting the YBC binaries
@@ -3160,37 +3195,6 @@ class ControlScript(object):
3160
3195
3161
3196
# Start or initialize yb-master and yb-tserver.
3162
3197
if is_first_run :
3163
- # Output.init_animation("Running system checks...")
3164
- warnings = []
3165
- warnings_for_ui = []
3166
- warning_help_msg = ""
3167
- ulimits_failed = self .script .set_rlimits (print_info = True )
3168
- if ulimits_failed :
3169
- msg = "Failed to meet recommended settings. Ulimits too low - {}.\n " .format (
3170
- ", " .join (ulimits_failed ))
3171
- ulimit_warn_msg = msg + "Note {} will still run, although it may fail for " \
3172
- "larger workloads. For more info, see {}" .format (SCRIPT_NAME , CONFIG_LINK )
3173
- self .alerts .append ((ALERT_WARNING , ULIMIT_ERR_CODE , ulimit_warn_msg ))
3174
-
3175
- prereqs_check_result = self .prereqs_check (ulimits = ulimits_failed )
3176
- # Output.update_animation(msg=prereqs_check_result['msg'],
3177
- # status=prereqs_check_result['status'])
3178
- if prereqs_check_result ['status' ]== Output .ANIMATION_SUCCESS :
3179
- Output .print_out (prereqs_check_result ['msg' ])
3180
- elif prereqs_check_result ['status' ]== Output .ANIMATION_WARNING :
3181
-
3182
- warnings .extend (list (prereqs_check_result ['msg' ].values ())[:- 1 ])
3183
- warning_help_msg = prereqs_check_result ['msg' ]["help_msg" ]
3184
-
3185
- prereqs_check_result ['msg' ].pop ("help_msg" )
3186
- warnings_for_ui = []
3187
- for k in prereqs_check_result ['msg' ].keys ():
3188
- warnings_for_ui .extend ([k ])
3189
- elif prereqs_check_result ['status' ]== Output .ANIMATION_FAIL :
3190
- Output .print_and_log (prereqs_check_result ['msg' ])
3191
- sys .exit (1 )
3192
-
3193
-
3194
3198
Output .init_animation ("Starting the YugabyteDB Processes..." )
3195
3199
3196
3200
self .post_install_yb ()
@@ -3414,7 +3418,28 @@ class ControlScript(object):
3414
3418
if join_ip :
3415
3419
master_addresses = "{}:{},{}" .format (get_url_from_ip (join_ip ),
3416
3420
master_rpc_port , master_addresses )
3417
- was_already_setup = self .configs .saved_data .get ("cluster_member" , False )
3421
+
3422
+ is_first_run = True
3423
+ warnings = []
3424
+ warning_help_msg = ""
3425
+ if is_first_run :
3426
+ ulimits_failed = self .script .set_rlimits (print_info = True )
3427
+ if ulimits_failed :
3428
+ msg = "Failed to meet recommended settings. Ulimits too low - {}.\n " .format (
3429
+ ", " .join (ulimits_failed ))
3430
+ ulimit_warn_msg = msg + "Note {} will still run, although it may fail for " \
3431
+ "larger workloads. For more info, see {}" .format (SCRIPT_NAME , CONFIG_LINK )
3432
+ self .alerts .append ((ALERT_WARNING , ULIMIT_ERR_CODE , ulimit_warn_msg ))
3433
+
3434
+ prereqs_check_result = self .prereqs_check (ulimits = ulimits_failed )
3435
+ if prereqs_check_result ['status' ]== Output .ANIMATION_SUCCESS :
3436
+ Output .print_out (prereqs_check_result ['msg' ])
3437
+ elif prereqs_check_result ['status' ]== Output .ANIMATION_WARNING :
3438
+ warnings .extend (list (prereqs_check_result ['msg' ].values ())[:- 1 ])
3439
+ warning_help_msg = prereqs_check_result ['msg' ]["help_msg" ]
3440
+ elif prereqs_check_result ['status' ]== Output .ANIMATION_FAIL :
3441
+ Output .print_and_log (prereqs_check_result ['msg' ])
3442
+ sys .exit (1 )
3418
3443
3419
3444
common_gflags = self .get_common_flags ()
3420
3445
@@ -3436,7 +3461,6 @@ class ControlScript(object):
3436
3461
self .processes = {}
3437
3462
return
3438
3463
3439
- is_first_run = True
3440
3464
callhome_thread = None
3441
3465
masters_list_update_thread = None
3442
3466
self .stop_callhome = False
@@ -3463,26 +3487,6 @@ class ControlScript(object):
3463
3487
3464
3488
# Start or initialize yb-master and yb-tserver.
3465
3489
if is_first_run :
3466
- warnings = []
3467
- warning_help_msg = ""
3468
- ulimits_failed = self .script .set_rlimits (print_info = True )
3469
- if ulimits_failed :
3470
- msg = "Failed to meet recommended settings. Ulimits too low - {}.\n " .format (
3471
- ", " .join (ulimits_failed ))
3472
- ulimit_warn_msg = msg + "Note {} will still run, although it may fail for " \
3473
- "larger workloads. For more info, see {}" .format (SCRIPT_NAME , CONFIG_LINK )
3474
- self .alerts .append ((ALERT_WARNING , ULIMIT_ERR_CODE , ulimit_warn_msg ))
3475
-
3476
- prereqs_check_result = self .prereqs_check (ulimits = ulimits_failed )
3477
- if prereqs_check_result ['status' ]== Output .ANIMATION_SUCCESS :
3478
- Output .print_out (prereqs_check_result ['msg' ])
3479
- elif prereqs_check_result ['status' ]== Output .ANIMATION_WARNING :
3480
- warnings .extend (list (prereqs_check_result ['msg' ].values ())[:- 1 ])
3481
- warning_help_msg = prereqs_check_result ['msg' ]["help_msg" ]
3482
- elif prereqs_check_result ['status' ]== Output .ANIMATION_FAIL :
3483
- Output .print_and_log (prereqs_check_result ['msg' ])
3484
- sys .exit (1 )
3485
-
3486
3490
Output .init_animation ("Starting the YugabyteDB Processes..." )
3487
3491
3488
3492
self .post_install_yb ()
@@ -3652,14 +3656,18 @@ class ControlScript(object):
3652
3656
3653
3657
def config_time_source_clockbound (self , flags ):
3654
3658
# Configure tserver flag time_source=clockbound
3655
- # when --enhance_time_sync_via_clockbound is set .
3656
- if self .configs .temp_data ["enhance_time_sync_via_clockbound " ]:
3659
+ # when clockbound is installed and configured .
3660
+ if self .configs .temp_data ["is_clockbound_configured " ]:
3657
3661
# Check database configuration.
3658
3662
time_source = self .get_flag_value (flags , "time_source" )
3659
3663
if time_source and time_source != "clockbound" :
3660
- raise ValueError (
3661
- "Cannot configure time_source with"
3662
- " --enhance_time_sync_via_clockbound." )
3664
+ if self .configs .temp_data ["enhance_time_sync_via_clockbound" ]:
3665
+ raise ValueError ("--time_source gflag is already set to {}." .format (
3666
+ time_source ) + "Cannot configure time_source with" +
3667
+ " --enhance_time_sync_via_clockbound." )
3668
+ else :
3669
+ Output .log ("--time_source gflag is already set to {}." .format (time_source ) +
3670
+ " Cannot configure time_source to clockbound." )
3663
3671
3664
3672
# Configure time_source=clockbound if not already.
3665
3673
if not time_source :
@@ -4088,18 +4096,13 @@ class ControlScript(object):
4088
4096
# Sets YW metrics to use local database.
4089
4097
os .environ ["USE_NATIVE_METRICS" ] = "true"
4090
4098
4099
+ # Returns true if the system has been configured for clock bound.
4100
+ # Runs `configure_clockbound.sh --validate` and returns true if it returns 0.
4091
4101
def assert_system_configured_for_clockbound (self ):
4092
- Output .init_animation ("Validating system config for clockbound..." )
4093
4102
configure_clockbound_path = find_binary_location ("configure_clockbound.sh" )
4094
4103
cmd = ["bash" , configure_clockbound_path , "--validate" ]
4095
- out , err , retcode = run_process (cmd )
4096
- if retcode == 0 :
4097
- Output .update_animation ("System configured for clockbound." )
4098
- else :
4099
- Output .update_animation ("Failed to validate system configuration for clockbound." ,
4100
- status = Output .ANIMATION_FAIL )
4101
- Output .log_error_and_exit (
4102
- Output .make_red ("ERROR" ) + ": Did you run configure_clockbound.sh script?" )
4104
+ _ , _ , retcode = run_process (cmd )
4105
+ return retcode == 0
4103
4106
4104
4107
# Runs post_install script for linux computers.
4105
4108
def post_install_yb (self ):
@@ -8580,6 +8583,7 @@ class Configs(object):
8580
8583
"xcluster_target_addresses" : "" ,
8581
8584
"xcluster_bootstrap_done" : "" ,
8582
8585
"enhance_time_sync_via_clockbound" : False ,
8586
+ "is_clockbound_configured" : False ,
8583
8587
}
8584
8588
self .config_file = config_file
8585
8589
0 commit comments