Skip to content

Commit 3d85565

Browse files
committed
[#23331] yugabyted: Add pg_isready check to the start and status commands.
Summary: * Added a `pg_isready` check for each node during the `start` and `status` commands. * Added a `YSQL Status` field to the status string displayed by the `start` and `status` commands. * During a cluster restart, if a node is still in the Bootstrapping step, `yugabyted status` reports `YSQL Status` as `Not Ready` without returning a failing exit code. * Added a new `Output.ANIMATION_STOP` status for the Output.update_animation() function. * Passing `Output.ANIMATION_STOP` and an empty msg string to Output.update_animation() removes the spinner that was started with Output.init_animation(). Jira: DB-12256 Test Plan: Manual Testing Reviewers: nikhil Reviewed By: nikhil Subscribers: sgarg-yb Differential Revision: https://phorge.dev.yugabyte.com/D34597
1 parent d1afb7a commit 3d85565

File tree

1 file changed

+59
-8
lines changed

1 file changed

+59
-8
lines changed

bin/yugabyted

Lines changed: 59 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,14 @@ def find_binary_location(binary_name):
525525
os.path.join(YUGABYTE_DIR, "build", "latest", "gobin"),
526526
]
527527

528+
# Paths for pg_isready
529+
dir_candidates.extend([
530+
# If tar is downloaded
531+
os.path.join(YUGABYTE_DIR, "postgres", "bin"),
532+
# Development environment
533+
os.path.join(YUGABYTE_DIR, "build", "debug-clang17-dynamic-ninja", "postgres", "bin")
534+
])
535+
528536
# Jenkins Test Environment
529537
dir_candidates += [
530538
os.path.join(YUGABYTE_JENKINS_BUILD_DIR, "bin")
@@ -1109,9 +1117,11 @@ class ControlScript(object):
11091117
def status(self):
11101118
if len(os.listdir(self.configs.saved_data.get("data_dir"))) != 0:
11111119
Output.init_animation("Fetching status...")
1112-
status_output = self.get_status_string().strip()
1120+
status_output, ret_code = self.get_status_string()
11131121
Output.update_animation("", Output.ANIMATION_STOP)
1114-
Output.print_out("\n" + status_output)
1122+
Output.print_out("\n" + status_output.strip())
1123+
if ret_code:
1124+
sys.exit(ret_code)
11151125
else:
11161126
Output.print_out("{} is not running.".format(SCRIPT_NAME))
11171127

@@ -3270,8 +3280,8 @@ class ControlScript(object):
32703280
warning_msg += "\n" + warning_help_msg
32713281

32723282
if is_first_run:
3273-
status = self.get_status_string() + \
3274-
"{} YugabyteDB started successfully! To load a sample dataset, " \
3283+
status, _ = self.get_status_string()
3284+
status += "{} YugabyteDB started successfully! To load a sample dataset, " \
32753285
"try '{} demo'.\n" \
32763286
"{} Join us on Slack at {}\n" \
32773287
"{} Claim your free t-shirt at {}\n".format(
@@ -3497,8 +3507,8 @@ class ControlScript(object):
34973507
warning_msg += "\n" + warning_help_msg
34983508

34993509
if is_first_run:
3500-
status = self.get_status_string() + \
3501-
"{} YugabyteDB started successfully! To load a sample dataset, " \
3510+
status, _ = self.get_status_string()
3511+
status += "{} YugabyteDB started successfully! To load a sample dataset, " \
35023512
"try '{} demo'.\n" \
35033513
"{} Join us on Slack at {}\n" \
35043514
"{} Claim your free t-shirt at {}\n".format(
@@ -5823,6 +5833,18 @@ class ControlScript(object):
58235833
Output.log("Failed to login: {}".format(err))
58245834
return "Timeout: " + err
58255835

5836+
def check_pg_isready(self, timeout=5, retries=10):
    """Report whether `pg_isready` succeeds against this node's advertise IP.

    Locates the `pg_isready` binary with find_binary_location() and runs
    `pg_isready -h <advertise_ip>` through run_process_with_retries().

    Args:
        timeout: Forwarded as `timeout` to run_process_with_retries().
        retries: Forwarded as `retries` to run_process_with_retries().

    Returns:
        True when the command's return code is falsy (0), False otherwise.
    """
    advertise_ip = self.advertise_ip()
    path = find_binary_location("pg_isready")
    # NOTE(review): no "-p" is passed, so pg_isready falls back to its own
    # default port -- confirm that this matches the configured YSQL port.
    cmd = [path, "-h", str(advertise_ip)]

    # Only the exit status is needed; stdout and stderr are discarded.
    _, _, retcode = run_process_with_retries(
        cmd=cmd, log_cmd=True, retries=retries, timeout=timeout)
    return not retcode
5847+
58265848
# Returns pretty output table.
58275849
def get_status_string(self):
58285850

@@ -5858,14 +5880,18 @@ class ControlScript(object):
58585880

58595881
status_info = []
58605882
status_display_info = dict()
5883+
ret_code = 0
58615884
# Make sure ascii escape characters for color encoding do not count towards char limit.
58625885
if self.get_failed_node_processes():
58635886
title = Output.make_bold(Output.make_red(SCRIPT_NAME))
58645887
extra_len = len(Output.make_bold(Output.make_red("")))
58655888
status = "Stopped"
5889+
ysql_status = "Not Ready"
58665890
status_info = [
58675891
(Output.make_yellow("Status"), status),
5892+
(Output.make_yellow("YSQL Status"), ysql_status),
58685893
]
5894+
ret_code = 1
58695895
else:
58705896
title = Output.make_bold(Output.make_green(SCRIPT_NAME))
58715897
extra_len = len(Output.make_bold(Output.make_green("")))
@@ -5875,17 +5901,39 @@ class ControlScript(object):
58755901
# the leader election
58765902
# In case of manual start or some other route, we can have a smaller timeout
58775903
status = ""
5904+
if self.configs.temp_data.get("yugabyted_cmd") == "start":
5905+
Output.init_animation("Checking YSQL Status...")
5906+
pg_isready = self.check_pg_isready()
5907+
if self.configs.temp_data.get("yugabyted_cmd") == "start":
5908+
Output.update_animation("", Output.ANIMATION_STOP)
58785909
if was_already_setup:
58795910
if master_addrs:
58805911
status = "Running."
5912+
if pg_isready:
5913+
ysql_status = "Ready"
5914+
ret_code = 0
5915+
else:
5916+
ysql_status = "Not Ready"
5917+
ret_code = 1
5918+
58815919
else:
58825920
status = "Bootstrapping."
5921+
ysql_status = "Not Ready"
5922+
ret_code = 0
58835923
else:
58845924
if self.wait_get_all_masters(timeout=10):
58855925
status = "Running."
5926+
if pg_isready:
5927+
ysql_status = "Ready"
5928+
ret_code = 0
5929+
else:
5930+
ysql_status = "Not Ready"
5931+
ret_code = 1
58865932
else:
58875933
status = "Status command timed out as YugabyteDB \"yb-master\" " + \
58885934
"process is not responding."
5935+
ysql_status = "Not Ready"
5936+
ret_code = 1
58895937

58905938
enabled_security_features = []
58915939
if self.configs.temp_data.get("yugabyted_cmd") == "status":
@@ -5913,7 +5961,10 @@ class ControlScript(object):
59135961
else:
59145962
rf = YBAdminProxy.get_cluster_rf(master_addrs)
59155963

5916-
status_info = [(Output.make_yellow("Status"), status)]
5964+
status_info = [
5965+
(Output.make_yellow("Status"), status),
5966+
(Output.make_yellow("YSQL Status"), ysql_status),
5967+
]
59175968
if rf:
59185969
status_info.append((Output.make_yellow("Replication Factor"), rf))
59195970

@@ -6011,7 +6062,7 @@ class ControlScript(object):
60116062
format(v if v is not None else "None")
60126063

60136064
status += div_line
6014-
return status
6065+
return status, ret_code
60156066

60166067
# Returns pretty output table
60176068
def get_status_string_common(self, status_info, status_display_info = None):

0 commit comments

Comments
 (0)