Skip to content

Commit c170ad3

Browse files
sdmarshall79GitHub Enterprise
authored and
GitHub Enterprise
committed
Startup probe (#648)
* Update chkmqstarted to check in-sync for Native-HA
1 parent 25e9eb8 commit c170ad3

File tree

2 files changed

+158
-12
lines changed

2 files changed

+158
-12
lines changed

cmd/chkmqstarted/main.go

+98-7
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,65 @@ import (
2323
"os"
2424
"os/exec"
2525
"os/signal"
26+
"strconv"
2627
"strings"
28+
"time"
2729

30+
"github.com/ibm-messaging/mq-container/internal/ready"
2831
"github.com/ibm-messaging/mq-container/pkg/name"
2932
)
3033

3134
func queueManagerStarted(ctx context.Context) (bool, error) {
35+
3236
name, err := name.GetQueueManagerName()
3337
if err != nil {
3438
return false, err
3539
}
40+
41+
readyStrings := []string{
42+
"(RUNNING)",
43+
"(RUNNING AS STANDBY)",
44+
"(RECOVERY GROUP LEADER)",
45+
"(STARTING)",
46+
"(REPLICA)",
47+
}
48+
49+
// For Native-HA only, check if the queue manager instance is in-sync with one or more replicas
50+
// - If not in-sync within the expected time period, revert to checking on queue manager 'ready' status
51+
// - This ensures we do not block indefinitely for breaking changes (i.e. protocol changes)
52+
if os.Getenv("MQ_NATIVE_HA") == "true" {
53+
54+
// Check if the Native-HA queue manager instance is currently in-sync
55+
isReadyToSync, isInSync, err := isInSyncWithReplicas(ctx, name, readyStrings)
56+
if err != nil {
57+
return false, err
58+
} else if isInSync {
59+
return true, nil
60+
}
61+
62+
// Check if the Native-HA queue manager instance is ready-to-sync
63+
// - A successful queue manager 'ready' status indicates that we are ready-to-sync
64+
if !isReadyToSync {
65+
return false, nil
66+
}
67+
err = ready.SetReadyToSync()
68+
if err != nil {
69+
return false, err
70+
}
71+
72+
// Check if the time period for checking in-sync has now expired
73+
// - We have already confirmed a successful queue manager 'ready' status
74+
// - Therefore the expiration of the in-sync time period will result in success
75+
expired, err := hasInSyncTimePeriodExpired()
76+
if err != nil {
77+
return false, err
78+
} else if expired {
79+
return true, nil
80+
}
81+
82+
return false, nil
83+
}
84+
3685
// Specify the queue manager name, just in case someone's created a second queue manager
3786
// #nosec G204
3887
cmd := exec.CommandContext(ctx, "dspmq", "-n", "-m", name)
@@ -42,18 +91,60 @@ func queueManagerStarted(ctx context.Context) (bool, error) {
4291
fmt.Println(err)
4392
return false, err
4493
}
45-
readyStrings := []string{
46-
"(RUNNING)",
47-
"(RUNNING AS STANDBY)",
48-
"(RECOVERY GROUP LEADER)",
49-
"(STARTING)",
50-
"(REPLICA)",
51-
}
94+
5295
for _, checkString := range readyStrings {
5396
if strings.Contains(string(out), checkString) {
5497
return true, nil
5598
}
5699
}
100+
101+
return false, nil
102+
}
103+
104+
// isInSyncWithReplicas reports the Native-HA status of the named queue manager
// by running "dspmq -n -o nativeha -m <name>" and inspecting its combined
// stdout/stderr output.
//
// It returns (isReadyToSync, isInSync, err):
//   - (true, true, nil) when the output contains "INSYNC(YES)";
//   - (true, false, nil) when the output contains any of readyStrings;
//   - (false, false, nil) when neither is found;
//   - (false, false, err) when dspmq could not be started or exited with an
//     error. The returned error wraps the underlying one, so errors.Is and
//     errors.As still work.
func isInSyncWithReplicas(ctx context.Context, name string, readyStrings []string) (bool, bool, error) {
	// Specify the queue manager name, consistent with the plain dspmq status check
	// #nosec G204
	cmd := exec.CommandContext(ctx, "dspmq", "-n", "-o", "nativeha", "-m", name)
	out, err := cmd.CombinedOutput()
	if err != nil {
		return false, false, fmt.Errorf("running dspmq for Native-HA status of %q: %w", name, err)
	}

	isReadyToSync, isInSync := parseNativeHAStatus(string(out), readyStrings)
	return isReadyToSync, isInSync, nil
}

// parseNativeHAStatus interprets dspmq Native-HA output: in-sync implies
// ready-to-sync, otherwise any of readyStrings marks the instance ready-to-sync.
func parseNativeHAStatus(output string, readyStrings []string) (isReadyToSync, isInSync bool) {
	if strings.Contains(output, "INSYNC(YES)") {
		return true, true
	}

	for _, checkString := range readyStrings {
		if strings.Contains(output, checkString) {
			return true, false
		}
	}

	return false, false
}
123+
124+
// hasInSyncTimePeriodExpired returns true if a Native-HA queue manager instance is not in-sync within the expected time period, otherwise false
125+
func hasInSyncTimePeriodExpired() (bool, error) {
126+
127+
// Default timeout 5 seconds
128+
var timeout int64 = 5
129+
var err error
130+
131+
// Check if a timeout override has been set
132+
customTimeout := os.Getenv("MQ_NATIVE_HA_IN_SYNC_TIMEOUT")
133+
if customTimeout != "" {
134+
timeout, err = strconv.ParseInt(customTimeout, 10, 64)
135+
if err != nil {
136+
return false, err
137+
}
138+
}
139+
140+
isReadyToSync, readyToSyncStartTime, err := ready.GetReadyToSyncStartTime()
141+
if err != nil {
142+
return false, err
143+
}
144+
if isReadyToSync && time.Now().Unix()-readyToSyncStartTime.Unix() >= timeout {
145+
return true, nil
146+
}
147+
57148
return false, nil
58149
}
59150

internal/ready/ready.go

+60-5
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,17 @@ package ready
2020
import (
2121
"context"
2222
"os"
23+
"strconv"
2324
"strings"
25+
"time"
2426

2527
"github.com/ibm-messaging/mq-container/internal/command"
2628
)
2729

28-
const fileName string = "/run/runmqserver/ready"
30+
const readyFile string = "/run/runmqserver/ready"
31+
const readyToSyncFile string = "/run/runmqserver/ready-to-sync"
2932

30-
func fileExists() (bool, error) {
33+
func fileExists(fileName string) (bool, error) {
3134
_, err := os.Stat(fileName)
3235
if err != nil {
3336
if !os.IsNotExist(err) {
@@ -40,7 +43,21 @@ func fileExists() (bool, error) {
4043

4144
// Clear ensures that any readiness state is cleared
4245
func Clear() error {
43-
exist, err := fileExists()
46+
err := clearFile(readyFile)
47+
if err != nil {
48+
return err
49+
}
50+
err = clearFile(readyToSyncFile)
51+
if err != nil {
52+
return err
53+
}
54+
55+
return nil
56+
}
57+
58+
// clearFile removes the specified file if it exists
59+
func clearFile(fileName string) error {
60+
exist, err := fileExists(fileName)
4461
if err != nil {
4562
return err
4663
}
@@ -54,19 +71,57 @@ func Clear() error {
5471
// manager has finished its configuration step
5572
func Set() error {
5673
// #nosec G306 - this gives permissions to owner/s group only.
57-
return os.WriteFile(fileName, []byte("1"), 0770)
74+
return os.WriteFile(readyFile, []byte("1"), 0770)
5875
}
5976

6077
// Check checks whether or not the queue manager has finished its
6178
// configuration steps
6279
func Check() (bool, error) {
63-
exists, err := fileExists()
80+
exists, err := fileExists(readyFile)
6481
if err != nil {
6582
return false, err
6683
}
6784
return exists, nil
6885
}
6986

87+
// SetReadyToSync is used to indicate that a Native-HA queue manager instance is ready-to-sync
88+
func SetReadyToSync() error {
89+
90+
exists, err := fileExists(readyToSyncFile)
91+
if err != nil {
92+
return err
93+
} else if exists {
94+
return nil
95+
}
96+
97+
readyToSyncStartTime := strconv.FormatInt(time.Now().Unix(), 10)
98+
// #nosec G306 - required permissions
99+
return os.WriteFile(readyToSyncFile, []byte(readyToSyncStartTime), 0660)
100+
}
101+
102+
// GetReadyToSyncStartTime returns the start-time a Native-HA queue manager instance was ready-to-sync
103+
func GetReadyToSyncStartTime() (bool, time.Time, error) {
104+
105+
exists, err := fileExists(readyToSyncFile)
106+
if err != nil {
107+
return exists, time.Time{}, err
108+
}
109+
110+
if exists {
111+
buf, err := os.ReadFile(readyToSyncFile)
112+
if err != nil {
113+
return true, time.Time{}, err
114+
}
115+
readyToSyncStartTime, err := strconv.ParseInt(string(buf), 10, 64)
116+
if err != nil {
117+
return true, time.Time{}, err
118+
}
119+
return true, time.Unix(readyToSyncStartTime, 0), nil
120+
}
121+
122+
return false, time.Time{}, nil
123+
}
124+
70125
// Status returns an enum representing the current running status of the queue manager
71126
func Status(ctx context.Context, name string) (QMStatus, error) {
72127
out, _, err := command.RunContext(ctx, "dspmq", "-n", "-m", name)

0 commit comments

Comments
 (0)