Skip to content

Commit 0beeab3

Browse files
test/util: retry connection failures in dev
Our connection strategy from the test runner to the service cluster can be unreliable, as it's fairly naive - we just run `oc port-forward`. Let's retry connection failures to ameliorate issues stemming from this. Signed-off-by: Steve Kuznetsov <[email protected]>
1 parent 57ea5c4 commit 0beeab3

File tree

1 file changed

+87
-0
lines changed

1 file changed

+87
-0
lines changed

test/util/framework/per_invocation_framework.go

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,11 @@ package framework
1616

1717
import (
1818
"context"
19+
"crypto/tls"
20+
"errors"
1921
"fmt"
2022
"hash/fnv"
23+
"net"
2124
"net/http"
2225
"os"
2326
"path"
@@ -27,6 +30,10 @@ import (
2730
"time"
2831

2932
"github.com/onsi/ginkgo/v2/types"
33+
"golang.org/x/net/http2"
34+
35+
"k8s.io/apimachinery/pkg/util/sets"
36+
"k8s.io/apimachinery/pkg/util/wait"
3037

3138
"github.com/Azure/azure-sdk-for-go/sdk/azcore"
3239
azcorearm "github.com/Azure/azure-sdk-for-go/sdk/azcore/arm"
@@ -46,6 +53,8 @@ type perBinaryInvocationTestContext struct {
4653
isDevelopmentEnvironment bool
4754
skipCleanup bool
4855

56+
defaultTransport *http.Transport
57+
4958
contextLock sync.RWMutex
5059
subscriptionID string
5160
azureCredentials azcore.TokenCredential
@@ -79,6 +88,7 @@ func invocationContext() *perBinaryInvocationTestContext {
7988
location: location(),
8089
isDevelopmentEnvironment: IsDevelopmentEnvironment(),
8190
skipCleanup: skipCleanup(),
91+
defaultTransport: defaultHTTPTransport(),
8292
}
8393
})
8494
return invocationContextInstance
@@ -134,6 +144,15 @@ func (p *armSystemDataPolicy) Do(req *policy.Request) (*http.Response, error) {
134144
}
135145

136146
func (tc *perBinaryInvocationTestContext) getClientFactoryOptions() *azcorearm.ClientOptions {
147+
if tc.isDevelopmentEnvironment {
148+
return &azcorearm.ClientOptions{
149+
ClientOptions: azcore.ClientOptions{
150+
Transport: &proxiedConnectionTransporter{
151+
delegate: tc.defaultTransport,
152+
},
153+
},
154+
}
155+
}
137156
return nil
138157
}
139158

@@ -150,6 +169,9 @@ func (tc *perBinaryInvocationTestContext) getHCPClientFactoryOptions() *azcorear
150169
},
151170
},
152171
},
172+
Transport: &proxiedConnectionTransporter{
173+
delegate: tc.defaultTransport,
174+
},
153175
InsecureAllowCredentialWithHTTP: true,
154176
PerCallPolicies: []policy.Policy{
155177
&armSystemDataPolicy{},
@@ -160,6 +182,71 @@ func (tc *perBinaryInvocationTestContext) getHCPClientFactoryOptions() *azcorear
160182
return nil
161183
}
162184

185+
// default transport taken judiciously from azcore library to mimick their behavior when no transporter is provided
186+
func defaultHTTPTransport() *http.Transport {
187+
dialer := &net.Dialer{
188+
Timeout: 30 * time.Second,
189+
KeepAlive: 30 * time.Second,
190+
}
191+
defaultTransport := &http.Transport{
192+
Proxy: http.ProxyFromEnvironment,
193+
DialContext: dialer.DialContext,
194+
ForceAttemptHTTP2: true,
195+
MaxIdleConns: 100,
196+
MaxIdleConnsPerHost: 10,
197+
IdleConnTimeout: 90 * time.Second,
198+
TLSHandshakeTimeout: 10 * time.Second,
199+
ExpectContinueTimeout: 1 * time.Second,
200+
TLSClientConfig: &tls.Config{
201+
MinVersion: tls.VersionTLS12,
202+
Renegotiation: tls.RenegotiateFreelyAsClient,
203+
},
204+
}
205+
// TODO: evaluate removing this once https://github.com/golang/go/issues/59690 has been fixed
206+
if http2Transport, err := http2.ConfigureTransports(defaultTransport); err == nil {
207+
// if the connection has been idle for 10 seconds, send a ping frame for a health check
208+
http2Transport.ReadIdleTimeout = 10 * time.Second
209+
// if there's no response to the ping within the timeout, the connection will be closed
210+
http2Transport.PingTimeout = 5 * time.Second
211+
}
212+
return defaultTransport
213+
}
214+
215+
// proxiedConnectionTransporter retries connections done across the proxy path to a local RP,
216+
// in order to paper over transient errors in the proxied connection
217+
type proxiedConnectionTransporter struct {
218+
delegate *http.Transport
219+
}
220+
221+
func (t *proxiedConnectionTransporter) Do(req *http.Request) (*http.Response, error) {
222+
retryCtx, cancel := context.WithTimeoutCause(req.Context(), 2*time.Minute, errors.New("proxy transport retry timeout"))
223+
defer cancel()
224+
225+
var response *http.Response
226+
err := wait.ExponentialBackoffWithContext(retryCtx, wait.Backoff{
227+
Duration: 800 * time.Millisecond,
228+
Factor: 2,
229+
Jitter: 0.1,
230+
Steps: 10,
231+
Cap: 0,
232+
}, func(ctx context.Context) (done bool, err error) {
233+
resp, err := t.delegate.RoundTrip(req.Clone(ctx))
234+
if err != nil {
235+
if sets.NewString(
236+
"connect: connection refused",
237+
"connect: connection reset by peer",
238+
"proxy error from localhost",
239+
).Has(err.Error()) {
240+
return false, nil
241+
}
242+
return true, err
243+
}
244+
response = resp
245+
return true, nil
246+
})
247+
return response, err
248+
}
249+
163250
func (tc *perBinaryInvocationTestContext) getSubscriptionID(ctx context.Context, subscriptionClient *armsubscriptions.Client) (string, error) {
164251
tc.contextLock.RLock()
165252
if len(tc.subscriptionID) > 0 {

0 commit comments

Comments
 (0)