Skip to content

Commit 7a78133

Browse files
test/util: retry connection failures in dev
Our connection strategy from the test runner to the service cluster can be unreliable, as it's fairly naive - we just run `oc port-forward`. Let's retry connection failures to ameliorate issues stemming from this. Signed-off-by: Steve Kuznetsov <[email protected]>
1 parent 57ea5c4 commit 7a78133

File tree

1 file changed

+99
-0
lines changed

1 file changed

+99
-0
lines changed

test/util/framework/per_invocation_framework.go

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,14 @@
1515
package framework
1616

1717
import (
18+
"bytes"
1819
"context"
20+
"crypto/tls"
21+
"errors"
1922
"fmt"
2023
"hash/fnv"
24+
"io"
25+
"net"
2126
"net/http"
2227
"os"
2328
"path"
@@ -27,6 +32,10 @@ import (
2732
"time"
2833

2934
"github.com/onsi/ginkgo/v2/types"
35+
"golang.org/x/net/http2"
36+
37+
"k8s.io/apimachinery/pkg/util/sets"
38+
"k8s.io/apimachinery/pkg/util/wait"
3039

3140
"github.com/Azure/azure-sdk-for-go/sdk/azcore"
3241
azcorearm "github.com/Azure/azure-sdk-for-go/sdk/azcore/arm"
@@ -46,6 +55,8 @@ type perBinaryInvocationTestContext struct {
4655
isDevelopmentEnvironment bool
4756
skipCleanup bool
4857

58+
defaultTransport *http.Transport
59+
4960
contextLock sync.RWMutex
5061
subscriptionID string
5162
azureCredentials azcore.TokenCredential
@@ -79,6 +90,7 @@ func invocationContext() *perBinaryInvocationTestContext {
7990
location: location(),
8091
isDevelopmentEnvironment: IsDevelopmentEnvironment(),
8192
skipCleanup: skipCleanup(),
93+
defaultTransport: defaultHTTPTransport(),
8294
}
8395
})
8496
return invocationContextInstance
@@ -134,6 +146,15 @@ func (p *armSystemDataPolicy) Do(req *policy.Request) (*http.Response, error) {
134146
}
135147

136148
func (tc *perBinaryInvocationTestContext) getClientFactoryOptions() *azcorearm.ClientOptions {
149+
if tc.isDevelopmentEnvironment {
150+
return &azcorearm.ClientOptions{
151+
ClientOptions: azcore.ClientOptions{
152+
Transport: &proxiedConnectionTransporter{
153+
delegate: tc.defaultTransport,
154+
},
155+
},
156+
}
157+
}
137158
return nil
138159
}
139160

@@ -150,6 +171,9 @@ func (tc *perBinaryInvocationTestContext) getHCPClientFactoryOptions() *azcorear
150171
},
151172
},
152173
},
174+
Transport: &proxiedConnectionTransporter{
175+
delegate: tc.defaultTransport,
176+
},
153177
InsecureAllowCredentialWithHTTP: true,
154178
PerCallPolicies: []policy.Policy{
155179
&armSystemDataPolicy{},
@@ -160,6 +184,81 @@ func (tc *perBinaryInvocationTestContext) getHCPClientFactoryOptions() *azcorear
160184
return nil
161185
}
162186

187+
// default transport taken judiciously from azcore library to mimick their behavior when no transporter is provided
188+
func defaultHTTPTransport() *http.Transport {
189+
dialer := &net.Dialer{
190+
Timeout: 30 * time.Second,
191+
KeepAlive: 30 * time.Second,
192+
}
193+
defaultTransport := &http.Transport{
194+
Proxy: http.ProxyFromEnvironment,
195+
DialContext: dialer.DialContext,
196+
ForceAttemptHTTP2: true,
197+
MaxIdleConns: 100,
198+
MaxIdleConnsPerHost: 10,
199+
IdleConnTimeout: 90 * time.Second,
200+
TLSHandshakeTimeout: 10 * time.Second,
201+
ExpectContinueTimeout: 1 * time.Second,
202+
TLSClientConfig: &tls.Config{
203+
MinVersion: tls.VersionTLS12,
204+
Renegotiation: tls.RenegotiateFreelyAsClient,
205+
},
206+
}
207+
// TODO: evaluate removing this once https://github.com/golang/go/issues/59690 has been fixed
208+
if http2Transport, err := http2.ConfigureTransports(defaultTransport); err == nil {
209+
// if the connection has been idle for 10 seconds, send a ping frame for a health check
210+
http2Transport.ReadIdleTimeout = 10 * time.Second
211+
// if there's no response to the ping within the timeout, the connection will be closed
212+
http2Transport.PingTimeout = 5 * time.Second
213+
}
214+
return defaultTransport
215+
}
216+
217+
// proxiedConnectionTransporter retries connections done across the proxy path to a local RP,
218+
// in order to paper over transient errors in the proxied connection
219+
type proxiedConnectionTransporter struct {
220+
delegate *http.Transport
221+
}
222+
223+
func (t *proxiedConnectionTransporter) Do(req *http.Request) (*http.Response, error) {
224+
retryCtx, cancel := context.WithTimeoutCause(req.Context(), 2*time.Minute, errors.New("proxy transport retry timeout"))
225+
defer cancel()
226+
227+
body, err := io.ReadAll(req.Body)
228+
if err != nil {
229+
return nil, err
230+
}
231+
if err := req.Body.Close(); err != nil {
232+
return nil, err
233+
}
234+
235+
var response *http.Response
236+
responseErr := wait.ExponentialBackoffWithContext(retryCtx, wait.Backoff{
237+
Duration: 800 * time.Millisecond,
238+
Factor: 2,
239+
Jitter: 0.1,
240+
Steps: 10,
241+
Cap: 20 * time.Second,
242+
}, func(ctx context.Context) (done bool, err error) {
243+
thisReq := req.Clone(ctx)
244+
thisReq.Body = io.NopCloser(bytes.NewReader(body))
245+
resp, err := t.delegate.RoundTrip(thisReq)
246+
response = resp
247+
if err != nil {
248+
if sets.NewString(
249+
"connect: connection refused",
250+
"connect: connection reset by peer",
251+
"proxy error from localhost",
252+
).Has(err.Error()) {
253+
return false, nil
254+
}
255+
return true, err
256+
}
257+
return true, nil
258+
})
259+
return response, responseErr
260+
}
261+
163262
func (tc *perBinaryInvocationTestContext) getSubscriptionID(ctx context.Context, subscriptionClient *armsubscriptions.Client) (string, error) {
164263
tc.contextLock.RLock()
165264
if len(tc.subscriptionID) > 0 {

0 commit comments

Comments
 (0)