Skip to content

Commit 6bf8e19

Browse files
committed
fix: synchronize tf init to avoid plugin race condition
1 parent b66e396 commit 6bf8e19

File tree

6 files changed

+90
-14
lines changed

6 files changed

+90
-14
lines changed

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ GOLANGCI_LINT ?= $(LOCALBIN)/golangci-lint
115115
## Tool Versions
116116
KUSTOMIZE_VERSION ?= v3.8.7
117117
CONTROLLER_TOOLS_VERSION ?= v0.16.4
118-
GOLANGCI_LINT_VERSION ?= v1.60.3
118+
GOLANGCI_LINT_VERSION ?= v2.5.0
119119

120120
KUSTOMIZE_INSTALL_SCRIPT ?= "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh"
121121
.PHONY: kustomize
@@ -135,7 +135,7 @@ $(ENVTEST): $(LOCALBIN)
135135
.PHONY: golangci-lint
136136
golangci-lint: $(GOLANGCI_LINT) ## Download golangci-lint locally if necessary
137137
$(GOLANGCI_LINT): $(LOCALBIN)
138-
test -s $(LOCALBIN)/golangci-lint || GOBIN=$(LOCALBIN) go install github.com/golangci/golangci-lint/cmd/golangci-lint@$(GOLANGCI_LINT_VERSION)
138+
test -s $(LOCALBIN)/golangci-lint || GOBIN=$(LOCALBIN) go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@$(GOLANGCI_LINT_VERSION)
139139

140140
# Development environment targets
141141
setup-dev-env: create-kind init-tilt

controllers/controlplane/kopscontrolplane_controller.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,9 @@ func (r *KopsControlPlaneReconciler) PrepareCustomCloudResources(ctx context.Con
184184
if err != nil {
185185
return err
186186
}
187-
defer karpenterResourcesContent.Close()
187+
defer func() {
188+
_ = karpenterResourcesContent.Close()
189+
}()
188190

189191
// This is needed because the apply will fail if the file is empty
190192
placeholder := corev1.ConfigMap{

pkg/utils/kops_utils.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,9 @@ func GetUserDataFromTerraformFile(clusterName, igName, terraformOutputDir string
367367
if err != nil {
368368
return "", err
369369
}
370-
defer userDataFile.Close()
370+
defer func() {
371+
_ = userDataFile.Close()
372+
}()
371373
userData, err := io.ReadAll(userDataFile)
372374
if err != nil {
373375
return "", err

pkg/utils/kops_utils_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,10 +125,10 @@ func TestParseSpotinstFeatureflags(t *testing.T) {
125125

126126
for _, tc := range testCases {
127127
t.Run(tc.description, func(t *testing.T) {
128-
os.Unsetenv("SPOTINST_TOKEN")
129-
os.Unsetenv("SPOTINST_ACCOUNT")
128+
_ = os.Unsetenv("SPOTINST_TOKEN")
129+
_ = os.Unsetenv("SPOTINST_ACCOUNT")
130130
for key, value := range tc.environmentVariables {
131-
os.Setenv(key, value)
131+
_ = os.Setenv(key, value)
132132
}
133133

134134
err := ParseSpotinstFeatureflags(tc.input)

pkg/utils/terraform_utils.go

Lines changed: 76 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@ import (
88
"path/filepath"
99
"regexp"
1010
"strings"
11+
"sync"
12+
"syscall"
1113
"text/template"
14+
"time"
1215

1316
"github.com/aws/aws-sdk-go-v2/aws"
1417
"github.com/hashicorp/terraform-exec/tfexec"
@@ -25,6 +28,40 @@ type Template struct {
2528
//go:embed templates/*.tpl
2629
var templates embed.FS
2730

31+
var tfPluginMux sync.Mutex
32+
33+
// lockPluginCache takes an exclusive advisory flock(2) lock on the file
// ".terraform-plugin.lock" inside pluginCacheDir, creating the directory
// (mode 0755) and the lock file (mode 0644) when they do not exist yet.
//
// The call blocks until the lock is granted. On success the open lock file is
// returned; the lock stays held until the caller unlocks or closes that file
// (see unlockPluginCache). On failure the returned file is nil and any file
// opened here has already been closed.
func lockPluginCache(pluginCacheDir string) (*os.File, error) {
	if err := os.MkdirAll(pluginCacheDir, 0755); err != nil {
		return nil, err
	}

	lockPath := filepath.Join(pluginCacheDir, ".terraform-plugin.lock")
	lockFile, err := os.OpenFile(lockPath, os.O_CREATE|os.O_RDWR, 0644)
	if err != nil {
		return nil, fmt.Errorf("failed to open lock file: %w", err)
	}

	// A blocking flock can be interrupted by a signal delivered to the
	// process and then reports EINTR even though nothing is wrong. Retry in
	// that case instead of surfacing a spurious failure to the caller.
	for {
		err = syscall.Flock(int(lockFile.Fd()), syscall.LOCK_EX)
		if err != syscall.EINTR {
			break
		}
	}
	if err != nil {
		// Best-effort close: the lock error is the one worth reporting.
		_ = lockFile.Close()
		return nil, fmt.Errorf("failed to acquire exclusive lock: %w", err)
	}

	return lockFile, nil
}
51+
52+
// unlockPluginCache drops the flock held on lockFile and closes the file.
//
// A nil lockFile is a no-op and yields nil. When the unlock syscall fails the
// file is still closed (its close error is intentionally discarded) and the
// wrapped unlock error is returned; otherwise the result of Close is returned.
func unlockPluginCache(lockFile *os.File) error {
	if lockFile != nil {
		unlockErr := syscall.Flock(int(lockFile.Fd()), syscall.LOCK_UN)
		if unlockErr == nil {
			return lockFile.Close()
		}

		// Still close the descriptor so it is not leaked; the unlock
		// failure is the error reported to the caller.
		_ = lockFile.Close()
		return fmt.Errorf("failed to unlock plugin cache: %w", unlockErr)
	}

	return nil
}
64+
2865
// CreateTerraformFilesFromTemplate populates a Terraform template and creates files in the state
2966
func CreateTerraformFilesFromTemplate(terraformTemplateFilePath string, TerraformOutputFileName string, terraformOutputDir string, templateData any) error {
3067
template := Template{
@@ -43,7 +80,9 @@ func CreateAdditionalTerraformFiles(tfFiles ...Template) error {
4380
if err != nil {
4481
return err
4582
}
46-
defer file.Close()
83+
defer func() {
84+
_ = file.Close()
85+
}()
4786

4887
t := template.New(filepath.Base(tfFile.TemplateFilename)).Funcs(template.FuncMap{
4988
"stringReplace": strings.Replace,
@@ -96,23 +135,52 @@ func initTerraform(ctx context.Context, workingDir, terraformExecPath string, cr
96135
return nil, err
97136
}
98137

138+
pluginCacheDir := fmt.Sprintf("%s/plugin-cache", filepath.Dir(terraformExecPath))
139+
99140
env := map[string]string{
100141
"AWS_ACCESS_KEY_ID": credentials.AccessKeyID,
101142
"AWS_SECRET_ACCESS_KEY": credentials.SecretAccessKey,
102143
"SPOTINST_TOKEN": os.Getenv("SPOTINST_TOKEN"),
103144
"SPOTINST_ACCOUNT": os.Getenv("SPOTINST_ACCOUNT"),
104-
"TF_PLUGIN_CACHE_DIR": fmt.Sprintf("%s/plugin-cache", filepath.Dir(terraformExecPath)),
145+
"TF_PLUGIN_CACHE_DIR": pluginCacheDir,
105146
}
106147

107-
// this overrides all ENVVARs that are passed to Terraform
108148
err = tf.SetEnv(env)
109149
if err != nil {
110150
return nil, err
111151
}
112152

113-
err = tf.Init(ctx, tfexec.Upgrade(true))
153+
tfPluginMux.Lock()
154+
defer tfPluginMux.Unlock()
155+
156+
lockFile, err := lockPluginCache(pluginCacheDir)
114157
if err != nil {
115-
return nil, err
158+
return nil, fmt.Errorf("failed to acquire plugin cache lock: %w", err)
159+
}
160+
defer func() {
161+
time.Sleep(200 * time.Millisecond)
162+
_ = unlockPluginCache(lockFile)
163+
}()
164+
165+
var initErr error
166+
maxRetries := 3
167+
for i := 0; i < maxRetries; i++ {
168+
initErr = tf.Init(ctx, tfexec.Upgrade(true))
169+
if initErr == nil {
170+
break
171+
}
172+
173+
if strings.Contains(initErr.Error(), "text file busy") && i < maxRetries-1 {
174+
waitTime := time.Duration(i+1) * 2 * time.Second
175+
time.Sleep(waitTime)
176+
continue
177+
}
178+
179+
break
180+
}
181+
182+
if initErr != nil {
183+
return nil, initErr
116184
}
117185

118186
return tf, nil
@@ -177,7 +245,9 @@ func CleanupTerraformDirectory(dir string) error {
177245
if err != nil {
178246
return err
179247
}
180-
defer d.Close()
248+
defer func() {
249+
_ = d.Close()
250+
}()
181251
names, err := d.Readdirnames(-1)
182252
if err != nil {
183253
return err

pkg/utils/terraform_utils_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,9 @@ func TestModifyTerraformProviderVersion(t *testing.T) {
222222
t.Run(tc.description, func(t *testing.T) {
223223
tmpDir, err := os.MkdirTemp("", "test_terraform_provider")
224224
g.Expect(err).NotTo(HaveOccurred())
225-
defer os.RemoveAll(tmpDir)
225+
defer func() {
226+
_ = os.RemoveAll(tmpDir)
227+
}()
226228

227229
kubernetesFile := fmt.Sprintf("%s/kubernetes.tf", tmpDir)
228230

0 commit comments

Comments
 (0)