Skip to content

Commit 13af850

Browse files
committed
fix: prevent stale reads of kernel args in schematic id calculation
Fix the rare issue which was caught by our upgrade tests: `KernelArgs` resource status might not be up to date after we observed them to be initialized on the machine status resource. This caused an unwanted Talos upgrade in rare cases. We were also able to reproduce this with a unit test. Do an uncached read to circumvent that. Additionally, do a small fix the initialization of the `KernelArgs` resource: do the "emptiness check" on the extra kernel args after we filter the system ones out, so we won't create unnecessary `KernelArgs` resources (this did not cause any changes in the logic, can be considered an optimization fix). Signed-off-by: Utku Ozdemir <[email protected]> (cherry picked from commit 3e90bc6)
1 parent a243fa8 commit 13af850

File tree

2 files changed

+43
-25
lines changed

2 files changed

+43
-25
lines changed

internal/backend/kernelargs/kernelargs.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,14 +43,14 @@ func NewInitializer(state state.State, logger *zap.Logger) (*Initializer, error)
4343
}
4444

4545
func (initializer *Initializer) Init(ctx context.Context, id resource.ID, args []string) error {
46-
if len(args) == 0 {
46+
extraArgs := xslices.Filter(args, func(value string) bool { return !isProtected(value) })
47+
48+
if len(extraArgs) == 0 {
4749
return nil
4850
}
4951

5052
kernelArgs := omni.NewKernelArgs(id)
51-
kernelArgs.TypedSpec().Value.Args = xslices.Filter(args, func(value string) bool {
52-
return !isProtected(value)
53-
})
53+
kernelArgs.TypedSpec().Value.Args = extraArgs
5454

5555
if err := initializer.state.Create(ctx, kernelArgs); err != nil && !state.IsConflictError(err) {
5656
return fmt.Errorf("error creating extra kernel args configuration: %w", err)

internal/backend/runtime/omni/controllers/omni/schematic_configuration.go

Lines changed: 39 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ package omni
88
import (
99
"context"
1010
"errors"
11+
"fmt"
1112
"slices"
1213

1314
"github.com/blang/semver/v4"
@@ -176,12 +177,12 @@ func (helper *schematicConfigurationHelper) reconcile(
176177
return nil, err
177178
}
178179

179-
kernelArgsRes, err := safe.ReaderGetByID[*omni.KernelArgs](ctx, r, clusterMachine.Metadata().ID())
180-
if err != nil && !state.IsNotFoundError(err) {
180+
kernelArgs, err := determineKernelArgs(ctx, ms, r)
181+
if err != nil {
181182
return nil, err
182183
}
183184

184-
customization, err := newMachineCustomization(cluster, ms, extensions, kernelArgsRes, securityState.BootedWithUki)
185+
customization, err := newMachineCustomization(cluster, ms, extensions, securityState.BootedWithUki)
185186
if err != nil {
186187
return nil, err
187188
}
@@ -193,7 +194,7 @@ func (helper *schematicConfigurationHelper) reconcile(
193194
SystemExtensions: schematic.SystemExtensions{
194195
OfficialExtensions: customization.extensionsList,
195196
},
196-
ExtraKernelArgs: customization.kernelArgsList,
197+
ExtraKernelArgs: kernelArgs,
197198
Meta: customization.meta,
198199
},
199200
Overlay: overlay,
@@ -248,16 +249,12 @@ func updateFinalizers(ctx context.Context, r controller.ReaderWriter, extensions
248249
return r.AddFinalizer(ctx, extensions.Metadata(), SchematicConfigurationControllerName)
249250
}
250251

251-
func newMachineCustomization(cluster *omni.Cluster, machineStatus *omni.MachineStatus, exts *omni.MachineExtensions, args *omni.KernelArgs, isUKI bool) (machineCustomization, error) {
252+
func newMachineCustomization(cluster *omni.Cluster, machineStatus *omni.MachineStatus, exts *omni.MachineExtensions, isUKI bool) (machineCustomization, error) {
252253
mc := machineCustomization{
253254
machineStatus: machineStatus,
254255
cluster: cluster,
255256
}
256257

257-
if err := mc.initializeKernelArgs(machineStatus, args); err != nil {
258-
return machineCustomization{}, err
259-
}
260-
261258
if isUKI {
262259
schematicConfig := machineStatus.TypedSpec().Value.Schematic
263260
if schematicConfig == nil {
@@ -324,41 +321,62 @@ type machineCustomization struct {
324321
machineExtensions *omni.MachineExtensions
325322
cluster *omni.Cluster
326323
extensionsList []string
327-
kernelArgsList []string
328324
detectedExtensions []string
329325
meta []schematic.MetaValue
330326
}
331327

332-
func (mc *machineCustomization) initializeKernelArgs(machineStatus *omni.MachineStatus, kernelArgs *omni.KernelArgs) error {
328+
func determineKernelArgs(ctx context.Context, machineStatus *omni.MachineStatus, r controller.Reader) ([]string, error) {
329+
if _, kernelArgsInitialized := machineStatus.Metadata().Annotations().Get(omni.KernelArgsInitialized); !kernelArgsInitialized {
330+
return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("kernel args are not yet initialized")
331+
}
332+
333333
updateSupported, err := kernelargs.UpdateSupported(machineStatus)
334334
if err != nil {
335-
return err
335+
return nil, err
336336
}
337337

338338
if !updateSupported {
339339
if machineStatus.TypedSpec().Value.Schematic == nil {
340-
return xerrors.NewTaggedf[qtransform.SkipReconcileTag]("machine schematic is not yet initialized")
340+
return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("machine schematic is not yet initialized")
341341
}
342342

343-
// initialize them to be unchanged - take the current args as-is
344-
mc.kernelArgsList = machineStatus.TypedSpec().Value.Schematic.KernelArgs
343+
// keep them unchanged - take the current args as-is
344+
return machineStatus.TypedSpec().Value.Schematic.KernelArgs, nil
345+
}
345346

346-
return nil
347+
// Here, we need to use an uncached reader. This is because we want to avoid
348+
// stale reads of the KernelArgs resource: there can be cases where the omni.KernelArgsInitialized annotation
349+
// is present in the MachineStatus, but the KernelArgs resource is not yet visible,
350+
// which can cause unwanted Talos upgrades through a schematic id update.
351+
uncachedReader, ok := r.(controller.UncachedReader)
352+
if !ok {
353+
return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("reader does not support uncached reads")
354+
}
355+
356+
kernelArgsRes, err := uncachedReader.GetUncached(ctx, omni.NewKernelArgs(machineStatus.Metadata().ID()).Metadata())
357+
if err != nil && !state.IsNotFoundError(err) {
358+
return nil, err
359+
}
360+
361+
var kernelArgs *omni.KernelArgs
362+
363+
if kernelArgsRes != nil {
364+
if kernelArgs, ok = kernelArgsRes.(*omni.KernelArgs); !ok {
365+
return nil, fmt.Errorf("expected %q resource, got %T", omni.KernelArgsType, kernelArgsRes)
366+
}
347367
}
348368

349369
// todo: centralize this...
350370
args, ok, err := kernelargs.Calculate(machineStatus, kernelArgs)
351371
if err != nil {
352-
return err
372+
return nil, err
353373
}
354374

355375
if !ok {
356-
return xerrors.NewTaggedf[qtransform.SkipReconcileTag]("extra kernel args for machine %q are not yet initialized", machineStatus.Metadata().ID())
376+
return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("extra kernel args for machine %q are not yet initialized", machineStatus.Metadata().ID())
357377
}
358378

359-
mc.kernelArgsList = args
360-
361-
return nil
379+
return args, nil
362380
}
363381

364382
func (mc *machineCustomization) isDetected(value string) bool {

0 commit comments

Comments
 (0)