Skip to content

Commit

Permalink
Update NIC management strategy
Browse files Browse the repository at this point in the history
Don't delete default netplan config on Debian 12. Write netplan configs to /run/netplan. Don't manage primary NIC unless the user has configured the new manage_primary_nic toggle.  Disable use-domains on secondary NICs with netplan. This avoids making the MDS reachable over secondary NICs instead of primary NICs when managing the primary interface. If the MDS is inaccessible during startup, rollback all agent configuration on all interfaces and try again. Apply changes during netplan and network manager rollbacks.
  • Loading branch information
a-crate committed Jul 31, 2024
1 parent 79a1124 commit e8c4b80
Show file tree
Hide file tree
Showing 11 changed files with 185 additions and 92 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ MetadataScripts | startup | `false` disables startup script exe
MetadataScripts | shutdown | `false` disables shutdown script execution.
NetworkInterfaces | setup | `false` skips network interface setup.
NetworkInterfaces | ip\_forwarding | `false` skips IP forwarding.
NetworkInterfaces | manage\_primary\_nic | `true` will start managing the primary NIC in addition to the secondary NICs.
NetworkInterfaces | dhcp\_command | String path for alternate dhcp executable used to enable network interfaces.
OSLogin | cert_authentication | `false` prevents guest-agent from setting up sshd's `TrustedUserCAKeys`, `AuthorizedPrincipalsCommand` and `AuthorizedPrincipalsCommandUser` configuration keys. Default value: `true`.

Expand Down
8 changes: 5 additions & 3 deletions google_guest_agent/cfg/cfg.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ sysprep-specialize = true
dhcp_command =
ip_forwarding = true
setup = true
manage_primary_nic =
[OSLogin]
cert_authentication = true
Expand Down Expand Up @@ -260,9 +261,10 @@ type MDS struct {

// NetworkInterfaces contains the configurations of NetworkInterfaces section.
type NetworkInterfaces struct {
DHCPCommand string `ini:"dhcp_command,omitempty"`
IPForwarding bool `ini:"ip_forwarding,omitempty"`
Setup bool `ini:"setup,omitempty"`
DHCPCommand string `ini:"dhcp_command,omitempty"`
IPForwarding bool `ini:"ip_forwarding,omitempty"`
Setup bool `ini:"setup,omitempty"`
ManagePrimaryNIC bool `ini:"manage_primary_nic,omitempty"`
}

// Snapshots contains the configurations of Snapshots section.
Expand Down
13 changes: 12 additions & 1 deletion google_guest_agent/instance_setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (

"github.com/GoogleCloudPlatform/guest-agent/google_guest_agent/agentcrypto"
"github.com/GoogleCloudPlatform/guest-agent/google_guest_agent/cfg"
network "github.com/GoogleCloudPlatform/guest-agent/google_guest_agent/network/manager"
"github.com/GoogleCloudPlatform/guest-agent/google_guest_agent/run"
"github.com/GoogleCloudPlatform/guest-agent/google_guest_agent/scheduler"
"github.com/GoogleCloudPlatform/guest-agent/retry"
Expand Down Expand Up @@ -157,7 +158,17 @@ func agentInit(ctx context.Context) {
newMetadata, err = mdsClient.Get(ctx)
if err != nil {
logger.Errorf("Failed to reach MDS(all retries exhausted): %+v", err)
os.Exit(1)
logger.Infof("Falling to OS default network configuration to attempt to recover.")
if err := network.FallbackToDefault(ctx); err != nil {
// Just log error and attempt to continue anyway, if we can't reach MDS
// we can't do anything.
logger.Errorf("Failed to rollback guest-agent network configuration: %v", err)
}
newMetadata, err = mdsClient.Get(ctx)
if err != nil {
logger.Errorf("Failed to reach MDS after attempt to recover network configuration(all retries exhausted): %+v", err)
os.Exit(1)
}
}
}

Expand Down
7 changes: 6 additions & 1 deletion google_guest_agent/network/manager/dhclient_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,12 @@ func partitionInterfaces(ctx context.Context, interfaces, ipv6Interfaces []strin
var obtainIpv6Interfaces []string
var releaseIpv6Interfaces []string

for _, iface := range interfaces {
for i, iface := range interfaces {
if !shouldManageInterface(i == 0) {
// Do not setup anything for this interface to avoid duplicate processes.
continue
}

// Check for IPv4 interfaces for which to obtain a lease.
processExists, err := dhclientProcessExists(ctx, iface, ipv4)
if err != nil {
Expand Down
28 changes: 16 additions & 12 deletions google_guest_agent/network/manager/dhclient_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"testing"
"time"

"github.com/GoogleCloudPlatform/guest-agent/google_guest_agent/cfg"
"github.com/GoogleCloudPlatform/guest-agent/google_guest_agent/osinfo"
"github.com/GoogleCloudPlatform/guest-agent/google_guest_agent/ps"
"github.com/GoogleCloudPlatform/guest-agent/google_guest_agent/run"
Expand Down Expand Up @@ -135,6 +136,9 @@ type dhclientProcessOpts struct {
// dhclientTestSetup sets up the test.
func dhclientTestSetup(t *testing.T, opts dhclientTestOpts) {
t.Helper()
if err := cfg.Load(nil); err != nil {
t.Fatalf("cfg.Load(nil) = %v, nil", err)
}

// We have to mock dhclientProcessExists as we cannot mock where the ps
// package checks for processes here.
Expand Down Expand Up @@ -276,42 +280,42 @@ func TestPartitionInterfaces(t *testing.T) {
}{
{
name: "all-ipv4",
testInterfaces: []string{"obtain1", "obtain2"},
testInterfaces: []string{"primary1", "obtain2"},
testIpv6Interfaces: []string{},
existFlags: []bool{false, false},
ipVersions: []ipVersion{ipv4, ipv4},
expectedObtainIpv4: []string{"obtain1", "obtain2"},
expectedObtainIpv4: []string{"obtain2"},
expectedObtainIpv6: []string{},
expectedReleaseIpv6: []string{},
},
{
name: "all-ipv6",
testInterfaces: []string{"obtain1", "obtain2"},
testIpv6Interfaces: []string{"obtain1", "obtain2"},
testInterfaces: []string{"primary1", "obtain2"},
testIpv6Interfaces: []string{"primary1", "obtain2"},
existFlags: []bool{false, false},
ipVersions: []ipVersion{ipv6, ipv6},
expectedObtainIpv4: []string{"obtain1", "obtain2"},
expectedObtainIpv6: []string{"obtain1", "obtain2"},
expectedObtainIpv4: []string{"obtain2"},
expectedObtainIpv6: []string{"obtain2"},
expectedReleaseIpv6: []string{},
},
{
name: "ipv4-ipv6",
testInterfaces: []string{"obtain1", "obtain2"},
testInterfaces: []string{"primary1", "obtain2"},
testIpv6Interfaces: []string{"obtain2"},
existFlags: []bool{false, false},
ipVersions: []ipVersion{ipv4, ipv6},
expectedObtainIpv4: []string{"obtain1", "obtain2"},
expectedObtainIpv4: []string{"obtain2"},
expectedObtainIpv6: []string{"obtain2"},
expectedReleaseIpv6: []string{},
},
{
name: "release-ipv6",
testInterfaces: []string{"obtain1", "release1"},
testIpv6Interfaces: []string{"obtain1"},
testInterfaces: []string{"primary1", "release1"},
testIpv6Interfaces: []string{"primary1"},
existFlags: []bool{false, true},
ipVersions: []ipVersion{ipv4, ipv6},
expectedObtainIpv4: []string{"obtain1", "release1"},
expectedObtainIpv6: []string{"obtain1"},
expectedObtainIpv4: []string{"release1"},
expectedObtainIpv6: []string{},
expectedReleaseIpv6: []string{"release1"},
},
}
Expand Down
63 changes: 57 additions & 6 deletions google_guest_agent/network/manager/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package manager
import (
"context"
"fmt"
"net"

"github.com/GoogleCloudPlatform/guest-agent/google_guest_agent/cfg"
"github.com/GoogleCloudPlatform/guest-agent/google_guest_agent/osinfo"
Expand Down Expand Up @@ -85,9 +86,6 @@ type osConfigRule struct {
type osConfigAction struct {
// ignorePrimary determines whether to ignore the primary network interface.
ignorePrimary bool

// ignoreSecondary determines whether to ignore all non-primary network interfaces.
ignoreSecondary bool
}

// guestAgentSection is the section added to guest-agent-written ini files to indicate
Expand Down Expand Up @@ -131,9 +129,9 @@ func detectNetworkManager(ctx context.Context, iface string) (*serviceStatus, er
return nil, fmt.Errorf("no network manager impl found for %s", iface)
}

// SetupInterfaces sets up all the network interfaces on the system, applying rules described
// by osRules and using the native network manager service detected to be managing the primary
// network interface.
// SetupInterfaces sets up all secondary network interfaces on the system, and primary network
// interface if enabled in the configuration using the native network manager service detected
// to be managing the primary network interface.
func SetupInterfaces(ctx context.Context, config *cfg.Sections, mds *metadata.Descriptor) error {
// User may have disabled network interface setup entirely.
if !config.NetworkInterfaces.Setup {
Expand Down Expand Up @@ -195,3 +193,56 @@ func SetupInterfaces(ctx context.Context, config *cfg.Sections, mds *metadata.De

return nil
}

// FallbackToDefault will attempt to rescue broken networking by rolling back
// all guest-agent modifications to the network configuration.
func FallbackToDefault(ctx context.Context) error {
nics, err := buildInterfacesFromAllPhysicalNICs()
if err != nil {
return fmt.Errorf("could not build list of NICs for fallback: %v", err)
}

// Rollback every NIC with every known network manager.
for _, svc := range knownNetworkManagers {
logger.Infof("Rolling back %s", svc.Name())
if err := svc.Rollback(ctx, nics); err != nil {
logger.Errorf("Failed to roll back config for %s: %v", svc.Name(), err)
}
}

return nil
}

// Build a *Interfaces from all physical interfaces rather than the MDS.
func buildInterfacesFromAllPhysicalNICs() (*Interfaces, error) {
nics := &Interfaces{
EthernetInterfaces: nil,
VlanInterfaces: map[int]metadata.VlanInterface{},
}

interfaces, err := net.Interfaces()
if err != nil {
return nil, fmt.Errorf("failed to get interfaces: %v", err)
}

for _, iface := range interfaces {
mac := iface.HardwareAddr.String()
if mac == "" {
continue
}
nics.EthernetInterfaces = append(nics.EthernetInterfaces, metadata.NetworkInterfaces{
Mac: mac,
})
}

return nics, nil
}

// shouldManageInterface returns whether the guest agent should manage an interface
// provided whether the interface of interest is the primary interface or not.
func shouldManageInterface(isPrimary bool) bool {
if isPrimary {
return cfg.Get().NetworkInterfaces.ManagePrimaryNIC
}
return true
}
2 changes: 1 addition & 1 deletion google_guest_agent/network/manager/manager_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ func init() {
// knownNetworkManagers is a list of supported/available network managers.
knownNetworkManagers = []Service{
&netplan{
netplanConfigDir: "/etc/netplan/",
netplanConfigDir: "/run/netplan/",
networkdDropinDir: "/etc/systemd/network/",
priority: 20,
},
Expand Down
Loading

0 comments on commit e8c4b80

Please sign in to comment.