Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Share host info between packages #5884

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions changelog/fragments/1730288427-shared-host.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Kind can be one of:
# - breaking-change: a change to previously-documented behavior
# - deprecation: functionality that is being removed in a later release
# - bug-fix: fixes a problem in a previous version
# - enhancement: extends functionality but does not break or fix existing behavior
# - feature: new functionality
# - known-issue: problems that we are aware of in a given version
# - security: impacts on the security of a product or a user’s deployment.
# - upgrade: important information for someone upgrading from a prior version
# - other: does not fit into any of the other categories
kind: enhancement

# Change summary; an 80ish-character description of the change.
summary: Collect host info exactly once on startup

# Long description; in case the summary is not enough to describe the change
# this field accommodates a description without length limits.
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
#description:

# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc.
component: elastic-agent

# PR URL; optional; the PR number that added the changeset.
# If not present, it is automatically filled in by the tooling, which finds the PR where this changelog fragment was added.
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
# Please provide it if you are adding a fragment for a different PR.
#pr: https://github.com/owner/repo/1234

# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
# If not present, it is automatically filled in by the tooling with the issue linked to the PR number.
#issue: https://github.com/owner/repo/1234
5 changes: 2 additions & 3 deletions internal/pkg/agent/application/info/agent_metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ import (
"github.com/elastic/elastic-agent/internal/pkg/util"
"github.com/elastic/elastic-agent/pkg/core/logger"

"github.com/elastic/go-sysinfo"
"github.com/elastic/go-sysinfo/types"
)

Expand Down Expand Up @@ -146,7 +145,7 @@ func Metadata(ctx context.Context, l *logger.Logger) (*ECSMeta, error) {

// ECSMetadata returns an agent ECS compliant metadata.
func (i *AgentInfo) ECSMetadata(l *logger.Logger) (*ECSMeta, error) {
sysInfo, err := sysinfo.Host()
sysInfo, err := util.GetHost()
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -195,7 +194,7 @@ func (i *AgentInfo) ECSMetadataFlatMap(l *logger.Logger) (map[string]interface{}
// TODO: remove these values when kibana migrates to ECS
meta := make(map[string]interface{})

sysInfo, err := sysinfo.Host()
sysInfo, err := util.GetHost()
if err != nil {
return nil, err
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/elastic/go-sysinfo"

"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
"github.com/elastic/elastic-agent/internal/pkg/release"
"github.com/elastic/elastic-agent/internal/pkg/testutils"
Expand All @@ -34,7 +32,7 @@ func TestECSMetadata(t *testing.T) {
assert.NotNil(t, metadata.Elastic.Agent, "metadata.Elastic.Agent must not be nil")
}

sysInfo, err := sysinfo.Host()
sysInfo, err := util.GetHost()
require.NoError(t, err)

info := sysInfo.Info()
Expand Down
5 changes: 3 additions & 2 deletions internal/pkg/agent/application/info/inject_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@ package info
import (
"runtime"

"github.com/elastic/elastic-agent/internal/pkg/util"

"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
"github.com/elastic/elastic-agent/internal/pkg/agent/errors"
"github.com/elastic/elastic-agent/internal/pkg/config"
"github.com/elastic/go-sysinfo"
)

// InjectAgentConfig injects config to a provided configuration.
Expand All @@ -30,7 +31,7 @@ func InjectAgentConfig(c *config.Config) error {
// agentGlobalConfig gets global config used for resolution of variables inside configuration
// such as ${path.data}.
func agentGlobalConfig() (map[string]interface{}, error) {
hostInfo, err := sysinfo.Host()
hostInfo, err := util.GetHost()
if err != nil {
return nil, err
}
Expand Down
1 change: 1 addition & 0 deletions internal/pkg/composable/providers/host/host.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ func ContextProviderBuilder(log *logger.Logger, c *config.Config, _ bool) (corec

func getHostInfo(log *logger.Logger) func() (map[string]interface{}, error) {
return func() (map[string]interface{}, error) {
// We don't use the shared host info from util here, as we explicitly want the latest host information on every call.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would prefer if we didn't have two different ways to get Host information where callers need to think about which one to use, or whether they actually need the parts of it that can change at runtime (FQDN for an obvious example).

Can the periodic updates be pushed into util.GetHost()? They could be unconditional updates at a fixed rate, or it could behave as a rate limit where it updates at most every X seconds.

As a possible simplification for all of this, perhaps the network address information isn't valuable at all for containerized applications and we can add a way to filter it into https://github.com/elastic/go-sysinfo. There is an IsContainerized() already.

Copy link
Member

@cmacknz cmacknz Oct 31, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For some related context, add_kubernetes_metadata stopped including the host IPs by default:

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps we could just filter out the link local addresses (assuming those are the problem here too) if just not having the addresses at all doesn't work, or it's too risky to think being in a container always means you are in k8s (it doesn't). elastic/integrations#6674 (comment)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel like the periodicity of the host info checks in the provider should be a part of the provider's logic, as opposed to being hidden in the utils package. I could see adding a parameter to this API indicating if a cached value is fine, if that helps.

Honestly, what I'm really trying to do here is avoid recomputing the host info, which is only done at the start. All the other methods on this struct fetch the latest data anyway. Maybe the solution is to compute host info at call time, same as everything else? Then every component could have its own types.Host (as creating one would be cheap), and we'd separately cache host info for the packages that want it at startup.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think all I really care about is that it's obvious when a call to get host info will potentially contain stale data. Keeping the periodicity of the host info check in the provider is fine if that is clear.

sysInfo, err := sysinfo.Host()
if err != nil {
return nil, err
Expand Down
60 changes: 60 additions & 0 deletions internal/pkg/util/host.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@ package util

import (
"context"
"sync"
"time"

"github.com/elastic/elastic-agent/pkg/core/logger"
"github.com/elastic/go-sysinfo"
"github.com/elastic/go-sysinfo/types"
)

Expand All @@ -31,3 +33,61 @@ func GetHostName(isFqdnFeatureEnabled bool, hostInfo types.HostInfo, host types.

return fqdn
}

var _ types.Host = &threadSafeHost{}

// threadSafeHost is a thread-safe wrapper around types.Host.
// Each method acquires the embedded mutex before delegating to inner, so calls
// made through one instance are serialized.
// It exists so that the host only needs to be created once and can then be
// shared, as some of the setup it does is relatively expensive.
type threadSafeHost struct {
// Embedded lock, held for the duration of every delegated call.
sync.Mutex
// inner is the wrapped host that all methods delegate to.
inner types.Host
}

// newThreadSafeHost wraps inner in a threadSafeHost whose mutex starts unlocked.
// The wrapper stores inner as given; it is not copied or validated.
func newThreadSafeHost(inner types.Host) *threadSafeHost {
	wrapped := new(threadSafeHost)
	wrapped.inner = inner
	return wrapped
}

func (s *threadSafeHost) CPUTime() (types.CPUTimes, error) {
s.Lock()
Copy link
Contributor

@pkoutsovasilis pkoutsovasilis Oct 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

my first reaction/thinking was that this thread-safety code most probably belongs to the go-sysinfo codebase?! but then looking again in the exposed functions we use, and from a quick look in go-sysinfo they do not mutate anything, so I am wondering do we need to hold a mutex to call them?! did I miss a mutation?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

go-sysinfo doesn't guarantee that these are thread-safe, so I don't have much of a choice here. We could try to make them so in the library, but I'm not sure it's worth it. These functions aren't called very often, so in practice there shouldn't be any contention.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since these functions are not mutating anything I would propose to remove the mutex. Or again you may have spotted a mutation that I missed?! If not, I am not entirely convinced that losing the concurrency of calling these functions, given that this is now a shared instance across multiple places, is wise. But if you insist sure go with it 🙂

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The functions are not mutating anything right now, but go-sysinfo doesn't guarantee this interface is thread-safe, so this is an implementation detail that may change. If we want to remove the locks, we should make the change in go-sysinfo, and I don't think that's worth the hassle. This PR is a bit borderline already in my opinion when it comes to complexity introduced vs performance gained.

At the very least, I'd have to see evidence of actual contention before looking into this further.

defer s.Unlock()
return s.inner.CPUTime()
}

// Info returns the wrapped host's HostInfo. The lock is held while the inner
// call runs and released when this method returns.
func (s *threadSafeHost) Info() types.HostInfo {
	s.Mutex.Lock()
	defer s.Mutex.Unlock()

	hostInfo := s.inner.Info()
	return hostInfo
}

// Memory returns the wrapped host's memory information and any error it
// reports, unchanged. The lock is held while the inner call runs.
func (s *threadSafeHost) Memory() (*types.HostMemoryInfo, error) {
	s.Mutex.Lock()
	defer s.Mutex.Unlock()

	memInfo, err := s.inner.Memory()
	return memInfo, err
}

// FQDNWithContext forwards ctx to the wrapped host's FQDNWithContext and
// returns its result and error unchanged. The lock is held for the whole inner
// call, so other methods on this instance wait until it completes.
func (s *threadSafeHost) FQDNWithContext(ctx context.Context) (string, error) {
	s.Mutex.Lock()
	defer s.Mutex.Unlock()

	name, err := s.inner.FQDNWithContext(ctx)
	return name, err
}

// FQDN returns the wrapped host's FQDN and any error it reports, unchanged.
// The lock is held while the inner call runs.
func (s *threadSafeHost) FQDN() (string, error) {
	s.Mutex.Lock()
	defer s.Mutex.Unlock()

	name, err := s.inner.FQDN()
	return name, err
}

var (
	// sharedHost is the process-wide host handle handed out by GetHost.
	// It stays nil when the one-time sysinfo.Host() call failed.
	sharedHost types.Host
	// once guards the single sysinfo.Host() call made by GetHost.
	once sync.Once
	// hostErr is the error from that single call; it is returned to every caller.
	hostErr error
)

// GetHost returns a shared, thread-safe types.Host for the current machine.
//
// sysinfo.Host() is invoked at most once per process, on the first call; every
// later call returns the same host and the same error. The returned host is the
// one captured by that first call and is never re-created afterwards.
//
// If the underlying lookup fails, the returned host is nil and the error is
// non-nil; the failure is remembered and not retried.
func GetHost() (types.Host, error) {
	once.Do(func() {
		innerHost, err := sysinfo.Host()
		if err != nil {
			// Do not wrap a nil host: leaving sharedHost nil means a caller
			// that forgets to check the error cannot call into a wrapper
			// whose inner host is nil.
			hostErr = err
			return
		}
		sharedHost = newThreadSafeHost(innerHost)
	})
	return sharedHost, hostErr
}
20 changes: 20 additions & 0 deletions internal/pkg/util/host_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (

"github.com/elastic/elastic-agent-libs/logp"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/elastic/go-sysinfo/types"
Expand Down Expand Up @@ -59,6 +60,25 @@ func TestGetHostName(t *testing.T) {
}
}

// TestSharedHost verifies that threadSafeHost forwards CPUTime, Info, Memory
// and FQDN to the wrapped host and hands back the same values and the same
// errors as calling the wrapped host directly.
func TestSharedHost(t *testing.T) {
	innerHost := &mockHost{}
	shared := newThreadSafeHost(innerHost)

	// Errors are compared rather than discarded so that a wrapper which drops
	// or replaces the inner error is caught as well.
	innerCPUTime, innerCPUErr := innerHost.CPUTime()
	sharedCPUTime, sharedCPUErr := shared.CPUTime()
	assert.Equal(t, innerCPUErr, sharedCPUErr)
	assert.Equal(t, innerCPUTime, sharedCPUTime)

	assert.Equal(t, innerHost.Info(), shared.Info())

	innerMemoryInfo, innerMemoryErr := innerHost.Memory()
	sharedMemoryInfo, sharedMemoryErr := shared.Memory()
	assert.Equal(t, innerMemoryErr, sharedMemoryErr)
	assert.Equal(t, innerMemoryInfo, sharedMemoryInfo)

	innerFQDN, innerFQDNErr := innerHost.FQDN()
	sharedFQDN, sharedFQDNErr := shared.FQDN()
	assert.Equal(t, innerFQDNErr, sharedFQDNErr)
	assert.Equal(t, innerFQDN, sharedFQDN)
}

type mockHost struct {
fqdn string
fqdnErr error
Expand Down
5 changes: 2 additions & 3 deletions pkg/component/platforms.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@ import (
goruntime "runtime"
"strings"

"github.com/elastic/go-sysinfo"

"github.com/elastic/elastic-agent/internal/pkg/util"
"github.com/elastic/elastic-agent/pkg/utils"
)

Expand Down Expand Up @@ -126,7 +125,7 @@ func LoadPlatformDetail(modifiers ...PlatformModifier) (PlatformDetail, error) {
if err != nil {
return PlatformDetail{}, err
}
info, err := sysinfo.Host()
info, err := util.GetHost()
if err != nil {
return PlatformDetail{}, err
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/testing/define/define.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ import (

"github.com/elastic/elastic-agent-libs/kibana"
"github.com/elastic/go-elasticsearch/v8"
"github.com/elastic/go-sysinfo"
"github.com/elastic/go-sysinfo/types"

"github.com/elastic/elastic-agent/internal/pkg/util"
atesting "github.com/elastic/elastic-agent/pkg/testing"
"github.com/elastic/elastic-agent/pkg/utils"
semver "github.com/elastic/elastic-agent/pkg/version"
Expand Down Expand Up @@ -205,7 +205,7 @@ func runOrSkip(t *testing.T, req Requirements, local bool, kubernetes bool) *Inf

func getOSInfo() (*types.OSInfo, error) {
osInfoOnce.Do(func() {
sysInfo, err := sysinfo.Host()
sysInfo, err := util.GetHost()
if err != nil {
osInfoErr = err
} else {
Expand Down