Sync container logs #50

Open
wants to merge 12 commits into base: next
45 changes: 41 additions & 4 deletions cmd/icinga-kubernetes/main.go
@@ -7,6 +7,7 @@ import (
"github.com/icinga/icinga-go-library/driver"
"github.com/icinga/icinga-go-library/logging"
"github.com/icinga/icinga-kubernetes/internal"
"github.com/icinga/icinga-kubernetes/pkg/contracts"
"github.com/icinga/icinga-kubernetes/pkg/schema"
"github.com/icinga/icinga-kubernetes/pkg/sync"
"github.com/okzk/sdnotify"
@@ -70,22 +71,58 @@ func main() {

g, ctx := errgroup.WithContext(ctx)

+ forwardUpsertPodsChannel := make(chan database.Entity)
+ defer close(forwardUpsertPodsChannel)
+
+ forwardDeletePodsChannel := make(chan any)
+ defer close(forwardDeletePodsChannel)
+
g.Go(func() error {
return sync.NewSync(
- db, schema.NewNode, informers.Core().V1().Nodes().Informer(), logs.GetChildLogger("Nodes"),
+ db,
+ schema.NewNode,
+ informers.Core().V1().Nodes().Informer(),
+ logs.GetChildLogger("Nodes"),
).Run(ctx)
})

g.Go(func() error {
return sync.NewSync(
- db, schema.NewNamespace, informers.Core().V1().Namespaces().Informer(), logs.GetChildLogger("Namespaces"),
+ db,
+ schema.NewNamespace,
+ informers.Core().V1().Namespaces().Informer(),
+ logs.GetChildLogger("Namespaces"),
).Run(ctx)
})

+ forwardUpsertPodsToLogChannel := make(chan contracts.KUpsert)
+ forwardDeletePodsToLogChannel := make(chan contracts.KDelete)
+
g.Go(func() error {
+
+ defer close(forwardUpsertPodsToLogChannel)
+ defer close(forwardDeletePodsToLogChannel)
+
return sync.NewSync(
- db, schema.NewPod, informers.Core().V1().Pods().Informer(), logs.GetChildLogger("Pods"),
- ).Run(ctx)
+ db,
+ schema.NewPod,
+ informers.Core().V1().Pods().Informer(),
+ logs.GetChildLogger("Pods"),
+ ).Run(
+ ctx,
+ sync.WithForwardUpsertToLog(forwardUpsertPodsToLogChannel),
+ sync.WithForwardDeleteToLog(forwardDeletePodsToLogChannel),
+ )
})
+
+ logSync := sync.NewLogSync(k, db, logs.GetChildLogger("ContainerLogs"))
+
+ g.Go(func() error {
+ return logSync.MaintainList(ctx, forwardUpsertPodsToLogChannel, forwardDeletePodsToLogChannel)
+ })
+
+ g.Go(func() error {
+ return logSync.Run(ctx)
+ })

if err := g.Wait(); err != nil {
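
Note: the sync.WithForwardUpsertToLog and sync.WithForwardDeleteToLog helpers used above are not part of this diff. Below is a minimal sketch of how such forward-to-log options could be wired, assuming the sync package follows the functional-options pattern; the Option/options names and bodies are illustrative assumptions, only the constructor names and channel element types come from the diff.

package sync

import "github.com/icinga/icinga-kubernetes/pkg/contracts"

// Option is a hypothetical functional option accepted by Sync.Run.
type Option func(*options)

// options collects the optional forward channels a sync run can be given.
type options struct {
	forwardUpsertToLog chan<- contracts.KUpsert
	forwardDeleteToLog chan<- contracts.KDelete
}

// WithForwardUpsertToLog forwards every upserted object to ch in addition to
// the regular database sync.
func WithForwardUpsertToLog(ch chan<- contracts.KUpsert) Option {
	return func(o *options) { o.forwardUpsertToLog = ch }
}

// WithForwardDeleteToLog forwards every delete event to ch.
func WithForwardDeleteToLog(ch chan<- contracts.KDelete) Option {
	return func(o *options) { o.forwardDeleteToLog = ch }
}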
19 changes: 19 additions & 0 deletions pkg/schema/container.go
@@ -0,0 +1,19 @@
package schema

//
//import (
// "github.com/icinga/icinga-kubernetes/pkg/contracts"
// kmetav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
//)
//
//type Container struct {
// kmetaWithNamespace
//}
//
//func NewContainer() contracts.Resource {
// return &Container{}
//}
//
//func (c *Container) Obtain(kobject kmetav1.Object) {
// c.kmetaWithNamespace.Obtain(kobject)
//}
10 changes: 10 additions & 0 deletions pkg/schema/log.go
@@ -0,0 +1,10 @@
package schema

type Log struct {
kmetaWithoutNamespace
Id []byte
ReferenceId []byte
ContainerName string
Time string
Log string
}
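
Note: Time and Log hold newline-joined batches (one timestamp per message line), and the upsert statement in pkg/sync/logs.go below appends further batches with CONCAT. A minimal sketch of recovering per-line entries from such a row; the helper name printLogLines and the sample values are illustrative, only the exported fields come from the struct above.

package main

import (
	"fmt"
	"strings"

	"github.com/icinga/icinga-kubernetes/pkg/schema"
)

// printLogLines pairs the i-th timestamp with the i-th message, since both
// columns are newline-joined in the same order.
func printLogLines(l *schema.Log) {
	times := strings.Split(l.Time, "\n")
	messages := strings.Split(l.Log, "\n")

	for i, msg := range messages {
		if i >= len(times) {
			break
		}
		fmt.Printf("[%s] %s: %s\n", times[i], l.ContainerName, msg)
	}
}

func main() {
	printLogLines(&schema.Log{
		ContainerName: "icinga",
		Time:          "2024-01-01T00:00:00.000000000Z\n2024-01-01T00:00:01.000000000Z",
		Log:           "first line\nsecond line",
	})
}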
251 changes: 251 additions & 0 deletions pkg/sync/logs.go
@@ -0,0 +1,251 @@
package sync

import (
"bufio"
"context"
"crypto/sha1"
"fmt"
"github.com/icinga/icinga-go-library/database"
"github.com/icinga/icinga-go-library/logging"
"github.com/icinga/icinga-kubernetes/pkg/contracts"
"github.com/icinga/icinga-kubernetes/pkg/schema"
"github.com/pkg/errors"
"golang.org/x/sync/errgroup"
"io"
kcorev1 "k8s.io/api/core/v1"
kmetav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"slices"
"strings"
msync "sync"
"time"
)

// LogSync syncs container logs to the database. To do so, it maintains a list
// of pods to fetch logs from
type LogSync struct {
list []*kcorev1.Pod
lastChecked map[[20]byte]*kmetav1.Time
mutex *msync.RWMutex
clientset *kubernetes.Clientset
db *database.DB
logger *logging.Logger
}

// NewLogSync creates a new LogSync initialized with the given clientset, database and logger
func NewLogSync(clientset *kubernetes.Clientset, db *database.DB, logger *logging.Logger) *LogSync {
return &LogSync{
list: []*kcorev1.Pod{},
lastChecked: make(map[[20]byte]*kmetav1.Time),
mutex: &msync.RWMutex{},
clientset: clientset,
db: db,
logger: logger,
}
}

// upsertStmt returns the database upsert statement for the log table
func (ls *LogSync) upsertStmt() string {
return fmt.Sprintf(
"INSERT INTO %s (%s) VALUES (%s) ON DUPLICATE KEY UPDATE %s",
"log",
"id, reference_id, container_name, time, log",
":id, :reference_id, :container_name, :time, :log",
"time=CONCAT(time, '\n', :time), log=CONCAT(log, '\n', :log)",
)
}
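
// For reference, the statement above renders to:
//
//	INSERT INTO log (id, reference_id, container_name, time, log)
//	VALUES (:id, :reference_id, :container_name, :time, :log)
//	ON DUPLICATE KEY UPDATE time=CONCAT(time, '\n', :time), log=CONCAT(log, '\n', :log)
//
// Because id is the per-container hash, repeated flushes for the same container
// append to the existing row instead of inserting new rows.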

// splitTimestampsFromMessages takes a raw log as []byte and returns its timestamps and messages as
// separate string slices, skipping lines older than the container's last checked timestamp
func (ls *LogSync) splitTimestampsFromMessages(log []byte, curContainerId [20]byte) (times []string, messages []string, err error) {

stringReader := strings.NewReader(string(log))
reader := bufio.NewReader(stringReader)

for {
line, err := reader.ReadString('\n')
if err != nil {
if err == io.EOF {
break
}
return nil, nil, errors.Wrap(err, "error reading log message")
}

timestamp, message, found := strings.Cut(strings.TrimSuffix(line, "\n"), " ")
if !found {
continue
}

messageTime, err := time.Parse("2006-01-02T15:04:05.999999999Z", timestamp)
if err != nil {
// Skip lines whose timestamp cannot be parsed instead of aborting the whole sync.
ls.logger.Error(errors.Wrap(err, "error parsing log timestamp"))
continue
}

if ls.lastChecked[curContainerId] != nil && messageTime.UnixNano() <= ls.lastChecked[curContainerId].UnixNano() {
continue
}

times = append(times, timestamp)
messages = append(messages, message)
}

return times, messages, nil
}

// removeFromList removes the pod with the given ID from the maintained list
func (ls *LogSync) removeFromList(id database.ID) {
out := make([]*kcorev1.Pod, 0)

for _, element := range ls.list {

elementId := sha1.Sum([]byte(element.Namespace + "/" + element.Name))

if fmt.Sprintf("%x", elementId) != id.String() {
out = append(out, element)
}
}

ls.list = out
}

// MaintainList adds pods received on addChannel to the maintained list and removes pods signaled on deleteChannel from it
func (ls *LogSync) MaintainList(ctx context.Context, addChannel <-chan contracts.KUpsert, deleteChannel <-chan contracts.KDelete) error {

ls.logger.Info("Starting maintain list")

g, ctx := errgroup.WithContext(ctx)

deletes := make(chan any)
g.Go(func() error {
defer close(deletes)

for {
select {
case <-ctx.Done():
return errors.Wrap(ctx.Err(), "context canceled while maintaining log sync list")

case podFromChannel, more := <-addChannel:
if !more {
return nil
}

pod := podFromChannel.KObject().(*kcorev1.Pod)

podIsInList := false

for _, listPod := range ls.list {
if listPod.UID == pod.UID {
podIsInList = true
}
}

if podIsInList {
continue
}

ls.mutex.Lock()
ls.list = append(ls.list, pod)
ls.mutex.Unlock()

case podIdFromChannel, more := <-deleteChannel:
if !more {
return nil
}

idOfPod := podIdFromChannel.ID()

ls.mutex.Lock()
ls.removeFromList(idOfPod)
ls.mutex.Unlock()

deletes <- idOfPod
}

}
})

g.Go(func() error {
return database.NewDelete(ls.db).ByColumn("reference_id").Stream(ctx, &schema.Log{}, deletes)
})

return g.Wait()
}

// Run starts syncing the logs to the database. To do so, it loops over all
// containers of each pod in the maintained list every 15 seconds.
func (ls *LogSync) Run(ctx context.Context) error {

ls.logger.Info("Starting sync")

g, ctx := errgroup.WithContext(ctx)

upsertStmt := ls.upsertStmt()

upserts := make(chan database.Entity)
defer close(upserts)

g.Go(func() error {
for {
// Work on a snapshot so that MaintainList can modify the list concurrently.
ls.mutex.RLock()
pods := slices.Clone(ls.list)
ls.mutex.RUnlock()

for _, pod := range pods {
curPodId := sha1.Sum([]byte(pod.Namespace + "/" + pod.Name))

for _, container := range pod.Spec.Containers {

curContainerId := sha1.Sum([]byte(pod.Namespace + "/" + pod.Name + "/" + container.Name))

podLogOpts := kcorev1.PodLogOptions{Container: container.Name, Timestamps: true}

if ls.lastChecked[curContainerId] != nil {
podLogOpts.SinceTime = ls.lastChecked[curContainerId]
}

log, err := ls.clientset.CoreV1().Pods(pod.Namespace).GetLogs(pod.Name, &podLogOpts).Do(ctx).Raw()
if err != nil {
ls.logger.Error(errors.Wrap(err, "error reading container log"))
continue
}

times, messages, err := ls.splitTimestampsFromMessages(log, curContainerId)
if err != nil {
return err
}

if len(messages) == 0 {
continue
}

newLog := &schema.Log{
Id: curContainerId[:],
ReferenceId: curPodId[:],
ContainerName: container.Name,
Time: strings.Join(times, "\n"),
Log: strings.Join(messages, "\n"),
}

select {
case <-ctx.Done():
return ctx.Err()
case upserts <- newLog:
}

lastTime, err := time.Parse("2006-01-02T15:04:05.999999999Z", times[len(times)-1])
if err != nil {
return errors.Wrap(err, "error parsing log time")
}

ls.mutex.RLock()
stillListed := slices.Contains(ls.list, pod)
ls.mutex.RUnlock()
if !stillListed {
continue
}

lastV1Time := kmetav1.Time{Time: lastTime}
ls.lastChecked[curContainerId] = &lastV1Time
}
}

select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(time.Second * 15):
}
}
})

g.Go(func() error {
return database.NewUpsert(ls.db).WithStatement(upsertStmt, 5).Stream(ctx, upserts)
})

return g.Wait()
}
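
Note: the contracts.KUpsert and contracts.KDelete interfaces consumed by MaintainList are not part of this diff. Inferred from their usage above (KObject() is asserted to *kcorev1.Pod and ID() feeds the reference_id delete stream), they could look roughly like the sketch below; names and exact method sets are assumptions.

package contracts

import (
	"github.com/icinga/icinga-go-library/database"
	kmetav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// KUpsert describes an added or updated Kubernetes object as consumed by
// LogSync.MaintainList. Sketch inferred from usage; the PR's real interface
// may carry more methods.
type KUpsert interface {
	KObject() kmetav1.Object
}

// KDelete describes a deleted Kubernetes object, of which only the database ID
// is needed to remove its rows.
type KDelete interface {
	ID() database.ID
}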