Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions infra/runtime/syncroot/base/studio-gateway.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,16 @@ kind: Namespace
metadata:
name: runtime-gateway
---
# Environment metadata exposed as a ConfigMap. The studio-gateway Deployment in this change
# reads the keys below through configMapKeyRef (ASPNETCORE_ENVIRONMENT and GATEWAY_* env vars).
apiVersion: v1
kind: ConfigMap
metadata:
name: runtime-environment
namespace: runtime-gateway
data:
# NOTE(review): the ${...} placeholders are presumably substituted at deploy time
# (e.g. by Flux post-build variable substitution) — confirm against the syncroot Kustomization.
upgrade_channel: ${UPGRADE_CHANNEL}
environment: ${ENVIRONMENT}
serviceowner: ${SERVICEOWNER_ID}
---
apiVersion: source.toolkit.fluxcd.io/v1
kind: OCIRepository
metadata:
Expand Down
5 changes: 5 additions & 0 deletions src/Runtime/StudioGateway/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
.config/
infra/
tests/
**/bin
**/obj
3 changes: 3 additions & 0 deletions src/Runtime/StudioGateway/.editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,9 @@ dotnet_diagnostic.CA1873.severity = suggestion
# CA2007: Consider calling ConfigureAwait on the awaited task
dotnet_diagnostic.CA2007.severity = none

# CA1031: Do not catch general exception types
dotnet_diagnostic.CA1031.severity = suggestion

[*.{yml,yaml}]
indent_size = 2
end_of_line = lf
Expand Down
86 changes: 82 additions & 4 deletions src/Runtime/StudioGateway/infra/kustomize/deployment.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
# Disruption budget for pods labelled app=studio-gateway:
# at most one matching pod may be unavailable at a time during voluntary disruptions.
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
name: studio-gateway-pdb
spec:
maxUnavailable: 1
selector:
matchLabels:
app: studio-gateway
---
apiVersion: apps/v1
kind: Deployment
metadata:
Expand All @@ -6,45 +16,113 @@ metadata:
altinn.studio/image: studio-gateway:latest
altinn.studio/image-tag: latest
spec:
replicas: 1
minReadySeconds: 3
revisionHistoryLimit: 5
progressDeadlineSeconds: 60
replicas: 2
strategy:
# Rolling upgrade of pods
type: RollingUpdate
rollingUpdate:
maxUnavailable: 0
maxSurge: 1
selector:
matchLabels:
app: studio-gateway
template:
metadata:
labels:
app: studio-gateway
annotations:
# for mTLS mainly
linkerd.io/inject: enabled
spec:
topologySpreadConstraints:
# Try to spread across availability zones first (highest priority)
# A skew of 1 can result in 1 AZ being unused when AZs = 3 and replicas = 3
- maxSkew: 1
topologyKey: topology.kubernetes.io/zone
# ScheduleAnyway ensures progress even if distribution is imperfect
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: studio-gateway
# Try to spread across nodes within zones
# Prevents multiple replicas from running on the same node, improving fault tolerance
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app: studio-gateway
serviceAccountName: studio-gateway
terminationGracePeriodSeconds: 30
# explicitly set security context to embedded .net non-root user (1654)
securityContext:
runAsUser: 1654
runAsGroup: 1654
fsGroup: 1654
runAsNonRoot: true
# Seccomp (secure computing mode) restricts syscalls the container can make
# RuntimeDefault uses the container runtime's default profile, blocking risky syscalls
seccompProfile:
type: RuntimeDefault
containers:
- name: studio-gateway
image: studio-gateway:latest
image: ""
imagePullPolicy: IfNotPresent
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
privileged: false
capabilities:
drop:
- ALL
ports:
- containerPort: 8080
name: http
protocol: TCP
env:
- name: ASPNETCORE_HTTP_PORTS
value: "8080"
- name: ASPNETCORE_ENVIRONMENT
valueFrom:
# Configmap is created in syncroot
configMapKeyRef:
name: runtime-environment
key: environment
- name: GATEWAY_UPGRADE_CHANNEL
valueFrom:
configMapKeyRef:
name: runtime-environment
key: upgrade_channel
- name: GATEWAY_ENVIRONMENT
valueFrom:
configMapKeyRef:
name: runtime-environment
key: environment
- name: GATEWAY_SERVICEOWNER
valueFrom:
configMapKeyRef:
name: runtime-environment
key: serviceowner
livenessProbe:
httpGet:
path: /health/live
port: 8080
initialDelaySeconds: 10
initialDelaySeconds: 2
periodSeconds: 10
timeoutSeconds: 3
failureThreshold: 3
readinessProbe:
httpGet:
path: /health/ready
port: 8080
initialDelaySeconds: 5
initialDelaySeconds: 2
periodSeconds: 5
timeoutSeconds: 3
failureThreshold: 3
resources:
requests:
cpu: 10m
memory: 64Mi
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
using System.Diagnostics;
using System.Net;
using System.Runtime.InteropServices;
using Microsoft.AspNetCore.HttpOverrides;

namespace StudioGateway.Api.Hosting;

internal static class HostingExtensions
{
    /// <summary>
    /// Applies the hosting configuration needed outside of development:
    /// forwarded-header handling and a delayed, time-boxed graceful shutdown.
    /// In development the builder is returned untouched.
    /// </summary>
    /// <param name="builder">The application builder to configure.</param>
    /// <returns>The same <paramref name="builder"/> instance, to allow chaining.</returns>
    public static WebApplicationBuilder AddHostingConfiguration(this WebApplicationBuilder builder)
    {
        if (!builder.Environment.IsDevelopment())
        {
            ConfigureForwardedHeaders(builder);
            ConfigureGracefulShutdown(builder);
        }

        return builder;
    }

    // Registers forwarded-header options that accept all forwarded headers from any source address.
    private static void ConfigureForwardedHeaders(WebApplicationBuilder builder)
    {
        // In real environments we run behind a reverse proxy/load balancer,
        // which at the time of writing is the Traefik ingress controller in k8s.
        builder.Services.Configure<ForwardedHeadersOptions>(forwarded =>
        {
            forwarded.ForwardedHeaders = ForwardedHeaders.All;

            // Drop the defaults so that only the catch-all networks below are trusted.
            forwarded.KnownIPNetworks.Clear();
            forwarded.KnownProxies.Clear();

            // 0.0.0.0/0 and ::/0, i.e. every IPv4 and IPv6 address.
            forwarded.KnownIPNetworks.Add(new System.Net.IPNetwork(IPAddress.Any, 0));
            forwarded.KnownIPNetworks.Add(new System.Net.IPNetwork(IPAddress.IPv6Any, 0));
        });
    }

    // Registers the signal-intercepting host lifetime and the host shutdown timeout.
    private static void ConfigureGracefulShutdown(WebApplicationBuilder builder)
    {
        // Graceful shutdown has to be coordinated with the scheduler (k8s is assumed):
        // - the deployment is configured with a terminationGracePeriod of 30s (default timeout before SIGKILL)
        // - the k8s flow of information is eventually consistent:
        //   it takes time for knowledge of SIGTERM on the worker node to propagate to e.g. networking layers
        //   (k8s Service -> Endpoints rotation; it takes time to be taken out of Endpoints rotation)
        // - we want to drain ASP.NET Core gracefully, leaving some time for active requests to complete
        // This gives the following sequence of events:
        // - the container receives SIGTERM
        // - `AppHostLifetime` intercepts SIGTERM and waits for `preStopDelay`
        // - `AppHostLifetime` calls `IHostApplicationLifetime.StopApplication` to start the ASP.NET Core shutdown
        // - ASP.NET Core spends at most `drainTimeout` trying to drain active requests
        //   (cancelable requests can combine cancellation tokens with `IHostApplicationLifetime.ApplicationStopping`)
        // - if ASP.NET Core completes shutdown within `drainTimeout`, everything is fine
        // - if ASP.NET Core is stuck or otherwise can't terminate, kubelet will eventually SIGKILL
        var preStopDelay = TimeSpan.FromSeconds(5);
        var drainTimeout = TimeSpan.FromSeconds(20);

        builder.Services.AddSingleton<IHostLifetime>(provider =>
            ActivatorUtilities.CreateInstance<AppHostLifetime>(provider, preStopDelay)
        );

        builder.Services.Configure<HostOptions>(host => host.ShutdownTimeout = drainTimeout);
    }
}

// Based on guidance in:
// https://github.com/dotnet/dotnet-docker/blob/2a6f35b9361d1aacb664b0ce09e529698b622d2b/samples/kubernetes/graceful-shutdown/graceful-shutdown.md
#pragma warning disable CA1812 // Avoid uninstantiated internal classes
/// <summary>
/// Host lifetime that intercepts SIGINT, SIGQUIT and SIGTERM and waits for a fixed delay
/// before asking the host to stop via <see cref="IHostApplicationLifetime.StopApplication"/>.
/// </summary>
/// <param name="_logger">Logger for shutdown progress and disposal errors.</param>
/// <param name="_environment">Host environment; only used to assert that this is not a development environment.</param>
/// <param name="_applicationLifetime">Application lifetime used to trigger the host shutdown.</param>
/// <param name="_delay">Time to wait between receiving a signal and starting the host shutdown.</param>
internal sealed class AppHostLifetime(
    ILogger<AppHostLifetime> _logger,
    IHostEnvironment _environment,
    IHostApplicationLifetime _applicationLifetime,
    TimeSpan _delay
) : IHostLifetime, IDisposable
#pragma warning restore CA1812 // Avoid uninstantiated internal classes
{
    // Signal registrations owned by this instance; assigned only once all of them were created.
    private IDisposable?[]? _registrations;

    /// <summary>
    /// Registers the signal handlers. Completes synchronously.
    /// </summary>
    /// <param name="cancellationToken">Not used.</param>
    /// <returns>A completed task.</returns>
    public Task WaitForStartAsync(CancellationToken cancellationToken)
    {
        Debug.Assert(!_environment.IsDevelopment(), "We don't need graceful shutdown in development environments");

        PosixSignal[] signals = [PosixSignal.SIGINT, PosixSignal.SIGQUIT, PosixSignal.SIGTERM];
        var created = new IDisposable?[signals.Length];
        try
        {
#pragma warning disable CA2000 // Dispose objects before losing scope
            // On failure the registrations created so far are disposed in the catch block.
            // On success ownership is transferred to _registrations, which Dispose() releases.
            for (var index = 0; index < signals.Length; index++)
                created[index] = PosixSignalRegistration.Create(signals[index], HandleSignal);
#pragma warning restore CA2000 // Dispose objects before losing scope
            _registrations = created;
        }
        catch
        {
            // Slots that were never filled are null and are skipped by TryDispose.
            foreach (var registration in created)
                TryDispose(registration);
            throw;
        }

        return Task.CompletedTask;
    }

    /// <summary>
    /// No-op; returns a completed task.
    /// </summary>
    /// <param name="cancellationToken">Not used.</param>
    public Task StopAsync(CancellationToken cancellationToken) => Task.CompletedTask;

    /// <summary>
    /// Disposes the signal registrations, if any were created.
    /// </summary>
    public void Dispose()
    {
        if (_registrations is null)
            return;

        for (var index = 0; index < _registrations.Length; index++)
            TryDispose(_registrations[index]);
    }

    // Signal callback: cancels the default handling and schedules the delayed host shutdown.
    private void HandleSignal(PosixSignalContext ctx)
    {
        _logger.LogInformation("Received shutdown signal: {Signal}, delaying shutdown", ctx.Signal);

        // Signal intercepted here, so we are now responsible for calling `StopApplication`.
        ctx.Cancel = true;

        var delayElapsed = Task.Delay(_delay);
        _ = delayElapsed.ContinueWith(completedDelay => BeginHostShutdown(), TaskScheduler.Default);
    }

    // Runs once the shutdown delay has elapsed.
    private void BeginHostShutdown()
    {
        _logger.LogInformation("Starting host shutdown...");
        _applicationLifetime.StopApplication();
    }

    // Disposes a single resource, logging (rather than propagating) any exception it throws.
    private void TryDispose(IDisposable? disposable)
    {
        if (disposable is not null)
        {
            try
            {
                disposable.Dispose();
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Error during disposal of {Type}", disposable.GetType().FullName);
            }
        }
    }
}
7 changes: 5 additions & 2 deletions src/Runtime/StudioGateway/src/StudioGateway.Api/Program.cs
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
using StudioGateway.Api;
using StudioGateway.Api.Flux;
using StudioGateway.Api.Hosting;

var builder = WebApplication.CreateSlimBuilder(args);

builder.AddHostingConfiguration();

builder.Services.ConfigureHttpJsonOptions(options =>
{
options.SerializerOptions.PropertyNameCaseInsensitive = true;
Expand All @@ -14,8 +17,8 @@

var app = builder.Build();

// OpenApi UI is served as a static file under /openapi.html
app.UseStaticFiles();
// Forwarded headers must be processed before HSTS: the HSTS middleware only emits its header
// for requests it sees as HTTPS, and behind a TLS-terminating proxy the request scheme is only
// corrected once the forwarded-headers middleware has applied X-Forwarded-Proto.
app.UseForwardedHeaders();
app.UseHsts();

app.MapOpenApi();
app.UseSwaggerUI(options =>
Expand Down