Skip to content

Commit c4458b5

Browse files
committed
feat: Allow updating logging verbosity dynamically at runtime
1 parent 6678065 commit c4458b5

File tree

11 files changed

+221
-19
lines changed

11 files changed

+221
-19
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ tracing-opentelemetry = { version = "=0.28.0", default-features = false }
117117
tracing-subscriber = { version = "0.3.19", default-features = false, features = ["fmt"] }
118118
typed-store = { git = "https://github.com/MystenLabs/sui", tag = "testnet-v1.45.2" }
119119
url = "2.5.4"
120+
urlencoding = "2.1.3"
120121
utoipa = { version = "5" }
121122
utoipa-redoc = { version = "6.0", features = ["axum"] }
122123
uuid = { version = "1.16.0", features = ["fast-rng", "macro-diagnostics", "v7"] }

crates/walrus-service/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ tracing.workspace = true
150150
tracing-opentelemetry.workspace = true
151151
tracing-subscriber.workspace = true
152152
typed-store = { workspace = true, optional = true }
153+
urlencoding.workspace = true
153154
utoipa = { workspace = true, features = ["axum_extras", "macros", "yaml"] }
154155
utoipa-redoc.workspace = true
155156
uuid.workspace = true

crates/walrus-service/bin/node.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1152,7 +1152,11 @@ impl StorageNodeRuntime {
11521152
StorageNode::builder()
11531153
.with_system_event_manager(event_manager)
11541154
.with_config_loader(config_loader)
1155-
.build(node_config, metrics_runtime.registry.clone()),
1155+
.build(
1156+
node_config,
1157+
metrics_runtime.registry.clone(),
1158+
metrics_runtime.tracing_handle.clone(),
1159+
),
11561160
)?,
11571161
);
11581162

crates/walrus-service/src/common/telemetry.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ use prometheus::{
3737
IntGaugeVec,
3838
Opts,
3939
};
40+
use telemetry_subscribers::TracingHandle;
4041
use tokio::time::Instant;
4142
use tower_http::trace::{MakeSpan, OnResponse};
4243
use tracing::{field, Span};
@@ -565,3 +566,24 @@ impl From<CurrentEpochStateMetric> for Box<dyn Collector> {
565566
Box::new(value.0)
566567
}
567568
}
569+
570+
pub struct WalrusTracingHandle(pub Arc<TracingHandle>);
571+
572+
impl std::ops::Deref for WalrusTracingHandle {
573+
type Target = Arc<TracingHandle>;
574+
fn deref(&self) -> &Self::Target {
575+
&self.0
576+
}
577+
}
578+
579+
impl From<WalrusTracingHandle> for Arc<TracingHandle> {
580+
fn from(value: WalrusTracingHandle) -> Self {
581+
value.0
582+
}
583+
}
584+
585+
impl std::fmt::Debug for WalrusTracingHandle {
586+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
587+
write!(f, "WalrusTracingHandle")
588+
}
589+
}

crates/walrus-service/src/common/utils.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,8 @@ pub struct MetricsAndLoggingRuntime {
251251
/// The Prometheus registry.
252252
pub registry: Registry,
253253
_telemetry_guards: TelemetryGuards,
254-
_tracing_handle: TracingHandle,
254+
/// The tracing handle.
255+
pub tracing_handle: Arc<TracingHandle>,
255256
/// The runtime for metrics and logging.
256257
// INV: Runtime must be dropped last.
257258
pub runtime: Option<Runtime>,
@@ -291,7 +292,7 @@ impl MetricsAndLoggingRuntime {
291292
runtime,
292293
registry: walrus_registry,
293294
_telemetry_guards: telemetry_guards,
294-
_tracing_handle: tracing_handle,
295+
tracing_handle: Arc::new(tracing_handle),
295296
})
296297
}
297298
}

crates/walrus-service/src/node.rs

Lines changed: 69 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
66
use std::{
77
future::Future,
8+
net::SocketAddr,
89
num::{NonZero, NonZeroU16},
910
pin::Pin,
1011
str::FromStr,
@@ -45,6 +46,7 @@ use sui_macros::fail_point_if;
4546
use sui_macros::{fail_point_arg, fail_point_async};
4647
use sui_types::{base_types::ObjectID, event::EventID};
4748
use system_events::{CompletableHandle, EventHandle};
49+
use telemetry_subscribers::TracingHandle;
4850
use thread_pool::ThreadPoolBuilder;
4951
use tokio::{select, sync::watch, time::Instant};
5052
use tokio_util::sync::CancellationToken;
@@ -155,6 +157,7 @@ use crate::{
155157
common::{
156158
active_committees::ActiveCommittees,
157159
config::SuiConfig,
160+
telemetry::WalrusTracingHandle,
158161
utils::should_reposition_cursor,
159162
},
160163
utils::ShardDiffCalculator,
@@ -276,6 +279,9 @@ pub trait ServiceState {
276279
/// Returns the node health information of this ServiceState.
277280
fn health_info(&self, detailed: bool) -> ServiceHealthInfo;
278281

282+
/// Returns the server's bound address.
283+
fn rest_api_address(&self) -> SocketAddr;
284+
279285
/// Returns whether the sliver is stored in the shard.
280286
fn sliver_status<A: EncodingAxis>(
281287
&self,
@@ -289,6 +295,9 @@ pub trait ServiceState {
289295
public_key: PublicKey,
290296
signed_request: SignedSyncShardRequest,
291297
) -> impl Future<Output = Result<SyncShardResponse, SyncShardServiceError>> + Send;
298+
299+
/// Updates the log directive for the node.
300+
fn update_log_directive<S: AsRef<str>>(&self, directive: S) -> Result<(), anyhow::Error>;
292301
}
293302

294303
/// Builder to construct a [`StorageNode`].
@@ -367,6 +376,7 @@ impl StorageNodeBuilder {
367376
self,
368377
config: &StorageNodeConfig,
369378
metrics_registry: Registry,
379+
tracing_handle: Arc<TracingHandle>,
370380
) -> Result<StorageNode, anyhow::Error> {
371381
let protocol_key_pair = config
372382
.protocol_key_pair
@@ -459,15 +469,16 @@ impl StorageNodeBuilder {
459469
num_checkpoints_per_blob: self.num_checkpoints_per_blob,
460470
};
461471

462-
StorageNode::new(
472+
StorageNode::new(StorageNodeArgs {
463473
config,
464474
event_manager,
465475
committee_service,
466476
contract_service,
467-
&metrics_registry,
468-
self.config_loader,
477+
registry: &metrics_registry,
478+
tracing_handle: WalrusTracingHandle(tracing_handle),
479+
config_loader: self.config_loader,
469480
node_params,
470-
)
481+
})
471482
.await
472483
}
473484
}
@@ -508,6 +519,8 @@ pub struct StorageNodeInner {
508519
node_capability: ObjectID,
509520
blob_retirement_notifier: Arc<BlobRetirementNotifier>,
510521
symbol_service: RecoverySymbolService,
522+
tracing_handle: WalrusTracingHandle,
523+
rest_api_address: SocketAddr,
511524
}
512525

513526
/// Parameters for configuring and initializing a node.
@@ -523,15 +536,39 @@ pub struct NodeParameters {
523536
num_checkpoints_per_blob: Option<u32>,
524537
}
525538

539+
/// Arguments for creating a new storage node.
540+
#[derive(Debug)]
541+
pub struct StorageNodeArgs<'a> {
542+
/// Configuration for the storage node.
543+
pub config: &'a StorageNodeConfig,
544+
/// Event manager for handling system events.
545+
pub event_manager: Box<dyn EventManager>,
546+
/// Service for managing committee-related operations.
547+
pub committee_service: Arc<dyn CommitteeService>,
548+
/// Service for managing system contracts.
549+
pub contract_service: Arc<dyn SystemContractService>,
550+
/// Prometheus registry for metrics.
551+
pub registry: &'a Registry,
552+
/// Handle for tracing operations.
553+
pub tracing_handle: WalrusTracingHandle,
554+
/// Optional configuration loader.
555+
pub config_loader: Option<Arc<dyn ConfigLoader>>,
556+
/// Additional node parameters.
557+
pub node_params: NodeParameters,
558+
}
559+
526560
impl StorageNode {
527561
async fn new(
528-
config: &StorageNodeConfig,
529-
event_manager: Box<dyn EventManager>,
530-
committee_service: Arc<dyn CommitteeService>,
531-
contract_service: Arc<dyn SystemContractService>,
532-
registry: &Registry,
533-
config_loader: Option<Arc<dyn ConfigLoader>>,
534-
node_params: NodeParameters,
562+
StorageNodeArgs {
563+
config,
564+
event_manager,
565+
committee_service,
566+
contract_service,
567+
registry,
568+
tracing_handle,
569+
config_loader,
570+
node_params,
571+
}: StorageNodeArgs<'_>,
535572
) -> Result<Self, anyhow::Error> {
536573
let start_time = Instant::now();
537574
let node_capability = contract_service
@@ -600,6 +637,8 @@ impl StorageNode {
600637
.build_bounded(),
601638
),
602639
encoding_config,
640+
tracing_handle,
641+
rest_api_address: config.rest_api_address,
603642
});
604643

605644
blocklist.start_refresh_task();
@@ -2111,6 +2150,10 @@ impl ServiceState for StorageNode {
21112150
self.inner.health_info(detailed)
21122151
}
21132152

2153+
fn rest_api_address(&self) -> SocketAddr {
2154+
self.inner.rest_api_address()
2155+
}
2156+
21142157
fn sliver_status<A: EncodingAxis>(
21152158
&self,
21162159
blob_id: &BlobId,
@@ -2126,6 +2169,10 @@ impl ServiceState for StorageNode {
21262169
) -> impl Future<Output = Result<SyncShardResponse, SyncShardServiceError>> + Send {
21272170
self.inner.sync_shard(public_key, signed_request)
21282171
}
2172+
2173+
fn update_log_directive<S: AsRef<str>>(&self, directive: S) -> Result<(), anyhow::Error> {
2174+
self.inner.update_log_directive(directive)
2175+
}
21292176
}
21302177

21312178
impl ServiceState for StorageNodeInner {
@@ -2526,6 +2573,17 @@ impl ServiceState for StorageNodeInner {
25262573
.handle_sync_shard_request(request, self.current_epoch())
25272574
.await
25282575
}
2576+
2577+
fn update_log_directive<S: AsRef<str>>(&self, directive: S) -> Result<(), anyhow::Error> {
2578+
self.tracing_handle
2579+
.update_log(directive)
2580+
.map_err(|e| anyhow::anyhow!("{}", e))?;
2581+
Ok(())
2582+
}
2583+
2584+
fn rest_api_address(&self) -> SocketAddr {
2585+
self.rest_api_address
2586+
}
25292587
}
25302588

25312589
#[tracing::instrument(skip_all, err)]

crates/walrus-service/src/node/server.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,10 @@ where
386386
.route(routes::BLOB_STATUS_ENDPOINT, get(routes::get_blob_status))
387387
.route(routes::HEALTH_ENDPOINT, get(routes::health_info))
388388
.route(routes::SYNC_SHARD_ENDPOINT, post(routes::sync_shard))
389+
.route(
390+
routes::LOG_DIRECTIVE_ENDPOINT,
391+
post(routes::set_log_directive),
392+
)
389393
}
390394
}
391395

@@ -424,6 +428,8 @@ fn to_pkcs8_key_pair(keypair: &NetworkKeyPair) -> RcGenKeyPair {
424428

425429
#[cfg(test)]
426430
mod tests {
431+
use std::net::{IpAddr, Ipv4Addr};
432+
427433
use anyhow::anyhow;
428434
use axum::http::StatusCode;
429435
use fastcrypto::traits::KeyPair;
@@ -689,6 +695,14 @@ mod tests {
689695
) -> Result<SyncShardResponse, SyncShardServiceError> {
690696
Ok(SyncShardResponse::V1(vec![]))
691697
}
698+
699+
fn rest_api_address(&self) -> SocketAddr {
700+
SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080)
701+
}
702+
703+
fn update_log_directive<S: AsRef<str>>(&self, _directive: S) -> Result<(), anyhow::Error> {
704+
Ok(())
705+
}
692706
}
693707

694708
async fn start_rest_api_with_config(

crates/walrus-service/src/node/server/openapi.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ pub(super) const GROUP_READING_BLOBS: &str = "Reading Blobs";
1919
pub(super) const GROUP_RECOVERY: &str = "Recovery";
2020
pub(super) const GROUP_STATUS: &str = "Status";
2121
pub(super) const GROUP_SYNC_SHARD: &str = "Sync Shard";
22-
22+
pub(super) const GROUP_LOG_DIRECTIVE: &str = "Log Directive";
2323
#[derive(utoipa::OpenApi)]
2424
#[openapi(
2525
paths(
@@ -34,6 +34,7 @@ pub(super) const GROUP_SYNC_SHARD: &str = "Sync Shard";
3434
routes::list_recovery_symbols,
3535
routes::put_metadata,
3636
routes::put_sliver,
37+
routes::set_log_directive,
3738
),
3839
components(schemas(
3940
EpochSchema,

0 commit comments

Comments
 (0)