Commit feb0b83
Support copying retained metric objects via subscribe
- Introduces a timeout for subscribe queries to prevent indefinite execution.
- Refactors column name retrieval so it no longer runs in the same transaction as the copy.
- For each ::Retained relation, adds a ::Basic version that solely does a simple SELECT. We need to separate the two because we can't run both a SUBSCRIBE and a SELECT query in the same transaction. There is a way to get the same behavior with just a ::Retained relation, but this approach means less code, and we'd have to add the logic anyway for our iterators. It's also nice to explicitly create a relation per query, and it extends our retries to each of these different queries too.
1 parent: f1a632d
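
The core of the change is the timeout-and-cancel dance around the SUBSCRIBE copy: the COPY would otherwise stream forever, so it runs under a deadline and is cancelled server-side once the deadline fires. Below is a minimal standalone sketch of that pattern, assuming a tokio_postgres client pointed at a Materialize endpoint; the scrape_subscribe name, the 3-second deadline, plain tokio::time::timeout, and NoTls are illustrative stand-ins for the commit's mz_ore timeout helper and shared TLS connector:

use std::time::Duration;

use futures::TryStreamExt;
use tokio_postgres::NoTls;
use tokio_util::io::StreamReader;

// Sketch: stream SUBSCRIBE output into a file, cut it short after a
// deadline, and cancel the still-running query on the server.
async fn scrape_subscribe(
    client: &mut tokio_postgres::Client,
    relation: &str,
    file: &mut tokio::fs::File,
) -> anyhow::Result<()> {
    let tx = client.transaction().await?;
    // A SUBSCRIBE never finishes on its own, so this COPY streams forever.
    let query =
        format!("COPY (SUBSCRIBE TO (SELECT * FROM {relation})) TO STDOUT WITH (FORMAT CSV)");
    let copy = async {
        let stream = tx
            .copy_out(&query)
            .await?
            .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e));
        let stream = std::pin::pin!(stream);
        let mut reader = StreamReader::new(stream);
        tokio::io::copy(&mut reader, file).await?;
        Ok::<_, anyhow::Error>(())
    };
    match tokio::time::timeout(Duration::from_secs(3), copy).await {
        Ok(res) => res,
        Err(_deadline_elapsed) => {
            // The deadline fired while the query was still running server-side;
            // cancel it out of band so the server stops streaming.
            tx.cancel_token().cancel_query(NoTls).await?;
            Ok(())
        }
    }
}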

File tree: 1 file changed, +131 -63 lines changed
src/self-managed-debug/src/system_catalog_dumper.rs

@@ -18,6 +18,7 @@
 use anyhow::{Context as _, Result};
 use chrono::{DateTime, Utc};
 use futures::TryStreamExt;
+use mz_ore::future::{timeout, TimeoutError};
 use mz_tls_util::make_tls;
 use std::fmt;
 use std::path::PathBuf;
@@ -26,9 +27,7 @@ use std::sync::Arc;
 use std::time::Duration;
 use tokio::io::AsyncWriteExt;
 use tokio::sync::Mutex;
-use tokio_postgres::{
-    Client as PgClient, Config as PgConfig, Connection, NoTls, Socket, Transaction,
-};
+use tokio_postgres::{Client as PgClient, Config as PgConfig, Connection, Socket, Transaction};
 use tokio_util::io::StreamReader;
 
 use k8s_openapi::api::core::v1::Service;
@@ -229,7 +228,7 @@ pub enum RelationCategory {
     /// For relations that belong in the `mz_introspection` schema.
     /// These relations require a replica name to be specified.
     Introspection,
-    /// For relations that are retained metric objects that we'd also like to get the SUBSCRIBE output for.
+    /// For relations that are retained metric objects that we'd like to get the SUBSCRIBE output for.
     Retained,
     /// Other relations that we want to do a SELECT * FROM on.
     Basic,
@@ -372,10 +371,18 @@ static RELATIONS: &[Relation] = &[
         category: RelationCategory::Basic,
     },
     // Sources/sinks
+    Relation {
+        name: "mz_source_statistics_with_history",
+        category: RelationCategory::Basic,
+    },
     Relation {
         name: "mz_source_statistics_with_history",
         category: RelationCategory::Retained,
     },
+    Relation {
+        name: "mz_sink_statistics",
+        category: RelationCategory::Basic,
+    },
     Relation {
         name: "mz_sink_statistics",
         category: RelationCategory::Retained,
@@ -620,6 +627,11 @@ static PG_CONNECTION_TIMEOUT: Duration = Duration::from_secs(60);
 /// sign that the operation won't work.
 static PG_QUERY_TIMEOUT: Duration = Duration::from_secs(20);
 
+/// The amount of time we wait to collect data from the subscribe
+/// query before cancelling the query. This is to prevent the query
+/// from running indefinitely.
+static SUBSCRIBE_SCRAPE_TIMEOUT: Duration = Duration::from_secs(3);
+
 /// The maximum number of errors we tolerate for a cluster replica.
 /// If a cluster replica has more than this many errors, we skip it.
 static MAX_CLUSTER_REPLICA_ERROR_COUNT: usize = 3;
@@ -681,41 +693,110 @@ pub async fn create_postgres_connection(
     Ok((pg_client, pg_conn, tls))
 }
 
+pub async fn write_copy_stream(
+    transaction: &Transaction<'_>,
+    copy_query: &str,
+    file: &mut tokio::fs::File,
+    relation_name: &str,
+) -> Result<(), anyhow::Error> {
+    let copy_stream = transaction
+        .copy_out(copy_query)
+        .await
+        .context(format!("Failed to COPY TO for {}", relation_name))?
+        .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e));
+    let copy_stream = std::pin::pin!(copy_stream);
+    let mut reader = StreamReader::new(copy_stream);
+    tokio::io::copy(&mut reader, file).await?;
+    Ok::<(), anyhow::Error>(())
+}
+
 pub async fn copy_relation_to_csv(
     transaction: &Transaction<'_>,
     file_path_name: PathBuf,
-    column_names: &mut Vec<String>,
-    relation_name: &str,
+    column_names: &Vec<String>,
+    relation: &Relation,
+    tls: MakeTlsConnector,
 ) -> Result<(), anyhow::Error> {
     let mut file = tokio::fs::File::create(&file_path_name).await?;
 
     file.write_all((column_names.join(",") + "\n").as_bytes())
         .await?;
 
-    // Stream data rows to CSV
-    let copy_query = format!(
-        "COPY (SELECT * FROM {}) TO STDOUT WITH (FORMAT CSV)",
-        relation_name
-    );
+    match relation.category {
+        RelationCategory::Retained => {
+            let copy_query = format!(
+                "COPY (SUBSCRIBE TO (SELECT * FROM {})) TO STDOUT WITH (FORMAT CSV);",
+                relation.name
+            );
+
+            let copy_fut = write_copy_stream(transaction, &copy_query, &mut file, relation.name);
+
+            // We use a timeout to cut the SUBSCRIBE query short since it's expected to run indefinitely.
+            // Alternatively, we could use a `DECLARE...FETCH ALL` for the same effect, but then we'd have
+            // to format the result as CSV ourselves, leading to more code. Another alternative is to
+            // specify an UPTO, but it gets finicky to get the UPTO frontier right since we can't rely on
+            // wallclock time.
+            let res = timeout(SUBSCRIBE_SCRAPE_TIMEOUT, copy_fut).await;
+
+            match res {
+                Ok(()) => Ok(()),
+                Err(TimeoutError::DeadlineElapsed) => {
+                    transaction.cancel_token().cancel_query(tls).await?;
+                    Ok(())
+                }
+                Err(e) => Err(e),
+            }
+            .map_err(|e| anyhow::anyhow!(e))?;
+        }
+        _ => {
+            let copy_query = format!(
+                "COPY (SELECT * FROM {}) TO STDOUT WITH (FORMAT CSV)",
+                relation.name
+            );
+            write_copy_stream(transaction, &copy_query, &mut file, relation.name).await?;
+        }
+    };
+
+    info!("Copied {} to {}", relation.name, file_path_name.display());
+    Ok::<(), anyhow::Error>(())
+}
 
-    let copy_stream = transaction
-        .copy_out(&copy_query)
+pub async fn query_column_names(
+    pg_client: &PgClient,
+    relation: &Relation,
+) -> Result<Vec<String>, anyhow::Error> {
+    let relation_name = relation.name;
+    // We query the column names to write the header row of the CSV file.
+    // TODO (SangJunBak): Use `WITH (HEADER TRUE)` once database-issues#2846 is implemented.
+    let mut column_names = pg_client
+        .query(&format!("SHOW COLUMNS FROM {}", &relation_name), &[])
         .await
-        .context(format!("Failed to COPY TO for {}", relation_name))?
-        .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e));
-    let copy_stream = std::pin::pin!(copy_stream);
-    let mut reader = StreamReader::new(copy_stream);
-    tokio::io::copy(&mut reader, &mut file).await?;
+        .context(format!("Failed to get column names for {}", relation_name))?
+        .into_iter()
+        .map(|row| match row.try_get::<_, String>("name") {
+            Ok(name) => Some(name),
+            Err(_) => None,
+        })
+        .filter_map(|row| row)
+        .collect::<Vec<_>>();
 
-    info!("Copied {} to {}", relation_name, file_path_name.display());
-    Ok::<(), anyhow::Error>(())
+    match relation.category {
+        RelationCategory::Retained => {
+            column_names.splice(0..0, ["mz_timestamp".to_string(), "mz_diff".to_string()]);
+        }
+        _ => (),
+    }
+
+    Ok(column_names)
 }
 
 pub async fn query_relation(
     transaction: &Transaction<'_>,
     start_time: DateTime<Utc>,
     relation: &Relation,
+    column_names: &Vec<String>,
     cluster_replica: Option<&ClusterReplica>,
+    tls: MakeTlsConnector,
 ) -> Result<(), anyhow::Error> {
     let relation_name = relation.name;
     let relation_category = &relation.category;
@@ -747,61 +828,31 @@ pub async fn query_relation(
         ))?;
     }
 
-    // We query the column names to write the header row of the CSV file.
-    // TODO (SangJunBak): Use `WITH (HEADER TRUE)` once database-issues#2846 is implemented.
-    let mut column_names = transaction
-        .query(&format!("SHOW COLUMNS FROM {}", &relation_name), &[])
-        .await
-        .context(format!("Failed to get column names for {}", relation_name))?
-        .into_iter()
-        .map(|row| match row.try_get::<_, String>("name") {
-            Ok(name) => Some(name),
-            Err(_) => None,
-        })
-        .filter_map(|row| row)
-        .collect::<Vec<_>>();
-
     match relation_category {
         RelationCategory::Basic => {
             let file_path = format_file_path(start_time, None);
             let file_path_name = file_path.join(relation_name).with_extension("csv");
             tokio::fs::create_dir_all(&file_path).await?;
 
-            copy_relation_to_csv(
-                transaction,
-                file_path_name,
-                &mut column_names,
-                relation_name,
-            )
-            .await?;
+            copy_relation_to_csv(transaction, file_path_name, column_names, relation, tls).await?;
         }
         RelationCategory::Introspection => {
            let file_path = format_file_path(start_time, cluster_replica);
            tokio::fs::create_dir_all(&file_path).await?;
 
            let file_path_name = file_path.join(relation_name).with_extension("csv");
 
-            copy_relation_to_csv(
-                transaction,
-                file_path_name,
-                &mut column_names,
-                relation_name,
-            )
-            .await?;
+            copy_relation_to_csv(transaction, file_path_name, column_names, relation, tls).await?;
         }
-        _ => {
+        RelationCategory::Retained => {
+            // Copy the current state and retained subscribe state
             let file_path = format_file_path(start_time, None);
-            let file_path_name = file_path.join(relation_name).with_extension("csv");
+            let file_path_name = file_path
+                .join(format!("{}_subscribe", relation_name))
+                .with_extension("csv");
             tokio::fs::create_dir_all(&file_path).await?;
 
-            copy_relation_to_csv(
-                transaction,
-                file_path_name,
-                &mut column_names,
-                relation_name,
-            )
-            .await?;
-            // TODO (debug_tool1): Dump the `FETCH ALL SUBSCRIBE` output too
+            copy_relation_to_csv(transaction, file_path_name, column_names, relation, tls).await?;
         }
     }
     Ok::<(), anyhow::Error>(())
@@ -861,8 +912,12 @@ impl<'n> SystemCatalogDumper<'n> {
         cluster_replica: Option<&ClusterReplica>,
     ) -> Result<(), anyhow::Error> {
         info!(
-            "Copying relation {}{}",
+            "Copying relation {}{}{}",
             relation.name,
+            match relation.category {
+                RelationCategory::Retained => " (subscribe history)",
+                _ => "",
+            },
            cluster_replica.map_or_else(|| "".to_string(), |replica| format!(" in {}", replica))
         );
 
@@ -881,10 +936,23 @@ impl<'n> SystemCatalogDumper<'n> {
 
        async move {
            // TODO (debug_tool3): Use a transaction for the entire dump instead of per query.
-            let mut pg_client_lock = pg_client.lock().await;
-            let transaction = pg_client_lock.transaction().await?;
-
-            match query_relation(&transaction, start_time, &relation, cluster_replica).await
+            let mut pg_client = pg_client.lock().await;
+
+            // We cannot query the column names in the transaction because SUBSCRIBE queries
+            // cannot be executed with SELECT and SHOW queries in the same transaction.
+            let column_names = query_column_names(&pg_client, &relation).await?;
+
+            let transaction = pg_client.transaction().await?;
+
+            match query_relation(
+                &transaction,
+                start_time,
+                &relation,
+                &column_names,
+                cluster_replica,
+                self.pg_tls.clone(),
+            )
+            .await
            {
                Ok(()) => Ok(()),
                Err(err) => {
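
A note on the header row for the _subscribe dumps: SHOW COLUMNS only returns the relation's own columns, while every SUBSCRIBE row is prefixed with a timestamp and a diff. A small sketch of the header construction distilled from query_column_names above (subscribe_csv_header is an illustrative name, not a function in the commit):

// SUBSCRIBE output rows lead with `mz_timestamp` and `mz_diff`, so the CSV
// header must prepend those two columns to the `SHOW COLUMNS` result.
fn subscribe_csv_header(mut column_names: Vec<String>) -> String {
    column_names.splice(0..0, ["mz_timestamp".to_string(), "mz_diff".to_string()]);
    column_names.join(",") + "\n"
}

For columns ["id", "updated_at"], this yields "mz_timestamp,mz_diff,id,updated_at" as the header line.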
