
Commit 657f6ba

test concurrent

1 parent dee32be

4 files changed: +105 −33 lines

src/json_rescue_v5_extract.rs (+7 −3)
@@ -48,8 +48,7 @@ pub fn extract_v5_json_rescue(
         wtxs.function = make_function_name(script);
         trace!("function: {}", &wtxs.function);
         if !unique_functions.contains(&wtxs.function) {
-            unique_functions.push(wtxs.function.clone());
-
+            unique_functions.push(wtxs.function.clone());
         }

         decode_transaction_args(&mut wtxs, &t.bytes)?;
@@ -64,7 +63,7 @@ pub fn extract_v5_json_rescue(
             RelationLabel::Transfer(_) => tx_vec.push(wtxs),
             RelationLabel::Onboarding(_) => tx_vec.push(wtxs),
             RelationLabel::Vouch(_) => tx_vec.push(wtxs),
-            RelationLabel::Configuration => {},
+            RelationLabel::Configuration => {}
             RelationLabel::Miner => {}
         };
     }
@@ -97,6 +96,11 @@ pub fn decode_transaction_args(wtx: &mut WarehouseTxMaster, tx_bytes: &[u8]) ->

             wtx.entry_function = Some(EntryFunctionArgs::V5(sf.to_owned()));
         }
+        ScriptFunctionCallGenesis::AutopayCreateInstruction { payee, .. } => {
+            wtx.relation_label =
+                RelationLabel::Transfer(cast_legacy_account(payee)?);
+            wtx.entry_function = Some(EntryFunctionArgs::V5(sf.to_owned()));
+        }
         ScriptFunctionCallGenesis::CreateAccUser { .. } => {
            // onboards self
            wtx.relation_label = RelationLabel::Onboarding(wtx.sender);
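
Note: the duplicate check in the first hunk scans a Vec, which is O(n) per lookup. A minimal alternative sketch (not part of this commit) using std's HashSet for O(1) membership tests while keeping first-seen order; `unique_function_names` and `names` are hypothetical stand-ins for the per-transaction function names collected above:

use std::collections::HashSet;

// Sketch: dedupe function names with a HashSet instead of Vec::contains.
fn unique_function_names(names: &[String]) -> Vec<String> {
    let mut seen: HashSet<&str> = HashSet::new();
    let mut unique = Vec::new();
    for n in names {
        // insert returns false when the name was already recorded
        if seen.insert(n.as_str()) {
            unique.push(n.clone());
        }
    }
    unique // preserves first-seen order, like the Vec::contains version
}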

src/json_rescue_v5_load.rs (+91 −25)
@@ -13,51 +13,42 @@ use std::path::Path;
 use std::sync::Arc;
 use tokio::sync::{Mutex, Semaphore};
 use tokio::task;
+use sysinfo::{System, SystemExt};

 /// How many records to read from the archives before attempting insert
-static LOAD_QUEUE_SIZE: usize = 1000;
+// static LOAD_QUEUE_SIZE: usize = 1000;
 /// When we attempt insert, the chunks of txs that go in to each query
 static QUERY_BATCH_SIZE: usize = 250;

 /// from a tgz file decompress all the .json files in archive
 /// and then read into the warehouse record format
-pub async fn decompress_and_extract(tgz_file: &Path, pool: &Graph) -> Result<u64> {
+pub async fn single_thread_decompress_extract(tgz_file: &Path, pool: &Graph) -> Result<u64> {
     let temppath = decompress_to_temppath(tgz_file)?;
     let json_vec = list_all_json_files(temppath.path())?;

     let mut found_count = 0u64;
     let mut created_count = 0u64;

     let mut unique_functions: Vec<String> = vec![];
-    // fill to BATCH_SIZE before attempting insert.
-    // many files may only have a handful of user txs,
-    // so individual files may have far fewer than BATCH_SIZE.
-    let mut queue: Vec<WarehouseTxMaster> = vec![];

     for j in json_vec {
-        if let Ok((mut r, _e, _)) = extract_v5_json_rescue(&j) {
-            queue.append(&mut r);
-        }
+        let (records, _, unique) = extract_v5_json_rescue(&j)?;

-        queue.iter().for_each(|s| {
-            if !unique_functions.contains(&s.function) {
-                unique_functions.push(s.function.clone());
+        unique.iter().for_each(|f| {
+            if !unique_functions.contains(f) {
+                unique_functions.push(f.clone());
             }
         });

-        if queue.len() >= LOAD_QUEUE_SIZE {
-            let drain: Vec<WarehouseTxMaster> = std::mem::take(&mut queue);
-
-            let res = tx_batch(
-                &drain,
-                pool,
-                QUERY_BATCH_SIZE,
-                j.file_name().unwrap().to_str().unwrap(),
-            )
-            .await?;
-            created_count += res.created_tx as u64;
-            found_count += drain.len() as u64;
-        }
+        let res = tx_batch(
+            &records,
+            pool,
+            QUERY_BATCH_SIZE,
+            j.file_name().unwrap().to_str().unwrap(),
+        )
+        .await?;
+        created_count += res.created_tx as u64;
+        found_count += records.len() as u64;
     }

     info!("V5 transactions found: {}", found_count);
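
Two behavior changes ride along with the rename here: the LOAD_QUEUE_SIZE accumulation queue is gone, so tx_batch now runs once per JSON file, and the old `if let Ok(...)` tolerance is replaced by `?`, so a single malformed file now aborts the whole archive instead of being skipped. A minimal caller sketch (hypothetical path and function name, assuming an open Graph connection and an async context):

use std::path::Path;

// Sketch: load one .tgz archive with the renamed single-threaded loader.
async fn load_one_archive(pool: &Graph) -> anyhow::Result<()> {
    let archive = Path::new("fixtures/v5/0-99900.tgz"); // hypothetical location
    let tx_count = single_thread_decompress_extract(archive, pool).await?;
    println!("loaded {} transactions", tx_count);
    Ok(())
}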
@@ -317,3 +308,78 @@ pub async fn rip(start_dir: &Path, pool: &Graph) -> Result<u64> {
     }
     Ok(txs)
 }
+
+pub async fn rip_concurrent(start_dir: &Path, pool: &Graph) -> Result<()> {
+    let tgz_list = list_all_tgz_archives(start_dir)?;
+    info!("tgz archives found: {}", &tgz_list.len());
+
+    let tasks: Vec<_> = tgz_list
+        .into_iter()
+        .map(|p| {
+            let pool = pool.clone(); // clone the pool handle for each task
+            tokio::spawn(async move { single_thread_decompress_extract(&p, &pool).await })
+        })
+        .collect();
+
+    // await all tasks and report results
+    let results = futures::future::join_all(tasks).await;
+    for (i, result) in results.into_iter().enumerate() {
+        match result {
+            Ok(Ok(_)) => info!("Task {} completed successfully.", i),
+            Ok(Err(e)) => error!("Task {} failed: {:?}", i, e),
+            Err(e) => error!("Task {} panicked: {:?}", i, e),
+        }
+    }
+    Ok(())
+}
+
+/// Limit for concurrent tasks
+const MAX_CONCURRENT_TASKS: usize = 4;
+
+pub async fn rip_concurrent_limited(start_dir: &Path, pool: &Graph) -> Result<()> {
+    let tgz_list = list_all_tgz_archives(start_dir)?;
+    info!("tgz archives found: {}", tgz_list.len());
+
+    // semaphore to cap how many archives are processed at once
+    let semaphore = Arc::new(Semaphore::new(MAX_CONCURRENT_TASKS));
+    let mut tasks = vec![];
+
+    for p in tgz_list.into_iter() {
+        let pool = pool.clone(); // clone the pool handle for each task
+        let semaphore = Arc::clone(&semaphore);
+
+        tasks.push(tokio::spawn(async move {
+            let _permit = semaphore.acquire().await; // hold a permit while working
+            single_thread_decompress_extract(&p, &pool).await
+        }));
+    }
+
+    // await all tasks and report results
+    let results = futures::future::join_all(tasks).await;
+    for (i, result) in results.into_iter().enumerate() {
+        match result {
+            Ok(Ok(_)) => info!("Task {} completed successfully.", i),
+            Ok(Err(e)) => error!("Task {} failed: {:?}", i, e),
+            Err(e) => error!("Task {} panicked: {:?}", i, e),
+        }
+    }
+
+    Ok(())
+}
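
Design note: rip_concurrent spawns one task per archive with no bound, while rip_concurrent_limited gates the work with a semaphore. Because the permit is acquired inside the spawned task, however, all tasks are still spawned up front and only the decompression work is limited (the Result from acquire() is also silently ignored). A sketch (not in this commit, assuming tokio and anyhow) that acquires an owned permit before spawning, so at most the limit's worth of tasks exist at once; `bounded_spawn_demo` is a hypothetical name:

use std::sync::Arc;
use tokio::sync::Semaphore;

// Sketch: bound the number of live tasks, not just the active work.
async fn bounded_spawn_demo() -> anyhow::Result<()> {
    let semaphore = Arc::new(Semaphore::new(4));
    let mut tasks = Vec::new();
    for i in 0..16u32 {
        // acquire_owned moves the permit into the task; it is released on drop
        let permit = Arc::clone(&semaphore).acquire_owned().await?;
        tasks.push(tokio::spawn(async move {
            let _permit = permit; // held for the task's lifetime
            i * 2 // stand-in for single_thread_decompress_extract
        }));
    }
    for t in tasks {
        let _ = t.await?;
    }
    Ok(())
}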

src/warehouse_cli.rs (+1 −1)
@@ -174,7 +174,7 @@ impl WarehouseCli {
         Sub::VersionFiveTx { archive_dir } => {
             let pool = try_db_connection_pool(self).await?;

-            json_rescue_v5_load::rip(archive_dir, &pool).await?;
+            json_rescue_v5_load::rip_concurrent_limited(archive_dir, &pool).await?;
         }
     };
     Ok(())

tests/test_json_rescue_v5_load.rs (+6 −4)
@@ -23,7 +23,7 @@ async fn test_load_all_tgz() -> anyhow::Result<()> {

     let path = fixtures::v5_json_tx_path().join("0-99900.tgz");

-    let tx_count = json_rescue_v5_load::decompress_and_extract(&path, &pool).await?;
+    let tx_count = json_rescue_v5_load::single_thread_decompress_extract(&path, &pool).await?;

     assert!(tx_count == 5244);
@@ -89,13 +89,15 @@ async fn test_load_entrypoint() -> anyhow::Result<()> {

     let path = fixtures::v5_json_tx_path();

-    let tx_count = json_rescue_v5_load::rip(&path, &pool).await?;
-    dbg!(&tx_count);
-    assert!(tx_count == 13);
+    json_rescue_v5_load::rip_concurrent_limited(&path, &pool).await?;
+    // dbg!(&tx_count);
+    // assert!(tx_count == 13);

     Ok(())
 }

+
+
 #[tokio::test]
 async fn test_rescue_v5_parse_set_wallet_tx() -> anyhow::Result<()> {
     libra_forensic_db::log_setup();
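
Note: the count assertion is commented out because rip_concurrent_limited returns Result<()> rather than the Result<u64> that rip returned; restoring it would require aggregating the per-task counts from the spawned loaders.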
