Skip to content

Commit 536a93b

Browse files
sirouk, Sandra di Bittern, Isa LeHind
authored
[storage] change github api to restore epoch archive (#394)
Co-authored-by: Sandra di Bittern <[email protected]>
Co-authored-by: Isa LeHind <[email protected]>
1 parent 3a34ff7 commit 536a93b

File tree

2 files changed

+149
-82
lines changed

2 files changed

+149
-82
lines changed

tools/storage/src/download_bundle.rs

Lines changed: 114 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,35 @@ use std::fs;
1010
use std::path::{Path, PathBuf};
1111
use std::str::FromStr;
1212

13+
// Update GitHubContent structure for contents API
1314
#[derive(Deserialize, Debug)]
15+
#[allow(dead_code)]
1416
struct GitHubContent {
1517
download_url: Option<String>,
1618
#[serde(rename = "type")]
1719
content_type: String,
1820
name: String,
1921
}
2022

23+
// Add TreeResponse structures for the tree API
24+
#[derive(Deserialize, Debug)]
25+
#[allow(dead_code)]
26+
struct TreeItem {
27+
path: String,
28+
mode: String,
29+
#[serde(rename = "type")]
30+
item_type: String,
31+
sha: String,
32+
url: Option<String>,
33+
}
34+
35+
#[derive(Deserialize, Debug)]
36+
struct TreeResponse {
37+
tree: Vec<TreeItem>,
38+
truncated: bool,
39+
}
40+
41+
// Rest of the original data structures
2142
#[derive(Debug)]
2243
pub struct EpochFolders {
2344
pub epoch_ending: String,
@@ -63,13 +84,13 @@ fn find_closest_transaction_folder(
6384
}
6485
}
6586

66-
// println!("For target version {}, found candidates:", target_version);
67-
// if let Some((v, name)) = version_below {
68-
// println!(" Below target: {} ({})", name, v);
69-
// }
70-
// if let Some((v, name)) = version_above {
71-
// println!(" Above target: {} ({})", name, v);
72-
// }
87+
println!("For target version {}, found candidates:", target_version);
88+
if let Some((v, name)) = version_below {
89+
println!(" Below target: {} ({})", name, v);
90+
}
91+
if let Some((v, name)) = version_above {
92+
println!(" Above target: {} ({})", name, v);
93+
}
7394

7495
// Choose the version below target
7596
version_below
@@ -84,37 +105,51 @@ pub async fn find_closest_epoch_folder(
84105
branch: &str,
85106
target_epoch: u64,
86107
) -> Result<EpochFolders> {
108+
// Update to use the Git Tree API instead of Contents API
87109
let api_url = format!(
88-
"https://api.github.com/repos/{}/{}/contents/snapshots?ref={}",
110+
"https://api.github.com/repos/{}/{}/git/trees/{}:snapshots",
89111
owner, repo, branch
90112
);
91113

92-
let contents: Vec<GitHubContent> = client
114+
let response = client
93115
.get(&api_url)
94116
.header("User-Agent", "libra-framework-downloader")
95117
.send()
96118
.await
97-
.context("Failed to list snapshots directory")?
119+
.context("Failed to list snapshots directory")?;
120+
121+
let tree_response: TreeResponse = response
98122
.json()
99123
.await
100124
.context("Failed to parse snapshots directory contents")?;
101125

126+
if tree_response.truncated {
127+
info!("Warning: GitHub Tree API response is truncated. Some folders might be missing.");
128+
}
129+
102130
// Separate folders by type
103131
let mut epoch_ending_folders: Vec<(u64, String)> = Vec::new();
104132
let mut state_epoch_folders: Vec<(u64, String)> = Vec::new();
105133
let mut transaction_folders: Vec<(u64, String)> = Vec::new();
106134

107-
for item in contents {
108-
if item.content_type != "dir" {
135+
for item in tree_response.tree {
136+
// Only consider tree items (directories)
137+
if item.item_type != "tree" {
109138
continue;
110139
}
111140

112-
if let Some(epoch) = parse_epoch_ending_number(&item.name) {
113-
epoch_ending_folders.push((epoch, item.name));
114-
} else if let Some((epoch, _version)) = parse_state_epoch_info(&item.name) {
115-
state_epoch_folders.push((epoch, item.name));
116-
} else if let Some(version) = parse_transaction_number(&item.name) {
117-
transaction_folders.push((version, item.name));
141+
// Extract just the folder name from the path
142+
let folder_name = match Path::new(&item.path).file_name() {
143+
Some(name) => name.to_string_lossy().to_string(),
144+
None => continue,
145+
};
146+
147+
if let Some(epoch) = parse_epoch_ending_number(&folder_name) {
148+
epoch_ending_folders.push((epoch, folder_name));
149+
} else if let Some((epoch, _version)) = parse_state_epoch_info(&folder_name) {
150+
state_epoch_folders.push((epoch, folder_name));
151+
} else if let Some(version) = parse_transaction_number(&folder_name) {
152+
transaction_folders.push((version, folder_name));
118153
}
119154
}
120155

@@ -189,62 +224,77 @@ pub async fn download_github_folder(
189224
output_dir: &str,
190225
) -> Result<()> {
191226
let client = reqwest::Client::new();
192-
let mut pending_dirs = vec![(path.to_string(), output_dir.to_string())];
193227

194228
// Create the root output directory first
195229
fs::create_dir_all(output_dir)?;
196230

197-
while let Some((current_path, current_dir)) = pending_dirs.pop() {
198-
let api_url = format!(
199-
"https://api.github.com/repos/{}/{}/contents/{}?ref={}",
200-
owner, repo, current_path, branch
201-
);
231+
// Use the Git Tree API with recursive flag to get all contents at once
232+
let api_url = format!(
233+
"https://api.github.com/repos/{}/{}/git/trees/{}:{}?recursive=1",
234+
owner, repo, branch, path
235+
);
236+
237+
info!("Downloading tree from: {}", api_url);
238+
239+
let response = client
240+
.get(&api_url)
241+
.header("User-Agent", "libra-framework-downloader")
242+
.send()
243+
.await
244+
.context("Failed to send tree request")?;
245+
246+
let tree_response: TreeResponse = response
247+
.json()
248+
.await
249+
.context("Failed to parse JSON tree response")?;
250+
251+
if tree_response.truncated {
252+
info!("Warning: Response was truncated, not all files will be downloaded");
253+
}
254+
255+
// Get the base path to properly handle nested directories
256+
let base_path = Path::new(path)
257+
.file_name()
258+
.map_or(String::new(), |name| name.to_string_lossy().to_string());
259+
let base_dir = Path::new(output_dir).join(base_path);
260+
fs::create_dir_all(&base_dir)?;
261+
262+
// Process files from the tree
263+
for item in tree_response.tree {
264+
// Skip if not a blob (file)
265+
if item.item_type != "blob" {
266+
continue;
267+
}
268+
269+
// The path in tree response is relative to the requested path
270+
let relative_path = item.path;
202271

203-
info!("Downloading from: {}", api_url);
272+
// Compute where to save the file
273+
let output_path = base_dir.join(&relative_path);
274+
275+
// Create parent directory if it doesn't exist
276+
if let Some(parent) = output_path.parent() {
277+
fs::create_dir_all(parent)?;
278+
}
204279

205-
let contents: Vec<GitHubContent> = client
206-
.get(&api_url)
280+
// Download the file content using raw GitHub URL
281+
let content_url = format!(
282+
"https://raw.githubusercontent.com/{}/{}/{}/{}/{}",
283+
owner, repo, branch, path, relative_path
284+
);
285+
286+
let content = client
287+
.get(&content_url)
207288
.header("User-Agent", "libra-framework-downloader")
208289
.send()
209290
.await
210-
.context("Failed to send request")?
211-
.json()
291+
.with_context(|| format!("Failed to download file: {}", relative_path))?
292+
.bytes()
212293
.await
213-
.context("Failed to parse JSON response")?;
214-
215-
// Extract the last component of the path to create the current directory
216-
let current_folder = Path::new(&current_path)
217-
.file_name()
218-
.map(|s| s.to_string_lossy().to_string())
219-
.unwrap_or_default();
220-
221-
// Create full directory path including the current folder
222-
let full_dir_path = Path::new(&current_dir).join(&current_folder);
223-
fs::create_dir_all(&full_dir_path)?;
224-
225-
for item in contents {
226-
let output_path = full_dir_path.join(&item.name);
227-
228-
if item.content_type == "file" {
229-
if let Some(download_url) = item.download_url {
230-
// println!("Downloading file: {}", item.name);
231-
let content = client
232-
.get(&download_url)
233-
.header("User-Agent", "libra-framework-downloader")
234-
.send()
235-
.await?
236-
.bytes()
237-
.await?;
238-
239-
fs::write(&output_path, content)
240-
.with_context(|| format!("Failed to write file: {}", item.name))?;
241-
}
242-
} else if item.content_type == "dir" {
243-
println!("Processing directory: {}", item.name);
244-
let new_path = format!("{}/{}", current_path, item.name);
245-
pending_dirs.push((new_path, full_dir_path.to_str().unwrap().to_string()));
246-
}
247-
}
294+
.with_context(|| format!("Failed to read bytes from: {}", relative_path))?;
295+
296+
fs::write(&output_path, content)
297+
.with_context(|| format!("Failed to write file: {}", relative_path))?;
248298
}
249299

250300
Ok(())

tools/storage/src/restore_bundle.rs

Lines changed: 35 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -161,12 +161,19 @@ impl RestoreBundle {
161161
for entry in file_list.flatten() {
162162
let content = fs::read_to_string(&entry)?;
163163
let updated_content = Self::update_manifest_paths(&content);
164-
fs::write(&entry, &updated_content)?; // Add & here
165-
verify_valid_transaction_list(&entry, self.version)?;
164+
fs::write(&entry, &updated_content)?;
166165

167-
self.transaction_manifest = entry;
166+
if verify_valid_transaction_list(&entry, self.version) {
167+
self.transaction_manifest = entry;
168+
return Ok(());
169+
}
168170
}
169-
Ok(())
171+
172+
// If we get here, no valid transaction manifest was found
173+
bail!(
174+
"No valid transaction manifest found for version {}",
175+
self.version
176+
);
170177
}
171178

172179
fn update_manifest_paths(manifest_content: &str) -> String {
@@ -204,21 +211,31 @@ impl RestoreBundle {
204211
}
205212
}
206213

207-
pub fn verify_valid_transaction_list(
208-
transaction_manifest: &Path,
209-
version: u64,
210-
) -> anyhow::Result<()> {
211-
let s = fs::read_to_string(transaction_manifest)?;
212-
let tm: TransactionBackup = serde_json::from_str(&s)?;
213-
214-
if tm.last_version < version {
215-
bail!("the transaction you are looking for is newer than the last version in this bundle. Get a newer transaction backup");
216-
};
217-
218-
if tm.first_version > version {
219-
bail!("the transaction you are looking for is older than the last version in this bundle. Get an older transaction backup.");
214+
pub fn verify_valid_transaction_list(transaction_manifest: &Path, version: u64) -> bool {
215+
match fs::read_to_string(transaction_manifest) {
216+
Ok(s) => match serde_json::from_str::<TransactionBackup>(&s) {
217+
Ok(tm) => {
218+
if version > tm.last_version {
219+
info!("The transaction you are looking for is newer than the last version in this bundle. Get a newer transaction backup");
220+
return false;
221+
};
222+
223+
if version < tm.first_version {
224+
info!("The transaction you are looking for is older than the last version in this bundle. Get an older transaction backup.");
225+
return false;
226+
}
227+
true
228+
}
229+
Err(e) => {
230+
info!("Failed to parse transaction manifest: {}", e);
231+
false
232+
}
233+
},
234+
Err(e) => {
235+
info!("Failed to read transaction manifest file: {}", e);
236+
false
237+
}
220238
}
221-
Ok(())
222239
}
223240

224241
#[test]

0 commit comments

Comments
 (0)