Skip to content

Commit

Permalink
main, GitCommitReader: Don't use --stat with partial clones
Browse files Browse the repository at this point in the history
The --stat (or --shortstat) parameter would cause us to download lots
of data from origin for relatively little benefit. Walking the full
repository this way is also fairly inefficient.

We warn when this happens so the user knows not to rely on the change
statistics, or to make a full clone if they are needed.

Discussed in GitHub #13.
  • Loading branch information
hpjansson committed May 7, 2021
1 parent c1d1bfb commit 525c66e
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 18 deletions.
37 changes: 22 additions & 15 deletions src/gitcommitreader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,23 +56,30 @@ pub struct GitCommitReader

impl GitCommitReader
{
pub fn new(repo_path: std::path::PathBuf, repo_name: &str, since: DateTime<Utc>) -> Result<GitCommitReader>
pub fn new(repo_path: std::path::PathBuf, repo_name: &str, since: DateTime<Utc>, use_stat: bool) -> Result<GitCommitReader>
{
let repo_path = repo_path.canonicalize().unwrap();
let stdout = Command::new("git")
.arg("-C")
.arg(&repo_path)
.arg("log")
.arg("--branches")
.arg("--remotes")
.arg("--pretty=format:%H__sep__%aD__sep__%aN__sep__%aE__sep__%cD__sep__%cN__sep__%cE")
.arg("--reverse")
.arg("--since")
.arg(since.to_rfc2822())
.arg("--date-order")
.arg("--shortstat")
.arg("HEAD")
.stdout(Stdio::piped())
let mut cmd;

cmd = Command::new("git");
cmd.arg("-C")
.arg(&repo_path)
.arg("log")
.arg("--branches")
.arg("--remotes")
.arg("--pretty=format:%H__sep__%aD__sep__%aN__sep__%aE__sep__%cD__sep__%cN__sep__%cE")
.arg("--reverse")
.arg("--since")
.arg(since.to_rfc2822())
.arg("--date-order")
.arg("HEAD");

if use_stat
{
cmd.arg("--shortstat");
}

let stdout = cmd.stdout(Stdio::piped())
.spawn().chain_err(|| "Could not spawn git")?
.stdout.chain_err(|| "Could not read git output")?;
let reader = BufReader::new(stdout);
Expand Down
28 changes: 25 additions & 3 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ mod projectmeta;
mod statuslogger;

use std::path::PathBuf;
use std::process::Command;
use structopt::StructOpt;
use errors::*;
use crate::commitdb::CommitDb;
Expand Down Expand Up @@ -169,12 +170,33 @@ fn run_ingest(db_path: PathBuf, repo_tree_paths: Vec<PathBuf>, _meta: &ProjectMe
.file_name().unwrap()
.to_string_lossy()
.into_owned();
let gcr = GitCommitReader::new(path.clone(),
&repo_name,
cdb.get_last_author_time(&repo_name))?;

sl.begin_repo(&repo_name);

// Check for promisor for origin remote; we interpret its presence
// as a preference for remote storage. If found, we turn off --stat
// collection, since that would cause git to fetch all the remote
// blobs (slowly).
//
// This will break change counts. Author and commit counts will still
// work.

let mut cmd;
cmd = Command::new("git");
cmd.arg("-C").arg(&path).arg("config").arg("remote.origin.promisor");
let output = cmd.output().unwrap();
let has_promisor = std::str::from_utf8(&output.stdout).unwrap().trim() == "true";

if has_promisor
{
sl.log_warning("origin has a promisor; change details omitted.");
}

let gcr = GitCommitReader::new(path.clone(),
&repo_name,
cdb.get_last_author_time(&repo_name),
!has_promisor)?;

for commit in gcr
{
cdb.insert_raw_commit(&commit)?;
Expand Down

0 comments on commit 525c66e

Please sign in to comment.