Skip to content

Commit 094b548

Browse files
authored
Merge pull request #1092 from launchableinc/AIENG-182
[AIENG-182] progress report for file transfer
2 parents 561c0b2 + 07be351 commit 094b548

File tree

11 files changed

+185
-37
lines changed

11 files changed

+185
-37
lines changed

launchable/jar/exe_deploy.jar

3.05 KB
Binary file not shown.

src/main/java/com/launchableinc/ingest/commits/CommitGraphCollector.java

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.apache.http.impl.client.HttpClientBuilder;
2222
import org.eclipse.jgit.diff.DiffAlgorithm.SupportedAlgorithm;
2323
import org.eclipse.jgit.diff.DiffEntry;
24+
import org.eclipse.jgit.errors.ConfigInvalidException;
2425
import org.eclipse.jgit.errors.InvalidObjectIdException;
2526
import org.eclipse.jgit.errors.MissingObjectException;
2627
import org.eclipse.jgit.lib.ConfigConstants;
@@ -46,15 +47,16 @@
4647
import java.io.UncheckedIOException;
4748
import java.net.URL;
4849
import java.nio.charset.StandardCharsets;
50+
import java.time.Duration;
4951
import java.util.ArrayList;
5052
import java.util.Collection;
5153
import java.util.List;
5254
import java.util.Objects;
5355
import java.util.Set;
5456
import java.util.concurrent.TimeUnit;
5557
import java.util.function.Consumer;
58+
import java.util.function.Function;
5659
import java.util.function.Supplier;
57-
import java.util.zip.GZIPInputStream;
5860
import java.util.zip.GZIPOutputStream;
5961

6062
import static com.google.common.collect.ImmutableList.*;
@@ -68,6 +70,8 @@ public class CommitGraphCollector {
6870
static final ObjectMapper objectMapper = new ObjectMapper();
6971
private static final int HTTP_TIMEOUT_MILLISECONDS = 15_000;
7072

73+
private final String rootName;
74+
7175
/**
7276
* Root repository to start processing.
7377
*
@@ -98,7 +102,8 @@ private boolean outputAuditLog() {
98102
return audit || dryRun;
99103
}
100104

101-
public CommitGraphCollector(Repository git) {
105+
public CommitGraphCollector(String name, Repository git) {
106+
this.rootName = name;
102107
this.root = git;
103108
}
104109

@@ -253,10 +258,11 @@ private ImmutableList<ObjectId> getAdvertisedRefs(HttpResponse response) throws
253258
public void transfer(
254259
Collection<ObjectId> advertised, IOConsumer<ContentProducer> commitSender, IOConsumer<ContentProducer> fileSender, int chunkSize)
255260
throws IOException {
256-
ByRepository r = new ByRepository(root);
261+
ByRepository r = new ByRepository(root, rootName);
257262
try (CommitChunkStreamer cs = new CommitChunkStreamer(commitSender, chunkSize);
258-
FileChunkStreamer fs = new FileChunkStreamer(fileSender, chunkSize)) {
259-
r.transfer(advertised, cs, fs);
263+
FileChunkStreamer fs = new FileChunkStreamer(fileSender, chunkSize);
264+
ProgressReportingConsumer<VirtualFile> fsr = new ProgressReportingConsumer<>(fs, VirtualFile::path, Duration.ofSeconds(3))) {
265+
r.transfer(advertised, cs, fsr);
260266
}
261267
}
262268

@@ -299,13 +305,14 @@ public void collectFiles(boolean collectFiles) {
299305

300306
/** Process commits per repository. */
301307
final class ByRepository implements AutoCloseable {
302-
308+
private final String name;
303309
private final Repository git;
304310

305311
private final ObjectReader objectReader;
306312
private final Set<ObjectId> shallowCommits;
307313

308-
ByRepository(Repository git) throws IOException {
314+
ByRepository(Repository git, String name) throws IOException {
315+
this.name = name;
309316
this.git = git;
310317
this.objectReader = git.newObjectReader();
311318
this.shallowCommits = objectReader.getShallowCommits();
@@ -390,8 +397,12 @@ That is, find submodules that are available in the working tree (thus `!isBare()
390397
while (swalk.next()) {
391398
try (Repository subRepo = swalk.getRepository()) {
392399
if (subRepo != null) {
393-
try (ByRepository br = new ByRepository(subRepo)) {
394-
br.transfer(advertised, commitReceiver, fileReceiver);
400+
try {
401+
try (ByRepository br = new ByRepository(subRepo, name + "/" + swalk.getModulesPath())) {
402+
br.transfer(advertised, commitReceiver, fileReceiver);
403+
}
404+
} catch (ConfigInvalidException e) {
405+
throw new IOException("Invalid Git submodule configuration: " + git.getDirectory(), e);
395406
}
396407
}
397408
}
@@ -421,9 +432,9 @@ private void collectFiles(TreeWalk treeWalk, Consumer<VirtualFile> receiver) thr
421432
treeWalk.enterSubtree();
422433
} else {
423434
if ((treeWalk.getFileMode(0).getBits()&FileMode.TYPE_MASK)==FileMode.TYPE_FILE) {
424-
GitFile f = new GitFile(treeWalk.getPathString(), head, objectReader);
435+
GitFile f = new GitFile(name, treeWalk.getPathString(), head, objectReader);
425436
// to avoid excessive data transfer, skip files that are too big
426-
if (f.size()<1024*1024) {
437+
if (f.size()<1024*1024 && f.isText()) {
427438
receiver.accept(f);
428439
filesSent++;
429440
}

src/main/java/com/launchableinc/ingest/commits/CommitIngester.java

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,8 @@
1818

1919
/** Driver for {@link CommitGraphCollector}. */
2020
public class CommitIngester {
21-
@Deprecated
22-
@Argument(required = true, metaVar = "COMMAND", index = 0)
23-
public String dummyCommandForBackwardCompatibility;
21+
@Argument(required = true, metaVar = "NAME", usage = "Uniquely identifies this repository within the workspace", index = 0)
22+
public String name;
2423

2524
@Argument(required = true, metaVar = "PATH", usage = "Path to Git repository", index = 1)
2625
public File repo;
@@ -40,13 +39,6 @@ public class CommitIngester {
4039
@Option(name = "-skip-cert-verification", usage = "Bypass SSL certification verification.")
4140
public boolean skipCertVerification;
4241

43-
/**
44-
* @deprecated this is an old option and this is on always.
45-
*/
46-
@Deprecated
47-
@Option(name = "-scrub-pii", usage = "Scrub emails and names", hidden = true)
48-
public boolean scrubPii;
49-
5042
@Option(name = "-commit-message", usage = "Collect commit messages")
5143
public boolean commitMessage;
5244

@@ -143,19 +135,27 @@ void run() throws CmdLineException, IOException {
143135
try (Repository db =
144136
new RepositoryBuilder().setFS(FS.DETECTED).findGitDir(repo).setMustExist(true).build()) {
145137
Git git = Git.wrap(db);
146-
CommitGraphCollector cgc = new CommitGraphCollector(git.getRepository());
138+
CommitGraphCollector cgc = new CommitGraphCollector(name, git.getRepository());
147139
cgc.setMaxDays(maxDays);
148140
cgc.setAudit(audit);
149141
cgc.setDryRun(dryRun);
150142
cgc.collectCommitMessage(commitMessage);
151143
cgc.collectFiles(collectFiles);
152144
cgc.transfer(endpoint, authenticator, enableTimeout);
153145
int numCommits = cgc.getCommitsSent();
154-
String suffix = "commit";
155-
if (numCommits != 1) {
156-
suffix = "commits";
157-
}
158-
System.out.printf("Launchable transferred %d more %s from repository %s%n", numCommits, suffix, repo);
146+
int numFiles = cgc.getFilesSent();
147+
System.out.printf("Launchable transferred %d more %s and %d more %s from repository %s%n",
148+
numCommits, plural(numCommits, "commit"),
149+
numFiles, plural(numFiles, "file"),
150+
repo);
151+
}
152+
}
153+
154+
private String plural(int count, String noun) {
155+
if (count == 1) {
156+
return noun;
157+
} else {
158+
return noun + "s";
159159
}
160160
}
161161

src/main/java/com/launchableinc/ingest/commits/FileChunkStreamer.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ protected void writeTo(List<VirtualFile> files, OutputStream os) throws IOExcept
2424
tar.setLongFileMode(LONGFILE_POSIX);
2525

2626
for (VirtualFile f : files) {
27-
TarArchiveEntry e = new TarArchiveEntry(f.path());
27+
TarArchiveEntry e = new TarArchiveEntry("repo:"+f.repo()+"/"+f.path());
2828
e.setSize(f.size());
2929
tar.putArchiveEntry(e);
3030
f.writeTo(tar);
Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,76 @@
11
package com.launchableinc.ingest.commits;
22

33
import org.eclipse.jgit.lib.ObjectId;
4+
import org.eclipse.jgit.lib.ObjectLoader;
45
import org.eclipse.jgit.lib.ObjectReader;
56

67
import java.io.IOException;
8+
import java.io.InputStreamReader;
79
import java.io.OutputStream;
10+
import java.io.Reader;
11+
import java.nio.charset.CharacterCodingException;
12+
import java.nio.charset.StandardCharsets;
813

14+
import static java.nio.charset.StandardCharsets.UTF_8;
915
import static org.eclipse.jgit.lib.Constants.*;
1016

1117
/**
1218
* Represents a file in a Git repository, and encapsulates the read access for convenience.
1319
*/
1420
final class GitFile implements VirtualFile {
21+
final String repo;
1522
final String path;
1623
final ObjectId blob;
1724
private final ObjectReader objectReader;
1825

19-
public GitFile(String path, ObjectId blob, ObjectReader objectReader) {
26+
public GitFile(String repo, String path, ObjectId blob, ObjectReader objectReader) {
27+
this.repo = repo;
2028
this.path = path;
2129
this.blob = blob;
2230
this.objectReader = objectReader;
2331
}
2432

33+
@Override
34+
public String repo() {
35+
return repo;
36+
}
37+
2538
@Override
2639
public String path() {
2740
return path;
2841
}
2942

3043
public long size() throws IOException {
31-
return objectReader.open(blob, OBJ_BLOB).getSize();
44+
return open().getSize();
3245
}
3346

3447
@Override
3548
public void writeTo(OutputStream os) throws IOException {
36-
objectReader.open(blob, OBJ_BLOB).copyTo(os);
49+
open().copyTo(os);
50+
}
51+
52+
private ObjectLoader open() throws IOException {
53+
return objectReader.open(blob, OBJ_BLOB);
54+
}
55+
56+
/**
57+
* Returns true if the file is a text file.
58+
*
59+
* <p>I briefly thought about whether it makes sense to deal with the platform default encoding, then
60+
* decided not. In the unlikely event we decide to deal with this, it'd be best to convert to UTF-8 on the CLI
61+
* side since encoding codec is not portable.
62+
*/
63+
public boolean isText() throws IOException {
64+
try {
65+
char[] c = new char[1024];
66+
try (Reader r = new InputStreamReader(open().openStream(), UTF_8)) {
67+
while (r.read(c)!= -1) {
68+
// Read the file until EOF.
69+
}
70+
}
71+
return true;
72+
} catch (CharacterCodingException e) {
73+
return false;
74+
}
3775
}
3876
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
package com.launchableinc.ingest.commits;
2+
3+
import java.time.Duration;
4+
import java.time.Instant;
5+
import java.util.ArrayList;
6+
import java.util.List;
7+
import java.util.function.Consumer;
8+
import java.util.function.Function;
9+
10+
import static java.time.Instant.now;
11+
12+
/**
13+
* Given a slow {@link Consumer} that goes over a large number of items,
14+
* provide a progress report to show that the work is still in progress.
15+
*/
16+
/**
 * Wraps a slow {@link Consumer} that has to work through many items and prints occasional
 * progress lines so that the user can see the work is still moving.
 *
 * <p>Items passed to {@link #accept} are only buffered. They are forwarded to the wrapped
 * consumer, in arrival order, when {@link #close()} is called; buffering first is what makes the
 * total count available for the "n/total" progress line.
 */
class ProgressReportingConsumer<T> implements Consumer<T>, AutoCloseable {
  /** The consumer that does the actual work. */
  private final Consumer<T> base;
  /** Turns an item into the text shown on a progress line. */
  private final Function<T,String> printer;
  /** Minimum time between two progress lines. */
  private final Duration reportInterval;
  /** Items received so far and not yet forwarded to {@link #base}. */
  private final List<T> pool = new ArrayList<>();

  ProgressReportingConsumer(Consumer<T> base, Function<T,String> printer, Duration reportInterval) {
    this.base = base;
    this.printer = printer;
    this.reportInterval = reportInterval;
  }

  /** Buffers the item; nothing is forwarded until {@link #close()}. */
  @Override
  public void accept(T t) {
    pool.add(t);
  }

  /**
   * Forwards every buffered item to the wrapped consumer, writing a progress line to standard
   * error whenever more than the report interval has elapsed since the previous line (or since
   * this method started), then empties the buffer.
   */
  @Override
  public void close() {
    int digits = String.valueOf(pool.size()).length();
    Instant deadline = Instant.now().plus(reportInterval);
    int seen = 0;
    for (T item : pool) {
      seen += 1;
      if (Instant.now().isAfter(deadline)) {
        System.err.printf("%s/%d: %s%n", pad(seen, digits), pool.size(), printer.apply(item));
        deadline = Instant.now().plus(reportInterval);
      }
      base.accept(item);
    }
    pool.clear();
  }

  /**
   * Renders {@code i} in decimal, left-padded with spaces to at least {@code width} characters.
   * Values already as wide as {@code width} or wider are returned without padding.
   */
  static String pad(int i, int width) {
    String number = String.valueOf(i);
    StringBuilder out = new StringBuilder();
    for (int filled = number.length(); filled < width; filled++) {
      out.append(' ');
    }
    return out.append(number).toString();
  }
}

src/main/java/com/launchableinc/ingest/commits/VirtualFile.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,14 @@
44
import java.io.OutputStream;
55

66
public interface VirtualFile {
7+
/**
8+
* Repository identifier, unique within the workspace.
9+
*/
10+
String repo();
11+
12+
/**
13+
* Path to the file within the repository.
14+
*/
715
String path();
816
long size() throws IOException;
917
void writeTo(OutputStream os) throws IOException;

src/test/java/com/launchableinc/ingest/commits/AllTests.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
CommitGraphCollectorTest.class,
1010
CommitIngesterTest.class,
1111
FileChunkStreamerTest.class,
12-
SSLBypassTest.class
12+
SSLBypassTest.class,
13+
ProgressReportingConsumerTest.class
1314
})
1415
public class AllTests {}

src/test/java/com/launchableinc/ingest/commits/CommitGraphCollectorTest.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ public void chunking() throws Exception {
100100
setupRepos();
101101
try (Git mainrepo = Git.open(mainrepoDir)) {
102102
addCommitInSubRepo(mainrepo);
103-
CommitGraphCollector cgc = new CommitGraphCollector(mainrepo.getRepository());
103+
CommitGraphCollector cgc = new CommitGraphCollector("test", mainrepo.getRepository());
104104
cgc.setMaxDays(30);
105105
cgc.collectFiles(true);
106106
cgc.transfer(
@@ -143,7 +143,7 @@ public void scrubPii() throws Exception {
143143
ByteArrayOutputStream baos = new ByteArrayOutputStream();
144144
try (Git mainrepo = Git.open(mainrepoDir)) {
145145
addCommitInSubRepo(mainrepo);
146-
CommitGraphCollector cgc = new CommitGraphCollector(mainrepo.getRepository());
146+
CommitGraphCollector cgc = new CommitGraphCollector("test", mainrepo.getRepository());
147147
cgc.setMaxDays(30);
148148
cgc.transfer(ImmutableList.of(), c -> c.writeTo(baos), f -> {}, Integer.MAX_VALUE);
149149
}
@@ -154,7 +154,7 @@ public void scrubPii() throws Exception {
154154

155155
private CommitGraphCollector collectCommit(Repository r, List<ObjectId> advertised)
156156
throws IOException {
157-
CommitGraphCollector cgc = new CommitGraphCollector(r);
157+
CommitGraphCollector cgc = new CommitGraphCollector("test", r);
158158
cgc.setMaxDays(30);
159159
cgc.collectFiles(true);
160160
cgc.transfer(advertised, c -> {}, f -> {}, 3);

src/test/java/com/launchableinc/ingest/commits/FileChunkStreamerTest.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,10 @@ public void basics() throws Exception {
3030
try (FileChunkStreamer fs = new FileChunkStreamer(content -> {
3131
switch(count[0]++) {
3232
case 0:
33-
assertThat(readEntries(content)).containsExactly("foo.txt", "bar.txt").inOrder();
33+
assertThat(readEntries(content)).containsExactly("repo:test/foo.txt", "repo:test/bar.txt").inOrder();
3434
break;
3535
case 1:
36-
assertThat(readEntries(content)).containsExactly("zot.txt").inOrder();
36+
assertThat(readEntries(content)).containsExactly("repo:test/zot.txt").inOrder();
3737
break;
3838
default:
3939
fail();
@@ -68,6 +68,11 @@ private static class VirtualFileImpl implements VirtualFile {
6868
this.path = path;
6969
}
7070

71+
@Override
72+
public String repo() {
73+
return "test";
74+
}
75+
7176
@Override
7277
public String path() {
7378
return path;

0 commit comments

Comments
 (0)