Skip to content

Commit b9b804e

Browse files
authored
improvements to support pausing: (#919)
- clear size to 0 immediately after WACZ is uploaded - if crawler is paused, ensure upload of any data on startup - fetcher queue: stop queuing async requests if recorder is marked as stopping
1 parent 565ba54 commit b9b804e

File tree

2 files changed

+23
-5
lines changed

2 files changed

+23
-5
lines changed

src/crawler.ts

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1554,7 +1554,10 @@ self.__bx_behaviors.selectMainBehavior();
15541554
if (interrupt) {
15551555
this.uploadAndDeleteLocal = true;
15561556
this.gracefulFinishOnInterrupt(interrupt);
1557+
return true;
15571558
}
1559+
1560+
return false;
15581561
}
15591562

15601563
gracefulFinishOnInterrupt(interruptReason: InterruptReason) {
@@ -1691,7 +1694,11 @@ self.__bx_behaviors.selectMainBehavior();
16911694
return;
16921695
}
16931696

1694-
await this.checkLimits();
1697+
if (await this.checkLimits()) {
1698+
// if interrupted
1699+
await this.postCrawl();
1700+
return;
1701+
}
16951702

16961703
await this.crawlState.setStatus("running");
16971704

@@ -1869,6 +1876,7 @@ self.__bx_behaviors.selectMainBehavior();
18691876
const uploaded = await this.generateWACZ();
18701877

18711878
if (uploaded && this.uploadAndDeleteLocal) {
1879+
await this.crawlState.setArchiveSize(0);
18721880
logger.info(
18731881
`Uploaded WACZ, deleting local data to free up space: ${this.collDir}`,
18741882
);

src/util/recorder.ts

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,8 @@ export class Recorder extends EventEmitter {
145145

146146
shouldSaveStorage = false;
147147

148+
stopping = false;
149+
148150
constructor({
149151
workerid,
150152
writer,
@@ -857,8 +859,10 @@ export class Recorder extends EventEmitter {
857859
}
858860

859861
addAsyncFetch(opts: AsyncFetchOptions) {
860-
const fetcher = new AsyncFetcher(opts);
861-
void this.fetcherQ.add(() => fetcher.load());
862+
if (!this.stopping) {
863+
const fetcher = new AsyncFetcher(opts);
864+
void this.fetcherQ.add(() => fetcher.load());
865+
}
862866
}
863867

864868
addExternalFetch(url: string, cdp: CDPSession) {
@@ -1046,6 +1050,8 @@ export class Recorder extends EventEmitter {
10461050
}
10471051

10481052
async onDone(timeout: number) {
1053+
this.stopping = true;
1054+
10491055
await this.crawlState.setStatus("pending-wait");
10501056

10511057
const finishFetch = async () => {
@@ -1063,6 +1069,8 @@ export class Recorder extends EventEmitter {
10631069
);
10641070
}
10651071

1072+
this.fetcherQ.clear();
1073+
10661074
logger.debug("Finishing WARC writing", this.logDetails, "recorder");
10671075

10681076
await this.writer.flush();
@@ -1356,8 +1364,10 @@ export class Recorder extends EventEmitter {
13561364
await fetcher.doCancel();
13571365
return false;
13581366
}
1359-
state.asyncLoading = true;
1360-
void this.fetcherQ.add(() => fetcher.loadDirectPage(state, crawler));
1367+
if (!this.stopping) {
1368+
state.asyncLoading = true;
1369+
void this.fetcherQ.add(() => fetcher.loadDirectPage(state, crawler));
1370+
}
13611371
return true;
13621372
}
13631373

0 commit comments

Comments
 (0)