Skip to content

Commit 993081d

Browse files
authored
better handling of net::ERR_HTTP_RESPONSE_CODE_FAILURE (#934)
- http headers provided but no payload: record response
- record page as failed with the status code provided; don't attempt to retry
1 parent 822de93 commit 993081d

File tree

3 files changed

+26
-3
lines changed

3 files changed

+26
-3
lines changed

src/crawler.ts

Lines changed: 14 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1327,7 +1327,7 @@ self.__bx_behaviors.selectMainBehavior();
1327 1327   }
1328 1328   // if page loaded, considered page finished successfully
1329 1329   // (even if behaviors timed out)
1330      - const { loadState, logDetails, depth, url, pageSkipped } = data;
     1330 + const { loadState, logDetails, depth, url, pageSkipped, noRetries } = data;
1331 1331
1332 1332   if (data.loadState >= LoadState.FULL_PAGE_LOADED) {
1333 1333     await this.writePage(data);
@@ -1347,7 +1347,7 @@ self.__bx_behaviors.selectMainBehavior();
1347 1347   if (pageSkipped) {
1348 1348     await this.crawlState.markExcluded(url);
1349 1349   } else {
1350      -   const retry = await this.crawlState.markFailed(url);
     1350 +   const retry = await this.crawlState.markFailed(url, noRetries);
1351 1351
1352 1352     if (this.healthChecker) {
1353 1353       this.healthChecker.incError();
@@ -2215,8 +2215,8 @@ self.__bx_behaviors.selectMainBehavior();
2215 2215   if (msg !== "logged") {
2216 2216     const loadState = data.loadState;
2217 2217
2218      -   // excluded in recorder
2219 2218     if (msg.startsWith("net::ERR_BLOCKED_BY_RESPONSE")) {
     2219 +     // excluded in recorder
2220 2220       data.pageSkipped = true;
2221 2221       logger.warn("Page Load Blocked, skipping", { msg, loadState });
2222 2222     } else {
@@ -2274,6 +2274,17 @@ self.__bx_behaviors.selectMainBehavior();
2274 2274   }
2275 2275
2276 2276   if (failed) {
     2277 +   const failText = resp.request().failure()?.errorText;
     2278 +   if (isChromeError && failText === "net::ERR_HTTP_RESPONSE_CODE_FAILURE") {
     2279 +     data.noRetries = true;
     2280 +     logger.warn(
     2281 +       "Page is an empty non-200 response, not retrying",
     2282 +       { url, status, ...logDetails },
     2283 +       "pageStatus",
     2284 +     );
     2285 +     throw new Error("logged");
     2286 +   }
     2287 +
2277 2288     return this.pageFailed(
2278 2289       isChromeError ? "Page Crashed on Load" : "Page Invalid Status",
2279 2290       retry,

src/util/recorder.ts

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -464,6 +464,16 @@ export class Recorder extends EventEmitter {
464 464   }
465 465   break;
466 466
    467 + case "net::ERR_HTTP_RESPONSE_CODE_FAILURE":
    468 +   logger.warn("Recording empty non-200 status response", {
    469 +     url,
    470 +     status: reqresp.status,
    471 +     errorText,
    472 +     type,
    473 +     ...this.logDetails,
    474 +   });
    475 +   return this.serializeToWARC(reqresp);
    476 +
467 477   default:
468 478     this.lastErrorText = errorText;
469 479     logger.warn(

src/util/state.ts

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -89,6 +89,8 @@ export class PageState {
89 89
90 90   skipBehaviors = false;
91 91   pageSkipped = false;
   92 + noRetries = false;
   93 +
92 94   asyncLoading = false;
93 95   filteredFrames: Frame[] = [];
94 96   loadState: LoadState = LoadState.FAILED;

0 commit comments

Comments
 (0)