Skip to content

Commit 59df6bb

Browse files
authored
Crash page on prompt dialog loop to continue (#929)
- If a page is stuck in a window.alert / window.prompt loop, showing more than 10 consecutive dialogs (unrelated to unloading), call Page.crash() to move on to the next page more quickly, as not much else can be done.
- Add exception handling in dialog accept/dismiss to avoid a crawler crash.
- Fixes #926
1 parent 8e44b31 commit 59df6bb

File tree

2 files changed

+38
-15
lines changed

2 files changed

+38
-15
lines changed

src/crawler.ts

Lines changed: 35 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ import {
4747
ExitCodes,
4848
InterruptReason,
4949
BxFunctionBindings,
50+
MAX_JS_DIALOG_PER_PAGE,
5051
} from "./util/constants.js";
5152

5253
import { AdBlockRules, BlockRuleDecl, BlockRules } from "./util/blockrules.js";
@@ -874,30 +875,49 @@ self.__bx_behaviors.selectMainBehavior();
874875
await this.browser.addInitScript(page, initScript);
875876
}
876877

878+
let dialogCount = 0;
879+
877880
// Handle JS dialogs:
878881
// - Ensure off-page navigation is canceled while behavior is running
879882
// - dismiss close all other dialogs if not blocking unload
880883
page.on("dialog", async (dialog) => {
881884
let accepted = true;
882-
if (dialog.type() === "beforeunload") {
883-
if (opts.pageBlockUnload) {
884-
accepted = false;
885-
await dialog.dismiss();
885+
let msg = {};
886+
try {
887+
if (dialog.type() === "beforeunload") {
888+
if (opts.pageBlockUnload) {
889+
accepted = false;
890+
}
886891
} else {
892+
// other JS dialog, just dismiss
893+
accepted = false;
894+
if (dialogCount >= MAX_JS_DIALOG_PER_PAGE) {
895+
// dialog likely in a loop, need to crash page to avoid being stuck
896+
logger.error(
897+
"JS Dialog appears to be in a loop, crashing page to continue",
898+
);
899+
await cdp.send("Page.crash");
900+
return;
901+
}
902+
dialogCount++;
903+
}
904+
msg = {
905+
accepted,
906+
blockingUnload: opts.pageBlockUnload,
907+
message: dialog.message(),
908+
type: dialog.type(),
909+
page: page.url(),
910+
workerid,
911+
};
912+
if (accepted) {
887913
await dialog.accept();
914+
} else {
915+
await dialog.dismiss();
888916
}
889-
} else {
890-
// other JS dialog, just dismiss
891-
await dialog.dismiss();
917+
logger.debug("JS Dialog", msg);
918+
} catch (e) {
919+
logger.warn("JS Dialog Error", { ...msg, ...formatErr(e) });
892920
}
893-
logger.debug("JS Dialog", {
894-
accepted,
895-
blockingUnload: opts.pageBlockUnload,
896-
message: dialog.message(),
897-
type: dialog.type(),
898-
page: page.url(),
899-
workerid,
900-
});
901921
});
902922

903923
// only add if running with autoclick behavior

src/util/constants.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ export const SITEMAP_INITIAL_FETCH_TIMEOUT_SECS = 30;
4343

4444
export const ROBOTS_CACHE_LIMIT = 100;
4545

46+
// max JS dialogs (alert/prompt) to allow per page
47+
export const MAX_JS_DIALOG_PER_PAGE = 10;
48+
4649
export type ExtractSelector = {
4750
selector: string;
4851
extract: string;

0 commit comments

Comments
 (0)