Skip to content

Commit f57cdea

Browse files
committed
Improve estimation, error handling, verification, etc.
1 parent 67f88dd commit f57cdea

File tree

3 files changed

+189
-58
lines changed

3 files changed

+189
-58
lines changed

estimate_push_protection_rate.py

Lines changed: 71 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,12 @@ def add_args(parser: argparse.ArgumentParser) -> None:
1919
type=str,
2020
help="Path to the file containing the list of patterns with push protection",
2121
)
22+
parser.add_argument(
23+
"--cut-off-date",
24+
type=str,
25+
default=None,
26+
help="ISO date string to filter secrets detected after this date (e.g., 2023-01-01)",
27+
)
2228

2329

2430
def main() -> None:
@@ -35,33 +41,85 @@ def main() -> None:
3541
with open(args.secrets_file, "r") as f:
3642
secrets = json.load(f)
3743

38-
total_secrets = len(secrets)
44+
secrets_count = len(secrets)
3945
protected_secrets = [secret for secret in secrets if secret.get("secret_type") in patterns]
46+
protected_secrets_count = len(protected_secrets)
4047

41-
print(f"Total secrets: {total_secrets}")
42-
print(f"Protected secrets: {len(protected_secrets)}")
48+
print(f"Total secrets: {secrets_count}")
49+
print(f"Protected secrets: {protected_secrets_count}")
4350

44-
if total_secrets > 0:
45-
protection_rate = (len(protected_secrets) / total_secrets) * 100
51+
if secrets_count > 0:
52+
protection_rate = (protected_secrets_count / secrets_count) * 100
4653
print(f"Estimated push protection rate: {protection_rate:.2f}%")
4754
else:
4855
print("No secrets found to evaluate.")
56+
return
4957

5058
# now evaluate how often we'd expect to block pushes, using the `first_commit_date` field
5159
# that's in ISO format with a Z suffix
5260
now = datetime.now(timezone.utc)
5361

54-
# find the oldest blocked commit
55-
earliest_blocked_commit_date = min([
56-
datetime.fromisoformat(secret["first_commit_date"].replace("Z", "+00:00"))
57-
for secret in protected_secrets
58-
])
62+
cut_off_date = args.cut_off_date
63+
cut_off_datetime = None
64+
65+
if cut_off_date is not None:
66+
try:
67+
# add a time and TZ if just a date is provided
68+
if len(cut_off_date) == 10:
69+
cut_off_date += "T00:00:00+00:00"
70+
# Handle 'Z' suffix for UTC
71+
if cut_off_date.endswith("Z"):
72+
cut_off_date = cut_off_date.replace("Z", "+00:00")
73+
cut_off_datetime = datetime.fromisoformat(cut_off_date)
74+
remaining_protected_secrets = [
75+
secret for secret in protected_secrets
76+
if "first_commit_date" in secret and datetime.fromisoformat(secret["first_commit_date"].replace("Z", "+00:00")) >= cut_off_datetime
77+
]
78+
remaining_secrets = [
79+
secret for secret in secrets
80+
if "first_commit_date" in secret and datetime.fromisoformat(secret["first_commit_date"].replace("Z", "+00:00")) >= cut_off_datetime
81+
]
82+
except ValueError:
83+
print(f"Invalid cut-off date format: {cut_off_date}. Expected ISO format.")
84+
return
85+
86+
if not remaining_protected_secrets:
87+
print("No protected secrets found after applying cut-off date filter.")
88+
return
89+
else:
90+
remaining_secrets_count = len(remaining_secrets)
91+
remaining_protected_secrets_count = len(remaining_protected_secrets)
92+
print(f"Total secrets after cut-off date: {remaining_secrets_count}")
93+
print(f"Protected secrets after cut-off date: {remaining_protected_secrets_count}")
94+
protection_rate = (remaining_protected_secrets_count / remaining_secrets_count) * 100
95+
print(f"Estimated push protection rate after cut-off date: {protection_rate:.2f}%")
96+
else:
97+
remaining_protected_secrets = protected_secrets
5998

60-
blocking_timespan = now - earliest_blocked_commit_date
61-
rate = len(protected_secrets) / blocking_timespan.days if blocking_timespan.days > 0 else len(protected_secrets)
99+
# get FPs for closed secrets, and estimate for any open ones
100+
false_positives = 0
101+
102+
false_positives += sum([1 for secret in remaining_protected_secrets if secret.get("state") == "closed" and secret.get("resolution") == "false_positive"])
103+
false_positives += sum([1 for secret in remaining_protected_secrets if secret.get("state") == "open"]) // 100
104+
105+
print(f"Measured + expected false positives: {false_positives}")
106+
107+
if cut_off_date:
108+
earliest_date = cut_off_datetime
109+
else:
110+
# find the oldest blocked commit with an accessible commit
111+
earliest_date = min((
112+
datetime.fromisoformat(secret["first_commit_date"].replace("Z", "+00:00")) if "first_commit_date" in secret else now
113+
for secret in remaining_protected_secrets
114+
))
62115

63-
print(f"Estimated secrets blocked per day since {earliest_blocked_commit_date.date()}: {rate:.2f}")
116+
blocking_timespan = now - earliest_date
117+
rate = len(remaining_protected_secrets) / blocking_timespan.days if blocking_timespan.days > 0 else len(remaining_protected_secrets)
64118

119+
print(f"Estimated secrets blocked per day since {earliest_date.date()}: {rate:.2f}")
120+
print(f" ... per week ... : {rate * 7:.2f}")
121+
print(f" ... per month ... : {rate * 30:.2f}")
122+
print(f" ... per year ... : {rate * 365:.2f}")
65123

66124
if __name__ == "__main__":
67125
main()

githubapi.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -50,12 +50,13 @@ class RateLimited(Exception):
5050
class GitHub:
5151
"""A GitHub API client."""
5252

53-
def __init__(self, token: str | None = None, hostname="github.com") -> None:
53+
def __init__(self, token: str | None = None, hostname="github.com", verify: bool | str = True) -> None:
5454
token = token if token is not None else os.getenv("GITHUB_TOKEN")
5555
if token is None:
5656
raise ValueError("GITHUB_TOKEN environment variable must be set")
5757

5858
self.session = requests.Session()
59+
self.session.verify = verify
5960
self.session.headers.update({"Authorization": f"Bearer {token}"})
6061
self.session.headers.update({"Accept": "application/vnd.github.v3+json"})
6162
self.session.headers.update({"X-GitHub-Api-Version": "2022-11-28"})
@@ -304,7 +305,7 @@ def paginate(
304305
)
305306

306307
if progress:
307-
pbar = tqdm(desc="GitHub API", unit=" requests")
308+
pbar = tqdm(desc="Paging with GitHub API", unit="page")
308309
pbar.reset(total=None)
309310

310311
direction = ""
@@ -430,6 +431,7 @@ def list_secret_scanning_alerts(
430431
scope: str = "org",
431432
bypassed: bool = False,
432433
generic: bool = False,
434+
progress: bool = True,
433435
) -> Generator[dict, None, None]:
434436
"""List secret scanning alerts for a GitHub repository, organization or Enterprise."""
435437
query = {"state": state} if state is not None else {}
@@ -445,6 +447,7 @@ def list_secret_scanning_alerts(
445447
since=since,
446448
date_field="created_at",
447449
paging="cursor",
450+
progress=progress,
448451
)
449452

450453
results = (

0 commit comments

Comments
 (0)