From bcb084a0f7d7ee8da61b916054c8d4cb2c8d7191 Mon Sep 17 00:00:00 2001 From: Joshua Zhou Date: Thu, 29 Jan 2026 01:18:49 -0500 Subject: [PATCH] feats and also the download resumes script --- .DS_Store | Bin 0 -> 8196 bytes constants/role.constant.js | 15 ++- scripts/.DS_Store | Bin 0 -> 8196 bytes scripts/download_all_resumes.py | 220 ++++++++++++++++++++++++++++++++ 4 files changed, 230 insertions(+), 5 deletions(-) create mode 100644 .DS_Store create mode 100644 scripts/.DS_Store create mode 100644 scripts/download_all_resumes.py diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..167c2ec69a635c78aaf01ec3a62daeb57df5fa1c GIT binary patch literal 8196 zcmeHMTWl0n7(U;$&>1GcP+KU-vMVc4p@gNBDx$FMwvkKAZCiTbvfbSo>4fP_*_qu^ zo0|Hhf|~fEQBmKF`hXgRpid@B)WrBekSH%=Onlm z9|8PEV>$=;F|j}<134k2A4+SA(*uG~1V;>raI(j^JIQ1qCxjH?3?iH%xHEzi3jDiM zT+DZ8ND3MDVFkhp%&Y*XPZe`no)wsRy-&*fL!&k+=k@qH@;kD!EElfJ~Yzog3-t?dv~qFr#Tj*Vt!RS^%jeM9KB!9jiEGlVw3ZYvqgNY_wQb+I&oS=L?Qy?QvYwGab9H*E5cL+{PE0V(RU8 zor0J07wa`?ec@Hj@3DKB2fO=?f=g0SPS0Buy|;dC-l!IrWpR(l^0*>vXAbI=WT&I^6?NYdi7a0w%LlkVRWPU*YBjQ^fdaB!E~Oki zW@5dfXl~b0dif$$-PkNEn%-??bA}bD9^6c-ojoOyaiFZ-Dl5HtXK09$afL3DYLjK{ z^TYhc9a-1gJ#5&MkCfUb%l+) zH*`vRN!{Y=NIKFjYZoX+GwqT|V5fH=FfYf;Xp1LTi(&lZfdPNy*EpU`2z8ARlRe+$ zA}qmL*$#G?$TrGOvKQFv>;ra&on`0QSL_nI!hU2wv7gy5>>B$6m6(GXEX4|}LOt$7 zBQ~H3%}AjGo!EtbT4b?%R2sVt3WlE?g8_v3gz80}r=M0#V?$ z4AmPc;G=#DSA5j_$e4~;pNs0UXgATqIDVasf}CYaYE_)R@Bu= zYO{$nV!9@}HmXU)RWVIO8&$1>n5g)%8>4Gg4YHVQQa7twC2>+rx2VfhjVWT%8f}Ye zoMH0ch4nAk1@;YL{VHMoPgFq0BE+x~3BvXkB+-I4v=hGfU@y|>!XacZfE=OSf(-}9 zP{0_*3F{Mh0w?ecp2c%`5ib$qPvKR<`r5<{(goXc>?33;Ghk}azj9U~p1O{o|J!f<`~SZ(xA39E3WOE7y$YbRE#1~izPI|e zNo0%BH9!|{+;2iiKZH8&!u9@foaE#`3~3w-Y{@`Q2uT)d|M(99xBmxsxc`Uyzc>>A I2lxNK0MA}_F#rGn literal 0 HcmV?d00001 diff --git a/constants/role.constant.js b/constants/role.constant.js index ebd33a7d..7d2866f8 100644 --- a/constants/role.constant.js +++ b/constants/role.constant.js @@ -95,7 +95,8 @@ const 
sponsorT1Role = { Constants.Routes.searchRoutes.get, Constants.Routes.accountRoutes.getAnyById, - Constants.Routes.hackerRoutes.getAnyById + Constants.Routes.hackerRoutes.getAnyById, + Constants.Routes.teamRoutes.get ] }; @@ -110,7 +111,8 @@ const sponsorT2Role = { Constants.Routes.searchRoutes.get, Constants.Routes.accountRoutes.getAnyById, - Constants.Routes.hackerRoutes.getAnyById + Constants.Routes.hackerRoutes.getAnyById, + Constants.Routes.teamRoutes.get ] }; @@ -125,7 +127,8 @@ const sponsorT3Role = { Constants.Routes.searchRoutes.get, Constants.Routes.accountRoutes.getAnyById, - Constants.Routes.hackerRoutes.getAnyById + Constants.Routes.hackerRoutes.getAnyById, + Constants.Routes.teamRoutes.get ] }; @@ -140,7 +143,8 @@ const sponsorT4Role = { Constants.Routes.searchRoutes.get, Constants.Routes.accountRoutes.getAnyById, - Constants.Routes.hackerRoutes.getAnyById + Constants.Routes.hackerRoutes.getAnyById, + Constants.Routes.teamRoutes.get ] }; @@ -155,7 +159,8 @@ const sponsorT5Role = { Constants.Routes.searchRoutes.get, Constants.Routes.accountRoutes.getAnyById, - Constants.Routes.hackerRoutes.getAnyById + Constants.Routes.hackerRoutes.getAnyById, + Constants.Routes.teamRoutes.get ] }; diff --git a/scripts/.DS_Store b/scripts/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..0d34f37150ec352c43124a070a33a44507f13d74 GIT binary patch literal 8196 zcmeHMU2GIp6u#fIzzh>$s4Wy^*_9QjP{PvkBcibFwvnHfzisIcKke+!NGD8Z%FgVT z+SJr1m8yv^8Wr`;s1K-72>N8AL`{qj1c~w@#>6LH{KE$m!V?ODr_=n2 zK%F5iXw*g#h$3)v1bF&XGK1;NXZqChdv3_KJU^AX1gWBO#>`prELoKgX9wKjtgrh; zuURh~Ic6a+undz_4s}_M;SM*uMaS?0+pHLn6Zb;Q9);De(8&l+(C34pUY)`l-a5}C)U;7(|hnxpQaT(bH8cpzEv_?bl>#)w0R!6ww1=NxlT{FZ=Ue^7@ug`PN^~L(7aexyJ+$KOJXshbKRyqOLsA69WPq` zh?bOP@r>BAq#|o)4jJTRyKC?njbJ1eTeeb`4{~{?U{Wi@YHW2q8R#7zxg7q?)H+4c zypCh^+J&jQVWX^QMyIXkOgj`kvV}yux=O<1P*}4~R=V@<;2=5U30W-DBFoxWNBD!= zbkE;2WIB^SDYIRcd%Xd@VD1?v(3LIPWI1?^BPo7C-FR9z>9bVuM*Zz;SECiJ9roG;Uk>ES)9Ws0p`EKcesq7Z$NSTbQI%{sVFXG 
#!/usr/bin/env python3
"""Download every hacker resume from GCS and bundle them into one zip file.

Hacker documents are read from MongoDB, each referenced blob is fetched from
the configured Google Cloud Storage bucket into a temporary directory, and the
directory is then written to the ``--out`` archive.

Configuration comes from the environment (a ``.env`` file is loaded first):
``DB_ADDRESS_DEPLOY``, ``DB_USER_DEPLOY``, ``DB_PASS_DEPLOY``, ``BUCKET_NAME``
and the service-account fields listed in ``GCS_ENV_VARS``.
"""
import argparse
import os
import tempfile
import zipfile
from urllib.parse import quote_plus

# NOTE: third-party packages (dotenv, pymongo, bson, google-cloud-storage) are
# imported inside the functions that need them so the pure helpers below can
# be imported and tested with nothing but the standard library installed.

DEFAULT_DB_NAME = "hackboard-deploy"

DB_ENV_VARS = (
    "DB_ADDRESS_DEPLOY",
    "DB_USER_DEPLOY",
    "DB_PASS_DEPLOY",
    "BUCKET_NAME",
)

# Lower-casing each name yields the matching service-account JSON key.
GCS_ENV_VARS = (
    "TYPE",
    "PROJECT_ID",
    "PRIVATE_KEY_ID",
    "PRIVATE_KEY",
    "CLIENT_EMAIL",
    "CLIENT_ID",
    "AUTH_URI",
    "TOKEN_URI",
    "AUTH_PROVIDER_X509_CERT_URL",
    "CLIENT_X509_CERT_URL",
)

CONTENT_TYPE_EXTENSIONS = {
    "application/pdf": ".pdf",
    "application/msword": ".doc",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
    "image/png": ".png",
    "image/jpeg": ".jpg",
}


def build_mongo_uri(address: str, username: str, password: str) -> str:
    """Return a MongoDB connection URI for *address*.

    A value that is already a full URI is returned unchanged (after
    stripping whitespace). A value that already contains credentials
    (``user:pass@host``) only gets the scheme prepended. Otherwise
    *username* and *password* are percent-encoded and embedded.
    """
    address = address.strip()
    if address.startswith(("mongodb://", "mongodb+srv://")):
        return address
    if "@" in address:
        return f"mongodb://{address}"
    return f"mongodb://{quote_plus(username)}:{quote_plus(password)}@{address}"


def redact_mongo_uri(uri: str) -> str:
    """Return *uri* with the password replaced by ``***`` for logging.

    URIs without a ``user:password@`` section are returned unchanged.
    """
    scheme, sep, rest = uri.partition("://")
    if not sep:
        return uri
    # Split on the LAST "@" so an unescaped "@" inside the password is
    # masked too rather than leaking part of the secret.
    userinfo, at, hosts = rest.rpartition("@")
    user, colon, _ = userinfo.partition(":")
    if not at or not colon:
        return uri
    return f"{scheme}://{user}:***@{hosts}"


def guess_extension(content_type: str) -> str:
    """Map a MIME type to a file extension, or ``""`` when unknown.

    Parameters after ``;`` (for example ``charset``) and letter case are
    ignored, so ``"Application/PDF; charset=binary"`` still maps to
    ``".pdf"``.
    """
    if not content_type:
        return ""
    mime = content_type.split(";", 1)[0].strip().lower()
    return CONTENT_TYPE_EXTENSIONS.get(mime, "")


def safe_name(value: str) -> str:
    """Keep only alphanumerics, ``-`` and ``_`` from *value*.

    Returns ``"unknown"`` when nothing is left.
    """
    cleaned = "".join(
        ch for ch in value.strip() if ch.isalnum() or ch in ("-", "_")
    )
    return cleaned or "unknown"


def resume_filename(
    doc_id: str, first: str, last: str, basename: str, ext: str
) -> str:
    """Build the archive member name for one resume.

    *first* and *last* are expected to be ``safe_name`` outputs. When both
    are ``"unknown"`` the name falls back to ``<doc_id>__<blob stem><ext>``;
    otherwise it is ``<first>_<last>_resume<ext>``.
    """
    if first == "unknown" and last == "unknown":
        # Use the stem only: when *ext* was taken from *basename*, appending
        # it to the full basename would double it ("cv.pdf.pdf").
        stem = os.path.splitext(basename)[0]
        return f"{doc_id}__{stem}{ext}"
    return f"{first}_{last}_resume{ext}"


def unique_name(name: str, taken: set) -> str:
    """Return *name*, suffixed with ``_2``, ``_3``… if it is already taken.

    The chosen name is recorded in *taken*. Comparison is case-insensitive
    because the temporary directory may live on a case-insensitive file
    system, where names differing only in case would overwrite each other.
    """
    stem, ext = os.path.splitext(name)
    candidate = name
    counter = 2
    while candidate.casefold() in taken:
        candidate = f"{stem}_{counter}{ext}"
        counter += 1
    taken.add(candidate.casefold())
    return candidate


def require_env(names, label: str = "env vars") -> dict:
    """Read *names* from the environment.

    Raises:
        SystemExit: if any variable is unset or empty; the message lists
            every missing name.
    """
    values = {name: os.environ.get(name) for name in names}
    missing = [name for name, value in values.items() if not value]
    if missing:
        raise SystemExit(f"Missing required {label}: {', '.join(missing)}")
    return values


def open_bucket(gcs_env: dict, bucket_name: str):
    """Return a GCS bucket handle authenticated from service-account fields."""
    from google.cloud import storage
    from google.oauth2 import service_account

    credentials_info = {name.lower(): value for name, value in gcs_env.items()}
    # The key is often stored with literal "\n" sequences in .env files;
    # turn them back into real newlines.
    credentials_info["private_key"] = credentials_info["private_key"].replace(
        "\\n", "\n"
    )
    credentials = service_account.Credentials.from_service_account_info(
        credentials_info
    )
    storage_client = storage.Client(
        project=credentials_info["project_id"], credentials=credentials
    )
    return storage_client.bucket(bucket_name)


def lookup_account(accounts, raw_id, cache: dict) -> dict:
    """Return the account document for *raw_id*, or ``{}`` if not found.

    *raw_id* may be an ``ObjectId``, its string form, or an extended-JSON
    ``{"$oid": ...}`` dict. Results (including misses) are memoised in
    *cache* so each account is queried at most once.
    """
    from bson import ObjectId
    from bson.errors import InvalidId

    account_id = raw_id
    if isinstance(account_id, dict) and "$oid" in account_id:
        account_id = account_id["$oid"]
    if isinstance(account_id, str):
        try:
            account_id = ObjectId(account_id)
        except InvalidId:
            pass  # Not an ObjectId string; look it up as a plain string id.
    if account_id is None:
        return {}
    if account_id in cache:
        return cache[account_id]

    fields = {"firstName": 1, "lastName": 1}
    account = accounts.find_one({"_id": account_id}, fields) or {}
    if not account and isinstance(account_id, ObjectId):
        # Fall back to a string _id in case accounts store ids as strings.
        account = accounts.find_one({"_id": str(account_id)}, fields) or {}
    cache[account_id] = account
    return account


def bundle_resumes(docs, bucket, accounts, out_path: str, *, debug: bool = False):
    """Download the resume of every hacker in *docs* and zip them.

    Returns:
        ``(total, downloaded)``: documents seen and files actually written.
    """
    total = 0
    downloaded = 0
    account_cache = {}
    taken = set()
    with tempfile.TemporaryDirectory() as tmpdir:
        for doc in docs:
            total += 1
            resume_path = (
                doc.get("application", {})
                .get("general", {})
                .get("URL", {})
                .get("resume", "")
            )
            if not resume_path:
                if debug:
                    print(f"Skip {doc.get('_id')}: missing resume path")
                continue

            blob = bucket.blob(resume_path)
            if not blob.exists():
                if debug:
                    print(f"Missing blob: {resume_path}")
                continue

            account = lookup_account(accounts, doc.get("accountId"), account_cache)
            first = safe_name(str(account.get("firstName", "")))
            last = safe_name(str(account.get("lastName", "")))
            if debug:
                print("Names: ", first, last)

            basename = os.path.basename(resume_path)
            ext = os.path.splitext(basename)[1]
            if not ext:
                # Only fetch blob metadata when the path carries no extension.
                blob.reload()
                ext = guess_extension(blob.content_type)

            # Without de-duplication two hackers with the same name would
            # overwrite each other's file and inflate the download count.
            local_name = unique_name(
                resume_filename(str(doc["_id"]), first, last, basename, ext),
                taken,
            )
            local_path = os.path.join(tmpdir, local_name)

            if debug:
                print(f"Downloading {resume_path} -> {local_name}")
            with open(local_path, "wb") as fh:
                fh.write(blob.download_as_bytes())
            downloaded += 1

        # The archive is written only after every download has succeeded,
        # so a failed run never leaves a partial zip at out_path.
        with zipfile.ZipFile(out_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
            for name in sorted(os.listdir(tmpdir)):
                zf.write(os.path.join(tmpdir, name), arcname=name)
    return total, downloaded


def main() -> int:
    """Command-line entry point; returns the process exit status."""
    from dotenv import load_dotenv
    from pymongo import MongoClient

    load_dotenv(override=True)
    parser = argparse.ArgumentParser(
        description="Download all hacker resumes from GCS and bundle into a zip."
    )
    parser.add_argument(
        "--out",
        default="resumes.zip",
        help="Output zip path (default: resumes.zip).",
    )
    parser.add_argument(
        "--debug",
        action="store_true",
        help="Enable verbose logging for troubleshooting.",
    )
    parser.add_argument(
        "--db",
        default=DEFAULT_DB_NAME,
        help=f"MongoDB database name (default: {DEFAULT_DB_NAME}).",
    )
    args = parser.parse_args()

    db_env = require_env(DB_ENV_VARS)
    gcs_env = require_env(GCS_ENV_VARS, "GCS env vars")

    mongo_uri = build_mongo_uri(
        db_env["DB_ADDRESS_DEPLOY"],
        db_env["DB_USER_DEPLOY"],
        db_env["DB_PASS_DEPLOY"],
    )
    if args.debug:
        # Never print the raw URI: it embeds the database password.
        print(f"Mongo URI: {redact_mongo_uri(mongo_uri)}")

    client = MongoClient(mongo_uri)
    try:
        db = client[args.db]
        hackers = db["hackers"]
        accounts = db["accounts"]
        if args.debug:
            print(f"Database: {args.db}")
            print(f"Collections: {', '.join(sorted(db.list_collection_names()))}")
            print(f"Hackers count: {hackers.count_documents({})}")

        query = {"application.general.URL.resume": {"$exists": True, "$ne": ""}}
        projection = {"application.general.URL.resume": 1, "accountId": 1}
        # Materialise the (small, projected) result set up front so no
        # server-side cursor is held open while resumes download.
        docs = list(hackers.find(query, projection=projection))
        if args.debug:
            print(f"Resume query matches: {len(docs)}")

        bucket = open_bucket(gcs_env, db_env["BUCKET_NAME"])
        total, downloaded = bundle_resumes(
            docs, bucket, accounts, args.out, debug=args.debug
        )
    finally:
        client.close()

    print(f"Processed {total} hackers, downloaded {downloaded} resumes.")
    print(f"Wrote {args.out}.")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())