77
88import re
99from pathlib import Path
10- from typing import List , Dict , Any
10+ from typing import List , Dict , Any , Optional
1111import openpyxl
1212
1313from utils import inject_content
1414from citation_utils import resolve_link
1515
1616
def parse_cv_undergrad_order(cv_path: Path) -> List[str]:
    """Parse the CV to get the order of undergraduate advisees.

    The CV lists undergrads in reverse chronological order by join date
    (most recent joiner first). This order is authoritative.

    Args:
        cv_path: Path to JRM_CV.tex

    Returns:
        List of names in CV order (first = highest priority). The list is
        empty when ``cv_path`` is not a regular file or when the
        "Undergraduate Advisees" etaremune list cannot be found.
    """
    # is_file() rather than exists(): a directory at this path would make
    # read_text() raise instead of falling back to "no CV order".
    if not cv_path.is_file():
        return []

    content = cv_path.read_text(encoding="utf-8")

    # Find the Undergraduate Advisees section: the first etaremune
    # environment that follows the section heading.
    match = re.search(
        r"\\textit\{Undergraduate Advisees\}.*?\\begin\{etaremune\}(.*?)\\end\{etaremune\}",
        content,
        re.DOTALL,
    )
    if not match:
        return []

    # Extract names from \item entries
    # Format: \item Name[*]? (years)
    names = []
    for item_match in re.finditer(r"\\item\s+(.+?)\s*\(", match.group(1)):
        # Remove the trailing asterisk (senior thesis marker).
        raw_name = item_match.group(1).strip().rstrip("*")
        # Collapse whitespace runs so the name compares equal to the same
        # name typed with single spaces elsewhere.
        name = " ".join(raw_name.split())
        if name:
            names.append(name)

    return names
55+
56+
1757def parse_links_field (links_str : str ) -> str :
1858 """Parse links field into HTML.
1959
@@ -326,51 +366,35 @@ def generate_undergrad_entry(alum: Dict[str, Any]) -> str:
326366 return f"{ name } { paren_display } "
327367
328368
def get_start_year(years_str: str) -> int:
    """Extract start year from years string for sorting.

    Args:
        years_str: Years string like '2024-2026', '2025', or '2023-2025'.
            Non-string values are converted with ``str()`` first, so a
            numeric value such as ``2025`` or ``2025.0`` is accepted too.

    Returns:
        Start year as integer (defaults to 0 if unparseable)
    """
    if not years_str:
        return 0

    # Take the leading run of digits rather than splitting on "-": this
    # parses "2024-2026" and "2025" as before, and additionally copes with
    # other separators (en dash, slash) and with a stringified float
    # ("2025.0"), which int() alone would reject.
    leading_digits = re.match(r"\s*(\d+)", str(years_str))
    if leading_digits is None:
        return 0
    return int(leading_digits.group(1))
352-
353-
def generate_undergrad_list_content(
    alumni: List[Dict[str, Any]], cv_order: Optional[List[str]] = None
) -> str:
    """Generate HTML content for undergraduate alumni list.

    Alumni are sorted to match CV order (reverse chronological by join date).
    Names are compared ignoring case and whitespace differences. Alumni whose
    name is not in ``cv_order`` are placed after all listed alumni; because
    the sort is stable they keep their relative input order.

    Args:
        alumni: List of alumni dictionaries; the "name" key is used for
            ordering.
        cv_order: Optional list of names in CV order (from
            parse_cv_undergrad_order). When omitted or empty, the input
            order is preserved.

    Returns:
        HTML string with alumni entries separated by <br>
    """
    if not alumni:
        return ""

    def normalize(name: Any) -> str:
        # Collapse whitespace and ignore case so trivial typing differences
        # between the CV and the alumni data still match. A missing or None
        # name normalizes to "".
        return " ".join(str(name or "").split()).casefold()

    listed_names = cv_order or []

    # Position map from CV order (lower = appears first). If a name occurs
    # more than once, the last occurrence wins.
    cv_position = {normalize(name): i for i, name in enumerate(listed_names)}

    # Every real position is < len(listed_names), so this rank sorts
    # unlisted alumni after all listed ones without a magic constant.
    unlisted_rank = len(listed_names)

    sorted_alumni = sorted(
        alumni,
        key=lambda a: cv_position.get(normalize(a.get("name")), unlisted_rank),
    )

    return "<br>\n".join(generate_undergrad_entry(a) for a in sorted_alumni)
@@ -419,17 +443,23 @@ def generate_collaborators_content(collaborators: List[Dict[str, Any]]) -> str:
419443 return "\n " .join (entries )
420444
421445
422- def build_people (data_path : Path , template_path : Path , output_path : Path ) -> None :
446+ def build_people (data_path : Path , template_path : Path , output_path : Path , cv_path : Optional [ Path ] = None ) -> None :
423447 """Build people.html from data and template.
424448
425449 Args:
426450 data_path: Path to people.xlsx
427451 template_path: Path to template HTML file
428452 output_path: Path for generated HTML file
453+ cv_path: Optional path to JRM_CV.tex for ordering undergrad alumni
429454 """
430455 # Load data
431456 data = load_people (data_path )
432457
458+ # Get CV order for undergrad alumni
459+ cv_order = []
460+ if cv_path :
461+ cv_order = parse_cv_undergrad_order (cv_path )
462+
433463 # Generate content for each section
434464 director_content = ""
435465 if data .get ("director" ):
@@ -448,7 +478,7 @@ def build_people(data_path: Path, template_path: Path, output_path: Path) -> Non
448478 data .get ("alumni_managers" , [])
449479 ),
450480 "ALUMNI_UNDERGRADS_CONTENT" : generate_undergrad_list_content (
451- data .get ("alumni_undergrads" , [])
481+ data .get ("alumni_undergrads" , []), cv_order
452482 ),
453483 "COLLABORATORS_CONTENT" : generate_collaborators_content (
454484 data .get ("collaborators" , [])
@@ -469,8 +499,9 @@ def main():
469499 data_path = project_root / "data" / "people.xlsx"
470500 template_path = project_root / "templates" / "people.html"
471501 output_path = project_root / "people.html"
502+ cv_path = project_root / "documents" / "JRM_CV.tex"
472503
473- build_people (data_path , template_path , output_path )
504+ build_people (data_path , template_path , output_path , cv_path )
474505
475506
476507if __name__ == "__main__" :
0 commit comments