diff --git a/.gitignore b/.gitignore
index 91af8c3c1..ff41b4b5a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,6 +14,7 @@ dist/*
 **/*.h5
 **/*.csv.gz
 .env
+.DS_Store
 
 # Ignore generated credentials from google-github-actions/auth
 gha-creds-*.json
diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index e69de29bb..fd4509d1f 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -0,0 +1,5 @@
+- bump: minor
+  changes:
+    added:
+    - Metadata for UK local authorities
+    - Calculation of UK local authority-level outputs
diff --git a/policyengine_api/constants.py b/policyengine_api/constants.py
index c22dabaa2..d1dfac30b 100644
--- a/policyengine_api/constants.py
+++ b/policyengine_api/constants.py
@@ -38,6 +38,7 @@
     "national",  # National level (e.g., "uk")
     "country",  # UK countries (e.g., "country/england", "country/scotland")
     "constituency",  # UK parliamentary constituencies (e.g., "constituency/Aldershot")
+    "local_authority",  # UK local authorities (e.g., "local_authority/Maidstone")
 )
 
 # Valid region prefixes for each country
@@ -51,6 +52,7 @@
     "uk": [
         "country/",  # UK countries (e.g., "country/england", "country/scotland")
         "constituency/",  # UK parliamentary constituencies (e.g., "constituency/Aldershot")
+        "local_authority/",  # UK local authorities (e.g., "local_authority/Maidstone")
     ],
 }
 
diff --git a/policyengine_api/country.py b/policyengine_api/country.py
index 44cb47476..29f64fbbe 100644
--- a/policyengine_api/country.py
+++ b/policyengine_api/country.py
@@ -73,6 +73,7 @@ def build_microsimulation_options(self) -> dict:
                 Path(__file__).parent / "data" / "constituencies_2024.csv"
             )
             constituency_names = pd.read_csv(constituency_names_path)
+            constituency_names = constituency_names.sort_values("name")
             region = [
                 dict(name="uk", label="the UK", type="national"),
                 dict(name="country/england", label="England", type="country"),
@@ -92,6 +93,19 @@ def build_microsimulation_options(self) -> dict:
                         type="constituency",
                     )
                 )
+            local_authority_names_path = (
+                Path(__file__).parent / "data" / "local_authorities_2021.csv"
+            )
+            local_authority_names = pd.read_csv(local_authority_names_path)
+            # List local authorities alphabetically, after the constituencies
+            for la_name in local_authority_names["name"].sort_values():
+                region.append(
+                    dict(
+                        name=f"local_authority/{la_name}",
+                        label=la_name,
+                        type="local_authority",
+                    )
+                )
             time_period = [
                 dict(name=2024, label="2024"),
                 dict(name=2025, label="2025"),
diff --git a/policyengine_api/data/local_authorities_2021.csv b/policyengine_api/data/local_authorities_2021.csv
new file mode 100644
index 000000000..9fcf922ed
--- /dev/null
+++ b/policyengine_api/data/local_authorities_2021.csv
@@ -0,0 +1,361 @@
+code,x,y,name
+E06000001,8.0,19.0,Hartlepool
+E06000002,9.0,18.0,Middlesbrough
+E06000003,9.0,19.0,Redcar and Cleveland
+E06000004,8.0,18.0,Stockton-on-Tees
+E06000005,7.0,18.0,Darlington
+E06000006,1.0,11.0,Halton
+E06000007,2.0,11.0,Warrington
+E06000008,4.0,15.0,Blackburn with Darwen
+E06000009,2.0,15.0,Blackpool
+E06000010,10.0,15.0,"Kingston upon Hull, City of"
+E06000011,11.0,16.0,East Riding of Yorkshire
+E06000012,11.0,14.0,North East Lincolnshire
+E06000013,10.0,14.0,North Lincolnshire
+E06000014,9.0,17.0,York
+E06000015,6.0,11.0,Derby
+E06000016,8.0,8.0,Leicester
+E06000017,10.0,9.0,Rutland
+E06000018,8.0,10.0,Nottingham
+E06000019,0.0,8.0,"Herefordshire, County of"
+E06000020,2.0,9.0,Telford and Wrekin
+E06000021,3.0,10.0,Stoke-on-Trent
+E06000022,1.0,3.0,Bath and North East Somerset
+E06000023,0.0,3.0,"Bristol, City of"
+E06000024,0.0,2.0,North Somerset
+E06000025,1.0,4.0,South Gloucestershire
+E06000026,-4.0,-2.0,Plymouth
+E06000027,-3.0,-2.0,Torbay
+E06000030,2.0,4.0,Swindon
+E06000031,11.0,9.0,Peterborough
+E06000032,10.0,7.0,Luton
+E06000033,16.0,6.0,Southend-on-Sea
+E06000034,15.0,4.0,Thurrock
+E06000035,15.0,1.0,Medway
+E06000036,4.0,2.0,Bracknell Forest
+E06000037,2.0,2.0,West Berkshire
+E06000038,2.0,3.0,Reading
+E06000039,6.0,4.0,Slough
+E06000040,4.0,3.0,Windsor and Maidenhead
+E06000041,3.0,3.0,Wokingham
+E06000042,6.0,5.0,Milton Keynes
+E06000043,9.0,-2.0,Brighton and Hove
+E06000044,4.0,-1.0,Portsmouth
+E06000045,2.0,0.0,Southampton
+E06000046,1.0,-2.0,Isle of Wight
+E06000047,6.0,18.0,County Durham
+E06000049,4.0,11.0,Cheshire East
+E06000050,3.0,11.0,Cheshire West and Chester
+E06000051,1.0,9.0,Shropshire
+E06000052,-5.0,-2.0,Cornwall
+E06000053,-7.0,-3.0,Isles of Scilly
+E06000054,1.0,2.0,Wiltshire
+E06000055,9.0,7.0,Bedford
+E06000056,9.0,6.0,Central Bedfordshire
+E06000057,5.0,20.0,Northumberland
+E06000058,0.0,0.0,"Bournemouth, Christchurch and Poole"
+E06000059,-1.0,0.0,Dorset
+E06000060,5.0,5.0,Buckinghamshire
+E06000061,9.0,9.0,North Northamptonshire
+E06000062,7.0,6.0,West Northamptonshire
+E06000063,0.0,0.0,Cumberland
+E06000064,0.0,0.0,Westmorland and Furness
+E06000065,0.0,0.0,North Yorkshire
+E06000066,0.0,0.0,Somerset
+E07000008,12.0,8.0,Cambridge
+E07000009,12.0,9.0,East Cambridgeshire
+E07000010,13.0,10.0,Fenland
+E07000011,10.0,8.0,Huntingdonshire
+E07000012,11.0,8.0,South Cambridgeshire
+E07000032,7.0,11.0,Amber Valley
+E07000033,10.0,12.0,Bolsover
+E07000034,9.0,12.0,Chesterfield
+E07000035,7.0,12.0,Derbyshire Dales
+E07000036,7.0,9.0,Erewash
+E07000037,7.0,13.0,High Peak
+E07000038,8.0,12.0,North East Derbyshire
+E07000039,6.0,10.0,South Derbyshire
+E07000040,-2.0,-1.0,East Devon
+E07000041,-3.0,-1.0,Exeter
+E07000042,-2.0,0.0,Mid Devon
+E07000043,-3.0,1.0,North Devon
+E07000044,-4.0,-3.0,South Hams
+E07000045,-2.0,-2.0,Teignbridge
+E07000046,-4.0,-1.0,Torridge
+E07000047,-3.0,0.0,West Devon
+E07000061,10.0,-2.0,Eastbourne
+E07000062,13.0,-2.0,Hastings
+E07000063,10.0,-1.0,Lewes
+E07000064,12.0,-2.0,Rother
+E07000065,11.0,-2.0,Wealden
+E07000066,14.0,5.0,Basildon
+E07000067,14.0,7.0,Braintree
+E07000068,13.0,5.0,Brentwood
+E07000069,15.0,5.0,Castle Point
+E07000070,14.0,6.0,Chelmsford
+E07000071,15.0,8.0,Colchester +E07000072,12.0,5.0,Epping Forest +E07000073,13.0,6.0,Harlow +E07000074,15.0,7.0,Maldon +E07000075,15.0,6.0,Rochford +E07000076,16.0,8.0,Tendring +E07000077,13.0,7.0,Uttlesford +E07000078,1.0,5.0,Cheltenham +E07000079,2.0,5.0,Cotswold +E07000080,-1.0,6.0,Forest of Dean +E07000081,0.0,6.0,Gloucester +E07000082,0.0,5.0,Stroud +E07000083,1.0,6.0,Tewkesbury +E07000084,2.0,1.0,Basingstoke and Deane +E07000085,4.0,0.0,East Hampshire +E07000086,3.0,0.0,Eastleigh +E07000087,2.0,-1.0,Fareham +E07000088,3.0,-1.0,Gosport +E07000089,3.0,2.0,Hart +E07000090,5.0,0.0,Havant +E07000091,1.0,0.0,New Forest +E07000092,4.0,1.0,Rushmoor +E07000093,1.0,1.0,Test Valley +E07000094,3.0,1.0,Winchester +E07000095,12.0,6.0,Broxbourne +E07000096,8.0,6.0,Dacorum +E07000098,9.0,5.0,Hertsmere +E07000099,11.0,7.0,North Hertfordshire +E07000102,7.0,5.0,Three Rivers +E07000103,8.0,5.0,Watford +E07000105,12.0,-1.0,Ashford +E07000106,15.0,0.0,Canterbury +E07000107,13.0,1.0,Dartford +E07000108,14.0,-1.0,Dover +E07000109,14.0,1.0,Gravesham +E07000110,14.0,0.0,Maidstone +E07000111,12.0,0.0,Sevenoaks +E07000112,13.0,-1.0,Folkestone and Hythe +E07000113,16.0,0.0,Swale +E07000114,15.0,-1.0,Thanet +E07000115,13.0,0.0,Tonbridge and Malling +E07000116,11.0,-1.0,Tunbridge Wells +E07000117,6.0,15.0,Burnley +E07000118,3.0,14.0,Chorley +E07000119,4.0,16.0,Fylde +E07000120,5.0,15.0,Hyndburn +E07000121,3.0,17.0,Lancaster +E07000122,6.0,16.0,Pendle +E07000123,5.0,16.0,Preston +E07000124,5.0,17.0,Ribble Valley +E07000125,6.0,14.0,Rossendale +E07000126,3.0,15.0,South Ribble +E07000127,2.0,13.0,West Lancashire +E07000128,3.0,16.0,Wyre +E07000129,7.0,7.0,Blaby +E07000130,8.0,9.0,Charnwood +E07000131,8.0,7.0,Harborough +E07000132,7.0,8.0,Hinckley and Bosworth +E07000133,11.0,10.0,Melton +E07000134,6.0,9.0,North West Leicestershire +E07000135,9.0,8.0,Oadby and Wigston +E07000136,12.0,12.0,Boston +E07000137,12.0,13.0,East Lindsey +E07000138,11.0,12.0,Lincoln +E07000139,11.0,11.0,North Kesteven 
+E07000140,12.0,11.0,South Holland +E07000141,12.0,10.0,South Kesteven +E07000142,11.0,13.0,West Lindsey +E07000143,14.0,10.0,Breckland +E07000144,15.0,12.0,Broadland +E07000145,15.0,11.0,Great Yarmouth +E07000146,13.0,11.0,King's Lynn and West Norfolk +E07000147,14.0,12.0,North Norfolk +E07000148,14.0,11.0,Norwich +E07000149,15.0,10.0,South Norfolk +E07000170,8.0,11.0,Ashfield +E07000171,10.0,13.0,Bassetlaw +E07000172,7.0,10.0,Broxtowe +E07000173,9.0,10.0,Gedling +E07000174,9.0,11.0,Mansfield +E07000175,10.0,11.0,Newark and Sherwood +E07000176,10.0,10.0,Rushcliffe +E07000177,4.0,5.0,Cherwell +E07000178,4.0,4.0,Oxford +E07000179,5.0,4.0,South Oxfordshire +E07000180,3.0,4.0,Vale of White Horse +E07000181,3.0,5.0,West Oxfordshire +E07000192,3.0,9.0,Cannock Chase +E07000193,5.0,11.0,East Staffordshire +E07000194,4.0,9.0,Lichfield +E07000195,2.0,10.0,Newcastle-under-Lyme +E07000196,2.0,8.0,South Staffordshire +E07000197,4.0,10.0,Stafford +E07000198,5.0,10.0,Staffordshire Moorlands +E07000199,5.0,9.0,Tamworth +E07000200,14.0,8.0,Babergh +E07000202,15.0,9.0,Ipswich +E07000203,14.0,9.0,Mid Suffolk +E07000207,7.0,2.0,Elmbridge +E07000208,8.0,0.0,Epsom and Ewell +E07000209,5.0,1.0,Guildford +E07000210,6.0,1.0,Mole Valley +E07000211,7.0,0.0,Reigate and Banstead +E07000212,5.0,3.0,Runnymede +E07000213,6.0,3.0,Spelthorne +E07000214,5.0,2.0,Surrey Heath +E07000215,9.0,-1.0,Tandridge +E07000216,6.0,0.0,Waverley +E07000217,6.0,2.0,Woking +E07000218,6.0,8.0,North Warwickshire +E07000219,6.0,7.0,Nuneaton and Bedworth +E07000220,6.0,6.0,Rugby +E07000221,3.0,6.0,Stratford-on-Avon +E07000222,4.0,6.0,Warwick +E07000223,8.0,-2.0,Adur +E07000224,6.0,-2.0,Arun +E07000225,5.0,-1.0,Chichester +E07000226,8.0,-1.0,Crawley +E07000227,6.0,-1.0,Horsham +E07000228,7.0,-1.0,Mid Sussex +E07000229,7.0,-2.0,Worthing +E07000234,2.0,7.0,Bromsgrove +E07000235,-1.0,7.0,Malvern Hills +E07000236,4.0,7.0,Redditch +E07000237,0.0,7.0,Worcester +E07000238,2.0,6.0,Wychavon +E07000239,1.0,8.0,Wyre Forest 
+E07000240,10.0,6.0,St Albans +E07000241,11.0,6.0,Welwyn Hatfield +E07000242,13.0,8.0,East Hertfordshire +E07000243,12.0,7.0,Stevenage +E07000244,16.0,10.0,East Suffolk +E07000245,13.0,9.0,West Suffolk +E08000001,4.0,14.0,Bolton +E08000002,5.0,14.0,Bury +E08000003,5.0,12.0,Manchester +E08000004,5.0,13.0,Oldham +E08000005,7.0,14.0,Rochdale +E08000006,4.0,13.0,Salford +E08000007,6.0,12.0,Stockport +E08000008,6.0,13.0,Tameside +E08000009,4.0,12.0,Trafford +E08000010,3.0,13.0,Wigan +E08000011,2.0,12.0,Knowsley +E08000012,1.0,13.0,Liverpool +E08000013,3.0,12.0,St. Helens +E08000014,2.0,14.0,Sefton +E08000015,1.0,12.0,Wirral +E08000016,8.0,14.0,Barnsley +E08000017,9.0,14.0,Doncaster +E08000018,9.0,13.0,Rotherham +E08000019,8.0,13.0,Sheffield +E08000021,5.0,19.0,Newcastle upon Tyne +E08000022,6.0,20.0,North Tyneside +E08000023,7.0,20.0,South Tyneside +E08000024,7.0,19.0,Sunderland +E08000025,5.0,8.0,Birmingham +E08000026,5.0,6.0,Coventry +E08000027,1.0,7.0,Dudley +E08000028,3.0,7.0,Sandwell +E08000029,5.0,7.0,Solihull +E08000030,4.0,8.0,Walsall +E08000031,3.0,8.0,Wolverhampton +E08000032,7.0,16.0,Bradford +E08000033,7.0,15.0,Calderdale +E08000034,8.0,15.0,Kirklees +E08000035,8.0,16.0,Leeds +E08000036,9.0,15.0,Wakefield +E08000037,6.0,19.0,Gateshead +E09000001,11.0,2.0,City of London +E09000002,13.0,3.0,Barking and Dagenham +E09000003,10.0,5.0,Barnet +E09000004,12.0,1.0,Bexley +E09000005,10.0,4.0,Brent +E09000006,11.0,0.0,Bromley +E09000007,11.0,4.0,Camden +E09000008,10.0,0.0,Croydon +E09000009,9.0,4.0,Ealing +E09000010,11.0,5.0,Enfield +E09000011,11.0,1.0,Greenwich +E09000012,12.0,3.0,Hackney +E09000013,8.0,3.0,Hammersmith and Fulham +E09000014,12.0,4.0,Haringey +E09000015,8.0,4.0,Harrow +E09000016,14.0,3.0,Havering +E09000017,7.0,4.0,Hillingdon +E09000018,7.0,3.0,Hounslow +E09000019,11.0,3.0,Islington +E09000020,9.0,3.0,Kensington and Chelsea +E09000021,7.0,1.0,Kingston upon Thames +E09000022,10.0,2.0,Lambeth +E09000023,10.0,1.0,Lewisham +E09000024,8.0,1.0,Merton 
+E09000025,13.0,2.0,Newham +E09000026,14.0,4.0,Redbridge +E09000027,8.0,2.0,Richmond upon Thames +E09000028,9.0,1.0,Southwark +E09000029,9.0,0.0,Sutton +E09000030,12.0,2.0,Tower Hamlets +E09000031,13.0,4.0,Waltham Forest +E09000032,9.0,2.0,Wandsworth +E09000033,10.0,3.0,Westminster +N09000001,-4.0,16.0,Antrim and Newtownabbey +N09000002,-5.0,16.0,"Armagh City, Banbridge and Craigavon" +N09000003,-4.0,17.0,Belfast +N09000004,-5.0,18.0,Causeway Coast and Glens +N09000005,-6.0,17.0,Derry City and Strabane +N09000006,-6.0,16.0,Fermanagh and Omagh +N09000007,-5.0,15.0,Lisburn and Castlereagh +N09000008,-4.0,18.0,Mid and East Antrim +N09000009,-5.0,17.0,Mid Ulster +N09000010,-4.0,15.0,"Newry, Mourne and Down" +S12000005,2.0,24.0,Clackmannanshire +S12000006,4.0,20.0,Dumfries and Galloway +S12000008,3.0,20.0,East Ayrshire +S12000010,5.0,22.0,East Lothian +S12000011,2.0,20.0,East Renfrewshire +S12000013,-1.0,27.0,Na h-Eileanan Siar +S12000014,2.0,23.0,Falkirk +S12000017,1.0,26.0,Highland +S12000018,0.0,21.0,Inverclyde +S12000019,3.0,21.0,Midlothian +S12000020,2.0,26.0,Moray +S12000021,1.0,20.0,North Ayrshire +S12000023,4.0,28.0,Orkney Islands +S12000026,4.0,21.0,Scottish Borders +S12000027,5.0,30.0,Shetland Islands +S12000028,1.0,19.0,South Ayrshire +S12000029,2.0,21.0,South Lanarkshire +S12000030,1.0,24.0,Stirling +S12000033,4.0,26.0,Aberdeen City +S12000034,3.0,26.0,Aberdeenshire +S12000035,0.0,24.0,Argyll and Bute +S12000036,4.0,22.0,City of Edinburgh +S12000038,1.0,22.0,Renfrewshire +S12000039,0.0,23.0,West Dunbartonshire +S12000040,3.0,22.0,West Lothian +S12000041,2.0,25.0,Angus +S12000042,3.0,25.0,Dundee City +S12000045,1.0,23.0,East Dunbartonshire +S12000047,3.0,24.0,Fife +S12000048,1.0,25.0,Perth and Kinross +S12000049,1.0,21.0,Glasgow City +S12000050,2.0,22.0,North Lanarkshire +W06000001,-2.0,12.0,Isle of Anglesey +W06000002,-2.0,10.0,Gwynedd +W06000003,-1.0,10.0,Conwy +W06000004,0.0,10.0,Denbighshire +W06000005,0.0,11.0,Flintshire +W06000006,1.0,10.0,Wrexham 
+W06000008,-2.0,9.0,Ceredigion
+W06000009,-5.0,6.0,Pembrokeshire
+W06000010,-4.0,6.0,Carmarthenshire
+W06000011,-4.0,5.0,Swansea
+W06000012,-3.0,5.0,Neath Port Talbot
+W06000013,-3.0,6.0,Bridgend
+W06000014,-2.0,4.0,Vale of Glamorgan
+W06000015,-2.0,5.0,Cardiff
+W06000016,-3.0,7.0,Rhondda Cynon Taf
+W06000018,-2.0,6.0,Caerphilly
+W06000019,0.0,9.0,Blaenau Gwent
+W06000020,-2.0,7.0,Torfaen
+W06000021,-1.0,8.0,Monmouthshire
+W06000022,-1.0,5.0,Newport
+W06000023,-1.0,9.0,Powys
+W06000024,-2.0,8.0,Merthyr Tydfil
diff --git a/policyengine_api/endpoints/economy/compare.py b/policyengine_api/endpoints/economy/compare.py
index 1a21d40d0..c97a03f6f 100644
--- a/policyengine_api/endpoints/economy/compare.py
+++ b/policyengine_api/endpoints/economy/compare.py
@@ -548,18 +548,50 @@ class UKConstituencyBreakdown(BaseModel):
     outcomes_by_region: dict[str, dict[str, int]]
 
 
+class UKLocalAuthorityBreakdownByLA(BaseModel):
+    average_household_income_change: float
+    relative_household_income_change: float
+    x: int
+    y: int
+
+
+class UKLocalAuthorityBreakdown(BaseModel):
+    by_local_authority: dict[str, UKLocalAuthorityBreakdownByLA]
+    outcomes_by_region: dict[str, dict[str, int]]
+
+
 def uk_constituency_breakdown(
-    baseline: dict, reform: dict, country_id: str
+    baseline: dict, reform: dict, country_id: str, region: str | None = None
 ) -> UKConstituencyBreakdown | None:
     if country_id != "uk":
         return None
 
+    # If simulating a local authority, constituency breakdown is not applicable
+    if region is not None and region.startswith("local_authority/"):
+        return None
+
+    # Determine if we're filtering to a specific constituency
+    selected_constituency = None
+    if region is not None and region.startswith("constituency/"):
+        selected_constituency = region.split("/", 1)[1]
+
+    # Determine if we're filtering to a specific country
+    selected_country = None
+    if region is not None and region.startswith("country/"):
+        selected_country = region.split("/", 1)[1].upper()
+
     output = {
         "by_constituency": {},
         "outcomes_by_region": {},
     }
-    for region in ["uk", "england", "scotland", "wales", "northern_ireland"]:
-        output["outcomes_by_region"][region] = {
+    for region_name in [
+        "uk",
+        "england",
+        "scotland",
+        "wales",
+        "northern_ireland",
+    ]:
+        output["outcomes_by_region"][region_name] = {
             "Gain more than 5%": 0,
             "Gain less than 5%": 0,
             "No change": 0,
@@ -589,6 +621,23 @@ def uk_constituency_breakdown(
     for i in range(len(constituency_names)):
         name: str = constituency_names.iloc[i]["name"]
         code: str = constituency_names.iloc[i]["code"]
+
+        # Filter to specific constituency if requested
+        if selected_constituency is not None:
+            if name != selected_constituency and code != selected_constituency:
+                continue
+
+        # Filter to specific country by the code's leading nation letter
+        if selected_country is not None:
+            if selected_country == "ENGLAND" and code[:1] != "E":
+                continue
+            elif selected_country == "SCOTLAND" and code[:1] != "S":
+                continue
+            elif selected_country == "WALES" and code[:1] != "W":
+                continue
+            elif selected_country == "NORTHERN_IRELAND" and code[:1] != "N":
+                continue
+
         weight: np.ndarray = weights[i]
         baseline_income = MicroSeries(baseline_hnet, weights=weight)
         reform_income = MicroSeries(reform_hnet, weights=weight)
@@ -632,8 +681,131 @@ def uk_constituency_breakdown(
     return UKConstituencyBreakdown(**output)
 
 
+def uk_local_authority_breakdown(
+    baseline: dict, reform: dict, country_id: str, region: str | None = None
+) -> UKLocalAuthorityBreakdown | None:
+    """
+    Summarise household net income changes for each UK local authority.
+
+    Args:
+        baseline (dict): The baseline economy.
+        reform (dict): The reform economy.
+        country_id (str): The country identifier; only "uk" is supported.
+        region (str | None): "local_authority/<name or code>" keeps one
+            authority, "country/<nation>" keeps that nation's authorities
+            and "constituency/..." disables the breakdown.
+
+    Returns:
+        UKLocalAuthorityBreakdown | None: The breakdown, or None when it
+            does not apply.
+    """
+    if country_id != "uk":
+        return None
+
+    # If simulating a constituency, local authority breakdown is not applicable
+    if region is not None and region.startswith("constituency/"):
+        return None
+
+    # Determine if we're filtering to a specific local authority
+    selected_la = None
+    if region is not None and region.startswith("local_authority/"):
+        selected_la = region.split("/", 1)[1]
+
+    # Determine if we're filtering to a specific country
+    selected_country = None
+    if region is not None and region.startswith("country/"):
+        selected_country = region.split("/", 1)[1].lower()
+
+    # The leading letter of an authority code identifies its nation
+    nation_by_prefix = {
+        "E": "england",
+        "S": "scotland",
+        "W": "wales",
+        "N": "northern_ireland",
+    }
+    # An unrecognised country name filters nothing out
+    filter_by_nation = selected_country in nation_by_prefix.values()
+    buckets = (
+        "Gain more than 5%",
+        "Gain less than 5%",
+        "No change",
+        "Lose less than 5%",
+        "Lose more than 5%",
+    )
+    output = {
+        "by_local_authority": {},
+        "outcomes_by_region": {
+            region_name: dict.fromkeys(buckets, 0)
+            for region_name in ("uk", *nation_by_prefix.values())
+        },
+    }
+    baseline_hnet = baseline["household_net_income"]
+    reform_hnet = reform["household_net_income"]
+
+    local_authority_weights_path = download_huggingface_dataset(
+        repo="policyengine/policyengine-uk-data-private",
+        repo_filename="local_authority_weights.h5",
+    )
+    with h5py.File(local_authority_weights_path, "r") as f:
+        weights = f["2025"][...]
+
+    local_authority_names_path = download_huggingface_dataset(
+        repo="policyengine/policyengine-uk-data-public",
+        repo_filename="local_authorities_2021.csv",
+    )
+    local_authority_names = pd.read_csv(local_authority_names_path)
+
+    # weights[i] is paired with the i-th row of the names file
+    rows = local_authority_names[["name", "code", "x", "y"]]
+    for i, (name, code, x, y) in enumerate(rows.itertuples(index=False)):
+        # Filter to specific local authority (by name or code) if requested
+        if selected_la is not None and selected_la not in (name, code):
+            continue
+
+        # Filter to specific country if requested
+        nation = nation_by_prefix.get(code[:1])
+        if filter_by_nation and nation != selected_country:
+            continue
+
+        weight: np.ndarray = weights[i]
+        baseline_income = MicroSeries(baseline_hnet, weights=weight)
+        reform_income = MicroSeries(reform_hnet, weights=weight)
+        average_household_income_change: float = (
+            reform_income.sum() - baseline_income.sum()
+        ) / baseline_income.count()
+        percent_household_income_change: float = (
+            reform_income.sum() / baseline_income.sum() - 1
+        )
+        output["by_local_authority"][name] = {
+            "average_household_income_change": average_household_income_change,
+            "relative_household_income_change": percent_household_income_change,
+            "x": int(x),
+            "y": int(y),
+        }
+
+        if percent_household_income_change > 0.05:
+            bucket = "Gain more than 5%"
+        elif percent_household_income_change > 1e-3:
+            bucket = "Gain less than 5%"
+        elif percent_household_income_change > -1e-3:
+            bucket = "No change"
+        elif percent_household_income_change > -0.05:
+            bucket = "Lose less than 5%"
+        else:
+            bucket = "Lose more than 5%"
+
+        output["outcomes_by_region"]["uk"][bucket] += 1
+        if nation is not None:
+            output["outcomes_by_region"][nation][bucket] += 1
+
+    return UKLocalAuthorityBreakdown(**output)
+
+
 def compare_economic_outputs(
-    baseline: dict, reform: dict, country_id: str = None
+    baseline: dict,
+    reform: dict,
+    country_id: str | None = None,
+    region: str | None = None,
 ) -> dict:
     """
     Compare the economic outputs of two economies.
@@ -641,6 +813,9 @@ def compare_economic_outputs(
     Args:
         baseline (dict): The baseline economy.
         reform (dict): The reform economy.
+        country_id (str): The country identifier (e.g., "uk", "us").
+        region (str | None): The region filter (e.g., "uk", "local_authority/Leicester",
+            "constituency/Aldershot", "country/scotland"). Used to filter breakdown results.
 
     Returns:
         dict: The comparison of the two economies.
@@ -658,10 +833,17 @@ def compare_economic_outputs( intra_decile_impact_data = intra_decile_impact(baseline, reform) labor_supply_response_data = labor_supply_response(baseline, reform) constituency_impact_data: UKConstituencyBreakdown | None = ( - uk_constituency_breakdown(baseline, reform, country_id) + uk_constituency_breakdown(baseline, reform, country_id, region) ) if constituency_impact_data is not None: constituency_impact_data = constituency_impact_data.model_dump() + local_authority_impact_data: UKLocalAuthorityBreakdown | None = ( + uk_local_authority_breakdown(baseline, reform, country_id, region) + ) + if local_authority_impact_data is not None: + local_authority_impact_data = ( + local_authority_impact_data.model_dump() + ) try: wealth_decile_impact_data = wealth_decile_impact(baseline, reform) intra_wealth_decile_impact_data = intra_wealth_decile_impact( @@ -684,6 +866,7 @@ def compare_economic_outputs( intra_wealth_decile=intra_wealth_decile_impact_data, labor_supply_response=labor_supply_response_data, constituency_impact=constituency_impact_data, + local_authority_impact=local_authority_impact_data, ) elif baseline.get("type") == "cliff": return dict( diff --git a/tests/unit/endpoints/__init__.py b/tests/unit/endpoints/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/endpoints/economy/__init__.py b/tests/unit/endpoints/economy/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/endpoints/economy/test_compare.py b/tests/unit/endpoints/economy/test_compare.py new file mode 100644 index 000000000..17ff66275 --- /dev/null +++ b/tests/unit/endpoints/economy/test_compare.py @@ -0,0 +1,733 @@ +import pytest +from unittest.mock import patch, MagicMock +import numpy as np +import pandas as pd +from pydantic import ValidationError + +from policyengine_api.endpoints.economy.compare import ( + UKConstituencyBreakdownByConstituency, + UKConstituencyBreakdown, + UKLocalAuthorityBreakdownByLA, + 
UKLocalAuthorityBreakdown, + uk_constituency_breakdown, + uk_local_authority_breakdown, +) + + +class TestUKLocalAuthorityBreakdownByLA: + """Tests for the UKLocalAuthorityBreakdownByLA Pydantic model.""" + + def test__given_valid_data__creates_instance(self): + breakdown = UKLocalAuthorityBreakdownByLA( + average_household_income_change=100.50, + relative_household_income_change=0.05, + x=10, + y=20, + ) + assert breakdown.average_household_income_change == 100.50 + assert breakdown.relative_household_income_change == 0.05 + assert breakdown.x == 10 + assert breakdown.y == 20 + + def test__given_negative_income_change__creates_instance(self): + breakdown = UKLocalAuthorityBreakdownByLA( + average_household_income_change=-500.0, + relative_household_income_change=-0.03, + x=5, + y=-10, + ) + assert breakdown.average_household_income_change == -500.0 + assert breakdown.relative_household_income_change == -0.03 + + def test__given_zero_values__creates_instance(self): + breakdown = UKLocalAuthorityBreakdownByLA( + average_household_income_change=0.0, + relative_household_income_change=0.0, + x=0, + y=0, + ) + assert breakdown.average_household_income_change == 0.0 + assert breakdown.relative_household_income_change == 0.0 + + def test__given_missing_field__raises_validation_error(self): + with pytest.raises(ValidationError): + UKLocalAuthorityBreakdownByLA( + average_household_income_change=100.0, + # Missing relative_household_income_change + x=10, + y=20, + ) + + +class TestUKLocalAuthorityBreakdown: + """Tests for the UKLocalAuthorityBreakdown Pydantic model.""" + + def test__given_valid_data__creates_instance(self): + breakdown = UKLocalAuthorityBreakdown( + by_local_authority={ + "Hartlepool": UKLocalAuthorityBreakdownByLA( + average_household_income_change=100.0, + relative_household_income_change=0.02, + x=8, + y=19, + ) + }, + outcomes_by_region={ + "uk": {"Gain more than 5%": 1, "No change": 0}, + "england": {"Gain more than 5%": 1, "No change": 0}, + }, + ) 
+ assert "Hartlepool" in breakdown.by_local_authority + assert "uk" in breakdown.outcomes_by_region + + def test__given_empty_by_local_authority__creates_instance(self): + breakdown = UKLocalAuthorityBreakdown( + by_local_authority={}, + outcomes_by_region={ + "uk": {"No change": 0}, + }, + ) + assert len(breakdown.by_local_authority) == 0 + + def test__model_dump_returns_dict(self): + breakdown = UKLocalAuthorityBreakdown( + by_local_authority={ + "Leicester": UKLocalAuthorityBreakdownByLA( + average_household_income_change=50.0, + relative_household_income_change=0.01, + x=8, + y=8, + ) + }, + outcomes_by_region={"uk": {"No change": 1}}, + ) + result = breakdown.model_dump() + assert isinstance(result, dict) + assert "by_local_authority" in result + assert "outcomes_by_region" in result + + +class TestUKLocalAuthorityBreakdownFunction: + """Tests for the uk_local_authority_breakdown function.""" + + def test__given_non_uk_country__returns_none(self): + result = uk_local_authority_breakdown({}, {}, "us") + assert result is None + + def test__given_non_uk_country_canada__returns_none(self): + result = uk_local_authority_breakdown({}, {}, "ca") + assert result is None + + @patch( + "policyengine_api.endpoints.economy.compare.download_huggingface_dataset" + ) + @patch("policyengine_api.endpoints.economy.compare.h5py.File") + @patch("policyengine_api.endpoints.economy.compare.pd.read_csv") + def test__given_uk_country__returns_breakdown( + self, mock_read_csv, mock_h5py_file, mock_download + ): + # Setup mocks + mock_download.side_effect = [ + "/path/to/weights.h5", + "/path/to/names.csv", + ] + + # Create mock weights - 3 local authorities, 10 households + mock_weights = np.ones((3, 10)) + mock_h5py_context = MagicMock() + mock_h5py_context.__enter__ = MagicMock( + return_value={"2025": mock_weights} + ) + mock_h5py_context.__exit__ = MagicMock(return_value=False) + mock_h5py_file.return_value = mock_h5py_context + + # Create mock local authority names DataFrame + 
mock_la_df = pd.DataFrame( + { + "code": ["E06000001", "S12000033", "W06000001"], + "name": ["Hartlepool", "Aberdeen City", "Isle of Anglesey"], + "x": [8.0, 5.0, 3.0], + "y": [19.0, 10.0, 15.0], + } + ) + mock_read_csv.return_value = mock_la_df + + # Create baseline and reform data + baseline = {"household_net_income": np.array([1000.0] * 10)} + reform = {"household_net_income": np.array([1050.0] * 10)} + + result = uk_local_authority_breakdown(baseline, reform, "uk") + + assert result is not None + assert isinstance(result, UKLocalAuthorityBreakdown) + assert "Hartlepool" in result.by_local_authority + assert "Aberdeen City" in result.by_local_authority + assert "Isle of Anglesey" in result.by_local_authority + + def test__region_categorization_by_code_prefix(self): + """Test that region categorization logic correctly identifies UK nations by code prefix.""" + # This is a unit test for the region categorization logic + # We test the logic directly rather than through the full function + + test_cases = [ + ("E06000001", ["uk", "england"]), # English LA + ("S12000033", ["uk", "scotland"]), # Scottish LA + ("W06000001", ["uk", "wales"]), # Welsh LA + ("N09000001", ["uk", "northern_ireland"]), # NI LA + ] + + for code, expected_regions in test_cases: + regions = ["uk"] + if code.startswith("E"): + regions.append("england") + elif code.startswith("S"): + regions.append("scotland") + elif code.startswith("W"): + regions.append("wales") + elif code.startswith("N"): + regions.append("northern_ireland") + + assert regions == expected_regions, f"Failed for code {code}" + + def test__outcome_bucket_categorization_logic(self): + """Test that outcome bucket categorization logic is correct.""" + # Thresholds: > 0.05 (5%), > 0.001 (0.1%), > -0.001, > -0.05 + test_cases = [ + (0.10, "Gain more than 5%"), # 10% gain + (0.06, "Gain more than 5%"), # 6% gain + (0.051, "Gain more than 5%"), # Just over 5% + (0.05, "Gain less than 5%"), # Exactly 5% gain (not > 5%) + (0.03, "Gain 
less than 5%"), # 3% gain + (0.002, "Gain less than 5%"), # 0.2% gain (> 0.001) + (0.001, "No change"), # Exactly 0.1% - not > 0.001 + (0.0005, "No change"), # 0.05% gain (within tolerance) + (0.0, "No change"), # No change + (-0.0005, "No change"), # 0.05% loss (> -0.001) + (-0.001, "Lose less than 5%"), # Exactly -0.1% (not > -0.001) + (-0.002, "Lose less than 5%"), # 0.2% loss + (-0.03, "Lose less than 5%"), # 3% loss + (-0.049, "Lose less than 5%"), # Just under 5% loss (> -0.05) + (-0.05, "Lose more than 5%"), # Exactly 5% loss (not > -0.05) + (-0.051, "Lose more than 5%"), # Just over 5% loss + (-0.06, "Lose more than 5%"), # 6% loss + (-0.10, "Lose more than 5%"), # 10% loss + ] + + for percent_change, expected_bucket in test_cases: + if percent_change > 0.05: + bucket = "Gain more than 5%" + elif percent_change > 1e-3: + bucket = "Gain less than 5%" + elif percent_change > -1e-3: + bucket = "No change" + elif percent_change > -0.05: + bucket = "Lose less than 5%" + else: + bucket = "Lose more than 5%" + + assert ( + bucket == expected_bucket + ), f"Failed for {percent_change}: expected {expected_bucket}, got {bucket}" + + @patch( + "policyengine_api.endpoints.economy.compare.download_huggingface_dataset" + ) + @patch("policyengine_api.endpoints.economy.compare.h5py.File") + @patch("policyengine_api.endpoints.economy.compare.pd.read_csv") + def test__outcome_buckets_are_correct( + self, mock_read_csv, mock_h5py_file, mock_download + ): + mock_download.side_effect = [ + "/path/to/weights.h5", + "/path/to/names.csv", + ] + + mock_weights = np.ones((1, 10)) + mock_h5py_context = MagicMock() + mock_h5py_context.__enter__ = MagicMock( + return_value={"2025": mock_weights} + ) + mock_h5py_context.__exit__ = MagicMock(return_value=False) + mock_h5py_file.return_value = mock_h5py_context + + mock_la_df = pd.DataFrame( + { + "code": ["E06000001"], + "name": ["Hartlepool"], + "x": [8.0], + "y": [19.0], + } + ) + mock_read_csv.return_value = mock_la_df + + baseline = 
{"household_net_income": np.array([1000.0] * 10)} + # 10% gain - should be "Gain more than 5%" + reform = {"household_net_income": np.array([1100.0] * 10)} + + result = uk_local_authority_breakdown(baseline, reform, "uk") + + assert result.outcomes_by_region["uk"]["Gain more than 5%"] == 1 + assert result.outcomes_by_region["uk"]["Gain less than 5%"] == 0 + + @patch( + "policyengine_api.endpoints.economy.compare.download_huggingface_dataset" + ) + @patch("policyengine_api.endpoints.economy.compare.h5py.File") + @patch("policyengine_api.endpoints.economy.compare.pd.read_csv") + def test__downloads_from_correct_repos( + self, mock_read_csv, mock_h5py_file, mock_download + ): + mock_download.side_effect = [ + "/path/to/weights.h5", + "/path/to/names.csv", + ] + + mock_weights = np.ones((1, 10)) + mock_h5py_context = MagicMock() + mock_h5py_context.__enter__ = MagicMock( + return_value={"2025": mock_weights} + ) + mock_h5py_context.__exit__ = MagicMock(return_value=False) + mock_h5py_file.return_value = mock_h5py_context + + mock_la_df = pd.DataFrame( + { + "code": ["E06000001"], + "name": ["Test"], + "x": [0.0], + "y": [0.0], + } + ) + mock_read_csv.return_value = mock_la_df + + baseline = {"household_net_income": np.array([1000.0] * 10)} + reform = {"household_net_income": np.array([1000.0] * 10)} + + uk_local_authority_breakdown(baseline, reform, "uk") + + # Verify correct repos are used + calls = mock_download.call_args_list + assert ( + calls[0][1]["repo"] == "policyengine/policyengine-uk-data-private" + ) + assert calls[0][1]["repo_filename"] == "local_authority_weights.h5" + assert ( + calls[1][1]["repo"] == "policyengine/policyengine-uk-data-public" + ) + assert calls[1][1]["repo_filename"] == "local_authorities_2021.csv" + + def test__given_constituency_region__returns_none(self): + """When simulating a constituency, local authority breakdown should not be computed.""" + result = uk_local_authority_breakdown( + {}, {}, "uk", "constituency/Aldershot" + ) + 
assert result is None + + def test__given_constituency_region_with_code__returns_none(self): + """When simulating a constituency by code, local authority breakdown should not be computed.""" + result = uk_local_authority_breakdown( + {}, {}, "uk", "constituency/E12345678" + ) + assert result is None + + @patch( + "policyengine_api.endpoints.economy.compare.download_huggingface_dataset" + ) + @patch("policyengine_api.endpoints.economy.compare.h5py.File") + @patch("policyengine_api.endpoints.economy.compare.pd.read_csv") + def test__given_specific_la_region__returns_only_that_la( + self, mock_read_csv, mock_h5py_file, mock_download + ): + """When simulating a specific local authority, only that LA should be returned.""" + mock_download.side_effect = [ + "/path/to/weights.h5", + "/path/to/names.csv", + ] + + mock_weights = np.ones((3, 10)) + mock_h5py_context = MagicMock() + mock_h5py_context.__enter__ = MagicMock( + return_value={"2025": mock_weights} + ) + mock_h5py_context.__exit__ = MagicMock(return_value=False) + mock_h5py_file.return_value = mock_h5py_context + + mock_la_df = pd.DataFrame( + { + "code": ["E06000001", "S12000033", "W06000001"], + "name": ["Hartlepool", "Aberdeen City", "Isle of Anglesey"], + "x": [8.0, 5.0, 3.0], + "y": [19.0, 10.0, 15.0], + } + ) + mock_read_csv.return_value = mock_la_df + + baseline = {"household_net_income": np.array([1000.0] * 10)} + reform = {"household_net_income": np.array([1050.0] * 10)} + + result = uk_local_authority_breakdown( + baseline, reform, "uk", "local_authority/Hartlepool" + ) + + assert result is not None + assert len(result.by_local_authority) == 1 + assert "Hartlepool" in result.by_local_authority + assert "Aberdeen City" not in result.by_local_authority + assert "Isle of Anglesey" not in result.by_local_authority + + @patch( + "policyengine_api.endpoints.economy.compare.download_huggingface_dataset" + ) + @patch("policyengine_api.endpoints.economy.compare.h5py.File") + 
@patch("policyengine_api.endpoints.economy.compare.pd.read_csv") + def test__given_country_scotland_region__returns_only_scottish_las( + self, mock_read_csv, mock_h5py_file, mock_download + ): + """When simulating country/scotland, only Scottish local authorities should be returned.""" + mock_download.side_effect = [ + "/path/to/weights.h5", + "/path/to/names.csv", + ] + + mock_weights = np.ones((3, 10)) + mock_h5py_context = MagicMock() + mock_h5py_context.__enter__ = MagicMock( + return_value={"2025": mock_weights} + ) + mock_h5py_context.__exit__ = MagicMock(return_value=False) + mock_h5py_file.return_value = mock_h5py_context + + mock_la_df = pd.DataFrame( + { + "code": ["E06000001", "S12000033", "W06000001"], + "name": ["Hartlepool", "Aberdeen City", "Isle of Anglesey"], + "x": [8.0, 5.0, 3.0], + "y": [19.0, 10.0, 15.0], + } + ) + mock_read_csv.return_value = mock_la_df + + baseline = {"household_net_income": np.array([1000.0] * 10)} + reform = {"household_net_income": np.array([1050.0] * 10)} + + result = uk_local_authority_breakdown( + baseline, reform, "uk", "country/scotland" + ) + + assert result is not None + assert len(result.by_local_authority) == 1 + assert "Aberdeen City" in result.by_local_authority + assert "Hartlepool" not in result.by_local_authority + assert "Isle of Anglesey" not in result.by_local_authority + + @patch( + "policyengine_api.endpoints.economy.compare.download_huggingface_dataset" + ) + @patch("policyengine_api.endpoints.economy.compare.h5py.File") + @patch("policyengine_api.endpoints.economy.compare.pd.read_csv") + def test__given_uk_region__returns_all_las( + self, mock_read_csv, mock_h5py_file, mock_download + ): + """When simulating uk-wide, all local authorities should be returned.""" + mock_download.side_effect = [ + "/path/to/weights.h5", + "/path/to/names.csv", + ] + + mock_weights = np.ones((3, 10)) + mock_h5py_context = MagicMock() + mock_h5py_context.__enter__ = MagicMock( + return_value={"2025": mock_weights} + ) + 
mock_h5py_context.__exit__ = MagicMock(return_value=False) + mock_h5py_file.return_value = mock_h5py_context + + mock_la_df = pd.DataFrame( + { + "code": ["E06000001", "S12000033", "W06000001"], + "name": ["Hartlepool", "Aberdeen City", "Isle of Anglesey"], + "x": [8.0, 5.0, 3.0], + "y": [19.0, 10.0, 15.0], + } + ) + mock_read_csv.return_value = mock_la_df + + baseline = {"household_net_income": np.array([1000.0] * 10)} + reform = {"household_net_income": np.array([1050.0] * 10)} + + result = uk_local_authority_breakdown(baseline, reform, "uk", "uk") + + assert result is not None + assert len(result.by_local_authority) == 3 + assert "Hartlepool" in result.by_local_authority + assert "Aberdeen City" in result.by_local_authority + assert "Isle of Anglesey" in result.by_local_authority + + @patch( + "policyengine_api.endpoints.economy.compare.download_huggingface_dataset" + ) + @patch("policyengine_api.endpoints.economy.compare.h5py.File") + @patch("policyengine_api.endpoints.economy.compare.pd.read_csv") + def test__given_no_region__returns_all_las( + self, mock_read_csv, mock_h5py_file, mock_download + ): + """When no region specified (None), all local authorities should be returned.""" + mock_download.side_effect = [ + "/path/to/weights.h5", + "/path/to/names.csv", + ] + + mock_weights = np.ones((3, 10)) + mock_h5py_context = MagicMock() + mock_h5py_context.__enter__ = MagicMock( + return_value={"2025": mock_weights} + ) + mock_h5py_context.__exit__ = MagicMock(return_value=False) + mock_h5py_file.return_value = mock_h5py_context + + mock_la_df = pd.DataFrame( + { + "code": ["E06000001", "S12000033", "W06000001"], + "name": ["Hartlepool", "Aberdeen City", "Isle of Anglesey"], + "x": [8.0, 5.0, 3.0], + "y": [19.0, 10.0, 15.0], + } + ) + mock_read_csv.return_value = mock_la_df + + baseline = {"household_net_income": np.array([1000.0] * 10)} + reform = {"household_net_income": np.array([1050.0] * 10)} + + result = uk_local_authority_breakdown(baseline, reform, "uk", 
None) + + assert result is not None + assert len(result.by_local_authority) == 3 + + +class TestUKConstituencyBreakdownModels: + """Tests for the existing UK constituency breakdown models (for completeness).""" + + def test__constituency_breakdown_by_constituency_creates_instance(self): + breakdown = UKConstituencyBreakdownByConstituency( + average_household_income_change=200.0, + relative_household_income_change=0.04, + x=56, + y=-40, + ) + assert breakdown.average_household_income_change == 200.0 + assert breakdown.x == 56 + + def test__constituency_breakdown_creates_instance(self): + breakdown = UKConstituencyBreakdown( + by_constituency={ + "Aldershot": UKConstituencyBreakdownByConstituency( + average_household_income_change=150.0, + relative_household_income_change=0.03, + x=56, + y=-40, + ) + }, + outcomes_by_region={"uk": {"No change": 1}}, + ) + assert "Aldershot" in breakdown.by_constituency + + +class TestUKConstituencyBreakdownFunction: + """Tests for the uk_constituency_breakdown function.""" + + def test__given_non_uk_country__returns_none(self): + result = uk_constituency_breakdown({}, {}, "us") + assert result is None + + def test__given_non_uk_country_nigeria__returns_none(self): + result = uk_constituency_breakdown({}, {}, "ng") + assert result is None + + def test__given_local_authority_region__returns_none(self): + """When simulating a local authority, constituency breakdown should not be computed.""" + result = uk_constituency_breakdown( + {}, {}, "uk", "local_authority/Leicester" + ) + assert result is None + + def test__given_local_authority_region_with_code__returns_none(self): + """When simulating a local authority by code, constituency breakdown should not be computed.""" + result = uk_constituency_breakdown( + {}, {}, "uk", "local_authority/E06000016" + ) + assert result is None + + @patch( + "policyengine_api.endpoints.economy.compare.download_huggingface_dataset" + ) + @patch("policyengine_api.endpoints.economy.compare.h5py.File") + 
@patch("policyengine_api.endpoints.economy.compare.pd.read_csv") + def test__given_specific_constituency_region__returns_only_that_constituency( + self, mock_read_csv, mock_h5py_file, mock_download + ): + """When simulating a specific constituency, only that constituency should be returned.""" + mock_download.side_effect = [ + "/path/to/weights.h5", + "/path/to/names.csv", + ] + + # Create mock weights - 3 constituencies, 10 households + mock_weights = np.ones((3, 10)) + mock_h5py_context = MagicMock() + mock_h5py_context.__enter__ = MagicMock( + return_value={"2025": mock_weights} + ) + mock_h5py_context.__exit__ = MagicMock(return_value=False) + mock_h5py_file.return_value = mock_h5py_context + + # Create mock constituency names DataFrame + mock_const_df = pd.DataFrame( + { + "code": ["E12345678", "S12345678", "W12345678"], + "name": ["Aldershot", "Edinburgh East", "Cardiff South"], + "x": [10.0, 5.0, 3.0], + "y": [20.0, 15.0, 12.0], + } + ) + mock_read_csv.return_value = mock_const_df + + baseline = {"household_net_income": np.array([1000.0] * 10)} + reform = {"household_net_income": np.array([1050.0] * 10)} + + result = uk_constituency_breakdown( + baseline, reform, "uk", "constituency/Aldershot" + ) + + assert result is not None + assert len(result.by_constituency) == 1 + assert "Aldershot" in result.by_constituency + assert "Edinburgh East" not in result.by_constituency + assert "Cardiff South" not in result.by_constituency + + @patch( + "policyengine_api.endpoints.economy.compare.download_huggingface_dataset" + ) + @patch("policyengine_api.endpoints.economy.compare.h5py.File") + @patch("policyengine_api.endpoints.economy.compare.pd.read_csv") + def test__given_country_scotland_region__returns_only_scottish_constituencies( + self, mock_read_csv, mock_h5py_file, mock_download + ): + """When simulating country/scotland, only Scottish constituencies should be returned.""" + mock_download.side_effect = [ + "/path/to/weights.h5", + "/path/to/names.csv", + ] + + 
mock_weights = np.ones((3, 10)) + mock_h5py_context = MagicMock() + mock_h5py_context.__enter__ = MagicMock( + return_value={"2025": mock_weights} + ) + mock_h5py_context.__exit__ = MagicMock(return_value=False) + mock_h5py_file.return_value = mock_h5py_context + + mock_const_df = pd.DataFrame( + { + "code": ["E12345678", "S12345678", "W12345678"], + "name": ["Aldershot", "Edinburgh East", "Cardiff South"], + "x": [10.0, 5.0, 3.0], + "y": [20.0, 15.0, 12.0], + } + ) + mock_read_csv.return_value = mock_const_df + + baseline = {"household_net_income": np.array([1000.0] * 10)} + reform = {"household_net_income": np.array([1050.0] * 10)} + + result = uk_constituency_breakdown( + baseline, reform, "uk", "country/scotland" + ) + + assert result is not None + assert len(result.by_constituency) == 1 + assert "Edinburgh East" in result.by_constituency + assert "Aldershot" not in result.by_constituency + assert "Cardiff South" not in result.by_constituency + + @patch( + "policyengine_api.endpoints.economy.compare.download_huggingface_dataset" + ) + @patch("policyengine_api.endpoints.economy.compare.h5py.File") + @patch("policyengine_api.endpoints.economy.compare.pd.read_csv") + def test__given_uk_region__returns_all_constituencies( + self, mock_read_csv, mock_h5py_file, mock_download + ): + """When simulating uk-wide, all constituencies should be returned.""" + mock_download.side_effect = [ + "/path/to/weights.h5", + "/path/to/names.csv", + ] + + mock_weights = np.ones((3, 10)) + mock_h5py_context = MagicMock() + mock_h5py_context.__enter__ = MagicMock( + return_value={"2025": mock_weights} + ) + mock_h5py_context.__exit__ = MagicMock(return_value=False) + mock_h5py_file.return_value = mock_h5py_context + + mock_const_df = pd.DataFrame( + { + "code": ["E12345678", "S12345678", "W12345678"], + "name": ["Aldershot", "Edinburgh East", "Cardiff South"], + "x": [10.0, 5.0, 3.0], + "y": [20.0, 15.0, 12.0], + } + ) + mock_read_csv.return_value = mock_const_df + + baseline = 
{"household_net_income": np.array([1000.0] * 10)} + reform = {"household_net_income": np.array([1050.0] * 10)} + + result = uk_constituency_breakdown(baseline, reform, "uk", "uk") + + assert result is not None + assert len(result.by_constituency) == 3 + assert "Aldershot" in result.by_constituency + assert "Edinburgh East" in result.by_constituency + assert "Cardiff South" in result.by_constituency + + @patch( + "policyengine_api.endpoints.economy.compare.download_huggingface_dataset" + ) + @patch("policyengine_api.endpoints.economy.compare.h5py.File") + @patch("policyengine_api.endpoints.economy.compare.pd.read_csv") + def test__given_no_region__returns_all_constituencies( + self, mock_read_csv, mock_h5py_file, mock_download + ): + """When no region specified (None), all constituencies should be returned.""" + mock_download.side_effect = [ + "/path/to/weights.h5", + "/path/to/names.csv", + ] + + mock_weights = np.ones((3, 10)) + mock_h5py_context = MagicMock() + mock_h5py_context.__enter__ = MagicMock( + return_value={"2025": mock_weights} + ) + mock_h5py_context.__exit__ = MagicMock(return_value=False) + mock_h5py_file.return_value = mock_h5py_context + + mock_const_df = pd.DataFrame( + { + "code": ["E12345678", "S12345678", "W12345678"], + "name": ["Aldershot", "Edinburgh East", "Cardiff South"], + "x": [10.0, 5.0, 3.0], + "y": [20.0, 15.0, 12.0], + } + ) + mock_read_csv.return_value = mock_const_df + + baseline = {"household_net_income": np.array([1000.0] * 10)} + reform = {"household_net_income": np.array([1050.0] * 10)} + + result = uk_constituency_breakdown(baseline, reform, "uk", None) + + assert result is not None + assert len(result.by_constituency) == 3 diff --git a/tests/unit/services/test_metadata_service.py b/tests/unit/services/test_metadata_service.py index ac33d5250..70ea9262e 100644 --- a/tests/unit/services/test_metadata_service.py +++ b/tests/unit/services/test_metadata_service.py @@ -123,7 +123,7 @@ def test_verify_metadata_for_given_country( 
@pytest.mark.parametrize( "country_id, expected_types", [ - ("uk", ["national", "country", "constituency"]), + ("uk", ["national", "country", "constituency", "local_authority"]), ("us", ["national", "state", "city", "congressional_district"]), ], ) diff --git a/tests/unit/test_constants.py b/tests/unit/test_constants.py new file mode 100644 index 000000000..439d5a239 --- /dev/null +++ b/tests/unit/test_constants.py @@ -0,0 +1,85 @@ +import pytest + +from policyengine_api.constants import ( + UK_REGION_TYPES, + US_REGION_TYPES, + REGION_PREFIXES, +) + + +class TestUKRegionTypes: + """Tests for UK_REGION_TYPES constant.""" + + def test__contains_national(self): + assert "national" in UK_REGION_TYPES + + def test__contains_country(self): + assert "country" in UK_REGION_TYPES + + def test__contains_constituency(self): + assert "constituency" in UK_REGION_TYPES + + def test__contains_local_authority(self): + assert "local_authority" in UK_REGION_TYPES + + def test__has_exactly_four_types(self): + assert len(UK_REGION_TYPES) == 4 + + +class TestUSRegionTypes: + """Tests for US_REGION_TYPES constant.""" + + def test__contains_national(self): + assert "national" in US_REGION_TYPES + + def test__contains_state(self): + assert "state" in US_REGION_TYPES + + def test__contains_city(self): + assert "city" in US_REGION_TYPES + + def test__contains_congressional_district(self): + assert "congressional_district" in US_REGION_TYPES + + def test__has_exactly_four_types(self): + assert len(US_REGION_TYPES) == 4 + + +class TestRegionPrefixes: + """Tests for REGION_PREFIXES constant.""" + + class TestUKPrefixes: + """Tests for UK region prefixes.""" + + def test__uk_key_exists(self): + assert "uk" in REGION_PREFIXES + + def test__contains_country_prefix(self): + assert "country/" in REGION_PREFIXES["uk"] + + def test__contains_constituency_prefix(self): + assert "constituency/" in REGION_PREFIXES["uk"] + + def test__contains_local_authority_prefix(self): + assert "local_authority/" in 
REGION_PREFIXES["uk"] + + def test__has_exactly_three_prefixes(self): + assert len(REGION_PREFIXES["uk"]) == 3 + + class TestUSPrefixes: + """Tests for US region prefixes.""" + + def test__us_key_exists(self): + assert "us" in REGION_PREFIXES + + def test__contains_state_prefix(self): + assert "state/" in REGION_PREFIXES["us"] + + def test__contains_city_prefix(self): + assert "city/" in REGION_PREFIXES["us"] + + def test__contains_congressional_district_prefix(self): + assert "congressional_district/" in REGION_PREFIXES["us"] + + def test__has_exactly_three_prefixes(self): + assert len(REGION_PREFIXES["us"]) == 3 diff --git a/tests/unit/test_country.py b/tests/unit/test_country.py new file mode 100644 index 000000000..b57e8ceee --- /dev/null +++ b/tests/unit/test_country.py @@ -0,0 +1,152 @@ +import pytest +import pandas as pd +from pathlib import Path + +from policyengine_api.country import COUNTRIES + + +class TestUKCountryMetadata: + """Tests for UK country metadata, specifically local authority loading.""" + + @pytest.fixture + def uk_country(self): + return COUNTRIES["uk"] + + @pytest.fixture + def uk_regions(self, uk_country): + return uk_country.metadata["economy_options"]["region"] + + def test__uk_metadata_contains_local_authorities(self, uk_regions): + """Verify that local authorities are included in UK region options.""" + local_authority_regions = [ + r for r in uk_regions if r.get("type") == "local_authority" + ] + assert len(local_authority_regions) > 0 + + def test__uk_has_360_local_authorities(self, uk_regions): + """Verify the correct number of local authorities are loaded.""" + local_authority_regions = [ + r for r in uk_regions if r.get("type") == "local_authority" + ] + assert len(local_authority_regions) == 360 + + def test__local_authority_regions_have_correct_name_format( + self, uk_regions + ): + """Verify local authority region names have the correct prefix.""" + local_authority_regions = [ + r for r in uk_regions if r.get("type") == 
"local_authority" + ] + for region in local_authority_regions: + assert region["name"].startswith("local_authority/") + + def test__local_authority_regions_have_labels(self, uk_regions): + """Verify all local authority regions have labels.""" + local_authority_regions = [ + r for r in uk_regions if r.get("type") == "local_authority" + ] + for region in local_authority_regions: + assert "label" in region + assert len(region["label"]) > 0 + + def test__local_authority_regions_have_type_field(self, uk_regions): + """Verify all local authority regions have type field set correctly.""" + local_authority_regions = [ + r for r in uk_regions if r.get("type") == "local_authority" + ] + for region in local_authority_regions: + assert region["type"] == "local_authority" + + def test__specific_local_authorities_present(self, uk_regions): + """Verify specific local authorities are present in metadata.""" + local_authority_names = [ + r["name"] for r in uk_regions if r.get("type") == "local_authority" + ] + # Check some well-known local authorities + assert "local_authority/Hartlepool" in local_authority_names + assert "local_authority/Middlesbrough" in local_authority_names + assert "local_authority/Leicester" in local_authority_names + + def test__uk_still_has_constituencies(self, uk_regions): + """Verify constituencies are still present after adding local authorities.""" + constituency_regions = [ + r for r in uk_regions if r.get("type") == "constituency" + ] + assert len(constituency_regions) == 650 + + def test__uk_has_all_region_types(self, uk_regions): + """Verify all expected region types are present.""" + types = set(r.get("type") for r in uk_regions) + assert "national" in types + assert "country" in types + assert "constituency" in types + assert "local_authority" in types + + +class TestLocalAuthoritiesDataFile: + """Tests for the local authorities CSV data file.""" + + @pytest.fixture + def local_authorities_df(self): + path = ( + Path(__file__).parents[2] + / 
"policyengine_api" + / "data" + / "local_authorities_2021.csv" + ) + return pd.read_csv(path) + + def test__file_has_correct_columns(self, local_authorities_df): + """Verify the CSV has the expected columns.""" + expected_columns = {"code", "name", "x", "y"} + assert expected_columns == set(local_authorities_df.columns) + + def test__file_has_360_local_authorities(self, local_authorities_df): + """Verify the correct number of local authorities in file.""" + assert len(local_authorities_df) == 360 + + def test__all_codes_are_valid_ons_codes(self, local_authorities_df): + """Verify all codes follow ONS local authority code patterns.""" + for code in local_authorities_df["code"]: + # ONS codes start with E (England), S (Scotland), W (Wales), or N (Northern Ireland) + assert code[0] in ["E", "S", "W", "N"] + + def test__all_names_are_non_empty(self, local_authorities_df): + """Verify all local authority names are non-empty.""" + for name in local_authorities_df["name"]: + assert len(str(name)) > 0 + + def test__coordinates_are_numeric(self, local_authorities_df): + """Verify x and y coordinates are numeric.""" + assert local_authorities_df["x"].dtype in ["float64", "int64"] + assert local_authorities_df["y"].dtype in ["float64", "int64"] + + def test__english_local_authorities_have_e_prefix( + self, local_authorities_df + ): + """Verify English local authorities have E prefix codes.""" + english_las = local_authorities_df[ + local_authorities_df["code"].str.startswith("E") + ] + # England has 296 local authorities (majority of the 360 total) + assert len(english_las) == 296 + + def test__scottish_local_authorities_have_s_prefix( + self, local_authorities_df + ): + """Verify Scottish local authorities have S prefix codes.""" + scottish_las = local_authorities_df[ + local_authorities_df["code"].str.startswith("S") + ] + # Scotland has 32 council areas + assert len(scottish_las) == 32 + + def test__welsh_local_authorities_have_w_prefix( + self, local_authorities_df + ): + 
"""Verify Welsh local authorities have W prefix codes.""" + welsh_las = local_authorities_df[ + local_authorities_df["code"].str.startswith("W") + ] + # Wales has 22 principal areas + assert len(welsh_las) == 22