From a9e870e150f4958b6ef597c96b514d1fab2887f1 Mon Sep 17 00:00:00 2001
From: Anthony Volk <anth.volk@gmail.com>
Date: Sat, 13 Dec 2025 12:25:32 +0400
Subject: [PATCH 1/7] feat: Local authority breakdown

---
 policyengine_api/constants.py                 |   2 +
 policyengine_api/country.py                   |  12 +
 .../data/local_authorities_2021.csv           | 361 ++++++++++++++++++
 policyengine_api/endpoints/economy/compare.py |  98 +++++
 tests/unit/services/test_metadata_service.py  |   2 +-
 5 files changed, 474 insertions(+), 1 deletion(-)
 create mode 100644 policyengine_api/data/local_authorities_2021.csv

diff --git a/policyengine_api/constants.py b/policyengine_api/constants.py
index c22dabaa2..d1dfac30b 100644
--- a/policyengine_api/constants.py
+++ b/policyengine_api/constants.py
@@ -38,6 +38,7 @@
     "national",  # National level (e.g., "uk")
     "country",  # UK countries (e.g., "country/england", "country/scotland")
     "constituency",  # UK parliamentary constituencies (e.g., "constituency/Aldershot")
+    "local_authority",  # UK local authorities (e.g., "local_authority/Maidstone")
 )
 
 # Valid region prefixes for each country
@@ -51,6 +52,7 @@
     "uk": [
         "country/",  # UK countries (e.g., "country/england", "country/scotland")
         "constituency/",  # UK parliamentary constituencies (e.g., "constituency/Aldershot")
+        "local_authority/",  # UK local authorities (e.g., "local_authority/Maidstone")
     ],
 }
 
diff --git a/policyengine_api/country.py b/policyengine_api/country.py
index 44cb47476..4c602b347 100644
--- a/policyengine_api/country.py
+++ b/policyengine_api/country.py
@@ -92,6 +92,18 @@ def build_microsimulation_options(self) -> dict:
                         type="constituency",
                     )
                 )
+            local_authority_names_path = (
+                Path(__file__).parent / "data" / "local_authorities_2021.csv"
+            )
+            local_authority_names = pd.read_csv(local_authority_names_path)
+            for la_name in local_authority_names["name"]:  # one region per LA
+                region.append(
+                    dict(
+                        name=f"local_authority/{la_name}",
+                        label=la_name,
+                        type="local_authority",
+                    )
+                )
             time_period = [
                 dict(name=2024, label="2024"),
                 dict(name=2025, label="2025"),
diff --git a/policyengine_api/data/local_authorities_2021.csv b/policyengine_api/data/local_authorities_2021.csv
new file mode 100644
index 000000000..9fcf922ed
--- /dev/null
+++ b/policyengine_api/data/local_authorities_2021.csv
@@ -0,0 +1,361 @@
+code,x,y,name
+E06000001,8.0,19.0,Hartlepool
+E06000002,9.0,18.0,Middlesbrough
+E06000003,9.0,19.0,Redcar and Cleveland
+E06000004,8.0,18.0,Stockton-on-Tees
+E06000005,7.0,18.0,Darlington
+E06000006,1.0,11.0,Halton
+E06000007,2.0,11.0,Warrington
+E06000008,4.0,15.0,Blackburn with Darwen
+E06000009,2.0,15.0,Blackpool
+E06000010,10.0,15.0,"Kingston upon Hull, City of"
+E06000011,11.0,16.0,East Riding of Yorkshire
+E06000012,11.0,14.0,North East Lincolnshire
+E06000013,10.0,14.0,North Lincolnshire
+E06000014,9.0,17.0,York
+E06000015,6.0,11.0,Derby
+E06000016,8.0,8.0,Leicester
+E06000017,10.0,9.0,Rutland
+E06000018,8.0,10.0,Nottingham
+E06000019,0.0,8.0,"Herefordshire, County of"
+E06000020,2.0,9.0,Telford and Wrekin
+E06000021,3.0,10.0,Stoke-on-Trent
+E06000022,1.0,3.0,Bath and North East Somerset
+E06000023,0.0,3.0,"Bristol, City of"
+E06000024,0.0,2.0,North Somerset
+E06000025,1.0,4.0,South Gloucestershire
+E06000026,-4.0,-2.0,Plymouth
+E06000027,-3.0,-2.0,Torbay
+E06000030,2.0,4.0,Swindon
+E06000031,11.0,9.0,Peterborough
+E06000032,10.0,7.0,Luton
+E06000033,16.0,6.0,Southend-on-Sea
+E06000034,15.0,4.0,Thurrock
+E06000035,15.0,1.0,Medway
+E06000036,4.0,2.0,Bracknell Forest
+E06000037,2.0,2.0,West Berkshire
+E06000038,2.0,3.0,Reading
+E06000039,6.0,4.0,Slough
+E06000040,4.0,3.0,Windsor and Maidenhead
+E06000041,3.0,3.0,Wokingham
+E06000042,6.0,5.0,Milton Keynes
+E06000043,9.0,-2.0,Brighton and Hove
+E06000044,4.0,-1.0,Portsmouth
+E06000045,2.0,0.0,Southampton
+E06000046,1.0,-2.0,Isle of Wight
+E06000047,6.0,18.0,County Durham
+E06000049,4.0,11.0,Cheshire East
+E06000050,3.0,11.0,Cheshire West and Chester
+E06000051,1.0,9.0,Shropshire
+E06000052,-5.0,-2.0,Cornwall
+E06000053,-7.0,-3.0,Isles of Scilly
+E06000054,1.0,2.0,Wiltshire
+E06000055,9.0,7.0,Bedford
+E06000056,9.0,6.0,Central Bedfordshire
+E06000057,5.0,20.0,Northumberland
+E06000058,0.0,0.0,"Bournemouth, Christchurch and Poole"
+E06000059,-1.0,0.0,Dorset
+E06000060,5.0,5.0,Buckinghamshire
+E06000061,9.0,9.0,North Northamptonshire
+E06000062,7.0,6.0,West Northamptonshire
+E06000063,0.0,0.0,Cumberland
+E06000064,0.0,0.0,Westmorland and Furness
+E06000065,0.0,0.0,North Yorkshire
+E06000066,0.0,0.0,Somerset
+E07000008,12.0,8.0,Cambridge
+E07000009,12.0,9.0,East Cambridgeshire
+E07000010,13.0,10.0,Fenland
+E07000011,10.0,8.0,Huntingdonshire
+E07000012,11.0,8.0,South Cambridgeshire
+E07000032,7.0,11.0,Amber Valley
+E07000033,10.0,12.0,Bolsover
+E07000034,9.0,12.0,Chesterfield
+E07000035,7.0,12.0,Derbyshire Dales
+E07000036,7.0,9.0,Erewash
+E07000037,7.0,13.0,High Peak
+E07000038,8.0,12.0,North East Derbyshire
+E07000039,6.0,10.0,South Derbyshire
+E07000040,-2.0,-1.0,East Devon
+E07000041,-3.0,-1.0,Exeter
+E07000042,-2.0,0.0,Mid Devon
+E07000043,-3.0,1.0,North Devon
+E07000044,-4.0,-3.0,South Hams
+E07000045,-2.0,-2.0,Teignbridge
+E07000046,-4.0,-1.0,Torridge
+E07000047,-3.0,0.0,West Devon
+E07000061,10.0,-2.0,Eastbourne
+E07000062,13.0,-2.0,Hastings
+E07000063,10.0,-1.0,Lewes
+E07000064,12.0,-2.0,Rother
+E07000065,11.0,-2.0,Wealden
+E07000066,14.0,5.0,Basildon
+E07000067,14.0,7.0,Braintree
+E07000068,13.0,5.0,Brentwood
+E07000069,15.0,5.0,Castle Point
+E07000070,14.0,6.0,Chelmsford
+E07000071,15.0,8.0,Colchester
+E07000072,12.0,5.0,Epping Forest
+E07000073,13.0,6.0,Harlow
+E07000074,15.0,7.0,Maldon
+E07000075,15.0,6.0,Rochford
+E07000076,16.0,8.0,Tendring
+E07000077,13.0,7.0,Uttlesford
+E07000078,1.0,5.0,Cheltenham
+E07000079,2.0,5.0,Cotswold
+E07000080,-1.0,6.0,Forest of Dean
+E07000081,0.0,6.0,Gloucester
+E07000082,0.0,5.0,Stroud
+E07000083,1.0,6.0,Tewkesbury
+E07000084,2.0,1.0,Basingstoke and Deane
+E07000085,4.0,0.0,East Hampshire
+E07000086,3.0,0.0,Eastleigh
+E07000087,2.0,-1.0,Fareham
+E07000088,3.0,-1.0,Gosport
+E07000089,3.0,2.0,Hart
+E07000090,5.0,0.0,Havant
+E07000091,1.0,0.0,New Forest
+E07000092,4.0,1.0,Rushmoor
+E07000093,1.0,1.0,Test Valley
+E07000094,3.0,1.0,Winchester
+E07000095,12.0,6.0,Broxbourne
+E07000096,8.0,6.0,Dacorum
+E07000098,9.0,5.0,Hertsmere
+E07000099,11.0,7.0,North Hertfordshire
+E07000102,7.0,5.0,Three Rivers
+E07000103,8.0,5.0,Watford
+E07000105,12.0,-1.0,Ashford
+E07000106,15.0,0.0,Canterbury
+E07000107,13.0,1.0,Dartford
+E07000108,14.0,-1.0,Dover
+E07000109,14.0,1.0,Gravesham
+E07000110,14.0,0.0,Maidstone
+E07000111,12.0,0.0,Sevenoaks
+E07000112,13.0,-1.0,Folkestone and Hythe
+E07000113,16.0,0.0,Swale
+E07000114,15.0,-1.0,Thanet
+E07000115,13.0,0.0,Tonbridge and Malling
+E07000116,11.0,-1.0,Tunbridge Wells
+E07000117,6.0,15.0,Burnley
+E07000118,3.0,14.0,Chorley
+E07000119,4.0,16.0,Fylde
+E07000120,5.0,15.0,Hyndburn
+E07000121,3.0,17.0,Lancaster
+E07000122,6.0,16.0,Pendle
+E07000123,5.0,16.0,Preston
+E07000124,5.0,17.0,Ribble Valley
+E07000125,6.0,14.0,Rossendale
+E07000126,3.0,15.0,South Ribble
+E07000127,2.0,13.0,West Lancashire
+E07000128,3.0,16.0,Wyre
+E07000129,7.0,7.0,Blaby
+E07000130,8.0,9.0,Charnwood
+E07000131,8.0,7.0,Harborough
+E07000132,7.0,8.0,Hinckley and Bosworth
+E07000133,11.0,10.0,Melton
+E07000134,6.0,9.0,North West Leicestershire
+E07000135,9.0,8.0,Oadby and Wigston
+E07000136,12.0,12.0,Boston
+E07000137,12.0,13.0,East Lindsey
+E07000138,11.0,12.0,Lincoln
+E07000139,11.0,11.0,North Kesteven
+E07000140,12.0,11.0,South Holland
+E07000141,12.0,10.0,South Kesteven
+E07000142,11.0,13.0,West Lindsey
+E07000143,14.0,10.0,Breckland
+E07000144,15.0,12.0,Broadland
+E07000145,15.0,11.0,Great Yarmouth
+E07000146,13.0,11.0,King's Lynn and West Norfolk
+E07000147,14.0,12.0,North Norfolk
+E07000148,14.0,11.0,Norwich
+E07000149,15.0,10.0,South Norfolk
+E07000170,8.0,11.0,Ashfield
+E07000171,10.0,13.0,Bassetlaw
+E07000172,7.0,10.0,Broxtowe
+E07000173,9.0,10.0,Gedling
+E07000174,9.0,11.0,Mansfield
+E07000175,10.0,11.0,Newark and Sherwood
+E07000176,10.0,10.0,Rushcliffe
+E07000177,4.0,5.0,Cherwell
+E07000178,4.0,4.0,Oxford
+E07000179,5.0,4.0,South Oxfordshire
+E07000180,3.0,4.0,Vale of White Horse
+E07000181,3.0,5.0,West Oxfordshire
+E07000192,3.0,9.0,Cannock Chase
+E07000193,5.0,11.0,East Staffordshire
+E07000194,4.0,9.0,Lichfield
+E07000195,2.0,10.0,Newcastle-under-Lyme
+E07000196,2.0,8.0,South Staffordshire
+E07000197,4.0,10.0,Stafford
+E07000198,5.0,10.0,Staffordshire Moorlands
+E07000199,5.0,9.0,Tamworth
+E07000200,14.0,8.0,Babergh
+E07000202,15.0,9.0,Ipswich
+E07000203,14.0,9.0,Mid Suffolk
+E07000207,7.0,2.0,Elmbridge
+E07000208,8.0,0.0,Epsom and Ewell
+E07000209,5.0,1.0,Guildford
+E07000210,6.0,1.0,Mole Valley
+E07000211,7.0,0.0,Reigate and Banstead
+E07000212,5.0,3.0,Runnymede
+E07000213,6.0,3.0,Spelthorne
+E07000214,5.0,2.0,Surrey Heath
+E07000215,9.0,-1.0,Tandridge
+E07000216,6.0,0.0,Waverley
+E07000217,6.0,2.0,Woking
+E07000218,6.0,8.0,North Warwickshire
+E07000219,6.0,7.0,Nuneaton and Bedworth
+E07000220,6.0,6.0,Rugby
+E07000221,3.0,6.0,Stratford-on-Avon
+E07000222,4.0,6.0,Warwick
+E07000223,8.0,-2.0,Adur
+E07000224,6.0,-2.0,Arun
+E07000225,5.0,-1.0,Chichester
+E07000226,8.0,-1.0,Crawley
+E07000227,6.0,-1.0,Horsham
+E07000228,7.0,-1.0,Mid Sussex
+E07000229,7.0,-2.0,Worthing
+E07000234,2.0,7.0,Bromsgrove
+E07000235,-1.0,7.0,Malvern Hills
+E07000236,4.0,7.0,Redditch
+E07000237,0.0,7.0,Worcester
+E07000238,2.0,6.0,Wychavon
+E07000239,1.0,8.0,Wyre Forest
+E07000240,10.0,6.0,St Albans
+E07000241,11.0,6.0,Welwyn Hatfield
+E07000242,13.0,8.0,East Hertfordshire
+E07000243,12.0,7.0,Stevenage
+E07000244,16.0,10.0,East Suffolk
+E07000245,13.0,9.0,West Suffolk
+E08000001,4.0,14.0,Bolton
+E08000002,5.0,14.0,Bury
+E08000003,5.0,12.0,Manchester
+E08000004,5.0,13.0,Oldham
+E08000005,7.0,14.0,Rochdale
+E08000006,4.0,13.0,Salford
+E08000007,6.0,12.0,Stockport
+E08000008,6.0,13.0,Tameside
+E08000009,4.0,12.0,Trafford
+E08000010,3.0,13.0,Wigan
+E08000011,2.0,12.0,Knowsley
+E08000012,1.0,13.0,Liverpool
+E08000013,3.0,12.0,St. Helens
+E08000014,2.0,14.0,Sefton
+E08000015,1.0,12.0,Wirral
+E08000016,8.0,14.0,Barnsley
+E08000017,9.0,14.0,Doncaster
+E08000018,9.0,13.0,Rotherham
+E08000019,8.0,13.0,Sheffield
+E08000021,5.0,19.0,Newcastle upon Tyne
+E08000022,6.0,20.0,North Tyneside
+E08000023,7.0,20.0,South Tyneside
+E08000024,7.0,19.0,Sunderland
+E08000025,5.0,8.0,Birmingham
+E08000026,5.0,6.0,Coventry
+E08000027,1.0,7.0,Dudley
+E08000028,3.0,7.0,Sandwell
+E08000029,5.0,7.0,Solihull
+E08000030,4.0,8.0,Walsall
+E08000031,3.0,8.0,Wolverhampton
+E08000032,7.0,16.0,Bradford
+E08000033,7.0,15.0,Calderdale
+E08000034,8.0,15.0,Kirklees
+E08000035,8.0,16.0,Leeds
+E08000036,9.0,15.0,Wakefield
+E08000037,6.0,19.0,Gateshead
+E09000001,11.0,2.0,City of London
+E09000002,13.0,3.0,Barking and Dagenham
+E09000003,10.0,5.0,Barnet
+E09000004,12.0,1.0,Bexley
+E09000005,10.0,4.0,Brent
+E09000006,11.0,0.0,Bromley
+E09000007,11.0,4.0,Camden
+E09000008,10.0,0.0,Croydon
+E09000009,9.0,4.0,Ealing
+E09000010,11.0,5.0,Enfield
+E09000011,11.0,1.0,Greenwich
+E09000012,12.0,3.0,Hackney
+E09000013,8.0,3.0,Hammersmith and Fulham
+E09000014,12.0,4.0,Haringey
+E09000015,8.0,4.0,Harrow
+E09000016,14.0,3.0,Havering
+E09000017,7.0,4.0,Hillingdon
+E09000018,7.0,3.0,Hounslow
+E09000019,11.0,3.0,Islington
+E09000020,9.0,3.0,Kensington and Chelsea
+E09000021,7.0,1.0,Kingston upon Thames
+E09000022,10.0,2.0,Lambeth
+E09000023,10.0,1.0,Lewisham
+E09000024,8.0,1.0,Merton
+E09000025,13.0,2.0,Newham
+E09000026,14.0,4.0,Redbridge
+E09000027,8.0,2.0,Richmond upon Thames
+E09000028,9.0,1.0,Southwark
+E09000029,9.0,0.0,Sutton
+E09000030,12.0,2.0,Tower Hamlets
+E09000031,13.0,4.0,Waltham Forest
+E09000032,9.0,2.0,Wandsworth
+E09000033,10.0,3.0,Westminster
+N09000001,-4.0,16.0,Antrim and Newtownabbey
+N09000002,-5.0,16.0,"Armagh City, Banbridge and Craigavon"
+N09000003,-4.0,17.0,Belfast
+N09000004,-5.0,18.0,Causeway Coast and Glens
+N09000005,-6.0,17.0,Derry City and Strabane
+N09000006,-6.0,16.0,Fermanagh and Omagh
+N09000007,-5.0,15.0,Lisburn and Castlereagh
+N09000008,-4.0,18.0,Mid and East Antrim
+N09000009,-5.0,17.0,Mid Ulster
+N09000010,-4.0,15.0,"Newry, Mourne and Down"
+S12000005,2.0,24.0,Clackmannanshire
+S12000006,4.0,20.0,Dumfries and Galloway
+S12000008,3.0,20.0,East Ayrshire
+S12000010,5.0,22.0,East Lothian
+S12000011,2.0,20.0,East Renfrewshire
+S12000013,-1.0,27.0,Na h-Eileanan Siar
+S12000014,2.0,23.0,Falkirk
+S12000017,1.0,26.0,Highland
+S12000018,0.0,21.0,Inverclyde
+S12000019,3.0,21.0,Midlothian
+S12000020,2.0,26.0,Moray
+S12000021,1.0,20.0,North Ayrshire
+S12000023,4.0,28.0,Orkney Islands
+S12000026,4.0,21.0,Scottish Borders
+S12000027,5.0,30.0,Shetland Islands
+S12000028,1.0,19.0,South Ayrshire
+S12000029,2.0,21.0,South Lanarkshire
+S12000030,1.0,24.0,Stirling
+S12000033,4.0,26.0,Aberdeen City
+S12000034,3.0,26.0,Aberdeenshire
+S12000035,0.0,24.0,Argyll and Bute
+S12000036,4.0,22.0,City of Edinburgh
+S12000038,1.0,22.0,Renfrewshire
+S12000039,0.0,23.0,West Dunbartonshire
+S12000040,3.0,22.0,West Lothian
+S12000041,2.0,25.0,Angus
+S12000042,3.0,25.0,Dundee City
+S12000045,1.0,23.0,East Dunbartonshire
+S12000047,3.0,24.0,Fife
+S12000048,1.0,25.0,Perth and Kinross
+S12000049,1.0,21.0,Glasgow City
+S12000050,2.0,22.0,North Lanarkshire
+W06000001,-2.0,12.0,Isle of Anglesey
+W06000002,-2.0,10.0,Gwynedd
+W06000003,-1.0,10.0,Conwy
+W06000004,0.0,10.0,Denbighshire
+W06000005,0.0,11.0,Flintshire
+W06000006,1.0,10.0,Wrexham
+W06000008,-2.0,9.0,Ceredigion
+W06000009,-5.0,6.0,Pembrokeshire
+W06000010,-4.0,6.0,Carmarthenshire
+W06000011,-4.0,5.0,Swansea
+W06000012,-3.0,5.0,Neath Port Talbot
+W06000013,-3.0,6.0,Bridgend
+W06000014,-2.0,4.0,Vale of Glamorgan
+W06000015,-2.0,5.0,Cardiff
+W06000016,-3.0,7.0,Rhondda Cynon Taf
+W06000018,-2.0,6.0,Caerphilly
+W06000019,0.0,9.0,Blaenau Gwent
+W06000020,-2.0,7.0,Torfaen
+W06000021,-1.0,8.0,Monmouthshire
+W06000022,-1.0,5.0,Newport
+W06000023,-1.0,9.0,Powys
+W06000024,-2.0,8.0,Merthyr Tydfil
diff --git a/policyengine_api/endpoints/economy/compare.py b/policyengine_api/endpoints/economy/compare.py
index 1a21d40d0..38b098a7d 100644
--- a/policyengine_api/endpoints/economy/compare.py
+++ b/policyengine_api/endpoints/economy/compare.py
@@ -548,6 +548,18 @@ class UKConstituencyBreakdown(BaseModel):
     outcomes_by_region: dict[str, dict[str, int]]
 
 
+class UKLocalAuthorityBreakdownByLA(BaseModel):  # one local authority's result
+    average_household_income_change: float  # total change / baseline count
+    relative_household_income_change: float  # reform / baseline - 1
+    x: int  # "x" column of the local-authority CSV, cast to int
+    y: int  # "y" column of the local-authority CSV, cast to int
+
+
+class UKLocalAuthorityBreakdown(BaseModel):  # full local-authority payload
+    by_local_authority: dict[str, UKLocalAuthorityBreakdownByLA]  # by LA name
+    outcomes_by_region: dict[str, dict[str, int]]  # region -> bucket -> count
+
+
 def uk_constituency_breakdown(
     baseline: dict, reform: dict, country_id: str
 ) -> UKConstituencyBreakdown | None:
@@ -632,6 +644,86 @@ def uk_constituency_breakdown(
     return UKConstituencyBreakdown(**output)
 
 
+def uk_local_authority_breakdown(
+    baseline: dict, reform: dict, country_id: str
+) -> UKLocalAuthorityBreakdown | None:
+    """Break household net income changes down by UK local authority.
+
+    Returns None unless country_id is "uk". Weight row i is paired with row i
+    of the names CSV, so both files must list authorities in the same order.
+    """
+    if country_id != "uk":
+        return None
+
+    nations = {
+        "E": "england",
+        "S": "scotland",
+        "W": "wales",
+        "N": "northern_ireland",
+    }
+    output = {
+        "by_local_authority": {},
+        "outcomes_by_region": {},
+    }
+    for region in ["uk", *nations.values()]:
+        output["outcomes_by_region"][region] = {
+            "Gain more than 5%": 0,
+            "Gain less than 5%": 0,
+            "No change": 0,
+            "Lose less than 5%": 0,
+            "Lose more than 5%": 0,
+        }
+    baseline_hnet = baseline["household_net_income"]
+    reform_hnet = reform["household_net_income"]
+
+    local_authority_weights_path = download_huggingface_dataset(
+        repo="policyengine/policyengine-uk-data-private",
+        repo_filename="local_authority_weights.h5",
+    )
+    with h5py.File(local_authority_weights_path, "r") as f:
+        weights = f["2025"][...]
+
+    local_authority_names_path = download_huggingface_dataset(
+        repo="policyengine/policyengine-uk-data-public",
+        repo_filename="local_authorities_2021.csv",
+    )
+    local_authority_names = pd.read_csv(local_authority_names_path)
+
+    # Read each column once rather than rebuilding a row Series per field.
+    columns = (local_authority_names[c] for c in ("code", "name", "x", "y"))
+    for i, (code, name, x, y) in enumerate(zip(*columns)):
+        baseline_income = MicroSeries(baseline_hnet, weights=weights[i])
+        reform_income = MicroSeries(reform_hnet, weights=weights[i])
+        baseline_sum = baseline_income.sum()
+        reform_sum = reform_income.sum()
+        average_change = (reform_sum - baseline_sum) / baseline_income.count()
+        relative_change = reform_sum / baseline_sum - 1
+        output["by_local_authority"][name] = {
+            "average_household_income_change": average_change,
+            "relative_household_income_change": relative_change,
+            "x": int(x),
+            "y": int(y),
+        }
+
+        if relative_change > 0.05:
+            bucket = "Gain more than 5%"
+        elif relative_change > 1e-3:
+            bucket = "Gain less than 5%"
+        elif relative_change > -1e-3:
+            bucket = "No change"
+        elif relative_change > -0.05:
+            bucket = "Lose less than 5%"
+        else:
+            bucket = "Lose more than 5%"
+
+        output["outcomes_by_region"]["uk"][bucket] += 1
+        nation = nations.get(code[:1])  # unknown prefixes count for "uk" only
+        if nation is not None:
+            output["outcomes_by_region"][nation][bucket] += 1
+
+    return UKLocalAuthorityBreakdown(**output)
+
+
 def compare_economic_outputs(
     baseline: dict, reform: dict, country_id: str = None
 ) -> dict:
@@ -662,6 +754,11 @@ def compare_economic_outputs(
         )
         if constituency_impact_data is not None:
             constituency_impact_data = constituency_impact_data.model_dump()
+        local_authority_impact_data: UKLocalAuthorityBreakdown | None = (
+            uk_local_authority_breakdown(baseline, reform, country_id)
+        )
+        if local_authority_impact_data is not None:
+            local_authority_impact_data = local_authority_impact_data.model_dump()
         try:
             wealth_decile_impact_data = wealth_decile_impact(baseline, reform)
             intra_wealth_decile_impact_data = intra_wealth_decile_impact(
@@ -684,6 +781,7 @@ def compare_economic_outputs(
             intra_wealth_decile=intra_wealth_decile_impact_data,
             labor_supply_response=labor_supply_response_data,
             constituency_impact=constituency_impact_data,
+            local_authority_impact=local_authority_impact_data,
         )
     elif baseline.get("type") == "cliff":
         return dict(
diff --git a/tests/unit/services/test_metadata_service.py b/tests/unit/services/test_metadata_service.py
index ac33d5250..70ea9262e 100644
--- a/tests/unit/services/test_metadata_service.py
+++ b/tests/unit/services/test_metadata_service.py
@@ -123,7 +123,7 @@ def test_verify_metadata_for_given_country(
     @pytest.mark.parametrize(
         "country_id, expected_types",
         [
-            ("uk", ["national", "country", "constituency"]),
+            ("uk", ["national", "country", "constituency", "local_authority"]),
             ("us", ["national", "state", "city", "congressional_district"]),
         ],
     )

From c9f7a36099a972fe4c9b81dcb75eb53ec79a38be Mon Sep 17 00:00:00 2001
From: Anthony Volk <anth.volk@gmail.com>
Date: Sat, 13 Dec 2025 12:47:11 +0400
Subject: [PATCH 2/7] test: Add tests

---
 tests/unit/endpoints/__init__.py             |   0
 tests/unit/endpoints/economy/__init__.py     |   0
 tests/unit/endpoints/economy/test_compare.py | 353 +++++++++++++++++++
 tests/unit/test_constants.py                 |  85 +++++
 tests/unit/test_country.py                   | 154 ++++++++
 5 files changed, 592 insertions(+)
 create mode 100644 tests/unit/endpoints/__init__.py
 create mode 100644 tests/unit/endpoints/economy/__init__.py
 create mode 100644 tests/unit/endpoints/economy/test_compare.py
 create mode 100644 tests/unit/test_constants.py
 create mode 100644 tests/unit/test_country.py

diff --git a/tests/unit/endpoints/__init__.py b/tests/unit/endpoints/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/unit/endpoints/economy/__init__.py b/tests/unit/endpoints/economy/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/unit/endpoints/economy/test_compare.py b/tests/unit/endpoints/economy/test_compare.py
new file mode 100644
index 000000000..26fd40f5d
--- /dev/null
+++ b/tests/unit/endpoints/economy/test_compare.py
@@ -0,0 +1,353 @@
+import pytest
+from unittest.mock import patch, MagicMock
+import numpy as np
+import pandas as pd
+from pydantic import ValidationError
+
+from policyengine_api.endpoints.economy.compare import (
+    UKConstituencyBreakdownByConstituency,
+    UKConstituencyBreakdown,
+    UKLocalAuthorityBreakdownByLA,
+    UKLocalAuthorityBreakdown,
+    uk_constituency_breakdown,
+    uk_local_authority_breakdown,
+)
+
+
+class TestUKLocalAuthorityBreakdownByLA:
+    """Construction and validation of the UKLocalAuthorityBreakdownByLA model."""
+
+    def test__given_valid_data__creates_instance(self):
+        breakdown = UKLocalAuthorityBreakdownByLA(
+            average_household_income_change=100.50,
+            relative_household_income_change=0.05,
+            x=10,
+            y=20,
+        )
+        assert breakdown.average_household_income_change == 100.50
+        assert breakdown.relative_household_income_change == 0.05
+        assert breakdown.x == 10
+        assert breakdown.y == 20
+
+    def test__given_negative_income_change__creates_instance(self):
+        breakdown = UKLocalAuthorityBreakdownByLA(
+            average_household_income_change=-500.0,
+            relative_household_income_change=-0.03,
+            x=5,
+            y=-10,  # negative coordinates occur in the CSV (e.g. Plymouth)
+        )
+        assert breakdown.average_household_income_change == -500.0
+        assert breakdown.relative_household_income_change == -0.03
+
+    def test__given_zero_values__creates_instance(self):
+        breakdown = UKLocalAuthorityBreakdownByLA(
+            average_household_income_change=0.0,
+            relative_household_income_change=0.0,
+            x=0,
+            y=0,
+        )
+        assert breakdown.average_household_income_change == 0.0
+        assert breakdown.relative_household_income_change == 0.0
+
+    def test__given_missing_field__raises_validation_error(self):
+        with pytest.raises(ValidationError):  # no field declares a default
+            UKLocalAuthorityBreakdownByLA(
+                average_household_income_change=100.0,
+                # Missing relative_household_income_change
+                x=10,
+                y=20,
+            )
+
+
+class TestUKLocalAuthorityBreakdown:
+    """Construction and serialisation of the UKLocalAuthorityBreakdown model."""
+
+    def test__given_valid_data__creates_instance(self):
+        breakdown = UKLocalAuthorityBreakdown(
+            by_local_authority={
+                "Hartlepool": UKLocalAuthorityBreakdownByLA(
+                    average_household_income_change=100.0,
+                    relative_household_income_change=0.02,
+                    x=8,  # matches Hartlepool's row in local_authorities_2021.csv
+                    y=19,
+                )
+            },
+            outcomes_by_region={
+                # The model does not cross-check these tallies against
+                "uk": {"Gain more than 5%": 1, "No change": 0},
+                "england": {"Gain more than 5%": 1, "No change": 0},
+            },
+        )
+        assert "Hartlepool" in breakdown.by_local_authority
+        assert "uk" in breakdown.outcomes_by_region
+
+    def test__given_empty_by_local_authority__creates_instance(self):
+        breakdown = UKLocalAuthorityBreakdown(
+            by_local_authority={},  # an empty mapping is accepted
+            outcomes_by_region={
+                "uk": {"No change": 0},
+            },
+        )
+        assert len(breakdown.by_local_authority) == 0
+
+    def test__model_dump_returns_dict(self):
+        breakdown = UKLocalAuthorityBreakdown(
+            by_local_authority={
+                "Leicester": UKLocalAuthorityBreakdownByLA(
+                    average_household_income_change=50.0,
+                    relative_household_income_change=0.01,
+                    x=8,  # matches Leicester's row in local_authorities_2021.csv
+                    y=8,
+                )
+            },
+            outcomes_by_region={"uk": {"No change": 1}},
+        )
+        result = breakdown.model_dump()  # same call compare_economic_outputs makes
+        assert isinstance(result, dict)
+        assert "by_local_authority" in result
+        assert "outcomes_by_region" in result
+
+
+class TestUKLocalAuthorityBreakdownFunction:
+    """Tests for the uk_local_authority_breakdown function."""
+
+    def test__given_non_uk_country__returns_none(self):
+        result = uk_local_authority_breakdown({}, {}, "us")
+        assert result is None
+
+    def test__given_non_uk_country_canada__returns_none(self):
+        result = uk_local_authority_breakdown({}, {}, "ca")
+        assert result is None
+
+    @patch(
+        "policyengine_api.endpoints.economy.compare.download_huggingface_dataset"
+    )
+    @patch("policyengine_api.endpoints.economy.compare.h5py.File")
+    @patch("policyengine_api.endpoints.economy.compare.pd.read_csv")
+    def test__given_uk_country__returns_breakdown(
+        self, mock_read_csv, mock_h5py_file, mock_download
+    ):
+        # Setup mocks
+        mock_download.side_effect = [
+            "/path/to/weights.h5",
+            "/path/to/names.csv",
+        ]
+
+        # Create mock weights - 3 local authorities, 10 households
+        mock_weights = np.ones((3, 10))
+        mock_h5py_context = MagicMock()
+        mock_h5py_context.__enter__ = MagicMock(
+            return_value={"2025": mock_weights}
+        )
+        mock_h5py_context.__exit__ = MagicMock(return_value=False)
+        mock_h5py_file.return_value = mock_h5py_context
+
+        # Create mock local authority names DataFrame
+        mock_la_df = pd.DataFrame(
+            {
+                "code": ["E06000001", "S12000033", "W06000001"],
+                "name": ["Hartlepool", "Aberdeen City", "Isle of Anglesey"],
+                "x": [8.0, 5.0, 3.0],
+                "y": [19.0, 10.0, 15.0],
+            }
+        )
+        mock_read_csv.return_value = mock_la_df
+
+        # Create baseline and reform data
+        baseline = {"household_net_income": np.array([1000.0] * 10)}
+        reform = {"household_net_income": np.array([1050.0] * 10)}
+
+        result = uk_local_authority_breakdown(baseline, reform, "uk")
+
+        assert result is not None
+        assert isinstance(result, UKLocalAuthorityBreakdown)
+        assert "Hartlepool" in result.by_local_authority
+        assert "Aberdeen City" in result.by_local_authority
+        assert "Isle of Anglesey" in result.by_local_authority
+
+    @patch(
+        "policyengine_api.endpoints.economy.compare.download_huggingface_dataset"
+    )
+    @patch("policyengine_api.endpoints.economy.compare.h5py.File")
+    @patch("policyengine_api.endpoints.economy.compare.pd.read_csv")
+    def test__region_categorization_by_code_prefix(
+        self, mock_read_csv, mock_h5py_file, mock_download
+    ):
+        """Each LA is tallied under "uk" and the nation its code prefix implies."""
+        codes = ["E06000001", "S12000033", "W06000001", "N09000001"]
+        mock_h5py_context = MagicMock()
+        mock_h5py_context.__enter__ = MagicMock(
+            return_value={"2025": np.ones((len(codes), 10))}
+        )
+        mock_h5py_context.__exit__ = MagicMock(return_value=False)
+        mock_h5py_file.return_value = mock_h5py_context
+        mock_read_csv.return_value = pd.DataFrame(
+            {"code": codes, "name": codes, "x": [0.0] * 4, "y": [0.0] * 4}
+        )
+        income = {"household_net_income": np.array([1000.0] * 10)}
+        result = uk_local_authority_breakdown(income, income, "uk")
+        assert result.outcomes_by_region["uk"]["No change"] == len(codes)
+        for nation in ["england", "scotland", "wales", "northern_ireland"]:
+            assert result.outcomes_by_region[nation]["No change"] == 1
+
+    @patch(
+        "policyengine_api.endpoints.economy.compare.download_huggingface_dataset"
+    )
+    @patch("policyengine_api.endpoints.economy.compare.h5py.File")
+    @patch("policyengine_api.endpoints.economy.compare.pd.read_csv")
+    def test__outcome_bucket_categorization_logic(
+        self, mock_read_csv, mock_h5py_file, mock_download
+    ):
+        """Test that each size of income change lands in the expected bucket."""
+        # Thresholds: > 0.05 (5%), > 0.001 (0.1%), > -0.001, > -0.05
+        # Cases sit clear of the thresholds: the production ratio is computed
+        # in floating point, so exact-boundary inputs would depend on rounding.
+        test_cases = [
+            (1100.0, "Gain more than 5%"),  # 10% gain
+            (1030.0, "Gain less than 5%"),  # 3% gain
+            (1002.0, "Gain less than 5%"),  # 0.2% gain (> 0.001)
+            (1000.5, "No change"),  # 0.05% gain (within tolerance)
+            (1000.0, "No change"),  # No change
+            (999.5, "No change"),  # 0.05% loss (> -0.001)
+            (998.0, "Lose less than 5%"),  # 0.2% loss
+            (970.0, "Lose less than 5%"),  # 3% loss
+            (900.0, "Lose more than 5%"),  # 10% loss
+        ]
+        mock_h5py_context = MagicMock()
+        mock_h5py_context.__enter__ = MagicMock(
+            return_value={"2025": np.ones((1, 10))}
+        )
+        mock_h5py_context.__exit__ = MagicMock(return_value=False)
+        mock_h5py_file.return_value = mock_h5py_context
+        mock_read_csv.return_value = pd.DataFrame(
+            {"code": ["E06000001"], "name": ["Test"], "x": [0.0], "y": [0.0]}
+        )
+        baseline = {"household_net_income": np.array([1000.0] * 10)}
+        for reform_income, expected_bucket in test_cases:
+            reform = {"household_net_income": np.array([reform_income] * 10)}
+            result = uk_local_authority_breakdown(baseline, reform, "uk")
+            assert (
+                result.outcomes_by_region["uk"][expected_bucket] == 1
+            ), f"Failed for {reform_income}: expected {expected_bucket}"
+
+    @patch(
+        "policyengine_api.endpoints.economy.compare.download_huggingface_dataset"
+    )
+    @patch("policyengine_api.endpoints.economy.compare.h5py.File")
+    @patch("policyengine_api.endpoints.economy.compare.pd.read_csv")
+    def test__outcome_buckets_are_correct(
+        self, mock_read_csv, mock_h5py_file, mock_download
+    ):
+        mock_download.side_effect = [
+            "/path/to/weights.h5",
+            "/path/to/names.csv",
+        ]
+
+        mock_weights = np.ones((1, 10))
+        mock_h5py_context = MagicMock()
+        mock_h5py_context.__enter__ = MagicMock(
+            return_value={"2025": mock_weights}
+        )
+        mock_h5py_context.__exit__ = MagicMock(return_value=False)
+        mock_h5py_file.return_value = mock_h5py_context
+
+        mock_la_df = pd.DataFrame(
+            {
+                "code": ["E06000001"],
+                "name": ["Hartlepool"],
+                "x": [8.0],
+                "y": [19.0],
+            }
+        )
+        mock_read_csv.return_value = mock_la_df
+
+        baseline = {"household_net_income": np.array([1000.0] * 10)}
+        # 10% gain - should be "Gain more than 5%"
+        reform = {"household_net_income": np.array([1100.0] * 10)}
+
+        result = uk_local_authority_breakdown(baseline, reform, "uk")
+
+        assert result.outcomes_by_region["uk"]["Gain more than 5%"] == 1
+        assert result.outcomes_by_region["uk"]["Gain less than 5%"] == 0
+
+    @patch(
+        "policyengine_api.endpoints.economy.compare.download_huggingface_dataset"
+    )
+    @patch("policyengine_api.endpoints.economy.compare.h5py.File")
+    @patch("policyengine_api.endpoints.economy.compare.pd.read_csv")
+    def test__downloads_from_correct_repos(
+        self, mock_read_csv, mock_h5py_file, mock_download
+    ):
+        mock_download.side_effect = [
+            "/path/to/weights.h5",
+            "/path/to/names.csv",
+        ]
+
+        mock_weights = np.ones((1, 10))
+        mock_h5py_context = MagicMock()
+        mock_h5py_context.__enter__ = MagicMock(
+            return_value={"2025": mock_weights}
+        )
+        mock_h5py_context.__exit__ = MagicMock(return_value=False)
+        mock_h5py_file.return_value = mock_h5py_context
+
+        mock_la_df = pd.DataFrame(
+            {
+                "code": ["E06000001"],
+                "name": ["Test"],
+                "x": [0.0],
+                "y": [0.0],
+            }
+        )
+        mock_read_csv.return_value = mock_la_df
+
+        baseline = {"household_net_income": np.array([1000.0] * 10)}
+        reform = {"household_net_income": np.array([1000.0] * 10)}
+
+        uk_local_authority_breakdown(baseline, reform, "uk")
+
+        # Verify correct repos are used
+        calls = mock_download.call_args_list
+        assert calls[0][1]["repo"] == "policyengine/policyengine-uk-data-private"
+        assert calls[0][1]["repo_filename"] == "local_authority_weights.h5"
+        assert calls[1][1]["repo"] == "policyengine/policyengine-uk-data-public"
+        assert calls[1][1]["repo_filename"] == "local_authorities_2021.csv"
+
+
+class TestUKConstituencyBreakdownModels:
+    """Smoke tests for the pre-existing UK constituency breakdown models."""
+
+    def test__constituency_breakdown_by_constituency_creates_instance(self):
+        breakdown = UKConstituencyBreakdownByConstituency(
+            average_household_income_change=200.0,
+            relative_household_income_change=0.04,
+            x=56,
+            y=-40,
+        )
+        assert breakdown.average_household_income_change == 200.0
+        assert breakdown.x == 56  # only two of the four fields are checked
+
+    def test__constituency_breakdown_creates_instance(self):
+        breakdown = UKConstituencyBreakdown(
+            by_constituency={
+                "Aldershot": UKConstituencyBreakdownByConstituency(
+                    average_household_income_change=150.0,
+                    relative_household_income_change=0.03,
+                    x=56,
+                    y=-40,
+                )
+            },
+            outcomes_by_region={"uk": {"No change": 1}},
+        )
+        assert "Aldershot" in breakdown.by_constituency  # keyed by name
+
+
+class TestUKConstituencyBreakdownFunction:
+    """Tests that uk_constituency_breakdown returns None for non-UK countries."""
+
+    def test__given_non_uk_country__returns_none(self):
+        result = uk_constituency_breakdown({}, {}, "us")  # empty inputs suffice
+        assert result is None
+
+    def test__given_non_uk_country_nigeria__returns_none(self):
+        result = uk_constituency_breakdown({}, {}, "ng")
+        assert result is None
diff --git a/tests/unit/test_constants.py b/tests/unit/test_constants.py
new file mode 100644
index 000000000..439d5a239
--- /dev/null
+++ b/tests/unit/test_constants.py
@@ -0,0 +1,85 @@
+import pytest
+
+from policyengine_api.constants import (
+    UK_REGION_TYPES,
+    US_REGION_TYPES,
+    REGION_PREFIXES,
+)
+
+
+class TestUKRegionTypes:
+    """Tests for UK_REGION_TYPES constant."""
+
+    def test__contains_national(self):
+        assert "national" in UK_REGION_TYPES
+
+    def test__contains_country(self):
+        assert "country" in UK_REGION_TYPES
+
+    def test__contains_constituency(self):
+        assert "constituency" in UK_REGION_TYPES
+
+    def test__contains_local_authority(self):
+        assert "local_authority" in UK_REGION_TYPES
+
+    def test__has_exactly_four_types(self):
+        assert len(UK_REGION_TYPES) == 4
+
+
+class TestUSRegionTypes:
+    """Tests for US_REGION_TYPES constant."""
+
+    def test__contains_national(self):
+        assert "national" in US_REGION_TYPES
+
+    def test__contains_state(self):
+        assert "state" in US_REGION_TYPES
+
+    def test__contains_city(self):
+        assert "city" in US_REGION_TYPES
+
+    def test__contains_congressional_district(self):
+        assert "congressional_district" in US_REGION_TYPES
+
+    def test__has_exactly_four_types(self):
+        assert len(US_REGION_TYPES) == 4
+
+
+class TestRegionPrefixes:
+    """Tests for REGION_PREFIXES constant."""
+
+    class TestUKPrefixes:
+        """Tests for UK region prefixes."""
+
+        def test__uk_key_exists(self):
+            assert "uk" in REGION_PREFIXES
+
+        def test__contains_country_prefix(self):
+            assert "country/" in REGION_PREFIXES["uk"]
+
+        def test__contains_constituency_prefix(self):
+            assert "constituency/" in REGION_PREFIXES["uk"]
+
+        def test__contains_local_authority_prefix(self):
+            assert "local_authority/" in REGION_PREFIXES["uk"]
+
+        def test__has_exactly_three_prefixes(self):
+            assert len(REGION_PREFIXES["uk"]) == 3
+
+    class TestUSPrefixes:
+        """Tests for US region prefixes."""
+
+        def test__us_key_exists(self):
+            assert "us" in REGION_PREFIXES
+
+        def test__contains_state_prefix(self):
+            assert "state/" in REGION_PREFIXES["us"]
+
+        def test__contains_city_prefix(self):
+            assert "city/" in REGION_PREFIXES["us"]
+
+        def test__contains_congressional_district_prefix(self):
+            assert "congressional_district/" in REGION_PREFIXES["us"]
+
+        def test__has_exactly_three_prefixes(self):
+            assert len(REGION_PREFIXES["us"]) == 3
diff --git a/tests/unit/test_country.py b/tests/unit/test_country.py
new file mode 100644
index 000000000..1b597ec0a
--- /dev/null
+++ b/tests/unit/test_country.py
@@ -0,0 +1,154 @@
+import pytest
+import pandas as pd
+from pathlib import Path
+
+from policyengine_api.country import COUNTRIES
+
+
+class TestUKCountryMetadata:
+    """Tests for UK country metadata, specifically local authority loading."""
+
+    @pytest.fixture
+    def uk_country(self):
+        return COUNTRIES["uk"]
+
+    @pytest.fixture
+    def uk_regions(self, uk_country):
+        return uk_country.metadata["economy_options"]["region"]
+
+    def test__uk_metadata_contains_local_authorities(self, uk_regions):
+        """Verify that local authorities are included in UK region options."""
+        local_authority_regions = [
+            r for r in uk_regions if r.get("type") == "local_authority"
+        ]
+        assert len(local_authority_regions) > 0
+
+    def test__uk_has_360_local_authorities(self, uk_regions):
+        """Verify the correct number of local authorities are loaded."""
+        local_authority_regions = [
+            r for r in uk_regions if r.get("type") == "local_authority"
+        ]
+        assert len(local_authority_regions) == 360
+
+    def test__local_authority_regions_have_correct_name_format(
+        self, uk_regions
+    ):
+        """Verify local authority region names have the correct prefix."""
+        local_authority_regions = [
+            r for r in uk_regions if r.get("type") == "local_authority"
+        ]
+        for region in local_authority_regions:
+            assert region["name"].startswith("local_authority/")
+
+    def test__local_authority_regions_have_labels(self, uk_regions):
+        """Verify all local authority regions have labels."""
+        local_authority_regions = [
+            r for r in uk_regions if r.get("type") == "local_authority"
+        ]
+        for region in local_authority_regions:
+            assert "label" in region
+            assert len(region["label"]) > 0
+
+    def test__local_authority_regions_have_type_field(self, uk_regions):
+        """Verify all local authority regions have type field set correctly."""
+        local_authority_regions = [
+            r for r in uk_regions if r.get("type") == "local_authority"
+        ]
+        for region in local_authority_regions:
+            assert region["type"] == "local_authority"
+
+    def test__specific_local_authorities_present(self, uk_regions):
+        """Verify specific local authorities are present in metadata."""
+        local_authority_names = [
+            r["name"]
+            for r in uk_regions
+            if r.get("type") == "local_authority"
+        ]
+        # Check some well-known local authorities
+        assert "local_authority/Hartlepool" in local_authority_names
+        assert "local_authority/Middlesbrough" in local_authority_names
+        assert "local_authority/Leicester" in local_authority_names
+
+    def test__uk_still_has_constituencies(self, uk_regions):
+        """Verify constituencies are still present after adding local authorities."""
+        constituency_regions = [
+            r for r in uk_regions if r.get("type") == "constituency"
+        ]
+        assert len(constituency_regions) == 650
+
+    def test__uk_has_all_region_types(self, uk_regions):
+        """Verify all expected region types are present."""
+        types = set(r.get("type") for r in uk_regions)
+        assert "national" in types
+        assert "country" in types
+        assert "constituency" in types
+        assert "local_authority" in types
+
+
+class TestLocalAuthoritiesDataFile:
+    """Tests for the local authorities CSV data file."""
+
+    @pytest.fixture
+    def local_authorities_df(self):
+        path = (
+            Path(__file__).parents[2]
+            / "policyengine_api"
+            / "data"
+            / "local_authorities_2021.csv"
+        )
+        return pd.read_csv(path)
+
+    def test__file_has_correct_columns(self, local_authorities_df):
+        """Verify the CSV has the expected columns."""
+        expected_columns = {"code", "name", "x", "y"}
+        assert expected_columns == set(local_authorities_df.columns)
+
+    def test__file_has_360_local_authorities(self, local_authorities_df):
+        """Verify the correct number of local authorities in file."""
+        assert len(local_authorities_df) == 360
+
+    def test__all_codes_are_valid_ons_codes(self, local_authorities_df):
+        """Verify all codes follow ONS local authority code patterns."""
+        for code in local_authorities_df["code"]:
+            # ONS codes start with E (England), S (Scotland), W (Wales), or N (Northern Ireland)
+            assert code[0] in ["E", "S", "W", "N"]
+
+    def test__all_names_are_non_empty(self, local_authorities_df):
+        """Verify all local authority names are non-empty."""
+        for name in local_authorities_df["name"]:
+            assert len(str(name)) > 0
+
+    def test__coordinates_are_numeric(self, local_authorities_df):
+        """Verify x and y coordinates are numeric."""
+        assert local_authorities_df["x"].dtype in ["float64", "int64"]
+        assert local_authorities_df["y"].dtype in ["float64", "int64"]
+
+    def test__english_local_authorities_have_e_prefix(
+        self, local_authorities_df
+    ):
+        """Verify English local authorities have E prefix codes."""
+        english_las = local_authorities_df[
+            local_authorities_df["code"].str.startswith("E")
+        ]
+        # England has 296 local authorities (majority of the 360 total)
+        assert len(english_las) == 296
+
+    def test__scottish_local_authorities_have_s_prefix(
+        self, local_authorities_df
+    ):
+        """Verify Scottish local authorities have S prefix codes."""
+        scottish_las = local_authorities_df[
+            local_authorities_df["code"].str.startswith("S")
+        ]
+        # Scotland has 32 council areas
+        assert len(scottish_las) == 32
+
+    def test__welsh_local_authorities_have_w_prefix(
+        self, local_authorities_df
+    ):
+        """Verify Welsh local authorities have W prefix codes."""
+        welsh_las = local_authorities_df[
+            local_authorities_df["code"].str.startswith("W")
+        ]
+        # Wales has 22 principal areas
+        assert len(welsh_las) == 22

From 32ab3e7b21a6e1024c6d8fa0ca2fd982afd39a60 Mon Sep 17 00:00:00 2001
From: Anthony Volk <anth.volk@gmail.com>
Date: Mon, 15 Dec 2025 13:29:52 +0400
Subject: [PATCH 3/7] fix: Properly filter outputs for a given LA or PC

---
 policyengine_api/endpoints/economy/compare.py |  88 ++++-
 tests/unit/endpoints/economy/test_compare.py  | 372 ++++++++++++++++++
 2 files changed, 451 insertions(+), 9 deletions(-)

diff --git a/policyengine_api/endpoints/economy/compare.py b/policyengine_api/endpoints/economy/compare.py
index 38b098a7d..f82f7eaa5 100644
--- a/policyengine_api/endpoints/economy/compare.py
+++ b/policyengine_api/endpoints/economy/compare.py
@@ -561,17 +561,31 @@ class UKLocalAuthorityBreakdown(BaseModel):
 
 
 def uk_constituency_breakdown(
-    baseline: dict, reform: dict, country_id: str
+    baseline: dict, reform: dict, country_id: str, region: str | None = None
 ) -> UKConstituencyBreakdown | None:
     if country_id != "uk":
         return None
 
+    # If simulating a local authority, constituency breakdown is not applicable
+    if region is not None and region.startswith("local_authority/"):
+        return None
+
+    # Determine if we're filtering to a specific constituency
+    selected_constituency = None
+    if region is not None and region.startswith("constituency/"):
+        selected_constituency = region.split("/", 1)[1]
+
+    # Determine if we're filtering to a specific country
+    selected_country = None
+    if region is not None and region.startswith("country/"):
+        selected_country = region.split("/", 1)[1].upper()
+
     output = {
         "by_constituency": {},
         "outcomes_by_region": {},
     }
-    for region in ["uk", "england", "scotland", "wales", "northern_ireland"]:
-        output["outcomes_by_region"][region] = {
+    for region_name in ["uk", "england", "scotland", "wales", "northern_ireland"]:
+        output["outcomes_by_region"][region_name] = {
             "Gain more than 5%": 0,
             "Gain less than 5%": 0,
             "No change": 0,
@@ -601,6 +615,23 @@ def uk_constituency_breakdown(
     for i in range(len(constituency_names)):
         name: str = constituency_names.iloc[i]["name"]
         code: str = constituency_names.iloc[i]["code"]
+
+        # Filter to specific constituency if requested
+        if selected_constituency is not None:
+            if name != selected_constituency and code != selected_constituency:
+                continue
+
+        # Filter to specific country if requested
+        if selected_country is not None:
+            if selected_country == "ENGLAND" and "E" not in code:
+                continue
+            elif selected_country == "SCOTLAND" and "S" not in code:
+                continue
+            elif selected_country == "WALES" and "W" not in code:
+                continue
+            elif selected_country == "NORTHERN_IRELAND" and "N" not in code:
+                continue
+
         weight: np.ndarray = weights[i]
         baseline_income = MicroSeries(baseline_hnet, weights=weight)
         reform_income = MicroSeries(reform_hnet, weights=weight)
@@ -645,17 +676,31 @@ def uk_constituency_breakdown(
 
 
 def uk_local_authority_breakdown(
-    baseline: dict, reform: dict, country_id: str
+    baseline: dict, reform: dict, country_id: str, region: str | None = None
 ) -> UKLocalAuthorityBreakdown | None:
     if country_id != "uk":
         return None
 
+    # If simulating a constituency, local authority breakdown is not applicable
+    if region is not None and region.startswith("constituency/"):
+        return None
+
+    # Determine if we're filtering to a specific local authority
+    selected_la = None
+    if region is not None and region.startswith("local_authority/"):
+        selected_la = region.split("/", 1)[1]
+
+    # Determine if we're filtering to a specific country
+    selected_country = None
+    if region is not None and region.startswith("country/"):
+        selected_country = region.split("/", 1)[1].lower()
+
     output = {
         "by_local_authority": {},
         "outcomes_by_region": {},
     }
-    for region in ["uk", "england", "scotland", "wales", "northern_ireland"]:
-        output["outcomes_by_region"][region] = {
+    for region_name in ["uk", "england", "scotland", "wales", "northern_ireland"]:
+        output["outcomes_by_region"][region_name] = {
             "Gain more than 5%": 0,
             "Gain less than 5%": 0,
             "No change": 0,
@@ -681,6 +726,25 @@ def uk_local_authority_breakdown(
     for i in range(len(local_authority_names)):
         name: str = local_authority_names.iloc[i]["name"]
         code: str = local_authority_names.iloc[i]["code"]
+
+        # Filter to specific local authority if requested
+        if selected_la is not None:
+            if name != selected_la and code != selected_la:
+                continue
+
+        # Filter to specific country if requested
+        if selected_country is not None:
+            if selected_country == "england" and not code.startswith("E"):
+                continue
+            elif selected_country == "scotland" and not code.startswith("S"):
+                continue
+            elif selected_country == "wales" and not code.startswith("W"):
+                continue
+            elif selected_country == "northern_ireland" and not code.startswith(
+                "N"
+            ):
+                continue
+
         weight: np.ndarray = weights[i]
         baseline_income = MicroSeries(baseline_hnet, weights=weight)
         reform_income = MicroSeries(reform_hnet, weights=weight)
@@ -725,7 +789,10 @@ def uk_local_authority_breakdown(
 
 
 def compare_economic_outputs(
-    baseline: dict, reform: dict, country_id: str = None
+    baseline: dict,
+    reform: dict,
+    country_id: str = None,
+    region: str | None = None,
 ) -> dict:
     """
     Compare the economic outputs of two economies.
@@ -733,6 +800,9 @@ def compare_economic_outputs(
     Args:
         baseline (dict): The baseline economy.
         reform (dict): The reform economy.
+        country_id (str): The country identifier (e.g., "uk", "us").
+        region (str | None): The region filter (e.g., "uk", "local_authority/Leicester",
+            "constituency/Aldershot", "country/scotland"). Used to filter breakdown results.
 
     Returns:
         dict: The comparison of the two economies.
@@ -750,12 +820,12 @@ def compare_economic_outputs(
         intra_decile_impact_data = intra_decile_impact(baseline, reform)
         labor_supply_response_data = labor_supply_response(baseline, reform)
         constituency_impact_data: UKConstituencyBreakdown | None = (
-            uk_constituency_breakdown(baseline, reform, country_id)
+            uk_constituency_breakdown(baseline, reform, country_id, region)
         )
         if constituency_impact_data is not None:
             constituency_impact_data = constituency_impact_data.model_dump()
         local_authority_impact_data: UKLocalAuthorityBreakdown | None = (
-            uk_local_authority_breakdown(baseline, reform, country_id)
+            uk_local_authority_breakdown(baseline, reform, country_id, region)
         )
         if local_authority_impact_data is not None:
             local_authority_impact_data = local_authority_impact_data.model_dump()
diff --git a/tests/unit/endpoints/economy/test_compare.py b/tests/unit/endpoints/economy/test_compare.py
index 26fd40f5d..0b8cf5f94 100644
--- a/tests/unit/endpoints/economy/test_compare.py
+++ b/tests/unit/endpoints/economy/test_compare.py
@@ -312,6 +312,193 @@ def test__downloads_from_correct_repos(
         assert calls[1][1]["repo"] == "policyengine/policyengine-uk-data-public"
         assert calls[1][1]["repo_filename"] == "local_authorities_2021.csv"
 
+    def test__given_constituency_region__returns_none(self):
+        """When simulating a constituency, local authority breakdown should not be computed."""
+        result = uk_local_authority_breakdown(
+            {}, {}, "uk", "constituency/Aldershot"
+        )
+        assert result is None
+
+    def test__given_constituency_region_with_code__returns_none(self):
+        """When simulating a constituency by code, local authority breakdown should not be computed."""
+        result = uk_local_authority_breakdown(
+            {}, {}, "uk", "constituency/E12345678"
+        )
+        assert result is None
+
+    @patch(
+        "policyengine_api.endpoints.economy.compare.download_huggingface_dataset"
+    )
+    @patch("policyengine_api.endpoints.economy.compare.h5py.File")
+    @patch("policyengine_api.endpoints.economy.compare.pd.read_csv")
+    def test__given_specific_la_region__returns_only_that_la(
+        self, mock_read_csv, mock_h5py_file, mock_download
+    ):
+        """When simulating a specific local authority, only that LA should be returned."""
+        mock_download.side_effect = [
+            "/path/to/weights.h5",
+            "/path/to/names.csv",
+        ]
+
+        mock_weights = np.ones((3, 10))
+        mock_h5py_context = MagicMock()
+        mock_h5py_context.__enter__ = MagicMock(
+            return_value={"2025": mock_weights}
+        )
+        mock_h5py_context.__exit__ = MagicMock(return_value=False)
+        mock_h5py_file.return_value = mock_h5py_context
+
+        mock_la_df = pd.DataFrame(
+            {
+                "code": ["E06000001", "S12000033", "W06000001"],
+                "name": ["Hartlepool", "Aberdeen City", "Isle of Anglesey"],
+                "x": [8.0, 5.0, 3.0],
+                "y": [19.0, 10.0, 15.0],
+            }
+        )
+        mock_read_csv.return_value = mock_la_df
+
+        baseline = {"household_net_income": np.array([1000.0] * 10)}
+        reform = {"household_net_income": np.array([1050.0] * 10)}
+
+        result = uk_local_authority_breakdown(
+            baseline, reform, "uk", "local_authority/Hartlepool"
+        )
+
+        assert result is not None
+        assert len(result.by_local_authority) == 1
+        assert "Hartlepool" in result.by_local_authority
+        assert "Aberdeen City" not in result.by_local_authority
+        assert "Isle of Anglesey" not in result.by_local_authority
+
+    @patch(
+        "policyengine_api.endpoints.economy.compare.download_huggingface_dataset"
+    )
+    @patch("policyengine_api.endpoints.economy.compare.h5py.File")
+    @patch("policyengine_api.endpoints.economy.compare.pd.read_csv")
+    def test__given_country_scotland_region__returns_only_scottish_las(
+        self, mock_read_csv, mock_h5py_file, mock_download
+    ):
+        """When simulating country/scotland, only Scottish local authorities should be returned."""
+        mock_download.side_effect = [
+            "/path/to/weights.h5",
+            "/path/to/names.csv",
+        ]
+
+        mock_weights = np.ones((3, 10))
+        mock_h5py_context = MagicMock()
+        mock_h5py_context.__enter__ = MagicMock(
+            return_value={"2025": mock_weights}
+        )
+        mock_h5py_context.__exit__ = MagicMock(return_value=False)
+        mock_h5py_file.return_value = mock_h5py_context
+
+        mock_la_df = pd.DataFrame(
+            {
+                "code": ["E06000001", "S12000033", "W06000001"],
+                "name": ["Hartlepool", "Aberdeen City", "Isle of Anglesey"],
+                "x": [8.0, 5.0, 3.0],
+                "y": [19.0, 10.0, 15.0],
+            }
+        )
+        mock_read_csv.return_value = mock_la_df
+
+        baseline = {"household_net_income": np.array([1000.0] * 10)}
+        reform = {"household_net_income": np.array([1050.0] * 10)}
+
+        result = uk_local_authority_breakdown(
+            baseline, reform, "uk", "country/scotland"
+        )
+
+        assert result is not None
+        assert len(result.by_local_authority) == 1
+        assert "Aberdeen City" in result.by_local_authority
+        assert "Hartlepool" not in result.by_local_authority
+        assert "Isle of Anglesey" not in result.by_local_authority
+
+    @patch(
+        "policyengine_api.endpoints.economy.compare.download_huggingface_dataset"
+    )
+    @patch("policyengine_api.endpoints.economy.compare.h5py.File")
+    @patch("policyengine_api.endpoints.economy.compare.pd.read_csv")
+    def test__given_uk_region__returns_all_las(
+        self, mock_read_csv, mock_h5py_file, mock_download
+    ):
+        """When simulating uk-wide, all local authorities should be returned."""
+        mock_download.side_effect = [
+            "/path/to/weights.h5",
+            "/path/to/names.csv",
+        ]
+
+        mock_weights = np.ones((3, 10))
+        mock_h5py_context = MagicMock()
+        mock_h5py_context.__enter__ = MagicMock(
+            return_value={"2025": mock_weights}
+        )
+        mock_h5py_context.__exit__ = MagicMock(return_value=False)
+        mock_h5py_file.return_value = mock_h5py_context
+
+        mock_la_df = pd.DataFrame(
+            {
+                "code": ["E06000001", "S12000033", "W06000001"],
+                "name": ["Hartlepool", "Aberdeen City", "Isle of Anglesey"],
+                "x": [8.0, 5.0, 3.0],
+                "y": [19.0, 10.0, 15.0],
+            }
+        )
+        mock_read_csv.return_value = mock_la_df
+
+        baseline = {"household_net_income": np.array([1000.0] * 10)}
+        reform = {"household_net_income": np.array([1050.0] * 10)}
+
+        result = uk_local_authority_breakdown(baseline, reform, "uk", "uk")
+
+        assert result is not None
+        assert len(result.by_local_authority) == 3
+        assert "Hartlepool" in result.by_local_authority
+        assert "Aberdeen City" in result.by_local_authority
+        assert "Isle of Anglesey" in result.by_local_authority
+
+    @patch(
+        "policyengine_api.endpoints.economy.compare.download_huggingface_dataset"
+    )
+    @patch("policyengine_api.endpoints.economy.compare.h5py.File")
+    @patch("policyengine_api.endpoints.economy.compare.pd.read_csv")
+    def test__given_no_region__returns_all_las(
+        self, mock_read_csv, mock_h5py_file, mock_download
+    ):
+        """When no region specified (None), all local authorities should be returned."""
+        mock_download.side_effect = [
+            "/path/to/weights.h5",
+            "/path/to/names.csv",
+        ]
+
+        mock_weights = np.ones((3, 10))
+        mock_h5py_context = MagicMock()
+        mock_h5py_context.__enter__ = MagicMock(
+            return_value={"2025": mock_weights}
+        )
+        mock_h5py_context.__exit__ = MagicMock(return_value=False)
+        mock_h5py_file.return_value = mock_h5py_context
+
+        mock_la_df = pd.DataFrame(
+            {
+                "code": ["E06000001", "S12000033", "W06000001"],
+                "name": ["Hartlepool", "Aberdeen City", "Isle of Anglesey"],
+                "x": [8.0, 5.0, 3.0],
+                "y": [19.0, 10.0, 15.0],
+            }
+        )
+        mock_read_csv.return_value = mock_la_df
+
+        baseline = {"household_net_income": np.array([1000.0] * 10)}
+        reform = {"household_net_income": np.array([1050.0] * 10)}
+
+        result = uk_local_authority_breakdown(baseline, reform, "uk", None)
+
+        assert result is not None
+        assert len(result.by_local_authority) == 3
+
 
 class TestUKConstituencyBreakdownModels:
     """Tests for the existing UK constituency breakdown models (for completeness)."""
@@ -351,3 +538,188 @@ def test__given_non_uk_country__returns_none(self):
     def test__given_non_uk_country_nigeria__returns_none(self):
         result = uk_constituency_breakdown({}, {}, "ng")
         assert result is None
+
+    def test__given_local_authority_region__returns_none(self):
+        """When simulating a local authority, constituency breakdown should not be computed."""
+        result = uk_constituency_breakdown({}, {}, "uk", "local_authority/Leicester")
+        assert result is None
+
+    def test__given_local_authority_region_with_code__returns_none(self):
+        """When simulating a local authority by code, constituency breakdown should not be computed."""
+        result = uk_constituency_breakdown({}, {}, "uk", "local_authority/E06000016")
+        assert result is None
+
+    @patch(
+        "policyengine_api.endpoints.economy.compare.download_huggingface_dataset"
+    )
+    @patch("policyengine_api.endpoints.economy.compare.h5py.File")
+    @patch("policyengine_api.endpoints.economy.compare.pd.read_csv")
+    def test__given_specific_constituency_region__returns_only_that_constituency(
+        self, mock_read_csv, mock_h5py_file, mock_download
+    ):
+        """When simulating a specific constituency, only that constituency should be returned."""
+        mock_download.side_effect = [
+            "/path/to/weights.h5",
+            "/path/to/names.csv",
+        ]
+
+        # Create mock weights - 3 constituencies, 10 households
+        mock_weights = np.ones((3, 10))
+        mock_h5py_context = MagicMock()
+        mock_h5py_context.__enter__ = MagicMock(
+            return_value={"2025": mock_weights}
+        )
+        mock_h5py_context.__exit__ = MagicMock(return_value=False)
+        mock_h5py_file.return_value = mock_h5py_context
+
+        # Create mock constituency names DataFrame
+        mock_const_df = pd.DataFrame(
+            {
+                "code": ["E12345678", "S12345678", "W12345678"],
+                "name": ["Aldershot", "Edinburgh East", "Cardiff South"],
+                "x": [10.0, 5.0, 3.0],
+                "y": [20.0, 15.0, 12.0],
+            }
+        )
+        mock_read_csv.return_value = mock_const_df
+
+        baseline = {"household_net_income": np.array([1000.0] * 10)}
+        reform = {"household_net_income": np.array([1050.0] * 10)}
+
+        result = uk_constituency_breakdown(
+            baseline, reform, "uk", "constituency/Aldershot"
+        )
+
+        assert result is not None
+        assert len(result.by_constituency) == 1
+        assert "Aldershot" in result.by_constituency
+        assert "Edinburgh East" not in result.by_constituency
+        assert "Cardiff South" not in result.by_constituency
+
+    @patch(
+        "policyengine_api.endpoints.economy.compare.download_huggingface_dataset"
+    )
+    @patch("policyengine_api.endpoints.economy.compare.h5py.File")
+    @patch("policyengine_api.endpoints.economy.compare.pd.read_csv")
+    def test__given_country_scotland_region__returns_only_scottish_constituencies(
+        self, mock_read_csv, mock_h5py_file, mock_download
+    ):
+        """When simulating country/scotland, only Scottish constituencies should be returned."""
+        mock_download.side_effect = [
+            "/path/to/weights.h5",
+            "/path/to/names.csv",
+        ]
+
+        mock_weights = np.ones((3, 10))
+        mock_h5py_context = MagicMock()
+        mock_h5py_context.__enter__ = MagicMock(
+            return_value={"2025": mock_weights}
+        )
+        mock_h5py_context.__exit__ = MagicMock(return_value=False)
+        mock_h5py_file.return_value = mock_h5py_context
+
+        mock_const_df = pd.DataFrame(
+            {
+                "code": ["E12345678", "S12345678", "W12345678"],
+                "name": ["Aldershot", "Edinburgh East", "Cardiff South"],
+                "x": [10.0, 5.0, 3.0],
+                "y": [20.0, 15.0, 12.0],
+            }
+        )
+        mock_read_csv.return_value = mock_const_df
+
+        baseline = {"household_net_income": np.array([1000.0] * 10)}
+        reform = {"household_net_income": np.array([1050.0] * 10)}
+
+        result = uk_constituency_breakdown(
+            baseline, reform, "uk", "country/scotland"
+        )
+
+        assert result is not None
+        assert len(result.by_constituency) == 1
+        assert "Edinburgh East" in result.by_constituency
+        assert "Aldershot" not in result.by_constituency
+        assert "Cardiff South" not in result.by_constituency
+
+    @patch(
+        "policyengine_api.endpoints.economy.compare.download_huggingface_dataset"
+    )
+    @patch("policyengine_api.endpoints.economy.compare.h5py.File")
+    @patch("policyengine_api.endpoints.economy.compare.pd.read_csv")
+    def test__given_uk_region__returns_all_constituencies(
+        self, mock_read_csv, mock_h5py_file, mock_download
+    ):
+        """When simulating uk-wide, all constituencies should be returned."""
+        mock_download.side_effect = [
+            "/path/to/weights.h5",
+            "/path/to/names.csv",
+        ]
+
+        mock_weights = np.ones((3, 10))
+        mock_h5py_context = MagicMock()
+        mock_h5py_context.__enter__ = MagicMock(
+            return_value={"2025": mock_weights}
+        )
+        mock_h5py_context.__exit__ = MagicMock(return_value=False)
+        mock_h5py_file.return_value = mock_h5py_context
+
+        mock_const_df = pd.DataFrame(
+            {
+                "code": ["E12345678", "S12345678", "W12345678"],
+                "name": ["Aldershot", "Edinburgh East", "Cardiff South"],
+                "x": [10.0, 5.0, 3.0],
+                "y": [20.0, 15.0, 12.0],
+            }
+        )
+        mock_read_csv.return_value = mock_const_df
+
+        baseline = {"household_net_income": np.array([1000.0] * 10)}
+        reform = {"household_net_income": np.array([1050.0] * 10)}
+
+        result = uk_constituency_breakdown(baseline, reform, "uk", "uk")
+
+        assert result is not None
+        assert len(result.by_constituency) == 3
+        assert "Aldershot" in result.by_constituency
+        assert "Edinburgh East" in result.by_constituency
+        assert "Cardiff South" in result.by_constituency
+
+    @patch(
+        "policyengine_api.endpoints.economy.compare.download_huggingface_dataset"
+    )
+    @patch("policyengine_api.endpoints.economy.compare.h5py.File")
+    @patch("policyengine_api.endpoints.economy.compare.pd.read_csv")
+    def test__given_no_region__returns_all_constituencies(
+        self, mock_read_csv, mock_h5py_file, mock_download
+    ):
+        """When no region specified (None), all constituencies should be returned."""
+        mock_download.side_effect = [
+            "/path/to/weights.h5",
+            "/path/to/names.csv",
+        ]
+
+        mock_weights = np.ones((3, 10))
+        mock_h5py_context = MagicMock()
+        mock_h5py_context.__enter__ = MagicMock(
+            return_value={"2025": mock_weights}
+        )
+        mock_h5py_context.__exit__ = MagicMock(return_value=False)
+        mock_h5py_file.return_value = mock_h5py_context
+
+        mock_const_df = pd.DataFrame(
+            {
+                "code": ["E12345678", "S12345678", "W12345678"],
+                "name": ["Aldershot", "Edinburgh East", "Cardiff South"],
+                "x": [10.0, 5.0, 3.0],
+                "y": [20.0, 15.0, 12.0],
+            }
+        )
+        mock_read_csv.return_value = mock_const_df
+
+        baseline = {"household_net_income": np.array([1000.0] * 10)}
+        reform = {"household_net_income": np.array([1050.0] * 10)}
+
+        result = uk_constituency_breakdown(baseline, reform, "uk", None)
+
+        assert result is not None
+        assert len(result.by_constituency) == 3

From 9f33189f7ab73675f9f9f8e64299a542e6f3c218 Mon Sep 17 00:00:00 2001
From: Anthony Volk <anth.volk@gmail.com>
Date: Mon, 15 Dec 2025 22:02:03 +0400
Subject: [PATCH 4/7] chore: Lint and changelog

---
 .DS_Store                                     |  Bin 0 -> 6148 bytes
 changelog_entry.yaml                          |    5 +
 policyengine_api/endpoints/economy/compare.py |   25 +-
 scripts/.datasets/constituencies_2024.csv     |  651 ++++++++++
 scripts/.datasets/local_authorities_2021.csv  |  361 ++++++
 scripts/BUG_REPORT_build_from_dataframe.md    |  172 +++
 scripts/diagnose_country_filtering.ipynb      |  503 ++++++++
 scripts/prove_build_from_dataframe_bug.ipynb  |  841 ++++++++++++
 scripts/test_local_authority_api.py           |  570 ++++++++
 scripts/verify_country_filtering_bug.ipynb    | 1147 +++++++++++++++++
 tests/unit/endpoints/economy/test_compare.py  |   16 +-
 tests/unit/test_country.py                    |    4 +-
 12 files changed, 4283 insertions(+), 12 deletions(-)
 create mode 100644 .DS_Store
 create mode 100644 scripts/.datasets/constituencies_2024.csv
 create mode 100644 scripts/.datasets/local_authorities_2021.csv
 create mode 100644 scripts/BUG_REPORT_build_from_dataframe.md
 create mode 100644 scripts/diagnose_country_filtering.ipynb
 create mode 100644 scripts/prove_build_from_dataframe_bug.ipynb
 create mode 100755 scripts/test_local_authority_api.py
 create mode 100644 scripts/verify_country_filtering_bug.ipynb

diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..b2a6b3e6c870dde3f033c72e8cfe8a9aab8f5326
GIT binary patch
literal 6148
zcmeHKOG-mQ5UkdK0XJD@IalxoLx?BH1p*O5K%yk-`mJ&<k7o4;F}!3WxRGk;u9=>$
zdAwS@eht7D?@tfF48WA`h?9r0`MLYZZYpC$I`4SJ8;&2V!~XeYRDC(&+&dhx!2!FU
z{QWj>y-ZV93P=GdAO)m=6!@hA-g{}&`$R=4AO)nrw*vlsXmrP3I3&iWgCRx$;)Lli
zu49%UHct?H;gHA-&5}w?s?~^LNoT%QT`wFGlMbul!|KUa6N<&td4G#?SWi@x0#e{y
zf!o~9y#GJaugw4FB<-Yt6!=#P*krj{F8E5-TSqVFy|&TUbg%iOyKx;9hG@scXvf@m
fJDx{T)-_-AycZ6ML1#YbMEwl7E;1?b*9v?AELauN

literal 0
HcmV?d00001

diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index e69de29bb..fd4509d1f 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -0,0 +1,5 @@
+- bump: minor
+  changes:
+    added:
+    - Metadata for UK local authorities
+    - Calculation of UK local authority-level outputs
\ No newline at end of file
diff --git a/policyengine_api/endpoints/economy/compare.py b/policyengine_api/endpoints/economy/compare.py
index f82f7eaa5..c97a03f6f 100644
--- a/policyengine_api/endpoints/economy/compare.py
+++ b/policyengine_api/endpoints/economy/compare.py
@@ -584,7 +584,13 @@ def uk_constituency_breakdown(
         "by_constituency": {},
         "outcomes_by_region": {},
     }
-    for region_name in ["uk", "england", "scotland", "wales", "northern_ireland"]:
+    for region_name in [
+        "uk",
+        "england",
+        "scotland",
+        "wales",
+        "northern_ireland",
+    ]:
         output["outcomes_by_region"][region_name] = {
             "Gain more than 5%": 0,
             "Gain less than 5%": 0,
@@ -699,7 +705,13 @@ def uk_local_authority_breakdown(
         "by_local_authority": {},
         "outcomes_by_region": {},
     }
-    for region_name in ["uk", "england", "scotland", "wales", "northern_ireland"]:
+    for region_name in [
+        "uk",
+        "england",
+        "scotland",
+        "wales",
+        "northern_ireland",
+    ]:
         output["outcomes_by_region"][region_name] = {
             "Gain more than 5%": 0,
             "Gain less than 5%": 0,
@@ -740,8 +752,9 @@ def uk_local_authority_breakdown(
                 continue
             elif selected_country == "wales" and not code.startswith("W"):
                 continue
-            elif selected_country == "northern_ireland" and not code.startswith(
-                "N"
+            elif (
+                selected_country == "northern_ireland"
+                and not code.startswith("N")
             ):
                 continue
 
@@ -828,7 +841,9 @@ def compare_economic_outputs(
             uk_local_authority_breakdown(baseline, reform, country_id, region)
         )
         if local_authority_impact_data is not None:
-            local_authority_impact_data = local_authority_impact_data.model_dump()
+            local_authority_impact_data = (
+                local_authority_impact_data.model_dump()
+            )
         try:
             wealth_decile_impact_data = wealth_decile_impact(baseline, reform)
             intra_wealth_decile_impact_data = intra_wealth_decile_impact(
diff --git a/scripts/.datasets/constituencies_2024.csv b/scripts/.datasets/constituencies_2024.csv
new file mode 100644
index 000000000..bd9a1df28
--- /dev/null
+++ b/scripts/.datasets/constituencies_2024.csv
@@ -0,0 +1,651 @@
+code,name,x,y
+E14001063,Aldershot,56,-40
+E14001064,Aldridge-Brownhills,56,-30
+E14001065,Altrincham and Sale West,52,-25
+E14001066,Amber Valley,58,-27
+E14001067,Arundel and South Downs,61,-44
+E14001068,Ashfield,60,-27
+E14001069,Ashford,72,-42
+E14001070,Ashton-under-Lyne,54,-23
+E14001071,Aylesbury,60,-35
+E14001072,Banbury,58,-33
+E14001073,Barking,68,-38
+E14001074,Barnsley North,57,-23
+E14001075,Barnsley South,58,-23
+E14001076,Barrow and Furness,54,-16
+E14001077,Basildon and Billericay,67,-34
+E14001078,Basingstoke,55,-39
+E14001079,Bassetlaw,61,-26
+E14001080,Bath,51,-40
+E14001081,Battersea,62,-41
+E14001082,Beaconsfield,57,-37
+E14001083,Beckenham and Penge,65,-43
+E14001084,Bedford,63,-32
+E14001085,Bermondsey and Old Southwark,64,-40
+E14001086,Bethnal Green and Stepney,65,-39
+E14001087,Beverley and Holderness,64,-22
+E14001088,Bexhill and Battle,70,-44
+E14001089,Bexleyheath and Crayford,67,-39
+E14001090,Bicester and Woodstock,59,-34
+E14001091,Birkenhead,49,-27
+E14001092,Birmingham Edgbaston,53,-33
+E14001093,Birmingham Erdington,54,-31
+E14001094,Birmingham Hall Green and Moseley,55,-32
+E14001095,Birmingham Hodge Hill and Solihull North,55,-31
+E14001096,Birmingham Ladywood,54,-32
+E14001097,Birmingham Northfield,54,-34
+E14001098,Birmingham Perry Barr,53,-31
+E14001099,Birmingham Selly Oak,54,-33
+E14001100,Birmingham Yardley,56,-32
+E14001101,Bishop Auckland,54,-14
+E14001102,Blackburn,53,-19
+E14001103,Blackley and Middleton South,53,-23
+E14001104,Blackpool North and Fleetwood,53,-18
+E14001105,Blackpool South,52,-18
+E14001106,Blaydon and Consett,55,-14
+E14001107,Blyth and Ashington,55,-12
+E14001108,Bognor Regis and Littlehampton,63,-44
+E14001109,Bolsover,60,-26
+E14001110,Bolton North East,52,-21
+E14001111,Bolton South and Walkden,52,-22
+E14001112,Bolton West,51,-21
+E14001113,Bootle,49,-22
+E14001114,Boston and Skegness,64,-26
+E14001115,Bournemouth East,52,-43
+E14001116,Bournemouth West,52,-42
+E14001117,Bracknell,56,-39
+E14001118,Bradford East,58,-20
+E14001119,Bradford South,56,-21
+E14001120,Bradford West,57,-20
+E14001121,Braintree,67,-31
+E14001122,Brent East,61,-38
+E14001123,Brent West,60,-38
+E14001124,Brentford and Isleworth,60,-40
+E14001125,Brentwood and Ongar,66,-33
+E14001126,Bridgwater,48,-41
+E14001127,Bridlington and The Wolds,63,-20
+E14001128,Brigg and Immingham,62,-24
+E14001129,Brighton Kemptown and Peacehaven,67,-45
+E14001130,Brighton Pavilion,67,-44
+E14001131,Bristol Central,51,-38
+E14001132,Bristol East,52,-38
+E14001133,Bristol North East,51,-37
+E14001134,Bristol North West,50,-38
+E14001135,Bristol South,51,-39
+E14001136,Broadland and Fakenham,66,-27
+E14001137,Bromley and Biggin Hill,67,-42
+E14001138,Bromsgrove,52,-33
+E14001139,Broxbourne,66,-35
+E14001140,Broxtowe,59,-27
+E14001141,Buckingham and Bletchley,60,-34
+E14001142,Burnley,55,-19
+E14001143,Burton and Uttoxeter,56,-28
+E14001144,Bury North,53,-21
+E14001145,Bury South,53,-22
+E14001146,Bury St Edmunds and Stowmarket,68,-31
+E14001147,Calder Valley,56,-20
+E14001148,Camborne and Redruth,43,-45
+E14001149,Cambridge,65,-30
+E14001150,Cannock Chase,54,-29
+E14001151,Canterbury,71,-41
+E14001152,Carlisle,53,-14
+E14001153,Carshalton and Wallington,62,-43
+E14001154,Castle Point,69,-36
+E14001155,Central Devon,47,-42
+E14001156,Central Suffolk and North Ipswich,68,-29
+E14001157,Chatham and Aylesford,69,-40
+E14001158,Cheadle,55,-26
+E14001159,Chelmsford,67,-33
+E14001160,Chelsea and Fulham,61,-40
+E14001161,Cheltenham,52,-36
+E14001162,Chesham and Amersham,59,-36
+E14001163,Chester North and Neston,50,-28
+E14001164,Chester South and Eddisbury,51,-27
+E14001165,Chesterfield,59,-26
+E14001166,Chichester,60,-44
+E14001167,Chingford and Woodford Green,64,-35
+E14001168,Chippenham,52,-39
+E14001169,Chipping Barnet,62,-36
+E14001170,Chorley,53,-20
+E14001171,Christchurch,53,-42
+E14001172,Cities of London and Westminster,63,-40
+E14001173,City of Durham,55,-16
+E14001174,Clacton,69,-32
+E14001175,Clapham and Brixton Hill,62,-42
+E14001176,Colchester,68,-32
+E14001177,Colne Valley,55,-23
+E14001178,Congleton,54,-27
+E14001179,Corby and East Northamptonshire,62,-30
+E14001180,Coventry East,57,-33
+E14001181,Coventry North West,56,-33
+E14001182,Coventry South,57,-34
+E14001183,Cramlington and Killingworth,56,-12
+E14001184,Crawley,69,-44
+E14001185,Crewe and Nantwich,53,-27
+E14001186,Croydon East,65,-42
+E14001187,Croydon South,64,-43
+E14001188,Croydon West,63,-43
+E14001189,Dagenham and Rainham,67,-37
+E14001190,Darlington,55,-17
+E14001191,Dartford,68,-40
+E14001192,Daventry,60,-32
+E14001193,Derby North,58,-28
+E14001194,Derby South,57,-28
+E14001195,Derbyshire Dales,57,-26
+E14001196,Dewsbury and Batley,57,-22
+E14001197,Didcot and Wantage,54,-38
+E14001198,Doncaster Central,60,-23
+E14001199,Doncaster East and the Isle of Axholme,61,-23
+E14001200,Doncaster North,61,-22
+E14001201,Dorking and Horley,59,-43
+E14001202,Dover and Deal,72,-41
+E14001203,Droitwich and Evesham,54,-36
+E14001204,Dudley,51,-31
+E14001205,Dulwich and West Norwood,63,-42
+E14001206,Dunstable and Leighton Buzzard,62,-33
+E14001207,Ealing Central and Acton,59,-39
+E14001208,Ealing North,59,-38
+E14001209,Ealing Southall,58,-39
+E14001210,Earley and Woodley,56,-36
+E14001211,Easington,57,-16
+E14001212,East Grinstead and Uckfield,69,-43
+E14001213,East Ham,67,-38
+E14001214,East Hampshire,55,-41
+E14001215,East Surrey,67,-43
+E14001216,East Thanet,71,-39
+E14001217,East Wiltshire,53,-41
+E14001218,East Worthing and Shoreham,65,-44
+E14001219,Eastbourne,69,-45
+E14001220,Eastleigh,54,-41
+E14001221,Edmonton and Winchmore Hill,64,-36
+E14001222,Ellesmere Port and Bromborough,50,-27
+E14001223,Eltham and Chislehurst,66,-41
+E14001224,Ely and East Cambridgeshire,66,-30
+E14001225,Enfield North,62,-35
+E14001226,Epping Forest,67,-35
+E14001227,Epsom and Ewell,60,-43
+E14001228,Erewash,59,-28
+E14001229,Erith and Thamesmead,67,-40
+E14001230,Esher and Walton,58,-42
+E14001231,Exeter,48,-42
+E14001232,Exmouth and Exeter East,48,-43
+E14001233,Fareham and Waterlooville,55,-43
+E14001234,Farnham and Bordon,56,-42
+E14001235,Faversham and Mid Kent,71,-40
+E14001236,Feltham and Heston,59,-40
+E14001237,Filton and Bradley Stoke,50,-37
+E14001238,Finchley and Golders Green,61,-37
+E14001239,Folkestone and Hythe,71,-42
+E14001240,Forest of Dean,50,-35
+E14001241,Frome and East Somerset,50,-41
+E14001242,Fylde,51,-19
+E14001243,Gainsborough,61,-25
+E14001244,Gateshead Central and Whickham,56,-15
+E14001245,Gedling,61,-28
+E14001246,Gillingham and Rainham,70,-40
+E14001247,Glastonbury and Somerton,49,-41
+E14001248,Gloucester,51,-35
+E14001249,Godalming and Ash,57,-42
+E14001250,Goole and Pocklington,61,-21
+E14001251,Gorton and Denton,55,-24
+E14001252,Gosport,57,-43
+E14001253,Grantham and Bourne,63,-28
+E14001254,Gravesham,68,-39
+E14001255,Great Grimsby and Cleethorpes,63,-24
+E14001256,Great Yarmouth,67,-27
+E14001257,Greenwich and Woolwich,66,-40
+E14001258,Guildford,56,-41
+E14001259,Hackney North and Stoke Newington,64,-38
+E14001260,Hackney South and Shoreditch,64,-39
+E14001261,Halesowen,51,-33
+E14001262,Halifax,55,-21
+E14001263,Hamble Valley,56,-43
+E14001264,Hammersmith and Chiswick,60,-39
+E14001265,Hampstead and Highgate,62,-38
+E14001266,"Harborough, Oadby and Wigston",61,-31
+E14001267,Harlow,67,-32
+E14001268,Harpenden and Berkhamsted,62,-34
+E14001269,Harrogate and Knaresborough,59,-18
+E14001270,Harrow East,60,-37
+E14001271,Harrow West,59,-37
+E14001272,Hartlepool,59,-16
+E14001273,Harwich and North Essex,69,-31
+E14001274,Hastings and Rye,70,-43
+E14001275,Havant,59,-44
+E14001276,Hayes and Harlington,58,-38
+E14001277,Hazel Grove,55,-25
+E14001278,Hemel Hempstead,64,-34
+E14001279,Hendon,61,-36
+E14001280,Henley and Thame,58,-35
+E14001281,Hereford and South Herefordshire,51,-34
+E14001282,Herne Bay and Sandwich,72,-40
+E14001283,Hertford and Stortford,66,-32
+E14001284,Hertsmere,66,-34
+E14001285,Hexham,53,-13
+E14001286,Heywood and Middleton North,54,-20
+E14001287,High Peak,56,-25
+E14001288,Hinckley and Bosworth,58,-30
+E14001289,Hitchin,64,-32
+E14001290,Holborn and St Pancras,62,-39
+E14001291,Honiton and Sidmouth,49,-43
+E14001292,Hornchurch and Upminster,66,-37
+E14001293,Hornsey and Friern Barnet,63,-36
+E14001294,Horsham,62,-44
+E14001295,Houghton and Sunderland South,57,-15
+E14001296,Hove and Portslade,66,-44
+E14001297,Huddersfield,56,-22
+E14001298,Huntingdon,63,-31
+E14001299,Hyndburn,54,-19
+E14001300,Ilford North,65,-36
+E14001301,Ilford South,65,-37
+E14001302,Ipswich,68,-30
+E14001303,Isle of Wight East,54,-45
+E14001304,Isle of Wight West,53,-45
+E14001305,Islington North,63,-38
+E14001306,Islington South and Finsbury,63,-39
+E14001307,Jarrow and Gateshead East,57,-14
+E14001308,Keighley and Ilkley,56,-19
+E14001309,Kenilworth and Southam,56,-34
+E14001310,Kensington and Bayswater,61,-39
+E14001311,Kettering,61,-30
+E14001312,Kingston and Surbiton,59,-42
+E14001313,Kingston upon Hull East,63,-22
+E14001314,Kingston upon Hull North and Cottingham,62,-21
+E14001315,Kingston upon Hull West and Haltemprice,62,-22
+E14001316,Kingswinford and South Staffordshire,52,-30
+E14001317,Knowsley,50,-23
+E14001318,Lancaster and Wyre,54,-18
+E14001319,Leeds Central and Headingley,60,-20
+E14001320,Leeds East,61,-20
+E14001321,Leeds North East,59,-19
+E14001322,Leeds North West,58,-19
+E14001323,Leeds South,59,-21
+E14001324,Leeds South West and Morley,58,-21
+E14001325,Leeds West and Pudsey,59,-20
+E14001326,Leicester East,60,-30
+E14001327,Leicester South,60,-31
+E14001328,Leicester West,59,-31
+E14001329,Leigh and Atherton,51,-25
+E14001330,Lewes,68,-45
+E14001331,Lewisham East,66,-42
+E14001332,Lewisham North,65,-40
+E14001333,Lewisham West and East Dulwich,65,-41
+E14001334,Leyton and Wanstead,64,-37
+E14001335,Lichfield,56,-29
+E14001336,Lincoln,62,-25
+E14001337,Liverpool Garston,50,-25
+E14001338,Liverpool Riverside,49,-24
+E14001339,Liverpool Walton,49,-23
+E14001340,Liverpool Wavertree,49,-25
+E14001341,Liverpool West Derby,50,-24
+E14001342,Loughborough,59,-30
+E14001343,Louth and Horncastle,63,-25
+E14001344,Lowestoft,68,-28
+E14001345,Luton North,63,-33
+E14001346,Luton South and South Bedfordshire,63,-34
+E14001347,Macclesfield,56,-26
+E14001348,Maidenhead,57,-36
+E14001349,Maidstone and Malling,69,-41
+E14001350,Makerfield,51,-22
+E14001351,Maldon,69,-33
+E14001352,Manchester Central,54,-24
+E14001353,Manchester Rusholme,53,-25
+E14001354,Manchester Withington,54,-26
+E14001355,Mansfield,61,-27
+E14001356,Melksham and Devizes,52,-40
+E14001357,Melton and Syston,61,-29
+E14001358,Meriden and Solihull East,55,-33
+E14001359,Mid Bedfordshire,62,-32
+E14001360,Mid Buckinghamshire,59,-35
+E14001361,Mid Cheshire,52,-27
+E14001362,Mid Derbyshire,57,-27
+E14001363,Mid Dorset and North Poole,50,-43
+E14001364,Mid Leicestershire,58,-31
+E14001365,Mid Norfolk,65,-28
+E14001366,Mid Sussex,68,-43
+E14001367,Middlesbrough and Thornaby East,57,-17
+E14001368,Middlesbrough South and East Cleveland,59,-17
+E14001369,Milton Keynes Central,61,-34
+E14001370,Milton Keynes North,61,-33
+E14001371,Mitcham and Morden,61,-43
+E14001372,Morecambe and Lunesdale,54,-17
+E14001373,New Forest East,54,-43
+E14001374,New Forest West,53,-43
+E14001375,Newark,62,-26
+E14001376,Newbury,54,-37
+E14001377,Newcastle upon Tyne Central and West,54,-13
+E14001378,Newcastle upon Tyne East and Wallsend,56,-14
+E14001379,Newcastle upon Tyne North,55,-13
+E14001380,Newcastle-under-Lyme,52,-28
+E14001381,Newton Abbot,47,-43
+E14001382,Newton Aycliffe and Spennymoor,56,-16
+E14001383,Normanton and Hemsworth,59,-23
+E14001384,North Bedfordshire,62,-31
+E14001385,North Cornwall,45,-43
+E14001386,North Cotswolds,53,-37
+E14001387,North Devon,46,-41
+E14001388,North Dorset,51,-42
+E14001389,North Durham,54,-15
+E14001390,North East Cambridgeshire,64,-29
+E14001391,North East Derbyshire,58,-26
+E14001392,North East Hampshire,56,-38
+E14001393,North East Hertfordshire,65,-32
+E14001394,North East Somerset and Hanham,50,-39
+E14001395,North Herefordshire,52,-34
+E14001396,North Norfolk,65,-27
+E14001397,North Northumberland,54,-12
+E14001398,North Shropshire,50,-29
+E14001399,North Somerset,49,-39
+E14001400,North Warwickshire and Bedworth,57,-32
+E14001401,North West Cambridgeshire,64,-30
+E14001402,North West Essex,66,-31
+E14001403,North West Hampshire,54,-39
+E14001404,North West Leicestershire,58,-29
+E14001405,North West Norfolk,64,-28
+E14001406,Northampton North,61,-32
+E14001407,Northampton South,60,-33
+E14001408,Norwich North,66,-28
+E14001409,Norwich South,66,-29
+E14001410,Nottingham East,60,-29
+E14001411,Nottingham North and Kimberley,60,-28
+E14001412,Nottingham South,59,-29
+E14001413,Nuneaton,57,-31
+E14001414,Old Bexley and Sidcup,67,-41
+E14001415,Oldham East and Saddleworth,55,-22
+E14001416,"Oldham West, Chadderton and Royton",54,-22
+E14001417,Orpington,66,-43
+E14001418,Ossett and Denby Dale,58,-22
+E14001419,Oxford East,58,-34
+E14001420,Oxford West and Abingdon,57,-35
+E14001421,Peckham,64,-41
+E14001422,Pendle and Clitheroe,56,-18
+E14001423,Penistone and Stocksbridge,56,-23
+E14001424,Penrith and Solway,52,-15
+E14001425,Peterborough,63,-29
+E14001426,Plymouth Moor View,46,-43
+E14001427,Plymouth Sutton and Devonport,47,-44
+E14001428,"Pontefract, Castleford and Knottingley",60,-22
+E14001429,Poole,51,-43
+E14001430,Poplar and Limehouse,66,-39
+E14001431,Portsmouth North,58,-43
+E14001432,Portsmouth South,58,-44
+E14001433,Preston,52,-19
+E14001434,Putney,61,-41
+E14001435,Queen's Park and Maida Vale,62,-40
+E14001436,Rawmarsh and Conisbrough,60,-24
+E14001437,Rayleigh and Wickford,68,-34
+E14001438,Reading Central,55,-37
+E14001439,Reading West and Mid Berkshire,55,-36
+E14001440,Redcar,58,-17
+E14001441,Redditch,53,-35
+E14001442,Reigate,68,-44
+E14001443,Ribble Valley,55,-18
+E14001444,Richmond and Northallerton,57,-18
+E14001445,Richmond Park,59,-41
+E14001446,Rochdale,54,-21
+E14001447,Rochester and Strood,69,-39
+E14001448,Romford,66,-36
+E14001449,Romsey and Southampton North,54,-40
+E14001450,Rossendale and Darwen,55,-20
+E14001451,Rother Valley,60,-25
+E14001452,Rotherham,59,-24
+E14001453,Rugby,58,-32
+E14001454,"Ruislip, Northwood and Pinner",60,-36
+E14001455,Runcorn and Helsby,51,-28
+E14001456,Runnymede and Weybridge,57,-41
+E14001457,Rushcliffe,62,-28
+E14001458,Rutland and Stamford,62,-29
+E14001459,Salford,53,-24
+E14001460,Salisbury,52,-41
+E14001461,Scarborough and Whitby,61,-19
+E14001462,Scunthorpe,61,-24
+E14001463,Sefton Central,50,-20
+E14001464,Selby,60,-21
+E14001465,Sevenoaks,68,-42
+E14001466,Sheffield Brightside and Hillsborough,58,-24
+E14001467,Sheffield Central,58,-25
+E14001468,Sheffield Hallam,57,-24
+E14001469,Sheffield Heeley,57,-25
+E14001470,Sheffield South East,59,-25
+E14001471,Sherwood Forest,62,-27
+E14001472,Shipley,57,-19
+E14001473,Shrewsbury,51,-30
+E14001474,Sittingbourne and Sheppey,70,-39
+E14001475,Skipton and Ripon,58,-18
+E14001476,Sleaford and North Hykeham,63,-26
+E14001477,Slough,56,-37
+E14001478,Smethwick,53,-32
+E14001479,Solihull West and Shirley,55,-34
+E14001480,South Basildon and East Thurrock,68,-36
+E14001481,South Cambridgeshire,65,-31
+E14001482,South Cotswolds,53,-38
+E14001483,South Derbyshire,57,-29
+E14001484,South Devon,48,-45
+E14001485,South Dorset,51,-44
+E14001486,South East Cornwall,46,-44
+E14001487,South Holland and The Deepings,63,-27
+E14001488,South Leicestershire,59,-32
+E14001489,South Norfolk,67,-29
+E14001490,South Northamptonshire,59,-33
+E14001491,South Ribble,52,-20
+E14001492,South Shields,58,-14
+E14001493,South Shropshire,50,-31
+E14001494,South Suffolk,69,-30
+E14001495,South West Devon,47,-45
+E14001496,South West Hertfordshire,61,-35
+E14001497,South West Norfolk,65,-29
+E14001498,South West Wiltshire,51,-41
+E14001499,Southampton Itchen,55,-42
+E14001500,Southampton Test,54,-42
+E14001501,Southend East and Rochford,69,-34
+E14001502,Southend West and Leigh,68,-35
+E14001503,Southgate and Wood Green,63,-35
+E14001504,Southport,50,-19
+E14001505,Spelthorne,58,-40
+E14001506,Spen Valley,57,-21
+E14001507,St Albans,65,-34
+E14001508,St Austell and Newquay,45,-44
+E14001509,St Helens North,50,-21
+E14001510,St Helens South and Whiston,50,-22
+E14001511,St Ives,43,-46
+E14001512,St Neots and Mid Cambridgeshire,64,-31
+E14001513,Stafford,54,-28
+E14001514,Staffordshire Moorlands,56,-27
+E14001515,Stalybridge and Hyde,56,-24
+E14001516,Stevenage,64,-33
+E14001517,Stockport,54,-25
+E14001518,Stockton North,58,-16
+E14001519,Stockton West,56,-17
+E14001520,Stoke-on-Trent Central,55,-28
+E14001521,Stoke-on-Trent North,55,-27
+E14001522,Stoke-on-Trent South,55,-29
+E14001523,"Stone, Great Wyrley and Penkridge",53,-28
+E14001524,Stourbridge,51,-32
+E14001525,Stratford and Bow,65,-38
+E14001526,Stratford-on-Avon,54,-35
+E14001527,Streatham and Croydon North,64,-42
+E14001528,Stretford and Urmston,52,-24
+E14001529,Stroud,52,-37
+E14001530,Suffolk Coastal,69,-29
+E14001531,Sunderland Central,58,-15
+E14001532,Surrey Heath,57,-39
+E14001533,Sussex Weald,70,-42
+E14001534,Sutton and Cheam,60,-42
+E14001535,Sutton Coldfield,56,-31
+E14001536,Swindon North,53,-39
+E14001537,Swindon South,53,-40
+E14001538,Tamworth,57,-30
+E14001539,Tatton,52,-26
+E14001540,Taunton and Wellington,49,-42
+E14001541,Telford,52,-29
+E14001542,Tewkesbury,53,-36
+E14001543,The Wrekin,51,-29
+E14001544,Thirsk and Malton,60,-18
+E14001545,Thornbury and Yate,51,-36
+E14001546,Thurrock,67,-36
+E14001547,Tipton and Wednesbury,52,-31
+E14001548,Tiverton and Minehead,47,-41
+E14001549,Tonbridge,68,-41
+E14001550,Tooting,61,-42
+E14001551,Torbay,48,-44
+E14001552,Torridge and Tavistock,46,-42
+E14001553,Tottenham,62,-37
+E14001554,Truro and Falmouth,44,-45
+E14001555,Tunbridge Wells,69,-42
+E14001556,Twickenham,58,-41
+E14001557,Tynemouth,56,-13
+E14001558,Uxbridge and South Ruislip,58,-37
+E14001559,Vauxhall and Camberwell Green,63,-41
+E14001560,Wakefield and Rothwell,59,-22
+E14001561,Wallasey,48,-27
+E14001562,Walsall and Bloxwich,55,-30
+E14001563,Walthamstow,63,-37
+E14001564,Warrington North,51,-23
+E14001565,Warrington South,51,-24
+E14001566,Warwick and Leamington,55,-35
+E14001567,Washington and Gateshead South,55,-15
+E14001568,Watford,65,-35
+E14001569,Waveney Valley,67,-28
+E14001570,Weald of Kent,70,-41
+E14001571,Wellingborough and Rushden,63,-30
+E14001572,Wells and Mendip Hills,50,-40
+E14001573,Welwyn Hatfield,65,-33
+E14001574,West Bromwich,52,-32
+E14001575,West Dorset,50,-44
+E14001576,West Ham and Beckton,66,-38
+E14001577,West Lancashire,49,-21
+E14001578,West Suffolk,67,-30
+E14001579,West Worcestershire,52,-35
+E14001580,Westmorland and Lonsdale,53,-15
+E14001581,Weston-super-Mare,49,-40
+E14001582,Wetherby and Easingwold,62,-20
+E14001583,Whitehaven and Workington,53,-16
+E14001584,Widnes and Halewood,51,-26
+E14001585,Wigan,51,-20
+E14001586,Wimbledon,60,-41
+E14001587,Winchester,55,-40
+E14001588,Windsor,57,-38
+E14001589,Wirral West,49,-28
+E14001590,Witham,68,-33
+E14001591,Witney,56,-35
+E14001592,Woking,57,-40
+E14001593,Wokingham,55,-38
+E14001594,Wolverhampton North East,53,-29
+E14001595,Wolverhampton South East,54,-30
+E14001596,Wolverhampton West,53,-30
+E14001597,Worcester,53,-34
+E14001598,Worsley and Eccles,52,-23
+E14001599,Worthing West,64,-44
+E14001600,Wycombe,58,-36
+E14001601,Wyre Forest,50,-33
+E14001602,Wythenshawe and Sale East,53,-26
+E14001603,Yeovil,50,-42
+E14001604,York Central,60,-19
+E14001605,York Outer,61,-18
+N05000001,Belfast East,45,-17
+N05000002,Belfast North,45,-16
+N05000003,Belfast South and Mid Down,45,-18
+N05000004,Belfast West,44,-17
+N05000005,East Antrim,45,-15
+N05000006,East Londonderry,43,-15
+N05000007,Fermanagh and South Tyrone,42,-17
+N05000008,Foyle,42,-15
+N05000009,Lagan Valley,44,-18
+N05000010,Mid Ulster,43,-16
+N05000011,Newry and Armagh,44,-19
+N05000012,North Antrim,44,-15
+N05000013,North Down,46,-16
+N05000014,South Antrim,44,-16
+N05000015,South Down,46,-18
+N05000016,Strangford,46,-17
+N05000017,Upper Bann,43,-18
+N05000018,West Tyrone,42,-16
+S14000021,East Renfrewshire,48,-11
+S14000027,Na h-Eileanan an Iar,47,-2
+S14000045,Midlothian,52,-11
+S14000048,North Ayrshire and Arran,48,-10
+S14000051,Orkney and Shetland,51,0
+S14000060,Aberdeen North,52,-3
+S14000061,Aberdeen South,52,-4
+S14000062,Aberdeenshire North and Moray East,51,-3
+S14000063,Airdrie and Shotts,50,-11
+S14000064,Alloa and Grangemouth,50,-7
+S14000065,Angus and Perthshire Glens,50,-5
+S14000066,Arbroath and Broughty Ferry,52,-5
+S14000067,"Argyll, Bute and South Lochaber",49,-5
+S14000068,Bathgate and Linlithgow,51,-9
+S14000069,"Caithness, Sutherland and Easter Ross",50,-2
+S14000070,Coatbridge and Bellshill,50,-12
+S14000071,Cowdenbeath and Kirkcaldy,52,-7
+S14000072,Cumbernauld and Kirkintilloch,50,-8
+S14000073,Dumfries and Galloway,51,-13
+S14000074,"Dumfriesshire, Clydesdale and Tweeddale",52,-13
+S14000075,Dundee Central,50,-6
+S14000076,Dunfermline and Dollar,51,-7
+S14000077,East Kilbride and Strathaven,48,-13
+S14000078,Edinburgh East and Musselburgh,54,-10
+S14000079,Edinburgh North and Leith,53,-9
+S14000080,Edinburgh South,53,-10
+S14000081,Edinburgh South West,52,-10
+S14000082,Edinburgh West,52,-9
+S14000083,Falkirk,51,-8
+S14000084,Glasgow East,51,-10
+S14000085,Glasgow North,49,-9
+S14000086,Glasgow North East,50,-9
+S14000087,Glasgow South,49,-11
+S14000088,Glasgow South West,50,-10
+S14000089,Glasgow West,49,-8
+S14000090,Glenrothes and Mid Fife,52,-6
+S14000091,Gordon and Buchan,50,-4
+S14000092,Hamilton and Clyde Valley,51,-12
+S14000093,Inverclyde and Renfrewshire West,48,-8
+S14000094,"Inverness, Skye and West Ross-shire",49,-3
+S14000095,Livingston,51,-11
+S14000096,Lothian East,53,-11
+S14000097,Mid Dunbartonshire,49,-7
+S14000098,"Moray West, Nairn and Strathspey",49,-4
+S14000099,"Motherwell, Wishaw and Carluke",52,-12
+S14000100,North East Fife,51,-6
+S14000101,Paisley and Renfrewshire North,48,-9
+S14000102,Paisley and Renfrewshire South,49,-10
+S14000103,Perth and Kinross-shire,51,-5
+S14000104,Rutherglen,49,-12
+S14000105,Stirling and Strathallan,49,-6
+S14000106,West Dunbartonshire,48,-7
+S14000107,"Ayr, Carrick and Cumnock",49,-13
+S14000108,"Berwickshire, Roxburgh and Selkirk",53,-12
+S14000109,Central Ayrshire,48,-12
+S14000110,Kilmarnock and Loudoun,50,-13
+S14000111,West Aberdeenshire and Kincardine,51,-4
+W07000081,Aberafan Maesteg,46,-36
+W07000082,Alyn and Deeside,49,-29
+W07000083,Bangor Aberconwy,47,-31
+W07000084,Blaenau Gwent and Rhymney,49,-33
+W07000085,"Brecon, Radnor and Cwm Tawe",50,-32
+W07000086,Bridgend,46,-37
+W07000087,Caerfyrddin,49,-32
+W07000088,Caerphilly,49,-35
+W07000089,Cardiff East,48,-37
+W07000090,Cardiff North,48,-36
+W07000091,Cardiff South and Penarth,48,-38
+W07000092,Cardiff West,47,-37
+W07000093,Ceredigion Preseli,48,-34
+W07000094,Clwyd East,49,-30
+W07000095,Clwyd North,48,-30
+W07000096,Dwyfor Meirionnydd,48,-31
+W07000097,Gower,44,-37
+W07000098,Llanelli,45,-36
+W07000099,Merthyr Tydfil and Aberdare,49,-34
+W07000100,Mid and South Pembrokeshire,44,-36
+W07000101,Monmouthshire,50,-36
+W07000102,Montgomeryshire and Glyndwr,49,-31
+W07000103,Neath and Swansea East,47,-35
+W07000104,Newport East,49,-37
+W07000105,Newport West and Islwyn,49,-36
+W07000106,Pontypridd,48,-35
+W07000107,Rhondda and Ogmore,47,-36
+W07000108,Swansea West,45,-37
+W07000109,Torfaen,50,-34
+W07000110,Vale of Glamorgan,47,-38
+W07000111,Wrexham,50,-30
+W07000112,Ynys Môn,46,-29
diff --git a/scripts/.datasets/local_authorities_2021.csv b/scripts/.datasets/local_authorities_2021.csv
new file mode 100644
index 000000000..9fcf922ed
--- /dev/null
+++ b/scripts/.datasets/local_authorities_2021.csv
@@ -0,0 +1,361 @@
+code,x,y,name
+E06000001,8.0,19.0,Hartlepool
+E06000002,9.0,18.0,Middlesbrough
+E06000003,9.0,19.0,Redcar and Cleveland
+E06000004,8.0,18.0,Stockton-on-Tees
+E06000005,7.0,18.0,Darlington
+E06000006,1.0,11.0,Halton
+E06000007,2.0,11.0,Warrington
+E06000008,4.0,15.0,Blackburn with Darwen
+E06000009,2.0,15.0,Blackpool
+E06000010,10.0,15.0,"Kingston upon Hull, City of"
+E06000011,11.0,16.0,East Riding of Yorkshire
+E06000012,11.0,14.0,North East Lincolnshire
+E06000013,10.0,14.0,North Lincolnshire
+E06000014,9.0,17.0,York
+E06000015,6.0,11.0,Derby
+E06000016,8.0,8.0,Leicester
+E06000017,10.0,9.0,Rutland
+E06000018,8.0,10.0,Nottingham
+E06000019,0.0,8.0,"Herefordshire, County of"
+E06000020,2.0,9.0,Telford and Wrekin
+E06000021,3.0,10.0,Stoke-on-Trent
+E06000022,1.0,3.0,Bath and North East Somerset
+E06000023,0.0,3.0,"Bristol, City of"
+E06000024,0.0,2.0,North Somerset
+E06000025,1.0,4.0,South Gloucestershire
+E06000026,-4.0,-2.0,Plymouth
+E06000027,-3.0,-2.0,Torbay
+E06000030,2.0,4.0,Swindon
+E06000031,11.0,9.0,Peterborough
+E06000032,10.0,7.0,Luton
+E06000033,16.0,6.0,Southend-on-Sea
+E06000034,15.0,4.0,Thurrock
+E06000035,15.0,1.0,Medway
+E06000036,4.0,2.0,Bracknell Forest
+E06000037,2.0,2.0,West Berkshire
+E06000038,2.0,3.0,Reading
+E06000039,6.0,4.0,Slough
+E06000040,4.0,3.0,Windsor and Maidenhead
+E06000041,3.0,3.0,Wokingham
+E06000042,6.0,5.0,Milton Keynes
+E06000043,9.0,-2.0,Brighton and Hove
+E06000044,4.0,-1.0,Portsmouth
+E06000045,2.0,0.0,Southampton
+E06000046,1.0,-2.0,Isle of Wight
+E06000047,6.0,18.0,County Durham
+E06000049,4.0,11.0,Cheshire East
+E06000050,3.0,11.0,Cheshire West and Chester
+E06000051,1.0,9.0,Shropshire
+E06000052,-5.0,-2.0,Cornwall
+E06000053,-7.0,-3.0,Isles of Scilly
+E06000054,1.0,2.0,Wiltshire
+E06000055,9.0,7.0,Bedford
+E06000056,9.0,6.0,Central Bedfordshire
+E06000057,5.0,20.0,Northumberland
+E06000058,0.0,0.0,"Bournemouth, Christchurch and Poole"
+E06000059,-1.0,0.0,Dorset
+E06000060,5.0,5.0,Buckinghamshire
+E06000061,9.0,9.0,North Northamptonshire
+E06000062,7.0,6.0,West Northamptonshire
+E06000063,0.0,0.0,Cumberland
+E06000064,0.0,0.0,Westmorland and Furness
+E06000065,0.0,0.0,North Yorkshire
+E06000066,0.0,0.0,Somerset
+E07000008,12.0,8.0,Cambridge
+E07000009,12.0,9.0,East Cambridgeshire
+E07000010,13.0,10.0,Fenland
+E07000011,10.0,8.0,Huntingdonshire
+E07000012,11.0,8.0,South Cambridgeshire
+E07000032,7.0,11.0,Amber Valley
+E07000033,10.0,12.0,Bolsover
+E07000034,9.0,12.0,Chesterfield
+E07000035,7.0,12.0,Derbyshire Dales
+E07000036,7.0,9.0,Erewash
+E07000037,7.0,13.0,High Peak
+E07000038,8.0,12.0,North East Derbyshire
+E07000039,6.0,10.0,South Derbyshire
+E07000040,-2.0,-1.0,East Devon
+E07000041,-3.0,-1.0,Exeter
+E07000042,-2.0,0.0,Mid Devon
+E07000043,-3.0,1.0,North Devon
+E07000044,-4.0,-3.0,South Hams
+E07000045,-2.0,-2.0,Teignbridge
+E07000046,-4.0,-1.0,Torridge
+E07000047,-3.0,0.0,West Devon
+E07000061,10.0,-2.0,Eastbourne
+E07000062,13.0,-2.0,Hastings
+E07000063,10.0,-1.0,Lewes
+E07000064,12.0,-2.0,Rother
+E07000065,11.0,-2.0,Wealden
+E07000066,14.0,5.0,Basildon
+E07000067,14.0,7.0,Braintree
+E07000068,13.0,5.0,Brentwood
+E07000069,15.0,5.0,Castle Point
+E07000070,14.0,6.0,Chelmsford
+E07000071,15.0,8.0,Colchester
+E07000072,12.0,5.0,Epping Forest
+E07000073,13.0,6.0,Harlow
+E07000074,15.0,7.0,Maldon
+E07000075,15.0,6.0,Rochford
+E07000076,16.0,8.0,Tendring
+E07000077,13.0,7.0,Uttlesford
+E07000078,1.0,5.0,Cheltenham
+E07000079,2.0,5.0,Cotswold
+E07000080,-1.0,6.0,Forest of Dean
+E07000081,0.0,6.0,Gloucester
+E07000082,0.0,5.0,Stroud
+E07000083,1.0,6.0,Tewkesbury
+E07000084,2.0,1.0,Basingstoke and Deane
+E07000085,4.0,0.0,East Hampshire
+E07000086,3.0,0.0,Eastleigh
+E07000087,2.0,-1.0,Fareham
+E07000088,3.0,-1.0,Gosport
+E07000089,3.0,2.0,Hart
+E07000090,5.0,0.0,Havant
+E07000091,1.0,0.0,New Forest
+E07000092,4.0,1.0,Rushmoor
+E07000093,1.0,1.0,Test Valley
+E07000094,3.0,1.0,Winchester
+E07000095,12.0,6.0,Broxbourne
+E07000096,8.0,6.0,Dacorum
+E07000098,9.0,5.0,Hertsmere
+E07000099,11.0,7.0,North Hertfordshire
+E07000102,7.0,5.0,Three Rivers
+E07000103,8.0,5.0,Watford
+E07000105,12.0,-1.0,Ashford
+E07000106,15.0,0.0,Canterbury
+E07000107,13.0,1.0,Dartford
+E07000108,14.0,-1.0,Dover
+E07000109,14.0,1.0,Gravesham
+E07000110,14.0,0.0,Maidstone
+E07000111,12.0,0.0,Sevenoaks
+E07000112,13.0,-1.0,Folkestone and Hythe
+E07000113,16.0,0.0,Swale
+E07000114,15.0,-1.0,Thanet
+E07000115,13.0,0.0,Tonbridge and Malling
+E07000116,11.0,-1.0,Tunbridge Wells
+E07000117,6.0,15.0,Burnley
+E07000118,3.0,14.0,Chorley
+E07000119,4.0,16.0,Fylde
+E07000120,5.0,15.0,Hyndburn
+E07000121,3.0,17.0,Lancaster
+E07000122,6.0,16.0,Pendle
+E07000123,5.0,16.0,Preston
+E07000124,5.0,17.0,Ribble Valley
+E07000125,6.0,14.0,Rossendale
+E07000126,3.0,15.0,South Ribble
+E07000127,2.0,13.0,West Lancashire
+E07000128,3.0,16.0,Wyre
+E07000129,7.0,7.0,Blaby
+E07000130,8.0,9.0,Charnwood
+E07000131,8.0,7.0,Harborough
+E07000132,7.0,8.0,Hinckley and Bosworth
+E07000133,11.0,10.0,Melton
+E07000134,6.0,9.0,North West Leicestershire
+E07000135,9.0,8.0,Oadby and Wigston
+E07000136,12.0,12.0,Boston
+E07000137,12.0,13.0,East Lindsey
+E07000138,11.0,12.0,Lincoln
+E07000139,11.0,11.0,North Kesteven
+E07000140,12.0,11.0,South Holland
+E07000141,12.0,10.0,South Kesteven
+E07000142,11.0,13.0,West Lindsey
+E07000143,14.0,10.0,Breckland
+E07000144,15.0,12.0,Broadland
+E07000145,15.0,11.0,Great Yarmouth
+E07000146,13.0,11.0,King's Lynn and West Norfolk
+E07000147,14.0,12.0,North Norfolk
+E07000148,14.0,11.0,Norwich
+E07000149,15.0,10.0,South Norfolk
+E07000170,8.0,11.0,Ashfield
+E07000171,10.0,13.0,Bassetlaw
+E07000172,7.0,10.0,Broxtowe
+E07000173,9.0,10.0,Gedling
+E07000174,9.0,11.0,Mansfield
+E07000175,10.0,11.0,Newark and Sherwood
+E07000176,10.0,10.0,Rushcliffe
+E07000177,4.0,5.0,Cherwell
+E07000178,4.0,4.0,Oxford
+E07000179,5.0,4.0,South Oxfordshire
+E07000180,3.0,4.0,Vale of White Horse
+E07000181,3.0,5.0,West Oxfordshire
+E07000192,3.0,9.0,Cannock Chase
+E07000193,5.0,11.0,East Staffordshire
+E07000194,4.0,9.0,Lichfield
+E07000195,2.0,10.0,Newcastle-under-Lyme
+E07000196,2.0,8.0,South Staffordshire
+E07000197,4.0,10.0,Stafford
+E07000198,5.0,10.0,Staffordshire Moorlands
+E07000199,5.0,9.0,Tamworth
+E07000200,14.0,8.0,Babergh
+E07000202,15.0,9.0,Ipswich
+E07000203,14.0,9.0,Mid Suffolk
+E07000207,7.0,2.0,Elmbridge
+E07000208,8.0,0.0,Epsom and Ewell
+E07000209,5.0,1.0,Guildford
+E07000210,6.0,1.0,Mole Valley
+E07000211,7.0,0.0,Reigate and Banstead
+E07000212,5.0,3.0,Runnymede
+E07000213,6.0,3.0,Spelthorne
+E07000214,5.0,2.0,Surrey Heath
+E07000215,9.0,-1.0,Tandridge
+E07000216,6.0,0.0,Waverley
+E07000217,6.0,2.0,Woking
+E07000218,6.0,8.0,North Warwickshire
+E07000219,6.0,7.0,Nuneaton and Bedworth
+E07000220,6.0,6.0,Rugby
+E07000221,3.0,6.0,Stratford-on-Avon
+E07000222,4.0,6.0,Warwick
+E07000223,8.0,-2.0,Adur
+E07000224,6.0,-2.0,Arun
+E07000225,5.0,-1.0,Chichester
+E07000226,8.0,-1.0,Crawley
+E07000227,6.0,-1.0,Horsham
+E07000228,7.0,-1.0,Mid Sussex
+E07000229,7.0,-2.0,Worthing
+E07000234,2.0,7.0,Bromsgrove
+E07000235,-1.0,7.0,Malvern Hills
+E07000236,4.0,7.0,Redditch
+E07000237,0.0,7.0,Worcester
+E07000238,2.0,6.0,Wychavon
+E07000239,1.0,8.0,Wyre Forest
+E07000240,10.0,6.0,St Albans
+E07000241,11.0,6.0,Welwyn Hatfield
+E07000242,13.0,8.0,East Hertfordshire
+E07000243,12.0,7.0,Stevenage
+E07000244,16.0,10.0,East Suffolk
+E07000245,13.0,9.0,West Suffolk
+E08000001,4.0,14.0,Bolton
+E08000002,5.0,14.0,Bury
+E08000003,5.0,12.0,Manchester
+E08000004,5.0,13.0,Oldham
+E08000005,7.0,14.0,Rochdale
+E08000006,4.0,13.0,Salford
+E08000007,6.0,12.0,Stockport
+E08000008,6.0,13.0,Tameside
+E08000009,4.0,12.0,Trafford
+E08000010,3.0,13.0,Wigan
+E08000011,2.0,12.0,Knowsley
+E08000012,1.0,13.0,Liverpool
+E08000013,3.0,12.0,St. Helens
+E08000014,2.0,14.0,Sefton
+E08000015,1.0,12.0,Wirral
+E08000016,8.0,14.0,Barnsley
+E08000017,9.0,14.0,Doncaster
+E08000018,9.0,13.0,Rotherham
+E08000019,8.0,13.0,Sheffield
+E08000021,5.0,19.0,Newcastle upon Tyne
+E08000022,6.0,20.0,North Tyneside
+E08000023,7.0,20.0,South Tyneside
+E08000024,7.0,19.0,Sunderland
+E08000025,5.0,8.0,Birmingham
+E08000026,5.0,6.0,Coventry
+E08000027,1.0,7.0,Dudley
+E08000028,3.0,7.0,Sandwell
+E08000029,5.0,7.0,Solihull
+E08000030,4.0,8.0,Walsall
+E08000031,3.0,8.0,Wolverhampton
+E08000032,7.0,16.0,Bradford
+E08000033,7.0,15.0,Calderdale
+E08000034,8.0,15.0,Kirklees
+E08000035,8.0,16.0,Leeds
+E08000036,9.0,15.0,Wakefield
+E08000037,6.0,19.0,Gateshead
+E09000001,11.0,2.0,City of London
+E09000002,13.0,3.0,Barking and Dagenham
+E09000003,10.0,5.0,Barnet
+E09000004,12.0,1.0,Bexley
+E09000005,10.0,4.0,Brent
+E09000006,11.0,0.0,Bromley
+E09000007,11.0,4.0,Camden
+E09000008,10.0,0.0,Croydon
+E09000009,9.0,4.0,Ealing
+E09000010,11.0,5.0,Enfield
+E09000011,11.0,1.0,Greenwich
+E09000012,12.0,3.0,Hackney
+E09000013,8.0,3.0,Hammersmith and Fulham
+E09000014,12.0,4.0,Haringey
+E09000015,8.0,4.0,Harrow
+E09000016,14.0,3.0,Havering
+E09000017,7.0,4.0,Hillingdon
+E09000018,7.0,3.0,Hounslow
+E09000019,11.0,3.0,Islington
+E09000020,9.0,3.0,Kensington and Chelsea
+E09000021,7.0,1.0,Kingston upon Thames
+E09000022,10.0,2.0,Lambeth
+E09000023,10.0,1.0,Lewisham
+E09000024,8.0,1.0,Merton
+E09000025,13.0,2.0,Newham
+E09000026,14.0,4.0,Redbridge
+E09000027,8.0,2.0,Richmond upon Thames
+E09000028,9.0,1.0,Southwark
+E09000029,9.0,0.0,Sutton
+E09000030,12.0,2.0,Tower Hamlets
+E09000031,13.0,4.0,Waltham Forest
+E09000032,9.0,2.0,Wandsworth
+E09000033,10.0,3.0,Westminster
+N09000001,-4.0,16.0,Antrim and Newtownabbey
+N09000002,-5.0,16.0,"Armagh City, Banbridge and Craigavon"
+N09000003,-4.0,17.0,Belfast
+N09000004,-5.0,18.0,Causeway Coast and Glens
+N09000005,-6.0,17.0,Derry City and Strabane
+N09000006,-6.0,16.0,Fermanagh and Omagh
+N09000007,-5.0,15.0,Lisburn and Castlereagh
+N09000008,-4.0,18.0,Mid and East Antrim
+N09000009,-5.0,17.0,Mid Ulster
+N09000010,-4.0,15.0,"Newry, Mourne and Down"
+S12000005,2.0,24.0,Clackmannanshire
+S12000006,4.0,20.0,Dumfries and Galloway
+S12000008,3.0,20.0,East Ayrshire
+S12000010,5.0,22.0,East Lothian
+S12000011,2.0,20.0,East Renfrewshire
+S12000013,-1.0,27.0,Na h-Eileanan Siar
+S12000014,2.0,23.0,Falkirk
+S12000017,1.0,26.0,Highland
+S12000018,0.0,21.0,Inverclyde
+S12000019,3.0,21.0,Midlothian
+S12000020,2.0,26.0,Moray
+S12000021,1.0,20.0,North Ayrshire
+S12000023,4.0,28.0,Orkney Islands
+S12000026,4.0,21.0,Scottish Borders
+S12000027,5.0,30.0,Shetland Islands
+S12000028,1.0,19.0,South Ayrshire
+S12000029,2.0,21.0,South Lanarkshire
+S12000030,1.0,24.0,Stirling
+S12000033,4.0,26.0,Aberdeen City
+S12000034,3.0,26.0,Aberdeenshire
+S12000035,0.0,24.0,Argyll and Bute
+S12000036,4.0,22.0,City of Edinburgh
+S12000038,1.0,22.0,Renfrewshire
+S12000039,0.0,23.0,West Dunbartonshire
+S12000040,3.0,22.0,West Lothian
+S12000041,2.0,25.0,Angus
+S12000042,3.0,25.0,Dundee City
+S12000045,1.0,23.0,East Dunbartonshire
+S12000047,3.0,24.0,Fife
+S12000048,1.0,25.0,Perth and Kinross
+S12000049,1.0,21.0,Glasgow City
+S12000050,2.0,22.0,North Lanarkshire
+W06000001,-2.0,12.0,Isle of Anglesey
+W06000002,-2.0,10.0,Gwynedd
+W06000003,-1.0,10.0,Conwy
+W06000004,0.0,10.0,Denbighshire
+W06000005,0.0,11.0,Flintshire
+W06000006,1.0,10.0,Wrexham
+W06000008,-2.0,9.0,Ceredigion
+W06000009,-5.0,6.0,Pembrokeshire
+W06000010,-4.0,6.0,Carmarthenshire
+W06000011,-4.0,5.0,Swansea
+W06000012,-3.0,5.0,Neath Port Talbot
+W06000013,-3.0,6.0,Bridgend
+W06000014,-2.0,4.0,Vale of Glamorgan
+W06000015,-2.0,5.0,Cardiff
+W06000016,-3.0,7.0,Rhondda Cynon Taf
+W06000018,-2.0,6.0,Caerphilly
+W06000019,0.0,9.0,Blaenau Gwent
+W06000020,-2.0,7.0,Torfaen
+W06000021,-1.0,8.0,Monmouthshire
+W06000022,-1.0,5.0,Newport
+W06000023,-1.0,9.0,Powys
+W06000024,-2.0,8.0,Merthyr Tydfil
diff --git a/scripts/BUG_REPORT_build_from_dataframe.md b/scripts/BUG_REPORT_build_from_dataframe.md
new file mode 100644
index 000000000..503557e56
--- /dev/null
+++ b/scripts/BUG_REPORT_build_from_dataframe.md
@@ -0,0 +1,172 @@
+# Bug Report: Entity-Level Aggregation Missing in `build_from_dataframe`
+
+## Summary
+
+The `build_from_dataframe` method in `policyengine_uk` does not aggregate person-level data to entity-level before calling `set_input()`, causing UK country filtering (e.g., Wales) to fail with array length mismatch errors.
+
+## Affected Repository
+
+**Repository:** `policyengine-uk`
+**File:** `policyengine_uk/simulation.py`
+**Method:** `build_from_dataframe()`
+**Approximate Lines:** 281-286 (may vary by version)
+
+## Symptoms
+
+When running a UK simulation filtered to a specific country (e.g., Wales), the following error occurs:
+
+```
+ValueError: Unable to set value "[ True  True  True ... False False False]"
+for variable "would_evade_tv_licence_fee", as its length is 8470
+while there are 4108 households in the simulation.
+```
+
+The error occurs because:
+- 8,470 = number of Welsh **persons** in the dataset
+- 4,108 = number of Welsh **households** in the dataset
+- The code tries to assign person-level arrays to household-level variables
+
+## Root Cause
+
+### The Bug Location
+
+```python
+# In policyengine_uk/simulation.py, build_from_dataframe method:
+
+# Set input values for each variable and time period
+for column in df:
+    variable, time_period = column.split("__")
+    if variable not in self.tax_benefit_system.variables:
+        continue
+    self.set_input(variable, time_period, df[column])  # <-- BUG HERE
+```
+
+### Why This Fails
+
+1. **`to_input_dataframe()`** exports ALL variables at **person level** (one row per person), regardless of the variable's native entity. This is by design - it creates a flat DataFrame where each row represents a person.
+
+2. **`build_from_dataframe()`** correctly builds the entity structure:
+   - Extracts `person_household_id` to determine household membership
+   - Creates the correct number of households (e.g., 4,108 for Wales)
+   - Sets up person-to-household relationships properly
+
+3. **BUT** the loop that sets variable values does NOT check if aggregation is needed. It passes person-level arrays (8,470 values) directly to `set_input()` for household-level variables that only have 4,108 entities.
+
+### The Correct Approach
+
+The `policyengine_core` library's `build_from_dataset()` method handles this correctly in `policyengine_core/simulations/simulation.py`:
+
+```python
+# From policyengine_core/simulations/simulation.py, build_from_dataset method:
+
+if len(data[variable]) != len(population.ids):
+    population: GroupPopulation
+    entity_level_data = population.value_from_first_person(data[variable])
+else:
+    entity_level_data = data[variable]
+
+self.set_input(variable_name, time_period, entity_level_data)
+```
+
+## Required Fix
+
+### Current Buggy Code
+
+```python
+# Set input values for each variable and time period
+for column in df:
+    variable, time_period = column.split("__")
+    if variable not in self.tax_benefit_system.variables:
+        continue
+    self.set_input(variable, time_period, df[column])
+```
+
+### Fixed Code
+
+```python
+# Set input values for each variable and time period
+for column in df:
+    variable, time_period = column.split("__")
+    if variable not in self.tax_benefit_system.variables:
+        continue
+
+    # Get variable metadata and target population
+    var_meta = self.tax_benefit_system.get_variable(variable)
+    entity = var_meta.entity
+    population = self.get_population(entity.plural)
+
+    data = df[column].values
+
+    # Check if aggregation is needed (data is person-level but variable is group-level)
+    if len(data) != population.count:
+        # Aggregate from person-level to entity-level using first person's value
+        data = population.value_from_first_person(data)
+
+    self.set_input(variable, time_period, data)
+```
+
+## Technical Details
+
+### What `value_from_first_person()` Does
+
+This method aggregates person-level data to group-level by taking the value from the first person in each group. For household-level variables (like `would_evade_tv_licence_fee`), all persons in a household share the same value, so taking the first person's value is correct.
+
+The method is defined in `policyengine_core` on `GroupPopulation` objects.
+
+### Why This Pattern Works
+
+- Person-level variables: `len(data) == population.count` (no aggregation needed)
+- Group-level variables exported at person level: `len(data) != population.count` (aggregation needed)
+
+### Entity Structure in UK Model
+
+The UK tax-benefit system has these entities:
+- `person` - Individual people
+- `benunit` - Benefit units (roughly: nuclear families)
+- `household` - Households (one or more benefit units sharing accommodation)
+
+When filtering to Wales:
+- ~8,470 persons
+- ~4,108 households
+- The person-to-household ratio (roughly 2:1 here) varies with household composition
+
+## Reproduction Steps
+
+1. Create a UK macro simulation: `Simulation(country="uk", scope="macro")`
+2. Filter to a UK country: `Simulation(country="uk", scope="macro", region="country/wales")`
+3. The filtering process:
+   - Calls `to_input_dataframe()` on the baseline simulation
+   - Filters the DataFrame to Welsh persons only
+   - Calls `Microsimulation(dataset=filtered_df)` which invokes `build_from_dataframe()`
+4. Error occurs when `build_from_dataframe()` tries to set household-level variables
+
+## Verification
+
+A Jupyter notebook that reproduces this bug is located at:
+`policyengine-api/scripts/prove_build_from_dataframe_bug.ipynb`
+
+The notebook:
+1. Creates a UK simulation and exports to DataFrame
+2. Filters to Wales (8,470 persons, 4,108 households)
+3. Manually traces through `build_from_dataframe()` step by step
+4. Shows entity structure is correctly built (4,108 households)
+5. Demonstrates the `set_input()` call fails with length mismatch
+6. Shows the fix (aggregation) works correctly
+
+## Impact
+
+This bug affects:
+- UK country filtering (`country/wales`, `country/scotland`, `country/northern_ireland`, `country/england`)
+- Any code path that uses `build_from_dataframe()` with a filtered DataFrame
+
+This bug does NOT affect:
+- Constituency filtering (uses weight adjustment, not DataFrame subsetting)
+- Local authority filtering (uses weight adjustment, not DataFrame subsetting)
+- UK-wide simulations (no filtering needed)
+
+## Notes for Implementation
+
+1. The fix is minimal - just wrap the existing `set_input()` call with a length check and aggregation
+2. No new dependencies are needed - `value_from_first_person()` is already available on population objects
+3. The fix matches the existing pattern in `policyengine_core`'s `build_from_dataset()` method
+4. Consider adding a unit test that creates a simulation from a filtered DataFrame and verifies household-level variables work correctly
diff --git a/scripts/diagnose_country_filtering.ipynb b/scripts/diagnose_country_filtering.ipynb
new file mode 100644
index 000000000..e9d2b1498
--- /dev/null
+++ b/scripts/diagnose_country_filtering.ipynb
@@ -0,0 +1,503 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Diagnosing UK Country Filtering Issue in policyengine.py\n",
+    "\n",
+    "This notebook tests whether `policyengine.py` properly filters simulations by UK country (e.g., Wales).\n",
+    "\n",
+    "## The Issue\n",
+    "When running a simulation filtered to a specific UK country (e.g., `country/wales`), we get:\n",
+    "```\n",
+    "ValueError: Unable to set value \"[ True  True  True ... False False False]\" for variable \n",
+    "\"would_evade_tv_licence_fee\", as its length is 8470 while there are 4108 households in the simulation.\n",
+    "```\n",
+    "\n",
+    "## Hypothesis\n",
+    "The `to_input_dataframe()` method doesn't export `person_household_id`, causing the filtered simulation\n",
+    "to lose entity relationship information and incorrectly set up household counts."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 1: Setup and Imports"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from policyengine import Simulation\n",
+    "\n",
+    "# Check policyengine version\n",
+    "import policyengine\n",
+    "print(f\"policyengine version: {policyengine.__version__}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 2: Create a Baseline UK Simulation\n",
+    "\n",
+    "First, let's create a standard UK-wide simulation and examine its structure."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create a UK-wide simulation (no region filter)\n",
+    "print(\"Creating UK-wide simulation...\")\n",
+    "sim_uk = Simulation(country=\"uk\", scope=\"macro\")\n",
+    "\n",
+    "# Access the underlying country simulation\n",
+    "underlying_sim = sim_uk.baseline_simulation\n",
+    "\n",
+    "print(f\"\\n=== UK-Wide Simulation Structure ===\")\n",
+    "print(f\"Person count: {underlying_sim.persons.count}\")\n",
+    "print(f\"Household count: {underlying_sim.household.count}\")\n",
+    "print(f\"BenUnit count: {underlying_sim.benunit.count}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Check the country distribution in the UK simulation\n",
+    "country_values = sim_uk.calculate(\"country\")\n",
+    "print(\"\\n=== Country Distribution (Household Level) ===\")\n",
+    "print(country_values.value_counts())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Check person-level country distribution\n",
+    "country_person = underlying_sim.calculate(\"country\", map_to=\"person\")\n",
+    "unique, counts = np.unique(country_person, return_counts=True)\n",
+    "print(\"\\n=== Country Distribution (Person Level) ===\")\n",
+    "for u, c in zip(unique, counts):\n",
+    "    print(f\"  {u}: {c} persons\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 3: Test `to_input_dataframe()` Export\n",
+    "\n",
+    "Let's examine what columns are exported by `to_input_dataframe()` to see if entity linkage variables are included."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Export the simulation to a dataframe\n",
+    "print(\"Exporting simulation to DataFrame...\")\n",
+    "df = underlying_sim.to_input_dataframe()\n",
+    "\n",
+    "print(f\"\\n=== Exported DataFrame ===\")\n",
+    "print(f\"Shape: {df.shape}\")\n",
+    "print(f\"Number of columns: {len(df.columns)}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Check for entity ID and linkage columns\n",
+    "print(\"\\n=== Entity-Related Columns ===\")\n",
+    "\n",
+    "id_columns = [c for c in df.columns if '_id' in c.lower()]\n",
+    "print(f\"\\nColumns containing '_id': {len(id_columns)}\")\n",
+    "for col in sorted(id_columns):\n",
+    "    print(f\"  - {col}\")\n",
+    "\n",
+    "# Specifically check for critical columns\n",
+    "critical_cols = ['person_id', 'household_id', 'person_household_id', 'benunit_id', 'person_benunit_id']\n",
+    "print(f\"\\n=== Critical Entity Linkage Columns ===\")\n",
+    "for col_base in critical_cols:\n",
+    "    matching = [c for c in df.columns if c.startswith(col_base)]\n",
+    "    if matching:\n",
+    "        print(f\"  {col_base}: FOUND -> {matching}\")\n",
+    "    else:\n",
+    "        print(f\"  {col_base}: MISSING!\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Check if person_household_id has known periods in the simulation\n",
+    "print(\"\\n=== Checking Known Periods for Entity Linkage Variables ===\")\n",
+    "\n",
+    "for var_name in ['person_id', 'household_id', 'person_household_id', 'person_benunit_id']:\n",
+    "    try:\n",
+    "        holder = underlying_sim.get_holder(var_name)\n",
+    "        known_periods = holder.get_known_periods()\n",
+    "        print(f\"  {var_name}: known_periods = {list(known_periods)}\")\n",
+    "    except Exception as e:\n",
+    "        print(f\"  {var_name}: ERROR - {e}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 4: Simulate Country Filtering (Wales)\n",
+    "\n",
+    "Now let's create a Wales-filtered simulation and see what happens."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create a Wales simulation\n",
+    "print(\"Creating Wales simulation...\")\n",
+    "print(\"(This may trigger the error we're diagnosing)\")\n",
+    "print()\n",
+    "\n",
+    "try:\n",
+    "    sim_wales = Simulation(country=\"uk\", scope=\"macro\", region=\"country/wales\")\n",
+    "    wales_underlying = sim_wales.baseline_simulation\n",
+    "    \n",
+    "    print(f\"\\n=== Wales Simulation Structure ===\")\n",
+    "    print(f\"Person count: {wales_underlying.persons.count}\")\n",
+    "    print(f\"Household count: {wales_underlying.household.count}\")\n",
+    "    print(f\"BenUnit count: {wales_underlying.benunit.count}\")\n",
+    "    \n",
+    "    # Check if counts make sense\n",
+    "    if wales_underlying.household.count == wales_underlying.persons.count:\n",
+    "        print(\"\\n*** WARNING: Household count equals person count! ***\")\n",
+    "        print(\"This suggests entity linkage was lost during filtering.\")\n",
+    "        \n",
+    "except Exception as e:\n",
+    "    print(f\"\\n*** ERROR creating Wales simulation ***\")\n",
+    "    print(f\"Error type: {type(e).__name__}\")\n",
+    "    print(f\"Error message: {e}\")\n",
+    "    import traceback\n",
+    "    traceback.print_exc()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 5: Manual Reproduction of the Filtering Process\n",
+    "\n",
+    "Let's manually reproduce what `_apply_region_to_simulation` does to understand where it breaks."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Step-by-step reproduction of the filtering logic\n",
+    "print(\"=== Manual Reproduction of Country Filtering ===\")\n",
+    "\n",
+    "# Step 1: Export to DataFrame\n",
+    "print(\"\\n[Step 1] Exporting to DataFrame...\")\n",
+    "df = underlying_sim.to_input_dataframe()\n",
+    "print(f\"  DataFrame shape: {df.shape}\")\n",
+    "print(f\"  Columns with 'household': {[c for c in df.columns if 'household' in c.lower()][:10]}...\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Step 2: Calculate country at person level\n",
+    "print(\"\\n[Step 2] Calculating country at person level...\")\n",
+    "country_person_level = underlying_sim.calculate(\"country\", map_to=\"person\").values\n",
+    "print(f\"  Country array shape: {country_person_level.shape}\")\n",
+    "print(f\"  Unique values: {np.unique(country_person_level)}\")\n",
+    "\n",
+    "# Count Welsh persons\n",
+    "wales_mask = country_person_level == \"WALES\"\n",
+    "print(f\"  Welsh persons: {wales_mask.sum()}\")\n",
+    "print(f\"  Non-Welsh persons: {(~wales_mask).sum()}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Step 3: Filter DataFrame to Wales\n",
+    "print(\"\\n[Step 3] Filtering DataFrame to Wales...\")\n",
+    "df_wales = df[wales_mask]\n",
+    "print(f\"  Filtered DataFrame shape: {df_wales.shape}\")\n",
+    "\n",
+    "# Check what person_household_id looks like in filtered data\n",
+    "phh_cols = [c for c in df_wales.columns if 'person_household_id' in c]\n",
+    "if phh_cols:\n",
+    "    print(f\"  person_household_id columns: {phh_cols}\")\n",
+    "    for col in phh_cols:\n",
+    "        vals = df_wales[col].values\n",
+    "        print(f\"    {col}: {len(np.unique(vals))} unique values\")\n",
+    "else:\n",
+    "    print(\"  person_household_id: NOT IN DATAFRAME!\")\n",
+    "    print(\"  This is likely the root cause of the issue.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Step 4: Try to create a new simulation from filtered DataFrame\n",
+    "print(\"\\n[Step 4] Creating new simulation from filtered DataFrame...\")\n",
+    "\n",
+    "from policyengine_uk import Microsimulation\n",
+    "\n",
+    "try:\n",
+    "    new_sim = Microsimulation(dataset=df_wales)\n",
+    "    \n",
+    "    print(f\"  New simulation created!\")\n",
+    "    print(f\"  Person count: {new_sim.persons.count}\")\n",
+    "    print(f\"  Household count: {new_sim.household.count}\")\n",
+    "    \n",
+    "    # Critical check\n",
+    "    if new_sim.household.count == new_sim.persons.count:\n",
+    "        print(\"\\n  *** CONFIRMED: Household count equals person count! ***\")\n",
+    "        print(\"  The entity linkage was lost because person_household_id is missing.\")\n",
+    "    elif new_sim.household.count == len(np.unique(df_wales.iloc[:, 0])):\n",
+    "        print(\"\\n  *** Household count matches first column's unique values ***\")\n",
+    "        print(\"  This confirms the fallback behavior in build_from_dataset()\")\n",
+    "        \n",
+    "except Exception as e:\n",
+    "    print(f\"  Error creating simulation: {e}\")\n",
+    "    import traceback\n",
+    "    traceback.print_exc()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Step 5: Try to calculate would_evade_tv_licence_fee (this should trigger the error)\n",
+    "print(\"\\n[Step 5] Attempting to calculate would_evade_tv_licence_fee...\")\n",
+    "\n",
+    "try:\n",
+    "    # This calculation uses random(household), which will fail if household count is wrong\n",
+    "    result = new_sim.calculate(\"would_evade_tv_licence_fee\")\n",
+    "    print(f\"  Calculation succeeded!\")\n",
+    "    print(f\"  Result shape: {result.shape}\")\n",
+    "    print(f\"  Result dtype: {result.dtype}\")\n",
+    "except ValueError as e:\n",
+    "    print(f\"  *** ValueError (expected): ***\")\n",
+    "    print(f\"  {e}\")\n",
+    "    \n",
+    "    # Parse the error to understand the mismatch\n",
+    "    error_str = str(e)\n",
+    "    if \"length is\" in error_str and \"while there are\" in error_str:\n",
+    "        print(f\"\\n  This confirms the array size mismatch issue.\")\n",
+    "except Exception as e:\n",
+    "    print(f\"  Unexpected error: {type(e).__name__}: {e}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 6: Deeper Investigation - What Does household_id Return?\n",
+    "\n",
+    "Let's check what `household_id` returns in the broken simulation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Check household_id in the new (potentially broken) simulation\n",
+    "print(\"=== Investigating household_id in Filtered Simulation ===\")\n",
+    "\n",
+    "try:\n",
+    "    # This is what random() calls internally\n",
+    "    hh_ids = new_sim.calculate(\"household_id\", 2025)\n",
+    "    print(f\"household_id result length: {len(hh_ids)}\")\n",
+    "    print(f\"household_id unique count: {len(np.unique(hh_ids))}\")\n",
+    "    print(f\"Expected household count: {new_sim.household.count}\")\n",
+    "    \n",
+    "    if len(hh_ids) != new_sim.household.count:\n",
+    "        print(f\"\\n*** MISMATCH: household_id has {len(hh_ids)} values but simulation has {new_sim.household.count} households ***\")\n",
+    "except Exception as e:\n",
+    "    print(f\"Error: {e}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Check the holder for household_id\n",
+    "print(\"\\n=== Checking household_id Holder ===\")\n",
+    "try:\n",
+    "    holder = new_sim.get_holder(\"household_id\")\n",
+    "    known_periods = holder.get_known_periods()\n",
+    "    print(f\"Known periods: {list(known_periods)}\")\n",
+    "    \n",
+    "    for period in known_periods:\n",
+    "        arr = holder.get_array(period)\n",
+    "        print(f\"  Period {period}: array shape = {arr.shape if arr is not None else 'None'}\")\n",
+    "except Exception as e:\n",
+    "    print(f\"Error: {e}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 7: Compare with Working Approaches (Constituency/LA)\n",
+    "\n",
+    "Constituency and LA filtering use weight adjustment instead of DataFrame subsetting. Let's verify this works."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Test constituency filtering (should work)\n",
+    "print(\"=== Testing Constituency Filtering (Should Work) ===\")\n",
+    "\n",
+    "try:\n",
+    "    sim_constituency = Simulation(country=\"uk\", scope=\"macro\", region=\"constituency/Cardiff South and Penarth\")\n",
+    "    const_underlying = sim_constituency.baseline_simulation\n",
+    "    \n",
+    "    print(f\"Constituency simulation created successfully!\")\n",
+    "    print(f\"  Person count: {const_underlying.persons.count}\")\n",
+    "    print(f\"  Household count: {const_underlying.household.count}\")\n",
+    "    \n",
+    "    # Try the problematic calculation\n",
+    "    result = sim_constituency.calculate(\"would_evade_tv_licence_fee\")\n",
+    "    print(f\"  would_evade_tv_licence_fee calculated successfully!\")\n",
+    "    print(f\"  Result length: {len(result)}\")\n",
+    "    \n",
+    "except Exception as e:\n",
+    "    print(f\"Error: {type(e).__name__}: {e}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Test local authority filtering (should work)\n",
+    "print(\"\\n=== Testing Local Authority Filtering (Should Work) ===\")\n",
+    "\n",
+    "try:\n",
+    "    sim_la = Simulation(country=\"uk\", scope=\"macro\", region=\"local_authority/Cardiff\")\n",
+    "    la_underlying = sim_la.baseline_simulation\n",
+    "    \n",
+    "    print(f\"LA simulation created successfully!\")\n",
+    "    print(f\"  Person count: {la_underlying.persons.count}\")\n",
+    "    print(f\"  Household count: {la_underlying.household.count}\")\n",
+    "    \n",
+    "    # Try the problematic calculation\n",
+    "    result = sim_la.calculate(\"would_evade_tv_licence_fee\")\n",
+    "    print(f\"  would_evade_tv_licence_fee calculated successfully!\")\n",
+    "    print(f\"  Result length: {len(result)}\")\n",
+    "    \n",
+    "except Exception as e:\n",
+    "    print(f\"Error: {type(e).__name__}: {e}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Summary and Conclusions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"=\"*70)\n",
+    "print(\"DIAGNOSIS SUMMARY\")\n",
+    "print(\"=\"*70)\n",
+    "\n",
+    "print(\"\"\"\n",
+    "Based on the tests above:\n",
+    "\n",
+    "1. COUNTRY FILTERING (country/wales):\n",
+    "   - Uses to_input_dataframe() + DataFrame subsetting + new Microsimulation()\n",
+    "   - FAILS because person_household_id is not exported\n",
+    "   - Results in household count = person count (entity linkage lost)\n",
+    "\n",
+    "2. CONSTITUENCY FILTERING (constituency/...):\n",
+    "   - Uses weight adjustment on existing simulation\n",
+    "   - WORKS because entity structure is preserved\n",
+    "\n",
+    "3. LOCAL AUTHORITY FILTERING (local_authority/...):\n",
+    "   - Uses weight adjustment on existing simulation  \n",
+    "   - WORKS because entity structure is preserved\n",
+    "\n",
+    "ROOT CAUSE:\n",
+    "- to_input_dataframe() only exports variables with known periods\n",
+    "- person_household_id doesn't have known periods (it's derived from dataset structure)\n",
+    "- When building from filtered DataFrame, the fallback creates 1 household per person\n",
+    "\n",
+    "RECOMMENDED FIX:\n",
+    "- Option A: Fix to_input_dataframe() to always export entity linkage variables\n",
+    "- Option B: Use weight-zeroing for country filtering (like constituency/LA)\n",
+    "\"\"\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.11.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/scripts/prove_build_from_dataframe_bug.ipynb b/scripts/prove_build_from_dataframe_bug.ipynb
new file mode 100644
index 000000000..a65202fc9
--- /dev/null
+++ b/scripts/prove_build_from_dataframe_bug.ipynb
@@ -0,0 +1,841 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "cell-0",
+   "metadata": {},
+   "source": [
+    "# Proving the Bug in policyengine_uk's build_from_dataframe Method\n",
+    "\n",
+    "This notebook proves that the UK country filtering bug is caused by `policyengine_uk`'s \n",
+    "`build_from_dataframe` method not handling entity-level aggregation.\n",
+    "\n",
+    "## The Bug Location\n",
+    "**File:** `policyengine_uk/simulation.py`  \n",
+    "**Method:** `build_from_dataframe()`  \n",
+    "**Lines:** 281-286\n",
+    "\n",
+    "```python\n",
+    "# Set input values for each variable and time period\n",
+    "for column in df:\n",
+    "    variable, time_period = column.split(\"__\")\n",
+    "    if variable not in self.tax_benefit_system.variables:\n",
+    "        continue\n",
+    "    self.set_input(variable, time_period, df[column])  # <-- BUG: No entity-level check!\n",
+    "```\n",
+    "\n",
+    "## The Problem\n",
+    "1. `to_input_dataframe()` exports ALL variables at **person level** (one row per person)\n",
+    "2. `build_from_dataframe()` correctly builds entity structure with proper counts\n",
+    "3. BUT it then tries to `set_input()` with person-level arrays for household-level variables\n",
+    "4. This raises a `ValueError` in `set_input()`: the array holds one value per person, but the variable expects one value per household"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cell-1",
+   "metadata": {},
+   "source": [
+    "## Step 1: Setup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "cell-2",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "policyengine_uk version: unknown\n",
+      "policyengine_uk location: /opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_uk/__init__.py\n"
+     ]
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import traceback\n",
+    "import inspect\n",
+    "\n",
+    "from policyengine import Simulation\n",
+    "from policyengine_uk import Simulation as UKSimulation\n",
+    "\n",
+    "# Show where policyengine_uk is loaded from\n",
+    "import policyengine_uk\n",
+    "version = getattr(policyengine_uk, '__version__', 'unknown')\n",
+    "print(f\"policyengine_uk version: {version}\")\n",
+    "print(f\"policyengine_uk location: {policyengine_uk.__file__}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cell-3",
+   "metadata": {},
+   "source": [
+    "## Step 2: Examine the Buggy Code\n",
+    "\n",
+    "Let's look at the actual `build_from_dataframe` method to confirm the bug."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "cell-4",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "=== build_from_dataframe source code ===\n",
+      "    def build_from_dataframe(self, df: pd.DataFrame) -> None:\n",
+      "        \"\"\"Build simulation from a pandas DataFrame.\n",
+      "\n",
+      "        Args:\n",
+      "            df: DataFrame with columns in format \"variable_name__time_period\"\n",
+      "        \"\"\"\n",
+      "\n",
+      "        def get_first_array(variable_name: str) -> pd.Series:\n",
+      "            \"\"\"Extract the first array for a given variable name pattern.\"\"\"\n",
+      "            columns = df.columns[df.columns.str.contains(variable_name + \"__\")]\n",
+      "            return df[columns[0]]\n",
+      "\n",
+      "        # Extract ID columns\n",
+      "        (\n",
+      "            person_id,\n",
+      "            person_benunit_id,\n",
+      "            person_household_id,\n",
+      "            benunit_id,\n",
+      "            household_id,\n",
+      "        ) = map(\n",
+      "            get_first_array,\n",
+      "            [\n",
+      "                \"person_id\",\n",
+      "                \"person_benunit_id\",\n",
+      "                \"person_household_id\",\n",
+      "                \"benunit_id\",\n",
+      "                \"household_id\",\n",
+      "            ],\n",
+      "        )\n",
+      "\n",
+      "        # Build entity structure\n",
+      "        self.build_from_ids(\n",
+      "            person_id,\n",
+      "            person_benunit_id,\n",
+      "            person_household_id,\n",
+      "            benunit_id,\n",
+      "            household_id,\n",
+      "        )\n",
+      "\n",
+      "        # Set input values for each variable and time period\n",
+      "        for column in df:\n",
+      "            variable, time_period = column.split(\"__\")\n",
+      "            if variable not in self.tax_benefit_system.variables:\n",
+      "                continue\n",
+      "            self.set_input(variable, time_period, df[column])\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Show the source code of build_from_dataframe\n",
+    "print(\"=== build_from_dataframe source code ===\")\n",
+    "print(inspect.getsource(UKSimulation.build_from_dataframe))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cell-5",
+   "metadata": {},
+   "source": [
+    "## Step 3: Create Test Data\n",
+    "\n",
+    "Create a UK simulation and export to DataFrame, then filter to Wales."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "cell-6",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Creating UK-wide simulation...\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "No data provided, using default dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n",
+      "Using dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n",
+      "Downloading enhanced_frs_2023_24.h5 from bucket policyengine-uk-data-private\n",
+      "WARNING:root:No version specified for policyengine-uk-data-private, enhanced_frs_2023_24.h5. Using latest version: 1.29.4\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "UK-wide entity counts:\n",
+      "  Persons: 115,612\n",
+      "  Households: 53,508\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Create UK-wide simulation\n",
+    "print(\"Creating UK-wide simulation...\")\n",
+    "sim_uk = Simulation(country=\"uk\", scope=\"macro\")\n",
+    "underlying_sim = sim_uk.baseline_simulation\n",
+    "\n",
+    "print(f\"\\nUK-wide entity counts:\")\n",
+    "print(f\"  Persons: {underlying_sim.persons.count:,}\")\n",
+    "print(f\"  Households: {underlying_sim.household.count:,}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "cell-7",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Exporting to DataFrame...\n",
+      "\n",
+      "Filtered DataFrame:\n",
+      "  Rows (Welsh persons): 8,470\n",
+      "  Columns: 1,127\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Export to DataFrame and filter to Wales\n",
+    "print(\"Exporting to DataFrame...\")\n",
+    "df = underlying_sim.to_input_dataframe()\n",
+    "\n",
+    "# Filter to Wales\n",
+    "country_person = underlying_sim.calculate(\"country\", map_to=\"person\").values\n",
+    "wales_mask = country_person == \"WALES\"\n",
+    "df_wales = df[wales_mask]\n",
+    "\n",
+    "print(f\"\\nFiltered DataFrame:\")\n",
+    "print(f\"  Rows (Welsh persons): {len(df_wales):,}\")\n",
+    "print(f\"  Columns: {len(df_wales.columns):,}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cell-8",
+   "metadata": {},
+   "source": [
+    "## Step 4: Prove the DataFrame Has Person-Level Data for Household Variables\n",
+    "\n",
+    "This is the key insight: `to_input_dataframe()` exports EVERYTHING at person level."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "cell-9",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "=== Household-Level Variables in DataFrame ===\n",
+      "Found 392 household-level variable columns in DataFrame\n",
+      "\n",
+      "First 10 household variables:\n",
+      "  - corporate_wealth__2023\n",
+      "  - corporate_wealth__2024\n",
+      "  - corporate_wealth__2025\n",
+      "  - corporate_wealth__2026\n",
+      "  - corporate_wealth__2027\n",
+      "  - corporate_wealth__2028\n",
+      "  - corporate_wealth__2029\n",
+      "  - corporate_wealth__2030\n",
+      "  - non_residential_property_value__2023\n",
+      "  - non_residential_property_value__2024\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Find household-level variables in the DataFrame\n",
+    "print(\"=== Household-Level Variables in DataFrame ===\")\n",
+    "\n",
+    "tax_benefit_system = underlying_sim.tax_benefit_system\n",
+    "household_vars_in_df = []\n",
+    "\n",
+    "for col in df_wales.columns:\n",
+    "    var_name = col.split(\"__\")[0]\n",
+    "    if var_name in tax_benefit_system.variables:\n",
+    "        var_meta = tax_benefit_system.get_variable(var_name)\n",
+    "        if var_meta.entity.key == \"household\":\n",
+    "            household_vars_in_df.append((col, var_name))\n",
+    "\n",
+    "print(f\"Found {len(household_vars_in_df)} household-level variable columns in DataFrame\")\n",
+    "print(f\"\\nFirst 10 household variables:\")\n",
+    "for col, var_name in household_vars_in_df[:10]:\n",
+    "    print(f\"  - {col}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "cell-10",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "=== THE CRITICAL MISMATCH ===\n",
+      "\n",
+      "DataFrame rows (person-level): 8,470\n",
+      "Expected Welsh households: 4,108\n",
+      "\n",
+      "Example: 'corporate_wealth__2025'\n",
+      "  Data length in DataFrame: 8,470\n",
+      "  Should be (household count): 4,108\n",
+      "\n",
+      "  MISMATCH: 8,470 != 4,108\n",
+      "\n",
+      "This is why set_input() fails!\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Show the mismatch: DataFrame rows vs expected household count\n",
+    "print(\"=== THE CRITICAL MISMATCH ===\")\n",
+    "print()\n",
+    "\n",
+    "# Get expected Welsh household count from person_household_id\n",
+    "phh_col = [c for c in df_wales.columns if c.startswith('person_household_id__')][0]\n",
+    "welsh_household_count = df_wales[phh_col].nunique()\n",
+    "\n",
+    "print(f\"DataFrame rows (person-level): {len(df_wales):,}\")\n",
+    "print(f\"Expected Welsh households: {welsh_household_count:,}\")\n",
+    "print()\n",
+    "\n",
+    "# Show a specific household variable\n",
+    "example_var = \"corporate_wealth__2025\" if \"corporate_wealth__2025\" in df_wales.columns else household_vars_in_df[0][0]\n",
+    "print(f\"Example: '{example_var}'\")\n",
+    "print(f\"  Data length in DataFrame: {len(df_wales[example_var]):,}\")\n",
+    "print(f\"  Should be (household count): {welsh_household_count:,}\")\n",
+    "print()\n",
+    "print(f\"  MISMATCH: {len(df_wales[example_var]):,} != {welsh_household_count:,}\")\n",
+    "print()\n",
+    "print(\"This is why set_input() fails!\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cell-11",
+   "metadata": {},
+   "source": [
+    "## Step 5: Trace Through build_from_dataframe Step-by-Step\n",
+    "\n",
+    "Let's manually execute what `build_from_dataframe` does to see exactly where it fails."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "cell-12",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "=== Step 5a: Extract ID columns ===\n",
+      "person_id length: 8470\n",
+      "person_household_id length: 8470\n",
+      "person_household_id unique values: 4108\n",
+      "household_id length: 8470\n",
+      "household_id unique values: 4108\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Step 5a: Extract ID columns (lines 249-270 of build_from_dataframe)\n",
+    "print(\"=== Step 5a: Extract ID columns ===\")\n",
+    "\n",
+    "def get_first_array(df, variable_name):\n",
+    "    columns = df.columns[df.columns.str.contains(variable_name + \"__\")]\n",
+    "    return df[columns[0]]\n",
+    "\n",
+    "person_id = get_first_array(df_wales, \"person_id\")\n",
+    "person_benunit_id = get_first_array(df_wales, \"person_benunit_id\")\n",
+    "person_household_id = get_first_array(df_wales, \"person_household_id\")\n",
+    "benunit_id = get_first_array(df_wales, \"benunit_id\")\n",
+    "household_id = get_first_array(df_wales, \"household_id\")\n",
+    "\n",
+    "print(f\"person_id length: {len(person_id)}\")\n",
+    "print(f\"person_household_id length: {len(person_household_id)}\")\n",
+    "print(f\"person_household_id unique values: {person_household_id.nunique()}\")\n",
+    "print(f\"household_id length: {len(household_id)}\")\n",
+    "print(f\"household_id unique values: {household_id.nunique()}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "cell-13",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "=== Step 5b: Build entity structure (build_from_ids) ===\n",
+      "Person entity count: 8470\n",
+      "Benunit entity count: 4664\n",
+      "Household entity count: 4108\n",
+      "\n",
+      "Entity structure is CORRECT! 4108 households were created.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Step 5b: Build entity structure (lines 273-279 - build_from_ids)\n",
+    "print(\"\\n=== Step 5b: Build entity structure (build_from_ids) ===\")\n",
+    "\n",
+    "from policyengine_core.simulations.simulation_builder import SimulationBuilder\n",
+    "from policyengine_uk.tax_benefit_system import CountryTaxBenefitSystem\n",
+    "\n",
+    "# Create a fresh simulation to test\n",
+    "test_tbs = CountryTaxBenefitSystem()\n",
+    "builder = SimulationBuilder()\n",
+    "builder.populations = test_tbs.instantiate_entities()\n",
+    "\n",
+    "# Declare entities - this is what build_from_ids does\n",
+    "builder.declare_person_entity(\"person\", person_id.values)\n",
+    "builder.declare_entity(\"benunit\", np.unique(benunit_id.values))\n",
+    "builder.declare_entity(\"household\", np.unique(household_id.values))\n",
+    "\n",
+    "print(f\"Person entity count: {len(builder.populations['person'].ids)}\")\n",
+    "print(f\"Benunit entity count: {len(builder.populations['benunit'].ids)}\")\n",
+    "print(f\"Household entity count: {len(builder.populations['household'].ids)}\")\n",
+    "print()\n",
+    "print(\"Entity structure is CORRECT! 4108 households were created.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "cell-14",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "=== Step 5c: Complete entity setup ===\n",
+      "Test simulation created:\n",
+      "  Persons: 8470\n",
+      "  Households: 4108\n",
+      "\n",
+      "Entity counts are CORRECT at this point!\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Step 5c: Complete entity setup with joins\n",
+    "print(\"\\n=== Step 5c: Complete entity setup ===\")\n",
+    "\n",
+    "builder.join_with_persons(\n",
+    "    builder.populations[\"benunit\"],\n",
+    "    person_benunit_id.values,\n",
+    "    np.array([\"member\"] * len(person_benunit_id)),\n",
+    ")\n",
+    "builder.join_with_persons(\n",
+    "    builder.populations[\"household\"],\n",
+    "    person_household_id.values,\n",
+    "    np.array([\"member\"] * len(person_household_id)),\n",
+    ")\n",
+    "\n",
+    "# Create simulation with these populations\n",
+    "from policyengine_core.simulations import Simulation as CoreSimulation\n",
+    "from policyengine_core.tracers import SimpleTracer\n",
+    "\n",
+    "class TestSimulation(CoreSimulation):\n",
+    "    default_input_period = 2025\n",
+    "    default_calculation_period = 2025\n",
+    "\n",
+    "test_sim = TestSimulation.__new__(TestSimulation)\n",
+    "test_sim.tax_benefit_system = test_tbs\n",
+    "test_sim.branch_name = \"default\"\n",
+    "test_sim.invalidated_caches = set()\n",
+    "test_sim.branches = {}\n",
+    "\n",
+    "# Initialize required attributes that build_from_populations expects\n",
+    "test_sim.debug = False\n",
+    "test_sim.trace = False\n",
+    "test_sim.tracer = SimpleTracer()\n",
+    "test_sim.opt_out_cache = False\n",
+    "test_sim.max_spiral_loops = 10\n",
+    "test_sim.memory_config = None\n",
+    "test_sim._data_storage_dir = None\n",
+    "\n",
+    "test_sim.build_from_populations(builder.populations)\n",
+    "\n",
+    "print(f\"Test simulation created:\")\n",
+    "print(f\"  Persons: {test_sim.persons.count}\")\n",
+    "print(f\"  Households: {test_sim.household.count}\")\n",
+    "print()\n",
+    "print(\"Entity counts are CORRECT at this point!\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "cell-15",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "=== Step 5d: THE BUG - set_input without aggregation ===\n",
+      "\n",
+      "Attempting to set 'corporate_wealth' for period 2025\n",
+      "  Variable entity: household\n",
+      "  Data length: 8470\n",
+      "  Household count: 4108\n",
+      "\n",
+      "ERROR (expected): Unable to set value \"[ 42531.723   42531.723   42531.723  ... 145237.94   145237.94\n",
+      "   6483.3296]\" for variable \"corporate_wealth\", as its length is 8470 while there are 4108 households in the simulation.\n",
+      "\n",
+      "============================================================\n",
+      "BUG PROVEN!\n",
+      "============================================================\n",
+      "\n",
+      "The build_from_dataframe method calls set_input() with\n",
+      "person-level data (8470 values) for a household-level\n",
+      "variable, but there are only 4108 households.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Step 5d: THE BUG - Try to set_input for a household variable with person-level data\n",
+    "print(\"\\n=== Step 5d: THE BUG - set_input without aggregation ===\")\n",
+    "print()\n",
+    "\n",
+    "# This is what build_from_dataframe does at lines 281-286:\n",
+    "# for column in df:\n",
+    "#     variable, time_period = column.split(\"__\")\n",
+    "#     if variable not in self.tax_benefit_system.variables:\n",
+    "#         continue\n",
+    "#     self.set_input(variable, time_period, df[column])  # <-- BUG!\n",
+    "\n",
+    "# Let's simulate this for a household variable\n",
+    "test_column = example_var\n",
+    "variable_name, time_period = test_column.split(\"__\")\n",
+    "\n",
+    "print(f\"Attempting to set '{variable_name}' for period {time_period}\")\n",
+    "print(f\"  Variable entity: {test_tbs.get_variable(variable_name).entity.key}\")\n",
+    "print(f\"  Data length: {len(df_wales[test_column])}\")\n",
+    "print(f\"  Household count: {test_sim.household.count}\")\n",
+    "print()\n",
+    "\n",
+    "try:\n",
+    "    test_sim.set_input(variable_name, time_period, df_wales[test_column].values)\n",
+    "    print(\"SUCCESS - No error (unexpected!)\")\n",
+    "except ValueError as e:\n",
+    "    print(f\"ERROR (expected): {e}\")\n",
+    "    print()\n",
+    "    print(\"=\"*60)\n",
+    "    print(\"BUG PROVEN!\")\n",
+    "    print(\"=\"*60)\n",
+    "    print()\n",
+    "    print(\"The build_from_dataframe method calls set_input() with\")\n",
+    "    print(\"person-level data (8470 values) for a household-level\")\n",
+    "    print(f\"variable, but there are only {test_sim.household.count} households.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cell-16",
+   "metadata": {},
+   "source": [
+    "## Step 6: Show What the Fix Should Look Like\n",
+    "\n",
+    "The fix needs to check if aggregation is required before calling `set_input()`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "cell-17",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "=== The Fix: Aggregate Before set_input ===\n",
+      "\n",
+      "Variable: corporate_wealth\n",
+      "Entity: household\n",
+      "Data length: 8470\n",
+      "Population count: 4108\n",
+      "\n",
+      "Aggregation needed: 8470 != 4108\n",
+      "\n",
+      "After aggregation: 4108 values\n",
+      "\n",
+      "SUCCESS! set_input worked with aggregated data.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Demonstrate the correct approach: aggregate before set_input\n",
+    "print(\"=== The Fix: Aggregate Before set_input ===\")\n",
+    "print()\n",
+    "\n",
+    "variable_name, time_period = example_var.split(\"__\")\n",
+    "var_meta = test_tbs.get_variable(variable_name)\n",
+    "entity = var_meta.entity\n",
+    "population = test_sim.get_population(entity.plural)\n",
+    "\n",
+    "data = df_wales[example_var].values\n",
+    "\n",
+    "print(f\"Variable: {variable_name}\")\n",
+    "print(f\"Entity: {entity.key}\")\n",
+    "print(f\"Data length: {len(data)}\")\n",
+    "print(f\"Population count: {population.count}\")\n",
+    "print()\n",
+    "\n",
+    "# Check if aggregation is needed\n",
+    "if len(data) != population.count:\n",
+    "    print(f\"Aggregation needed: {len(data)} != {population.count}\")\n",
+    "    print()\n",
+    "    \n",
+    "    # Use value_from_first_person to aggregate\n",
+    "    aggregated_data = population.value_from_first_person(data)\n",
+    "    print(f\"After aggregation: {len(aggregated_data)} values\")\n",
+    "    print()\n",
+    "    \n",
+    "    # Now set_input should work\n",
+    "    try:\n",
+    "        test_sim.set_input(variable_name, time_period, aggregated_data)\n",
+    "        print(f\"SUCCESS! set_input worked with aggregated data.\")\n",
+    "    except Exception as e:\n",
+    "        print(f\"Still failed: {e}\")\n",
+    "else:\n",
+    "    print(\"No aggregation needed\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cell-18",
+   "metadata": {},
+   "source": [
+    "## Step 7: Show the Required Code Fix\n",
+    "\n",
+    "Here's what the fixed `build_from_dataframe` method should look like."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "cell-19",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "=== Required Fix for build_from_dataframe ===\n",
+      "\n",
+      "CURRENT CODE (buggy):\n",
+      "```python\n",
+      "# Set input values for each variable and time period\n",
+      "for column in df:\n",
+      "    variable, time_period = column.split(\"__\")\n",
+      "    if variable not in self.tax_benefit_system.variables:\n",
+      "        continue\n",
+      "    self.set_input(variable, time_period, df[column])\n",
+      "```\n",
+      "\n",
+      "FIXED CODE:\n",
+      "```python\n",
+      "# Set input values for each variable and time period\n",
+      "for column in df:\n",
+      "    variable, time_period = column.split(\"__\")\n",
+      "    if variable not in self.tax_benefit_system.variables:\n",
+      "        continue\n",
+      "    \n",
+      "    # Get variable metadata and target population\n",
+      "    var_meta = self.tax_benefit_system.get_variable(variable)\n",
+      "    entity = var_meta.entity\n",
+      "    population = self.get_population(entity.plural)\n",
+      "    \n",
+      "    data = df[column].values\n",
+      "    \n",
+      "    # Check if aggregation is needed (data is person-level but variable is group-level)\n",
+      "    if len(data) != population.count:\n",
+      "        # Aggregate from person-level to entity-level\n",
+      "        data = population.value_from_first_person(data)\n",
+      "    \n",
+      "    self.set_input(variable, time_period, data)\n",
+      "```\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"=== Required Fix for build_from_dataframe ===\")\n",
+    "print()\n",
+    "print(\"\"\"CURRENT CODE (buggy):\n",
+    "```python\n",
+    "# Set input values for each variable and time period\n",
+    "for column in df:\n",
+    "    variable, time_period = column.split(\"__\")\n",
+    "    if variable not in self.tax_benefit_system.variables:\n",
+    "        continue\n",
+    "    self.set_input(variable, time_period, df[column])\n",
+    "```\n",
+    "\n",
+    "FIXED CODE:\n",
+    "```python\n",
+    "# Set input values for each variable and time period\n",
+    "for column in df:\n",
+    "    variable, time_period = column.split(\"__\")\n",
+    "    if variable not in self.tax_benefit_system.variables:\n",
+    "        continue\n",
+    "    \n",
+    "    # Get variable metadata and target population\n",
+    "    var_meta = self.tax_benefit_system.get_variable(variable)\n",
+    "    entity = var_meta.entity\n",
+    "    population = self.get_population(entity.plural)\n",
+    "    \n",
+    "    data = df[column].values\n",
+    "    \n",
+    "    # Check if aggregation is needed (data is person-level but variable is group-level)\n",
+    "    if len(data) != population.count:\n",
+    "        # Aggregate from person-level to entity-level\n",
+    "        data = population.value_from_first_person(data)\n",
+    "    \n",
+    "    self.set_input(variable, time_period, data)\n",
+    "```\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cell-20",
+   "metadata": {},
+   "source": [
+    "## Summary"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "cell-21",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "======================================================================\n",
+      "SUMMARY: BUG PROVEN\n",
+      "======================================================================\n",
+      "\n",
+      "LOCATION:\n",
+      "  File: policyengine_uk/simulation.py\n",
+      "  Method: build_from_dataframe()\n",
+      "  Lines: 281-286\n",
+      "\n",
+      "ROOT CAUSE:\n",
+      "  The method iterates through DataFrame columns and calls set_input()\n",
+      "  without checking if the data length matches the target entity count.\n",
+      "  \n",
+      "  - to_input_dataframe() exports ALL variables at PERSON level\n",
+      "  - build_from_ids() correctly creates entity structure (e.g., 4108 households)\n",
+      "  - BUT the loop then tries to set 8470 person-level values for \n",
+      "    household-level variables that only have 4108 entities\n",
+      "\n",
+      "THE FIX:\n",
+      "  Before calling set_input(), check if len(data) != population.count.\n",
+      "  If so, aggregate using population.value_from_first_person(data).\n",
+      "\n",
+      "NOTE:\n",
+      "  This is the same aggregation logic that policyengine_core's\n",
+      "  build_from_dataset() method uses (simulation.py lines 406-414).\n",
+      "  The policyengine_uk version simply forgot to include it.\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"=\"*70)\n",
+    "print(\"SUMMARY: BUG PROVEN\")\n",
+    "print(\"=\"*70)\n",
+    "print(\"\"\"\n",
+    "LOCATION:\n",
+    "  File: policyengine_uk/simulation.py\n",
+    "  Method: build_from_dataframe()\n",
+    "  Lines: 281-286\n",
+    "\n",
+    "ROOT CAUSE:\n",
+    "  The method iterates through DataFrame columns and calls set_input()\n",
+    "  without checking if the data length matches the target entity count.\n",
+    "  \n",
+    "  - to_input_dataframe() exports ALL variables at PERSON level\n",
+    "  - build_from_ids() correctly creates entity structure (e.g., 4108 households)\n",
+    "  - BUT the loop then tries to set 8470 person-level values for \n",
+    "    household-level variables that only have 4108 entities\n",
+    "\n",
+    "THE FIX:\n",
+    "  Before calling set_input(), check if len(data) != population.count.\n",
+    "  If so, aggregate using population.value_from_first_person(data).\n",
+    "\n",
+    "NOTE:\n",
+    "  This is the same aggregation logic that policyengine_core's\n",
+    "  build_from_dataset() method uses (simulation.py lines 406-414).\n",
+    "  The policyengine_uk version simply forgot to include it.\n",
+    "\"\"\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "py-3.13",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/scripts/test_local_authority_api.py b/scripts/test_local_authority_api.py
new file mode 100755
index 000000000..81eeb8575
--- /dev/null
+++ b/scripts/test_local_authority_api.py
@@ -0,0 +1,570 @@
+#!/usr/bin/env python3
+"""
+Test script for UK Local Authority API functionality.
+
+This script tests the economy-wide simulation API for:
+1. A specific UK local authority (e.g., Leicester)
+2. UK-wide calculation (to confirm local_authority_impact is returned)
+3. Wales country filter (to confirm authorities are filtered by country)
+
+SETUP INSTRUCTIONS:
+===================
+
+You need THREE terminal windows:
+
+Terminal 1 - Start Redis:
+    redis-server
+
+Terminal 2 - Start the API worker (handles economy calculations):
+    FLASK_DEBUG=1 python policyengine_api/worker.py
+
+Terminal 3 - Start the API server:
+    make debug
+
+Then run this script in a 4th terminal:
+    python scripts/test_local_authority_api.py
+
+NOTE: UK calculations require access to the policyengine-uk-data-private
+HuggingFace repo. Make sure HUGGING_FACE_TOKEN is set in your environment.
+"""
+
+import requests
+import json
+import time
+import sqlite3
+from pathlib import Path
+
+# Configuration
+API_BASE_URL = "http://127.0.0.1:5000"
+COUNTRY_ID = "uk"
+BASELINE_POLICY_ID = 1  # UK current law
+TIME_PERIOD = 2025
+DATASET = "default"
+
+# Raise the UK income tax base rate by 6 percentage points (20% -> 26%)
+SAMPLE_REFORM = {
+    "gov.hmrc.income_tax.rates.uk[0].rate": {"2025-01-01.2100-12-31": 0.26}
+}
+
+
+def print_header(title: str):
+    """Print a blank line, then ``title`` framed by rules of 70 ``=``."""
+    rule = "=" * 70
+    print("\n" + rule)
+    print(f"  {title}", rule, sep="\n")
+
+
+def print_step(step_num: int, description: str):
+    """Print a blank line, a ``[Step N] description`` line and 50 dashes."""
+    heading = f"\n[Step {step_num}] {description}"
+    print(heading, "-" * 50, sep="\n")
+
+
+def wait_for_confirmation(message: str = "Press Enter to continue..."):
+    """Block until the user presses Enter; the typed reply is discarded."""
+    input("\n>>> {}".format(message))
+
+
+def check_api_health():
+    """Return True if GET /liveness-check answers HTTP 200, else False."""
+    print_step(0, "Checking API Health")
+
+    try:
+        response = requests.get(f"{API_BASE_URL}/liveness-check", timeout=5)
+        if response.status_code == 200:
+            print(f"  [OK] API is running at {API_BASE_URL}")
+            return True
+        else:
+            print(f"  [ERROR] API returned status {response.status_code}")
+            return False
+    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
+        print(f"  [ERROR] Cannot connect to API at {API_BASE_URL}")
+        print("  Make sure the API server is running. You need 3 terminals:")
+        print("")
+        print("  Terminal 1 - Start Redis:")
+        print("    redis-server")
+        print("")
+        print("  Terminal 2 - Start the API worker:")
+        print("    FLASK_DEBUG=1 python policyengine_api/worker.py")
+        print("")
+        print("  Terminal 3 - Start the API server:")
+        print("    make debug")
+        return False
+
+
+def create_reform_policy():
+    """POST SAMPLE_REFORM as a new policy; return its ID, or None on failure."""
+    print_step(1, "Creating Reform Policy")
+
+    print("  Reform to be created:")
+    print(f"    {json.dumps(SAMPLE_REFORM, indent=4)}")
+
+    wait_for_confirmation("Press Enter to create the reform policy...")
+
+    payload = {
+        "label": "Test LA Reform - Income Tax Basic Rate Increase",
+        "data": SAMPLE_REFORM,
+    }
+
+    response = requests.post(
+        f"{API_BASE_URL}/{COUNTRY_ID}/policy",
+        json=payload,
+        timeout=30,  # json= already sets Content-Type: application/json
+    )
+
+    print(f"  Response status: {response.status_code}")
+    result = response.json()
+    print(f"  Response body: {json.dumps(result, indent=4)}")
+
+    if response.status_code in [200, 201]:
+        policy_id = result["result"]["policy_id"]
+        print(f"  [OK] Reform policy created/found with ID: {policy_id}")
+        return policy_id
+    else:
+        print("  [ERROR] Failed to create policy")
+        return None
+
+
+def verify_baseline_policy_exists():
+    """Return True if GET on the baseline (current law) policy answers 200."""
+    print_step(2, "Verifying Baseline Policy Exists")
+
+    print(f"  Checking policy ID: {BASELINE_POLICY_ID}")
+
+    url = f"{API_BASE_URL}/{COUNTRY_ID}/policy/{BASELINE_POLICY_ID}"
+    # Bounded wait so a hung server cannot stall the script indefinitely.
+    response = requests.get(url, timeout=30)
+
+    print(f"  Response status: {response.status_code}")
+
+    if response.status_code == 200:
+        result = response.json()
+        policy_data = result.get("result", {})
+        print(f"  Policy label: {policy_data.get('label', 'N/A')}")
+        print("  [OK] Baseline policy exists")
+        return True
+    else:
+        print("  [ERROR] Baseline policy not found")
+        print(
+            "  You may need to initialize the database with the current law policy"
+        )
+        return False
+
+
+def poll_economy_endpoint(
+    region: str, reform_policy_id: int, description: str
+):
+    """
+    Poll the economy endpoint until it reports "ok" or "error", or gives up.
+
+    Returns the response's "result" on "ok"; None on "error" or exhaustion.
+    """
+    print(f"\n  Polling for: {description}")
+    print(f"  Region: {region}")
+    print(f"  Reform Policy ID: {reform_policy_id}")
+    print(f"  Baseline Policy ID: {BASELINE_POLICY_ID}")
+    print(f"  Time Period: {TIME_PERIOD}")
+
+    url = f"{API_BASE_URL}/{COUNTRY_ID}/economy/{reform_policy_id}/over/{BASELINE_POLICY_ID}"
+    params = {
+        "region": region,
+        "dataset": DATASET,
+        "time_period": TIME_PERIOD,
+        "target": "general",
+    }
+
+    print(f"\n  Full URL: {url}")
+    print(f"  Query params: {params}")
+
+    wait_for_confirmation("Press Enter to start polling the API...")
+
+    max_attempts = 60  # ~5 min of 5-second sleeps, plus up to 30 s per request
+    attempt = 0
+
+    while attempt < max_attempts:
+        attempt += 1
+        print(f"\n  Attempt {attempt}/{max_attempts}...")
+
+        try:
+            response = requests.get(url, params=params, timeout=30)
+            result = response.json()
+
+            status = result.get("status")
+            print(f"    Status: {status}")
+
+            if status == "ok":
+                print(f"    [OK] Calculation complete!")
+                return result.get("result")
+            elif status == "computing":
+                print(f"    Calculation in progress... waiting 5 seconds")
+                time.sleep(5)
+            elif status == "error":
+                print(f"    [ERROR] Calculation failed")
+                print(f"    Message: {result.get('message')}")
+                return None
+            else:
+                print(f"    Unknown status: {status}")
+                time.sleep(5)
+
+        except requests.exceptions.Timeout:
+            print(f"    Request timed out, retrying...")
+            time.sleep(5)
+        except Exception as e:  # any other failure is printed, then retried
+            print(f"    Error: {e}")
+            time.sleep(5)
+
+    print(f"  [ERROR] Timed out waiting for calculation")
+    return None
+
+
+def display_results(result: dict, description: str):
+    """Display key results from the economy calculation."""
+    print(f"\n  Results for: {description}")
+    print("  " + "-" * 40)
+
+    if result is None:
+        print("    No results available")
+        return
+
+    # Budgetary impact
+    budget = result.get("budget")
+    if budget:
+        print(f"\n  BUDGETARY IMPACT:")
+        for key, value in budget.items():
+            if isinstance(value, (int, float)):
+                print(f"    {key}: {value:,.2f}")
+            else:
+                print(f"    {key}: {value}")
+
+    # Decile impact summary
+    decile = result.get("decile")
+    if decile:
+        print(f"\n  DECILE IMPACT (sample):")
+        relative = decile.get("relative", {})
+        if relative:
+            for d in ["1", "5", "10"]:
+                if d in relative:
+                    print(f"    Decile {d}: {relative[d]*100:.2f}%")
+
+    # Poverty impact
+    poverty = result.get("poverty")
+    if poverty:
+        print(f"\n  POVERTY IMPACT:")
+        deep_poverty = poverty.get("deep_poverty", {})
+        regular_poverty = poverty.get("poverty", {})
+        if deep_poverty:
+            print(
+                f"    Deep poverty change: {deep_poverty.get('change', 'N/A')}"
+            )
+        if regular_poverty:
+            print(
+                f"    Poverty change: {regular_poverty.get('change', 'N/A')}"
+            )
+
+    # Local Authority Impact (if present)
+    la_impact = result.get("local_authority_impact")
+    if la_impact:
+        print(f"\n  LOCAL AUTHORITY IMPACT:")
+        by_la = la_impact.get("by_local_authority", {})
+        print(f"    Number of local authorities: {len(by_la)}")
+
+        # Show first 5 local authorities
+        print(f"    Sample local authorities:")
+        for i, (name, data) in enumerate(list(by_la.items())[:5]):
+            avg_change = data.get("average_household_income_change", 0)
+            rel_change = data.get("relative_household_income_change", 0)
+            print(
+                f"      {name}: avg={avg_change:.2f}, rel={rel_change*100:.3f}%"
+            )
+
+        # Outcomes by region
+        outcomes = la_impact.get("outcomes_by_region", {})
+        if outcomes:
+            print(f"\n    Outcomes by UK region:")
+            for region, buckets in outcomes.items():
+                total = sum(buckets.values())
+                print(f"      {region}: {total} LAs")
+                for bucket, count in buckets.items():
+                    if count > 0:
+                        print(f"        - {bucket}: {count}")
+    else:
+        print(f"\n  LOCAL AUTHORITY IMPACT: Not present in response")
+
+    # Constituency Impact (if present)
+    const_impact = result.get("constituency_impact")
+    if const_impact:
+        by_const = const_impact.get("by_constituency", {})
+        print(f"\n  CONSTITUENCY IMPACT:")
+        print(f"    Number of constituencies: {len(by_const)}")
+
+
+def test_local_authority_simulation(reform_policy_id: int):
+    """Test 1: Run simulation for a specific local authority."""
+    print_header("TEST 1: Local Authority Simulation (Leicester)")
+
+    print(
+        """
+    This test runs an economy simulation for a specific UK local authority.
+    We're using Leicester as it's a well-known unitary authority.
+
+    Expected: The API should accept the local_authority/Leicester region
+    and return economic impact results.
+    """
+    )
+
+    wait_for_confirmation(
+        "Press Enter to run the local authority simulation..."
+    )
+
+    region = "local_authority/Leicester"
+    result = poll_economy_endpoint(
+        region, reform_policy_id, "Leicester Local Authority"
+    )
+
+    if result:
+        display_results(result, "Leicester Local Authority")
+        print(
+            "\n  [TEST 1 PASSED] Local authority simulation completed successfully"
+        )
+        return True
+    else:
+        print("\n  [TEST 1 FAILED] Local authority simulation failed")
+        return False
+
+
+def test_uk_wide_simulation(reform_policy_id: int):
+    """Test 2: Run UK-wide simulation and check for local_authority_impact."""
+    print_header("TEST 2: UK-Wide Simulation (Check local_authority_impact)")
+
+    print(
+        """
+    This test runs an economy simulation for the entire UK.
+
+    Expected: The API should return results that include:
+    - Standard budgetary/poverty/decile impacts
+    - constituency_impact (existing feature)
+    - local_authority_impact (NEW feature we just added)
+
+    We'll verify that local_authority_impact is present and contains
+    data for all 360 UK local authorities.
+    """
+    )
+
+    wait_for_confirmation("Press Enter to run the UK-wide simulation...")
+
+    region = "uk"
+    result = poll_economy_endpoint(region, reform_policy_id, "UK-wide")
+
+    if result:
+        display_results(result, "UK-wide")
+
+        # Verify local_authority_impact is present
+        la_impact = result.get("local_authority_impact")
+        if la_impact:
+            by_la = la_impact.get("by_local_authority", {})
+            if len(by_la) == 360:
+                print(
+                    f"\n  [OK] local_authority_impact contains all 360 local authorities"
+                )
+            else:
+                print(
+                    f"\n  [WARNING] Expected 360 local authorities, got {len(by_la)}"
+                )
+
+            # Check outcomes_by_region has all UK nations
+            outcomes = la_impact.get("outcomes_by_region", {})
+            expected_regions = [
+                "uk",
+                "england",
+                "scotland",
+                "wales",
+                "northern_ireland",
+            ]
+            for r in expected_regions:
+                if r in outcomes:
+                    print(f"  [OK] {r} region present in outcomes")
+                else:
+                    print(f"  [MISSING] {r} region not in outcomes")
+
+            print(
+                "\n  [TEST 2 PASSED] UK-wide simulation includes local_authority_impact"
+            )
+            return True
+        else:
+            print(
+                "\n  [TEST 2 FAILED] local_authority_impact not present in response"
+            )
+            return False
+    else:
+        print("\n  [TEST 2 FAILED] UK-wide simulation failed")
+        return False
+
+
+def test_wales_simulation(reform_policy_id: int):
+    """Test 3: Run Wales simulation and check local authorities are filtered."""
+    print_header("TEST 3: Wales Simulation (Filter Check)")
+
+    print(
+        """
+    This test runs an economy simulation for Wales only.
+
+    Expected: The API should return results where:
+    - The simulation is filtered to Wales
+    - If local_authority_impact is present, it should only contain
+      Welsh local authorities (codes starting with 'W')
+    - Wales has exactly 22 principal areas
+
+    Note: The local_authority_impact breakdown may only be calculated
+    for UK-wide simulations. This test will verify the behavior.
+    """
+    )
+
+    wait_for_confirmation("Press Enter to run the Wales simulation...")
+
+    region = "country/wales"
+    result = poll_economy_endpoint(region, reform_policy_id, "Wales")
+
+    if result:
+        display_results(result, "Wales")
+
+        la_impact = result.get("local_authority_impact")
+        if la_impact:
+            by_la = la_impact.get("by_local_authority", {})
+            print(f"\n  Local authorities in response: {len(by_la)}")
+
+            # If filtering is implemented, we'd expect 22 Welsh LAs
+            if len(by_la) == 22:
+                print(
+                    f"  [OK] Correctly filtered to 22 Welsh local authorities"
+                )
+            elif len(by_la) == 360:
+                print(
+                    f"  [INFO] All 360 LAs returned (filtering not applied at LA level)"
+                )
+            else:
+                print(f"  [INFO] Got {len(by_la)} local authorities")
+
+            print("\n  [TEST 3 PASSED] Wales simulation completed")
+            return True
+        else:
+            print(
+                f"\n  [INFO] local_authority_impact not present for country-level simulation"
+            )
+            print(
+                "  This may be expected behavior - LA breakdown may only be for UK-wide"
+            )
+            print(
+                "\n  [TEST 3 PASSED] Wales simulation completed (no LA breakdown)"
+            )
+            return True
+    else:
+        print("\n  [TEST 3 FAILED] Wales simulation failed")
+        return False
+
+
+def main():
+    """Main test runner."""
+    print_header("UK Local Authority API Test Script")
+
+    print(
+        """
+    This script tests the UK Local Authority feature in the PolicyEngine API.
+
+    It will:
+    1. Check API health
+    2. Create a test reform policy
+    3. Verify baseline policy exists
+    4. Run TEST 1: Local Authority simulation (Leicester)
+    5. Run TEST 2: UK-wide simulation (check local_authority_impact)
+    6. Run TEST 3: Wales simulation (filter check)
+
+    Prerequisites (you need 3 other terminals running):
+    - Terminal 1: redis-server
+    - Terminal 2: FLASK_DEBUG=1 python policyengine_api/worker.py
+    - Terminal 3: make debug
+    - HUGGING_FACE_TOKEN environment variable set (for UK data access)
+
+    You will be prompted before each major step.
+    """
+    )
+
+    wait_for_confirmation("Press Enter to begin testing...")
+
+    # Step 0: Check API health
+    if not check_api_health():
+        print("\n[ABORT] API is not available. Please start the server first.")
+        return
+
+    wait_for_confirmation("API is healthy. Press Enter to continue...")
+
+    # Step 1: Create reform policy
+    reform_policy_id = create_reform_policy()
+    if reform_policy_id is None:
+        print("\n[ABORT] Failed to create reform policy.")
+        return
+
+    # Step 2: Verify baseline policy
+    if not verify_baseline_policy_exists():
+        print("\n[WARNING] Baseline policy not found. Tests may fail.")
+        wait_for_confirmation("Press Enter to continue anyway...")
+
+    print_header("Setup Complete - Ready to Run Tests")
+    print(
+        f"""
+    Configuration:
+    - API Base URL: {API_BASE_URL}
+    - Country: {COUNTRY_ID}
+    - Reform Policy ID: {reform_policy_id}
+    - Baseline Policy ID: {BASELINE_POLICY_ID}
+    - Time Period: {TIME_PERIOD}
+    - Dataset: {DATASET}
+    """
+    )
+
+    wait_for_confirmation("Press Enter to start running tests...")
+
+    # Run tests
+    results = []
+
+    # Test 1: Local Authority simulation
+    results.append(
+        (
+            "Local Authority (Leicester)",
+            test_local_authority_simulation(reform_policy_id),
+        )
+    )
+    wait_for_confirmation(
+        "Test 1 complete. Press Enter to continue to Test 2..."
+    )
+
+    # Test 2: UK-wide simulation
+    results.append(
+        ("UK-Wide with LA Impact", test_uk_wide_simulation(reform_policy_id))
+    )
+    wait_for_confirmation(
+        "Test 2 complete. Press Enter to continue to Test 3..."
+    )
+
+    # Test 3: Wales simulation
+    results.append(("Wales Filter", test_wales_simulation(reform_policy_id)))
+
+    # Summary
+    print_header("Test Summary")
+    print("\n  Results:")
+    for test_name, passed in results:
+        status = "[PASSED]" if passed else "[FAILED]"
+        print(f"    {status} {test_name}")
+
+    all_passed = all(r[1] for r in results)
+    if all_passed:
+        print("\n  All tests passed!")
+    else:
+        print("\n  Some tests failed. Review output above for details.")
+
+    print("\n" + "=" * 70)
+    print("  Testing complete.")
+    print("=" * 70 + "\n")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/verify_country_filtering_bug.ipynb b/scripts/verify_country_filtering_bug.ipynb
new file mode 100644
index 000000000..73c71e701
--- /dev/null
+++ b/scripts/verify_country_filtering_bug.ipynb
@@ -0,0 +1,1147 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "cell-0",
+   "metadata": {},
+   "source": [
+    "# Verifying UK Country Filtering Bug in policyengine.py\n",
+    "\n",
+    "This notebook verifies the bug that occurs when filtering simulations by UK country (e.g., Wales).\n",
+    "\n",
+    "## The Bug\n",
+    "When running a simulation filtered to a specific UK country (e.g., `region=\"country/wales\"`), we get:\n",
+    "```\n",
+    "ValueError: Unable to set value \"[ True  True  True ... False False False]\" for variable \n",
+    "\"would_evade_tv_licence_fee\", as its length is 8470 while there are 4108 households in the simulation.\n",
+    "```\n",
+    "\n",
+    "## Root Cause Hypothesis\n",
+    "The country filtering code in `policyengine/simulation.py` uses DataFrame subsetting:\n",
+    "1. Exports simulation to DataFrame via `to_input_dataframe()`\n",
+    "2. Filters DataFrame rows by country\n",
+    "3. Creates new simulation from filtered DataFrame\n",
+    "\n",
+    "The issue is that entity linkage variables (like `household_id`) may not be properly \n",
+    "exported/imported, causing entity count mismatches during variable calculations."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cell-1",
+   "metadata": {},
+   "source": [
+    "## Step 1: Setup and Version Check"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "cell-2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import traceback\n",
+    "\n",
+    "# Import policyengine (the high-level package)\n",
+    "import policyengine\n",
+    "from policyengine import Simulation\n",
+    "\n",
+    "# Also import the underlying UK simulation for manual testing\n",
+    "from policyengine_uk import Microsimulation as UKMicrosimulation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cell-3",
+   "metadata": {},
+   "source": [
+    "## Step 2: Create UK-Wide Baseline Simulation\n",
+    "\n",
+    "First, create a standard UK-wide simulation to understand the data structure."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "cell-4",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Creating UK-wide simulation...\n",
+      "(This may take a minute to download data)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "No data provided, using default dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n",
+      "Using dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n",
+      "Downloading enhanced_frs_2023_24.h5 from bucket policyengine-uk-data-private\n",
+      "WARNING:root:No version specified for policyengine-uk-data-private, enhanced_frs_2023_24.h5. Using latest version: 1.29.4\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "=== UK-Wide Simulation Structure ===\n",
+      "Person count: 115612\n",
+      "Household count: 53508\n",
+      "BenUnit count: 61858\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Create UK-wide simulation using policyengine.Simulation\n",
+    "print(\"Creating UK-wide simulation...\")\n",
+    "print(\"(This may take a minute to download data)\")\n",
+    "\n",
+    "sim_uk = Simulation(country=\"uk\", scope=\"macro\")\n",
+    "\n",
+    "# Get the underlying microsimulation\n",
+    "underlying_sim = sim_uk.baseline_simulation\n",
+    "\n",
+    "print(f\"\\n=== UK-Wide Simulation Structure ===\")\n",
+    "print(f\"Person count: {underlying_sim.persons.count}\")\n",
+    "print(f\"Household count: {underlying_sim.household.count}\")\n",
+    "print(f\"BenUnit count: {underlying_sim.benunit.count}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "cell-5",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Creating UK-wide simulation...\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "No data provided, using default dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n",
+      "Using dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n",
+      "Downloading enhanced_frs_2023_24.h5 from bucket policyengine-uk-data-private\n",
+      "WARNING:root:No version specified for policyengine-uk-data-private, enhanced_frs_2023_24.h5. Using latest version: 1.29.4\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "=== UK-Wide Simulation Structure ===\n",
+      "Person count: 115612\n",
+      "Household count: 53508\n",
+      "BenUnit count: 61858\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Create a UK-wide simulation (no region filter)\n",
+    "print(\"Creating UK-wide simulation...\")\n",
+    "sim_uk = Simulation(country=\"uk\", scope=\"macro\")\n",
+    "\n",
+    "# Access the underlying country simulation\n",
+    "underlying_sim = sim_uk.baseline_simulation\n",
+    "\n",
+    "print(f\"\\n=== UK-Wide Simulation Structure ===\")\n",
+    "print(f\"Person count: {underlying_sim.persons.count}\")\n",
+    "print(f\"Household count: {underlying_sim.household.count}\")\n",
+    "print(f\"BenUnit count: {underlying_sim.benunit.count}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cell-6",
+   "metadata": {},
+   "source": [
+    "## Step 3: Examine to_input_dataframe() Export\n",
+    "\n",
+    "This is what `_apply_region_to_simulation` uses to get the data before filtering."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "cell-7",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Exporting simulation to DataFrame...\n",
+      "\n",
+      "=== Exported DataFrame ===\n",
+      "Shape: (115612, 1127)\n",
+      "Number of rows (should be person count): 115612\n",
+      "Number of columns: 1127\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Export the simulation to DataFrame\n",
+    "print(\"Exporting simulation to DataFrame...\")\n",
+    "df = underlying_sim.to_input_dataframe()\n",
+    "\n",
+    "print(f\"\\n=== Exported DataFrame ===\")\n",
+    "print(f\"Shape: {df.shape}\")\n",
+    "print(f\"Number of rows (should be person count): {len(df)}\")\n",
+    "print(f\"Number of columns: {len(df.columns)}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "cell-8",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "=== Critical Entity Columns ===\n",
+      "  person_id: FOUND (8 columns)\n",
+      "    - person_id__2023\n",
+      "    - person_id__2024\n",
+      "    - person_id__2025\n",
+      "    ... and 5 more\n",
+      "  household_id: FOUND (8 columns)\n",
+      "    - household_id__2023\n",
+      "    - household_id__2024\n",
+      "    - household_id__2025\n",
+      "    ... and 5 more\n",
+      "  person_household_id: FOUND (8 columns)\n",
+      "    - person_household_id__2023\n",
+      "    - person_household_id__2024\n",
+      "    - person_household_id__2025\n",
+      "    ... and 5 more\n",
+      "  benunit_id: FOUND (8 columns)\n",
+      "    - benunit_id__2023\n",
+      "    - benunit_id__2024\n",
+      "    - benunit_id__2025\n",
+      "    ... and 5 more\n",
+      "  person_benunit_id: FOUND (8 columns)\n",
+      "    - person_benunit_id__2023\n",
+      "    - person_benunit_id__2024\n",
+      "    - person_benunit_id__2025\n",
+      "    ... and 5 more\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Check for critical entity linkage columns\n",
+    "print(\"\\n=== Critical Entity Columns ===\")\n",
+    "\n",
+    "critical_patterns = [\n",
+    "    'person_id',\n",
+    "    'household_id', \n",
+    "    'person_household_id',\n",
+    "    'benunit_id',\n",
+    "    'person_benunit_id'\n",
+    "]\n",
+    "\n",
+    "for pattern in critical_patterns:\n",
+    "    matching = [c for c in df.columns if c.startswith(pattern)]\n",
+    "    if matching:\n",
+    "        print(f\"  {pattern}: FOUND ({len(matching)} columns)\")\n",
+    "        for col in matching[:3]:  # Show first 3\n",
+    "            print(f\"    - {col}\")\n",
+    "        if len(matching) > 3:\n",
+    "            print(f\"    ... and {len(matching) - 3} more\")\n",
+    "    else:\n",
+    "        print(f\"  {pattern}: MISSING!\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "cell-9",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "=== household_id Export Analysis ===\n",
+      "Column: household_id__2023\n",
+      "  Length: 115612\n",
+      "  Unique values: 53508\n",
+      "  Min: 1, Max: 67019\n",
+      "  Sample values: [2 1 2 2 2 2 3 6 6 3]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Check if household_id is exported and examine its values\n",
+    "hh_id_cols = [c for c in df.columns if c.startswith('household_id__')]\n",
+    "\n",
+    "print(\"\\n=== household_id Export Analysis ===\")\n",
+    "if hh_id_cols:\n",
+    "    col = hh_id_cols[0]\n",
+    "    print(f\"Column: {col}\")\n",
+    "    print(f\"  Length: {len(df[col])}\")\n",
+    "    print(f\"  Unique values: {df[col].nunique()}\")\n",
+    "    print(f\"  Min: {df[col].min()}, Max: {df[col].max()}\")\n",
+    "    print(f\"  Sample values: {df[col].values[:10]}\")\n",
+    "else:\n",
+    "    print(\"household_id NOT exported!\")\n",
+    "    print(\"This could be the root cause of the bug.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "cell-10",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "=== person_household_id Export Analysis ===\n",
+      "Column: person_household_id__2023\n",
+      "  Length: 115612\n",
+      "  Unique values (should match household count): 53508\n",
+      "  Expected household count: 53508\n",
+      "  [OK] Unique count matches household count\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Check person_household_id linkage\n",
+    "phh_id_cols = [c for c in df.columns if c.startswith('person_household_id__')]\n",
+    "\n",
+    "print(\"\\n=== person_household_id Export Analysis ===\")\n",
+    "if phh_id_cols:\n",
+    "    col = phh_id_cols[0]\n",
+    "    print(f\"Column: {col}\")\n",
+    "    print(f\"  Length: {len(df[col])}\")\n",
+    "    print(f\"  Unique values (should match household count): {df[col].nunique()}\")\n",
+    "    print(f\"  Expected household count: {underlying_sim.household.count}\")\n",
+    "    \n",
+    "    if df[col].nunique() == underlying_sim.household.count:\n",
+    "        print(\"  [OK] Unique count matches household count\")\n",
+    "    else:\n",
+    "        print(\"  [WARNING] Mismatch!\")\n",
+    "else:\n",
+    "    print(\"person_household_id NOT exported! This is critical.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cell-11",
+   "metadata": {},
+   "source": [
+    "## Step 4: Manually Reproduce the Wales Filtering\n",
+    "\n",
+    "Let's manually do what `_apply_region_to_simulation` does to identify where it breaks."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "cell-12",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "=== Step 4a: Calculate country at person level ===\n",
+      "Country array shape: (115612,)\n",
+      "\n",
+      "Welsh persons: 8,470\n",
+      "Non-Welsh persons: 107,142\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Step 4a: Get country at person level (same as policyengine.py:296-298)\n",
+    "print(\"=== Step 4a: Calculate country at person level ===\")\n",
+    "country_person = underlying_sim.calculate(\"country\", map_to=\"person\").values\n",
+    "print(f\"Country array shape: {country_person.shape}\")\n",
+    "\n",
+    "# Create Wales mask\n",
+    "wales_mask = country_person == \"WALES\"\n",
+    "print(f\"\\nWelsh persons: {wales_mask.sum():,}\")\n",
+    "print(f\"Non-Welsh persons: {(~wales_mask).sum():,}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "cell-13",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "=== Step 4b: Filter DataFrame to Wales ===\n",
+      "Filtered DataFrame shape: (8470, 1127)\n",
+      "Number of Welsh persons: 8470\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Step 4b: Filter DataFrame to Wales (same as policyengine.py:299-300)\n",
+    "print(\"\\n=== Step 4b: Filter DataFrame to Wales ===\")\n",
+    "df_wales = df[wales_mask]\n",
+    "print(f\"Filtered DataFrame shape: {df_wales.shape}\")\n",
+    "print(f\"Number of Welsh persons: {len(df_wales)}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "cell-14",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "=== Step 4c: Examine person_household_id in filtered data ===\n",
+      "Column: person_household_id__2023\n",
+      "  Length: 8470\n",
+      "  Unique households in Wales: 4108\n",
+      "  Min household ID: 2.0\n",
+      "  Max household ID: 66996.0\n",
+      "  Sample values: [2. 2. 2. 2. 2. 6. 6. 6. 6. 7.]\n",
+      "  [INFO] Household IDs are NOT contiguous (gaps from filtering)\n",
+      "         This is expected - they're original UK-wide IDs\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Check what person_household_id looks like in filtered data\n",
+    "print(\"\\n=== Step 4c: Examine person_household_id in filtered data ===\")\n",
+    "if phh_id_cols:\n",
+    "    col = phh_id_cols[0]\n",
+    "    welsh_phh = df_wales[col].values\n",
+    "    print(f\"Column: {col}\")\n",
+    "    print(f\"  Length: {len(welsh_phh)}\")\n",
+    "    print(f\"  Unique households in Wales: {len(np.unique(welsh_phh))}\")\n",
+    "    print(f\"  Min household ID: {welsh_phh.min()}\")\n",
+    "    print(f\"  Max household ID: {welsh_phh.max()}\")\n",
+    "    print(f\"  Sample values: {welsh_phh[:10]}\")\n",
+    "    \n",
+    "    # Check if IDs are contiguous\n",
+    "    unique_hh = np.unique(welsh_phh)\n",
+    "    if np.array_equal(unique_hh, np.arange(len(unique_hh))):\n",
+    "        print(\"  [INFO] Household IDs are contiguous 0-based\")\n",
+    "    else:\n",
+    "        print(\"  [INFO] Household IDs are NOT contiguous (gaps from filtering)\")\n",
+    "        print(f\"         This is expected - they're original UK-wide IDs\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cell-15",
+   "metadata": {},
+   "source": [
+    "## Step 5: Try to Create Simulation from Filtered DataFrame\n",
+    "\n",
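+    "This is where the error should occur: `df_wales` still has one row per person (8,470 rows) but contains only 4,108 unique households, so household-level inputs end up with the wrong length."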
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "cell-16",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "=== Step 5a: Create simulation from filtered DataFrame ===\n",
+      "(This is what policyengine.py:299-300 does)\n",
+      "\n",
+      "[ERROR] Failed to create simulation: Unable to set value \"[ 39361.   39361.   39361.  ... 134410.5 134410.5   6000. ]\" for variable \"corporate_wealth\", as its length is 8470 while there are 4108 households in the simulation.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Traceback (most recent call last):\n",
+      "  File \"/var/folders/qf/7fglpnr94wsc5xgbqkp80xbw0000gn/T/ipykernel_19066/2037714397.py\", line 7, in <module>\n",
+      "    new_sim = UKMicrosimulation(dataset=df_wales)\n",
+      "  File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_uk/simulation.py\", line 100, in __init__\n",
+      "    self.build_from_dataframe(dataset)\n",
+      "    ~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^\n",
+      "  File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_uk/simulation.py\", line 286, in build_from_dataframe\n",
+      "    self.set_input(variable, time_period, df[column])\n",
+      "    ~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+      "  File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/simulations/simulation.py\", line 1241, in set_input\n",
+      "    self.get_holder(variable_name).set_input(\n",
+      "    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^\n",
+      "        period, value, self.branch_name\n",
+      "        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+      "    )\n",
+      "    ^\n",
+      "  File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/holders/holder.py\", line 244, in set_input\n",
+      "    return self._set(period, array, branch_name)\n",
+      "           ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+      "  File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/holders/holder.py\", line 286, in _set\n",
+      "    value = self._to_array(value)\n",
+      "  File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/holders/holder.py\", line 253, in _to_array\n",
+      "    raise ValueError(\n",
+      "    ...<7 lines>...\n",
+      "    )\n",
+      "ValueError: Unable to set value \"[ 39361.   39361.   39361.  ... 134410.5 134410.5   6000. ]\" for variable \"corporate_wealth\", as its length is 8470 while there are 4108 households in the simulation.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Step 5a: Create new simulation from filtered DataFrame\n",
+    "print(\"=== Step 5a: Create simulation from filtered DataFrame ===\")\n",
+    "print(\"(This is what policyengine.py:299-300 does)\")\n",
+    "print()\n",
+    "\n",
+    "try:\n",
+    "    new_sim = UKMicrosimulation(dataset=df_wales)\n",
+    "    \n",
+    "    print(f\"New simulation created successfully!\")\n",
+    "    print(f\"  Person count: {new_sim.persons.count}\")\n",
+    "    print(f\"  Household count: {new_sim.household.count}\")\n",
+    "    print(f\"  BenUnit count: {new_sim.benunit.count}\")\n",
+    "    \n",
+    "    # Critical check\n",
+    "    if new_sim.household.count == new_sim.persons.count:\n",
+    "        print(\"\\n  [ERROR] Household count equals person count!\")\n",
+    "        print(\"  Entity linkage was lost during filtering.\")\n",
+    "        \n",
+    "except Exception as e:\n",
+    "    print(f\"[ERROR] Failed to create simulation: {e}\")\n",
+    "    traceback.print_exc()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "cell-17",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "=== Step 5b: Check household_id holder ===\n",
+      "Error checking household_id: name 'new_sim' is not defined\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Step 5b: Check if household_id holder has data\n",
+    "print(\"\\n=== Step 5b: Check household_id holder ===\")\n",
+    "\n",
+    "try:\n",
+    "    hh_id_holder = new_sim.get_holder(\"household_id\")\n",
+    "    known_periods = list(hh_id_holder.get_known_periods())\n",
+    "    print(f\"household_id known periods: {known_periods}\")\n",
+    "    \n",
+    "    if known_periods:\n",
+    "        period = known_periods[0]\n",
+    "        arr = hh_id_holder.get_array(period)\n",
+    "        print(f\"  Period {period}: array shape = {arr.shape if arr is not None else 'None'}\")\n",
+    "        if arr is not None:\n",
+    "            print(f\"  Values sample: {arr[:10]}\")\n",
+    "    else:\n",
+    "        print(\"  No known periods - household_id was not set as input!\")\n",
+    "except Exception as e:\n",
+    "    print(f\"Error checking household_id: {e}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "cell-18",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "=== Step 5c: Calculate household_id ===\n",
+      "Error calculating household_id: name 'new_sim' is not defined\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Traceback (most recent call last):\n",
+      "  File \"/var/folders/qf/7fglpnr94wsc5xgbqkp80xbw0000gn/T/ipykernel_19066/1284064109.py\", line 5, in <module>\n",
+      "    hh_ids = new_sim.calculate(\"household_id\", 2025)\n",
+      "             ^^^^^^^\n",
+      "NameError: name 'new_sim' is not defined\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Step 5c: Try to calculate household_id\n",
+    "print(\"\\n=== Step 5c: Calculate household_id ===\")\n",
+    "\n",
+    "try:\n",
+    "    hh_ids = new_sim.calculate(\"household_id\", 2025)\n",
+    "    print(f\"household_id calculation result:\")\n",
+    "    print(f\"  Length: {len(hh_ids)}\")\n",
+    "    print(f\"  Expected (household count): {new_sim.household.count}\")\n",
+    "    \n",
+    "    if len(hh_ids) == new_sim.household.count:\n",
+    "        print(\"  [OK] Length matches household count\")\n",
+    "    else:\n",
+    "        print(f\"  [ERROR] Length mismatch! Got {len(hh_ids)}, expected {new_sim.household.count}\")\n",
+    "        \n",
+    "except Exception as e:\n",
+    "    print(f\"Error calculating household_id: {e}\")\n",
+    "    traceback.print_exc()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cell-19",
+   "metadata": {},
+   "source": [
+    "## Step 6: Try to Calculate would_evade_tv_licence_fee\n",
+    "\n",
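+    "This is the variable that triggers the error in production. This step reuses `new_sim` from Step 5, so if Step 5 failed, the cell below reports a `NameError` instead of the production error."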
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "cell-20",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "=== Step 6: Calculate would_evade_tv_licence_fee ===\n",
+      "(This calculation uses random(household) internally)\n",
+      "\n",
+      "Unexpected error: NameError: name 'new_sim' is not defined\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Traceback (most recent call last):\n",
+      "  File \"/var/folders/qf/7fglpnr94wsc5xgbqkp80xbw0000gn/T/ipykernel_19066/1304269510.py\", line 7, in <module>\n",
+      "    result = new_sim.calculate(\"would_evade_tv_licence_fee\", 2025)\n",
+      "             ^^^^^^^\n",
+      "NameError: name 'new_sim' is not defined\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Step 6: Calculate the problematic variable\n",
+    "print(\"=== Step 6: Calculate would_evade_tv_licence_fee ===\")\n",
+    "print(\"(This calculation uses random(household) internally)\")\n",
+    "print()\n",
+    "\n",
+    "try:\n",
+    "    result = new_sim.calculate(\"would_evade_tv_licence_fee\", 2025)\n",
+    "    print(f\"Calculation succeeded!\")\n",
+    "    print(f\"  Result length: {len(result)}\")\n",
+    "    print(f\"  Expected (household count): {new_sim.household.count}\")\n",
+    "    print(f\"  Result dtype: {result.dtype}\")\n",
+    "    \n",
+    "except ValueError as e:\n",
+    "    print(f\"[EXPECTED ERROR] ValueError:\")\n",
+    "    print(f\"  {e}\")\n",
+    "    print()\n",
+    "    print(\"This confirms the bug!\")\n",
+    "    \n",
+    "    # Parse the error message\n",
+    "    error_str = str(e)\n",
+    "    if \"length is\" in error_str and \"while there are\" in error_str:\n",
+    "        print(\"\\nThe error indicates:\")\n",
+    "        print(\"  - The formula returned an array sized for persons\")\n",
+    "        print(\"  - But the variable is household-level\")\n",
+    "        print(\"  - This means random(household) returned wrong-sized array\")\n",
+    "        \n",
+    "except Exception as e:\n",
+    "    print(f\"Unexpected error: {type(e).__name__}: {e}\")\n",
+    "    traceback.print_exc()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cell-21",
+   "metadata": {},
+   "source": [
+    "## Step 7: Test Using policyengine.Simulation Directly\n",
+    "\n",
+    "Now let's test using the high-level API to confirm the bug occurs there too."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "cell-22",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "=== Step 7: Test with policyengine.Simulation ===\n",
+      "Creating Simulation with region='country/wales'...\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "No data provided, using default dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n",
+      "Using dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n",
+      "Downloading enhanced_frs_2023_24.h5 from bucket policyengine-uk-data-private\n",
+      "WARNING:root:No version specified for policyengine-uk-data-private, enhanced_frs_2023_24.h5. Using latest version: 1.29.4\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "DataFrame columns: ['miscellaneous_income__2023', 'miscellaneous_income__2024', 'miscellaneous_income__2025', 'miscellaneous_income__2026', 'miscellaneous_income__2027', 'miscellaneous_income__2028', 'miscellaneous_income__2029', 'miscellaneous_income__2030', 'corporate_wealth__2023', 'corporate_wealth__2024', 'corporate_wealth__2025', 'corporate_wealth__2026', 'corporate_wealth__2027', 'corporate_wealth__2028', 'corporate_wealth__2029', 'corporate_wealth__2030', 'non_residential_property_value__2023', 'non_residential_property_value__2024', 'non_residential_property_value__2025', 'non_residential_property_value__2026', 'non_residential_property_value__2027', 'non_residential_property_value__2028', 'non_residential_property_value__2029', 'non_residential_property_value__2030', 'employment_income_before_lsr__2023', 'employment_income_before_lsr__2024', 'employment_income_before_lsr__2025', 'employment_income_before_lsr__2026', 'employment_income_before_lsr__2027', 'employment_income_before_lsr__2028', 'employment_income_before_lsr__2029', 'employment_income_before_lsr__2030', 'property_income__2023', 'property_income__2024', 'property_income__2025', 'property_income__2026', 'property_income__2027', 'property_income__2028', 'property_income__2029', 'property_income__2030', 'savings_interest_income__2023', 'savings_interest_income__2024', 'savings_interest_income__2025', 'savings_interest_income__2026', 'savings_interest_income__2027', 'savings_interest_income__2028', 'savings_interest_income__2029', 'savings_interest_income__2030', 'main_residence_value__2023', 'main_residence_value__2024', 'main_residence_value__2025', 'main_residence_value__2026', 'main_residence_value__2027', 'main_residence_value__2028', 'main_residence_value__2029', 'main_residence_value__2030', 'rent__2023', 'rent__2024', 'rent__2025', 'rent__2026', 'rent__2027', 'rent__2028', 'rent__2029', 'rent__2030', 'private_pension_income__2023', 'private_pension_income__2024', 
'private_pension_income__2025', 'private_pension_income__2026', 'private_pension_income__2027', 'private_pension_income__2028', 'private_pension_income__2029', 'private_pension_income__2030', 'self_employment_income__2023', 'self_employment_income__2024', 'self_employment_income__2025', 'self_employment_income__2026', 'self_employment_income__2027', 'self_employment_income__2028', 'self_employment_income__2029', 'self_employment_income__2030', 'private_transfer_income__2023', 'private_transfer_income__2024', 'private_transfer_income__2025', 'private_transfer_income__2026', 'private_transfer_income__2027', 'private_transfer_income__2028', 'private_transfer_income__2029', 'private_transfer_income__2030', 'age__2023', 'age__2024', 'age__2025', 'age__2026', 'age__2027', 'age__2028', 'age__2029', 'age__2030', 'owned_land__2023', 'owned_land__2024', 'owned_land__2025', 'owned_land__2026', 'owned_land__2027', 'owned_land__2028', 'owned_land__2029', 'owned_land__2030', 'lump_sum_income__2023', 'lump_sum_income__2024', 'lump_sum_income__2025', 'lump_sum_income__2026', 'lump_sum_income__2027', 'lump_sum_income__2028', 'lump_sum_income__2029', 'lump_sum_income__2030', 'council_tax_band__2023', 'council_tax_band__2024', 'council_tax_band__2025', 'council_tax_band__2026', 'council_tax_band__2027', 'council_tax_band__2028', 'council_tax_band__2029', 'council_tax_band__2030', 'other_residential_property_value__2023', 'other_residential_property_value__2024', 'other_residential_property_value__2025', 'other_residential_property_value__2026', 'other_residential_property_value__2027', 'other_residential_property_value__2028', 'other_residential_property_value__2029', 'other_residential_property_value__2030', 'dividend_income__2023', 'dividend_income__2024', 'dividend_income__2025', 'dividend_income__2026', 'dividend_income__2027', 'dividend_income__2028', 'dividend_income__2029', 'dividend_income__2030', 'maintenance_income__2023', 'maintenance_income__2024', 
'maintenance_income__2025', 'maintenance_income__2026', 'maintenance_income__2027', 'maintenance_income__2028', 'maintenance_income__2029', 'maintenance_income__2030', 'petrol_spending__2023', 'petrol_spending__2024', 'petrol_spending__2025', 'petrol_spending__2026', 'petrol_spending__2027', 'petrol_spending__2028', 'petrol_spending__2029', 'petrol_spending__2030', 'health_consumption__2023', 'health_consumption__2024', 'health_consumption__2025', 'health_consumption__2026', 'health_consumption__2027', 'health_consumption__2028', 'health_consumption__2029', 'health_consumption__2030', 'household_furnishings_consumption__2023', 'household_furnishings_consumption__2024', 'household_furnishings_consumption__2025', 'household_furnishings_consumption__2026', 'household_furnishings_consumption__2027', 'household_furnishings_consumption__2028', 'household_furnishings_consumption__2029', 'household_furnishings_consumption__2030', 'restaurants_and_hotels_consumption__2023', 'restaurants_and_hotels_consumption__2024', 'restaurants_and_hotels_consumption__2025', 'restaurants_and_hotels_consumption__2026', 'restaurants_and_hotels_consumption__2027', 'restaurants_and_hotels_consumption__2028', 'restaurants_and_hotels_consumption__2029', 'restaurants_and_hotels_consumption__2030', 'miscellaneous_consumption__2023', 'miscellaneous_consumption__2024', 'miscellaneous_consumption__2025', 'miscellaneous_consumption__2026', 'miscellaneous_consumption__2027', 'miscellaneous_consumption__2028', 'miscellaneous_consumption__2029', 'miscellaneous_consumption__2030', 'recreation_consumption__2023', 'recreation_consumption__2024', 'recreation_consumption__2025', 'recreation_consumption__2026', 'recreation_consumption__2027', 'recreation_consumption__2028', 'recreation_consumption__2029', 'recreation_consumption__2030', 'domestic_energy_consumption__2023', 'domestic_energy_consumption__2024', 'domestic_energy_consumption__2025', 'domestic_energy_consumption__2026', 
'domestic_energy_consumption__2027', 'domestic_energy_consumption__2028', 'domestic_energy_consumption__2029', 'domestic_energy_consumption__2030', 'alcohol_and_tobacco_consumption__2023', 'alcohol_and_tobacco_consumption__2024', 'alcohol_and_tobacco_consumption__2025', 'alcohol_and_tobacco_consumption__2026', 'alcohol_and_tobacco_consumption__2027', 'alcohol_and_tobacco_consumption__2028', 'alcohol_and_tobacco_consumption__2029', 'alcohol_and_tobacco_consumption__2030', 'clothing_and_footwear_consumption__2023', 'clothing_and_footwear_consumption__2024', 'clothing_and_footwear_consumption__2025', 'clothing_and_footwear_consumption__2026', 'clothing_and_footwear_consumption__2027', 'clothing_and_footwear_consumption__2028', 'clothing_and_footwear_consumption__2029', 'clothing_and_footwear_consumption__2030', 'education_consumption__2023', 'education_consumption__2024', 'education_consumption__2025', 'education_consumption__2026', 'education_consumption__2027', 'education_consumption__2028', 'education_consumption__2029', 'education_consumption__2030', 'communication_consumption__2023', 'communication_consumption__2024', 'communication_consumption__2025', 'communication_consumption__2026', 'communication_consumption__2027', 'communication_consumption__2028', 'communication_consumption__2029', 'communication_consumption__2030', 'housing_water_and_electricity_consumption__2023', 'housing_water_and_electricity_consumption__2024', 'housing_water_and_electricity_consumption__2025', 'housing_water_and_electricity_consumption__2026', 'housing_water_and_electricity_consumption__2027', 'housing_water_and_electricity_consumption__2028', 'housing_water_and_electricity_consumption__2029', 'housing_water_and_electricity_consumption__2030', 'diesel_spending__2023', 'diesel_spending__2024', 'diesel_spending__2025', 'diesel_spending__2026', 'diesel_spending__2027', 'diesel_spending__2028', 'diesel_spending__2029', 'diesel_spending__2030', 
'food_and_non_alcoholic_beverages_consumption__2023', 'food_and_non_alcoholic_beverages_consumption__2024', 'food_and_non_alcoholic_beverages_consumption__2025', 'food_and_non_alcoholic_beverages_consumption__2026', 'food_and_non_alcoholic_beverages_consumption__2027', 'food_and_non_alcoholic_beverages_consumption__2028', 'food_and_non_alcoholic_beverages_consumption__2029', 'food_and_non_alcoholic_beverages_consumption__2030', 'transport_consumption__2023', 'transport_consumption__2024', 'transport_consumption__2025', 'transport_consumption__2026', 'transport_consumption__2027', 'transport_consumption__2028', 'transport_consumption__2029', 'transport_consumption__2030', 'childcare_expenses__2023', 'childcare_expenses__2024', 'childcare_expenses__2025', 'childcare_expenses__2026', 'childcare_expenses__2027', 'childcare_expenses__2028', 'childcare_expenses__2029', 'childcare_expenses__2030', 'water_and_sewerage_charges__2023', 'water_and_sewerage_charges__2024', 'water_and_sewerage_charges__2025', 'water_and_sewerage_charges__2026', 'water_and_sewerage_charges__2027', 'water_and_sewerage_charges__2028', 'water_and_sewerage_charges__2029', 'water_and_sewerage_charges__2030', 'maintenance_expenses__2023', 'maintenance_expenses__2024', 'maintenance_expenses__2025', 'maintenance_expenses__2026', 'maintenance_expenses__2027', 'maintenance_expenses__2028', 'maintenance_expenses__2029', 'maintenance_expenses__2030', 'employee_pension_contributions_reported__2023', 'employee_pension_contributions_reported__2024', 'employee_pension_contributions_reported__2025', 'employee_pension_contributions_reported__2026', 'employee_pension_contributions_reported__2027', 'employee_pension_contributions_reported__2028', 'employee_pension_contributions_reported__2029', 'employee_pension_contributions_reported__2030', 'mortgage_capital_repayment__2023', 'mortgage_capital_repayment__2024', 'mortgage_capital_repayment__2025', 'mortgage_capital_repayment__2026', 
'mortgage_capital_repayment__2027', 'mortgage_capital_repayment__2028', 'mortgage_capital_repayment__2029', 'mortgage_capital_repayment__2030', 'pension_contributions_via_salary_sacrifice__2023', 'pension_contributions_via_salary_sacrifice__2024', 'pension_contributions_via_salary_sacrifice__2025', 'pension_contributions_via_salary_sacrifice__2026', 'pension_contributions_via_salary_sacrifice__2027', 'pension_contributions_via_salary_sacrifice__2028', 'pension_contributions_via_salary_sacrifice__2029', 'pension_contributions_via_salary_sacrifice__2030', 'council_tax__2023', 'council_tax__2024', 'council_tax__2025', 'council_tax__2026', 'council_tax__2027', 'council_tax__2028', 'council_tax__2029', 'council_tax__2030', 'mortgage_interest_repayment__2023', 'mortgage_interest_repayment__2024', 'mortgage_interest_repayment__2025', 'mortgage_interest_repayment__2026', 'mortgage_interest_repayment__2027', 'mortgage_interest_repayment__2028', 'mortgage_interest_repayment__2029', 'mortgage_interest_repayment__2030', 'housing_service_charges__2023', 'housing_service_charges__2024', 'housing_service_charges__2025', 'housing_service_charges__2026', 'housing_service_charges__2027', 'housing_service_charges__2028', 'housing_service_charges__2029', 'housing_service_charges__2030', 'employer_pension_contributions__2023', 'employer_pension_contributions__2024', 'employer_pension_contributions__2025', 'employer_pension_contributions__2026', 'employer_pension_contributions__2027', 'employer_pension_contributions__2028', 'employer_pension_contributions__2029', 'employer_pension_contributions__2030', 'personal_pension_contributions__2023', 'personal_pension_contributions__2024', 'personal_pension_contributions__2025', 'personal_pension_contributions__2026', 'personal_pension_contributions__2027', 'personal_pension_contributions__2028', 'personal_pension_contributions__2029', 'personal_pension_contributions__2030', 'attends_private_school__2023', 'attends_private_school__2024', 
'attends_private_school__2025', 'attends_private_school__2026', 'attends_private_school__2027', 'attends_private_school__2028', 'attends_private_school__2029', 'attends_private_school__2030', 'region__2023', 'region__2024', 'region__2025', 'region__2026', 'region__2027', 'region__2028', 'region__2029', 'region__2030', 'brma__2023', 'brma__2024', 'brma__2025', 'brma__2026', 'brma__2027', 'brma__2028', 'brma__2029', 'brma__2030', 'net_financial_wealth__2023', 'net_financial_wealth__2024', 'net_financial_wealth__2025', 'net_financial_wealth__2026', 'net_financial_wealth__2027', 'net_financial_wealth__2028', 'net_financial_wealth__2029', 'net_financial_wealth__2030', 'property_wealth__2023', 'property_wealth__2024', 'property_wealth__2025', 'property_wealth__2026', 'property_wealth__2027', 'property_wealth__2028', 'property_wealth__2029', 'property_wealth__2030', 'savings__2023', 'savings__2024', 'savings__2025', 'savings__2026', 'savings__2027', 'savings__2028', 'savings__2029', 'savings__2030', 'num_vehicles__2023', 'num_vehicles__2024', 'num_vehicles__2025', 'num_vehicles__2026', 'num_vehicles__2027', 'num_vehicles__2028', 'num_vehicles__2029', 'num_vehicles__2030', 'gross_financial_wealth__2023', 'gross_financial_wealth__2024', 'gross_financial_wealth__2025', 'gross_financial_wealth__2026', 'gross_financial_wealth__2027', 'gross_financial_wealth__2028', 'gross_financial_wealth__2029', 'gross_financial_wealth__2030', 'relation_type__2025', 'is_enhanced_disabled_for_benefits__2023', 'is_enhanced_disabled_for_benefits__2024', 'is_enhanced_disabled_for_benefits__2025', 'is_enhanced_disabled_for_benefits__2026', 'is_enhanced_disabled_for_benefits__2027', 'is_enhanced_disabled_for_benefits__2028', 'is_enhanced_disabled_for_benefits__2029', 'is_enhanced_disabled_for_benefits__2030', 'is_higher_earner__2023', 'is_higher_earner__2024', 'is_higher_earner__2025', 'is_higher_earner__2026', 'is_higher_earner__2027', 'is_higher_earner__2028', 'is_higher_earner__2029', 
'is_higher_earner__2030', 'gender__2023', 'gender__2024', 'gender__2025', 'gender__2026', 'gender__2027', 'gender__2028', 'gender__2029', 'gender__2030', 'person_id__2023', 'person_id__2024', 'person_id__2025', 'person_id__2026', 'person_id__2027', 'person_id__2028', 'person_id__2029', 'person_id__2030', 'household_weight__2023', 'household_weight__2024', 'household_weight__2025', 'household_weight__2026', 'household_weight__2027', 'household_weight__2028', 'household_weight__2029', 'household_weight__2030', 'is_benunit_head__2023', 'is_benunit_head__2024', 'is_benunit_head__2025', 'is_benunit_head__2026', 'is_benunit_head__2027', 'is_benunit_head__2028', 'is_benunit_head__2029', 'is_benunit_head__2030', 'person_weight__2025', 'person_weight__2026', 'person_weight__2027', 'person_weight__2028', 'person_weight__2029', 'person_weight__2023', 'person_weight__2024', 'person_weight__2030', 'person_benunit_id__2023', 'person_benunit_id__2024', 'person_benunit_id__2025', 'person_benunit_id__2026', 'person_benunit_id__2027', 'person_benunit_id__2028', 'person_benunit_id__2029', 'person_benunit_id__2030', 'person_household_id__2023', 'person_household_id__2024', 'person_household_id__2025', 'person_household_id__2026', 'person_household_id__2027', 'person_household_id__2028', 'person_household_id__2029', 'person_household_id__2030', 'tenure_type__2023', 'tenure_type__2024', 'tenure_type__2025', 'tenure_type__2026', 'tenure_type__2027', 'tenure_type__2028', 'tenure_type__2029', 'tenure_type__2030', 'marital_status__2023', 'marital_status__2024', 'marital_status__2025', 'marital_status__2026', 'marital_status__2027', 'marital_status__2028', 'marital_status__2029', 'marital_status__2030', 'is_household_head__2023', 'is_household_head__2024', 'is_household_head__2025', 'is_household_head__2026', 'is_household_head__2027', 'is_household_head__2028', 'is_household_head__2029', 'is_household_head__2030', 'current_education__2023', 'current_education__2024', 
'current_education__2025', 'current_education__2026', 'current_education__2027', 'current_education__2028', 'current_education__2029', 'current_education__2030', 'household_owns_tv__2023', 'household_owns_tv__2024', 'household_owns_tv__2025', 'household_owns_tv__2026', 'household_owns_tv__2027', 'household_owns_tv__2028', 'household_owns_tv__2029', 'household_owns_tv__2030', 'is_severely_disabled_for_benefits__2023', 'is_severely_disabled_for_benefits__2024', 'is_severely_disabled_for_benefits__2025', 'is_severely_disabled_for_benefits__2026', 'is_severely_disabled_for_benefits__2027', 'is_severely_disabled_for_benefits__2028', 'is_severely_disabled_for_benefits__2029', 'is_severely_disabled_for_benefits__2030', 'accommodation_type__2023', 'accommodation_type__2024', 'accommodation_type__2025', 'accommodation_type__2026', 'accommodation_type__2027', 'accommodation_type__2028', 'accommodation_type__2029', 'accommodation_type__2030', 'is_married__2023', 'is_married__2024', 'is_married__2025', 'is_married__2026', 'is_married__2027', 'is_married__2028', 'is_married__2029', 'is_married__2030', 'benunit_id__2023', 'benunit_id__2024', 'benunit_id__2025', 'benunit_id__2026', 'benunit_id__2027', 'benunit_id__2028', 'benunit_id__2029', 'benunit_id__2030', 'is_disabled_for_benefits__2023', 'is_disabled_for_benefits__2024', 'is_disabled_for_benefits__2025', 'is_disabled_for_benefits__2026', 'is_disabled_for_benefits__2027', 'is_disabled_for_benefits__2028', 'is_disabled_for_benefits__2029', 'is_disabled_for_benefits__2030', 'eldest_adult_age__2025', 'is_adult__2025', 'benunit_weight__2025', 'benunit_weight__2026', 'benunit_weight__2027', 'benunit_weight__2028', 'benunit_weight__2029', 'household_id__2023', 'household_id__2024', 'household_id__2025', 'household_id__2026', 'household_id__2027', 'household_id__2028', 'household_id__2029', 'household_id__2030', 'structural_insurance_payments__2023', 'structural_insurance_payments__2024', 'structural_insurance_payments__2025', 
'structural_insurance_payments__2026', 'structural_insurance_payments__2027', 'structural_insurance_payments__2028', 'structural_insurance_payments__2029', 'structural_insurance_payments__2030', 'main_residential_property_purchased_is_first_home__2023', 'main_residential_property_purchased_is_first_home__2024', 'main_residential_property_purchased_is_first_home__2025', 'main_residential_property_purchased_is_first_home__2026', 'main_residential_property_purchased_is_first_home__2027', 'main_residential_property_purchased_is_first_home__2028', 'main_residential_property_purchased_is_first_home__2029', 'main_residential_property_purchased_is_first_home__2030', 'full_rate_vat_expenditure_rate__2023', 'full_rate_vat_expenditure_rate__2024', 'full_rate_vat_expenditure_rate__2025', 'full_rate_vat_expenditure_rate__2026', 'full_rate_vat_expenditure_rate__2027', 'full_rate_vat_expenditure_rate__2028', 'full_rate_vat_expenditure_rate__2029', 'full_rate_vat_expenditure_rate__2030', 'external_child_payments__2023', 'external_child_payments__2024', 'external_child_payments__2025', 'external_child_payments__2026', 'external_child_payments__2027', 'external_child_payments__2028', 'external_child_payments__2029', 'external_child_payments__2030', 'statutory_maternity_pay__2023', 'statutory_maternity_pay__2024', 'statutory_maternity_pay__2025', 'statutory_maternity_pay__2026', 'statutory_maternity_pay__2027', 'statutory_maternity_pay__2028', 'statutory_maternity_pay__2029', 'statutory_maternity_pay__2030', 'employment_status__2023', 'employment_status__2024', 'employment_status__2025', 'employment_status__2026', 'employment_status__2027', 'employment_status__2028', 'employment_status__2029', 'employment_status__2030', 'is_single__2025', 'statutory_sick_pay__2023', 'statutory_sick_pay__2024', 'statutory_sick_pay__2025', 'statutory_sick_pay__2026', 'statutory_sick_pay__2027', 'statutory_sick_pay__2028', 'statutory_sick_pay__2029', 'statutory_sick_pay__2030', 'hours_worked__2023', 
'hours_worked__2024', 'hours_worked__2025', 'hours_worked__2026', 'hours_worked__2027', 'hours_worked__2028', 'hours_worked__2029', 'hours_worked__2030', 'rail_usage__2023', 'rail_usage__2024', 'rail_usage__2025', 'rail_usage__2026', 'rail_usage__2027', 'rail_usage__2028', 'rail_usage__2029', 'rail_usage__2030', 'rail_subsidy_spending__2023', 'rail_subsidy_spending__2024', 'rail_subsidy_spending__2025', 'rail_subsidy_spending__2026', 'rail_subsidy_spending__2027', 'rail_subsidy_spending__2028', 'rail_subsidy_spending__2029', 'rail_subsidy_spending__2030', 'bus_subsidy_spending__2023', 'bus_subsidy_spending__2024', 'bus_subsidy_spending__2025', 'bus_subsidy_spending__2026', 'bus_subsidy_spending__2027', 'bus_subsidy_spending__2028', 'bus_subsidy_spending__2029', 'bus_subsidy_spending__2030', 'outpatient_visits__2023', 'outpatient_visits__2024', 'outpatient_visits__2025', 'outpatient_visits__2026', 'outpatient_visits__2027', 'outpatient_visits__2028', 'outpatient_visits__2029', 'outpatient_visits__2030', 'nhs_outpatient_spending__2023', 'nhs_outpatient_spending__2024', 'nhs_outpatient_spending__2025', 'nhs_outpatient_spending__2026', 'nhs_outpatient_spending__2027', 'nhs_outpatient_spending__2028', 'nhs_outpatient_spending__2029', 'nhs_outpatient_spending__2030', 'nhs_a_and_e_spending__2023', 'nhs_a_and_e_spending__2024', 'nhs_a_and_e_spending__2025', 'nhs_a_and_e_spending__2026', 'nhs_a_and_e_spending__2027', 'nhs_a_and_e_spending__2028', 'nhs_a_and_e_spending__2029', 'nhs_a_and_e_spending__2030', 'a_and_e_visits__2023', 'a_and_e_visits__2024', 'a_and_e_visits__2025', 'a_and_e_visits__2026', 'a_and_e_visits__2027', 'a_and_e_visits__2028', 'a_and_e_visits__2029', 'a_and_e_visits__2030', 'admitted_patient_visits__2023', 'admitted_patient_visits__2024', 'admitted_patient_visits__2025', 'admitted_patient_visits__2026', 'admitted_patient_visits__2027', 'admitted_patient_visits__2028', 'admitted_patient_visits__2029', 'admitted_patient_visits__2030', 
'nhs_admitted_patient_spending__2023', 'nhs_admitted_patient_spending__2024', 'nhs_admitted_patient_spending__2025', 'nhs_admitted_patient_spending__2026', 'nhs_admitted_patient_spending__2027', 'nhs_admitted_patient_spending__2028', 'nhs_admitted_patient_spending__2029', 'nhs_admitted_patient_spending__2030', 'healthy_start_vouchers__2023', 'healthy_start_vouchers__2024', 'healthy_start_vouchers__2025', 'healthy_start_vouchers__2026', 'healthy_start_vouchers__2027', 'healthy_start_vouchers__2028', 'healthy_start_vouchers__2029', 'healthy_start_vouchers__2030', 'education_grants__2023', 'education_grants__2024', 'education_grants__2025', 'education_grants__2026', 'education_grants__2027', 'education_grants__2028', 'education_grants__2029', 'education_grants__2030', 'jsa_contrib_reported__2023', 'jsa_contrib_reported__2024', 'jsa_contrib_reported__2025', 'jsa_contrib_reported__2026', 'jsa_contrib_reported__2027', 'jsa_contrib_reported__2028', 'jsa_contrib_reported__2029', 'jsa_contrib_reported__2030', 'sda_reported__2023', 'sda_reported__2024', 'sda_reported__2025', 'sda_reported__2026', 'sda_reported__2027', 'sda_reported__2028', 'sda_reported__2029', 'sda_reported__2030', 'adult_ema__2023', 'adult_ema__2024', 'adult_ema__2025', 'adult_ema__2026', 'adult_ema__2027', 'adult_ema__2028', 'adult_ema__2029', 'adult_ema__2030', 'winter_fuel_allowance_reported__2023', 'winter_fuel_allowance_reported__2024', 'winter_fuel_allowance_reported__2025', 'winter_fuel_allowance_reported__2026', 'winter_fuel_allowance_reported__2027', 'winter_fuel_allowance_reported__2028', 'winter_fuel_allowance_reported__2029', 'winter_fuel_allowance_reported__2030', 'child_tax_credit_reported__2023', 'child_tax_credit_reported__2024', 'child_tax_credit_reported__2025', 'child_tax_credit_reported__2026', 'child_tax_credit_reported__2027', 'child_tax_credit_reported__2028', 'child_tax_credit_reported__2029', 'child_tax_credit_reported__2030', 'working_tax_credit_reported__2023', 
'working_tax_credit_reported__2024', 'working_tax_credit_reported__2025', 'working_tax_credit_reported__2026', 'working_tax_credit_reported__2027', 'working_tax_credit_reported__2028', 'working_tax_credit_reported__2029', 'working_tax_credit_reported__2030', 'bsp_reported__2023', 'bsp_reported__2024', 'bsp_reported__2025', 'bsp_reported__2026', 'bsp_reported__2027', 'bsp_reported__2028', 'bsp_reported__2029', 'bsp_reported__2030', 'carers_allowance_reported__2023', 'carers_allowance_reported__2024', 'carers_allowance_reported__2025', 'carers_allowance_reported__2026', 'carers_allowance_reported__2027', 'carers_allowance_reported__2028', 'carers_allowance_reported__2029', 'carers_allowance_reported__2030', 'access_fund__2023', 'access_fund__2024', 'access_fund__2025', 'access_fund__2026', 'access_fund__2027', 'access_fund__2028', 'access_fund__2029', 'access_fund__2030', 'ssmg_reported__2023', 'ssmg_reported__2024', 'ssmg_reported__2025', 'ssmg_reported__2026', 'ssmg_reported__2027', 'ssmg_reported__2028', 'ssmg_reported__2029', 'ssmg_reported__2030', 'incapacity_benefit_reported__2023', 'incapacity_benefit_reported__2024', 'incapacity_benefit_reported__2025', 'incapacity_benefit_reported__2026', 'incapacity_benefit_reported__2027', 'incapacity_benefit_reported__2028', 'incapacity_benefit_reported__2029', 'incapacity_benefit_reported__2030', 'iidb_reported__2023', 'iidb_reported__2024', 'iidb_reported__2025', 'iidb_reported__2026', 'iidb_reported__2027', 'iidb_reported__2028', 'iidb_reported__2029', 'iidb_reported__2030', 'attendance_allowance_reported__2023', 'attendance_allowance_reported__2024', 'attendance_allowance_reported__2025', 'attendance_allowance_reported__2026', 'attendance_allowance_reported__2027', 'attendance_allowance_reported__2028', 'attendance_allowance_reported__2029', 'attendance_allowance_reported__2030', 'student_loans__2023', 'student_loans__2024', 'student_loans__2025', 'student_loans__2026', 'student_loans__2027', 'student_loans__2028', 
'student_loans__2029', 'student_loans__2030', 'esa_income_reported__2023', 'esa_income_reported__2024', 'esa_income_reported__2025', 'esa_income_reported__2026', 'esa_income_reported__2027', 'esa_income_reported__2028', 'esa_income_reported__2029', 'esa_income_reported__2030', 'state_pension_reported__2023', 'state_pension_reported__2024', 'state_pension_reported__2025', 'state_pension_reported__2026', 'state_pension_reported__2027', 'state_pension_reported__2028', 'state_pension_reported__2029', 'state_pension_reported__2030', 'afcs_reported__2023', 'afcs_reported__2024', 'afcs_reported__2025', 'afcs_reported__2026', 'afcs_reported__2027', 'afcs_reported__2028', 'afcs_reported__2029', 'afcs_reported__2030', 'council_tax_benefit_reported__2023', 'council_tax_benefit_reported__2024', 'council_tax_benefit_reported__2025', 'council_tax_benefit_reported__2026', 'council_tax_benefit_reported__2027', 'council_tax_benefit_reported__2028', 'council_tax_benefit_reported__2029', 'council_tax_benefit_reported__2030', 'income_support_reported__2023', 'income_support_reported__2024', 'income_support_reported__2025', 'income_support_reported__2026', 'income_support_reported__2027', 'income_support_reported__2028', 'income_support_reported__2029', 'income_support_reported__2030', 'esa_contrib_reported__2023', 'esa_contrib_reported__2024', 'esa_contrib_reported__2025', 'esa_contrib_reported__2026', 'esa_contrib_reported__2027', 'esa_contrib_reported__2028', 'esa_contrib_reported__2029', 'esa_contrib_reported__2030', 'jsa_income_reported__2023', 'jsa_income_reported__2024', 'jsa_income_reported__2025', 'jsa_income_reported__2026', 'jsa_income_reported__2027', 'jsa_income_reported__2028', 'jsa_income_reported__2029', 'jsa_income_reported__2030', 'child_ema__2023', 'child_ema__2024', 'child_ema__2025', 'child_ema__2026', 'child_ema__2027', 'child_ema__2028', 'child_ema__2029', 'child_ema__2030', 'dla_sc_reported__2023', 'dla_sc_reported__2024', 'dla_sc_reported__2025', 
'dla_sc_reported__2026', 'dla_sc_reported__2027', 'dla_sc_reported__2028', 'dla_sc_reported__2029', 'dla_sc_reported__2030', 'dla_m_reported__2023', 'dla_m_reported__2024', 'dla_m_reported__2025', 'dla_m_reported__2026', 'dla_m_reported__2027', 'dla_m_reported__2028', 'dla_m_reported__2029', 'dla_m_reported__2030', 'housing_benefit_reported__2023', 'housing_benefit_reported__2024', 'housing_benefit_reported__2025', 'housing_benefit_reported__2026', 'housing_benefit_reported__2027', 'housing_benefit_reported__2028', 'housing_benefit_reported__2029', 'housing_benefit_reported__2030', 'would_claim_uc__2023', 'would_claim_uc__2024', 'would_claim_uc__2025', 'would_claim_uc__2026', 'would_claim_uc__2027', 'would_claim_uc__2028', 'would_claim_uc__2029', 'would_claim_uc__2030', 'universal_credit_reported__2023', 'universal_credit_reported__2024', 'universal_credit_reported__2025', 'universal_credit_reported__2026', 'universal_credit_reported__2027', 'universal_credit_reported__2028', 'universal_credit_reported__2029', 'universal_credit_reported__2030', 'uc_standard_allowance_claimant_type__2025', 'uc_standard_allowance__2025', 'uc_standard_allowance__2026', 'uc_standard_allowance__2027', 'uc_standard_allowance__2028', 'uc_standard_allowance__2029', 'uc_limited_capability_for_WRA__2026', 'uc_limited_capability_for_WRA__2027', 'uc_limited_capability_for_WRA__2028', 'uc_limited_capability_for_WRA__2029', 'uc_LCWRA_element__2026', 'uc_LCWRA_element__2027', 'uc_LCWRA_element__2028', 'uc_LCWRA_element__2029', 'pip_m_reported__2023', 'pip_m_reported__2024', 'pip_m_reported__2025', 'pip_m_reported__2026', 'pip_m_reported__2027', 'pip_m_reported__2028', 'pip_m_reported__2029', 'pip_m_reported__2030', 'pip_dl_reported__2023', 'pip_dl_reported__2024', 'pip_dl_reported__2025', 'pip_dl_reported__2026', 'pip_dl_reported__2027', 'pip_dl_reported__2028', 'pip_dl_reported__2029', 'pip_dl_reported__2030', 'pension_credit_reported__2023', 'pension_credit_reported__2024', 
'pension_credit_reported__2025', 'pension_credit_reported__2026', 'pension_credit_reported__2027', 'pension_credit_reported__2028', 'pension_credit_reported__2029', 'pension_credit_reported__2030', 'would_claim_pc__2023', 'would_claim_pc__2024', 'would_claim_pc__2025', 'would_claim_pc__2026', 'would_claim_pc__2027', 'would_claim_pc__2028', 'would_claim_pc__2029', 'would_claim_pc__2030', 'would_evade_tv_licence_fee__2023', 'would_evade_tv_licence_fee__2024', 'would_evade_tv_licence_fee__2025', 'would_evade_tv_licence_fee__2026', 'would_evade_tv_licence_fee__2027', 'would_evade_tv_licence_fee__2028', 'would_evade_tv_licence_fee__2029', 'would_evade_tv_licence_fee__2030', 'free_school_fruit_veg__2023', 'free_school_fruit_veg__2024', 'free_school_fruit_veg__2025', 'free_school_fruit_veg__2026', 'free_school_fruit_veg__2027', 'free_school_fruit_veg__2028', 'free_school_fruit_veg__2029', 'free_school_fruit_veg__2030', 'dfe_education_spending__2023', 'dfe_education_spending__2024', 'dfe_education_spending__2025', 'dfe_education_spending__2026', 'dfe_education_spending__2027', 'dfe_education_spending__2028', 'dfe_education_spending__2029', 'dfe_education_spending__2030', 'free_school_meals__2023', 'free_school_meals__2024', 'free_school_meals__2025', 'free_school_meals__2026', 'free_school_meals__2027', 'free_school_meals__2028', 'free_school_meals__2029', 'free_school_meals__2030', 'would_claim_extended_childcare__2023', 'would_claim_extended_childcare__2024', 'would_claim_extended_childcare__2025', 'would_claim_extended_childcare__2026', 'would_claim_extended_childcare__2027', 'would_claim_extended_childcare__2028', 'would_claim_extended_childcare__2029', 'would_claim_extended_childcare__2030', 'maximum_extended_childcare_hours_usage__2023', 'maximum_extended_childcare_hours_usage__2024', 'maximum_extended_childcare_hours_usage__2025', 'maximum_extended_childcare_hours_usage__2026', 'maximum_extended_childcare_hours_usage__2027', 
'maximum_extended_childcare_hours_usage__2028', 'maximum_extended_childcare_hours_usage__2029', 'maximum_extended_childcare_hours_usage__2030', 'would_claim_targeted_childcare__2023', 'would_claim_targeted_childcare__2024', 'would_claim_targeted_childcare__2025', 'would_claim_targeted_childcare__2026', 'would_claim_targeted_childcare__2027', 'would_claim_targeted_childcare__2028', 'would_claim_targeted_childcare__2029', 'would_claim_targeted_childcare__2030', 'would_claim_universal_childcare__2023', 'would_claim_universal_childcare__2024', 'would_claim_universal_childcare__2025', 'would_claim_universal_childcare__2026', 'would_claim_universal_childcare__2027', 'would_claim_universal_childcare__2028', 'would_claim_universal_childcare__2029', 'would_claim_universal_childcare__2030', 'student_loan_repayments__2023', 'student_loan_repayments__2024', 'student_loan_repayments__2025', 'student_loan_repayments__2026', 'student_loan_repayments__2027', 'student_loan_repayments__2028', 'student_loan_repayments__2029', 'student_loan_repayments__2030', 'would_claim_child_benefit__2023', 'would_claim_child_benefit__2024', 'would_claim_child_benefit__2025', 'would_claim_child_benefit__2026', 'would_claim_child_benefit__2027', 'would_claim_child_benefit__2028', 'would_claim_child_benefit__2029', 'would_claim_child_benefit__2030', 'child_benefit_reported__2023', 'child_benefit_reported__2024', 'child_benefit_reported__2025', 'child_benefit_reported__2026', 'child_benefit_reported__2027', 'child_benefit_reported__2028', 'child_benefit_reported__2029', 'child_benefit_reported__2030', 'capital_gains_before_response__2023', 'capital_gains_before_response__2024', 'capital_gains_before_response__2025', 'capital_gains_before_response__2026', 'capital_gains_before_response__2027', 'capital_gains_before_response__2028', 'capital_gains_before_response__2029', 'capital_gains_before_response__2030', 'tax_free_savings_income__2023', 'tax_free_savings_income__2024', 
'tax_free_savings_income__2025', 'tax_free_savings_income__2026', 'tax_free_savings_income__2027', 'tax_free_savings_income__2028', 'tax_free_savings_income__2029', 'tax_free_savings_income__2030', 'would_claim_tfc__2023', 'would_claim_tfc__2024', 'would_claim_tfc__2025', 'would_claim_tfc__2026', 'would_claim_tfc__2027', 'would_claim_tfc__2028', 'would_claim_tfc__2029', 'would_claim_tfc__2030', 'student_loan_plan__2023', 'student_loan_plan__2024', 'student_loan_plan__2025', 'student_loan_plan__2026', 'student_loan_plan__2027', 'student_loan_plan__2028', 'student_loan_plan__2029', 'student_loan_plan__2030', 'domestic_rates__2023', 'domestic_rates__2024', 'domestic_rates__2025', 'domestic_rates__2026', 'domestic_rates__2027', 'domestic_rates__2028', 'domestic_rates__2029', 'domestic_rates__2030']\n",
+      "DataFrame shape: (115612, 1127)\n",
+      "'person_household_id' columns: ['person_household_id__2023', 'person_household_id__2024', 'person_household_id__2025', 'person_household_id__2026', 'person_household_id__2027', 'person_household_id__2028', 'person_household_id__2029', 'person_household_id__2030']\n",
+      "Filtered DataFrame shape: (8470, 1127)\n",
+      "[ERROR] ValueError: Unable to set value \"[ 39361.   39361.   39361.  ... 134410.5 134410.5   6000. ]\" for variable \"corporate_wealth\", as its length is 8470 while there are 4108 households in the simulation.\n",
+      "\n",
+      "This confirms the bug exists in the high-level API.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Traceback (most recent call last):\n",
+      "  File \"/var/folders/qf/7fglpnr94wsc5xgbqkp80xbw0000gn/T/ipykernel_19066/3661659745.py\", line 7, in <module>\n",
+      "    sim_wales = Simulation(country=\"uk\", scope=\"macro\", region=\"country/wales\")\n",
+      "  File \"/Users/administrator/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py\", line 110, in __init__\n",
+      "    self._initialise_simulations()\n",
+      "    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^\n",
+      "  File \"/Users/administrator/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py\", line 202, in _initialise_simulations\n",
+      "    self.baseline_simulation = self._initialise_simulation(\n",
+      "                               ~~~~~~~~~~~~~~~~~~~~~~~~~~~^\n",
+      "        scope=self.options.scope,\n",
+      "        ^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+      "    ...<5 lines>...\n",
+      "        subsample=self.options.subsample,\n",
+      "        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+      "    )\n",
+      "    ^\n",
+      "  File \"/Users/administrator/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py\", line 260, in _initialise_simulation\n",
+      "    simulation = self._apply_region_to_simulation(\n",
+      "        country=country,\n",
+      "    ...<4 lines>...\n",
+      "        time_period=time_period,\n",
+      "    )\n",
+      "  File \"/Users/administrator/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py\", line 307, in _apply_region_to_simulation\n",
+      "    simulation = simulation_type(\n",
+      "        dataset=filtered_df, reform=reform\n",
+      "    )\n",
+      "  File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_uk/simulation.py\", line 100, in __init__\n",
+      "    self.build_from_dataframe(dataset)\n",
+      "    ~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^\n",
+      "  File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_uk/simulation.py\", line 286, in build_from_dataframe\n",
+      "    self.set_input(variable, time_period, df[column])\n",
+      "    ~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+      "  File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/simulations/simulation.py\", line 1241, in set_input\n",
+      "    self.get_holder(variable_name).set_input(\n",
+      "    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^\n",
+      "        period, value, self.branch_name\n",
+      "        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+      "    )\n",
+      "    ^\n",
+      "  File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/holders/holder.py\", line 244, in set_input\n",
+      "    return self._set(period, array, branch_name)\n",
+      "           ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+      "  File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/holders/holder.py\", line 286, in _set\n",
+      "    value = self._to_array(value)\n",
+      "  File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/holders/holder.py\", line 253, in _to_array\n",
+      "    raise ValueError(\n",
+      "    ...<7 lines>...\n",
+      "    )\n",
+      "ValueError: Unable to set value \"[ 39361.   39361.   39361.  ... 134410.5 134410.5   6000. ]\" for variable \"corporate_wealth\", as its length is 8470 while there are 4108 households in the simulation.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Test with policyengine.Simulation using region=\"country/wales\"\n",
+    "print(\"=== Step 7: Test with policyengine.Simulation ===\")\n",
+    "print(\"Creating Simulation with region='country/wales'...\")\n",
+    "print()\n",
+    "\n",
+    "try:\n",
+    "    sim_wales = Simulation(country=\"uk\", scope=\"macro\", region=\"country/wales\")\n",
+    "    \n",
+    "    wales_underlying = sim_wales.baseline_simulation\n",
+    "    print(\"Wales simulation created!\")\n",
+    "    print(f\"  Person count: {wales_underlying.persons.count}\")\n",
+    "    print(f\"  Household count: {wales_underlying.household.count}\")\n",
+    "    \n",
+    "    # Calculate on the underlying simulation: the policyengine.Simulation wrapper has no calculate()\n",
+    "    print(\"\\nCalculating would_evade_tv_licence_fee...\")\n",
+    "    result = wales_underlying.calculate(\"would_evade_tv_licence_fee\")\n",
+    "    print(f\"  Result length: {len(result)}\")\n",
+    "    print(\"  [OK] No error!\")\n",
+    "    \n",
+    "except Exception as e:\n",
+    "    print(f\"[ERROR] {type(e).__name__}: {e}\")\n",
+    "    print()\n",
+    "    print(\"This confirms the bug exists in the high-level API.\")\n",
+    "    traceback.print_exc()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cell-23",
+   "metadata": {},
+   "source": [
+    "## Step 8: Compare with Constituency Filtering (Should Work)\n",
+    "\n",
+    "Constituency filtering uses weight adjustment instead of DataFrame subsetting."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "cell-24",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "No data provided, using default dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n",
+      "Using dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n",
+      "Downloading enhanced_frs_2023_24.h5 from bucket policyengine-uk-data-private\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "=== Step 8: Test Constituency Filtering ===\n",
+      "Creating Simulation with region='constituency/Cardiff South and Penarth'...\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:root:No version specified for policyengine-uk-data-private, enhanced_frs_2023_24.h5. Using latest version: 1.29.4\n",
+      "WARNING:root:No metadata found for blob policyengine-uk-data-private, constituencies_2024.csv, so it has no version attached.\n",
+      "WARNING:root:No version specified for policyengine-uk-data-private, constituencies_2024.csv. Using latest version: None\n",
+      "WARNING:root:No version specified for policyengine-uk-data-private, parliamentary_constituency_weights.h5. Using latest version: 1.29.4\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Constituency simulation created!\n",
+      "  Person count: 115612\n",
+      "  Household count: 53508\n",
+      "  (Full UK counts, but weights adjusted for constituency)\n",
+      "\n",
+      "Calculating would_evade_tv_licence_fee...\n",
+      "[ERROR] AttributeError: 'Simulation' object has no attribute 'calculate'\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Traceback (most recent call last):\n",
+      "  File \"/var/folders/qf/7fglpnr94wsc5xgbqkp80xbw0000gn/T/ipykernel_19066/2462177757.py\", line 21, in <module>\n",
+      "    result = sim_const.calculate(\"would_evade_tv_licence_fee\")\n",
+      "             ^^^^^^^^^^^^^^^^^^^\n",
+      "AttributeError: 'Simulation' object has no attribute 'calculate'\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Test constituency filtering\n",
+    "print(\"=== Step 8: Test Constituency Filtering ===\")\n",
+    "print(\"Creating Simulation with region='constituency/Cardiff South and Penarth'...\")\n",
+    "print()\n",
+    "\n",
+    "try:\n",
+    "    sim_const = Simulation(\n",
+    "        country=\"uk\", \n",
+    "        scope=\"macro\", \n",
+    "        region=\"constituency/Cardiff South and Penarth\"\n",
+    "    )\n",
+    "    \n",
+    "    const_underlying = sim_const.baseline_simulation\n",
+    "    print(\"Constituency simulation created!\")\n",
+    "    print(f\"  Person count: {const_underlying.persons.count}\")\n",
+    "    print(f\"  Household count: {const_underlying.household.count}\")\n",
+    "    print(\"  (Full UK counts, but weights adjusted for constituency)\")\n",
+    "    \n",
+    "    # Calculate on the underlying simulation: the policyengine.Simulation wrapper has no calculate()\n",
+    "    print(\"\\nCalculating would_evade_tv_licence_fee...\")\n",
+    "    result = const_underlying.calculate(\"would_evade_tv_licence_fee\")\n",
+    "    print(f\"  Result length: {len(result)}\")\n",
+    "    print(\"  [OK] Constituency filtering works!\")\n",
+    "    \n",
+    "except Exception as e:\n",
+    "    print(f\"[ERROR] {type(e).__name__}: {e}\")\n",
+    "    traceback.print_exc()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "cell-25",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "=== Step 8b: Test Local Authority Filtering ===\n",
+      "Creating Simulation with region='local_authority/Cardiff'...\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "No data provided, using default dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n",
+      "Using dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n",
+      "Downloading enhanced_frs_2023_24.h5 from bucket policyengine-uk-data-private\n",
+      "WARNING:root:No version specified for policyengine-uk-data-private, enhanced_frs_2023_24.h5. Using latest version: 1.29.4\n",
+      "WARNING:root:No metadata found for blob policyengine-uk-data-private, local_authorities_2021.csv, so it has no version attached.\n",
+      "WARNING:root:No version specified for policyengine-uk-data-private, local_authorities_2021.csv. Using latest version: None\n",
+      "WARNING:root:No version specified for policyengine-uk-data-private, local_authority_weights.h5. Using latest version: 1.29.4\n"
+     ]
+    },
+    {
+     "ename": "KeyboardInterrupt",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[17], line 7\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[38;5;28mprint\u001b[39m()\n\u001b[1;32m      6\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m----> 7\u001b[0m     sim_la \u001b[38;5;241m=\u001b[39m \u001b[43mSimulation\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m      8\u001b[0m \u001b[43m        \u001b[49m\u001b[43mcountry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muk\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m      9\u001b[0m \u001b[43m        \u001b[49m\u001b[43mscope\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmacro\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m     10\u001b[0m \u001b[43m        \u001b[49m\u001b[43mregion\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlocal_authority/Cardiff\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m     11\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     13\u001b[0m     la_underlying \u001b[38;5;241m=\u001b[39m sim_la\u001b[38;5;241m.\u001b[39mbaseline_simulation\n\u001b[1;32m     14\u001b[0m     \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mLocal Authority simulation created!\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
+      "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py:110\u001b[0m, in \u001b[0;36mSimulation.__init__\u001b[0;34m(self, **options)\u001b[0m\n\u001b[1;32m    108\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_set_data(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mdata)\n\u001b[1;32m    109\u001b[0m     logging\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mData loaded\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 110\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_initialise_simulations\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    111\u001b[0m logging\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSimulations initialised\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m    112\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcheck_data_version()\n",
+      "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py:202\u001b[0m, in \u001b[0;36mSimulation._initialise_simulations\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    201\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_initialise_simulations\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m--> 202\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbaseline_simulation \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_initialise_simulation\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    203\u001b[0m \u001b[43m        \u001b[49m\u001b[43mscope\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mscope\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    204\u001b[0m \u001b[43m        \u001b[49m\u001b[43mcountry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcountry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    205\u001b[0m \u001b[43m        \u001b[49m\u001b[43mreform\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbaseline\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    206\u001b[0m \u001b[43m        \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    207\u001b[0m \u001b[43m        
\u001b[49m\u001b[43mtime_period\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtime_period\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    208\u001b[0m \u001b[43m        \u001b[49m\u001b[43mregion\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mregion\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    209\u001b[0m \u001b[43m        \u001b[49m\u001b[43msubsample\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msubsample\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    210\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    212\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mreform \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    213\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreform_simulation \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_initialise_simulation(\n\u001b[1;32m    214\u001b[0m             scope\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mscope,\n\u001b[1;32m    215\u001b[0m             country\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mcountry,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    220\u001b[0m             subsample\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39msubsample,\n\u001b[1;32m    
221\u001b[0m         )\n",
+      "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py:260\u001b[0m, in \u001b[0;36mSimulation._initialise_simulation\u001b[0;34m(self, country, scope, reform, data, time_period, region, subsample)\u001b[0m\n\u001b[1;32m    257\u001b[0m simulation\u001b[38;5;241m.\u001b[39mdefault_calculation_period \u001b[38;5;241m=\u001b[39m time_period\n\u001b[1;32m    259\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m region \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 260\u001b[0m     simulation \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_apply_region_to_simulation\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    261\u001b[0m \u001b[43m        \u001b[49m\u001b[43mcountry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcountry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    262\u001b[0m \u001b[43m        \u001b[49m\u001b[43msimulation\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msimulation\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    263\u001b[0m \u001b[43m        \u001b[49m\u001b[43msimulation_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_simulation_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    264\u001b[0m \u001b[43m        \u001b[49m\u001b[43mregion\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mregion\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    265\u001b[0m \u001b[43m        \u001b[49m\u001b[43mreform\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreform\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    266\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtime_period\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtime_period\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    267\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    269\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m subsample \u001b[38;5;129;01mis\u001b[39;00m 
\u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    270\u001b[0m     simulation \u001b[38;5;241m=\u001b[39m simulation\u001b[38;5;241m.\u001b[39msubsample(subsample)\n",
+      "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py:366\u001b[0m, in \u001b[0;36mSimulation._apply_region_to_simulation\u001b[0;34m(self, country, simulation, simulation_type, region, reform, time_period)\u001b[0m\n\u001b[1;32m    362\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    363\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m    364\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mLocal authority \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mla\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m not found. See \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mla_names_local_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m for the list of available local authorities.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    365\u001b[0m     )\n\u001b[0;32m--> 366\u001b[0m weights_local_path \u001b[38;5;241m=\u001b[39m \u001b[43mdownload\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    367\u001b[0m \u001b[43m    \u001b[49m\u001b[43mgcs_bucket\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpolicyengine-uk-data-private\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m    368\u001b[0m \u001b[43m    \u001b[49m\u001b[43mgcs_key\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlocal_authority_weights.h5\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m    369\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    371\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m h5py\u001b[38;5;241m.\u001b[39mFile(weights_local_path, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[1;32m    372\u001b[0m     weights \u001b[38;5;241m=\u001b[39m 
f[\u001b[38;5;28mstr\u001b[39m(time_period)][\u001b[38;5;241m.\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;241m.\u001b[39m]\n",
+      "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/utils/data_download.py:38\u001b[0m, in \u001b[0;36mdownload\u001b[0;34m(gcs_key, gcs_bucket, version, return_version)\u001b[0m\n\u001b[1;32m     21\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m     22\u001b[0m \u001b[38;5;124;03mDownload a file from Google Cloud Storage.\u001b[39;00m\n\u001b[1;32m     23\u001b[0m \n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     35\u001b[0m \u001b[38;5;124;03m    Otherwise: just the local_path string\u001b[39;00m\n\u001b[1;32m     36\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m     37\u001b[0m logging\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUsing Google Cloud Storage for download.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 38\u001b[0m local_path, downloaded_version \u001b[38;5;241m=\u001b[39m \u001b[43mdownload_file_from_gcs\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m     39\u001b[0m \u001b[43m    \u001b[49m\u001b[43mbucket_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgcs_bucket\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     40\u001b[0m \u001b[43m    \u001b[49m\u001b[43mgcs_key\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgcs_key\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     41\u001b[0m \u001b[43m    \u001b[49m\u001b[43mversion\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mversion\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     42\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     43\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m return_version:\n\u001b[1;32m     44\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m local_path, downloaded_version\n",
+      "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/utils/google_cloud_bucket.py:75\u001b[0m, in \u001b[0;36mdownload_file_from_gcs\u001b[0;34m(bucket_name, gcs_key, version)\u001b[0m\n\u001b[1;32m     72\u001b[0m local_path \u001b[38;5;241m=\u001b[39m DATASETS_DIR \u001b[38;5;241m/\u001b[39m gcs_key\n\u001b[1;32m     73\u001b[0m local_path\u001b[38;5;241m.\u001b[39mparent\u001b[38;5;241m.\u001b[39mmkdir(parents\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, exist_ok\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m---> 75\u001b[0m version \u001b[38;5;241m=\u001b[39m \u001b[43m_get_client\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdownload\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m     76\u001b[0m \u001b[43m    \u001b[49m\u001b[43mbucket_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     77\u001b[0m \u001b[43m    \u001b[49m\u001b[43mgcs_key\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     78\u001b[0m \u001b[43m    \u001b[49m\u001b[43mlocal_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     79\u001b[0m \u001b[43m    \u001b[49m\u001b[43mversion\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mversion\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     80\u001b[0m \u001b[43m    \u001b[49m\u001b[43mreturn_version\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m     81\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     82\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mstr\u001b[39m(local_path), version\n",
+      "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/utils/data/caching_google_storage_client.py:64\u001b[0m, in \u001b[0;36mCachingGoogleStorageClient.download\u001b[0;34m(self, bucket, key, target, version, return_version)\u001b[0m\n\u001b[1;32m     60\u001b[0m     version \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclient\u001b[38;5;241m.\u001b[39m_get_latest_version(bucket, key)\n\u001b[1;32m     61\u001b[0m     logging\u001b[38;5;241m.\u001b[39mwarning(\n\u001b[1;32m     62\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo version specified for \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbucket\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m, \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkey\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m. Using latest version: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mversion\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m     63\u001b[0m     )\n\u001b[0;32m---> 64\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msync\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbucket\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mversion\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     65\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcache\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_data_key(bucket, key, version))\n\u001b[1;32m     66\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(data) \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28mbytes\u001b[39m:\n",
+      "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/utils/data/caching_google_storage_client.py:106\u001b[0m, in \u001b[0;36mCachingGoogleStorageClient.sync\u001b[0;34m(self, bucket, key, version)\u001b[0m\n\u001b[1;32m    104\u001b[0m     logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCache exists and crc is unchanged for \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mid_string\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m .\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m    105\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[0;32m--> 106\u001b[0m [content, downloaded_crc] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdownload\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    107\u001b[0m \u001b[43m    \u001b[49m\u001b[43mbucket\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mversion\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mversion\u001b[49m\n\u001b[1;32m    108\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    109\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\n\u001b[1;32m    110\u001b[0m     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDownloaded new version of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mid_string\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m with crc \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdownloaded_crc\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    111\u001b[0m )\n\u001b[1;32m    113\u001b[0m \u001b[38;5;66;03m# atomic transaction to update both the data and the metadata\u001b[39;00m\n\u001b[1;32m    114\u001b[0m \u001b[38;5;66;03m# at the same time.\u001b[39;00m\n",
+      "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/utils/data/version_aware_storage_client.py:171\u001b[0m, in \u001b[0;36mVersionAwareStorageClient.download\u001b[0;34m(self, bucket_name, key, version)\u001b[0m\n\u001b[1;32m    166\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\n\u001b[1;32m    167\u001b[0m     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDownloading \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbucket_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkey\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    168\u001b[0m     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, version: \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;250m \u001b[39m\u001b[38;5;241m+\u001b[39m\u001b[38;5;250m \u001b[39mversion\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mif\u001b[39;00m\u001b[38;5;250m \u001b[39mversion\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01melse\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    169\u001b[0m )\n\u001b[1;32m    170\u001b[0m blob \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_blob(bucket_name, key, version)\n\u001b[0;32m--> 171\u001b[0m content \u001b[38;5;241m=\u001b[39m \u001b[43mblob\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdownload_as_bytes\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    172\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\n\u001b[1;32m    173\u001b[0m     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDownloaded 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbucket_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkey\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    174\u001b[0m     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, version: \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;250m \u001b[39m\u001b[38;5;241m+\u001b[39m\u001b[38;5;250m \u001b[39mversion\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mif\u001b[39;00m\u001b[38;5;250m \u001b[39mversion\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01melse\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    175\u001b[0m )\n\u001b[1;32m    176\u001b[0m \u001b[38;5;66;03m# According to documentation, blob.crc32c is updated as a side effect of\u001b[39;00m\n\u001b[1;32m    177\u001b[0m \u001b[38;5;66;03m# downloading the content. This should be the CRC of the downloaded\u001b[39;00m\n\u001b[1;32m    178\u001b[0m \u001b[38;5;66;03m# content (avoiding race conditions with the cloud).\u001b[39;00m\n",
+      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/cloud/storage/blob.py:1530\u001b[0m, in \u001b[0;36mBlob.download_as_bytes\u001b[0;34m(self, client, start, end, raw_download, if_etag_match, if_etag_not_match, if_generation_match, if_generation_not_match, if_metageneration_match, if_metageneration_not_match, timeout, checksum, retry, single_shot_download)\u001b[0m\n\u001b[1;32m   1527\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m create_trace_span(name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mStorage.Blob.downloadAsBytes\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m   1528\u001b[0m     string_buffer \u001b[38;5;241m=\u001b[39m BytesIO()\n\u001b[0;32m-> 1530\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_prep_and_do_download\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1531\u001b[0m \u001b[43m        \u001b[49m\u001b[43mstring_buffer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1532\u001b[0m \u001b[43m        \u001b[49m\u001b[43mclient\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mclient\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1533\u001b[0m \u001b[43m        \u001b[49m\u001b[43mstart\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1534\u001b[0m \u001b[43m        \u001b[49m\u001b[43mend\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mend\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1535\u001b[0m \u001b[43m        \u001b[49m\u001b[43mraw_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mraw_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1536\u001b[0m \u001b[43m        \u001b[49m\u001b[43mif_etag_match\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mif_etag_match\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1537\u001b[0m \u001b[43m        
\u001b[49m\u001b[43mif_etag_not_match\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mif_etag_not_match\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1538\u001b[0m \u001b[43m        \u001b[49m\u001b[43mif_generation_match\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mif_generation_match\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1539\u001b[0m \u001b[43m        \u001b[49m\u001b[43mif_generation_not_match\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mif_generation_not_match\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1540\u001b[0m \u001b[43m        \u001b[49m\u001b[43mif_metageneration_match\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mif_metageneration_match\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1541\u001b[0m \u001b[43m        \u001b[49m\u001b[43mif_metageneration_not_match\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mif_metageneration_not_match\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1542\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1543\u001b[0m \u001b[43m        \u001b[49m\u001b[43mchecksum\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchecksum\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1544\u001b[0m \u001b[43m        \u001b[49m\u001b[43mretry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1545\u001b[0m \u001b[43m        \u001b[49m\u001b[43msingle_shot_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msingle_shot_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1546\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1547\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m string_buffer\u001b[38;5;241m.\u001b[39mgetvalue()\n",
+      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/cloud/storage/blob.py:4659\u001b[0m, in \u001b[0;36mBlob._prep_and_do_download\u001b[0;34m(self, file_obj, client, start, end, raw_download, if_etag_match, if_etag_not_match, if_generation_match, if_generation_not_match, if_metageneration_match, if_metageneration_not_match, timeout, checksum, retry, single_shot_download, command)\u001b[0m\n\u001b[1;32m   4656\u001b[0m transport \u001b[38;5;241m=\u001b[39m client\u001b[38;5;241m.\u001b[39m_http\n\u001b[1;32m   4658\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 4659\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_do_download\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   4660\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtransport\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4661\u001b[0m \u001b[43m        \u001b[49m\u001b[43mfile_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4662\u001b[0m \u001b[43m        \u001b[49m\u001b[43mdownload_url\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4663\u001b[0m \u001b[43m        \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4664\u001b[0m \u001b[43m        \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4665\u001b[0m \u001b[43m        \u001b[49m\u001b[43mend\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4666\u001b[0m \u001b[43m        \u001b[49m\u001b[43mraw_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4667\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4668\u001b[0m \u001b[43m        \u001b[49m\u001b[43mchecksum\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchecksum\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4669\u001b[0m \u001b[43m        
\u001b[49m\u001b[43mretry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4670\u001b[0m \u001b[43m        \u001b[49m\u001b[43msingle_shot_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msingle_shot_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4671\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   4672\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m InvalidResponse \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[1;32m   4673\u001b[0m     _raise_from_invalid_response(exc)\n",
+      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/cloud/storage/blob.py:1094\u001b[0m, in \u001b[0;36mBlob._do_download\u001b[0;34m(self, transport, file_obj, download_url, headers, start, end, raw_download, timeout, checksum, retry, single_shot_download)\u001b[0m\n\u001b[1;32m   1076\u001b[0m     download \u001b[38;5;241m=\u001b[39m klass(\n\u001b[1;32m   1077\u001b[0m         download_url,\n\u001b[1;32m   1078\u001b[0m         stream\u001b[38;5;241m=\u001b[39mfile_obj,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   1087\u001b[0m         single_shot_download\u001b[38;5;241m=\u001b[39msingle_shot_download,\n\u001b[1;32m   1088\u001b[0m     )\n\u001b[1;32m   1089\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m create_trace_span(\n\u001b[1;32m   1090\u001b[0m         name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mStorage.\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdownload_class\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/consume\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m   1091\u001b[0m         attributes\u001b[38;5;241m=\u001b[39mextra_attributes,\n\u001b[1;32m   1092\u001b[0m         api_request\u001b[38;5;241m=\u001b[39margs,\n\u001b[1;32m   1093\u001b[0m     ):\n\u001b[0;32m-> 1094\u001b[0m         response \u001b[38;5;241m=\u001b[39m \u001b[43mdownload\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconsume\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtransport\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1095\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_extract_headers_from_download(response)\n\u001b[1;32m   1096\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
+      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/cloud/storage/_media/requests/download.py:280\u001b[0m, in \u001b[0;36mDownload.consume\u001b[0;34m(self, transport, timeout)\u001b[0m\n\u001b[1;32m    276\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_write_to_stream(result)\n\u001b[1;32m    278\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m result\n\u001b[0;32m--> 280\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_request_helpers\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwait_and_retry\u001b[49m\u001b[43m(\u001b[49m\u001b[43mretriable_request\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_strategy\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/cloud/storage/_media/requests/_request_helpers.py:107\u001b[0m, in \u001b[0;36mwait_and_retry\u001b[0;34m(func, retry_strategy)\u001b[0m\n\u001b[1;32m    105\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m retry_strategy:\n\u001b[1;32m    106\u001b[0m     func \u001b[38;5;241m=\u001b[39m retry_strategy(func)\n\u001b[0;32m--> 107\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:294\u001b[0m, in \u001b[0;36mRetry.__call__.<locals>.retry_wrapped_func\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    290\u001b[0m target \u001b[38;5;241m=\u001b[39m functools\u001b[38;5;241m.\u001b[39mpartial(func, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m    291\u001b[0m sleep_generator \u001b[38;5;241m=\u001b[39m exponential_sleep_generator(\n\u001b[1;32m    292\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_initial, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_maximum, multiplier\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_multiplier\n\u001b[1;32m    293\u001b[0m )\n\u001b[0;32m--> 294\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mretry_target\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    295\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtarget\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    296\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_predicate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    297\u001b[0m \u001b[43m    \u001b[49m\u001b[43msleep_generator\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    298\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    299\u001b[0m \u001b[43m    \u001b[49m\u001b[43mon_error\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mon_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    300\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:147\u001b[0m, in \u001b[0;36mretry_target\u001b[0;34m(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)\u001b[0m\n\u001b[1;32m    145\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[1;32m    146\u001b[0m     \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 147\u001b[0m         result \u001b[38;5;241m=\u001b[39m \u001b[43mtarget\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    148\u001b[0m         \u001b[38;5;28;01mif\u001b[39;00m inspect\u001b[38;5;241m.\u001b[39misawaitable(result):\n\u001b[1;32m    149\u001b[0m             warnings\u001b[38;5;241m.\u001b[39mwarn(_ASYNC_RETRY_WARNING)\n",
+      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/cloud/storage/_media/requests/download.py:276\u001b[0m, in \u001b[0;36mDownload.consume.<locals>.retriable_request\u001b[0;34m()\u001b[0m\n\u001b[1;32m    273\u001b[0m             \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(msg) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mexc\u001b[39;00m\n\u001b[1;32m    274\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_bytes_downloaded \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m--> 276\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_write_to_stream\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresult\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    278\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m result\n",
+      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/cloud/storage/_media/requests/download.py:149\u001b[0m, in \u001b[0;36mDownload._write_to_stream\u001b[0;34m(self, response)\u001b[0m\n\u001b[1;32m    144\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    145\u001b[0m     body_iter \u001b[38;5;241m=\u001b[39m response\u001b[38;5;241m.\u001b[39miter_content(\n\u001b[1;32m    146\u001b[0m         chunk_size\u001b[38;5;241m=\u001b[39m_request_helpers\u001b[38;5;241m.\u001b[39m_SINGLE_GET_CHUNK_SIZE,\n\u001b[1;32m    147\u001b[0m         decode_unicode\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m    148\u001b[0m     )\n\u001b[0;32m--> 149\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mchunk\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mbody_iter\u001b[49m\u001b[43m:\u001b[49m\n\u001b[1;32m    150\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_stream\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwrite\u001b[49m\u001b[43m(\u001b[49m\u001b[43mchunk\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    151\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_bytes_downloaded\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mchunk\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/requests/models.py:820\u001b[0m, in \u001b[0;36mResponse.iter_content.<locals>.generate\u001b[0;34m()\u001b[0m\n\u001b[1;32m    818\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraw, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstream\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m    819\u001b[0m     \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 820\u001b[0m         \u001b[38;5;28;01myield from\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraw\u001b[38;5;241m.\u001b[39mstream(chunk_size, decode_content\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m    821\u001b[0m     \u001b[38;5;28;01mexcept\u001b[39;00m ProtocolError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m    822\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m ChunkedEncodingError(e)\n",
+      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/urllib3/response.py:1253\u001b[0m, in \u001b[0;36mHTTPResponse.stream\u001b[0;34m(self, amt, decode_content)\u001b[0m\n\u001b[1;32m   1247\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   1248\u001b[0m     \u001b[38;5;28;01mwhile\u001b[39;00m (\n\u001b[1;32m   1249\u001b[0m         \u001b[38;5;129;01mnot\u001b[39;00m is_fp_closed(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp)\n\u001b[1;32m   1250\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoded_buffer) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m   1251\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoder \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoder\u001b[38;5;241m.\u001b[39mhas_unconsumed_tail)\n\u001b[1;32m   1252\u001b[0m     ):\n\u001b[0;32m-> 1253\u001b[0m         data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mamt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_content\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1255\u001b[0m         \u001b[38;5;28;01mif\u001b[39;00m data:\n\u001b[1;32m   1256\u001b[0m             \u001b[38;5;28;01myield\u001b[39;00m data\n",
+      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/urllib3/response.py:1108\u001b[0m, in \u001b[0;36mHTTPResponse.read\u001b[0;34m(self, amt, decode_content, cache_content)\u001b[0m\n\u001b[1;32m   1105\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoded_buffer) \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m amt:\n\u001b[1;32m   1106\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoded_buffer\u001b[38;5;241m.\u001b[39mget(amt)\n\u001b[0;32m-> 1108\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_raw_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1110\u001b[0m flush_decoder \u001b[38;5;241m=\u001b[39m amt \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mor\u001b[39;00m (amt \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m data)\n\u001b[1;32m   1112\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m   1113\u001b[0m     \u001b[38;5;129;01mnot\u001b[39;00m data\n\u001b[1;32m   1114\u001b[0m     \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoded_buffer) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m   1115\u001b[0m     \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoder \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoder\u001b[38;5;241m.\u001b[39mhas_unconsumed_tail)\n\u001b[1;32m   1116\u001b[0m ):\n",
+      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/urllib3/response.py:1024\u001b[0m, in \u001b[0;36mHTTPResponse._raw_read\u001b[0;34m(self, amt, read1)\u001b[0m\n\u001b[1;32m   1021\u001b[0m fp_closed \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mclosed\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m   1023\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_error_catcher():\n\u001b[0;32m-> 1024\u001b[0m     data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fp_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mread1\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mread1\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m fp_closed \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   1025\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m amt \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m amt \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m data:\n\u001b[1;32m   1026\u001b[0m         \u001b[38;5;66;03m# Platform-specific: Buggy versions of Python.\u001b[39;00m\n\u001b[1;32m   1027\u001b[0m         \u001b[38;5;66;03m# Close the connection when no data is returned\u001b[39;00m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   1032\u001b[0m         \u001b[38;5;66;03m# not properly close the connection in all cases. 
There is\u001b[39;00m\n\u001b[1;32m   1033\u001b[0m         \u001b[38;5;66;03m# no harm in redundantly calling close.\u001b[39;00m\n\u001b[1;32m   1034\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mclose()\n",
+      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/urllib3/response.py:1007\u001b[0m, in \u001b[0;36mHTTPResponse._fp_read\u001b[0;34m(self, amt, read1)\u001b[0m\n\u001b[1;32m   1004\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mread1(amt) \u001b[38;5;28;01mif\u001b[39;00m amt \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mread1()\n\u001b[1;32m   1005\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   1006\u001b[0m     \u001b[38;5;66;03m# StringIO doesn't like amt=None\u001b[39;00m\n\u001b[0;32m-> 1007\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m amt \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mread()\n",
+      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/http/client.py:479\u001b[0m, in \u001b[0;36mHTTPResponse.read\u001b[0;34m(self, amt)\u001b[0m\n\u001b[1;32m    476\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlength \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m amt \u001b[38;5;241m>\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlength:\n\u001b[1;32m    477\u001b[0m     \u001b[38;5;66;03m# clip the read to the \"end of response\"\u001b[39;00m\n\u001b[1;32m    478\u001b[0m     amt \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlength\n\u001b[0;32m--> 479\u001b[0m s \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    480\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m s \u001b[38;5;129;01mand\u001b[39;00m amt:\n\u001b[1;32m    481\u001b[0m     \u001b[38;5;66;03m# Ideally, we would raise IncompleteRead if the content-length\u001b[39;00m\n\u001b[1;32m    482\u001b[0m     \u001b[38;5;66;03m# wasn't satisfied, but it might break compatibility.\u001b[39;00m\n\u001b[1;32m    483\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_close_conn()\n",
+      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/socket.py:719\u001b[0m, in \u001b[0;36mSocketIO.readinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m    717\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mOSError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcannot read from timed out object\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m    718\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 719\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrecv_into\u001b[49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    720\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m timeout:\n\u001b[1;32m    721\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_timeout_occurred \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
+      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/ssl.py:1304\u001b[0m, in \u001b[0;36mSSLSocket.recv_into\u001b[0;34m(self, buffer, nbytes, flags)\u001b[0m\n\u001b[1;32m   1300\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m flags \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m   1301\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m   1302\u001b[0m           \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnon-zero flags not allowed in calls to recv_into() on \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m\n\u001b[1;32m   1303\u001b[0m           \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m)\n\u001b[0;32m-> 1304\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnbytes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1305\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   1306\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39mrecv_into(buffer, nbytes, flags)\n",
+      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/ssl.py:1138\u001b[0m, in \u001b[0;36mSSLSocket.read\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m   1136\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m   1137\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m buffer \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1138\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sslobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1139\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   1140\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_sslobj\u001b[38;5;241m.\u001b[39mread(\u001b[38;5;28mlen\u001b[39m)\n",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+     ]
+    }
+   ],
+   "source": [
+    "# Test local authority filtering\n",
+    "print(\"\\n=== Step 8b: Test Local Authority Filtering ===\")\n",
+    "print(\"Creating Simulation with region='local_authority/Cardiff'...\")\n",
+    "print()\n",
+    "\n",
+    "try:\n",
+    "    sim_la = Simulation(\n",
+    "        country=\"uk\", \n",
+    "        scope=\"macro\", \n",
+    "        region=\"local_authority/Cardiff\"\n",
+    "    )\n",
+    "    \n",
+    "    la_underlying = sim_la.baseline_simulation\n",
+    "    print(f\"Local Authority simulation created!\")\n",
+    "    print(f\"  Person count: {la_underlying.persons.count}\")\n",
+    "    print(f\"  Household count: {la_underlying.household.count}\")\n",
+    "    print(\"  (Full UK counts, but weights adjusted for LA)\")\n",
+    "    \n",
+    "    # Try calculating the problematic variable\n",
+    "    print(\"\\nCalculating would_evade_tv_licence_fee...\")\n",
+    "    result = sim_la.calculate(\"would_evade_tv_licence_fee\")\n",
+    "    print(f\"  Result length: {len(result)}\")\n",
+    "    print(\"  [OK] Local authority filtering works!\")\n",
+    "    \n",
+    "except Exception as e:\n",
+    "    print(f\"[ERROR] {type(e).__name__}: {e}\")\n",
+    "    traceback.print_exc()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cell-26",
+   "metadata": {},
+   "source": [
+    "## Step 9: Deep Dive - Check random() Function Behavior"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cell-27",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Check what random(household) would return in the broken simulation\n",
+    "print(\"=== Step 9: Investigate random() function behavior ===\")\n",
+    "\n",
+    "# Import the random function\n",
+    "from policyengine_core.commons.formulas import random\n",
+    "\n",
+    "try:\n",
+    "    # Get household population from the new (potentially broken) simulation\n",
+    "    hh_pop = new_sim.household\n",
+    "    print(f\"Household population count: {hh_pop.count}\")\n",
+    "    \n",
+    "    # Check what household_id returns when calculated via population\n",
+    "    print(\"\\nCalling hh_pop('household_id', 2025)...\")\n",
+    "    hh_ids_from_pop = hh_pop(\"household_id\", 2025)\n",
+    "    print(f\"  Result length: {len(hh_ids_from_pop)}\")\n",
+    "    print(f\"  Expected: {hh_pop.count}\")\n",
+    "    \n",
+    "    if len(hh_ids_from_pop) != hh_pop.count:\n",
+    "        print(f\"\\n  [BUG CONFIRMED] household_id returned {len(hh_ids_from_pop)} values\")\n",
+    "        print(f\"  but household population only has {hh_pop.count} entities!\")\n",
+    "        print(\"  This is why random(household) fails.\")\n",
+    "        \n",
+    "except Exception as e:\n",
+    "    print(f\"Error: {e}\")\n",
+    "    traceback.print_exc()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cell-28",
+   "metadata": {},
+   "source": [
+    "## Summary and Conclusions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cell-29",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"=\"*70)\n",
+    "print(\"DIAGNOSTIC SUMMARY\")\n",
+    "print(\"=\"*70)\n",
+    "\n",
+    "print(\"\"\"\n",
+    "FINDINGS:\n",
+    "\n",
+    "1. COUNTRY FILTERING (country/wales):\n",
+    "   - Uses to_input_dataframe() + DataFrame subsetting + new Simulation()\n",
+    "   - Creates entity count mismatch between persons and households\n",
+    "   - Breaks when calculating variables that use random(household)\n",
+    "\n",
+    "2. CONSTITUENCY/LA FILTERING:\n",
+    "   - Uses weight adjustment on existing simulation\n",
+    "   - Preserves entity structure\n",
+    "   - Works correctly\n",
+    "\n",
+    "ROOT CAUSE:\n",
+    "   - The to_input_dataframe() -> filter -> new Simulation() approach\n",
+    "     doesn't properly preserve entity relationships\n",
+    "   - Either household_id isn't properly exported/imported, OR\n",
+    "   - The entity membership mapping gets corrupted during rebuild\n",
+    "\n",
+    "RECOMMENDED FIX:\n",
+    "   - Use weight-based filtering for country filtering (like constituency/LA)\n",
+    "   - Zero out weights for households not in the target country\n",
+    "   - This preserves entity structure and avoids the export/import complexity\n",
+    "\n",
+    "Example fix for policyengine/simulation.py:\n",
+    "\n",
+    "    if \"country/\" in region:\n",
+    "        country_name = region.split(\"/\")[1]\n",
+    "        country = simulation.calculate(\"country\", map_to=\"household\").values\n",
+    "        is_in_country = (country == country_name.upper())\n",
+    "        current_weights = simulation.calculate(\n",
+    "            \"household_weight\", simulation.default_calculation_period\n",
+    "        )\n",
+    "        simulation.set_input(\n",
+    "            \"household_weight\",\n",
+    "            simulation.default_calculation_period,\n",
+    "            current_weights * is_in_country  # Zero out non-matching\n",
+    "        )\n",
+    "\"\"\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "py-3.13",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/tests/unit/endpoints/economy/test_compare.py b/tests/unit/endpoints/economy/test_compare.py
index 0b8cf5f94..17ff66275 100644
--- a/tests/unit/endpoints/economy/test_compare.py
+++ b/tests/unit/endpoints/economy/test_compare.py
@@ -307,9 +307,13 @@ def test__downloads_from_correct_repos(
 
         # Verify correct repos are used
         calls = mock_download.call_args_list
-        assert calls[0][1]["repo"] == "policyengine/policyengine-uk-data-private"
+        assert (
+            calls[0][1]["repo"] == "policyengine/policyengine-uk-data-private"
+        )
         assert calls[0][1]["repo_filename"] == "local_authority_weights.h5"
-        assert calls[1][1]["repo"] == "policyengine/policyengine-uk-data-public"
+        assert (
+            calls[1][1]["repo"] == "policyengine/policyengine-uk-data-public"
+        )
         assert calls[1][1]["repo_filename"] == "local_authorities_2021.csv"
 
     def test__given_constituency_region__returns_none(self):
@@ -541,12 +545,16 @@ def test__given_non_uk_country_nigeria__returns_none(self):
 
     def test__given_local_authority_region__returns_none(self):
         """When simulating a local authority, constituency breakdown should not be computed."""
-        result = uk_constituency_breakdown({}, {}, "uk", "local_authority/Leicester")
+        result = uk_constituency_breakdown(
+            {}, {}, "uk", "local_authority/Leicester"
+        )
         assert result is None
 
     def test__given_local_authority_region_with_code__returns_none(self):
         """When simulating a local authority by code, constituency breakdown should not be computed."""
-        result = uk_constituency_breakdown({}, {}, "uk", "local_authority/E06000016")
+        result = uk_constituency_breakdown(
+            {}, {}, "uk", "local_authority/E06000016"
+        )
         assert result is None
 
     @patch(
diff --git a/tests/unit/test_country.py b/tests/unit/test_country.py
index 1b597ec0a..b57e8ceee 100644
--- a/tests/unit/test_country.py
+++ b/tests/unit/test_country.py
@@ -60,9 +60,7 @@ def test__local_authority_regions_have_type_field(self, uk_regions):
     def test__specific_local_authorities_present(self, uk_regions):
         """Verify specific local authorities are present in metadata."""
         local_authority_names = [
-            r["name"]
-            for r in uk_regions
-            if r.get("type") == "local_authority"
+            r["name"] for r in uk_regions if r.get("type") == "local_authority"
         ]
         # Check some well-known local authorities
         assert "local_authority/Hartlepool" in local_authority_names

From 945cf040db810392b3826d9c92dc20bc02efe7f9 Mon Sep 17 00:00:00 2001
From: Anthony Volk <anth.volk@gmail.com>
Date: Tue, 16 Dec 2025 13:31:29 +0400
Subject: [PATCH 5/7] fix: Explicitly sort parliamentary constituency and local
 authority lists

---
 policyengine_api/country.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/policyengine_api/country.py b/policyengine_api/country.py
index 4c602b347..29f64fbbe 100644
--- a/policyengine_api/country.py
+++ b/policyengine_api/country.py
@@ -73,6 +73,7 @@ def build_microsimulation_options(self) -> dict:
                 Path(__file__).parent / "data" / "constituencies_2024.csv"
             )
             constituency_names = pd.read_csv(constituency_names_path)
+            constituency_names = constituency_names.sort_values("name")
             region = [
                 dict(name="uk", label="the UK", type="national"),
                 dict(name="country/england", label="England", type="country"),
@@ -96,6 +97,7 @@ def build_microsimulation_options(self) -> dict:
                 Path(__file__).parent / "data" / "local_authorities_2021.csv"
             )
             local_authority_names = pd.read_csv(local_authority_names_path)
+            local_authority_names = local_authority_names.sort_values("name")
             for i in range(len(local_authority_names)):
                 region.append(
                     dict(

From 80e676cfdf9bbaae8b93e55e7255871e646981bb Mon Sep 17 00:00:00 2001
From: Anthony Volk <anth.volk@gmail.com>
Date: Tue, 16 Dec 2025 14:11:19 +0400
Subject: [PATCH 6/7] fix: Un-merge accidentally merged testing code

---
 scripts/.datasets/constituencies_2024.csv    |  651 ----------
 scripts/.datasets/local_authorities_2021.csv |  361 ------
 scripts/BUG_REPORT_build_from_dataframe.md   |  172 ---
 scripts/diagnose_country_filtering.ipynb     |  503 --------
 scripts/prove_build_from_dataframe_bug.ipynb |  841 -------------
 scripts/test_local_authority_api.py          |  570 ---------
 scripts/verify_country_filtering_bug.ipynb   | 1147 ------------------
 7 files changed, 4245 deletions(-)
 delete mode 100644 scripts/.datasets/constituencies_2024.csv
 delete mode 100644 scripts/.datasets/local_authorities_2021.csv
 delete mode 100644 scripts/BUG_REPORT_build_from_dataframe.md
 delete mode 100644 scripts/diagnose_country_filtering.ipynb
 delete mode 100644 scripts/prove_build_from_dataframe_bug.ipynb
 delete mode 100755 scripts/test_local_authority_api.py
 delete mode 100644 scripts/verify_country_filtering_bug.ipynb

diff --git a/scripts/.datasets/constituencies_2024.csv b/scripts/.datasets/constituencies_2024.csv
deleted file mode 100644
index bd9a1df28..000000000
--- a/scripts/.datasets/constituencies_2024.csv
+++ /dev/null
@@ -1,651 +0,0 @@
-code,name,x,y
-E14001063,Aldershot,56,-40
-E14001064,Aldridge-Brownhills,56,-30
-E14001065,Altrincham and Sale West,52,-25
-E14001066,Amber Valley,58,-27
-E14001067,Arundel and South Downs,61,-44
-E14001068,Ashfield,60,-27
-E14001069,Ashford,72,-42
-E14001070,Ashton-under-Lyne,54,-23
-E14001071,Aylesbury,60,-35
-E14001072,Banbury,58,-33
-E14001073,Barking,68,-38
-E14001074,Barnsley North,57,-23
-E14001075,Barnsley South,58,-23
-E14001076,Barrow and Furness,54,-16
-E14001077,Basildon and Billericay,67,-34
-E14001078,Basingstoke,55,-39
-E14001079,Bassetlaw,61,-26
-E14001080,Bath,51,-40
-E14001081,Battersea,62,-41
-E14001082,Beaconsfield,57,-37
-E14001083,Beckenham and Penge,65,-43
-E14001084,Bedford,63,-32
-E14001085,Bermondsey and Old Southwark,64,-40
-E14001086,Bethnal Green and Stepney,65,-39
-E14001087,Beverley and Holderness,64,-22
-E14001088,Bexhill and Battle,70,-44
-E14001089,Bexleyheath and Crayford,67,-39
-E14001090,Bicester and Woodstock,59,-34
-E14001091,Birkenhead,49,-27
-E14001092,Birmingham Edgbaston,53,-33
-E14001093,Birmingham Erdington,54,-31
-E14001094,Birmingham Hall Green and Moseley,55,-32
-E14001095,Birmingham Hodge Hill and Solihull North,55,-31
-E14001096,Birmingham Ladywood,54,-32
-E14001097,Birmingham Northfield,54,-34
-E14001098,Birmingham Perry Barr,53,-31
-E14001099,Birmingham Selly Oak,54,-33
-E14001100,Birmingham Yardley,56,-32
-E14001101,Bishop Auckland,54,-14
-E14001102,Blackburn,53,-19
-E14001103,Blackley and Middleton South,53,-23
-E14001104,Blackpool North and Fleetwood,53,-18
-E14001105,Blackpool South,52,-18
-E14001106,Blaydon and Consett,55,-14
-E14001107,Blyth and Ashington,55,-12
-E14001108,Bognor Regis and Littlehampton,63,-44
-E14001109,Bolsover,60,-26
-E14001110,Bolton North East,52,-21
-E14001111,Bolton South and Walkden,52,-22
-E14001112,Bolton West,51,-21
-E14001113,Bootle,49,-22
-E14001114,Boston and Skegness,64,-26
-E14001115,Bournemouth East,52,-43
-E14001116,Bournemouth West,52,-42
-E14001117,Bracknell,56,-39
-E14001118,Bradford East,58,-20
-E14001119,Bradford South,56,-21
-E14001120,Bradford West,57,-20
-E14001121,Braintree,67,-31
-E14001122,Brent East,61,-38
-E14001123,Brent West,60,-38
-E14001124,Brentford and Isleworth,60,-40
-E14001125,Brentwood and Ongar,66,-33
-E14001126,Bridgwater,48,-41
-E14001127,Bridlington and The Wolds,63,-20
-E14001128,Brigg and Immingham,62,-24
-E14001129,Brighton Kemptown and Peacehaven,67,-45
-E14001130,Brighton Pavilion,67,-44
-E14001131,Bristol Central,51,-38
-E14001132,Bristol East,52,-38
-E14001133,Bristol North East,51,-37
-E14001134,Bristol North West,50,-38
-E14001135,Bristol South,51,-39
-E14001136,Broadland and Fakenham,66,-27
-E14001137,Bromley and Biggin Hill,67,-42
-E14001138,Bromsgrove,52,-33
-E14001139,Broxbourne,66,-35
-E14001140,Broxtowe,59,-27
-E14001141,Buckingham and Bletchley,60,-34
-E14001142,Burnley,55,-19
-E14001143,Burton and Uttoxeter,56,-28
-E14001144,Bury North,53,-21
-E14001145,Bury South,53,-22
-E14001146,Bury St Edmunds and Stowmarket,68,-31
-E14001147,Calder Valley,56,-20
-E14001148,Camborne and Redruth,43,-45
-E14001149,Cambridge,65,-30
-E14001150,Cannock Chase,54,-29
-E14001151,Canterbury,71,-41
-E14001152,Carlisle,53,-14
-E14001153,Carshalton and Wallington,62,-43
-E14001154,Castle Point,69,-36
-E14001155,Central Devon,47,-42
-E14001156,Central Suffolk and North Ipswich,68,-29
-E14001157,Chatham and Aylesford,69,-40
-E14001158,Cheadle,55,-26
-E14001159,Chelmsford,67,-33
-E14001160,Chelsea and Fulham,61,-40
-E14001161,Cheltenham,52,-36
-E14001162,Chesham and Amersham,59,-36
-E14001163,Chester North and Neston,50,-28
-E14001164,Chester South and Eddisbury,51,-27
-E14001165,Chesterfield,59,-26
-E14001166,Chichester,60,-44
-E14001167,Chingford and Woodford Green,64,-35
-E14001168,Chippenham,52,-39
-E14001169,Chipping Barnet,62,-36
-E14001170,Chorley,53,-20
-E14001171,Christchurch,53,-42
-E14001172,Cities of London and Westminster,63,-40
-E14001173,City of Durham,55,-16
-E14001174,Clacton,69,-32
-E14001175,Clapham and Brixton Hill,62,-42
-E14001176,Colchester,68,-32
-E14001177,Colne Valley,55,-23
-E14001178,Congleton,54,-27
-E14001179,Corby and East Northamptonshire,62,-30
-E14001180,Coventry East,57,-33
-E14001181,Coventry North West,56,-33
-E14001182,Coventry South,57,-34
-E14001183,Cramlington and Killingworth,56,-12
-E14001184,Crawley,69,-44
-E14001185,Crewe and Nantwich,53,-27
-E14001186,Croydon East,65,-42
-E14001187,Croydon South,64,-43
-E14001188,Croydon West,63,-43
-E14001189,Dagenham and Rainham,67,-37
-E14001190,Darlington,55,-17
-E14001191,Dartford,68,-40
-E14001192,Daventry,60,-32
-E14001193,Derby North,58,-28
-E14001194,Derby South,57,-28
-E14001195,Derbyshire Dales,57,-26
-E14001196,Dewsbury and Batley,57,-22
-E14001197,Didcot and Wantage,54,-38
-E14001198,Doncaster Central,60,-23
-E14001199,Doncaster East and the Isle of Axholme,61,-23
-E14001200,Doncaster North,61,-22
-E14001201,Dorking and Horley,59,-43
-E14001202,Dover and Deal,72,-41
-E14001203,Droitwich and Evesham,54,-36
-E14001204,Dudley,51,-31
-E14001205,Dulwich and West Norwood,63,-42
-E14001206,Dunstable and Leighton Buzzard,62,-33
-E14001207,Ealing Central and Acton,59,-39
-E14001208,Ealing North,59,-38
-E14001209,Ealing Southall,58,-39
-E14001210,Earley and Woodley,56,-36
-E14001211,Easington,57,-16
-E14001212,East Grinstead and Uckfield,69,-43
-E14001213,East Ham,67,-38
-E14001214,East Hampshire,55,-41
-E14001215,East Surrey,67,-43
-E14001216,East Thanet,71,-39
-E14001217,East Wiltshire,53,-41
-E14001218,East Worthing and Shoreham,65,-44
-E14001219,Eastbourne,69,-45
-E14001220,Eastleigh,54,-41
-E14001221,Edmonton and Winchmore Hill,64,-36
-E14001222,Ellesmere Port and Bromborough,50,-27
-E14001223,Eltham and Chislehurst,66,-41
-E14001224,Ely and East Cambridgeshire,66,-30
-E14001225,Enfield North,62,-35
-E14001226,Epping Forest,67,-35
-E14001227,Epsom and Ewell,60,-43
-E14001228,Erewash,59,-28
-E14001229,Erith and Thamesmead,67,-40
-E14001230,Esher and Walton,58,-42
-E14001231,Exeter,48,-42
-E14001232,Exmouth and Exeter East,48,-43
-E14001233,Fareham and Waterlooville,55,-43
-E14001234,Farnham and Bordon,56,-42
-E14001235,Faversham and Mid Kent,71,-40
-E14001236,Feltham and Heston,59,-40
-E14001237,Filton and Bradley Stoke,50,-37
-E14001238,Finchley and Golders Green,61,-37
-E14001239,Folkestone and Hythe,71,-42
-E14001240,Forest of Dean,50,-35
-E14001241,Frome and East Somerset,50,-41
-E14001242,Fylde,51,-19
-E14001243,Gainsborough,61,-25
-E14001244,Gateshead Central and Whickham,56,-15
-E14001245,Gedling,61,-28
-E14001246,Gillingham and Rainham,70,-40
-E14001247,Glastonbury and Somerton,49,-41
-E14001248,Gloucester,51,-35
-E14001249,Godalming and Ash,57,-42
-E14001250,Goole and Pocklington,61,-21
-E14001251,Gorton and Denton,55,-24
-E14001252,Gosport,57,-43
-E14001253,Grantham and Bourne,63,-28
-E14001254,Gravesham,68,-39
-E14001255,Great Grimsby and Cleethorpes,63,-24
-E14001256,Great Yarmouth,67,-27
-E14001257,Greenwich and Woolwich,66,-40
-E14001258,Guildford,56,-41
-E14001259,Hackney North and Stoke Newington,64,-38
-E14001260,Hackney South and Shoreditch,64,-39
-E14001261,Halesowen,51,-33
-E14001262,Halifax,55,-21
-E14001263,Hamble Valley,56,-43
-E14001264,Hammersmith and Chiswick,60,-39
-E14001265,Hampstead and Highgate,62,-38
-E14001266,"Harborough, Oadby and Wigston",61,-31
-E14001267,Harlow,67,-32
-E14001268,Harpenden and Berkhamsted,62,-34
-E14001269,Harrogate and Knaresborough,59,-18
-E14001270,Harrow East,60,-37
-E14001271,Harrow West,59,-37
-E14001272,Hartlepool,59,-16
-E14001273,Harwich and North Essex,69,-31
-E14001274,Hastings and Rye,70,-43
-E14001275,Havant,59,-44
-E14001276,Hayes and Harlington,58,-38
-E14001277,Hazel Grove,55,-25
-E14001278,Hemel Hempstead,64,-34
-E14001279,Hendon,61,-36
-E14001280,Henley and Thame,58,-35
-E14001281,Hereford and South Herefordshire,51,-34
-E14001282,Herne Bay and Sandwich,72,-40
-E14001283,Hertford and Stortford,66,-32
-E14001284,Hertsmere,66,-34
-E14001285,Hexham,53,-13
-E14001286,Heywood and Middleton North,54,-20
-E14001287,High Peak,56,-25
-E14001288,Hinckley and Bosworth,58,-30
-E14001289,Hitchin,64,-32
-E14001290,Holborn and St Pancras,62,-39
-E14001291,Honiton and Sidmouth,49,-43
-E14001292,Hornchurch and Upminster,66,-37
-E14001293,Hornsey and Friern Barnet,63,-36
-E14001294,Horsham,62,-44
-E14001295,Houghton and Sunderland South,57,-15
-E14001296,Hove and Portslade,66,-44
-E14001297,Huddersfield,56,-22
-E14001298,Huntingdon,63,-31
-E14001299,Hyndburn,54,-19
-E14001300,Ilford North,65,-36
-E14001301,Ilford South,65,-37
-E14001302,Ipswich,68,-30
-E14001303,Isle of Wight East,54,-45
-E14001304,Isle of Wight West,53,-45
-E14001305,Islington North,63,-38
-E14001306,Islington South and Finsbury,63,-39
-E14001307,Jarrow and Gateshead East,57,-14
-E14001308,Keighley and Ilkley,56,-19
-E14001309,Kenilworth and Southam,56,-34
-E14001310,Kensington and Bayswater,61,-39
-E14001311,Kettering,61,-30
-E14001312,Kingston and Surbiton,59,-42
-E14001313,Kingston upon Hull East,63,-22
-E14001314,Kingston upon Hull North and Cottingham,62,-21
-E14001315,Kingston upon Hull West and Haltemprice,62,-22
-E14001316,Kingswinford and South Staffordshire,52,-30
-E14001317,Knowsley,50,-23
-E14001318,Lancaster and Wyre,54,-18
-E14001319,Leeds Central and Headingley,60,-20
-E14001320,Leeds East,61,-20
-E14001321,Leeds North East,59,-19
-E14001322,Leeds North West,58,-19
-E14001323,Leeds South,59,-21
-E14001324,Leeds South West and Morley,58,-21
-E14001325,Leeds West and Pudsey,59,-20
-E14001326,Leicester East,60,-30
-E14001327,Leicester South,60,-31
-E14001328,Leicester West,59,-31
-E14001329,Leigh and Atherton,51,-25
-E14001330,Lewes,68,-45
-E14001331,Lewisham East,66,-42
-E14001332,Lewisham North,65,-40
-E14001333,Lewisham West and East Dulwich,65,-41
-E14001334,Leyton and Wanstead,64,-37
-E14001335,Lichfield,56,-29
-E14001336,Lincoln,62,-25
-E14001337,Liverpool Garston,50,-25
-E14001338,Liverpool Riverside,49,-24
-E14001339,Liverpool Walton,49,-23
-E14001340,Liverpool Wavertree,49,-25
-E14001341,Liverpool West Derby,50,-24
-E14001342,Loughborough,59,-30
-E14001343,Louth and Horncastle,63,-25
-E14001344,Lowestoft,68,-28
-E14001345,Luton North,63,-33
-E14001346,Luton South and South Bedfordshire,63,-34
-E14001347,Macclesfield,56,-26
-E14001348,Maidenhead,57,-36
-E14001349,Maidstone and Malling,69,-41
-E14001350,Makerfield,51,-22
-E14001351,Maldon,69,-33
-E14001352,Manchester Central,54,-24
-E14001353,Manchester Rusholme,53,-25
-E14001354,Manchester Withington,54,-26
-E14001355,Mansfield,61,-27
-E14001356,Melksham and Devizes,52,-40
-E14001357,Melton and Syston,61,-29
-E14001358,Meriden and Solihull East,55,-33
-E14001359,Mid Bedfordshire,62,-32
-E14001360,Mid Buckinghamshire,59,-35
-E14001361,Mid Cheshire,52,-27
-E14001362,Mid Derbyshire,57,-27
-E14001363,Mid Dorset and North Poole,50,-43
-E14001364,Mid Leicestershire,58,-31
-E14001365,Mid Norfolk,65,-28
-E14001366,Mid Sussex,68,-43
-E14001367,Middlesbrough and Thornaby East,57,-17
-E14001368,Middlesbrough South and East Cleveland,59,-17
-E14001369,Milton Keynes Central,61,-34
-E14001370,Milton Keynes North,61,-33
-E14001371,Mitcham and Morden,61,-43
-E14001372,Morecambe and Lunesdale,54,-17
-E14001373,New Forest East,54,-43
-E14001374,New Forest West,53,-43
-E14001375,Newark,62,-26
-E14001376,Newbury,54,-37
-E14001377,Newcastle upon Tyne Central and West,54,-13
-E14001378,Newcastle upon Tyne East and Wallsend,56,-14
-E14001379,Newcastle upon Tyne North,55,-13
-E14001380,Newcastle-under-Lyme,52,-28
-E14001381,Newton Abbot,47,-43
-E14001382,Newton Aycliffe and Spennymoor,56,-16
-E14001383,Normanton and Hemsworth,59,-23
-E14001384,North Bedfordshire,62,-31
-E14001385,North Cornwall,45,-43
-E14001386,North Cotswolds,53,-37
-E14001387,North Devon,46,-41
-E14001388,North Dorset,51,-42
-E14001389,North Durham,54,-15
-E14001390,North East Cambridgeshire,64,-29
-E14001391,North East Derbyshire,58,-26
-E14001392,North East Hampshire,56,-38
-E14001393,North East Hertfordshire,65,-32
-E14001394,North East Somerset and Hanham,50,-39
-E14001395,North Herefordshire,52,-34
-E14001396,North Norfolk,65,-27
-E14001397,North Northumberland,54,-12
-E14001398,North Shropshire,50,-29
-E14001399,North Somerset,49,-39
-E14001400,North Warwickshire and Bedworth,57,-32
-E14001401,North West Cambridgeshire,64,-30
-E14001402,North West Essex,66,-31
-E14001403,North West Hampshire,54,-39
-E14001404,North West Leicestershire,58,-29
-E14001405,North West Norfolk,64,-28
-E14001406,Northampton North,61,-32
-E14001407,Northampton South,60,-33
-E14001408,Norwich North,66,-28
-E14001409,Norwich South,66,-29
-E14001410,Nottingham East,60,-29
-E14001411,Nottingham North and Kimberley,60,-28
-E14001412,Nottingham South,59,-29
-E14001413,Nuneaton,57,-31
-E14001414,Old Bexley and Sidcup,67,-41
-E14001415,Oldham East and Saddleworth,55,-22
-E14001416,"Oldham West, Chadderton and Royton",54,-22
-E14001417,Orpington,66,-43
-E14001418,Ossett and Denby Dale,58,-22
-E14001419,Oxford East,58,-34
-E14001420,Oxford West and Abingdon,57,-35
-E14001421,Peckham,64,-41
-E14001422,Pendle and Clitheroe,56,-18
-E14001423,Penistone and Stocksbridge,56,-23
-E14001424,Penrith and Solway,52,-15
-E14001425,Peterborough,63,-29
-E14001426,Plymouth Moor View,46,-43
-E14001427,Plymouth Sutton and Devonport,47,-44
-E14001428,"Pontefract, Castleford and Knottingley",60,-22
-E14001429,Poole,51,-43
-E14001430,Poplar and Limehouse,66,-39
-E14001431,Portsmouth North,58,-43
-E14001432,Portsmouth South,58,-44
-E14001433,Preston,52,-19
-E14001434,Putney,61,-41
-E14001435,Queen's Park and Maida Vale,62,-40
-E14001436,Rawmarsh and Conisbrough,60,-24
-E14001437,Rayleigh and Wickford,68,-34
-E14001438,Reading Central,55,-37
-E14001439,Reading West and Mid Berkshire,55,-36
-E14001440,Redcar,58,-17
-E14001441,Redditch,53,-35
-E14001442,Reigate,68,-44
-E14001443,Ribble Valley,55,-18
-E14001444,Richmond and Northallerton,57,-18
-E14001445,Richmond Park,59,-41
-E14001446,Rochdale,54,-21
-E14001447,Rochester and Strood,69,-39
-E14001448,Romford,66,-36
-E14001449,Romsey and Southampton North,54,-40
-E14001450,Rossendale and Darwen,55,-20
-E14001451,Rother Valley,60,-25
-E14001452,Rotherham,59,-24
-E14001453,Rugby,58,-32
-E14001454,"Ruislip, Northwood and Pinner",60,-36
-E14001455,Runcorn and Helsby,51,-28
-E14001456,Runnymede and Weybridge,57,-41
-E14001457,Rushcliffe,62,-28
-E14001458,Rutland and Stamford,62,-29
-E14001459,Salford,53,-24
-E14001460,Salisbury,52,-41
-E14001461,Scarborough and Whitby,61,-19
-E14001462,Scunthorpe,61,-24
-E14001463,Sefton Central,50,-20
-E14001464,Selby,60,-21
-E14001465,Sevenoaks,68,-42
-E14001466,Sheffield Brightside and Hillsborough,58,-24
-E14001467,Sheffield Central,58,-25
-E14001468,Sheffield Hallam,57,-24
-E14001469,Sheffield Heeley,57,-25
-E14001470,Sheffield South East,59,-25
-E14001471,Sherwood Forest,62,-27
-E14001472,Shipley,57,-19
-E14001473,Shrewsbury,51,-30
-E14001474,Sittingbourne and Sheppey,70,-39
-E14001475,Skipton and Ripon,58,-18
-E14001476,Sleaford and North Hykeham,63,-26
-E14001477,Slough,56,-37
-E14001478,Smethwick,53,-32
-E14001479,Solihull West and Shirley,55,-34
-E14001480,South Basildon and East Thurrock,68,-36
-E14001481,South Cambridgeshire,65,-31
-E14001482,South Cotswolds,53,-38
-E14001483,South Derbyshire,57,-29
-E14001484,South Devon,48,-45
-E14001485,South Dorset,51,-44
-E14001486,South East Cornwall,46,-44
-E14001487,South Holland and The Deepings,63,-27
-E14001488,South Leicestershire,59,-32
-E14001489,South Norfolk,67,-29
-E14001490,South Northamptonshire,59,-33
-E14001491,South Ribble,52,-20
-E14001492,South Shields,58,-14
-E14001493,South Shropshire,50,-31
-E14001494,South Suffolk,69,-30
-E14001495,South West Devon,47,-45
-E14001496,South West Hertfordshire,61,-35
-E14001497,South West Norfolk,65,-29
-E14001498,South West Wiltshire,51,-41
-E14001499,Southampton Itchen,55,-42
-E14001500,Southampton Test,54,-42
-E14001501,Southend East and Rochford,69,-34
-E14001502,Southend West and Leigh,68,-35
-E14001503,Southgate and Wood Green,63,-35
-E14001504,Southport,50,-19
-E14001505,Spelthorne,58,-40
-E14001506,Spen Valley,57,-21
-E14001507,St Albans,65,-34
-E14001508,St Austell and Newquay,45,-44
-E14001509,St Helens North,50,-21
-E14001510,St Helens South and Whiston,50,-22
-E14001511,St Ives,43,-46
-E14001512,St Neots and Mid Cambridgeshire,64,-31
-E14001513,Stafford,54,-28
-E14001514,Staffordshire Moorlands,56,-27
-E14001515,Stalybridge and Hyde,56,-24
-E14001516,Stevenage,64,-33
-E14001517,Stockport,54,-25
-E14001518,Stockton North,58,-16
-E14001519,Stockton West,56,-17
-E14001520,Stoke-on-Trent Central,55,-28
-E14001521,Stoke-on-Trent North,55,-27
-E14001522,Stoke-on-Trent South,55,-29
-E14001523,"Stone, Great Wyrley and Penkridge",53,-28
-E14001524,Stourbridge,51,-32
-E14001525,Stratford and Bow,65,-38
-E14001526,Stratford-on-Avon,54,-35
-E14001527,Streatham and Croydon North,64,-42
-E14001528,Stretford and Urmston,52,-24
-E14001529,Stroud,52,-37
-E14001530,Suffolk Coastal,69,-29
-E14001531,Sunderland Central,58,-15
-E14001532,Surrey Heath,57,-39
-E14001533,Sussex Weald,70,-42
-E14001534,Sutton and Cheam,60,-42
-E14001535,Sutton Coldfield,56,-31
-E14001536,Swindon North,53,-39
-E14001537,Swindon South,53,-40
-E14001538,Tamworth,57,-30
-E14001539,Tatton,52,-26
-E14001540,Taunton and Wellington,49,-42
-E14001541,Telford,52,-29
-E14001542,Tewkesbury,53,-36
-E14001543,The Wrekin,51,-29
-E14001544,Thirsk and Malton,60,-18
-E14001545,Thornbury and Yate,51,-36
-E14001546,Thurrock,67,-36
-E14001547,Tipton and Wednesbury,52,-31
-E14001548,Tiverton and Minehead,47,-41
-E14001549,Tonbridge,68,-41
-E14001550,Tooting,61,-42
-E14001551,Torbay,48,-44
-E14001552,Torridge and Tavistock,46,-42
-E14001553,Tottenham,62,-37
-E14001554,Truro and Falmouth,44,-45
-E14001555,Tunbridge Wells,69,-42
-E14001556,Twickenham,58,-41
-E14001557,Tynemouth,56,-13
-E14001558,Uxbridge and South Ruislip,58,-37
-E14001559,Vauxhall and Camberwell Green,63,-41
-E14001560,Wakefield and Rothwell,59,-22
-E14001561,Wallasey,48,-27
-E14001562,Walsall and Bloxwich,55,-30
-E14001563,Walthamstow,63,-37
-E14001564,Warrington North,51,-23
-E14001565,Warrington South,51,-24
-E14001566,Warwick and Leamington,55,-35
-E14001567,Washington and Gateshead South,55,-15
-E14001568,Watford,65,-35
-E14001569,Waveney Valley,67,-28
-E14001570,Weald of Kent,70,-41
-E14001571,Wellingborough and Rushden,63,-30
-E14001572,Wells and Mendip Hills,50,-40
-E14001573,Welwyn Hatfield,65,-33
-E14001574,West Bromwich,52,-32
-E14001575,West Dorset,50,-44
-E14001576,West Ham and Beckton,66,-38
-E14001577,West Lancashire,49,-21
-E14001578,West Suffolk,67,-30
-E14001579,West Worcestershire,52,-35
-E14001580,Westmorland and Lonsdale,53,-15
-E14001581,Weston-super-Mare,49,-40
-E14001582,Wetherby and Easingwold,62,-20
-E14001583,Whitehaven and Workington,53,-16
-E14001584,Widnes and Halewood,51,-26
-E14001585,Wigan,51,-20
-E14001586,Wimbledon,60,-41
-E14001587,Winchester,55,-40
-E14001588,Windsor,57,-38
-E14001589,Wirral West,49,-28
-E14001590,Witham,68,-33
-E14001591,Witney,56,-35
-E14001592,Woking,57,-40
-E14001593,Wokingham,55,-38
-E14001594,Wolverhampton North East,53,-29
-E14001595,Wolverhampton South East,54,-30
-E14001596,Wolverhampton West,53,-30
-E14001597,Worcester,53,-34
-E14001598,Worsley and Eccles,52,-23
-E14001599,Worthing West,64,-44
-E14001600,Wycombe,58,-36
-E14001601,Wyre Forest,50,-33
-E14001602,Wythenshawe and Sale East,53,-26
-E14001603,Yeovil,50,-42
-E14001604,York Central,60,-19
-E14001605,York Outer,61,-18
-N05000001,Belfast East,45,-17
-N05000002,Belfast North,45,-16
-N05000003,Belfast South and Mid Down,45,-18
-N05000004,Belfast West,44,-17
-N05000005,East Antrim,45,-15
-N05000006,East Londonderry,43,-15
-N05000007,Fermanagh and South Tyrone,42,-17
-N05000008,Foyle,42,-15
-N05000009,Lagan Valley,44,-18
-N05000010,Mid Ulster,43,-16
-N05000011,Newry and Armagh,44,-19
-N05000012,North Antrim,44,-15
-N05000013,North Down,46,-16
-N05000014,South Antrim,44,-16
-N05000015,South Down,46,-18
-N05000016,Strangford,46,-17
-N05000017,Upper Bann,43,-18
-N05000018,West Tyrone,42,-16
-S14000021,East Renfrewshire,48,-11
-S14000027,Na h-Eileanan an Iar,47,-2
-S14000045,Midlothian,52,-11
-S14000048,North Ayrshire and Arran,48,-10
-S14000051,Orkney and Shetland,51,0
-S14000060,Aberdeen North,52,-3
-S14000061,Aberdeen South,52,-4
-S14000062,Aberdeenshire North and Moray East,51,-3
-S14000063,Airdrie and Shotts,50,-11
-S14000064,Alloa and Grangemouth,50,-7
-S14000065,Angus and Perthshire Glens,50,-5
-S14000066,Arbroath and Broughty Ferry,52,-5
-S14000067,"Argyll, Bute and South Lochaber",49,-5
-S14000068,Bathgate and Linlithgow,51,-9
-S14000069,"Caithness, Sutherland and Easter Ross",50,-2
-S14000070,Coatbridge and Bellshill,50,-12
-S14000071,Cowdenbeath and Kirkcaldy,52,-7
-S14000072,Cumbernauld and Kirkintilloch,50,-8
-S14000073,Dumfries and Galloway,51,-13
-S14000074,"Dumfriesshire, Clydesdale and Tweeddale",52,-13
-S14000075,Dundee Central,50,-6
-S14000076,Dunfermline and Dollar,51,-7
-S14000077,East Kilbride and Strathaven,48,-13
-S14000078,Edinburgh East and Musselburgh,54,-10
-S14000079,Edinburgh North and Leith,53,-9
-S14000080,Edinburgh South,53,-10
-S14000081,Edinburgh South West,52,-10
-S14000082,Edinburgh West,52,-9
-S14000083,Falkirk,51,-8
-S14000084,Glasgow East,51,-10
-S14000085,Glasgow North,49,-9
-S14000086,Glasgow North East,50,-9
-S14000087,Glasgow South,49,-11
-S14000088,Glasgow South West,50,-10
-S14000089,Glasgow West,49,-8
-S14000090,Glenrothes and Mid Fife,52,-6
-S14000091,Gordon and Buchan,50,-4
-S14000092,Hamilton and Clyde Valley,51,-12
-S14000093,Inverclyde and Renfrewshire West,48,-8
-S14000094,"Inverness, Skye and West Ross-shire",49,-3
-S14000095,Livingston,51,-11
-S14000096,Lothian East,53,-11
-S14000097,Mid Dunbartonshire,49,-7
-S14000098,"Moray West, Nairn and Strathspey",49,-4
-S14000099,"Motherwell, Wishaw and Carluke",52,-12
-S14000100,North East Fife,51,-6
-S14000101,Paisley and Renfrewshire North,48,-9
-S14000102,Paisley and Renfrewshire South,49,-10
-S14000103,Perth and Kinross-shire,51,-5
-S14000104,Rutherglen,49,-12
-S14000105,Stirling and Strathallan,49,-6
-S14000106,West Dunbartonshire,48,-7
-S14000107,"Ayr, Carrick and Cumnock",49,-13
-S14000108,"Berwickshire, Roxburgh and Selkirk",53,-12
-S14000109,Central Ayrshire,48,-12
-S14000110,Kilmarnock and Loudoun,50,-13
-S14000111,West Aberdeenshire and Kincardine,51,-4
-W07000081,Aberafan Maesteg,46,-36
-W07000082,Alyn and Deeside,49,-29
-W07000083,Bangor Aberconwy,47,-31
-W07000084,Blaenau Gwent and Rhymney,49,-33
-W07000085,"Brecon, Radnor and Cwm Tawe",50,-32
-W07000086,Bridgend,46,-37
-W07000087,Caerfyrddin,49,-32
-W07000088,Caerphilly,49,-35
-W07000089,Cardiff East,48,-37
-W07000090,Cardiff North,48,-36
-W07000091,Cardiff South and Penarth,48,-38
-W07000092,Cardiff West,47,-37
-W07000093,Ceredigion Preseli,48,-34
-W07000094,Clwyd East,49,-30
-W07000095,Clwyd North,48,-30
-W07000096,Dwyfor Meirionnydd,48,-31
-W07000097,Gower,44,-37
-W07000098,Llanelli,45,-36
-W07000099,Merthyr Tydfil and Aberdare,49,-34
-W07000100,Mid and South Pembrokeshire,44,-36
-W07000101,Monmouthshire,50,-36
-W07000102,Montgomeryshire and Glyndwr,49,-31
-W07000103,Neath and Swansea East,47,-35
-W07000104,Newport East,49,-37
-W07000105,Newport West and Islwyn,49,-36
-W07000106,Pontypridd,48,-35
-W07000107,Rhondda and Ogmore,47,-36
-W07000108,Swansea West,45,-37
-W07000109,Torfaen,50,-34
-W07000110,Vale of Glamorgan,47,-38
-W07000111,Wrexham,50,-30
-W07000112,Ynys Môn,46,-29
diff --git a/scripts/.datasets/local_authorities_2021.csv b/scripts/.datasets/local_authorities_2021.csv
deleted file mode 100644
index 9fcf922ed..000000000
--- a/scripts/.datasets/local_authorities_2021.csv
+++ /dev/null
@@ -1,361 +0,0 @@
-code,x,y,name
-E06000001,8.0,19.0,Hartlepool
-E06000002,9.0,18.0,Middlesbrough
-E06000003,9.0,19.0,Redcar and Cleveland
-E06000004,8.0,18.0,Stockton-on-Tees
-E06000005,7.0,18.0,Darlington
-E06000006,1.0,11.0,Halton
-E06000007,2.0,11.0,Warrington
-E06000008,4.0,15.0,Blackburn with Darwen
-E06000009,2.0,15.0,Blackpool
-E06000010,10.0,15.0,"Kingston upon Hull, City of"
-E06000011,11.0,16.0,East Riding of Yorkshire
-E06000012,11.0,14.0,North East Lincolnshire
-E06000013,10.0,14.0,North Lincolnshire
-E06000014,9.0,17.0,York
-E06000015,6.0,11.0,Derby
-E06000016,8.0,8.0,Leicester
-E06000017,10.0,9.0,Rutland
-E06000018,8.0,10.0,Nottingham
-E06000019,0.0,8.0,"Herefordshire, County of"
-E06000020,2.0,9.0,Telford and Wrekin
-E06000021,3.0,10.0,Stoke-on-Trent
-E06000022,1.0,3.0,Bath and North East Somerset
-E06000023,0.0,3.0,"Bristol, City of"
-E06000024,0.0,2.0,North Somerset
-E06000025,1.0,4.0,South Gloucestershire
-E06000026,-4.0,-2.0,Plymouth
-E06000027,-3.0,-2.0,Torbay
-E06000030,2.0,4.0,Swindon
-E06000031,11.0,9.0,Peterborough
-E06000032,10.0,7.0,Luton
-E06000033,16.0,6.0,Southend-on-Sea
-E06000034,15.0,4.0,Thurrock
-E06000035,15.0,1.0,Medway
-E06000036,4.0,2.0,Bracknell Forest
-E06000037,2.0,2.0,West Berkshire
-E06000038,2.0,3.0,Reading
-E06000039,6.0,4.0,Slough
-E06000040,4.0,3.0,Windsor and Maidenhead
-E06000041,3.0,3.0,Wokingham
-E06000042,6.0,5.0,Milton Keynes
-E06000043,9.0,-2.0,Brighton and Hove
-E06000044,4.0,-1.0,Portsmouth
-E06000045,2.0,0.0,Southampton
-E06000046,1.0,-2.0,Isle of Wight
-E06000047,6.0,18.0,County Durham
-E06000049,4.0,11.0,Cheshire East
-E06000050,3.0,11.0,Cheshire West and Chester
-E06000051,1.0,9.0,Shropshire
-E06000052,-5.0,-2.0,Cornwall
-E06000053,-7.0,-3.0,Isles of Scilly
-E06000054,1.0,2.0,Wiltshire
-E06000055,9.0,7.0,Bedford
-E06000056,9.0,6.0,Central Bedfordshire
-E06000057,5.0,20.0,Northumberland
-E06000058,0.0,0.0,"Bournemouth, Christchurch and Poole"
-E06000059,-1.0,0.0,Dorset
-E06000060,5.0,5.0,Buckinghamshire
-E06000061,9.0,9.0,North Northamptonshire
-E06000062,7.0,6.0,West Northamptonshire
-E06000063,0.0,0.0,Cumberland
-E06000064,0.0,0.0,Westmorland and Furness
-E06000065,0.0,0.0,North Yorkshire
-E06000066,0.0,0.0,Somerset
-E07000008,12.0,8.0,Cambridge
-E07000009,12.0,9.0,East Cambridgeshire
-E07000010,13.0,10.0,Fenland
-E07000011,10.0,8.0,Huntingdonshire
-E07000012,11.0,8.0,South Cambridgeshire
-E07000032,7.0,11.0,Amber Valley
-E07000033,10.0,12.0,Bolsover
-E07000034,9.0,12.0,Chesterfield
-E07000035,7.0,12.0,Derbyshire Dales
-E07000036,7.0,9.0,Erewash
-E07000037,7.0,13.0,High Peak
-E07000038,8.0,12.0,North East Derbyshire
-E07000039,6.0,10.0,South Derbyshire
-E07000040,-2.0,-1.0,East Devon
-E07000041,-3.0,-1.0,Exeter
-E07000042,-2.0,0.0,Mid Devon
-E07000043,-3.0,1.0,North Devon
-E07000044,-4.0,-3.0,South Hams
-E07000045,-2.0,-2.0,Teignbridge
-E07000046,-4.0,-1.0,Torridge
-E07000047,-3.0,0.0,West Devon
-E07000061,10.0,-2.0,Eastbourne
-E07000062,13.0,-2.0,Hastings
-E07000063,10.0,-1.0,Lewes
-E07000064,12.0,-2.0,Rother
-E07000065,11.0,-2.0,Wealden
-E07000066,14.0,5.0,Basildon
-E07000067,14.0,7.0,Braintree
-E07000068,13.0,5.0,Brentwood
-E07000069,15.0,5.0,Castle Point
-E07000070,14.0,6.0,Chelmsford
-E07000071,15.0,8.0,Colchester
-E07000072,12.0,5.0,Epping Forest
-E07000073,13.0,6.0,Harlow
-E07000074,15.0,7.0,Maldon
-E07000075,15.0,6.0,Rochford
-E07000076,16.0,8.0,Tendring
-E07000077,13.0,7.0,Uttlesford
-E07000078,1.0,5.0,Cheltenham
-E07000079,2.0,5.0,Cotswold
-E07000080,-1.0,6.0,Forest of Dean
-E07000081,0.0,6.0,Gloucester
-E07000082,0.0,5.0,Stroud
-E07000083,1.0,6.0,Tewkesbury
-E07000084,2.0,1.0,Basingstoke and Deane
-E07000085,4.0,0.0,East Hampshire
-E07000086,3.0,0.0,Eastleigh
-E07000087,2.0,-1.0,Fareham
-E07000088,3.0,-1.0,Gosport
-E07000089,3.0,2.0,Hart
-E07000090,5.0,0.0,Havant
-E07000091,1.0,0.0,New Forest
-E07000092,4.0,1.0,Rushmoor
-E07000093,1.0,1.0,Test Valley
-E07000094,3.0,1.0,Winchester
-E07000095,12.0,6.0,Broxbourne
-E07000096,8.0,6.0,Dacorum
-E07000098,9.0,5.0,Hertsmere
-E07000099,11.0,7.0,North Hertfordshire
-E07000102,7.0,5.0,Three Rivers
-E07000103,8.0,5.0,Watford
-E07000105,12.0,-1.0,Ashford
-E07000106,15.0,0.0,Canterbury
-E07000107,13.0,1.0,Dartford
-E07000108,14.0,-1.0,Dover
-E07000109,14.0,1.0,Gravesham
-E07000110,14.0,0.0,Maidstone
-E07000111,12.0,0.0,Sevenoaks
-E07000112,13.0,-1.0,Folkestone and Hythe
-E07000113,16.0,0.0,Swale
-E07000114,15.0,-1.0,Thanet
-E07000115,13.0,0.0,Tonbridge and Malling
-E07000116,11.0,-1.0,Tunbridge Wells
-E07000117,6.0,15.0,Burnley
-E07000118,3.0,14.0,Chorley
-E07000119,4.0,16.0,Fylde
-E07000120,5.0,15.0,Hyndburn
-E07000121,3.0,17.0,Lancaster
-E07000122,6.0,16.0,Pendle
-E07000123,5.0,16.0,Preston
-E07000124,5.0,17.0,Ribble Valley
-E07000125,6.0,14.0,Rossendale
-E07000126,3.0,15.0,South Ribble
-E07000127,2.0,13.0,West Lancashire
-E07000128,3.0,16.0,Wyre
-E07000129,7.0,7.0,Blaby
-E07000130,8.0,9.0,Charnwood
-E07000131,8.0,7.0,Harborough
-E07000132,7.0,8.0,Hinckley and Bosworth
-E07000133,11.0,10.0,Melton
-E07000134,6.0,9.0,North West Leicestershire
-E07000135,9.0,8.0,Oadby and Wigston
-E07000136,12.0,12.0,Boston
-E07000137,12.0,13.0,East Lindsey
-E07000138,11.0,12.0,Lincoln
-E07000139,11.0,11.0,North Kesteven
-E07000140,12.0,11.0,South Holland
-E07000141,12.0,10.0,South Kesteven
-E07000142,11.0,13.0,West Lindsey
-E07000143,14.0,10.0,Breckland
-E07000144,15.0,12.0,Broadland
-E07000145,15.0,11.0,Great Yarmouth
-E07000146,13.0,11.0,King's Lynn and West Norfolk
-E07000147,14.0,12.0,North Norfolk
-E07000148,14.0,11.0,Norwich
-E07000149,15.0,10.0,South Norfolk
-E07000170,8.0,11.0,Ashfield
-E07000171,10.0,13.0,Bassetlaw
-E07000172,7.0,10.0,Broxtowe
-E07000173,9.0,10.0,Gedling
-E07000174,9.0,11.0,Mansfield
-E07000175,10.0,11.0,Newark and Sherwood
-E07000176,10.0,10.0,Rushcliffe
-E07000177,4.0,5.0,Cherwell
-E07000178,4.0,4.0,Oxford
-E07000179,5.0,4.0,South Oxfordshire
-E07000180,3.0,4.0,Vale of White Horse
-E07000181,3.0,5.0,West Oxfordshire
-E07000192,3.0,9.0,Cannock Chase
-E07000193,5.0,11.0,East Staffordshire
-E07000194,4.0,9.0,Lichfield
-E07000195,2.0,10.0,Newcastle-under-Lyme
-E07000196,2.0,8.0,South Staffordshire
-E07000197,4.0,10.0,Stafford
-E07000198,5.0,10.0,Staffordshire Moorlands
-E07000199,5.0,9.0,Tamworth
-E07000200,14.0,8.0,Babergh
-E07000202,15.0,9.0,Ipswich
-E07000203,14.0,9.0,Mid Suffolk
-E07000207,7.0,2.0,Elmbridge
-E07000208,8.0,0.0,Epsom and Ewell
-E07000209,5.0,1.0,Guildford
-E07000210,6.0,1.0,Mole Valley
-E07000211,7.0,0.0,Reigate and Banstead
-E07000212,5.0,3.0,Runnymede
-E07000213,6.0,3.0,Spelthorne
-E07000214,5.0,2.0,Surrey Heath
-E07000215,9.0,-1.0,Tandridge
-E07000216,6.0,0.0,Waverley
-E07000217,6.0,2.0,Woking
-E07000218,6.0,8.0,North Warwickshire
-E07000219,6.0,7.0,Nuneaton and Bedworth
-E07000220,6.0,6.0,Rugby
-E07000221,3.0,6.0,Stratford-on-Avon
-E07000222,4.0,6.0,Warwick
-E07000223,8.0,-2.0,Adur
-E07000224,6.0,-2.0,Arun
-E07000225,5.0,-1.0,Chichester
-E07000226,8.0,-1.0,Crawley
-E07000227,6.0,-1.0,Horsham
-E07000228,7.0,-1.0,Mid Sussex
-E07000229,7.0,-2.0,Worthing
-E07000234,2.0,7.0,Bromsgrove
-E07000235,-1.0,7.0,Malvern Hills
-E07000236,4.0,7.0,Redditch
-E07000237,0.0,7.0,Worcester
-E07000238,2.0,6.0,Wychavon
-E07000239,1.0,8.0,Wyre Forest
-E07000240,10.0,6.0,St Albans
-E07000241,11.0,6.0,Welwyn Hatfield
-E07000242,13.0,8.0,East Hertfordshire
-E07000243,12.0,7.0,Stevenage
-E07000244,16.0,10.0,East Suffolk
-E07000245,13.0,9.0,West Suffolk
-E08000001,4.0,14.0,Bolton
-E08000002,5.0,14.0,Bury
-E08000003,5.0,12.0,Manchester
-E08000004,5.0,13.0,Oldham
-E08000005,7.0,14.0,Rochdale
-E08000006,4.0,13.0,Salford
-E08000007,6.0,12.0,Stockport
-E08000008,6.0,13.0,Tameside
-E08000009,4.0,12.0,Trafford
-E08000010,3.0,13.0,Wigan
-E08000011,2.0,12.0,Knowsley
-E08000012,1.0,13.0,Liverpool
-E08000013,3.0,12.0,St. Helens
-E08000014,2.0,14.0,Sefton
-E08000015,1.0,12.0,Wirral
-E08000016,8.0,14.0,Barnsley
-E08000017,9.0,14.0,Doncaster
-E08000018,9.0,13.0,Rotherham
-E08000019,8.0,13.0,Sheffield
-E08000021,5.0,19.0,Newcastle upon Tyne
-E08000022,6.0,20.0,North Tyneside
-E08000023,7.0,20.0,South Tyneside
-E08000024,7.0,19.0,Sunderland
-E08000025,5.0,8.0,Birmingham
-E08000026,5.0,6.0,Coventry
-E08000027,1.0,7.0,Dudley
-E08000028,3.0,7.0,Sandwell
-E08000029,5.0,7.0,Solihull
-E08000030,4.0,8.0,Walsall
-E08000031,3.0,8.0,Wolverhampton
-E08000032,7.0,16.0,Bradford
-E08000033,7.0,15.0,Calderdale
-E08000034,8.0,15.0,Kirklees
-E08000035,8.0,16.0,Leeds
-E08000036,9.0,15.0,Wakefield
-E08000037,6.0,19.0,Gateshead
-E09000001,11.0,2.0,City of London
-E09000002,13.0,3.0,Barking and Dagenham
-E09000003,10.0,5.0,Barnet
-E09000004,12.0,1.0,Bexley
-E09000005,10.0,4.0,Brent
-E09000006,11.0,0.0,Bromley
-E09000007,11.0,4.0,Camden
-E09000008,10.0,0.0,Croydon
-E09000009,9.0,4.0,Ealing
-E09000010,11.0,5.0,Enfield
-E09000011,11.0,1.0,Greenwich
-E09000012,12.0,3.0,Hackney
-E09000013,8.0,3.0,Hammersmith and Fulham
-E09000014,12.0,4.0,Haringey
-E09000015,8.0,4.0,Harrow
-E09000016,14.0,3.0,Havering
-E09000017,7.0,4.0,Hillingdon
-E09000018,7.0,3.0,Hounslow
-E09000019,11.0,3.0,Islington
-E09000020,9.0,3.0,Kensington and Chelsea
-E09000021,7.0,1.0,Kingston upon Thames
-E09000022,10.0,2.0,Lambeth
-E09000023,10.0,1.0,Lewisham
-E09000024,8.0,1.0,Merton
-E09000025,13.0,2.0,Newham
-E09000026,14.0,4.0,Redbridge
-E09000027,8.0,2.0,Richmond upon Thames
-E09000028,9.0,1.0,Southwark
-E09000029,9.0,0.0,Sutton
-E09000030,12.0,2.0,Tower Hamlets
-E09000031,13.0,4.0,Waltham Forest
-E09000032,9.0,2.0,Wandsworth
-E09000033,10.0,3.0,Westminster
-N09000001,-4.0,16.0,Antrim and Newtownabbey
-N09000002,-5.0,16.0,"Armagh City, Banbridge and Craigavon"
-N09000003,-4.0,17.0,Belfast
-N09000004,-5.0,18.0,Causeway Coast and Glens
-N09000005,-6.0,17.0,Derry City and Strabane
-N09000006,-6.0,16.0,Fermanagh and Omagh
-N09000007,-5.0,15.0,Lisburn and Castlereagh
-N09000008,-4.0,18.0,Mid and East Antrim
-N09000009,-5.0,17.0,Mid Ulster
-N09000010,-4.0,15.0,"Newry, Mourne and Down"
-S12000005,2.0,24.0,Clackmannanshire
-S12000006,4.0,20.0,Dumfries and Galloway
-S12000008,3.0,20.0,East Ayrshire
-S12000010,5.0,22.0,East Lothian
-S12000011,2.0,20.0,East Renfrewshire
-S12000013,-1.0,27.0,Na h-Eileanan Siar
-S12000014,2.0,23.0,Falkirk
-S12000017,1.0,26.0,Highland
-S12000018,0.0,21.0,Inverclyde
-S12000019,3.0,21.0,Midlothian
-S12000020,2.0,26.0,Moray
-S12000021,1.0,20.0,North Ayrshire
-S12000023,4.0,28.0,Orkney Islands
-S12000026,4.0,21.0,Scottish Borders
-S12000027,5.0,30.0,Shetland Islands
-S12000028,1.0,19.0,South Ayrshire
-S12000029,2.0,21.0,South Lanarkshire
-S12000030,1.0,24.0,Stirling
-S12000033,4.0,26.0,Aberdeen City
-S12000034,3.0,26.0,Aberdeenshire
-S12000035,0.0,24.0,Argyll and Bute
-S12000036,4.0,22.0,City of Edinburgh
-S12000038,1.0,22.0,Renfrewshire
-S12000039,0.0,23.0,West Dunbartonshire
-S12000040,3.0,22.0,West Lothian
-S12000041,2.0,25.0,Angus
-S12000042,3.0,25.0,Dundee City
-S12000045,1.0,23.0,East Dunbartonshire
-S12000047,3.0,24.0,Fife
-S12000048,1.0,25.0,Perth and Kinross
-S12000049,1.0,21.0,Glasgow City
-S12000050,2.0,22.0,North Lanarkshire
-W06000001,-2.0,12.0,Isle of Anglesey
-W06000002,-2.0,10.0,Gwynedd
-W06000003,-1.0,10.0,Conwy
-W06000004,0.0,10.0,Denbighshire
-W06000005,0.0,11.0,Flintshire
-W06000006,1.0,10.0,Wrexham
-W06000008,-2.0,9.0,Ceredigion
-W06000009,-5.0,6.0,Pembrokeshire
-W06000010,-4.0,6.0,Carmarthenshire
-W06000011,-4.0,5.0,Swansea
-W06000012,-3.0,5.0,Neath Port Talbot
-W06000013,-3.0,6.0,Bridgend
-W06000014,-2.0,4.0,Vale of Glamorgan
-W06000015,-2.0,5.0,Cardiff
-W06000016,-3.0,7.0,Rhondda Cynon Taf
-W06000018,-2.0,6.0,Caerphilly
-W06000019,0.0,9.0,Blaenau Gwent
-W06000020,-2.0,7.0,Torfaen
-W06000021,-1.0,8.0,Monmouthshire
-W06000022,-1.0,5.0,Newport
-W06000023,-1.0,9.0,Powys
-W06000024,-2.0,8.0,Merthyr Tydfil
diff --git a/scripts/BUG_REPORT_build_from_dataframe.md b/scripts/BUG_REPORT_build_from_dataframe.md
deleted file mode 100644
index 503557e56..000000000
--- a/scripts/BUG_REPORT_build_from_dataframe.md
+++ /dev/null
@@ -1,172 +0,0 @@
-# Bug Report: Entity-Level Aggregation Missing in `build_from_dataframe`
-
-## Summary
-
-The `build_from_dataframe` method in `policyengine_uk` does not aggregate person-level data to entity-level before calling `set_input()`, causing UK country filtering (e.g., Wales) to fail with array length mismatch errors.
-
-## Affected Repository
-
-**Repository:** `policyengine-uk`
-**File:** `policyengine_uk/simulation.py`
-**Method:** `build_from_dataframe()`
-**Approximate Lines:** 281-286 (may vary by version)
-
-## Symptoms
-
-When running a UK simulation filtered to a specific country (e.g., Wales), the following error occurs:
-
-```
-ValueError: Unable to set value "[ True  True  True ... False False False]"
-for variable "would_evade_tv_licence_fee", as its length is 8470
-while there are 4108 households in the simulation.
-```
-
-The error occurs because:
-- 8,470 = number of Welsh **persons** in the dataset
-- 4,108 = number of Welsh **households** in the dataset
-- The code tries to assign person-level arrays to household-level variables
-
-## Root Cause
-
-### The Bug Location
-
-```python
-# In policyengine_uk/simulation.py, build_from_dataframe method:
-
-# Set input values for each variable and time period
-for column in df:
-    variable, time_period = column.split("__")
-    if variable not in self.tax_benefit_system.variables:
-        continue
-    self.set_input(variable, time_period, df[column])  # <-- BUG HERE
-```
-
-### Why This Fails
-
-1. **`to_input_dataframe()`** exports ALL variables at **person level** (one row per person), regardless of the variable's native entity. This is by design - it creates a flat DataFrame where each row represents a person.
-
-2. **`build_from_dataframe()`** correctly builds the entity structure:
-   - Extracts `person_household_id` to determine household membership
-   - Creates the correct number of households (e.g., 4,108 for Wales)
-   - Sets up person-to-household relationships properly
-
-3. **BUT** the loop that sets variable values does NOT check if aggregation is needed. It passes person-level arrays (8,470 values) directly to `set_input()` for household-level variables that only have 4,108 entities.
-
-### The Correct Approach
-
-The `policyengine_core` library's `build_from_dataset()` method handles this correctly in `policyengine_core/simulations/simulation.py`:
-
-```python
-# From policyengine_core/simulations/simulation.py, build_from_dataset method:
-
-if len(data[variable]) != len(population.ids):
-    population: GroupPopulation
-    entity_level_data = population.value_from_first_person(data[variable])
-else:
-    entity_level_data = data[variable]
-
-self.set_input(variable_name, time_period, entity_level_data)
-```
-
-## Required Fix
-
-### Current Buggy Code
-
-```python
-# Set input values for each variable and time period
-for column in df:
-    variable, time_period = column.split("__")
-    if variable not in self.tax_benefit_system.variables:
-        continue
-    self.set_input(variable, time_period, df[column])
-```
-
-### Fixed Code
-
-```python
-# Set input values for each variable and time period
-for column in df:
-    variable, time_period = column.split("__")
-    if variable not in self.tax_benefit_system.variables:
-        continue
-
-    # Get variable metadata and target population
-    var_meta = self.tax_benefit_system.get_variable(variable)
-    entity = var_meta.entity
-    population = self.get_population(entity.plural)
-
-    data = df[column].values
-
-    # Check if aggregation is needed (data is person-level but variable is group-level)
-    if len(data) != population.count:
-        # Aggregate from person-level to entity-level using first person's value
-        data = population.value_from_first_person(data)
-
-    self.set_input(variable, time_period, data)
-```
-
-## Technical Details
-
-### What `value_from_first_person()` Does
-
-This method aggregates person-level data to group-level by taking the value from the first person in each group. For household-level variables (like `would_evade_tv_licence_fee`), all persons in a household share the same value, so taking the first person's value is correct.
-
-The method is defined in `policyengine_core` on `GroupPopulation` objects.
-
-### Why This Pattern Works
-
-- Person-level variables: `len(data) == population.count` (no aggregation needed)
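-- Group-level variables exported at person level: `len(data) != population.count` (aggregation needed)
-- If the two lengths happen to be equal for a group-level variable, every group contains exactly one person, so the person-level array is already the correct entity-level array and skipping aggregation is safe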
-
-### Entity Structure in UK Model
-
-The UK tax-benefit system has these entities:
-- `person` - Individual people
-- `benunit` - Benefit units (roughly: nuclear families)
-- `household` - Households (one or more benefit units sharing accommodation)
-
-When filtering to Wales:
-- ~8,470 persons
-- ~4,108 households
-- The exact person-to-household ratio varies with household composition
-
-## Reproduction Steps
-
-1. Create a UK macro simulation: `Simulation(country="uk", scope="macro")`
-2. Filter to a UK country: `Simulation(country="uk", scope="macro", region="country/wales")`
-3. The filtering process:
-   - Calls `to_input_dataframe()` on the baseline simulation
-   - Filters the DataFrame to Welsh persons only
-   - Calls `Microsimulation(dataset=filtered_df)` which invokes `build_from_dataframe()`
-4. Error occurs when `build_from_dataframe()` tries to set household-level variables
-
-## Verification
-
-A Jupyter notebook that demonstrates this bug is available at:
-`policyengine-api/scripts/prove_build_from_dataframe_bug.ipynb`
-
-The notebook:
-1. Creates a UK simulation and exports to DataFrame
-2. Filters to Wales (8,470 persons, 4,108 households)
-3. Manually traces through `build_from_dataframe()` step by step
-4. Shows entity structure is correctly built (4,108 households)
-5. Demonstrates the `set_input()` call fails with length mismatch
-6. Shows the fix (aggregation) works correctly
-
-## Impact
-
-This bug affects:
-- UK country filtering (`country/wales`, `country/scotland`, `country/northern_ireland`, `country/england`)
-- Any code path that uses `build_from_dataframe()` with a filtered DataFrame
-
-This bug does NOT affect:
-- Constituency filtering (uses weight adjustment, not DataFrame subsetting)
-- Local authority filtering (uses weight adjustment, not DataFrame subsetting)
-- UK-wide simulations (no filtering needed)
-
-## Notes for Implementation
-
-1. The fix is minimal - just wrap the existing `set_input()` call with a length check and aggregation
-2. No new dependencies are needed - `value_from_first_person()` is already available on population objects
-3. The fix matches the existing pattern in `policyengine_core`'s `build_from_dataset()` method
-4. Consider adding a unit test that creates a simulation from a filtered DataFrame and verifies household-level variables work correctly
diff --git a/scripts/diagnose_country_filtering.ipynb b/scripts/diagnose_country_filtering.ipynb
deleted file mode 100644
index e9d2b1498..000000000
--- a/scripts/diagnose_country_filtering.ipynb
+++ /dev/null
@@ -1,503 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Diagnosing UK Country Filtering Issue in policyengine.py\n",
-    "\n",
-    "This notebook tests whether `policyengine.py` properly filters simulations by UK country (e.g., Wales).\n",
-    "\n",
-    "## The Issue\n",
-    "When running a simulation filtered to a specific UK country (e.g., `country/wales`), we get:\n",
-    "```\n",
-    "ValueError: Unable to set value \"[ True  True  True ... False False False]\" for variable \n",
-    "\"would_evade_tv_licence_fee\", as its length is 8470 while there are 4108 households in the simulation.\n",
-    "```\n",
-    "\n",
-    "## Hypothesis\n",
-    "The `to_input_dataframe()` method doesn't export `person_household_id`, causing the filtered simulation\n",
-    "to lose entity relationship information and incorrectly set up household counts."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Step 1: Setup and Imports"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "from policyengine import Simulation\n",
-    "\n",
-    "# Check policyengine version\n",
-    "import policyengine\n",
-    "print(f\"policyengine version: {policyengine.__version__}\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Step 2: Create a Baseline UK Simulation\n",
-    "\n",
-    "First, let's create a standard UK-wide simulation and examine its structure."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Create a UK-wide simulation (no region filter)\n",
-    "print(\"Creating UK-wide simulation...\")\n",
-    "sim_uk = Simulation(country=\"uk\", scope=\"macro\")\n",
-    "\n",
-    "# Access the underlying country simulation\n",
-    "underlying_sim = sim_uk.baseline_simulation\n",
-    "\n",
-    "print(f\"\\n=== UK-Wide Simulation Structure ===\")\n",
-    "print(f\"Person count: {underlying_sim.persons.count}\")\n",
-    "print(f\"Household count: {underlying_sim.household.count}\")\n",
-    "print(f\"BenUnit count: {underlying_sim.benunit.count}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Check the country distribution in the UK simulation\n",
-    "country_values = sim_uk.calculate(\"country\")\n",
-    "print(\"\\n=== Country Distribution (Household Level) ===\")\n",
-    "print(country_values.value_counts())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Check person-level country distribution\n",
-    "country_person = underlying_sim.calculate(\"country\", map_to=\"person\")\n",
-    "unique, counts = np.unique(country_person, return_counts=True)\n",
-    "print(\"\\n=== Country Distribution (Person Level) ===\")\n",
-    "for u, c in zip(unique, counts):\n",
-    "    print(f\"  {u}: {c} persons\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Step 3: Test `to_input_dataframe()` Export\n",
-    "\n",
-    "Let's examine what columns are exported by `to_input_dataframe()` to see if entity linkage variables are included."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Export the simulation to a dataframe\n",
-    "print(\"Exporting simulation to DataFrame...\")\n",
-    "df = underlying_sim.to_input_dataframe()\n",
-    "\n",
-    "print(f\"\\n=== Exported DataFrame ===\")\n",
-    "print(f\"Shape: {df.shape}\")\n",
-    "print(f\"Number of columns: {len(df.columns)}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Check for entity ID and linkage columns\n",
-    "print(\"\\n=== Entity-Related Columns ===\")\n",
-    "\n",
-    "id_columns = [c for c in df.columns if '_id' in c.lower()]\n",
-    "print(f\"\\nColumns containing '_id': {len(id_columns)}\")\n",
-    "for col in sorted(id_columns):\n",
-    "    print(f\"  - {col}\")\n",
-    "\n",
-    "# Specifically check for critical columns\n",
-    "critical_cols = ['person_id', 'household_id', 'person_household_id', 'benunit_id', 'person_benunit_id']\n",
-    "print(f\"\\n=== Critical Entity Linkage Columns ===\")\n",
-    "for col_base in critical_cols:\n",
-    "    matching = [c for c in df.columns if c.startswith(col_base)]\n",
-    "    if matching:\n",
-    "        print(f\"  {col_base}: FOUND -> {matching}\")\n",
-    "    else:\n",
-    "        print(f\"  {col_base}: MISSING!\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Check if person_household_id has known periods in the simulation\n",
-    "print(\"\\n=== Checking Known Periods for Entity Linkage Variables ===\")\n",
-    "\n",
-    "for var_name in ['person_id', 'household_id', 'person_household_id', 'person_benunit_id']:\n",
-    "    try:\n",
-    "        holder = underlying_sim.get_holder(var_name)\n",
-    "        known_periods = holder.get_known_periods()\n",
-    "        print(f\"  {var_name}: known_periods = {list(known_periods)}\")\n",
-    "    except Exception as e:\n",
-    "        print(f\"  {var_name}: ERROR - {e}\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Step 4: Simulate Country Filtering (Wales)\n",
-    "\n",
-    "Now let's create a Wales-filtered simulation and see what happens."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Create a Wales simulation\n",
-    "print(\"Creating Wales simulation...\")\n",
-    "print(\"(This may trigger the error we're diagnosing)\")\n",
-    "print()\n",
-    "\n",
-    "try:\n",
-    "    sim_wales = Simulation(country=\"uk\", scope=\"macro\", region=\"country/wales\")\n",
-    "    wales_underlying = sim_wales.baseline_simulation\n",
-    "    \n",
-    "    print(f\"\\n=== Wales Simulation Structure ===\")\n",
-    "    print(f\"Person count: {wales_underlying.persons.count}\")\n",
-    "    print(f\"Household count: {wales_underlying.household.count}\")\n",
-    "    print(f\"BenUnit count: {wales_underlying.benunit.count}\")\n",
-    "    \n",
-    "    # Check if counts make sense\n",
-    "    if wales_underlying.household.count == wales_underlying.persons.count:\n",
-    "        print(\"\\n*** WARNING: Household count equals person count! ***\")\n",
-    "        print(\"This suggests entity linkage was lost during filtering.\")\n",
-    "        \n",
-    "except Exception as e:\n",
-    "    print(f\"\\n*** ERROR creating Wales simulation ***\")\n",
-    "    print(f\"Error type: {type(e).__name__}\")\n",
-    "    print(f\"Error message: {e}\")\n",
-    "    import traceback\n",
-    "    traceback.print_exc()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Step 5: Manual Reproduction of the Filtering Process\n",
-    "\n",
-    "Let's manually reproduce what `_apply_region_to_simulation` does to understand where it breaks."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Step-by-step reproduction of the filtering logic\n",
-    "print(\"=== Manual Reproduction of Country Filtering ===\")\n",
-    "\n",
-    "# Step 1: Export to DataFrame\n",
-    "print(\"\\n[Step 1] Exporting to DataFrame...\")\n",
-    "df = underlying_sim.to_input_dataframe()\n",
-    "print(f\"  DataFrame shape: {df.shape}\")\n",
-    "print(f\"  Columns with 'household': {[c for c in df.columns if 'household' in c.lower()][:10]}...\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Step 2: Calculate country at person level\n",
-    "print(\"\\n[Step 2] Calculating country at person level...\")\n",
-    "country_person_level = underlying_sim.calculate(\"country\", map_to=\"person\").values\n",
-    "print(f\"  Country array shape: {country_person_level.shape}\")\n",
-    "print(f\"  Unique values: {np.unique(country_person_level)}\")\n",
-    "\n",
-    "# Count Welsh persons\n",
-    "wales_mask = country_person_level == \"WALES\"\n",
-    "print(f\"  Welsh persons: {wales_mask.sum()}\")\n",
-    "print(f\"  Non-Welsh persons: {(~wales_mask).sum()}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Step 3: Filter DataFrame to Wales\n",
-    "print(\"\\n[Step 3] Filtering DataFrame to Wales...\")\n",
-    "df_wales = df[wales_mask]\n",
-    "print(f\"  Filtered DataFrame shape: {df_wales.shape}\")\n",
-    "\n",
-    "# Check what person_household_id looks like in filtered data\n",
-    "phh_cols = [c for c in df_wales.columns if 'person_household_id' in c]\n",
-    "if phh_cols:\n",
-    "    print(f\"  person_household_id columns: {phh_cols}\")\n",
-    "    for col in phh_cols:\n",
-    "        vals = df_wales[col].values\n",
-    "        print(f\"    {col}: {len(np.unique(vals))} unique values\")\n",
-    "else:\n",
-    "    print(\"  person_household_id: NOT IN DATAFRAME!\")\n",
-    "    print(\"  This is likely the root cause of the issue.\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Step 4: Try to create a new simulation from filtered DataFrame\n",
-    "print(\"\\n[Step 4] Creating new simulation from filtered DataFrame...\")\n",
-    "\n",
-    "from policyengine_uk import Microsimulation\n",
-    "\n",
-    "try:\n",
-    "    new_sim = Microsimulation(dataset=df_wales)\n",
-    "    \n",
-    "    print(f\"  New simulation created!\")\n",
-    "    print(f\"  Person count: {new_sim.persons.count}\")\n",
-    "    print(f\"  Household count: {new_sim.household.count}\")\n",
-    "    \n",
-    "    # Critical check\n",
-    "    if new_sim.household.count == new_sim.persons.count:\n",
-    "        print(\"\\n  *** CONFIRMED: Household count equals person count! ***\")\n",
-    "        print(\"  The entity linkage was lost because person_household_id is missing.\")\n",
-    "    elif new_sim.household.count == len(np.unique(df_wales.iloc[:, 0])):\n",
-    "        print(\"\\n  *** Household count matches first column's unique values ***\")\n",
-    "        print(\"  This confirms the fallback behavior in build_from_dataset()\")\n",
-    "        \n",
-    "except Exception as e:\n",
-    "    print(f\"  Error creating simulation: {e}\")\n",
-    "    import traceback\n",
-    "    traceback.print_exc()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Step 5: Try to calculate would_evade_tv_licence_fee (this should trigger the error)\n",
-    "print(\"\\n[Step 5] Attempting to calculate would_evade_tv_licence_fee...\")\n",
-    "\n",
-    "try:\n",
-    "    # This calculation uses random(household), which will fail if household count is wrong\n",
-    "    result = new_sim.calculate(\"would_evade_tv_licence_fee\")\n",
-    "    print(f\"  Calculation succeeded!\")\n",
-    "    print(f\"  Result shape: {result.shape}\")\n",
-    "    print(f\"  Result dtype: {result.dtype}\")\n",
-    "except ValueError as e:\n",
-    "    print(f\"  *** ValueError (expected): ***\")\n",
-    "    print(f\"  {e}\")\n",
-    "    \n",
-    "    # Parse the error to understand the mismatch\n",
-    "    error_str = str(e)\n",
-    "    if \"length is\" in error_str and \"while there are\" in error_str:\n",
-    "        print(f\"\\n  This confirms the array size mismatch issue.\")\n",
-    "except Exception as e:\n",
-    "    print(f\"  Unexpected error: {type(e).__name__}: {e}\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Step 6: Deeper Investigation - What Does household_id Return?\n",
-    "\n",
-    "Let's check what `household_id` returns in the broken simulation."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Check household_id in the new (potentially broken) simulation\n",
-    "print(\"=== Investigating household_id in Filtered Simulation ===\")\n",
-    "\n",
-    "try:\n",
-    "    # This is what random() calls internally\n",
-    "    hh_ids = new_sim.calculate(\"household_id\", 2025)\n",
-    "    print(f\"household_id result length: {len(hh_ids)}\")\n",
-    "    print(f\"household_id unique count: {len(np.unique(hh_ids))}\")\n",
-    "    print(f\"Expected household count: {new_sim.household.count}\")\n",
-    "    \n",
-    "    if len(hh_ids) != new_sim.household.count:\n",
-    "        print(f\"\\n*** MISMATCH: household_id has {len(hh_ids)} values but simulation has {new_sim.household.count} households ***\")\n",
-    "except Exception as e:\n",
-    "    print(f\"Error: {e}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Check the holder for household_id\n",
-    "print(\"\\n=== Checking household_id Holder ===\")\n",
-    "try:\n",
-    "    holder = new_sim.get_holder(\"household_id\")\n",
-    "    known_periods = holder.get_known_periods()\n",
-    "    print(f\"Known periods: {list(known_periods)}\")\n",
-    "    \n",
-    "    for period in known_periods:\n",
-    "        arr = holder.get_array(period)\n",
-    "        print(f\"  Period {period}: array shape = {arr.shape if arr is not None else 'None'}\")\n",
-    "except Exception as e:\n",
-    "    print(f\"Error: {e}\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Step 7: Compare with Working Approaches (Constituency/LA)\n",
-    "\n",
-    "Constituency and LA filtering use weight adjustment instead of DataFrame subsetting. Let's verify this works."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Test constituency filtering (should work)\n",
-    "print(\"=== Testing Constituency Filtering (Should Work) ===\")\n",
-    "\n",
-    "try:\n",
-    "    sim_constituency = Simulation(country=\"uk\", scope=\"macro\", region=\"constituency/Cardiff South and Penarth\")\n",
-    "    const_underlying = sim_constituency.baseline_simulation\n",
-    "    \n",
-    "    print(f\"Constituency simulation created successfully!\")\n",
-    "    print(f\"  Person count: {const_underlying.persons.count}\")\n",
-    "    print(f\"  Household count: {const_underlying.household.count}\")\n",
-    "    \n",
-    "    # Try the problematic calculation\n",
-    "    result = sim_constituency.calculate(\"would_evade_tv_licence_fee\")\n",
-    "    print(f\"  would_evade_tv_licence_fee calculated successfully!\")\n",
-    "    print(f\"  Result length: {len(result)}\")\n",
-    "    \n",
-    "except Exception as e:\n",
-    "    print(f\"Error: {type(e).__name__}: {e}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Test local authority filtering (should work)\n",
-    "print(\"\\n=== Testing Local Authority Filtering (Should Work) ===\")\n",
-    "\n",
-    "try:\n",
-    "    sim_la = Simulation(country=\"uk\", scope=\"macro\", region=\"local_authority/Cardiff\")\n",
-    "    la_underlying = sim_la.baseline_simulation\n",
-    "    \n",
-    "    print(f\"LA simulation created successfully!\")\n",
-    "    print(f\"  Person count: {la_underlying.persons.count}\")\n",
-    "    print(f\"  Household count: {la_underlying.household.count}\")\n",
-    "    \n",
-    "    # Try the problematic calculation\n",
-    "    result = sim_la.calculate(\"would_evade_tv_licence_fee\")\n",
-    "    print(f\"  would_evade_tv_licence_fee calculated successfully!\")\n",
-    "    print(f\"  Result length: {len(result)}\")\n",
-    "    \n",
-    "except Exception as e:\n",
-    "    print(f\"Error: {type(e).__name__}: {e}\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Summary and Conclusions"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "print(\"=\"*70)\n",
-    "print(\"DIAGNOSIS SUMMARY\")\n",
-    "print(\"=\"*70)\n",
-    "\n",
-    "print(\"\"\"\n",
-    "Based on the tests above:\n",
-    "\n",
-    "1. COUNTRY FILTERING (country/wales):\n",
-    "   - Uses to_input_dataframe() + DataFrame subsetting + new Microsimulation()\n",
-    "   - FAILS because person_household_id is not exported\n",
-    "   - Results in household count = person count (entity linkage lost)\n",
-    "\n",
-    "2. CONSTITUENCY FILTERING (constituency/...):\n",
-    "   - Uses weight adjustment on existing simulation\n",
-    "   - WORKS because entity structure is preserved\n",
-    "\n",
-    "3. LOCAL AUTHORITY FILTERING (local_authority/...):\n",
-    "   - Uses weight adjustment on existing simulation  \n",
-    "   - WORKS because entity structure is preserved\n",
-    "\n",
-    "ROOT CAUSE:\n",
-    "- to_input_dataframe() only exports variables with known periods\n",
-    "- person_household_id doesn't have known periods (it's derived from dataset structure)\n",
-    "- When building from filtered DataFrame, the fallback creates 1 household per person\n",
-    "\n",
-    "RECOMMENDED FIX:\n",
-    "- Option A: Fix to_input_dataframe() to always export entity linkage variables\n",
-    "- Option B: Use weight-zeroing for country filtering (like constituency/LA)\n",
-    "\"\"\")"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "name": "python",
-   "version": "3.11.0"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
diff --git a/scripts/prove_build_from_dataframe_bug.ipynb b/scripts/prove_build_from_dataframe_bug.ipynb
deleted file mode 100644
index a65202fc9..000000000
--- a/scripts/prove_build_from_dataframe_bug.ipynb
+++ /dev/null
@@ -1,841 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "cell-0",
-   "metadata": {},
-   "source": [
-    "# Proving the Bug in policyengine_uk's build_from_dataframe Method\n",
-    "\n",
-    "This notebook proves that the UK country filtering bug is caused by `policyengine_uk`'s \n",
-    "`build_from_dataframe` method not handling entity-level aggregation.\n",
-    "\n",
-    "## The Bug Location\n",
-    "**File:** `policyengine_uk/simulation.py`  \n",
-    "**Method:** `build_from_dataframe()`  \n",
-    "**Lines:** 281-286\n",
-    "\n",
-    "```python\n",
-    "# Set input values for each variable and time period\n",
-    "for column in df:\n",
-    "    variable, time_period = column.split(\"__\")\n",
-    "    if variable not in self.tax_benefit_system.variables:\n",
-    "        continue\n",
-    "    self.set_input(variable, time_period, df[column])  # <-- BUG: No entity-level check!\n",
-    "```\n",
-    "\n",
-    "## The Problem\n",
-    "1. `to_input_dataframe()` exports ALL variables at **person level** (one row per person)\n",
-    "2. `build_from_dataframe()` correctly builds entity structure with proper counts\n",
-    "3. BUT it then tries to `set_input()` with person-level arrays for household-level variables\n",
-    "4. This causes a length mismatch error"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "cell-1",
-   "metadata": {},
-   "source": [
-    "## Step 1: Setup"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "cell-2",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "policyengine_uk version: unknown\n",
-      "policyengine_uk location: /opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_uk/__init__.py\n"
-     ]
-    }
-   ],
-   "source": [
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "import traceback\n",
-    "import inspect\n",
-    "\n",
-    "from policyengine import Simulation\n",
-    "from policyengine_uk import Simulation as UKSimulation\n",
-    "\n",
-    "# Show where policyengine_uk is loaded from\n",
-    "import policyengine_uk\n",
-    "version = getattr(policyengine_uk, '__version__', 'unknown')\n",
-    "print(f\"policyengine_uk version: {version}\")\n",
-    "print(f\"policyengine_uk location: {policyengine_uk.__file__}\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "cell-3",
-   "metadata": {},
-   "source": [
-    "## Step 2: Examine the Buggy Code\n",
-    "\n",
-    "Let's look at the actual `build_from_dataframe` method to confirm the bug."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "id": "cell-4",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "=== build_from_dataframe source code ===\n",
-      "    def build_from_dataframe(self, df: pd.DataFrame) -> None:\n",
-      "        \"\"\"Build simulation from a pandas DataFrame.\n",
-      "\n",
-      "        Args:\n",
-      "            df: DataFrame with columns in format \"variable_name__time_period\"\n",
-      "        \"\"\"\n",
-      "\n",
-      "        def get_first_array(variable_name: str) -> pd.Series:\n",
-      "            \"\"\"Extract the first array for a given variable name pattern.\"\"\"\n",
-      "            columns = df.columns[df.columns.str.contains(variable_name + \"__\")]\n",
-      "            return df[columns[0]]\n",
-      "\n",
-      "        # Extract ID columns\n",
-      "        (\n",
-      "            person_id,\n",
-      "            person_benunit_id,\n",
-      "            person_household_id,\n",
-      "            benunit_id,\n",
-      "            household_id,\n",
-      "        ) = map(\n",
-      "            get_first_array,\n",
-      "            [\n",
-      "                \"person_id\",\n",
-      "                \"person_benunit_id\",\n",
-      "                \"person_household_id\",\n",
-      "                \"benunit_id\",\n",
-      "                \"household_id\",\n",
-      "            ],\n",
-      "        )\n",
-      "\n",
-      "        # Build entity structure\n",
-      "        self.build_from_ids(\n",
-      "            person_id,\n",
-      "            person_benunit_id,\n",
-      "            person_household_id,\n",
-      "            benunit_id,\n",
-      "            household_id,\n",
-      "        )\n",
-      "\n",
-      "        # Set input values for each variable and time period\n",
-      "        for column in df:\n",
-      "            variable, time_period = column.split(\"__\")\n",
-      "            if variable not in self.tax_benefit_system.variables:\n",
-      "                continue\n",
-      "            self.set_input(variable, time_period, df[column])\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Show the source code of build_from_dataframe\n",
-    "print(\"=== build_from_dataframe source code ===\")\n",
-    "print(inspect.getsource(UKSimulation.build_from_dataframe))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "cell-5",
-   "metadata": {},
-   "source": [
-    "## Step 3: Create Test Data\n",
-    "\n",
-    "Create a UK simulation and export to DataFrame, then filter to Wales."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "cell-6",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Creating UK-wide simulation...\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "No data provided, using default dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n",
-      "Using dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n",
-      "Downloading enhanced_frs_2023_24.h5 from bucket policyengine-uk-data-private\n",
-      "WARNING:root:No version specified for policyengine-uk-data-private, enhanced_frs_2023_24.h5. Using latest version: 1.29.4\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "UK-wide entity counts:\n",
-      "  Persons: 115,612\n",
-      "  Households: 53,508\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Create UK-wide simulation\n",
-    "print(\"Creating UK-wide simulation...\")\n",
-    "sim_uk = Simulation(country=\"uk\", scope=\"macro\")\n",
-    "underlying_sim = sim_uk.baseline_simulation\n",
-    "\n",
-    "print(f\"\\nUK-wide entity counts:\")\n",
-    "print(f\"  Persons: {underlying_sim.persons.count:,}\")\n",
-    "print(f\"  Households: {underlying_sim.household.count:,}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "cell-7",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Exporting to DataFrame...\n",
-      "\n",
-      "Filtered DataFrame:\n",
-      "  Rows (Welsh persons): 8,470\n",
-      "  Columns: 1,127\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Export to DataFrame and filter to Wales\n",
-    "print(\"Exporting to DataFrame...\")\n",
-    "df = underlying_sim.to_input_dataframe()\n",
-    "\n",
-    "# Filter to Wales\n",
-    "country_person = underlying_sim.calculate(\"country\", map_to=\"person\").values\n",
-    "wales_mask = country_person == \"WALES\"\n",
-    "df_wales = df[wales_mask]\n",
-    "\n",
-    "print(f\"\\nFiltered DataFrame:\")\n",
-    "print(f\"  Rows (Welsh persons): {len(df_wales):,}\")\n",
-    "print(f\"  Columns: {len(df_wales.columns):,}\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "cell-8",
-   "metadata": {},
-   "source": [
-    "## Step 4: Prove the DataFrame Has Person-Level Data for Household Variables\n",
-    "\n",
-    "This is the key insight: `to_input_dataframe()` exports EVERYTHING at person level."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "id": "cell-9",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "=== Household-Level Variables in DataFrame ===\n",
-      "Found 392 household-level variable columns in DataFrame\n",
-      "\n",
-      "First 10 household variables:\n",
-      "  - corporate_wealth__2023\n",
-      "  - corporate_wealth__2024\n",
-      "  - corporate_wealth__2025\n",
-      "  - corporate_wealth__2026\n",
-      "  - corporate_wealth__2027\n",
-      "  - corporate_wealth__2028\n",
-      "  - corporate_wealth__2029\n",
-      "  - corporate_wealth__2030\n",
-      "  - non_residential_property_value__2023\n",
-      "  - non_residential_property_value__2024\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Find household-level variables in the DataFrame\n",
-    "print(\"=== Household-Level Variables in DataFrame ===\")\n",
-    "\n",
-    "tax_benefit_system = underlying_sim.tax_benefit_system\n",
-    "household_vars_in_df = []\n",
-    "\n",
-    "for col in df_wales.columns:\n",
-    "    var_name = col.split(\"__\")[0]\n",
-    "    if var_name in tax_benefit_system.variables:\n",
-    "        var_meta = tax_benefit_system.get_variable(var_name)\n",
-    "        if var_meta.entity.key == \"household\":\n",
-    "            household_vars_in_df.append((col, var_name))\n",
-    "\n",
-    "print(f\"Found {len(household_vars_in_df)} household-level variable columns in DataFrame\")\n",
-    "print(f\"\\nFirst 10 household variables:\")\n",
-    "for col, var_name in household_vars_in_df[:10]:\n",
-    "    print(f\"  - {col}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "id": "cell-10",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "=== THE CRITICAL MISMATCH ===\n",
-      "\n",
-      "DataFrame rows (person-level): 8,470\n",
-      "Expected Welsh households: 4,108\n",
-      "\n",
-      "Example: 'corporate_wealth__2025'\n",
-      "  Data length in DataFrame: 8,470\n",
-      "  Should be (household count): 4,108\n",
-      "\n",
-      "  MISMATCH: 8,470 != 4,108\n",
-      "\n",
-      "This is why set_input() fails!\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Show the mismatch: DataFrame rows vs expected household count\n",
-    "print(\"=== THE CRITICAL MISMATCH ===\")\n",
-    "print()\n",
-    "\n",
-    "# Get expected Welsh household count from person_household_id\n",
-    "phh_col = [c for c in df_wales.columns if c.startswith('person_household_id__')][0]\n",
-    "welsh_household_count = df_wales[phh_col].nunique()\n",
-    "\n",
-    "print(f\"DataFrame rows (person-level): {len(df_wales):,}\")\n",
-    "print(f\"Expected Welsh households: {welsh_household_count:,}\")\n",
-    "print()\n",
-    "\n",
-    "# Show a specific household variable\n",
-    "example_var = \"corporate_wealth__2025\" if \"corporate_wealth__2025\" in df_wales.columns else household_vars_in_df[0][0]\n",
-    "print(f\"Example: '{example_var}'\")\n",
-    "print(f\"  Data length in DataFrame: {len(df_wales[example_var]):,}\")\n",
-    "print(f\"  Should be (household count): {welsh_household_count:,}\")\n",
-    "print()\n",
-    "print(f\"  MISMATCH: {len(df_wales[example_var]):,} != {welsh_household_count:,}\")\n",
-    "print()\n",
-    "print(\"This is why set_input() fails!\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "cell-11",
-   "metadata": {},
-   "source": [
-    "## Step 5: Trace Through build_from_dataframe Step-by-Step\n",
-    "\n",
-    "Let's manually execute what `build_from_dataframe` does to see exactly where it fails."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "id": "cell-12",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "=== Step 5a: Extract ID columns ===\n",
-      "person_id length: 8470\n",
-      "person_household_id length: 8470\n",
-      "person_household_id unique values: 4108\n",
-      "household_id length: 8470\n",
-      "household_id unique values: 4108\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Step 5a: Extract ID columns (lines 249-270 of build_from_dataframe)\n",
-    "print(\"=== Step 5a: Extract ID columns ===\")\n",
-    "\n",
-    "def get_first_array(df, variable_name):\n",
-    "    columns = df.columns[df.columns.str.contains(variable_name + \"__\")]\n",
-    "    return df[columns[0]]\n",
-    "\n",
-    "person_id = get_first_array(df_wales, \"person_id\")\n",
-    "person_benunit_id = get_first_array(df_wales, \"person_benunit_id\")\n",
-    "person_household_id = get_first_array(df_wales, \"person_household_id\")\n",
-    "benunit_id = get_first_array(df_wales, \"benunit_id\")\n",
-    "household_id = get_first_array(df_wales, \"household_id\")\n",
-    "\n",
-    "print(f\"person_id length: {len(person_id)}\")\n",
-    "print(f\"person_household_id length: {len(person_household_id)}\")\n",
-    "print(f\"person_household_id unique values: {person_household_id.nunique()}\")\n",
-    "print(f\"household_id length: {len(household_id)}\")\n",
-    "print(f\"household_id unique values: {household_id.nunique()}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "id": "cell-13",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "=== Step 5b: Build entity structure (build_from_ids) ===\n",
-      "Person entity count: 8470\n",
-      "Benunit entity count: 4664\n",
-      "Household entity count: 4108\n",
-      "\n",
-      "Entity structure is CORRECT! 4108 households were created.\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Step 5b: Build entity structure (lines 273-279 - build_from_ids)\n",
-    "print(\"\\n=== Step 5b: Build entity structure (build_from_ids) ===\")\n",
-    "\n",
-    "from policyengine_core.simulations.simulation_builder import SimulationBuilder\n",
-    "from policyengine_uk.tax_benefit_system import CountryTaxBenefitSystem\n",
-    "\n",
-    "# Create a fresh simulation to test\n",
-    "test_tbs = CountryTaxBenefitSystem()\n",
-    "builder = SimulationBuilder()\n",
-    "builder.populations = test_tbs.instantiate_entities()\n",
-    "\n",
-    "# Declare entities - this is what build_from_ids does\n",
-    "builder.declare_person_entity(\"person\", person_id.values)\n",
-    "builder.declare_entity(\"benunit\", np.unique(benunit_id.values))\n",
-    "builder.declare_entity(\"household\", np.unique(household_id.values))\n",
-    "\n",
-    "print(f\"Person entity count: {len(builder.populations['person'].ids)}\")\n",
-    "print(f\"Benunit entity count: {len(builder.populations['benunit'].ids)}\")\n",
-    "print(f\"Household entity count: {len(builder.populations['household'].ids)}\")\n",
-    "print()\n",
-    "print(\"Entity structure is CORRECT! 4108 households were created.\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "id": "cell-14",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "=== Step 5c: Complete entity setup ===\n",
-      "Test simulation created:\n",
-      "  Persons: 8470\n",
-      "  Households: 4108\n",
-      "\n",
-      "Entity counts are CORRECT at this point!\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Step 5c: Complete entity setup with joins\n",
-    "print(\"\\n=== Step 5c: Complete entity setup ===\")\n",
-    "\n",
-    "builder.join_with_persons(\n",
-    "    builder.populations[\"benunit\"],\n",
-    "    person_benunit_id.values,\n",
-    "    np.array([\"member\"] * len(person_benunit_id)),\n",
-    ")\n",
-    "builder.join_with_persons(\n",
-    "    builder.populations[\"household\"],\n",
-    "    person_household_id.values,\n",
-    "    np.array([\"member\"] * len(person_household_id)),\n",
-    ")\n",
-    "\n",
-    "# Create simulation with these populations\n",
-    "from policyengine_core.simulations import Simulation as CoreSimulation\n",
-    "from policyengine_core.tracers import SimpleTracer\n",
-    "\n",
-    "class TestSimulation(CoreSimulation):\n",
-    "    default_input_period = 2025\n",
-    "    default_calculation_period = 2025\n",
-    "\n",
-    "test_sim = TestSimulation.__new__(TestSimulation)\n",
-    "test_sim.tax_benefit_system = test_tbs\n",
-    "test_sim.branch_name = \"default\"\n",
-    "test_sim.invalidated_caches = set()\n",
-    "test_sim.branches = {}\n",
-    "\n",
-    "# Initialize required attributes that build_from_populations expects\n",
-    "test_sim.debug = False\n",
-    "test_sim.trace = False\n",
-    "test_sim.tracer = SimpleTracer()\n",
-    "test_sim.opt_out_cache = False\n",
-    "test_sim.max_spiral_loops = 10\n",
-    "test_sim.memory_config = None\n",
-    "test_sim._data_storage_dir = None\n",
-    "\n",
-    "test_sim.build_from_populations(builder.populations)\n",
-    "\n",
-    "print(f\"Test simulation created:\")\n",
-    "print(f\"  Persons: {test_sim.persons.count}\")\n",
-    "print(f\"  Households: {test_sim.household.count}\")\n",
-    "print()\n",
-    "print(\"Entity counts are CORRECT at this point!\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "id": "cell-15",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "=== Step 5d: THE BUG - set_input without aggregation ===\n",
-      "\n",
-      "Attempting to set 'corporate_wealth' for period 2025\n",
-      "  Variable entity: household\n",
-      "  Data length: 8470\n",
-      "  Household count: 4108\n",
-      "\n",
-      "ERROR (expected): Unable to set value \"[ 42531.723   42531.723   42531.723  ... 145237.94   145237.94\n",
-      "   6483.3296]\" for variable \"corporate_wealth\", as its length is 8470 while there are 4108 households in the simulation.\n",
-      "\n",
-      "============================================================\n",
-      "BUG PROVEN!\n",
-      "============================================================\n",
-      "\n",
-      "The build_from_dataframe method calls set_input() with\n",
-      "person-level data (8470 values) for a household-level\n",
-      "variable, but there are only 4108 households.\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Step 5d: THE BUG - Try to set_input for a household variable with person-level data\n",
-    "print(\"\\n=== Step 5d: THE BUG - set_input without aggregation ===\")\n",
-    "print()\n",
-    "\n",
-    "# This is what build_from_dataframe does at lines 281-286:\n",
-    "# for column in df:\n",
-    "#     variable, time_period = column.split(\"__\")\n",
-    "#     if variable not in self.tax_benefit_system.variables:\n",
-    "#         continue\n",
-    "#     self.set_input(variable, time_period, df[column])  # <-- BUG!\n",
-    "\n",
-    "# Let's simulate this for a household variable\n",
-    "test_column = example_var\n",
-    "variable_name, time_period = test_column.split(\"__\")\n",
-    "\n",
-    "print(f\"Attempting to set '{variable_name}' for period {time_period}\")\n",
-    "print(f\"  Variable entity: {test_tbs.get_variable(variable_name).entity.key}\")\n",
-    "print(f\"  Data length: {len(df_wales[test_column])}\")\n",
-    "print(f\"  Household count: {test_sim.household.count}\")\n",
-    "print()\n",
-    "\n",
-    "try:\n",
-    "    test_sim.set_input(variable_name, time_period, df_wales[test_column].values)\n",
-    "    print(\"SUCCESS - No error (unexpected!)\")\n",
-    "except ValueError as e:\n",
-    "    print(f\"ERROR (expected): {e}\")\n",
-    "    print()\n",
-    "    print(\"=\"*60)\n",
-    "    print(\"BUG PROVEN!\")\n",
-    "    print(\"=\"*60)\n",
-    "    print()\n",
-    "    print(\"The build_from_dataframe method calls set_input() with\")\n",
-    "    print(\"person-level data (8470 values) for a household-level\")\n",
-    "    print(f\"variable, but there are only {test_sim.household.count} households.\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "cell-16",
-   "metadata": {},
-   "source": [
-    "## Step 6: Show What the Fix Should Look Like\n",
-    "\n",
-    "The fix needs to check if aggregation is required before calling `set_input()`."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "id": "cell-17",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "=== The Fix: Aggregate Before set_input ===\n",
-      "\n",
-      "Variable: corporate_wealth\n",
-      "Entity: household\n",
-      "Data length: 8470\n",
-      "Population count: 4108\n",
-      "\n",
-      "Aggregation needed: 8470 != 4108\n",
-      "\n",
-      "After aggregation: 4108 values\n",
-      "\n",
-      "SUCCESS! set_input worked with aggregated data.\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Demonstrate the correct approach: aggregate before set_input\n",
-    "print(\"=== The Fix: Aggregate Before set_input ===\")\n",
-    "print()\n",
-    "\n",
-    "variable_name, time_period = example_var.split(\"__\")\n",
-    "var_meta = test_tbs.get_variable(variable_name)\n",
-    "entity = var_meta.entity\n",
-    "population = test_sim.get_population(entity.plural)\n",
-    "\n",
-    "data = df_wales[example_var].values\n",
-    "\n",
-    "print(f\"Variable: {variable_name}\")\n",
-    "print(f\"Entity: {entity.key}\")\n",
-    "print(f\"Data length: {len(data)}\")\n",
-    "print(f\"Population count: {population.count}\")\n",
-    "print()\n",
-    "\n",
-    "# Check if aggregation is needed\n",
-    "if len(data) != population.count:\n",
-    "    print(f\"Aggregation needed: {len(data)} != {population.count}\")\n",
-    "    print()\n",
-    "    \n",
-    "    # Use value_from_first_person to aggregate\n",
-    "    aggregated_data = population.value_from_first_person(data)\n",
-    "    print(f\"After aggregation: {len(aggregated_data)} values\")\n",
-    "    print()\n",
-    "    \n",
-    "    # Now set_input should work\n",
-    "    try:\n",
-    "        test_sim.set_input(variable_name, time_period, aggregated_data)\n",
-    "        print(f\"SUCCESS! set_input worked with aggregated data.\")\n",
-    "    except Exception as e:\n",
-    "        print(f\"Still failed: {e}\")\n",
-    "else:\n",
-    "    print(\"No aggregation needed\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "cell-18",
-   "metadata": {},
-   "source": [
-    "## Step 7: Show the Required Code Fix\n",
-    "\n",
-    "Here's what the fixed `build_from_dataframe` method should look like."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "id": "cell-19",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "=== Required Fix for build_from_dataframe ===\n",
-      "\n",
-      "CURRENT CODE (buggy):\n",
-      "```python\n",
-      "# Set input values for each variable and time period\n",
-      "for column in df:\n",
-      "    variable, time_period = column.split(\"__\")\n",
-      "    if variable not in self.tax_benefit_system.variables:\n",
-      "        continue\n",
-      "    self.set_input(variable, time_period, df[column])\n",
-      "```\n",
-      "\n",
-      "FIXED CODE:\n",
-      "```python\n",
-      "# Set input values for each variable and time period\n",
-      "for column in df:\n",
-      "    variable, time_period = column.split(\"__\")\n",
-      "    if variable not in self.tax_benefit_system.variables:\n",
-      "        continue\n",
-      "    \n",
-      "    # Get variable metadata and target population\n",
-      "    var_meta = self.tax_benefit_system.get_variable(variable)\n",
-      "    entity = var_meta.entity\n",
-      "    population = self.get_population(entity.plural)\n",
-      "    \n",
-      "    data = df[column].values\n",
-      "    \n",
-      "    # Check if aggregation is needed (data is person-level but variable is group-level)\n",
-      "    if len(data) != population.count:\n",
-      "        # Aggregate from person-level to entity-level\n",
-      "        data = population.value_from_first_person(data)\n",
-      "    \n",
-      "    self.set_input(variable, time_period, data)\n",
-      "```\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(\"=== Required Fix for build_from_dataframe ===\")\n",
-    "print()\n",
-    "print(\"\"\"CURRENT CODE (buggy):\n",
-    "```python\n",
-    "# Set input values for each variable and time period\n",
-    "for column in df:\n",
-    "    variable, time_period = column.split(\"__\")\n",
-    "    if variable not in self.tax_benefit_system.variables:\n",
-    "        continue\n",
-    "    self.set_input(variable, time_period, df[column])\n",
-    "```\n",
-    "\n",
-    "FIXED CODE:\n",
-    "```python\n",
-    "# Set input values for each variable and time period\n",
-    "for column in df:\n",
-    "    variable, time_period = column.split(\"__\")\n",
-    "    if variable not in self.tax_benefit_system.variables:\n",
-    "        continue\n",
-    "    \n",
-    "    # Get variable metadata and target population\n",
-    "    var_meta = self.tax_benefit_system.get_variable(variable)\n",
-    "    entity = var_meta.entity\n",
-    "    population = self.get_population(entity.plural)\n",
-    "    \n",
-    "    data = df[column].values\n",
-    "    \n",
-    "    # Check if aggregation is needed (data is person-level but variable is group-level)\n",
-    "    if len(data) != population.count:\n",
-    "        # Aggregate from person-level to entity-level\n",
-    "        data = population.value_from_first_person(data)\n",
-    "    \n",
-    "    self.set_input(variable, time_period, data)\n",
-    "```\n",
-    "\"\"\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "cell-20",
-   "metadata": {},
-   "source": [
-    "## Summary"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "id": "cell-21",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "======================================================================\n",
-      "SUMMARY: BUG PROVEN\n",
-      "======================================================================\n",
-      "\n",
-      "LOCATION:\n",
-      "  File: policyengine_uk/simulation.py\n",
-      "  Method: build_from_dataframe()\n",
-      "  Lines: 281-286\n",
-      "\n",
-      "ROOT CAUSE:\n",
-      "  The method iterates through DataFrame columns and calls set_input()\n",
-      "  without checking if the data length matches the target entity count.\n",
-      "  \n",
-      "  - to_input_dataframe() exports ALL variables at PERSON level\n",
-      "  - build_from_ids() correctly creates entity structure (e.g., 4108 households)\n",
-      "  - BUT the loop then tries to set 8470 person-level values for \n",
-      "    household-level variables that only have 4108 entities\n",
-      "\n",
-      "THE FIX:\n",
-      "  Before calling set_input(), check if len(data) != population.count.\n",
-      "  If so, aggregate using population.value_from_first_person(data).\n",
-      "\n",
-      "NOTE:\n",
-      "  This is the same aggregation logic that policyengine_core's\n",
-      "  build_from_dataset() method uses (simulation.py lines 406-414).\n",
-      "  The policyengine_uk version simply forgot to include it.\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(\"=\"*70)\n",
-    "print(\"SUMMARY: BUG PROVEN\")\n",
-    "print(\"=\"*70)\n",
-    "print(\"\"\"\n",
-    "LOCATION:\n",
-    "  File: policyengine_uk/simulation.py\n",
-    "  Method: build_from_dataframe()\n",
-    "  Lines: 281-286\n",
-    "\n",
-    "ROOT CAUSE:\n",
-    "  The method iterates through DataFrame columns and calls set_input()\n",
-    "  without checking if the data length matches the target entity count.\n",
-    "  \n",
-    "  - to_input_dataframe() exports ALL variables at PERSON level\n",
-    "  - build_from_ids() correctly creates entity structure (e.g., 4108 households)\n",
-    "  - BUT the loop then tries to set 8470 person-level values for \n",
-    "    household-level variables that only have 4108 entities\n",
-    "\n",
-    "THE FIX:\n",
-    "  Before calling set_input(), check if len(data) != population.count.\n",
-    "  If so, aggregate using population.value_from_first_person(data).\n",
-    "\n",
-    "NOTE:\n",
-    "  This is the same aggregation logic that policyengine_core's\n",
-    "  build_from_dataset() method uses (simulation.py lines 406-414).\n",
-    "  The policyengine_uk version simply forgot to include it.\n",
-    "\"\"\")"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "py-3.13",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.13.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/scripts/test_local_authority_api.py b/scripts/test_local_authority_api.py
deleted file mode 100755
index 81eeb8575..000000000
--- a/scripts/test_local_authority_api.py
+++ /dev/null
@@ -1,570 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test script for UK Local Authority API functionality.
-
-This script tests the economy-wide simulation API for:
-1. A specific UK local authority (e.g., Leicester)
-2. UK-wide calculation (to confirm local_authority_impact is returned)
-3. Scotland country filter (to confirm authorities are filtered by country)
-
-SETUP INSTRUCTIONS:
-===================
-
-You need THREE terminal windows:
-
-Terminal 1 - Start Redis:
-    redis-server
-
-Terminal 2 - Start the API worker (handles economy calculations):
-    FLASK_DEBUG=1 python policyengine_api/worker.py
-
-Terminal 3 - Start the API server:
-    make debug
-
-Then run this script in a 4th terminal:
-    python scripts/test_local_authority_api.py
-
-NOTE: UK calculations require access to the policyengine-uk-data-private
-HuggingFace repo. Make sure HUGGING_FACE_TOKEN is set in your environment.
-"""
-
-import requests
-import json
-import time
-import sqlite3
-from pathlib import Path
-
-# Configuration
-API_BASE_URL = "http://127.0.0.1:5000"
-COUNTRY_ID = "uk"
-BASELINE_POLICY_ID = 1  # UK current law
-TIME_PERIOD = 2025
-DATASET = "default"
-
-# Raise the UK income tax base rate by 6 percentage points (20% -> 26%)
-SAMPLE_REFORM = {
-    "gov.hmrc.income_tax.rates.uk[0].rate": {"2025-01-01.2100-12-31": 0.26}
-}
-
-
-def print_header(title: str):
-    """Print a formatted header."""
-    print("\n" + "=" * 70)
-    print(f"  {title}")
-    print("=" * 70)
-
-
-def print_step(step_num: int, description: str):
-    """Print a step description."""
-    print(f"\n[Step {step_num}] {description}")
-    print("-" * 50)
-
-
-def wait_for_confirmation(message: str = "Press Enter to continue..."):
-    """Wait for user confirmation before proceeding."""
-    input(f"\n>>> {message}")
-
-
-def check_api_health():
-    """Check if the API is running and healthy."""
-    print_step(0, "Checking API Health")
-
-    try:
-        response = requests.get(f"{API_BASE_URL}/liveness-check", timeout=5)
-        if response.status_code == 200:
-            print(f"  [OK] API is running at {API_BASE_URL}")
-            return True
-        else:
-            print(f"  [ERROR] API returned status {response.status_code}")
-            return False
-    except requests.exceptions.ConnectionError:
-        print(f"  [ERROR] Cannot connect to API at {API_BASE_URL}")
-        print("  Make sure the API server is running. You need 3 terminals:")
-        print("")
-        print("  Terminal 1 - Start Redis:")
-        print("    redis-server")
-        print("")
-        print("  Terminal 2 - Start the API worker:")
-        print("    FLASK_DEBUG=1 python policyengine_api/worker.py")
-        print("")
-        print("  Terminal 3 - Start the API server:")
-        print("    make debug")
-        return False
-
-
-def create_reform_policy():
-    """Create a reform policy and return its ID."""
-    print_step(1, "Creating Reform Policy")
-
-    print(f"  Reform to be created:")
-    print(f"    {json.dumps(SAMPLE_REFORM, indent=4)}")
-
-    wait_for_confirmation("Press Enter to create the reform policy...")
-
-    payload = {
-        "label": "Test LA Reform - UC Standard Allowance Increase",
-        "data": SAMPLE_REFORM,
-    }
-
-    response = requests.post(
-        f"{API_BASE_URL}/{COUNTRY_ID}/policy",
-        json=payload,
-        headers={"Content-Type": "application/json"},
-    )
-
-    print(f"  Response status: {response.status_code}")
-    result = response.json()
-    print(f"  Response body: {json.dumps(result, indent=4)}")
-
-    if response.status_code in [200, 201]:
-        policy_id = result["result"]["policy_id"]
-        print(f"  [OK] Reform policy created/found with ID: {policy_id}")
-        return policy_id
-    else:
-        print(f"  [ERROR] Failed to create policy")
-        return None
-
-
-def verify_baseline_policy_exists():
-    """Verify the baseline (current law) policy exists."""
-    print_step(2, "Verifying Baseline Policy Exists")
-
-    print(f"  Checking policy ID: {BASELINE_POLICY_ID}")
-
-    response = requests.get(
-        f"{API_BASE_URL}/{COUNTRY_ID}/policy/{BASELINE_POLICY_ID}"
-    )
-
-    print(f"  Response status: {response.status_code}")
-
-    if response.status_code == 200:
-        result = response.json()
-        policy_data = result.get("result", {})
-        print(f"  Policy label: {policy_data.get('label', 'N/A')}")
-        print(f"  [OK] Baseline policy exists")
-        return True
-    else:
-        print(f"  [ERROR] Baseline policy not found")
-        print(
-            "  You may need to initialize the database with the current law policy"
-        )
-        return False
-
-
-def poll_economy_endpoint(
-    region: str, reform_policy_id: int, description: str
-):
-    """
-    Poll the economy endpoint until the calculation is complete.
-
-    Returns the result data or None if failed.
-    """
-    print(f"\n  Polling for: {description}")
-    print(f"  Region: {region}")
-    print(f"  Reform Policy ID: {reform_policy_id}")
-    print(f"  Baseline Policy ID: {BASELINE_POLICY_ID}")
-    print(f"  Time Period: {TIME_PERIOD}")
-
-    url = f"{API_BASE_URL}/{COUNTRY_ID}/economy/{reform_policy_id}/over/{BASELINE_POLICY_ID}"
-    params = {
-        "region": region,
-        "dataset": DATASET,
-        "time_period": TIME_PERIOD,
-        "target": "general",
-    }
-
-    print(f"\n  Full URL: {url}")
-    print(f"  Query params: {params}")
-
-    wait_for_confirmation("Press Enter to start polling the API...")
-
-    max_attempts = 60  # 5 minutes with 5-second intervals
-    attempt = 0
-
-    while attempt < max_attempts:
-        attempt += 1
-        print(f"\n  Attempt {attempt}/{max_attempts}...")
-
-        try:
-            response = requests.get(url, params=params, timeout=30)
-            result = response.json()
-
-            status = result.get("status")
-            print(f"    Status: {status}")
-
-            if status == "ok":
-                print(f"    [OK] Calculation complete!")
-                return result.get("result")
-            elif status == "computing":
-                print(f"    Calculation in progress... waiting 5 seconds")
-                time.sleep(5)
-            elif status == "error":
-                print(f"    [ERROR] Calculation failed")
-                print(f"    Message: {result.get('message')}")
-                return None
-            else:
-                print(f"    Unknown status: {status}")
-                time.sleep(5)
-
-        except requests.exceptions.Timeout:
-            print(f"    Request timed out, retrying...")
-            time.sleep(5)
-        except Exception as e:
-            print(f"    Error: {e}")
-            time.sleep(5)
-
-    print(f"  [ERROR] Timed out waiting for calculation")
-    return None
-
-
-def display_results(result: dict, description: str):
-    """Display key results from the economy calculation."""
-    print(f"\n  Results for: {description}")
-    print("  " + "-" * 40)
-
-    if result is None:
-        print("    No results available")
-        return
-
-    # Budgetary impact
-    budget = result.get("budget")
-    if budget:
-        print(f"\n  BUDGETARY IMPACT:")
-        for key, value in budget.items():
-            if isinstance(value, (int, float)):
-                print(f"    {key}: {value:,.2f}")
-            else:
-                print(f"    {key}: {value}")
-
-    # Decile impact summary
-    decile = result.get("decile")
-    if decile:
-        print(f"\n  DECILE IMPACT (sample):")
-        relative = decile.get("relative", {})
-        if relative:
-            for d in ["1", "5", "10"]:
-                if d in relative:
-                    print(f"    Decile {d}: {relative[d]*100:.2f}%")
-
-    # Poverty impact
-    poverty = result.get("poverty")
-    if poverty:
-        print(f"\n  POVERTY IMPACT:")
-        deep_poverty = poverty.get("deep_poverty", {})
-        regular_poverty = poverty.get("poverty", {})
-        if deep_poverty:
-            print(
-                f"    Deep poverty change: {deep_poverty.get('change', 'N/A')}"
-            )
-        if regular_poverty:
-            print(
-                f"    Poverty change: {regular_poverty.get('change', 'N/A')}"
-            )
-
-    # Local Authority Impact (if present)
-    la_impact = result.get("local_authority_impact")
-    if la_impact:
-        print(f"\n  LOCAL AUTHORITY IMPACT:")
-        by_la = la_impact.get("by_local_authority", {})
-        print(f"    Number of local authorities: {len(by_la)}")
-
-        # Show first 5 local authorities
-        print(f"    Sample local authorities:")
-        for i, (name, data) in enumerate(list(by_la.items())[:5]):
-            avg_change = data.get("average_household_income_change", 0)
-            rel_change = data.get("relative_household_income_change", 0)
-            print(
-                f"      {name}: avg={avg_change:.2f}, rel={rel_change*100:.3f}%"
-            )
-
-        # Outcomes by region
-        outcomes = la_impact.get("outcomes_by_region", {})
-        if outcomes:
-            print(f"\n    Outcomes by UK region:")
-            for region, buckets in outcomes.items():
-                total = sum(buckets.values())
-                print(f"      {region}: {total} LAs")
-                for bucket, count in buckets.items():
-                    if count > 0:
-                        print(f"        - {bucket}: {count}")
-    else:
-        print(f"\n  LOCAL AUTHORITY IMPACT: Not present in response")
-
-    # Constituency Impact (if present)
-    const_impact = result.get("constituency_impact")
-    if const_impact:
-        by_const = const_impact.get("by_constituency", {})
-        print(f"\n  CONSTITUENCY IMPACT:")
-        print(f"    Number of constituencies: {len(by_const)}")
-
-
-def test_local_authority_simulation(reform_policy_id: int):
-    """Test 1: Run simulation for a specific local authority."""
-    print_header("TEST 1: Local Authority Simulation (Leicester)")
-
-    print(
-        """
-    This test runs an economy simulation for a specific UK local authority.
-    We're using Leicester as it's a well-known unitary authority.
-
-    Expected: The API should accept the local_authority/Leicester region
-    and return economic impact results.
-    """
-    )
-
-    wait_for_confirmation(
-        "Press Enter to run the local authority simulation..."
-    )
-
-    region = "local_authority/Leicester"
-    result = poll_economy_endpoint(
-        region, reform_policy_id, "Leicester Local Authority"
-    )
-
-    if result:
-        display_results(result, "Leicester Local Authority")
-        print(
-            "\n  [TEST 1 PASSED] Local authority simulation completed successfully"
-        )
-        return True
-    else:
-        print("\n  [TEST 1 FAILED] Local authority simulation failed")
-        return False
-
-
-def test_uk_wide_simulation(reform_policy_id: int):
-    """Test 2: Run UK-wide simulation and check for local_authority_impact."""
-    print_header("TEST 2: UK-Wide Simulation (Check local_authority_impact)")
-
-    print(
-        """
-    This test runs an economy simulation for the entire UK.
-
-    Expected: The API should return results that include:
-    - Standard budgetary/poverty/decile impacts
-    - constituency_impact (existing feature)
-    - local_authority_impact (NEW feature we just added)
-
-    We'll verify that local_authority_impact is present and contains
-    data for all 360 UK local authorities.
-    """
-    )
-
-    wait_for_confirmation("Press Enter to run the UK-wide simulation...")
-
-    region = "uk"
-    result = poll_economy_endpoint(region, reform_policy_id, "UK-wide")
-
-    if result:
-        display_results(result, "UK-wide")
-
-        # Verify local_authority_impact is present
-        la_impact = result.get("local_authority_impact")
-        if la_impact:
-            by_la = la_impact.get("by_local_authority", {})
-            if len(by_la) == 360:
-                print(
-                    f"\n  [OK] local_authority_impact contains all 360 local authorities"
-                )
-            else:
-                print(
-                    f"\n  [WARNING] Expected 360 local authorities, got {len(by_la)}"
-                )
-
-            # Check outcomes_by_region has all UK nations
-            outcomes = la_impact.get("outcomes_by_region", {})
-            expected_regions = [
-                "uk",
-                "england",
-                "scotland",
-                "wales",
-                "northern_ireland",
-            ]
-            for r in expected_regions:
-                if r in outcomes:
-                    print(f"  [OK] {r} region present in outcomes")
-                else:
-                    print(f"  [MISSING] {r} region not in outcomes")
-
-            print(
-                "\n  [TEST 2 PASSED] UK-wide simulation includes local_authority_impact"
-            )
-            return True
-        else:
-            print(
-                "\n  [TEST 2 FAILED] local_authority_impact not present in response"
-            )
-            return False
-    else:
-        print("\n  [TEST 2 FAILED] UK-wide simulation failed")
-        return False
-
-
-def test_wales_simulation(reform_policy_id: int):
-    """Test 3: Run Wales simulation and check local authorities are filtered."""
-    print_header("TEST 3: Wales Simulation (Filter Check)")
-
-    print(
-        """
-    This test runs an economy simulation for Wales only.
-
-    Expected: The API should return results where:
-    - The simulation is filtered to Wales
-    - If local_authority_impact is present, it should only contain
-      Welsh local authorities (codes starting with 'W')
-    - Wales has exactly 22 principal areas
-
-    Note: The local_authority_impact breakdown may only be calculated
-    for UK-wide simulations. This test will verify the behavior.
-    """
-    )
-
-    wait_for_confirmation("Press Enter to run the Wales simulation...")
-
-    region = "country/wales"
-    result = poll_economy_endpoint(region, reform_policy_id, "Wales")
-
-    if result:
-        display_results(result, "Wales")
-
-        la_impact = result.get("local_authority_impact")
-        if la_impact:
-            by_la = la_impact.get("by_local_authority", {})
-            print(f"\n  Local authorities in response: {len(by_la)}")
-
-            # If filtering is implemented, we'd expect 22 Welsh LAs
-            if len(by_la) == 22:
-                print(
-                    f"  [OK] Correctly filtered to 22 Welsh local authorities"
-                )
-            elif len(by_la) == 360:
-                print(
-                    f"  [INFO] All 360 LAs returned (filtering not applied at LA level)"
-                )
-            else:
-                print(f"  [INFO] Got {len(by_la)} local authorities")
-
-            print("\n  [TEST 3 PASSED] Wales simulation completed")
-            return True
-        else:
-            print(
-                f"\n  [INFO] local_authority_impact not present for country-level simulation"
-            )
-            print(
-                "  This may be expected behavior - LA breakdown may only be for UK-wide"
-            )
-            print(
-                "\n  [TEST 3 PASSED] Wales simulation completed (no LA breakdown)"
-            )
-            return True
-    else:
-        print("\n  [TEST 3 FAILED] Wales simulation failed")
-        return False
-
-
-def main():
-    """Main test runner."""
-    print_header("UK Local Authority API Test Script")
-
-    print(
-        """
-    This script tests the UK Local Authority feature in the PolicyEngine API.
-
-    It will:
-    1. Check API health
-    2. Create a test reform policy
-    3. Verify baseline policy exists
-    4. Run TEST 1: Local Authority simulation (Leicester)
-    5. Run TEST 2: UK-wide simulation (check local_authority_impact)
-    6. Run TEST 3: Wales simulation (filter check)
-
-    Prerequisites (you need 3 other terminals running):
-    - Terminal 1: redis-server
-    - Terminal 2: FLASK_DEBUG=1 python policyengine_api/worker.py
-    - Terminal 3: make debug
-    - HUGGING_FACE_TOKEN environment variable set (for UK data access)
-
-    You will be prompted before each major step.
-    """
-    )
-
-    wait_for_confirmation("Press Enter to begin testing...")
-
-    # Step 0: Check API health
-    if not check_api_health():
-        print("\n[ABORT] API is not available. Please start the server first.")
-        return
-
-    wait_for_confirmation("API is healthy. Press Enter to continue...")
-
-    # Step 1: Create reform policy
-    reform_policy_id = create_reform_policy()
-    if reform_policy_id is None:
-        print("\n[ABORT] Failed to create reform policy.")
-        return
-
-    # Step 2: Verify baseline policy
-    if not verify_baseline_policy_exists():
-        print("\n[WARNING] Baseline policy not found. Tests may fail.")
-        wait_for_confirmation("Press Enter to continue anyway...")
-
-    print_header("Setup Complete - Ready to Run Tests")
-    print(
-        f"""
-    Configuration:
-    - API Base URL: {API_BASE_URL}
-    - Country: {COUNTRY_ID}
-    - Reform Policy ID: {reform_policy_id}
-    - Baseline Policy ID: {BASELINE_POLICY_ID}
-    - Time Period: {TIME_PERIOD}
-    - Dataset: {DATASET}
-    """
-    )
-
-    wait_for_confirmation("Press Enter to start running tests...")
-
-    # Run tests
-    results = []
-
-    # Test 1: Local Authority simulation
-    results.append(
-        (
-            "Local Authority (Leicester)",
-            test_local_authority_simulation(reform_policy_id),
-        )
-    )
-    wait_for_confirmation(
-        "Test 1 complete. Press Enter to continue to Test 2..."
-    )
-
-    # Test 2: UK-wide simulation
-    results.append(
-        ("UK-Wide with LA Impact", test_uk_wide_simulation(reform_policy_id))
-    )
-    wait_for_confirmation(
-        "Test 2 complete. Press Enter to continue to Test 3..."
-    )
-
-    # Test 3: Wales simulation
-    results.append(("Wales Filter", test_wales_simulation(reform_policy_id)))
-
-    # Summary
-    print_header("Test Summary")
-    print("\n  Results:")
-    for test_name, passed in results:
-        status = "[PASSED]" if passed else "[FAILED]"
-        print(f"    {status} {test_name}")
-
-    all_passed = all(r[1] for r in results)
-    if all_passed:
-        print("\n  All tests passed!")
-    else:
-        print("\n  Some tests failed. Review output above for details.")
-
-    print("\n" + "=" * 70)
-    print("  Testing complete.")
-    print("=" * 70 + "\n")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/scripts/verify_country_filtering_bug.ipynb b/scripts/verify_country_filtering_bug.ipynb
deleted file mode 100644
index 73c71e701..000000000
--- a/scripts/verify_country_filtering_bug.ipynb
+++ /dev/null
@@ -1,1147 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "cell-0",
-   "metadata": {},
-   "source": [
-    "# Verifying UK Country Filtering Bug in policyengine.py\n",
-    "\n",
-    "This notebook verifies the bug that occurs when filtering simulations by UK country (e.g., Wales).\n",
-    "\n",
-    "## The Bug\n",
-    "When running a simulation filtered to a specific UK country (e.g., `region=\"country/wales\"`), we get:\n",
-    "```\n",
-    "ValueError: Unable to set value \"[ True  True  True ... False False False]\" for variable \n",
-    "\"would_evade_tv_licence_fee\", as its length is 8470 while there are 4108 households in the simulation.\n",
-    "```\n",
-    "\n",
-    "## Root Cause Hypothesis\n",
-    "The country filtering code in `policyengine/simulation.py` uses DataFrame subsetting:\n",
-    "1. Exports simulation to DataFrame via `to_input_dataframe()`\n",
-    "2. Filters DataFrame rows by country\n",
-    "3. Creates new simulation from filtered DataFrame\n",
-    "\n",
-    "The issue is that entity linkage variables (like `household_id`) may not be properly \n",
-    "exported/imported, causing entity count mismatches during variable calculations."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "cell-1",
-   "metadata": {},
-   "source": [
-    "## Step 1: Setup and Version Check"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "cell-2",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "import traceback\n",
-    "\n",
-    "# Import policyengine (the high-level package)\n",
-    "import policyengine\n",
-    "from policyengine import Simulation\n",
-    "\n",
-    "# Also import the underlying UK simulation for manual testing\n",
-    "from policyengine_uk import Microsimulation as UKMicrosimulation"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "cell-3",
-   "metadata": {},
-   "source": [
-    "## Step 2: Create UK-Wide Baseline Simulation\n",
-    "\n",
-    "First, create a standard UK-wide simulation to understand the data structure."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "id": "cell-4",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Creating UK-wide simulation...\n",
-      "(This may take a minute to download data)\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "No data provided, using default dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n",
-      "Using dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n",
-      "Downloading enhanced_frs_2023_24.h5 from bucket policyengine-uk-data-private\n",
-      "WARNING:root:No version specified for policyengine-uk-data-private, enhanced_frs_2023_24.h5. Using latest version: 1.29.4\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "=== UK-Wide Simulation Structure ===\n",
-      "Person count: 115612\n",
-      "Household count: 53508\n",
-      "BenUnit count: 61858\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Create UK-wide simulation using policyengine.Simulation\n",
-    "print(\"Creating UK-wide simulation...\")\n",
-    "print(\"(This may take a minute to download data)\")\n",
-    "\n",
-    "sim_uk = Simulation(country=\"uk\", scope=\"macro\")\n",
-    "\n",
-    "# Get the underlying microsimulation\n",
-    "underlying_sim = sim_uk.baseline_simulation\n",
-    "\n",
-    "print(f\"\\n=== UK-Wide Simulation Structure ===\")\n",
-    "print(f\"Person count: {underlying_sim.persons.count}\")\n",
-    "print(f\"Household count: {underlying_sim.household.count}\")\n",
-    "print(f\"BenUnit count: {underlying_sim.benunit.count}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "cell-5",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Creating UK-wide simulation...\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "No data provided, using default dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n",
-      "Using dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n",
-      "Downloading enhanced_frs_2023_24.h5 from bucket policyengine-uk-data-private\n",
-      "WARNING:root:No version specified for policyengine-uk-data-private, enhanced_frs_2023_24.h5. Using latest version: 1.29.4\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "=== UK-Wide Simulation Structure ===\n",
-      "Person count: 115612\n",
-      "Household count: 53508\n",
-      "BenUnit count: 61858\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Create a UK-wide simulation (no region filter)\n",
-    "print(\"Creating UK-wide simulation...\")\n",
-    "sim_uk = Simulation(country=\"uk\", scope=\"macro\")\n",
-    "\n",
-    "# Access the underlying country simulation\n",
-    "underlying_sim = sim_uk.baseline_simulation\n",
-    "\n",
-    "print(f\"\\n=== UK-Wide Simulation Structure ===\")\n",
-    "print(f\"Person count: {underlying_sim.persons.count}\")\n",
-    "print(f\"Household count: {underlying_sim.household.count}\")\n",
-    "print(f\"BenUnit count: {underlying_sim.benunit.count}\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "cell-6",
-   "metadata": {},
-   "source": [
-    "## Step 3: Examine to_input_dataframe() Export\n",
-    "\n",
-    "This is what `_apply_region_to_simulation` uses to get the data before filtering."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "cell-7",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Exporting simulation to DataFrame...\n",
-      "\n",
-      "=== Exported DataFrame ===\n",
-      "Shape: (115612, 1127)\n",
-      "Number of rows (should be person count): 115612\n",
-      "Number of columns: 1127\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Export the simulation to DataFrame\n",
-    "print(\"Exporting simulation to DataFrame...\")\n",
-    "df = underlying_sim.to_input_dataframe()\n",
-    "\n",
-    "print(f\"\\n=== Exported DataFrame ===\")\n",
-    "print(f\"Shape: {df.shape}\")\n",
-    "print(f\"Number of rows (should be person count): {len(df)}\")\n",
-    "print(f\"Number of columns: {len(df.columns)}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "id": "cell-8",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "=== Critical Entity Columns ===\n",
-      "  person_id: FOUND (8 columns)\n",
-      "    - person_id__2023\n",
-      "    - person_id__2024\n",
-      "    - person_id__2025\n",
-      "    ... and 5 more\n",
-      "  household_id: FOUND (8 columns)\n",
-      "    - household_id__2023\n",
-      "    - household_id__2024\n",
-      "    - household_id__2025\n",
-      "    ... and 5 more\n",
-      "  person_household_id: FOUND (8 columns)\n",
-      "    - person_household_id__2023\n",
-      "    - person_household_id__2024\n",
-      "    - person_household_id__2025\n",
-      "    ... and 5 more\n",
-      "  benunit_id: FOUND (8 columns)\n",
-      "    - benunit_id__2023\n",
-      "    - benunit_id__2024\n",
-      "    - benunit_id__2025\n",
-      "    ... and 5 more\n",
-      "  person_benunit_id: FOUND (8 columns)\n",
-      "    - person_benunit_id__2023\n",
-      "    - person_benunit_id__2024\n",
-      "    - person_benunit_id__2025\n",
-      "    ... and 5 more\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Check for critical entity linkage columns\n",
-    "print(\"\\n=== Critical Entity Columns ===\")\n",
-    "\n",
-    "critical_patterns = [\n",
-    "    'person_id',\n",
-    "    'household_id', \n",
-    "    'person_household_id',\n",
-    "    'benunit_id',\n",
-    "    'person_benunit_id'\n",
-    "]\n",
-    "\n",
-    "for pattern in critical_patterns:\n",
-    "    matching = [c for c in df.columns if c.startswith(pattern)]\n",
-    "    if matching:\n",
-    "        print(f\"  {pattern}: FOUND ({len(matching)} columns)\")\n",
-    "        for col in matching[:3]:  # Show first 3\n",
-    "            print(f\"    - {col}\")\n",
-    "        if len(matching) > 3:\n",
-    "            print(f\"    ... and {len(matching) - 3} more\")\n",
-    "    else:\n",
-    "        print(f\"  {pattern}: MISSING!\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "id": "cell-9",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "=== household_id Export Analysis ===\n",
-      "Column: household_id__2023\n",
-      "  Length: 115612\n",
-      "  Unique values: 53508\n",
-      "  Min: 1, Max: 67019\n",
-      "  Sample values: [2 1 2 2 2 2 3 6 6 3]\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Check if household_id is exported and examine its values\n",
-    "hh_id_cols = [c for c in df.columns if c.startswith('household_id__')]\n",
-    "\n",
-    "print(\"\\n=== household_id Export Analysis ===\")\n",
-    "if hh_id_cols:\n",
-    "    col = hh_id_cols[0]\n",
-    "    print(f\"Column: {col}\")\n",
-    "    print(f\"  Length: {len(df[col])}\")\n",
-    "    print(f\"  Unique values: {df[col].nunique()}\")\n",
-    "    print(f\"  Min: {df[col].min()}, Max: {df[col].max()}\")\n",
-    "    print(f\"  Sample values: {df[col].values[:10]}\")\n",
-    "else:\n",
-    "    print(\"household_id NOT exported!\")\n",
-    "    print(\"This could be the root cause of the bug.\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "id": "cell-10",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "=== person_household_id Export Analysis ===\n",
-      "Column: person_household_id__2023\n",
-      "  Length: 115612\n",
-      "  Unique values (should match household count): 53508\n",
-      "  Expected household count: 53508\n",
-      "  [OK] Unique count matches household count\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Check person_household_id linkage\n",
-    "phh_id_cols = [c for c in df.columns if c.startswith('person_household_id__')]\n",
-    "\n",
-    "print(\"\\n=== person_household_id Export Analysis ===\")\n",
-    "if phh_id_cols:\n",
-    "    col = phh_id_cols[0]\n",
-    "    print(f\"Column: {col}\")\n",
-    "    print(f\"  Length: {len(df[col])}\")\n",
-    "    print(f\"  Unique values (should match household count): {df[col].nunique()}\")\n",
-    "    print(f\"  Expected household count: {underlying_sim.household.count}\")\n",
-    "    \n",
-    "    if df[col].nunique() == underlying_sim.household.count:\n",
-    "        print(\"  [OK] Unique count matches household count\")\n",
-    "    else:\n",
-    "        print(\"  [WARNING] Mismatch!\")\n",
-    "else:\n",
-    "    print(\"person_household_id NOT exported! This is critical.\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "cell-11",
-   "metadata": {},
-   "source": [
-    "## Step 4: Manually Reproduce the Wales Filtering\n",
-    "\n",
-    "Let's manually do what `_apply_region_to_simulation` does to identify where it breaks."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "id": "cell-12",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "=== Step 4a: Calculate country at person level ===\n",
-      "Country array shape: (115612,)\n",
-      "\n",
-      "Welsh persons: 8,470\n",
-      "Non-Welsh persons: 107,142\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Step 4a: Get country at person level (same as policyengine.py:296-298)\n",
-    "print(\"=== Step 4a: Calculate country at person level ===\")\n",
-    "country_person = underlying_sim.calculate(\"country\", map_to=\"person\").values\n",
-    "print(f\"Country array shape: {country_person.shape}\")\n",
-    "\n",
-    "# Create Wales mask\n",
-    "wales_mask = country_person == \"WALES\"\n",
-    "print(f\"\\nWelsh persons: {wales_mask.sum():,}\")\n",
-    "print(f\"Non-Welsh persons: {(~wales_mask).sum():,}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "id": "cell-13",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "=== Step 4b: Filter DataFrame to Wales ===\n",
-      "Filtered DataFrame shape: (8470, 1127)\n",
-      "Number of Welsh persons: 8470\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Step 4b: Filter DataFrame to Wales (same as policyengine.py:299-300)\n",
-    "print(\"\\n=== Step 4b: Filter DataFrame to Wales ===\")\n",
-    "df_wales = df[wales_mask]\n",
-    "print(f\"Filtered DataFrame shape: {df_wales.shape}\")\n",
-    "print(f\"Number of Welsh persons: {len(df_wales)}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "id": "cell-14",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "=== Step 4c: Examine person_household_id in filtered data ===\n",
-      "Column: person_household_id__2023\n",
-      "  Length: 8470\n",
-      "  Unique households in Wales: 4108\n",
-      "  Min household ID: 2.0\n",
-      "  Max household ID: 66996.0\n",
-      "  Sample values: [2. 2. 2. 2. 2. 6. 6. 6. 6. 7.]\n",
-      "  [INFO] Household IDs are NOT contiguous (gaps from filtering)\n",
-      "         This is expected - they're original UK-wide IDs\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Check what person_household_id looks like in filtered data\n",
-    "print(\"\\n=== Step 4c: Examine person_household_id in filtered data ===\")\n",
-    "if phh_id_cols:\n",
-    "    col = phh_id_cols[0]\n",
-    "    welsh_phh = df_wales[col].values\n",
-    "    print(f\"Column: {col}\")\n",
-    "    print(f\"  Length: {len(welsh_phh)}\")\n",
-    "    print(f\"  Unique households in Wales: {len(np.unique(welsh_phh))}\")\n",
-    "    print(f\"  Min household ID: {welsh_phh.min()}\")\n",
-    "    print(f\"  Max household ID: {welsh_phh.max()}\")\n",
-    "    print(f\"  Sample values: {welsh_phh[:10]}\")\n",
-    "    \n",
-    "    # Check if IDs are contiguous\n",
-    "    unique_hh = np.unique(welsh_phh)\n",
-    "    if np.array_equal(unique_hh, np.arange(len(unique_hh))):\n",
-    "        print(\"  [INFO] Household IDs are contiguous 0-based\")\n",
-    "    else:\n",
-    "        print(\"  [INFO] Household IDs are NOT contiguous (gaps from filtering)\")\n",
-    "        print(f\"         This is expected - they're original UK-wide IDs\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "cell-15",
-   "metadata": {},
-   "source": [
-    "## Step 5: Try to Create Simulation from Filtered DataFrame\n",
-    "\n",
-    "This is where the error should occur."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "id": "cell-16",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "=== Step 5a: Create simulation from filtered DataFrame ===\n",
-      "(This is what policyengine.py:299-300 does)\n",
-      "\n",
-      "[ERROR] Failed to create simulation: Unable to set value \"[ 39361.   39361.   39361.  ... 134410.5 134410.5   6000. ]\" for variable \"corporate_wealth\", as its length is 8470 while there are 4108 households in the simulation.\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Traceback (most recent call last):\n",
-      "  File \"/var/folders/qf/7fglpnr94wsc5xgbqkp80xbw0000gn/T/ipykernel_19066/2037714397.py\", line 7, in <module>\n",
-      "    new_sim = UKMicrosimulation(dataset=df_wales)\n",
-      "  File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_uk/simulation.py\", line 100, in __init__\n",
-      "    self.build_from_dataframe(dataset)\n",
-      "    ~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^\n",
-      "  File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_uk/simulation.py\", line 286, in build_from_dataframe\n",
-      "    self.set_input(variable, time_period, df[column])\n",
-      "    ~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
-      "  File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/simulations/simulation.py\", line 1241, in set_input\n",
-      "    self.get_holder(variable_name).set_input(\n",
-      "    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^\n",
-      "        period, value, self.branch_name\n",
-      "        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
-      "    )\n",
-      "    ^\n",
-      "  File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/holders/holder.py\", line 244, in set_input\n",
-      "    return self._set(period, array, branch_name)\n",
-      "           ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
-      "  File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/holders/holder.py\", line 286, in _set\n",
-      "    value = self._to_array(value)\n",
-      "  File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/holders/holder.py\", line 253, in _to_array\n",
-      "    raise ValueError(\n",
-      "    ...<7 lines>...\n",
-      "    )\n",
-      "ValueError: Unable to set value \"[ 39361.   39361.   39361.  ... 134410.5 134410.5   6000. ]\" for variable \"corporate_wealth\", as its length is 8470 while there are 4108 households in the simulation.\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Step 5a: Create new simulation from filtered DataFrame\n",
-    "print(\"=== Step 5a: Create simulation from filtered DataFrame ===\")\n",
-    "print(\"(This is what policyengine.py:299-300 does)\")\n",
-    "print()\n",
-    "\n",
-    "try:\n",
-    "    new_sim = UKMicrosimulation(dataset=df_wales)\n",
-    "    \n",
-    "    print(f\"New simulation created successfully!\")\n",
-    "    print(f\"  Person count: {new_sim.persons.count}\")\n",
-    "    print(f\"  Household count: {new_sim.household.count}\")\n",
-    "    print(f\"  BenUnit count: {new_sim.benunit.count}\")\n",
-    "    \n",
-    "    # Critical check\n",
-    "    if new_sim.household.count == new_sim.persons.count:\n",
-    "        print(\"\\n  [ERROR] Household count equals person count!\")\n",
-    "        print(\"  Entity linkage was lost during filtering.\")\n",
-    "        \n",
-    "except Exception as e:\n",
-    "    print(f\"[ERROR] Failed to create simulation: {e}\")\n",
-    "    traceback.print_exc()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "id": "cell-17",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "=== Step 5b: Check household_id holder ===\n",
-      "Error checking household_id: name 'new_sim' is not defined\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Step 5b: Check if household_id holder has data\n",
-    "print(\"\\n=== Step 5b: Check household_id holder ===\")\n",
-    "\n",
-    "try:\n",
-    "    hh_id_holder = new_sim.get_holder(\"household_id\")\n",
-    "    known_periods = list(hh_id_holder.get_known_periods())\n",
-    "    print(f\"household_id known periods: {known_periods}\")\n",
-    "    \n",
-    "    if known_periods:\n",
-    "        period = known_periods[0]\n",
-    "        arr = hh_id_holder.get_array(period)\n",
-    "        print(f\"  Period {period}: array shape = {arr.shape if arr is not None else 'None'}\")\n",
-    "        if arr is not None:\n",
-    "            print(f\"  Values sample: {arr[:10]}\")\n",
-    "    else:\n",
-    "        print(\"  No known periods - household_id was not set as input!\")\n",
-    "except Exception as e:\n",
-    "    print(f\"Error checking household_id: {e}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "id": "cell-18",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "=== Step 5c: Calculate household_id ===\n",
-      "Error calculating household_id: name 'new_sim' is not defined\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Traceback (most recent call last):\n",
-      "  File \"/var/folders/qf/7fglpnr94wsc5xgbqkp80xbw0000gn/T/ipykernel_19066/1284064109.py\", line 5, in <module>\n",
-      "    hh_ids = new_sim.calculate(\"household_id\", 2025)\n",
-      "             ^^^^^^^\n",
-      "NameError: name 'new_sim' is not defined\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Step 5c: Try to calculate household_id\n",
-    "print(\"\\n=== Step 5c: Calculate household_id ===\")\n",
-    "\n",
-    "try:\n",
-    "    hh_ids = new_sim.calculate(\"household_id\", 2025)\n",
-    "    print(f\"household_id calculation result:\")\n",
-    "    print(f\"  Length: {len(hh_ids)}\")\n",
-    "    print(f\"  Expected (household count): {new_sim.household.count}\")\n",
-    "    \n",
-    "    if len(hh_ids) == new_sim.household.count:\n",
-    "        print(\"  [OK] Length matches household count\")\n",
-    "    else:\n",
-    "        print(f\"  [ERROR] Length mismatch! Got {len(hh_ids)}, expected {new_sim.household.count}\")\n",
-    "        \n",
-    "except Exception as e:\n",
-    "    print(f\"Error calculating household_id: {e}\")\n",
-    "    traceback.print_exc()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "cell-19",
-   "metadata": {},
-   "source": [
-    "## Step 6: Try to Calculate would_evade_tv_licence_fee\n",
-    "\n",
-    "This is the variable that triggers the error in production."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "id": "cell-20",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "=== Step 6: Calculate would_evade_tv_licence_fee ===\n",
-      "(This calculation uses random(household) internally)\n",
-      "\n",
-      "Unexpected error: NameError: name 'new_sim' is not defined\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Traceback (most recent call last):\n",
-      "  File \"/var/folders/qf/7fglpnr94wsc5xgbqkp80xbw0000gn/T/ipykernel_19066/1304269510.py\", line 7, in <module>\n",
-      "    result = new_sim.calculate(\"would_evade_tv_licence_fee\", 2025)\n",
-      "             ^^^^^^^\n",
-      "NameError: name 'new_sim' is not defined\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Step 6: Calculate the problematic variable\n",
-    "print(\"=== Step 6: Calculate would_evade_tv_licence_fee ===\")\n",
-    "print(\"(This calculation uses random(household) internally)\")\n",
-    "print()\n",
-    "\n",
-    "try:\n",
-    "    result = new_sim.calculate(\"would_evade_tv_licence_fee\", 2025)\n",
-    "    print(f\"Calculation succeeded!\")\n",
-    "    print(f\"  Result length: {len(result)}\")\n",
-    "    print(f\"  Expected (household count): {new_sim.household.count}\")\n",
-    "    print(f\"  Result dtype: {result.dtype}\")\n",
-    "    \n",
-    "except ValueError as e:\n",
-    "    print(f\"[EXPECTED ERROR] ValueError:\")\n",
-    "    print(f\"  {e}\")\n",
-    "    print()\n",
-    "    print(\"This confirms the bug!\")\n",
-    "    \n",
-    "    # Parse the error message\n",
-    "    error_str = str(e)\n",
-    "    if \"length is\" in error_str and \"while there are\" in error_str:\n",
-    "        print(\"\\nThe error indicates:\")\n",
-    "        print(\"  - The formula returned an array sized for persons\")\n",
-    "        print(\"  - But the variable is household-level\")\n",
-    "        print(\"  - This means random(household) returned wrong-sized array\")\n",
-    "        \n",
-    "except Exception as e:\n",
-    "    print(f\"Unexpected error: {type(e).__name__}: {e}\")\n",
-    "    traceback.print_exc()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "cell-21",
-   "metadata": {},
-   "source": [
-    "## Step 7: Test Using policyengine.Simulation Directly\n",
-    "\n",
-    "Now let's test using the high-level API to confirm the bug occurs there too."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "id": "cell-22",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "=== Step 7: Test with policyengine.Simulation ===\n",
-      "Creating Simulation with region='country/wales'...\n",
-      "\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "No data provided, using default dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n",
-      "Using dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n",
-      "Downloading enhanced_frs_2023_24.h5 from bucket policyengine-uk-data-private\n",
-      "WARNING:root:No version specified for policyengine-uk-data-private, enhanced_frs_2023_24.h5. Using latest version: 1.29.4\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "DataFrame columns: ['miscellaneous_income__2023', 'miscellaneous_income__2024', 'miscellaneous_income__2025', 'miscellaneous_income__2026', 'miscellaneous_income__2027', 'miscellaneous_income__2028', 'miscellaneous_income__2029', 'miscellaneous_income__2030', 'corporate_wealth__2023', 'corporate_wealth__2024', 'corporate_wealth__2025', 'corporate_wealth__2026', 'corporate_wealth__2027', 'corporate_wealth__2028', 'corporate_wealth__2029', 'corporate_wealth__2030', 'non_residential_property_value__2023', 'non_residential_property_value__2024', 'non_residential_property_value__2025', 'non_residential_property_value__2026', 'non_residential_property_value__2027', 'non_residential_property_value__2028', 'non_residential_property_value__2029', 'non_residential_property_value__2030', 'employment_income_before_lsr__2023', 'employment_income_before_lsr__2024', 'employment_income_before_lsr__2025', 'employment_income_before_lsr__2026', 'employment_income_before_lsr__2027', 'employment_income_before_lsr__2028', 'employment_income_before_lsr__2029', 'employment_income_before_lsr__2030', 'property_income__2023', 'property_income__2024', 'property_income__2025', 'property_income__2026', 'property_income__2027', 'property_income__2028', 'property_income__2029', 'property_income__2030', 'savings_interest_income__2023', 'savings_interest_income__2024', 'savings_interest_income__2025', 'savings_interest_income__2026', 'savings_interest_income__2027', 'savings_interest_income__2028', 'savings_interest_income__2029', 'savings_interest_income__2030', 'main_residence_value__2023', 'main_residence_value__2024', 'main_residence_value__2025', 'main_residence_value__2026', 'main_residence_value__2027', 'main_residence_value__2028', 'main_residence_value__2029', 'main_residence_value__2030', 'rent__2023', 'rent__2024', 'rent__2025', 'rent__2026', 'rent__2027', 'rent__2028', 'rent__2029', 'rent__2030', 'private_pension_income__2023', 'private_pension_income__2024', 
'private_pension_income__2025', 'private_pension_income__2026', 'private_pension_income__2027', 'private_pension_income__2028', 'private_pension_income__2029', 'private_pension_income__2030', 'self_employment_income__2023', 'self_employment_income__2024', 'self_employment_income__2025', 'self_employment_income__2026', 'self_employment_income__2027', 'self_employment_income__2028', 'self_employment_income__2029', 'self_employment_income__2030', 'private_transfer_income__2023', 'private_transfer_income__2024', 'private_transfer_income__2025', 'private_transfer_income__2026', 'private_transfer_income__2027', 'private_transfer_income__2028', 'private_transfer_income__2029', 'private_transfer_income__2030', 'age__2023', 'age__2024', 'age__2025', 'age__2026', 'age__2027', 'age__2028', 'age__2029', 'age__2030', 'owned_land__2023', 'owned_land__2024', 'owned_land__2025', 'owned_land__2026', 'owned_land__2027', 'owned_land__2028', 'owned_land__2029', 'owned_land__2030', 'lump_sum_income__2023', 'lump_sum_income__2024', 'lump_sum_income__2025', 'lump_sum_income__2026', 'lump_sum_income__2027', 'lump_sum_income__2028', 'lump_sum_income__2029', 'lump_sum_income__2030', 'council_tax_band__2023', 'council_tax_band__2024', 'council_tax_band__2025', 'council_tax_band__2026', 'council_tax_band__2027', 'council_tax_band__2028', 'council_tax_band__2029', 'council_tax_band__2030', 'other_residential_property_value__2023', 'other_residential_property_value__2024', 'other_residential_property_value__2025', 'other_residential_property_value__2026', 'other_residential_property_value__2027', 'other_residential_property_value__2028', 'other_residential_property_value__2029', 'other_residential_property_value__2030', 'dividend_income__2023', 'dividend_income__2024', 'dividend_income__2025', 'dividend_income__2026', 'dividend_income__2027', 'dividend_income__2028', 'dividend_income__2029', 'dividend_income__2030', 'maintenance_income__2023', 'maintenance_income__2024', 
'maintenance_income__2025', 'maintenance_income__2026', 'maintenance_income__2027', 'maintenance_income__2028', 'maintenance_income__2029', 'maintenance_income__2030', 'petrol_spending__2023', 'petrol_spending__2024', 'petrol_spending__2025', 'petrol_spending__2026', 'petrol_spending__2027', 'petrol_spending__2028', 'petrol_spending__2029', 'petrol_spending__2030', 'health_consumption__2023', 'health_consumption__2024', 'health_consumption__2025', 'health_consumption__2026', 'health_consumption__2027', 'health_consumption__2028', 'health_consumption__2029', 'health_consumption__2030', 'household_furnishings_consumption__2023', 'household_furnishings_consumption__2024', 'household_furnishings_consumption__2025', 'household_furnishings_consumption__2026', 'household_furnishings_consumption__2027', 'household_furnishings_consumption__2028', 'household_furnishings_consumption__2029', 'household_furnishings_consumption__2030', 'restaurants_and_hotels_consumption__2023', 'restaurants_and_hotels_consumption__2024', 'restaurants_and_hotels_consumption__2025', 'restaurants_and_hotels_consumption__2026', 'restaurants_and_hotels_consumption__2027', 'restaurants_and_hotels_consumption__2028', 'restaurants_and_hotels_consumption__2029', 'restaurants_and_hotels_consumption__2030', 'miscellaneous_consumption__2023', 'miscellaneous_consumption__2024', 'miscellaneous_consumption__2025', 'miscellaneous_consumption__2026', 'miscellaneous_consumption__2027', 'miscellaneous_consumption__2028', 'miscellaneous_consumption__2029', 'miscellaneous_consumption__2030', 'recreation_consumption__2023', 'recreation_consumption__2024', 'recreation_consumption__2025', 'recreation_consumption__2026', 'recreation_consumption__2027', 'recreation_consumption__2028', 'recreation_consumption__2029', 'recreation_consumption__2030', 'domestic_energy_consumption__2023', 'domestic_energy_consumption__2024', 'domestic_energy_consumption__2025', 'domestic_energy_consumption__2026', 
'domestic_energy_consumption__2027', 'domestic_energy_consumption__2028', 'domestic_energy_consumption__2029', 'domestic_energy_consumption__2030', 'alcohol_and_tobacco_consumption__2023', 'alcohol_and_tobacco_consumption__2024', 'alcohol_and_tobacco_consumption__2025', 'alcohol_and_tobacco_consumption__2026', 'alcohol_and_tobacco_consumption__2027', 'alcohol_and_tobacco_consumption__2028', 'alcohol_and_tobacco_consumption__2029', 'alcohol_and_tobacco_consumption__2030', 'clothing_and_footwear_consumption__2023', 'clothing_and_footwear_consumption__2024', 'clothing_and_footwear_consumption__2025', 'clothing_and_footwear_consumption__2026', 'clothing_and_footwear_consumption__2027', 'clothing_and_footwear_consumption__2028', 'clothing_and_footwear_consumption__2029', 'clothing_and_footwear_consumption__2030', 'education_consumption__2023', 'education_consumption__2024', 'education_consumption__2025', 'education_consumption__2026', 'education_consumption__2027', 'education_consumption__2028', 'education_consumption__2029', 'education_consumption__2030', 'communication_consumption__2023', 'communication_consumption__2024', 'communication_consumption__2025', 'communication_consumption__2026', 'communication_consumption__2027', 'communication_consumption__2028', 'communication_consumption__2029', 'communication_consumption__2030', 'housing_water_and_electricity_consumption__2023', 'housing_water_and_electricity_consumption__2024', 'housing_water_and_electricity_consumption__2025', 'housing_water_and_electricity_consumption__2026', 'housing_water_and_electricity_consumption__2027', 'housing_water_and_electricity_consumption__2028', 'housing_water_and_electricity_consumption__2029', 'housing_water_and_electricity_consumption__2030', 'diesel_spending__2023', 'diesel_spending__2024', 'diesel_spending__2025', 'diesel_spending__2026', 'diesel_spending__2027', 'diesel_spending__2028', 'diesel_spending__2029', 'diesel_spending__2030', 
'food_and_non_alcoholic_beverages_consumption__2023', 'food_and_non_alcoholic_beverages_consumption__2024', 'food_and_non_alcoholic_beverages_consumption__2025', 'food_and_non_alcoholic_beverages_consumption__2026', 'food_and_non_alcoholic_beverages_consumption__2027', 'food_and_non_alcoholic_beverages_consumption__2028', 'food_and_non_alcoholic_beverages_consumption__2029', 'food_and_non_alcoholic_beverages_consumption__2030', 'transport_consumption__2023', 'transport_consumption__2024', 'transport_consumption__2025', 'transport_consumption__2026', 'transport_consumption__2027', 'transport_consumption__2028', 'transport_consumption__2029', 'transport_consumption__2030', 'childcare_expenses__2023', 'childcare_expenses__2024', 'childcare_expenses__2025', 'childcare_expenses__2026', 'childcare_expenses__2027', 'childcare_expenses__2028', 'childcare_expenses__2029', 'childcare_expenses__2030', 'water_and_sewerage_charges__2023', 'water_and_sewerage_charges__2024', 'water_and_sewerage_charges__2025', 'water_and_sewerage_charges__2026', 'water_and_sewerage_charges__2027', 'water_and_sewerage_charges__2028', 'water_and_sewerage_charges__2029', 'water_and_sewerage_charges__2030', 'maintenance_expenses__2023', 'maintenance_expenses__2024', 'maintenance_expenses__2025', 'maintenance_expenses__2026', 'maintenance_expenses__2027', 'maintenance_expenses__2028', 'maintenance_expenses__2029', 'maintenance_expenses__2030', 'employee_pension_contributions_reported__2023', 'employee_pension_contributions_reported__2024', 'employee_pension_contributions_reported__2025', 'employee_pension_contributions_reported__2026', 'employee_pension_contributions_reported__2027', 'employee_pension_contributions_reported__2028', 'employee_pension_contributions_reported__2029', 'employee_pension_contributions_reported__2030', 'mortgage_capital_repayment__2023', 'mortgage_capital_repayment__2024', 'mortgage_capital_repayment__2025', 'mortgage_capital_repayment__2026', 
'mortgage_capital_repayment__2027', 'mortgage_capital_repayment__2028', 'mortgage_capital_repayment__2029', 'mortgage_capital_repayment__2030', 'pension_contributions_via_salary_sacrifice__2023', 'pension_contributions_via_salary_sacrifice__2024', 'pension_contributions_via_salary_sacrifice__2025', 'pension_contributions_via_salary_sacrifice__2026', 'pension_contributions_via_salary_sacrifice__2027', 'pension_contributions_via_salary_sacrifice__2028', 'pension_contributions_via_salary_sacrifice__2029', 'pension_contributions_via_salary_sacrifice__2030', 'council_tax__2023', 'council_tax__2024', 'council_tax__2025', 'council_tax__2026', 'council_tax__2027', 'council_tax__2028', 'council_tax__2029', 'council_tax__2030', 'mortgage_interest_repayment__2023', 'mortgage_interest_repayment__2024', 'mortgage_interest_repayment__2025', 'mortgage_interest_repayment__2026', 'mortgage_interest_repayment__2027', 'mortgage_interest_repayment__2028', 'mortgage_interest_repayment__2029', 'mortgage_interest_repayment__2030', 'housing_service_charges__2023', 'housing_service_charges__2024', 'housing_service_charges__2025', 'housing_service_charges__2026', 'housing_service_charges__2027', 'housing_service_charges__2028', 'housing_service_charges__2029', 'housing_service_charges__2030', 'employer_pension_contributions__2023', 'employer_pension_contributions__2024', 'employer_pension_contributions__2025', 'employer_pension_contributions__2026', 'employer_pension_contributions__2027', 'employer_pension_contributions__2028', 'employer_pension_contributions__2029', 'employer_pension_contributions__2030', 'personal_pension_contributions__2023', 'personal_pension_contributions__2024', 'personal_pension_contributions__2025', 'personal_pension_contributions__2026', 'personal_pension_contributions__2027', 'personal_pension_contributions__2028', 'personal_pension_contributions__2029', 'personal_pension_contributions__2030', 'attends_private_school__2023', 'attends_private_school__2024', 
'attends_private_school__2025', 'attends_private_school__2026', 'attends_private_school__2027', 'attends_private_school__2028', 'attends_private_school__2029', 'attends_private_school__2030', 'region__2023', 'region__2024', 'region__2025', 'region__2026', 'region__2027', 'region__2028', 'region__2029', 'region__2030', 'brma__2023', 'brma__2024', 'brma__2025', 'brma__2026', 'brma__2027', 'brma__2028', 'brma__2029', 'brma__2030', 'net_financial_wealth__2023', 'net_financial_wealth__2024', 'net_financial_wealth__2025', 'net_financial_wealth__2026', 'net_financial_wealth__2027', 'net_financial_wealth__2028', 'net_financial_wealth__2029', 'net_financial_wealth__2030', 'property_wealth__2023', 'property_wealth__2024', 'property_wealth__2025', 'property_wealth__2026', 'property_wealth__2027', 'property_wealth__2028', 'property_wealth__2029', 'property_wealth__2030', 'savings__2023', 'savings__2024', 'savings__2025', 'savings__2026', 'savings__2027', 'savings__2028', 'savings__2029', 'savings__2030', 'num_vehicles__2023', 'num_vehicles__2024', 'num_vehicles__2025', 'num_vehicles__2026', 'num_vehicles__2027', 'num_vehicles__2028', 'num_vehicles__2029', 'num_vehicles__2030', 'gross_financial_wealth__2023', 'gross_financial_wealth__2024', 'gross_financial_wealth__2025', 'gross_financial_wealth__2026', 'gross_financial_wealth__2027', 'gross_financial_wealth__2028', 'gross_financial_wealth__2029', 'gross_financial_wealth__2030', 'relation_type__2025', 'is_enhanced_disabled_for_benefits__2023', 'is_enhanced_disabled_for_benefits__2024', 'is_enhanced_disabled_for_benefits__2025', 'is_enhanced_disabled_for_benefits__2026', 'is_enhanced_disabled_for_benefits__2027', 'is_enhanced_disabled_for_benefits__2028', 'is_enhanced_disabled_for_benefits__2029', 'is_enhanced_disabled_for_benefits__2030', 'is_higher_earner__2023', 'is_higher_earner__2024', 'is_higher_earner__2025', 'is_higher_earner__2026', 'is_higher_earner__2027', 'is_higher_earner__2028', 'is_higher_earner__2029', 
'is_higher_earner__2030', 'gender__2023', 'gender__2024', 'gender__2025', 'gender__2026', 'gender__2027', 'gender__2028', 'gender__2029', 'gender__2030', 'person_id__2023', 'person_id__2024', 'person_id__2025', 'person_id__2026', 'person_id__2027', 'person_id__2028', 'person_id__2029', 'person_id__2030', 'household_weight__2023', 'household_weight__2024', 'household_weight__2025', 'household_weight__2026', 'household_weight__2027', 'household_weight__2028', 'household_weight__2029', 'household_weight__2030', 'is_benunit_head__2023', 'is_benunit_head__2024', 'is_benunit_head__2025', 'is_benunit_head__2026', 'is_benunit_head__2027', 'is_benunit_head__2028', 'is_benunit_head__2029', 'is_benunit_head__2030', 'person_weight__2025', 'person_weight__2026', 'person_weight__2027', 'person_weight__2028', 'person_weight__2029', 'person_weight__2023', 'person_weight__2024', 'person_weight__2030', 'person_benunit_id__2023', 'person_benunit_id__2024', 'person_benunit_id__2025', 'person_benunit_id__2026', 'person_benunit_id__2027', 'person_benunit_id__2028', 'person_benunit_id__2029', 'person_benunit_id__2030', 'person_household_id__2023', 'person_household_id__2024', 'person_household_id__2025', 'person_household_id__2026', 'person_household_id__2027', 'person_household_id__2028', 'person_household_id__2029', 'person_household_id__2030', 'tenure_type__2023', 'tenure_type__2024', 'tenure_type__2025', 'tenure_type__2026', 'tenure_type__2027', 'tenure_type__2028', 'tenure_type__2029', 'tenure_type__2030', 'marital_status__2023', 'marital_status__2024', 'marital_status__2025', 'marital_status__2026', 'marital_status__2027', 'marital_status__2028', 'marital_status__2029', 'marital_status__2030', 'is_household_head__2023', 'is_household_head__2024', 'is_household_head__2025', 'is_household_head__2026', 'is_household_head__2027', 'is_household_head__2028', 'is_household_head__2029', 'is_household_head__2030', 'current_education__2023', 'current_education__2024', 
'current_education__2025', 'current_education__2026', 'current_education__2027', 'current_education__2028', 'current_education__2029', 'current_education__2030', 'household_owns_tv__2023', 'household_owns_tv__2024', 'household_owns_tv__2025', 'household_owns_tv__2026', 'household_owns_tv__2027', 'household_owns_tv__2028', 'household_owns_tv__2029', 'household_owns_tv__2030', 'is_severely_disabled_for_benefits__2023', 'is_severely_disabled_for_benefits__2024', 'is_severely_disabled_for_benefits__2025', 'is_severely_disabled_for_benefits__2026', 'is_severely_disabled_for_benefits__2027', 'is_severely_disabled_for_benefits__2028', 'is_severely_disabled_for_benefits__2029', 'is_severely_disabled_for_benefits__2030', 'accommodation_type__2023', 'accommodation_type__2024', 'accommodation_type__2025', 'accommodation_type__2026', 'accommodation_type__2027', 'accommodation_type__2028', 'accommodation_type__2029', 'accommodation_type__2030', 'is_married__2023', 'is_married__2024', 'is_married__2025', 'is_married__2026', 'is_married__2027', 'is_married__2028', 'is_married__2029', 'is_married__2030', 'benunit_id__2023', 'benunit_id__2024', 'benunit_id__2025', 'benunit_id__2026', 'benunit_id__2027', 'benunit_id__2028', 'benunit_id__2029', 'benunit_id__2030', 'is_disabled_for_benefits__2023', 'is_disabled_for_benefits__2024', 'is_disabled_for_benefits__2025', 'is_disabled_for_benefits__2026', 'is_disabled_for_benefits__2027', 'is_disabled_for_benefits__2028', 'is_disabled_for_benefits__2029', 'is_disabled_for_benefits__2030', 'eldest_adult_age__2025', 'is_adult__2025', 'benunit_weight__2025', 'benunit_weight__2026', 'benunit_weight__2027', 'benunit_weight__2028', 'benunit_weight__2029', 'household_id__2023', 'household_id__2024', 'household_id__2025', 'household_id__2026', 'household_id__2027', 'household_id__2028', 'household_id__2029', 'household_id__2030', 'structural_insurance_payments__2023', 'structural_insurance_payments__2024', 'structural_insurance_payments__2025', 
'structural_insurance_payments__2026', 'structural_insurance_payments__2027', 'structural_insurance_payments__2028', 'structural_insurance_payments__2029', 'structural_insurance_payments__2030', 'main_residential_property_purchased_is_first_home__2023', 'main_residential_property_purchased_is_first_home__2024', 'main_residential_property_purchased_is_first_home__2025', 'main_residential_property_purchased_is_first_home__2026', 'main_residential_property_purchased_is_first_home__2027', 'main_residential_property_purchased_is_first_home__2028', 'main_residential_property_purchased_is_first_home__2029', 'main_residential_property_purchased_is_first_home__2030', 'full_rate_vat_expenditure_rate__2023', 'full_rate_vat_expenditure_rate__2024', 'full_rate_vat_expenditure_rate__2025', 'full_rate_vat_expenditure_rate__2026', 'full_rate_vat_expenditure_rate__2027', 'full_rate_vat_expenditure_rate__2028', 'full_rate_vat_expenditure_rate__2029', 'full_rate_vat_expenditure_rate__2030', 'external_child_payments__2023', 'external_child_payments__2024', 'external_child_payments__2025', 'external_child_payments__2026', 'external_child_payments__2027', 'external_child_payments__2028', 'external_child_payments__2029', 'external_child_payments__2030', 'statutory_maternity_pay__2023', 'statutory_maternity_pay__2024', 'statutory_maternity_pay__2025', 'statutory_maternity_pay__2026', 'statutory_maternity_pay__2027', 'statutory_maternity_pay__2028', 'statutory_maternity_pay__2029', 'statutory_maternity_pay__2030', 'employment_status__2023', 'employment_status__2024', 'employment_status__2025', 'employment_status__2026', 'employment_status__2027', 'employment_status__2028', 'employment_status__2029', 'employment_status__2030', 'is_single__2025', 'statutory_sick_pay__2023', 'statutory_sick_pay__2024', 'statutory_sick_pay__2025', 'statutory_sick_pay__2026', 'statutory_sick_pay__2027', 'statutory_sick_pay__2028', 'statutory_sick_pay__2029', 'statutory_sick_pay__2030', 'hours_worked__2023', 
'hours_worked__2024', 'hours_worked__2025', 'hours_worked__2026', 'hours_worked__2027', 'hours_worked__2028', 'hours_worked__2029', 'hours_worked__2030', 'rail_usage__2023', 'rail_usage__2024', 'rail_usage__2025', 'rail_usage__2026', 'rail_usage__2027', 'rail_usage__2028', 'rail_usage__2029', 'rail_usage__2030', 'rail_subsidy_spending__2023', 'rail_subsidy_spending__2024', 'rail_subsidy_spending__2025', 'rail_subsidy_spending__2026', 'rail_subsidy_spending__2027', 'rail_subsidy_spending__2028', 'rail_subsidy_spending__2029', 'rail_subsidy_spending__2030', 'bus_subsidy_spending__2023', 'bus_subsidy_spending__2024', 'bus_subsidy_spending__2025', 'bus_subsidy_spending__2026', 'bus_subsidy_spending__2027', 'bus_subsidy_spending__2028', 'bus_subsidy_spending__2029', 'bus_subsidy_spending__2030', 'outpatient_visits__2023', 'outpatient_visits__2024', 'outpatient_visits__2025', 'outpatient_visits__2026', 'outpatient_visits__2027', 'outpatient_visits__2028', 'outpatient_visits__2029', 'outpatient_visits__2030', 'nhs_outpatient_spending__2023', 'nhs_outpatient_spending__2024', 'nhs_outpatient_spending__2025', 'nhs_outpatient_spending__2026', 'nhs_outpatient_spending__2027', 'nhs_outpatient_spending__2028', 'nhs_outpatient_spending__2029', 'nhs_outpatient_spending__2030', 'nhs_a_and_e_spending__2023', 'nhs_a_and_e_spending__2024', 'nhs_a_and_e_spending__2025', 'nhs_a_and_e_spending__2026', 'nhs_a_and_e_spending__2027', 'nhs_a_and_e_spending__2028', 'nhs_a_and_e_spending__2029', 'nhs_a_and_e_spending__2030', 'a_and_e_visits__2023', 'a_and_e_visits__2024', 'a_and_e_visits__2025', 'a_and_e_visits__2026', 'a_and_e_visits__2027', 'a_and_e_visits__2028', 'a_and_e_visits__2029', 'a_and_e_visits__2030', 'admitted_patient_visits__2023', 'admitted_patient_visits__2024', 'admitted_patient_visits__2025', 'admitted_patient_visits__2026', 'admitted_patient_visits__2027', 'admitted_patient_visits__2028', 'admitted_patient_visits__2029', 'admitted_patient_visits__2030', 
'nhs_admitted_patient_spending__2023', 'nhs_admitted_patient_spending__2024', 'nhs_admitted_patient_spending__2025', 'nhs_admitted_patient_spending__2026', 'nhs_admitted_patient_spending__2027', 'nhs_admitted_patient_spending__2028', 'nhs_admitted_patient_spending__2029', 'nhs_admitted_patient_spending__2030', 'healthy_start_vouchers__2023', 'healthy_start_vouchers__2024', 'healthy_start_vouchers__2025', 'healthy_start_vouchers__2026', 'healthy_start_vouchers__2027', 'healthy_start_vouchers__2028', 'healthy_start_vouchers__2029', 'healthy_start_vouchers__2030', 'education_grants__2023', 'education_grants__2024', 'education_grants__2025', 'education_grants__2026', 'education_grants__2027', 'education_grants__2028', 'education_grants__2029', 'education_grants__2030', 'jsa_contrib_reported__2023', 'jsa_contrib_reported__2024', 'jsa_contrib_reported__2025', 'jsa_contrib_reported__2026', 'jsa_contrib_reported__2027', 'jsa_contrib_reported__2028', 'jsa_contrib_reported__2029', 'jsa_contrib_reported__2030', 'sda_reported__2023', 'sda_reported__2024', 'sda_reported__2025', 'sda_reported__2026', 'sda_reported__2027', 'sda_reported__2028', 'sda_reported__2029', 'sda_reported__2030', 'adult_ema__2023', 'adult_ema__2024', 'adult_ema__2025', 'adult_ema__2026', 'adult_ema__2027', 'adult_ema__2028', 'adult_ema__2029', 'adult_ema__2030', 'winter_fuel_allowance_reported__2023', 'winter_fuel_allowance_reported__2024', 'winter_fuel_allowance_reported__2025', 'winter_fuel_allowance_reported__2026', 'winter_fuel_allowance_reported__2027', 'winter_fuel_allowance_reported__2028', 'winter_fuel_allowance_reported__2029', 'winter_fuel_allowance_reported__2030', 'child_tax_credit_reported__2023', 'child_tax_credit_reported__2024', 'child_tax_credit_reported__2025', 'child_tax_credit_reported__2026', 'child_tax_credit_reported__2027', 'child_tax_credit_reported__2028', 'child_tax_credit_reported__2029', 'child_tax_credit_reported__2030', 'working_tax_credit_reported__2023', 
'working_tax_credit_reported__2024', 'working_tax_credit_reported__2025', 'working_tax_credit_reported__2026', 'working_tax_credit_reported__2027', 'working_tax_credit_reported__2028', 'working_tax_credit_reported__2029', 'working_tax_credit_reported__2030', 'bsp_reported__2023', 'bsp_reported__2024', 'bsp_reported__2025', 'bsp_reported__2026', 'bsp_reported__2027', 'bsp_reported__2028', 'bsp_reported__2029', 'bsp_reported__2030', 'carers_allowance_reported__2023', 'carers_allowance_reported__2024', 'carers_allowance_reported__2025', 'carers_allowance_reported__2026', 'carers_allowance_reported__2027', 'carers_allowance_reported__2028', 'carers_allowance_reported__2029', 'carers_allowance_reported__2030', 'access_fund__2023', 'access_fund__2024', 'access_fund__2025', 'access_fund__2026', 'access_fund__2027', 'access_fund__2028', 'access_fund__2029', 'access_fund__2030', 'ssmg_reported__2023', 'ssmg_reported__2024', 'ssmg_reported__2025', 'ssmg_reported__2026', 'ssmg_reported__2027', 'ssmg_reported__2028', 'ssmg_reported__2029', 'ssmg_reported__2030', 'incapacity_benefit_reported__2023', 'incapacity_benefit_reported__2024', 'incapacity_benefit_reported__2025', 'incapacity_benefit_reported__2026', 'incapacity_benefit_reported__2027', 'incapacity_benefit_reported__2028', 'incapacity_benefit_reported__2029', 'incapacity_benefit_reported__2030', 'iidb_reported__2023', 'iidb_reported__2024', 'iidb_reported__2025', 'iidb_reported__2026', 'iidb_reported__2027', 'iidb_reported__2028', 'iidb_reported__2029', 'iidb_reported__2030', 'attendance_allowance_reported__2023', 'attendance_allowance_reported__2024', 'attendance_allowance_reported__2025', 'attendance_allowance_reported__2026', 'attendance_allowance_reported__2027', 'attendance_allowance_reported__2028', 'attendance_allowance_reported__2029', 'attendance_allowance_reported__2030', 'student_loans__2023', 'student_loans__2024', 'student_loans__2025', 'student_loans__2026', 'student_loans__2027', 'student_loans__2028', 
'student_loans__2029', 'student_loans__2030', 'esa_income_reported__2023', 'esa_income_reported__2024', 'esa_income_reported__2025', 'esa_income_reported__2026', 'esa_income_reported__2027', 'esa_income_reported__2028', 'esa_income_reported__2029', 'esa_income_reported__2030', 'state_pension_reported__2023', 'state_pension_reported__2024', 'state_pension_reported__2025', 'state_pension_reported__2026', 'state_pension_reported__2027', 'state_pension_reported__2028', 'state_pension_reported__2029', 'state_pension_reported__2030', 'afcs_reported__2023', 'afcs_reported__2024', 'afcs_reported__2025', 'afcs_reported__2026', 'afcs_reported__2027', 'afcs_reported__2028', 'afcs_reported__2029', 'afcs_reported__2030', 'council_tax_benefit_reported__2023', 'council_tax_benefit_reported__2024', 'council_tax_benefit_reported__2025', 'council_tax_benefit_reported__2026', 'council_tax_benefit_reported__2027', 'council_tax_benefit_reported__2028', 'council_tax_benefit_reported__2029', 'council_tax_benefit_reported__2030', 'income_support_reported__2023', 'income_support_reported__2024', 'income_support_reported__2025', 'income_support_reported__2026', 'income_support_reported__2027', 'income_support_reported__2028', 'income_support_reported__2029', 'income_support_reported__2030', 'esa_contrib_reported__2023', 'esa_contrib_reported__2024', 'esa_contrib_reported__2025', 'esa_contrib_reported__2026', 'esa_contrib_reported__2027', 'esa_contrib_reported__2028', 'esa_contrib_reported__2029', 'esa_contrib_reported__2030', 'jsa_income_reported__2023', 'jsa_income_reported__2024', 'jsa_income_reported__2025', 'jsa_income_reported__2026', 'jsa_income_reported__2027', 'jsa_income_reported__2028', 'jsa_income_reported__2029', 'jsa_income_reported__2030', 'child_ema__2023', 'child_ema__2024', 'child_ema__2025', 'child_ema__2026', 'child_ema__2027', 'child_ema__2028', 'child_ema__2029', 'child_ema__2030', 'dla_sc_reported__2023', 'dla_sc_reported__2024', 'dla_sc_reported__2025', 
'dla_sc_reported__2026', 'dla_sc_reported__2027', 'dla_sc_reported__2028', 'dla_sc_reported__2029', 'dla_sc_reported__2030', 'dla_m_reported__2023', 'dla_m_reported__2024', 'dla_m_reported__2025', 'dla_m_reported__2026', 'dla_m_reported__2027', 'dla_m_reported__2028', 'dla_m_reported__2029', 'dla_m_reported__2030', 'housing_benefit_reported__2023', 'housing_benefit_reported__2024', 'housing_benefit_reported__2025', 'housing_benefit_reported__2026', 'housing_benefit_reported__2027', 'housing_benefit_reported__2028', 'housing_benefit_reported__2029', 'housing_benefit_reported__2030', 'would_claim_uc__2023', 'would_claim_uc__2024', 'would_claim_uc__2025', 'would_claim_uc__2026', 'would_claim_uc__2027', 'would_claim_uc__2028', 'would_claim_uc__2029', 'would_claim_uc__2030', 'universal_credit_reported__2023', 'universal_credit_reported__2024', 'universal_credit_reported__2025', 'universal_credit_reported__2026', 'universal_credit_reported__2027', 'universal_credit_reported__2028', 'universal_credit_reported__2029', 'universal_credit_reported__2030', 'uc_standard_allowance_claimant_type__2025', 'uc_standard_allowance__2025', 'uc_standard_allowance__2026', 'uc_standard_allowance__2027', 'uc_standard_allowance__2028', 'uc_standard_allowance__2029', 'uc_limited_capability_for_WRA__2026', 'uc_limited_capability_for_WRA__2027', 'uc_limited_capability_for_WRA__2028', 'uc_limited_capability_for_WRA__2029', 'uc_LCWRA_element__2026', 'uc_LCWRA_element__2027', 'uc_LCWRA_element__2028', 'uc_LCWRA_element__2029', 'pip_m_reported__2023', 'pip_m_reported__2024', 'pip_m_reported__2025', 'pip_m_reported__2026', 'pip_m_reported__2027', 'pip_m_reported__2028', 'pip_m_reported__2029', 'pip_m_reported__2030', 'pip_dl_reported__2023', 'pip_dl_reported__2024', 'pip_dl_reported__2025', 'pip_dl_reported__2026', 'pip_dl_reported__2027', 'pip_dl_reported__2028', 'pip_dl_reported__2029', 'pip_dl_reported__2030', 'pension_credit_reported__2023', 'pension_credit_reported__2024', 
'pension_credit_reported__2025', 'pension_credit_reported__2026', 'pension_credit_reported__2027', 'pension_credit_reported__2028', 'pension_credit_reported__2029', 'pension_credit_reported__2030', 'would_claim_pc__2023', 'would_claim_pc__2024', 'would_claim_pc__2025', 'would_claim_pc__2026', 'would_claim_pc__2027', 'would_claim_pc__2028', 'would_claim_pc__2029', 'would_claim_pc__2030', 'would_evade_tv_licence_fee__2023', 'would_evade_tv_licence_fee__2024', 'would_evade_tv_licence_fee__2025', 'would_evade_tv_licence_fee__2026', 'would_evade_tv_licence_fee__2027', 'would_evade_tv_licence_fee__2028', 'would_evade_tv_licence_fee__2029', 'would_evade_tv_licence_fee__2030', 'free_school_fruit_veg__2023', 'free_school_fruit_veg__2024', 'free_school_fruit_veg__2025', 'free_school_fruit_veg__2026', 'free_school_fruit_veg__2027', 'free_school_fruit_veg__2028', 'free_school_fruit_veg__2029', 'free_school_fruit_veg__2030', 'dfe_education_spending__2023', 'dfe_education_spending__2024', 'dfe_education_spending__2025', 'dfe_education_spending__2026', 'dfe_education_spending__2027', 'dfe_education_spending__2028', 'dfe_education_spending__2029', 'dfe_education_spending__2030', 'free_school_meals__2023', 'free_school_meals__2024', 'free_school_meals__2025', 'free_school_meals__2026', 'free_school_meals__2027', 'free_school_meals__2028', 'free_school_meals__2029', 'free_school_meals__2030', 'would_claim_extended_childcare__2023', 'would_claim_extended_childcare__2024', 'would_claim_extended_childcare__2025', 'would_claim_extended_childcare__2026', 'would_claim_extended_childcare__2027', 'would_claim_extended_childcare__2028', 'would_claim_extended_childcare__2029', 'would_claim_extended_childcare__2030', 'maximum_extended_childcare_hours_usage__2023', 'maximum_extended_childcare_hours_usage__2024', 'maximum_extended_childcare_hours_usage__2025', 'maximum_extended_childcare_hours_usage__2026', 'maximum_extended_childcare_hours_usage__2027', 
'maximum_extended_childcare_hours_usage__2028', 'maximum_extended_childcare_hours_usage__2029', 'maximum_extended_childcare_hours_usage__2030', 'would_claim_targeted_childcare__2023', 'would_claim_targeted_childcare__2024', 'would_claim_targeted_childcare__2025', 'would_claim_targeted_childcare__2026', 'would_claim_targeted_childcare__2027', 'would_claim_targeted_childcare__2028', 'would_claim_targeted_childcare__2029', 'would_claim_targeted_childcare__2030', 'would_claim_universal_childcare__2023', 'would_claim_universal_childcare__2024', 'would_claim_universal_childcare__2025', 'would_claim_universal_childcare__2026', 'would_claim_universal_childcare__2027', 'would_claim_universal_childcare__2028', 'would_claim_universal_childcare__2029', 'would_claim_universal_childcare__2030', 'student_loan_repayments__2023', 'student_loan_repayments__2024', 'student_loan_repayments__2025', 'student_loan_repayments__2026', 'student_loan_repayments__2027', 'student_loan_repayments__2028', 'student_loan_repayments__2029', 'student_loan_repayments__2030', 'would_claim_child_benefit__2023', 'would_claim_child_benefit__2024', 'would_claim_child_benefit__2025', 'would_claim_child_benefit__2026', 'would_claim_child_benefit__2027', 'would_claim_child_benefit__2028', 'would_claim_child_benefit__2029', 'would_claim_child_benefit__2030', 'child_benefit_reported__2023', 'child_benefit_reported__2024', 'child_benefit_reported__2025', 'child_benefit_reported__2026', 'child_benefit_reported__2027', 'child_benefit_reported__2028', 'child_benefit_reported__2029', 'child_benefit_reported__2030', 'capital_gains_before_response__2023', 'capital_gains_before_response__2024', 'capital_gains_before_response__2025', 'capital_gains_before_response__2026', 'capital_gains_before_response__2027', 'capital_gains_before_response__2028', 'capital_gains_before_response__2029', 'capital_gains_before_response__2030', 'tax_free_savings_income__2023', 'tax_free_savings_income__2024', 
'tax_free_savings_income__2025', 'tax_free_savings_income__2026', 'tax_free_savings_income__2027', 'tax_free_savings_income__2028', 'tax_free_savings_income__2029', 'tax_free_savings_income__2030', 'would_claim_tfc__2023', 'would_claim_tfc__2024', 'would_claim_tfc__2025', 'would_claim_tfc__2026', 'would_claim_tfc__2027', 'would_claim_tfc__2028', 'would_claim_tfc__2029', 'would_claim_tfc__2030', 'student_loan_plan__2023', 'student_loan_plan__2024', 'student_loan_plan__2025', 'student_loan_plan__2026', 'student_loan_plan__2027', 'student_loan_plan__2028', 'student_loan_plan__2029', 'student_loan_plan__2030', 'domestic_rates__2023', 'domestic_rates__2024', 'domestic_rates__2025', 'domestic_rates__2026', 'domestic_rates__2027', 'domestic_rates__2028', 'domestic_rates__2029', 'domestic_rates__2030']\n",
-      "DataFrame shape: (115612, 1127)\n",
-      "'person_household_id' columns: ['person_household_id__2023', 'person_household_id__2024', 'person_household_id__2025', 'person_household_id__2026', 'person_household_id__2027', 'person_household_id__2028', 'person_household_id__2029', 'person_household_id__2030']\n",
-      "Filtered DataFrame shape: (8470, 1127)\n",
-      "[ERROR] ValueError: Unable to set value \"[ 39361.   39361.   39361.  ... 134410.5 134410.5   6000. ]\" for variable \"corporate_wealth\", as its length is 8470 while there are 4108 households in the simulation.\n",
-      "\n",
-      "This confirms the bug exists in the high-level API.\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Traceback (most recent call last):\n",
-      "  File \"/var/folders/qf/7fglpnr94wsc5xgbqkp80xbw0000gn/T/ipykernel_19066/3661659745.py\", line 7, in <module>\n",
-      "    sim_wales = Simulation(country=\"uk\", scope=\"macro\", region=\"country/wales\")\n",
-      "  File \"/Users/administrator/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py\", line 110, in __init__\n",
-      "    self._initialise_simulations()\n",
-      "    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^\n",
-      "  File \"/Users/administrator/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py\", line 202, in _initialise_simulations\n",
-      "    self.baseline_simulation = self._initialise_simulation(\n",
-      "                               ~~~~~~~~~~~~~~~~~~~~~~~~~~~^\n",
-      "        scope=self.options.scope,\n",
-      "        ^^^^^^^^^^^^^^^^^^^^^^^^^\n",
-      "    ...<5 lines>...\n",
-      "        subsample=self.options.subsample,\n",
-      "        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
-      "    )\n",
-      "    ^\n",
-      "  File \"/Users/administrator/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py\", line 260, in _initialise_simulation\n",
-      "    simulation = self._apply_region_to_simulation(\n",
-      "        country=country,\n",
-      "    ...<4 lines>...\n",
-      "        time_period=time_period,\n",
-      "    )\n",
-      "  File \"/Users/administrator/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py\", line 307, in _apply_region_to_simulation\n",
-      "    simulation = simulation_type(\n",
-      "        dataset=filtered_df, reform=reform\n",
-      "    )\n",
-      "  File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_uk/simulation.py\", line 100, in __init__\n",
-      "    self.build_from_dataframe(dataset)\n",
-      "    ~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^\n",
-      "  File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_uk/simulation.py\", line 286, in build_from_dataframe\n",
-      "    self.set_input(variable, time_period, df[column])\n",
-      "    ~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
-      "  File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/simulations/simulation.py\", line 1241, in set_input\n",
-      "    self.get_holder(variable_name).set_input(\n",
-      "    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^\n",
-      "        period, value, self.branch_name\n",
-      "        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
-      "    )\n",
-      "    ^\n",
-      "  File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/holders/holder.py\", line 244, in set_input\n",
-      "    return self._set(period, array, branch_name)\n",
-      "           ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
-      "  File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/holders/holder.py\", line 286, in _set\n",
-      "    value = self._to_array(value)\n",
-      "  File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/holders/holder.py\", line 253, in _to_array\n",
-      "    raise ValueError(\n",
-      "    ...<7 lines>...\n",
-      "    )\n",
-      "ValueError: Unable to set value \"[ 39361.   39361.   39361.  ... 134410.5 134410.5   6000. ]\" for variable \"corporate_wealth\", as its length is 8470 while there are 4108 households in the simulation.\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Test with policyengine.Simulation using region=\"country/wales\"\n",
-    "print(\"=== Step 7: Test with policyengine.Simulation ===\")\n",
-    "print(\"Creating Simulation with region='country/wales'...\")\n",
-    "print()\n",
-    "\n",
-    "try:\n",
-    "    sim_wales = Simulation(country=\"uk\", scope=\"macro\", region=\"country/wales\")\n",
-    "    \n",
-    "    wales_underlying = sim_wales.baseline_simulation\n",
-    "    print(f\"Wales simulation created!\")\n",
-    "    print(f\"  Person count: {wales_underlying.persons.count}\")\n",
-    "    print(f\"  Household count: {wales_underlying.household.count}\")\n",
-    "    \n",
-    "    # Try calculating the problematic variable\n",
-    "    print(\"\\nCalculating would_evade_tv_licence_fee...\")\n",
-    "    result = sim_wales.calculate(\"would_evade_tv_licence_fee\")\n",
-    "    print(f\"  Result length: {len(result)}\")\n",
-    "    print(\"  [OK] No error!\")\n",
-    "    \n",
-    "except Exception as e:\n",
-    "    print(f\"[ERROR] {type(e).__name__}: {e}\")\n",
-    "    print()\n",
-    "    print(\"This confirms the bug exists in the high-level API.\")\n",
-    "    traceback.print_exc()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "cell-23",
-   "metadata": {},
-   "source": [
-    "## Step 8: Compare with Constituency Filtering (Should Work)\n",
-    "\n",
-    "Constituency filtering uses weight adjustment instead of DataFrame subsetting."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "id": "cell-24",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "No data provided, using default dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n",
-      "Using dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n",
-      "Downloading enhanced_frs_2023_24.h5 from bucket policyengine-uk-data-private\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "=== Step 8: Test Constituency Filtering ===\n",
-      "Creating Simulation with region='constituency/Cardiff South and Penarth'...\n",
-      "\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "WARNING:root:No version specified for policyengine-uk-data-private, enhanced_frs_2023_24.h5. Using latest version: 1.29.4\n",
-      "WARNING:root:No metadata found for blob policyengine-uk-data-private, constituencies_2024.csv, so it has no version attached.\n",
-      "WARNING:root:No version specified for policyengine-uk-data-private, constituencies_2024.csv. Using latest version: None\n",
-      "WARNING:root:No version specified for policyengine-uk-data-private, parliamentary_constituency_weights.h5. Using latest version: 1.29.4\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Constituency simulation created!\n",
-      "  Person count: 115612\n",
-      "  Household count: 53508\n",
-      "  (Full UK counts, but weights adjusted for constituency)\n",
-      "\n",
-      "Calculating would_evade_tv_licence_fee...\n",
-      "[ERROR] AttributeError: 'Simulation' object has no attribute 'calculate'\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Traceback (most recent call last):\n",
-      "  File \"/var/folders/qf/7fglpnr94wsc5xgbqkp80xbw0000gn/T/ipykernel_19066/2462177757.py\", line 21, in <module>\n",
-      "    result = sim_const.calculate(\"would_evade_tv_licence_fee\")\n",
-      "             ^^^^^^^^^^^^^^^^^^^\n",
-      "AttributeError: 'Simulation' object has no attribute 'calculate'\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Test constituency filtering\n",
-    "print(\"=== Step 8: Test Constituency Filtering ===\")\n",
-    "print(\"Creating Simulation with region='constituency/Cardiff South and Penarth'...\")\n",
-    "print()\n",
-    "\n",
-    "try:\n",
-    "    sim_const = Simulation(\n",
-    "        country=\"uk\", \n",
-    "        scope=\"macro\", \n",
-    "        region=\"constituency/Cardiff South and Penarth\"\n",
-    "    )\n",
-    "    \n",
-    "    const_underlying = sim_const.baseline_simulation\n",
-    "    print(f\"Constituency simulation created!\")\n",
-    "    print(f\"  Person count: {const_underlying.persons.count}\")\n",
-    "    print(f\"  Household count: {const_underlying.household.count}\")\n",
-    "    print(\"  (Full UK counts, but weights adjusted for constituency)\")\n",
-    "    \n",
-    "    # Try calculating the problematic variable\n",
-    "    print(\"\\nCalculating would_evade_tv_licence_fee...\")\n",
-    "    result = sim_const.calculate(\"would_evade_tv_licence_fee\")\n",
-    "    print(f\"  Result length: {len(result)}\")\n",
-    "    print(\"  [OK] Constituency filtering works!\")\n",
-    "    \n",
-    "except Exception as e:\n",
-    "    print(f\"[ERROR] {type(e).__name__}: {e}\")\n",
-    "    traceback.print_exc()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "id": "cell-25",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "=== Step 8b: Test Local Authority Filtering ===\n",
-      "Creating Simulation with region='local_authority/Cardiff'...\n",
-      "\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "No data provided, using default dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n",
-      "Using dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n",
-      "Downloading enhanced_frs_2023_24.h5 from bucket policyengine-uk-data-private\n",
-      "WARNING:root:No version specified for policyengine-uk-data-private, enhanced_frs_2023_24.h5. Using latest version: 1.29.4\n",
-      "WARNING:root:No metadata found for blob policyengine-uk-data-private, local_authorities_2021.csv, so it has no version attached.\n",
-      "WARNING:root:No version specified for policyengine-uk-data-private, local_authorities_2021.csv. Using latest version: None\n",
-      "WARNING:root:No version specified for policyengine-uk-data-private, local_authority_weights.h5. Using latest version: 1.29.4\n"
-     ]
-    },
-    {
-     "ename": "KeyboardInterrupt",
-     "evalue": "",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[17], line 7\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[38;5;28mprint\u001b[39m()\n\u001b[1;32m      6\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m----> 7\u001b[0m     sim_la \u001b[38;5;241m=\u001b[39m \u001b[43mSimulation\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m      8\u001b[0m \u001b[43m        \u001b[49m\u001b[43mcountry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muk\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m      9\u001b[0m \u001b[43m        \u001b[49m\u001b[43mscope\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmacro\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m     10\u001b[0m \u001b[43m        \u001b[49m\u001b[43mregion\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlocal_authority/Cardiff\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m     11\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     13\u001b[0m     la_underlying \u001b[38;5;241m=\u001b[39m sim_la\u001b[38;5;241m.\u001b[39mbaseline_simulation\n\u001b[1;32m     14\u001b[0m     \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mLocal Authority simulation created!\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
-      "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py:110\u001b[0m, in \u001b[0;36mSimulation.__init__\u001b[0;34m(self, **options)\u001b[0m\n\u001b[1;32m    108\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_set_data(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mdata)\n\u001b[1;32m    109\u001b[0m     logging\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mData loaded\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 110\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_initialise_simulations\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    111\u001b[0m logging\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSimulations initialised\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m    112\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcheck_data_version()\n",
-      "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py:202\u001b[0m, in \u001b[0;36mSimulation._initialise_simulations\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    201\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_initialise_simulations\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m--> 202\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbaseline_simulation \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_initialise_simulation\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    203\u001b[0m \u001b[43m        \u001b[49m\u001b[43mscope\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mscope\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    204\u001b[0m \u001b[43m        \u001b[49m\u001b[43mcountry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcountry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    205\u001b[0m \u001b[43m        \u001b[49m\u001b[43mreform\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbaseline\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    206\u001b[0m \u001b[43m        \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    207\u001b[0m \u001b[43m        
\u001b[49m\u001b[43mtime_period\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtime_period\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    208\u001b[0m \u001b[43m        \u001b[49m\u001b[43mregion\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mregion\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    209\u001b[0m \u001b[43m        \u001b[49m\u001b[43msubsample\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msubsample\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    210\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    212\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mreform \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    213\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreform_simulation \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_initialise_simulation(\n\u001b[1;32m    214\u001b[0m             scope\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mscope,\n\u001b[1;32m    215\u001b[0m             country\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mcountry,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    220\u001b[0m             subsample\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39msubsample,\n\u001b[1;32m    
221\u001b[0m         )\n",
-      "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py:260\u001b[0m, in \u001b[0;36mSimulation._initialise_simulation\u001b[0;34m(self, country, scope, reform, data, time_period, region, subsample)\u001b[0m\n\u001b[1;32m    257\u001b[0m simulation\u001b[38;5;241m.\u001b[39mdefault_calculation_period \u001b[38;5;241m=\u001b[39m time_period\n\u001b[1;32m    259\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m region \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 260\u001b[0m     simulation \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_apply_region_to_simulation\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    261\u001b[0m \u001b[43m        \u001b[49m\u001b[43mcountry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcountry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    262\u001b[0m \u001b[43m        \u001b[49m\u001b[43msimulation\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msimulation\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    263\u001b[0m \u001b[43m        \u001b[49m\u001b[43msimulation_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_simulation_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    264\u001b[0m \u001b[43m        \u001b[49m\u001b[43mregion\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mregion\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    265\u001b[0m \u001b[43m        \u001b[49m\u001b[43mreform\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreform\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    266\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtime_period\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtime_period\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    267\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    269\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m subsample \u001b[38;5;129;01mis\u001b[39;00m 
\u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    270\u001b[0m     simulation \u001b[38;5;241m=\u001b[39m simulation\u001b[38;5;241m.\u001b[39msubsample(subsample)\n",
-      "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py:366\u001b[0m, in \u001b[0;36mSimulation._apply_region_to_simulation\u001b[0;34m(self, country, simulation, simulation_type, region, reform, time_period)\u001b[0m\n\u001b[1;32m    362\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    363\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m    364\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mLocal authority \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mla\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m not found. See \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mla_names_local_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m for the list of available local authorities.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    365\u001b[0m     )\n\u001b[0;32m--> 366\u001b[0m weights_local_path \u001b[38;5;241m=\u001b[39m \u001b[43mdownload\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    367\u001b[0m \u001b[43m    \u001b[49m\u001b[43mgcs_bucket\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpolicyengine-uk-data-private\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m    368\u001b[0m \u001b[43m    \u001b[49m\u001b[43mgcs_key\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlocal_authority_weights.h5\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m    369\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    371\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m h5py\u001b[38;5;241m.\u001b[39mFile(weights_local_path, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[1;32m    372\u001b[0m     weights \u001b[38;5;241m=\u001b[39m 
f[\u001b[38;5;28mstr\u001b[39m(time_period)][\u001b[38;5;241m.\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;241m.\u001b[39m]\n",
-      "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/utils/data_download.py:38\u001b[0m, in \u001b[0;36mdownload\u001b[0;34m(gcs_key, gcs_bucket, version, return_version)\u001b[0m\n\u001b[1;32m     21\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m     22\u001b[0m \u001b[38;5;124;03mDownload a file from Google Cloud Storage.\u001b[39;00m\n\u001b[1;32m     23\u001b[0m \n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     35\u001b[0m \u001b[38;5;124;03m    Otherwise: just the local_path string\u001b[39;00m\n\u001b[1;32m     36\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m     37\u001b[0m logging\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUsing Google Cloud Storage for download.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 38\u001b[0m local_path, downloaded_version \u001b[38;5;241m=\u001b[39m \u001b[43mdownload_file_from_gcs\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m     39\u001b[0m \u001b[43m    \u001b[49m\u001b[43mbucket_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgcs_bucket\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     40\u001b[0m \u001b[43m    \u001b[49m\u001b[43mgcs_key\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgcs_key\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     41\u001b[0m \u001b[43m    \u001b[49m\u001b[43mversion\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mversion\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     42\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     43\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m return_version:\n\u001b[1;32m     44\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m local_path, downloaded_version\n",
-      "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/utils/google_cloud_bucket.py:75\u001b[0m, in \u001b[0;36mdownload_file_from_gcs\u001b[0;34m(bucket_name, gcs_key, version)\u001b[0m\n\u001b[1;32m     72\u001b[0m local_path \u001b[38;5;241m=\u001b[39m DATASETS_DIR \u001b[38;5;241m/\u001b[39m gcs_key\n\u001b[1;32m     73\u001b[0m local_path\u001b[38;5;241m.\u001b[39mparent\u001b[38;5;241m.\u001b[39mmkdir(parents\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, exist_ok\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m---> 75\u001b[0m version \u001b[38;5;241m=\u001b[39m \u001b[43m_get_client\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdownload\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m     76\u001b[0m \u001b[43m    \u001b[49m\u001b[43mbucket_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     77\u001b[0m \u001b[43m    \u001b[49m\u001b[43mgcs_key\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     78\u001b[0m \u001b[43m    \u001b[49m\u001b[43mlocal_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     79\u001b[0m \u001b[43m    \u001b[49m\u001b[43mversion\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mversion\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     80\u001b[0m \u001b[43m    \u001b[49m\u001b[43mreturn_version\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m     81\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     82\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mstr\u001b[39m(local_path), version\n",
-      "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/utils/data/caching_google_storage_client.py:64\u001b[0m, in \u001b[0;36mCachingGoogleStorageClient.download\u001b[0;34m(self, bucket, key, target, version, return_version)\u001b[0m\n\u001b[1;32m     60\u001b[0m     version \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclient\u001b[38;5;241m.\u001b[39m_get_latest_version(bucket, key)\n\u001b[1;32m     61\u001b[0m     logging\u001b[38;5;241m.\u001b[39mwarning(\n\u001b[1;32m     62\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo version specified for \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbucket\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m, \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkey\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m. Using latest version: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mversion\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m     63\u001b[0m     )\n\u001b[0;32m---> 64\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msync\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbucket\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mversion\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     65\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcache\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_data_key(bucket, key, version))\n\u001b[1;32m     66\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(data) \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28mbytes\u001b[39m:\n",
-      "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/utils/data/caching_google_storage_client.py:106\u001b[0m, in \u001b[0;36mCachingGoogleStorageClient.sync\u001b[0;34m(self, bucket, key, version)\u001b[0m\n\u001b[1;32m    104\u001b[0m     logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCache exists and crc is unchanged for \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mid_string\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m .\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m    105\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[0;32m--> 106\u001b[0m [content, downloaded_crc] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdownload\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    107\u001b[0m \u001b[43m    \u001b[49m\u001b[43mbucket\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mversion\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mversion\u001b[49m\n\u001b[1;32m    108\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    109\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\n\u001b[1;32m    110\u001b[0m     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDownloaded new version of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mid_string\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m with crc \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdownloaded_crc\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    111\u001b[0m )\n\u001b[1;32m    113\u001b[0m \u001b[38;5;66;03m# atomic transaction to update both the data and the metadata\u001b[39;00m\n\u001b[1;32m    114\u001b[0m \u001b[38;5;66;03m# at the same time.\u001b[39;00m\n",
-      "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/utils/data/version_aware_storage_client.py:171\u001b[0m, in \u001b[0;36mVersionAwareStorageClient.download\u001b[0;34m(self, bucket_name, key, version)\u001b[0m\n\u001b[1;32m    166\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\n\u001b[1;32m    167\u001b[0m     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDownloading \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbucket_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkey\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    168\u001b[0m     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, version: \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;250m \u001b[39m\u001b[38;5;241m+\u001b[39m\u001b[38;5;250m \u001b[39mversion\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mif\u001b[39;00m\u001b[38;5;250m \u001b[39mversion\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01melse\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    169\u001b[0m )\n\u001b[1;32m    170\u001b[0m blob \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_blob(bucket_name, key, version)\n\u001b[0;32m--> 171\u001b[0m content \u001b[38;5;241m=\u001b[39m \u001b[43mblob\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdownload_as_bytes\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    172\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\n\u001b[1;32m    173\u001b[0m     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDownloaded 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbucket_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkey\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    174\u001b[0m     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, version: \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;250m \u001b[39m\u001b[38;5;241m+\u001b[39m\u001b[38;5;250m \u001b[39mversion\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mif\u001b[39;00m\u001b[38;5;250m \u001b[39mversion\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01melse\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    175\u001b[0m )\n\u001b[1;32m    176\u001b[0m \u001b[38;5;66;03m# According to documentation, blob.crc32c is updated as a side effect of\u001b[39;00m\n\u001b[1;32m    177\u001b[0m \u001b[38;5;66;03m# downloading the content. This should be the CRC of the downloaded\u001b[39;00m\n\u001b[1;32m    178\u001b[0m \u001b[38;5;66;03m# content (avoiding race conditions with the cloud).\u001b[39;00m\n",
-      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/cloud/storage/blob.py:1530\u001b[0m, in \u001b[0;36mBlob.download_as_bytes\u001b[0;34m(self, client, start, end, raw_download, if_etag_match, if_etag_not_match, if_generation_match, if_generation_not_match, if_metageneration_match, if_metageneration_not_match, timeout, checksum, retry, single_shot_download)\u001b[0m\n\u001b[1;32m   1527\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m create_trace_span(name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mStorage.Blob.downloadAsBytes\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m   1528\u001b[0m     string_buffer \u001b[38;5;241m=\u001b[39m BytesIO()\n\u001b[0;32m-> 1530\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_prep_and_do_download\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1531\u001b[0m \u001b[43m        \u001b[49m\u001b[43mstring_buffer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1532\u001b[0m \u001b[43m        \u001b[49m\u001b[43mclient\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mclient\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1533\u001b[0m \u001b[43m        \u001b[49m\u001b[43mstart\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1534\u001b[0m \u001b[43m        \u001b[49m\u001b[43mend\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mend\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1535\u001b[0m \u001b[43m        \u001b[49m\u001b[43mraw_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mraw_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1536\u001b[0m \u001b[43m        \u001b[49m\u001b[43mif_etag_match\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mif_etag_match\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1537\u001b[0m \u001b[43m        
\u001b[49m\u001b[43mif_etag_not_match\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mif_etag_not_match\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1538\u001b[0m \u001b[43m        \u001b[49m\u001b[43mif_generation_match\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mif_generation_match\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1539\u001b[0m \u001b[43m        \u001b[49m\u001b[43mif_generation_not_match\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mif_generation_not_match\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1540\u001b[0m \u001b[43m        \u001b[49m\u001b[43mif_metageneration_match\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mif_metageneration_match\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1541\u001b[0m \u001b[43m        \u001b[49m\u001b[43mif_metageneration_not_match\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mif_metageneration_not_match\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1542\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1543\u001b[0m \u001b[43m        \u001b[49m\u001b[43mchecksum\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchecksum\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1544\u001b[0m \u001b[43m        \u001b[49m\u001b[43mretry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1545\u001b[0m \u001b[43m        \u001b[49m\u001b[43msingle_shot_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msingle_shot_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1546\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1547\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m string_buffer\u001b[38;5;241m.\u001b[39mgetvalue()\n",
-      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/cloud/storage/blob.py:4659\u001b[0m, in \u001b[0;36mBlob._prep_and_do_download\u001b[0;34m(self, file_obj, client, start, end, raw_download, if_etag_match, if_etag_not_match, if_generation_match, if_generation_not_match, if_metageneration_match, if_metageneration_not_match, timeout, checksum, retry, single_shot_download, command)\u001b[0m\n\u001b[1;32m   4656\u001b[0m transport \u001b[38;5;241m=\u001b[39m client\u001b[38;5;241m.\u001b[39m_http\n\u001b[1;32m   4658\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 4659\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_do_download\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   4660\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtransport\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4661\u001b[0m \u001b[43m        \u001b[49m\u001b[43mfile_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4662\u001b[0m \u001b[43m        \u001b[49m\u001b[43mdownload_url\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4663\u001b[0m \u001b[43m        \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4664\u001b[0m \u001b[43m        \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4665\u001b[0m \u001b[43m        \u001b[49m\u001b[43mend\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4666\u001b[0m \u001b[43m        \u001b[49m\u001b[43mraw_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4667\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4668\u001b[0m \u001b[43m        \u001b[49m\u001b[43mchecksum\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchecksum\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4669\u001b[0m \u001b[43m        
\u001b[49m\u001b[43mretry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4670\u001b[0m \u001b[43m        \u001b[49m\u001b[43msingle_shot_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msingle_shot_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   4671\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   4672\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m InvalidResponse \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[1;32m   4673\u001b[0m     _raise_from_invalid_response(exc)\n",
-      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/cloud/storage/blob.py:1094\u001b[0m, in \u001b[0;36mBlob._do_download\u001b[0;34m(self, transport, file_obj, download_url, headers, start, end, raw_download, timeout, checksum, retry, single_shot_download)\u001b[0m\n\u001b[1;32m   1076\u001b[0m     download \u001b[38;5;241m=\u001b[39m klass(\n\u001b[1;32m   1077\u001b[0m         download_url,\n\u001b[1;32m   1078\u001b[0m         stream\u001b[38;5;241m=\u001b[39mfile_obj,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   1087\u001b[0m         single_shot_download\u001b[38;5;241m=\u001b[39msingle_shot_download,\n\u001b[1;32m   1088\u001b[0m     )\n\u001b[1;32m   1089\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m create_trace_span(\n\u001b[1;32m   1090\u001b[0m         name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mStorage.\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdownload_class\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/consume\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m   1091\u001b[0m         attributes\u001b[38;5;241m=\u001b[39mextra_attributes,\n\u001b[1;32m   1092\u001b[0m         api_request\u001b[38;5;241m=\u001b[39margs,\n\u001b[1;32m   1093\u001b[0m     ):\n\u001b[0;32m-> 1094\u001b[0m         response \u001b[38;5;241m=\u001b[39m \u001b[43mdownload\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconsume\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtransport\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1095\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_extract_headers_from_download(response)\n\u001b[1;32m   1096\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
-      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/cloud/storage/_media/requests/download.py:280\u001b[0m, in \u001b[0;36mDownload.consume\u001b[0;34m(self, transport, timeout)\u001b[0m\n\u001b[1;32m    276\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_write_to_stream(result)\n\u001b[1;32m    278\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m result\n\u001b[0;32m--> 280\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_request_helpers\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwait_and_retry\u001b[49m\u001b[43m(\u001b[49m\u001b[43mretriable_request\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_strategy\u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/cloud/storage/_media/requests/_request_helpers.py:107\u001b[0m, in \u001b[0;36mwait_and_retry\u001b[0;34m(func, retry_strategy)\u001b[0m\n\u001b[1;32m    105\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m retry_strategy:\n\u001b[1;32m    106\u001b[0m     func \u001b[38;5;241m=\u001b[39m retry_strategy(func)\n\u001b[0;32m--> 107\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:294\u001b[0m, in \u001b[0;36mRetry.__call__.<locals>.retry_wrapped_func\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    290\u001b[0m target \u001b[38;5;241m=\u001b[39m functools\u001b[38;5;241m.\u001b[39mpartial(func, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m    291\u001b[0m sleep_generator \u001b[38;5;241m=\u001b[39m exponential_sleep_generator(\n\u001b[1;32m    292\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_initial, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_maximum, multiplier\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_multiplier\n\u001b[1;32m    293\u001b[0m )\n\u001b[0;32m--> 294\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mretry_target\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    295\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtarget\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    296\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_predicate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    297\u001b[0m \u001b[43m    \u001b[49m\u001b[43msleep_generator\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    298\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    299\u001b[0m \u001b[43m    \u001b[49m\u001b[43mon_error\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mon_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    300\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:147\u001b[0m, in \u001b[0;36mretry_target\u001b[0;34m(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)\u001b[0m\n\u001b[1;32m    145\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[1;32m    146\u001b[0m     \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 147\u001b[0m         result \u001b[38;5;241m=\u001b[39m \u001b[43mtarget\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    148\u001b[0m         \u001b[38;5;28;01mif\u001b[39;00m inspect\u001b[38;5;241m.\u001b[39misawaitable(result):\n\u001b[1;32m    149\u001b[0m             warnings\u001b[38;5;241m.\u001b[39mwarn(_ASYNC_RETRY_WARNING)\n",
-      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/cloud/storage/_media/requests/download.py:276\u001b[0m, in \u001b[0;36mDownload.consume.<locals>.retriable_request\u001b[0;34m()\u001b[0m\n\u001b[1;32m    273\u001b[0m             \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(msg) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mexc\u001b[39;00m\n\u001b[1;32m    274\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_bytes_downloaded \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m--> 276\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_write_to_stream\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresult\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    278\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m result\n",
-      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/cloud/storage/_media/requests/download.py:149\u001b[0m, in \u001b[0;36mDownload._write_to_stream\u001b[0;34m(self, response)\u001b[0m\n\u001b[1;32m    144\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    145\u001b[0m     body_iter \u001b[38;5;241m=\u001b[39m response\u001b[38;5;241m.\u001b[39miter_content(\n\u001b[1;32m    146\u001b[0m         chunk_size\u001b[38;5;241m=\u001b[39m_request_helpers\u001b[38;5;241m.\u001b[39m_SINGLE_GET_CHUNK_SIZE,\n\u001b[1;32m    147\u001b[0m         decode_unicode\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m    148\u001b[0m     )\n\u001b[0;32m--> 149\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mchunk\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mbody_iter\u001b[49m\u001b[43m:\u001b[49m\n\u001b[1;32m    150\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_stream\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwrite\u001b[49m\u001b[43m(\u001b[49m\u001b[43mchunk\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    151\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_bytes_downloaded\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mchunk\u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/requests/models.py:820\u001b[0m, in \u001b[0;36mResponse.iter_content.<locals>.generate\u001b[0;34m()\u001b[0m\n\u001b[1;32m    818\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraw, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstream\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m    819\u001b[0m     \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 820\u001b[0m         \u001b[38;5;28;01myield from\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraw\u001b[38;5;241m.\u001b[39mstream(chunk_size, decode_content\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m    821\u001b[0m     \u001b[38;5;28;01mexcept\u001b[39;00m ProtocolError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m    822\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m ChunkedEncodingError(e)\n",
-      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/urllib3/response.py:1253\u001b[0m, in \u001b[0;36mHTTPResponse.stream\u001b[0;34m(self, amt, decode_content)\u001b[0m\n\u001b[1;32m   1247\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   1248\u001b[0m     \u001b[38;5;28;01mwhile\u001b[39;00m (\n\u001b[1;32m   1249\u001b[0m         \u001b[38;5;129;01mnot\u001b[39;00m is_fp_closed(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp)\n\u001b[1;32m   1250\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoded_buffer) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m   1251\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoder \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoder\u001b[38;5;241m.\u001b[39mhas_unconsumed_tail)\n\u001b[1;32m   1252\u001b[0m     ):\n\u001b[0;32m-> 1253\u001b[0m         data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mamt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_content\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1255\u001b[0m         \u001b[38;5;28;01mif\u001b[39;00m data:\n\u001b[1;32m   1256\u001b[0m             \u001b[38;5;28;01myield\u001b[39;00m data\n",
-      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/urllib3/response.py:1108\u001b[0m, in \u001b[0;36mHTTPResponse.read\u001b[0;34m(self, amt, decode_content, cache_content)\u001b[0m\n\u001b[1;32m   1105\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoded_buffer) \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m amt:\n\u001b[1;32m   1106\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoded_buffer\u001b[38;5;241m.\u001b[39mget(amt)\n\u001b[0;32m-> 1108\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_raw_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1110\u001b[0m flush_decoder \u001b[38;5;241m=\u001b[39m amt \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mor\u001b[39;00m (amt \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m data)\n\u001b[1;32m   1112\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m   1113\u001b[0m     \u001b[38;5;129;01mnot\u001b[39;00m data\n\u001b[1;32m   1114\u001b[0m     \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoded_buffer) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m   1115\u001b[0m     \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoder \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoder\u001b[38;5;241m.\u001b[39mhas_unconsumed_tail)\n\u001b[1;32m   1116\u001b[0m ):\n",
-      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/urllib3/response.py:1024\u001b[0m, in \u001b[0;36mHTTPResponse._raw_read\u001b[0;34m(self, amt, read1)\u001b[0m\n\u001b[1;32m   1021\u001b[0m fp_closed \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mclosed\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m   1023\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_error_catcher():\n\u001b[0;32m-> 1024\u001b[0m     data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fp_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mread1\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mread1\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m fp_closed \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   1025\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m amt \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m amt \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m data:\n\u001b[1;32m   1026\u001b[0m         \u001b[38;5;66;03m# Platform-specific: Buggy versions of Python.\u001b[39;00m\n\u001b[1;32m   1027\u001b[0m         \u001b[38;5;66;03m# Close the connection when no data is returned\u001b[39;00m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   1032\u001b[0m         \u001b[38;5;66;03m# not properly close the connection in all cases. 
There is\u001b[39;00m\n\u001b[1;32m   1033\u001b[0m         \u001b[38;5;66;03m# no harm in redundantly calling close.\u001b[39;00m\n\u001b[1;32m   1034\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mclose()\n",
-      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/urllib3/response.py:1007\u001b[0m, in \u001b[0;36mHTTPResponse._fp_read\u001b[0;34m(self, amt, read1)\u001b[0m\n\u001b[1;32m   1004\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mread1(amt) \u001b[38;5;28;01mif\u001b[39;00m amt \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mread1()\n\u001b[1;32m   1005\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   1006\u001b[0m     \u001b[38;5;66;03m# StringIO doesn't like amt=None\u001b[39;00m\n\u001b[0;32m-> 1007\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m amt \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mread()\n",
-      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/http/client.py:479\u001b[0m, in \u001b[0;36mHTTPResponse.read\u001b[0;34m(self, amt)\u001b[0m\n\u001b[1;32m    476\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlength \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m amt \u001b[38;5;241m>\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlength:\n\u001b[1;32m    477\u001b[0m     \u001b[38;5;66;03m# clip the read to the \"end of response\"\u001b[39;00m\n\u001b[1;32m    478\u001b[0m     amt \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlength\n\u001b[0;32m--> 479\u001b[0m s \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    480\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m s \u001b[38;5;129;01mand\u001b[39;00m amt:\n\u001b[1;32m    481\u001b[0m     \u001b[38;5;66;03m# Ideally, we would raise IncompleteRead if the content-length\u001b[39;00m\n\u001b[1;32m    482\u001b[0m     \u001b[38;5;66;03m# wasn't satisfied, but it might break compatibility.\u001b[39;00m\n\u001b[1;32m    483\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_close_conn()\n",
-      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/socket.py:719\u001b[0m, in \u001b[0;36mSocketIO.readinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m    717\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mOSError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcannot read from timed out object\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m    718\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 719\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrecv_into\u001b[49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    720\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m timeout:\n\u001b[1;32m    721\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_timeout_occurred \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
-      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/ssl.py:1304\u001b[0m, in \u001b[0;36mSSLSocket.recv_into\u001b[0;34m(self, buffer, nbytes, flags)\u001b[0m\n\u001b[1;32m   1300\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m flags \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m   1301\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m   1302\u001b[0m           \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnon-zero flags not allowed in calls to recv_into() on \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m\n\u001b[1;32m   1303\u001b[0m           \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m)\n\u001b[0;32m-> 1304\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnbytes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1305\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   1306\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39mrecv_into(buffer, nbytes, flags)\n",
-      "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/ssl.py:1138\u001b[0m, in \u001b[0;36mSSLSocket.read\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m   1136\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m   1137\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m buffer \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1138\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sslobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1139\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   1140\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_sslobj\u001b[38;5;241m.\u001b[39mread(\u001b[38;5;28mlen\u001b[39m)\n",
-      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
-     ]
-    }
-   ],
-   "source": [
-    "# Test local authority filtering\n",
-    "print(\"\\n=== Step 8b: Test Local Authority Filtering ===\")\n",
-    "print(\"Creating Simulation with region='local_authority/Cardiff'...\")\n",
-    "print()\n",
-    "\n",
-    "try:\n",
-    "    sim_la = Simulation(\n",
-    "        country=\"uk\", \n",
-    "        scope=\"macro\", \n",
-    "        region=\"local_authority/Cardiff\"\n",
-    "    )\n",
-    "    \n",
-    "    la_underlying = sim_la.baseline_simulation\n",
-    "    print(f\"Local Authority simulation created!\")\n",
-    "    print(f\"  Person count: {la_underlying.persons.count}\")\n",
-    "    print(f\"  Household count: {la_underlying.household.count}\")\n",
-    "    print(\"  (Full UK counts, but weights adjusted for LA)\")\n",
-    "    \n",
-    "    # Try calculating the problematic variable\n",
-    "    print(\"\\nCalculating would_evade_tv_licence_fee...\")\n",
-    "    result = sim_la.calculate(\"would_evade_tv_licence_fee\")\n",
-    "    print(f\"  Result length: {len(result)}\")\n",
-    "    print(\"  [OK] Local authority filtering works!\")\n",
-    "    \n",
-    "except Exception as e:\n",
-    "    print(f\"[ERROR] {type(e).__name__}: {e}\")\n",
-    "    traceback.print_exc()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "cell-26",
-   "metadata": {},
-   "source": [
-    "## Step 9: Deep Dive - Check random() Function Behavior"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "cell-27",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Check what random(household) would return in the broken simulation\n",
-    "print(\"=== Step 9: Investigate random() function behavior ===\")\n",
-    "\n",
-    "# Import the random function\n",
-    "from policyengine_core.commons.formulas import random\n",
-    "\n",
-    "try:\n",
-    "    # Get household population from the new (potentially broken) simulation\n",
-    "    hh_pop = new_sim.household\n",
-    "    print(f\"Household population count: {hh_pop.count}\")\n",
-    "    \n",
-    "    # Check what household_id returns when calculated via population\n",
-    "    print(\"\\nCalling hh_pop('household_id', 2025)...\")\n",
-    "    hh_ids_from_pop = hh_pop(\"household_id\", 2025)\n",
-    "    print(f\"  Result length: {len(hh_ids_from_pop)}\")\n",
-    "    print(f\"  Expected: {hh_pop.count}\")\n",
-    "    \n",
-    "    if len(hh_ids_from_pop) != hh_pop.count:\n",
-    "        print(f\"\\n  [BUG CONFIRMED] household_id returned {len(hh_ids_from_pop)} values\")\n",
-    "        print(f\"  but household population only has {hh_pop.count} entities!\")\n",
-    "        print(\"  This is why random(household) fails.\")\n",
-    "        \n",
-    "except Exception as e:\n",
-    "    print(f\"Error: {e}\")\n",
-    "    traceback.print_exc()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "cell-28",
-   "metadata": {},
-   "source": [
-    "## Summary and Conclusions"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "cell-29",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "print(\"=\"*70)\n",
-    "print(\"DIAGNOSTIC SUMMARY\")\n",
-    "print(\"=\"*70)\n",
-    "\n",
-    "print(\"\"\"\n",
-    "FINDINGS:\n",
-    "\n",
-    "1. COUNTRY FILTERING (country/wales):\n",
-    "   - Uses to_input_dataframe() + DataFrame subsetting + new Simulation()\n",
-    "   - Creates entity count mismatch between persons and households\n",
-    "   - Breaks when calculating variables that use random(household)\n",
-    "\n",
-    "2. CONSTITUENCY/LA FILTERING:\n",
-    "   - Uses weight adjustment on existing simulation\n",
-    "   - Preserves entity structure\n",
-    "   - Works correctly\n",
-    "\n",
-    "ROOT CAUSE:\n",
-    "   - The to_input_dataframe() -> filter -> new Simulation() approach\n",
-    "     doesn't properly preserve entity relationships\n",
-    "   - Either household_id isn't properly exported/imported, OR\n",
-    "   - The entity membership mapping gets corrupted during rebuild\n",
-    "\n",
-    "RECOMMENDED FIX:\n",
-    "   - Use weight-based filtering for country filtering (like constituency/LA)\n",
-    "   - Zero out weights for households not in the target country\n",
-    "   - This preserves entity structure and avoids the export/import complexity\n",
-    "\n",
-    "Example fix for policyengine/simulation.py:\n",
-    "\n",
-    "    if \"country/\" in region:\n",
-    "        country_name = region.split(\"/\")[1]\n",
-    "        country = simulation.calculate(\"country\", map_to=\"household\").values\n",
-    "        is_in_country = (country == country_name.upper())\n",
-    "        current_weights = simulation.calculate(\n",
-    "            \"household_weight\", simulation.default_calculation_period\n",
-    "        )\n",
-    "        simulation.set_input(\n",
-    "            \"household_weight\",\n",
-    "            simulation.default_calculation_period,\n",
-    "            current_weights * is_in_country  # Zero out non-matching\n",
-    "        )\n",
-    "\"\"\")"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "py-3.13",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.13.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}

From 76ec6827c76a45091bdde612a827ea3d1fd30d88 Mon Sep 17 00:00:00 2001
From: Anthony Volk <anth.volk@gmail.com>
Date: Tue, 16 Dec 2025 14:13:26 +0400
Subject: [PATCH 7/7] fix: Gitignore .DS_Store

---
 .DS_Store  | Bin 6148 -> 0 bytes
 .gitignore |   1 +
 2 files changed, 1 insertion(+)
 delete mode 100644 .DS_Store

diff --git a/.DS_Store b/.DS_Store
deleted file mode 100644
index b2a6b3e6c870dde3f033c72e8cfe8a9aab8f5326..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6148
zcmeHKOG-mQ5UkdK0XJD@IalxoLx?BH1p*O5K%yk-`mJ&<k7o4;F}!3WxRGk;u9=>$
zdAwS@eht7D?@tfF48WA`h?9r0`MLYZZYpC$I`4SJ8;&2V!~XeYRDC(&+&dhx!2!FU
z{QWj>y-ZV93P=GdAO)m=6!@hA-g{}&`$R=4AO)nrw*vlsXmrP3I3&iWgCRx$;)Lli
zu49%UHct?H;gHA-&5}w?s?~^LNoT%QT`wFGlMbul!|KUa6N<&td4G#?SWi@x0#e{y
zf!o~9y#GJaugw4FB<-Yt6!=#P*krj{F8E5-TSqVFy|&TUbg%iOyKx;9hG@scXvf@m
fJDx{T)-_-AycZ6ML1#YbMEwl7E;1?b*9v?AELauN

diff --git a/.gitignore b/.gitignore
index 91af8c3c1..ff41b4b5a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,6 +14,7 @@ dist/*
 **/*.h5
 **/*.csv.gz
 .env
+.DS_Store
 
 # Ignore generated credentials from google-github-actions/auth
 gha-creds-*.json