From a9e870e150f4958b6ef597c96b514d1fab2887f1 Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Sat, 13 Dec 2025 12:25:32 +0400 Subject: [PATCH 1/7] feat: Local authority breakdown --- policyengine_api/constants.py | 2 + policyengine_api/country.py | 12 + .../data/local_authorities_2021.csv | 361 ++++++++++++++++++ policyengine_api/endpoints/economy/compare.py | 98 +++++ tests/unit/services/test_metadata_service.py | 2 +- 5 files changed, 474 insertions(+), 1 deletion(-) create mode 100644 policyengine_api/data/local_authorities_2021.csv diff --git a/policyengine_api/constants.py b/policyengine_api/constants.py index c22dabaa2..d1dfac30b 100644 --- a/policyengine_api/constants.py +++ b/policyengine_api/constants.py @@ -38,6 +38,7 @@ "national", # National level (e.g., "uk") "country", # UK countries (e.g., "country/england", "country/scotland") "constituency", # UK parliamentary constituencies (e.g., "constituency/Aldershot") + "local_authority", # UK local authorities (e.g., "local_authority/Maidstone") ) # Valid region prefixes for each country @@ -51,6 +52,7 @@ "uk": [ "country/", # UK countries (e.g., "country/england", "country/scotland") "constituency/", # UK parliamentary constituencies (e.g., "constituency/Aldershot") + "local_authority/", # UK local authorities (e.g., "local_authority/Maidstone") ], } diff --git a/policyengine_api/country.py b/policyengine_api/country.py index 44cb47476..4c602b347 100644 --- a/policyengine_api/country.py +++ b/policyengine_api/country.py @@ -92,6 +92,18 @@ def build_microsimulation_options(self) -> dict: type="constituency", ) ) + local_authority_names_path = ( + Path(__file__).parent / "data" / "local_authorities_2021.csv" + ) + local_authority_names = pd.read_csv(local_authority_names_path) + for i in range(len(local_authority_names)): + region.append( + dict( + name=f"local_authority/{local_authority_names.iloc[i]['name']}", + label=local_authority_names.iloc[i]["name"], + type="local_authority", + ) + ) time_period = [ 
dict(name=2024, label="2024"), dict(name=2025, label="2025"), diff --git a/policyengine_api/data/local_authorities_2021.csv b/policyengine_api/data/local_authorities_2021.csv new file mode 100644 index 000000000..9fcf922ed --- /dev/null +++ b/policyengine_api/data/local_authorities_2021.csv @@ -0,0 +1,361 @@ +code,x,y,name +E06000001,8.0,19.0,Hartlepool +E06000002,9.0,18.0,Middlesbrough +E06000003,9.0,19.0,Redcar and Cleveland +E06000004,8.0,18.0,Stockton-on-Tees +E06000005,7.0,18.0,Darlington +E06000006,1.0,11.0,Halton +E06000007,2.0,11.0,Warrington +E06000008,4.0,15.0,Blackburn with Darwen +E06000009,2.0,15.0,Blackpool +E06000010,10.0,15.0,"Kingston upon Hull, City of" +E06000011,11.0,16.0,East Riding of Yorkshire +E06000012,11.0,14.0,North East Lincolnshire +E06000013,10.0,14.0,North Lincolnshire +E06000014,9.0,17.0,York +E06000015,6.0,11.0,Derby +E06000016,8.0,8.0,Leicester +E06000017,10.0,9.0,Rutland +E06000018,8.0,10.0,Nottingham +E06000019,0.0,8.0,"Herefordshire, County of" +E06000020,2.0,9.0,Telford and Wrekin +E06000021,3.0,10.0,Stoke-on-Trent +E06000022,1.0,3.0,Bath and North East Somerset +E06000023,0.0,3.0,"Bristol, City of" +E06000024,0.0,2.0,North Somerset +E06000025,1.0,4.0,South Gloucestershire +E06000026,-4.0,-2.0,Plymouth +E06000027,-3.0,-2.0,Torbay +E06000030,2.0,4.0,Swindon +E06000031,11.0,9.0,Peterborough +E06000032,10.0,7.0,Luton +E06000033,16.0,6.0,Southend-on-Sea +E06000034,15.0,4.0,Thurrock +E06000035,15.0,1.0,Medway +E06000036,4.0,2.0,Bracknell Forest +E06000037,2.0,2.0,West Berkshire +E06000038,2.0,3.0,Reading +E06000039,6.0,4.0,Slough +E06000040,4.0,3.0,Windsor and Maidenhead +E06000041,3.0,3.0,Wokingham +E06000042,6.0,5.0,Milton Keynes +E06000043,9.0,-2.0,Brighton and Hove +E06000044,4.0,-1.0,Portsmouth +E06000045,2.0,0.0,Southampton +E06000046,1.0,-2.0,Isle of Wight +E06000047,6.0,18.0,County Durham +E06000049,4.0,11.0,Cheshire East +E06000050,3.0,11.0,Cheshire West and Chester +E06000051,1.0,9.0,Shropshire 
+E06000052,-5.0,-2.0,Cornwall +E06000053,-7.0,-3.0,Isles of Scilly +E06000054,1.0,2.0,Wiltshire +E06000055,9.0,7.0,Bedford +E06000056,9.0,6.0,Central Bedfordshire +E06000057,5.0,20.0,Northumberland +E06000058,0.0,0.0,"Bournemouth, Christchurch and Poole" +E06000059,-1.0,0.0,Dorset +E06000060,5.0,5.0,Buckinghamshire +E06000061,9.0,9.0,North Northamptonshire +E06000062,7.0,6.0,West Northamptonshire +E06000063,0.0,0.0,Cumberland +E06000064,0.0,0.0,Westmorland and Furness +E06000065,0.0,0.0,North Yorkshire +E06000066,0.0,0.0,Somerset +E07000008,12.0,8.0,Cambridge +E07000009,12.0,9.0,East Cambridgeshire +E07000010,13.0,10.0,Fenland +E07000011,10.0,8.0,Huntingdonshire +E07000012,11.0,8.0,South Cambridgeshire +E07000032,7.0,11.0,Amber Valley +E07000033,10.0,12.0,Bolsover +E07000034,9.0,12.0,Chesterfield +E07000035,7.0,12.0,Derbyshire Dales +E07000036,7.0,9.0,Erewash +E07000037,7.0,13.0,High Peak +E07000038,8.0,12.0,North East Derbyshire +E07000039,6.0,10.0,South Derbyshire +E07000040,-2.0,-1.0,East Devon +E07000041,-3.0,-1.0,Exeter +E07000042,-2.0,0.0,Mid Devon +E07000043,-3.0,1.0,North Devon +E07000044,-4.0,-3.0,South Hams +E07000045,-2.0,-2.0,Teignbridge +E07000046,-4.0,-1.0,Torridge +E07000047,-3.0,0.0,West Devon +E07000061,10.0,-2.0,Eastbourne +E07000062,13.0,-2.0,Hastings +E07000063,10.0,-1.0,Lewes +E07000064,12.0,-2.0,Rother +E07000065,11.0,-2.0,Wealden +E07000066,14.0,5.0,Basildon +E07000067,14.0,7.0,Braintree +E07000068,13.0,5.0,Brentwood +E07000069,15.0,5.0,Castle Point +E07000070,14.0,6.0,Chelmsford +E07000071,15.0,8.0,Colchester +E07000072,12.0,5.0,Epping Forest +E07000073,13.0,6.0,Harlow +E07000074,15.0,7.0,Maldon +E07000075,15.0,6.0,Rochford +E07000076,16.0,8.0,Tendring +E07000077,13.0,7.0,Uttlesford +E07000078,1.0,5.0,Cheltenham +E07000079,2.0,5.0,Cotswold +E07000080,-1.0,6.0,Forest of Dean +E07000081,0.0,6.0,Gloucester +E07000082,0.0,5.0,Stroud +E07000083,1.0,6.0,Tewkesbury +E07000084,2.0,1.0,Basingstoke and Deane +E07000085,4.0,0.0,East Hampshire 
+E07000086,3.0,0.0,Eastleigh +E07000087,2.0,-1.0,Fareham +E07000088,3.0,-1.0,Gosport +E07000089,3.0,2.0,Hart +E07000090,5.0,0.0,Havant +E07000091,1.0,0.0,New Forest +E07000092,4.0,1.0,Rushmoor +E07000093,1.0,1.0,Test Valley +E07000094,3.0,1.0,Winchester +E07000095,12.0,6.0,Broxbourne +E07000096,8.0,6.0,Dacorum +E07000098,9.0,5.0,Hertsmere +E07000099,11.0,7.0,North Hertfordshire +E07000102,7.0,5.0,Three Rivers +E07000103,8.0,5.0,Watford +E07000105,12.0,-1.0,Ashford +E07000106,15.0,0.0,Canterbury +E07000107,13.0,1.0,Dartford +E07000108,14.0,-1.0,Dover +E07000109,14.0,1.0,Gravesham +E07000110,14.0,0.0,Maidstone +E07000111,12.0,0.0,Sevenoaks +E07000112,13.0,-1.0,Folkestone and Hythe +E07000113,16.0,0.0,Swale +E07000114,15.0,-1.0,Thanet +E07000115,13.0,0.0,Tonbridge and Malling +E07000116,11.0,-1.0,Tunbridge Wells +E07000117,6.0,15.0,Burnley +E07000118,3.0,14.0,Chorley +E07000119,4.0,16.0,Fylde +E07000120,5.0,15.0,Hyndburn +E07000121,3.0,17.0,Lancaster +E07000122,6.0,16.0,Pendle +E07000123,5.0,16.0,Preston +E07000124,5.0,17.0,Ribble Valley +E07000125,6.0,14.0,Rossendale +E07000126,3.0,15.0,South Ribble +E07000127,2.0,13.0,West Lancashire +E07000128,3.0,16.0,Wyre +E07000129,7.0,7.0,Blaby +E07000130,8.0,9.0,Charnwood +E07000131,8.0,7.0,Harborough +E07000132,7.0,8.0,Hinckley and Bosworth +E07000133,11.0,10.0,Melton +E07000134,6.0,9.0,North West Leicestershire +E07000135,9.0,8.0,Oadby and Wigston +E07000136,12.0,12.0,Boston +E07000137,12.0,13.0,East Lindsey +E07000138,11.0,12.0,Lincoln +E07000139,11.0,11.0,North Kesteven +E07000140,12.0,11.0,South Holland +E07000141,12.0,10.0,South Kesteven +E07000142,11.0,13.0,West Lindsey +E07000143,14.0,10.0,Breckland +E07000144,15.0,12.0,Broadland +E07000145,15.0,11.0,Great Yarmouth +E07000146,13.0,11.0,King's Lynn and West Norfolk +E07000147,14.0,12.0,North Norfolk +E07000148,14.0,11.0,Norwich +E07000149,15.0,10.0,South Norfolk +E07000170,8.0,11.0,Ashfield +E07000171,10.0,13.0,Bassetlaw +E07000172,7.0,10.0,Broxtowe 
+E07000173,9.0,10.0,Gedling +E07000174,9.0,11.0,Mansfield +E07000175,10.0,11.0,Newark and Sherwood +E07000176,10.0,10.0,Rushcliffe +E07000177,4.0,5.0,Cherwell +E07000178,4.0,4.0,Oxford +E07000179,5.0,4.0,South Oxfordshire +E07000180,3.0,4.0,Vale of White Horse +E07000181,3.0,5.0,West Oxfordshire +E07000192,3.0,9.0,Cannock Chase +E07000193,5.0,11.0,East Staffordshire +E07000194,4.0,9.0,Lichfield +E07000195,2.0,10.0,Newcastle-under-Lyme +E07000196,2.0,8.0,South Staffordshire +E07000197,4.0,10.0,Stafford +E07000198,5.0,10.0,Staffordshire Moorlands +E07000199,5.0,9.0,Tamworth +E07000200,14.0,8.0,Babergh +E07000202,15.0,9.0,Ipswich +E07000203,14.0,9.0,Mid Suffolk +E07000207,7.0,2.0,Elmbridge +E07000208,8.0,0.0,Epsom and Ewell +E07000209,5.0,1.0,Guildford +E07000210,6.0,1.0,Mole Valley +E07000211,7.0,0.0,Reigate and Banstead +E07000212,5.0,3.0,Runnymede +E07000213,6.0,3.0,Spelthorne +E07000214,5.0,2.0,Surrey Heath +E07000215,9.0,-1.0,Tandridge +E07000216,6.0,0.0,Waverley +E07000217,6.0,2.0,Woking +E07000218,6.0,8.0,North Warwickshire +E07000219,6.0,7.0,Nuneaton and Bedworth +E07000220,6.0,6.0,Rugby +E07000221,3.0,6.0,Stratford-on-Avon +E07000222,4.0,6.0,Warwick +E07000223,8.0,-2.0,Adur +E07000224,6.0,-2.0,Arun +E07000225,5.0,-1.0,Chichester +E07000226,8.0,-1.0,Crawley +E07000227,6.0,-1.0,Horsham +E07000228,7.0,-1.0,Mid Sussex +E07000229,7.0,-2.0,Worthing +E07000234,2.0,7.0,Bromsgrove +E07000235,-1.0,7.0,Malvern Hills +E07000236,4.0,7.0,Redditch +E07000237,0.0,7.0,Worcester +E07000238,2.0,6.0,Wychavon +E07000239,1.0,8.0,Wyre Forest +E07000240,10.0,6.0,St Albans +E07000241,11.0,6.0,Welwyn Hatfield +E07000242,13.0,8.0,East Hertfordshire +E07000243,12.0,7.0,Stevenage +E07000244,16.0,10.0,East Suffolk +E07000245,13.0,9.0,West Suffolk +E08000001,4.0,14.0,Bolton +E08000002,5.0,14.0,Bury +E08000003,5.0,12.0,Manchester +E08000004,5.0,13.0,Oldham +E08000005,7.0,14.0,Rochdale +E08000006,4.0,13.0,Salford +E08000007,6.0,12.0,Stockport +E08000008,6.0,13.0,Tameside 
+E08000009,4.0,12.0,Trafford +E08000010,3.0,13.0,Wigan +E08000011,2.0,12.0,Knowsley +E08000012,1.0,13.0,Liverpool +E08000013,3.0,12.0,St. Helens +E08000014,2.0,14.0,Sefton +E08000015,1.0,12.0,Wirral +E08000016,8.0,14.0,Barnsley +E08000017,9.0,14.0,Doncaster +E08000018,9.0,13.0,Rotherham +E08000019,8.0,13.0,Sheffield +E08000021,5.0,19.0,Newcastle upon Tyne +E08000022,6.0,20.0,North Tyneside +E08000023,7.0,20.0,South Tyneside +E08000024,7.0,19.0,Sunderland +E08000025,5.0,8.0,Birmingham +E08000026,5.0,6.0,Coventry +E08000027,1.0,7.0,Dudley +E08000028,3.0,7.0,Sandwell +E08000029,5.0,7.0,Solihull +E08000030,4.0,8.0,Walsall +E08000031,3.0,8.0,Wolverhampton +E08000032,7.0,16.0,Bradford +E08000033,7.0,15.0,Calderdale +E08000034,8.0,15.0,Kirklees +E08000035,8.0,16.0,Leeds +E08000036,9.0,15.0,Wakefield +E08000037,6.0,19.0,Gateshead +E09000001,11.0,2.0,City of London +E09000002,13.0,3.0,Barking and Dagenham +E09000003,10.0,5.0,Barnet +E09000004,12.0,1.0,Bexley +E09000005,10.0,4.0,Brent +E09000006,11.0,0.0,Bromley +E09000007,11.0,4.0,Camden +E09000008,10.0,0.0,Croydon +E09000009,9.0,4.0,Ealing +E09000010,11.0,5.0,Enfield +E09000011,11.0,1.0,Greenwich +E09000012,12.0,3.0,Hackney +E09000013,8.0,3.0,Hammersmith and Fulham +E09000014,12.0,4.0,Haringey +E09000015,8.0,4.0,Harrow +E09000016,14.0,3.0,Havering +E09000017,7.0,4.0,Hillingdon +E09000018,7.0,3.0,Hounslow +E09000019,11.0,3.0,Islington +E09000020,9.0,3.0,Kensington and Chelsea +E09000021,7.0,1.0,Kingston upon Thames +E09000022,10.0,2.0,Lambeth +E09000023,10.0,1.0,Lewisham +E09000024,8.0,1.0,Merton +E09000025,13.0,2.0,Newham +E09000026,14.0,4.0,Redbridge +E09000027,8.0,2.0,Richmond upon Thames +E09000028,9.0,1.0,Southwark +E09000029,9.0,0.0,Sutton +E09000030,12.0,2.0,Tower Hamlets +E09000031,13.0,4.0,Waltham Forest +E09000032,9.0,2.0,Wandsworth +E09000033,10.0,3.0,Westminster +N09000001,-4.0,16.0,Antrim and Newtownabbey +N09000002,-5.0,16.0,"Armagh City, Banbridge and Craigavon" +N09000003,-4.0,17.0,Belfast 
+N09000004,-5.0,18.0,Causeway Coast and Glens +N09000005,-6.0,17.0,Derry City and Strabane +N09000006,-6.0,16.0,Fermanagh and Omagh +N09000007,-5.0,15.0,Lisburn and Castlereagh +N09000008,-4.0,18.0,Mid and East Antrim +N09000009,-5.0,17.0,Mid Ulster +N09000010,-4.0,15.0,"Newry, Mourne and Down" +S12000005,2.0,24.0,Clackmannanshire +S12000006,4.0,20.0,Dumfries and Galloway +S12000008,3.0,20.0,East Ayrshire +S12000010,5.0,22.0,East Lothian +S12000011,2.0,20.0,East Renfrewshire +S12000013,-1.0,27.0,Na h-Eileanan Siar +S12000014,2.0,23.0,Falkirk +S12000017,1.0,26.0,Highland +S12000018,0.0,21.0,Inverclyde +S12000019,3.0,21.0,Midlothian +S12000020,2.0,26.0,Moray +S12000021,1.0,20.0,North Ayrshire +S12000023,4.0,28.0,Orkney Islands +S12000026,4.0,21.0,Scottish Borders +S12000027,5.0,30.0,Shetland Islands +S12000028,1.0,19.0,South Ayrshire +S12000029,2.0,21.0,South Lanarkshire +S12000030,1.0,24.0,Stirling +S12000033,4.0,26.0,Aberdeen City +S12000034,3.0,26.0,Aberdeenshire +S12000035,0.0,24.0,Argyll and Bute +S12000036,4.0,22.0,City of Edinburgh +S12000038,1.0,22.0,Renfrewshire +S12000039,0.0,23.0,West Dunbartonshire +S12000040,3.0,22.0,West Lothian +S12000041,2.0,25.0,Angus +S12000042,3.0,25.0,Dundee City +S12000045,1.0,23.0,East Dunbartonshire +S12000047,3.0,24.0,Fife +S12000048,1.0,25.0,Perth and Kinross +S12000049,1.0,21.0,Glasgow City +S12000050,2.0,22.0,North Lanarkshire +W06000001,-2.0,12.0,Isle of Anglesey +W06000002,-2.0,10.0,Gwynedd +W06000003,-1.0,10.0,Conwy +W06000004,0.0,10.0,Denbighshire +W06000005,0.0,11.0,Flintshire +W06000006,1.0,10.0,Wrexham +W06000008,-2.0,9.0,Ceredigion +W06000009,-5.0,6.0,Pembrokeshire +W06000010,-4.0,6.0,Carmarthenshire +W06000011,-4.0,5.0,Swansea +W06000012,-3.0,5.0,Neath Port Talbot +W06000013,-3.0,6.0,Bridgend +W06000014,-2.0,4.0,Vale of Glamorgan +W06000015,-2.0,5.0,Cardiff +W06000016,-3.0,7.0,Rhondda Cynon Taf +W06000018,-2.0,6.0,Caerphilly +W06000019,0.0,9.0,Blaenau Gwent +W06000020,-2.0,7.0,Torfaen 
+W06000021,-1.0,8.0,Monmouthshire +W06000022,-1.0,5.0,Newport +W06000023,-1.0,9.0,Powys +W06000024,-2.0,8.0,Merthyr Tydfil diff --git a/policyengine_api/endpoints/economy/compare.py b/policyengine_api/endpoints/economy/compare.py index 1a21d40d0..38b098a7d 100644 --- a/policyengine_api/endpoints/economy/compare.py +++ b/policyengine_api/endpoints/economy/compare.py @@ -548,6 +548,18 @@ class UKConstituencyBreakdown(BaseModel): outcomes_by_region: dict[str, dict[str, int]] +class UKLocalAuthorityBreakdownByLA(BaseModel): + average_household_income_change: float + relative_household_income_change: float + x: int + y: int + + +class UKLocalAuthorityBreakdown(BaseModel): + by_local_authority: dict[str, UKLocalAuthorityBreakdownByLA] + outcomes_by_region: dict[str, dict[str, int]] + + def uk_constituency_breakdown( baseline: dict, reform: dict, country_id: str ) -> UKConstituencyBreakdown | None: @@ -632,6 +644,86 @@ def uk_constituency_breakdown( return UKConstituencyBreakdown(**output) +def uk_local_authority_breakdown( + baseline: dict, reform: dict, country_id: str +) -> UKLocalAuthorityBreakdown | None: + if country_id != "uk": + return None + + output = { + "by_local_authority": {}, + "outcomes_by_region": {}, + } + for region in ["uk", "england", "scotland", "wales", "northern_ireland"]: + output["outcomes_by_region"][region] = { + "Gain more than 5%": 0, + "Gain less than 5%": 0, + "No change": 0, + "Lose less than 5%": 0, + "Lose more than 5%": 0, + } + baseline_hnet = baseline["household_net_income"] + reform_hnet = reform["household_net_income"] + + local_authority_weights_path = download_huggingface_dataset( + repo="policyengine/policyengine-uk-data-private", + repo_filename="local_authority_weights.h5", + ) + with h5py.File(local_authority_weights_path, "r") as f: + weights = f["2025"][...] 
+ + local_authority_names_path = download_huggingface_dataset( + repo="policyengine/policyengine-uk-data-public", + repo_filename="local_authorities_2021.csv", + ) + local_authority_names = pd.read_csv(local_authority_names_path) + + for i in range(len(local_authority_names)): + name: str = local_authority_names.iloc[i]["name"] + code: str = local_authority_names.iloc[i]["code"] + weight: np.ndarray = weights[i] + baseline_income = MicroSeries(baseline_hnet, weights=weight) + reform_income = MicroSeries(reform_hnet, weights=weight) + average_household_income_change: float = ( + reform_income.sum() - baseline_income.sum() + ) / baseline_income.count() + percent_household_income_change: float = ( + reform_income.sum() / baseline_income.sum() - 1 + ) + output["by_local_authority"][name] = { + "average_household_income_change": average_household_income_change, + "relative_household_income_change": percent_household_income_change, + "x": int(local_authority_names.iloc[i]["x"]), + "y": int(local_authority_names.iloc[i]["y"]), + } + + regions = ["uk"] + if code.startswith("E"): + regions.append("england") + elif code.startswith("S"): + regions.append("scotland") + elif code.startswith("W"): + regions.append("wales") + elif code.startswith("N"): + regions.append("northern_ireland") + + if percent_household_income_change > 0.05: + bucket = "Gain more than 5%" + elif percent_household_income_change > 1e-3: + bucket = "Gain less than 5%" + elif percent_household_income_change > -1e-3: + bucket = "No change" + elif percent_household_income_change > -0.05: + bucket = "Lose less than 5%" + else: + bucket = "Lose more than 5%" + + for region_ in regions: + output["outcomes_by_region"][region_][bucket] += 1 + + return UKLocalAuthorityBreakdown(**output) + + def compare_economic_outputs( baseline: dict, reform: dict, country_id: str = None ) -> dict: @@ -662,6 +754,11 @@ def compare_economic_outputs( ) if constituency_impact_data is not None: constituency_impact_data = 
constituency_impact_data.model_dump() + local_authority_impact_data: UKLocalAuthorityBreakdown | None = ( + uk_local_authority_breakdown(baseline, reform, country_id) + ) + if local_authority_impact_data is not None: + local_authority_impact_data = local_authority_impact_data.model_dump() try: wealth_decile_impact_data = wealth_decile_impact(baseline, reform) intra_wealth_decile_impact_data = intra_wealth_decile_impact( @@ -684,6 +781,7 @@ def compare_economic_outputs( intra_wealth_decile=intra_wealth_decile_impact_data, labor_supply_response=labor_supply_response_data, constituency_impact=constituency_impact_data, + local_authority_impact=local_authority_impact_data, ) elif baseline.get("type") == "cliff": return dict( diff --git a/tests/unit/services/test_metadata_service.py b/tests/unit/services/test_metadata_service.py index ac33d5250..70ea9262e 100644 --- a/tests/unit/services/test_metadata_service.py +++ b/tests/unit/services/test_metadata_service.py @@ -123,7 +123,7 @@ def test_verify_metadata_for_given_country( @pytest.mark.parametrize( "country_id, expected_types", [ - ("uk", ["national", "country", "constituency"]), + ("uk", ["national", "country", "constituency", "local_authority"]), ("us", ["national", "state", "city", "congressional_district"]), ], ) From c9f7a36099a972fe4c9b81dcb75eb53ec79a38be Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Sat, 13 Dec 2025 12:47:11 +0400 Subject: [PATCH 2/7] test: Add tests --- tests/unit/endpoints/__init__.py | 0 tests/unit/endpoints/economy/__init__.py | 0 tests/unit/endpoints/economy/test_compare.py | 353 +++++++++++++++++++ tests/unit/test_constants.py | 85 +++++ tests/unit/test_country.py | 154 ++++++++ 5 files changed, 592 insertions(+) create mode 100644 tests/unit/endpoints/__init__.py create mode 100644 tests/unit/endpoints/economy/__init__.py create mode 100644 tests/unit/endpoints/economy/test_compare.py create mode 100644 tests/unit/test_constants.py create mode 100644 tests/unit/test_country.py diff 
--git a/tests/unit/endpoints/__init__.py b/tests/unit/endpoints/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/endpoints/economy/__init__.py b/tests/unit/endpoints/economy/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/endpoints/economy/test_compare.py b/tests/unit/endpoints/economy/test_compare.py new file mode 100644 index 000000000..26fd40f5d --- /dev/null +++ b/tests/unit/endpoints/economy/test_compare.py @@ -0,0 +1,353 @@ +import pytest +from unittest.mock import patch, MagicMock +import numpy as np +import pandas as pd +from pydantic import ValidationError + +from policyengine_api.endpoints.economy.compare import ( + UKConstituencyBreakdownByConstituency, + UKConstituencyBreakdown, + UKLocalAuthorityBreakdownByLA, + UKLocalAuthorityBreakdown, + uk_constituency_breakdown, + uk_local_authority_breakdown, +) + + +class TestUKLocalAuthorityBreakdownByLA: + """Tests for the UKLocalAuthorityBreakdownByLA Pydantic model.""" + + def test__given_valid_data__creates_instance(self): + breakdown = UKLocalAuthorityBreakdownByLA( + average_household_income_change=100.50, + relative_household_income_change=0.05, + x=10, + y=20, + ) + assert breakdown.average_household_income_change == 100.50 + assert breakdown.relative_household_income_change == 0.05 + assert breakdown.x == 10 + assert breakdown.y == 20 + + def test__given_negative_income_change__creates_instance(self): + breakdown = UKLocalAuthorityBreakdownByLA( + average_household_income_change=-500.0, + relative_household_income_change=-0.03, + x=5, + y=-10, + ) + assert breakdown.average_household_income_change == -500.0 + assert breakdown.relative_household_income_change == -0.03 + + def test__given_zero_values__creates_instance(self): + breakdown = UKLocalAuthorityBreakdownByLA( + average_household_income_change=0.0, + relative_household_income_change=0.0, + x=0, + y=0, + ) + assert breakdown.average_household_income_change == 0.0 + assert 
breakdown.relative_household_income_change == 0.0 + + def test__given_missing_field__raises_validation_error(self): + with pytest.raises(ValidationError): + UKLocalAuthorityBreakdownByLA( + average_household_income_change=100.0, + # Missing relative_household_income_change + x=10, + y=20, + ) + + +class TestUKLocalAuthorityBreakdown: + """Tests for the UKLocalAuthorityBreakdown Pydantic model.""" + + def test__given_valid_data__creates_instance(self): + breakdown = UKLocalAuthorityBreakdown( + by_local_authority={ + "Hartlepool": UKLocalAuthorityBreakdownByLA( + average_household_income_change=100.0, + relative_household_income_change=0.02, + x=8, + y=19, + ) + }, + outcomes_by_region={ + "uk": {"Gain more than 5%": 1, "No change": 0}, + "england": {"Gain more than 5%": 1, "No change": 0}, + }, + ) + assert "Hartlepool" in breakdown.by_local_authority + assert "uk" in breakdown.outcomes_by_region + + def test__given_empty_by_local_authority__creates_instance(self): + breakdown = UKLocalAuthorityBreakdown( + by_local_authority={}, + outcomes_by_region={ + "uk": {"No change": 0}, + }, + ) + assert len(breakdown.by_local_authority) == 0 + + def test__model_dump_returns_dict(self): + breakdown = UKLocalAuthorityBreakdown( + by_local_authority={ + "Leicester": UKLocalAuthorityBreakdownByLA( + average_household_income_change=50.0, + relative_household_income_change=0.01, + x=8, + y=8, + ) + }, + outcomes_by_region={"uk": {"No change": 1}}, + ) + result = breakdown.model_dump() + assert isinstance(result, dict) + assert "by_local_authority" in result + assert "outcomes_by_region" in result + + +class TestUKLocalAuthorityBreakdownFunction: + """Tests for the uk_local_authority_breakdown function.""" + + def test__given_non_uk_country__returns_none(self): + result = uk_local_authority_breakdown({}, {}, "us") + assert result is None + + def test__given_non_uk_country_canada__returns_none(self): + result = uk_local_authority_breakdown({}, {}, "ca") + assert result is None + + 
@patch( + "policyengine_api.endpoints.economy.compare.download_huggingface_dataset" + ) + @patch("policyengine_api.endpoints.economy.compare.h5py.File") + @patch("policyengine_api.endpoints.economy.compare.pd.read_csv") + def test__given_uk_country__returns_breakdown( + self, mock_read_csv, mock_h5py_file, mock_download + ): + # Setup mocks + mock_download.side_effect = [ + "/path/to/weights.h5", + "/path/to/names.csv", + ] + + # Create mock weights - 3 local authorities, 10 households + mock_weights = np.ones((3, 10)) + mock_h5py_context = MagicMock() + mock_h5py_context.__enter__ = MagicMock( + return_value={"2025": mock_weights} + ) + mock_h5py_context.__exit__ = MagicMock(return_value=False) + mock_h5py_file.return_value = mock_h5py_context + + # Create mock local authority names DataFrame + mock_la_df = pd.DataFrame( + { + "code": ["E06000001", "S12000033", "W06000001"], + "name": ["Hartlepool", "Aberdeen City", "Isle of Anglesey"], + "x": [8.0, 5.0, 3.0], + "y": [19.0, 10.0, 15.0], + } + ) + mock_read_csv.return_value = mock_la_df + + # Create baseline and reform data + baseline = {"household_net_income": np.array([1000.0] * 10)} + reform = {"household_net_income": np.array([1050.0] * 10)} + + result = uk_local_authority_breakdown(baseline, reform, "uk") + + assert result is not None + assert isinstance(result, UKLocalAuthorityBreakdown) + assert "Hartlepool" in result.by_local_authority + assert "Aberdeen City" in result.by_local_authority + assert "Isle of Anglesey" in result.by_local_authority + + def test__region_categorization_by_code_prefix(self): + """Test that region categorization logic correctly identifies UK nations by code prefix.""" + # This is a unit test for the region categorization logic + # We test the logic directly rather than through the full function + + test_cases = [ + ("E06000001", ["uk", "england"]), # English LA + ("S12000033", ["uk", "scotland"]), # Scottish LA + ("W06000001", ["uk", "wales"]), # Welsh LA + ("N09000001", ["uk", 
"northern_ireland"]), # NI LA + ] + + for code, expected_regions in test_cases: + regions = ["uk"] + if code.startswith("E"): + regions.append("england") + elif code.startswith("S"): + regions.append("scotland") + elif code.startswith("W"): + regions.append("wales") + elif code.startswith("N"): + regions.append("northern_ireland") + + assert regions == expected_regions, f"Failed for code {code}" + + def test__outcome_bucket_categorization_logic(self): + """Test that outcome bucket categorization logic is correct.""" + # Thresholds: > 0.05 (5%), > 0.001 (0.1%), > -0.001, > -0.05 + test_cases = [ + (0.10, "Gain more than 5%"), # 10% gain + (0.06, "Gain more than 5%"), # 6% gain + (0.051, "Gain more than 5%"), # Just over 5% + (0.05, "Gain less than 5%"), # Exactly 5% gain (not > 5%) + (0.03, "Gain less than 5%"), # 3% gain + (0.002, "Gain less than 5%"), # 0.2% gain (> 0.001) + (0.001, "No change"), # Exactly 0.1% - not > 0.001 + (0.0005, "No change"), # 0.05% gain (within tolerance) + (0.0, "No change"), # No change + (-0.0005, "No change"), # 0.05% loss (> -0.001) + (-0.001, "Lose less than 5%"), # Exactly -0.1% (not > -0.001) + (-0.002, "Lose less than 5%"), # 0.2% loss + (-0.03, "Lose less than 5%"), # 3% loss + (-0.049, "Lose less than 5%"), # Just under 5% loss (> -0.05) + (-0.05, "Lose more than 5%"), # Exactly 5% loss (not > -0.05) + (-0.051, "Lose more than 5%"), # Just over 5% loss + (-0.06, "Lose more than 5%"), # 6% loss + (-0.10, "Lose more than 5%"), # 10% loss + ] + + for percent_change, expected_bucket in test_cases: + if percent_change > 0.05: + bucket = "Gain more than 5%" + elif percent_change > 1e-3: + bucket = "Gain less than 5%" + elif percent_change > -1e-3: + bucket = "No change" + elif percent_change > -0.05: + bucket = "Lose less than 5%" + else: + bucket = "Lose more than 5%" + + assert ( + bucket == expected_bucket + ), f"Failed for {percent_change}: expected {expected_bucket}, got {bucket}" + + @patch( + 
"policyengine_api.endpoints.economy.compare.download_huggingface_dataset" + ) + @patch("policyengine_api.endpoints.economy.compare.h5py.File") + @patch("policyengine_api.endpoints.economy.compare.pd.read_csv") + def test__outcome_buckets_are_correct( + self, mock_read_csv, mock_h5py_file, mock_download + ): + mock_download.side_effect = [ + "/path/to/weights.h5", + "/path/to/names.csv", + ] + + mock_weights = np.ones((1, 10)) + mock_h5py_context = MagicMock() + mock_h5py_context.__enter__ = MagicMock( + return_value={"2025": mock_weights} + ) + mock_h5py_context.__exit__ = MagicMock(return_value=False) + mock_h5py_file.return_value = mock_h5py_context + + mock_la_df = pd.DataFrame( + { + "code": ["E06000001"], + "name": ["Hartlepool"], + "x": [8.0], + "y": [19.0], + } + ) + mock_read_csv.return_value = mock_la_df + + baseline = {"household_net_income": np.array([1000.0] * 10)} + # 10% gain - should be "Gain more than 5%" + reform = {"household_net_income": np.array([1100.0] * 10)} + + result = uk_local_authority_breakdown(baseline, reform, "uk") + + assert result.outcomes_by_region["uk"]["Gain more than 5%"] == 1 + assert result.outcomes_by_region["uk"]["Gain less than 5%"] == 0 + + @patch( + "policyengine_api.endpoints.economy.compare.download_huggingface_dataset" + ) + @patch("policyengine_api.endpoints.economy.compare.h5py.File") + @patch("policyengine_api.endpoints.economy.compare.pd.read_csv") + def test__downloads_from_correct_repos( + self, mock_read_csv, mock_h5py_file, mock_download + ): + mock_download.side_effect = [ + "/path/to/weights.h5", + "/path/to/names.csv", + ] + + mock_weights = np.ones((1, 10)) + mock_h5py_context = MagicMock() + mock_h5py_context.__enter__ = MagicMock( + return_value={"2025": mock_weights} + ) + mock_h5py_context.__exit__ = MagicMock(return_value=False) + mock_h5py_file.return_value = mock_h5py_context + + mock_la_df = pd.DataFrame( + { + "code": ["E06000001"], + "name": ["Test"], + "x": [0.0], + "y": [0.0], + } + ) + 
mock_read_csv.return_value = mock_la_df + + baseline = {"household_net_income": np.array([1000.0] * 10)} + reform = {"household_net_income": np.array([1000.0] * 10)} + + uk_local_authority_breakdown(baseline, reform, "uk") + + # Verify correct repos are used + calls = mock_download.call_args_list + assert calls[0][1]["repo"] == "policyengine/policyengine-uk-data-private" + assert calls[0][1]["repo_filename"] == "local_authority_weights.h5" + assert calls[1][1]["repo"] == "policyengine/policyengine-uk-data-public" + assert calls[1][1]["repo_filename"] == "local_authorities_2021.csv" + + +class TestUKConstituencyBreakdownModels: + """Tests for the existing UK constituency breakdown models (for completeness).""" + + def test__constituency_breakdown_by_constituency_creates_instance(self): + breakdown = UKConstituencyBreakdownByConstituency( + average_household_income_change=200.0, + relative_household_income_change=0.04, + x=56, + y=-40, + ) + assert breakdown.average_household_income_change == 200.0 + assert breakdown.x == 56 + + def test__constituency_breakdown_creates_instance(self): + breakdown = UKConstituencyBreakdown( + by_constituency={ + "Aldershot": UKConstituencyBreakdownByConstituency( + average_household_income_change=150.0, + relative_household_income_change=0.03, + x=56, + y=-40, + ) + }, + outcomes_by_region={"uk": {"No change": 1}}, + ) + assert "Aldershot" in breakdown.by_constituency + + +class TestUKConstituencyBreakdownFunction: + """Tests for the uk_constituency_breakdown function.""" + + def test__given_non_uk_country__returns_none(self): + result = uk_constituency_breakdown({}, {}, "us") + assert result is None + + def test__given_non_uk_country_nigeria__returns_none(self): + result = uk_constituency_breakdown({}, {}, "ng") + assert result is None diff --git a/tests/unit/test_constants.py b/tests/unit/test_constants.py new file mode 100644 index 000000000..439d5a239 --- /dev/null +++ b/tests/unit/test_constants.py @@ -0,0 +1,85 @@ +import pytest 
+ +from policyengine_api.constants import ( + UK_REGION_TYPES, + US_REGION_TYPES, + REGION_PREFIXES, +) + + +class TestUKRegionTypes: + """Tests for UK_REGION_TYPES constant.""" + + def test__contains_national(self): + assert "national" in UK_REGION_TYPES + + def test__contains_country(self): + assert "country" in UK_REGION_TYPES + + def test__contains_constituency(self): + assert "constituency" in UK_REGION_TYPES + + def test__contains_local_authority(self): + assert "local_authority" in UK_REGION_TYPES + + def test__has_exactly_four_types(self): + assert len(UK_REGION_TYPES) == 4 + + +class TestUSRegionTypes: + """Tests for US_REGION_TYPES constant.""" + + def test__contains_national(self): + assert "national" in US_REGION_TYPES + + def test__contains_state(self): + assert "state" in US_REGION_TYPES + + def test__contains_city(self): + assert "city" in US_REGION_TYPES + + def test__contains_congressional_district(self): + assert "congressional_district" in US_REGION_TYPES + + def test__has_exactly_four_types(self): + assert len(US_REGION_TYPES) == 4 + + +class TestRegionPrefixes: + """Tests for REGION_PREFIXES constant.""" + + class TestUKPrefixes: + """Tests for UK region prefixes.""" + + def test__uk_key_exists(self): + assert "uk" in REGION_PREFIXES + + def test__contains_country_prefix(self): + assert "country/" in REGION_PREFIXES["uk"] + + def test__contains_constituency_prefix(self): + assert "constituency/" in REGION_PREFIXES["uk"] + + def test__contains_local_authority_prefix(self): + assert "local_authority/" in REGION_PREFIXES["uk"] + + def test__has_exactly_three_prefixes(self): + assert len(REGION_PREFIXES["uk"]) == 3 + + class TestUSPrefixes: + """Tests for US region prefixes.""" + + def test__us_key_exists(self): + assert "us" in REGION_PREFIXES + + def test__contains_state_prefix(self): + assert "state/" in REGION_PREFIXES["us"] + + def test__contains_city_prefix(self): + assert "city/" in REGION_PREFIXES["us"] + + def 
test__contains_congressional_district_prefix(self): + assert "congressional_district/" in REGION_PREFIXES["us"] + + def test__has_exactly_three_prefixes(self): + assert len(REGION_PREFIXES["us"]) == 3 diff --git a/tests/unit/test_country.py b/tests/unit/test_country.py new file mode 100644 index 000000000..1b597ec0a --- /dev/null +++ b/tests/unit/test_country.py @@ -0,0 +1,154 @@ +import pytest +import pandas as pd +from pathlib import Path + +from policyengine_api.country import COUNTRIES + + +class TestUKCountryMetadata: + """Tests for UK country metadata, specifically local authority loading.""" + + @pytest.fixture + def uk_country(self): + return COUNTRIES["uk"] + + @pytest.fixture + def uk_regions(self, uk_country): + return uk_country.metadata["economy_options"]["region"] + + def test__uk_metadata_contains_local_authorities(self, uk_regions): + """Verify that local authorities are included in UK region options.""" + local_authority_regions = [ + r for r in uk_regions if r.get("type") == "local_authority" + ] + assert len(local_authority_regions) > 0 + + def test__uk_has_360_local_authorities(self, uk_regions): + """Verify the correct number of local authorities are loaded.""" + local_authority_regions = [ + r for r in uk_regions if r.get("type") == "local_authority" + ] + assert len(local_authority_regions) == 360 + + def test__local_authority_regions_have_correct_name_format( + self, uk_regions + ): + """Verify local authority region names have the correct prefix.""" + local_authority_regions = [ + r for r in uk_regions if r.get("type") == "local_authority" + ] + for region in local_authority_regions: + assert region["name"].startswith("local_authority/") + + def test__local_authority_regions_have_labels(self, uk_regions): + """Verify all local authority regions have labels.""" + local_authority_regions = [ + r for r in uk_regions if r.get("type") == "local_authority" + ] + for region in local_authority_regions: + assert "label" in region + assert 
len(region["label"]) > 0 + + def test__local_authority_regions_have_type_field(self, uk_regions): + """Verify all local authority regions have type field set correctly.""" + local_authority_regions = [ + r for r in uk_regions if r.get("type") == "local_authority" + ] + for region in local_authority_regions: + assert region["type"] == "local_authority" + + def test__specific_local_authorities_present(self, uk_regions): + """Verify specific local authorities are present in metadata.""" + local_authority_names = [ + r["name"] + for r in uk_regions + if r.get("type") == "local_authority" + ] + # Check some well-known local authorities + assert "local_authority/Hartlepool" in local_authority_names + assert "local_authority/Middlesbrough" in local_authority_names + assert "local_authority/Leicester" in local_authority_names + + def test__uk_still_has_constituencies(self, uk_regions): + """Verify constituencies are still present after adding local authorities.""" + constituency_regions = [ + r for r in uk_regions if r.get("type") == "constituency" + ] + assert len(constituency_regions) == 650 + + def test__uk_has_all_region_types(self, uk_regions): + """Verify all expected region types are present.""" + types = set(r.get("type") for r in uk_regions) + assert "national" in types + assert "country" in types + assert "constituency" in types + assert "local_authority" in types + + +class TestLocalAuthoritiesDataFile: + """Tests for the local authorities CSV data file.""" + + @pytest.fixture + def local_authorities_df(self): + path = ( + Path(__file__).parents[2] + / "policyengine_api" + / "data" + / "local_authorities_2021.csv" + ) + return pd.read_csv(path) + + def test__file_has_correct_columns(self, local_authorities_df): + """Verify the CSV has the expected columns.""" + expected_columns = {"code", "name", "x", "y"} + assert expected_columns == set(local_authorities_df.columns) + + def test__file_has_360_local_authorities(self, local_authorities_df): + """Verify the 
correct number of local authorities in file.""" + assert len(local_authorities_df) == 360 + + def test__all_codes_are_valid_ons_codes(self, local_authorities_df): + """Verify all codes follow ONS local authority code patterns.""" + for code in local_authorities_df["code"]: + # ONS codes start with E (England), S (Scotland), W (Wales), or N (Northern Ireland) + assert code[0] in ["E", "S", "W", "N"] + + def test__all_names_are_non_empty(self, local_authorities_df): + """Verify all local authority names are non-empty.""" + for name in local_authorities_df["name"]: + assert len(str(name)) > 0 + + def test__coordinates_are_numeric(self, local_authorities_df): + """Verify x and y coordinates are numeric.""" + assert local_authorities_df["x"].dtype in ["float64", "int64"] + assert local_authorities_df["y"].dtype in ["float64", "int64"] + + def test__english_local_authorities_have_e_prefix( + self, local_authorities_df + ): + """Verify English local authorities have E prefix codes.""" + english_las = local_authorities_df[ + local_authorities_df["code"].str.startswith("E") + ] + # England has 296 local authorities (majority of the 360 total) + assert len(english_las) == 296 + + def test__scottish_local_authorities_have_s_prefix( + self, local_authorities_df + ): + """Verify Scottish local authorities have S prefix codes.""" + scottish_las = local_authorities_df[ + local_authorities_df["code"].str.startswith("S") + ] + # Scotland has 32 council areas + assert len(scottish_las) == 32 + + def test__welsh_local_authorities_have_w_prefix( + self, local_authorities_df + ): + """Verify Welsh local authorities have W prefix codes.""" + welsh_las = local_authorities_df[ + local_authorities_df["code"].str.startswith("W") + ] + # Wales has 22 principal areas + assert len(welsh_las) == 22 From 32ab3e7b21a6e1024c6d8fa0ca2fd982afd39a60 Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Mon, 15 Dec 2025 13:29:52 +0400 Subject: [PATCH 3/7] fix: Properly filter outputs for a given LA or PC 
--- policyengine_api/endpoints/economy/compare.py | 88 ++++- tests/unit/endpoints/economy/test_compare.py | 372 ++++++++++++++++++ 2 files changed, 451 insertions(+), 9 deletions(-) diff --git a/policyengine_api/endpoints/economy/compare.py b/policyengine_api/endpoints/economy/compare.py index 38b098a7d..f82f7eaa5 100644 --- a/policyengine_api/endpoints/economy/compare.py +++ b/policyengine_api/endpoints/economy/compare.py @@ -561,17 +561,31 @@ class UKLocalAuthorityBreakdown(BaseModel): def uk_constituency_breakdown( - baseline: dict, reform: dict, country_id: str + baseline: dict, reform: dict, country_id: str, region: str | None = None ) -> UKConstituencyBreakdown | None: if country_id != "uk": return None + # If simulating a local authority, constituency breakdown is not applicable + if region is not None and region.startswith("local_authority/"): + return None + + # Determine if we're filtering to a specific constituency + selected_constituency = None + if region is not None and region.startswith("constituency/"): + selected_constituency = region.split("/", 1)[1] + + # Determine if we're filtering to a specific country + selected_country = None + if region is not None and region.startswith("country/"): + selected_country = region.split("/", 1)[1].upper() + output = { "by_constituency": {}, "outcomes_by_region": {}, } - for region in ["uk", "england", "scotland", "wales", "northern_ireland"]: - output["outcomes_by_region"][region] = { + for region_name in ["uk", "england", "scotland", "wales", "northern_ireland"]: + output["outcomes_by_region"][region_name] = { "Gain more than 5%": 0, "Gain less than 5%": 0, "No change": 0, @@ -601,6 +615,23 @@ def uk_constituency_breakdown( for i in range(len(constituency_names)): name: str = constituency_names.iloc[i]["name"] code: str = constituency_names.iloc[i]["code"] + + # Filter to specific constituency if requested + if selected_constituency is not None: + if name != selected_constituency and code != 
selected_constituency: + continue + + # Filter to specific country if requested + if selected_country is not None: + if selected_country == "ENGLAND" and "E" not in code: + continue + elif selected_country == "SCOTLAND" and "S" not in code: + continue + elif selected_country == "WALES" and "W" not in code: + continue + elif selected_country == "NORTHERN_IRELAND" and "N" not in code: + continue + weight: np.ndarray = weights[i] baseline_income = MicroSeries(baseline_hnet, weights=weight) reform_income = MicroSeries(reform_hnet, weights=weight) @@ -645,17 +676,31 @@ def uk_constituency_breakdown( def uk_local_authority_breakdown( - baseline: dict, reform: dict, country_id: str + baseline: dict, reform: dict, country_id: str, region: str | None = None ) -> UKLocalAuthorityBreakdown | None: if country_id != "uk": return None + # If simulating a constituency, local authority breakdown is not applicable + if region is not None and region.startswith("constituency/"): + return None + + # Determine if we're filtering to a specific local authority + selected_la = None + if region is not None and region.startswith("local_authority/"): + selected_la = region.split("/", 1)[1] + + # Determine if we're filtering to a specific country + selected_country = None + if region is not None and region.startswith("country/"): + selected_country = region.split("/", 1)[1].lower() + output = { "by_local_authority": {}, "outcomes_by_region": {}, } - for region in ["uk", "england", "scotland", "wales", "northern_ireland"]: - output["outcomes_by_region"][region] = { + for region_name in ["uk", "england", "scotland", "wales", "northern_ireland"]: + output["outcomes_by_region"][region_name] = { "Gain more than 5%": 0, "Gain less than 5%": 0, "No change": 0, @@ -681,6 +726,25 @@ def uk_local_authority_breakdown( for i in range(len(local_authority_names)): name: str = local_authority_names.iloc[i]["name"] code: str = local_authority_names.iloc[i]["code"] + + # Filter to specific local authority if 
requested + if selected_la is not None: + if name != selected_la and code != selected_la: + continue + + # Filter to specific country if requested + if selected_country is not None: + if selected_country == "england" and not code.startswith("E"): + continue + elif selected_country == "scotland" and not code.startswith("S"): + continue + elif selected_country == "wales" and not code.startswith("W"): + continue + elif selected_country == "northern_ireland" and not code.startswith( + "N" + ): + continue + weight: np.ndarray = weights[i] baseline_income = MicroSeries(baseline_hnet, weights=weight) reform_income = MicroSeries(reform_hnet, weights=weight) @@ -725,7 +789,10 @@ def uk_local_authority_breakdown( def compare_economic_outputs( - baseline: dict, reform: dict, country_id: str = None + baseline: dict, + reform: dict, + country_id: str = None, + region: str | None = None, ) -> dict: """ Compare the economic outputs of two economies. @@ -733,6 +800,9 @@ def compare_economic_outputs( Args: baseline (dict): The baseline economy. reform (dict): The reform economy. + country_id (str): The country identifier (e.g., "uk", "us"). + region (str | None): The region filter (e.g., "uk", "local_authority/Leicester", + "constituency/Aldershot", "country/scotland"). Used to filter breakdown results. Returns: dict: The comparison of the two economies. 
@@ -750,12 +820,12 @@ def compare_economic_outputs( intra_decile_impact_data = intra_decile_impact(baseline, reform) labor_supply_response_data = labor_supply_response(baseline, reform) constituency_impact_data: UKConstituencyBreakdown | None = ( - uk_constituency_breakdown(baseline, reform, country_id) + uk_constituency_breakdown(baseline, reform, country_id, region) ) if constituency_impact_data is not None: constituency_impact_data = constituency_impact_data.model_dump() local_authority_impact_data: UKLocalAuthorityBreakdown | None = ( - uk_local_authority_breakdown(baseline, reform, country_id) + uk_local_authority_breakdown(baseline, reform, country_id, region) ) if local_authority_impact_data is not None: local_authority_impact_data = local_authority_impact_data.model_dump() diff --git a/tests/unit/endpoints/economy/test_compare.py b/tests/unit/endpoints/economy/test_compare.py index 26fd40f5d..0b8cf5f94 100644 --- a/tests/unit/endpoints/economy/test_compare.py +++ b/tests/unit/endpoints/economy/test_compare.py @@ -312,6 +312,193 @@ def test__downloads_from_correct_repos( assert calls[1][1]["repo"] == "policyengine/policyengine-uk-data-public" assert calls[1][1]["repo_filename"] == "local_authorities_2021.csv" + def test__given_constituency_region__returns_none(self): + """When simulating a constituency, local authority breakdown should not be computed.""" + result = uk_local_authority_breakdown( + {}, {}, "uk", "constituency/Aldershot" + ) + assert result is None + + def test__given_constituency_region_with_code__returns_none(self): + """When simulating a constituency by code, local authority breakdown should not be computed.""" + result = uk_local_authority_breakdown( + {}, {}, "uk", "constituency/E12345678" + ) + assert result is None + + @patch( + "policyengine_api.endpoints.economy.compare.download_huggingface_dataset" + ) + @patch("policyengine_api.endpoints.economy.compare.h5py.File") + @patch("policyengine_api.endpoints.economy.compare.pd.read_csv") + 
def test__given_specific_la_region__returns_only_that_la( + self, mock_read_csv, mock_h5py_file, mock_download + ): + """When simulating a specific local authority, only that LA should be returned.""" + mock_download.side_effect = [ + "/path/to/weights.h5", + "/path/to/names.csv", + ] + + mock_weights = np.ones((3, 10)) + mock_h5py_context = MagicMock() + mock_h5py_context.__enter__ = MagicMock( + return_value={"2025": mock_weights} + ) + mock_h5py_context.__exit__ = MagicMock(return_value=False) + mock_h5py_file.return_value = mock_h5py_context + + mock_la_df = pd.DataFrame( + { + "code": ["E06000001", "S12000033", "W06000001"], + "name": ["Hartlepool", "Aberdeen City", "Isle of Anglesey"], + "x": [8.0, 5.0, 3.0], + "y": [19.0, 10.0, 15.0], + } + ) + mock_read_csv.return_value = mock_la_df + + baseline = {"household_net_income": np.array([1000.0] * 10)} + reform = {"household_net_income": np.array([1050.0] * 10)} + + result = uk_local_authority_breakdown( + baseline, reform, "uk", "local_authority/Hartlepool" + ) + + assert result is not None + assert len(result.by_local_authority) == 1 + assert "Hartlepool" in result.by_local_authority + assert "Aberdeen City" not in result.by_local_authority + assert "Isle of Anglesey" not in result.by_local_authority + + @patch( + "policyengine_api.endpoints.economy.compare.download_huggingface_dataset" + ) + @patch("policyengine_api.endpoints.economy.compare.h5py.File") + @patch("policyengine_api.endpoints.economy.compare.pd.read_csv") + def test__given_country_scotland_region__returns_only_scottish_las( + self, mock_read_csv, mock_h5py_file, mock_download + ): + """When simulating country/scotland, only Scottish local authorities should be returned.""" + mock_download.side_effect = [ + "/path/to/weights.h5", + "/path/to/names.csv", + ] + + mock_weights = np.ones((3, 10)) + mock_h5py_context = MagicMock() + mock_h5py_context.__enter__ = MagicMock( + return_value={"2025": mock_weights} + ) + mock_h5py_context.__exit__ = 
MagicMock(return_value=False) + mock_h5py_file.return_value = mock_h5py_context + + mock_la_df = pd.DataFrame( + { + "code": ["E06000001", "S12000033", "W06000001"], + "name": ["Hartlepool", "Aberdeen City", "Isle of Anglesey"], + "x": [8.0, 5.0, 3.0], + "y": [19.0, 10.0, 15.0], + } + ) + mock_read_csv.return_value = mock_la_df + + baseline = {"household_net_income": np.array([1000.0] * 10)} + reform = {"household_net_income": np.array([1050.0] * 10)} + + result = uk_local_authority_breakdown( + baseline, reform, "uk", "country/scotland" + ) + + assert result is not None + assert len(result.by_local_authority) == 1 + assert "Aberdeen City" in result.by_local_authority + assert "Hartlepool" not in result.by_local_authority + assert "Isle of Anglesey" not in result.by_local_authority + + @patch( + "policyengine_api.endpoints.economy.compare.download_huggingface_dataset" + ) + @patch("policyengine_api.endpoints.economy.compare.h5py.File") + @patch("policyengine_api.endpoints.economy.compare.pd.read_csv") + def test__given_uk_region__returns_all_las( + self, mock_read_csv, mock_h5py_file, mock_download + ): + """When simulating uk-wide, all local authorities should be returned.""" + mock_download.side_effect = [ + "/path/to/weights.h5", + "/path/to/names.csv", + ] + + mock_weights = np.ones((3, 10)) + mock_h5py_context = MagicMock() + mock_h5py_context.__enter__ = MagicMock( + return_value={"2025": mock_weights} + ) + mock_h5py_context.__exit__ = MagicMock(return_value=False) + mock_h5py_file.return_value = mock_h5py_context + + mock_la_df = pd.DataFrame( + { + "code": ["E06000001", "S12000033", "W06000001"], + "name": ["Hartlepool", "Aberdeen City", "Isle of Anglesey"], + "x": [8.0, 5.0, 3.0], + "y": [19.0, 10.0, 15.0], + } + ) + mock_read_csv.return_value = mock_la_df + + baseline = {"household_net_income": np.array([1000.0] * 10)} + reform = {"household_net_income": np.array([1050.0] * 10)} + + result = uk_local_authority_breakdown(baseline, reform, "uk", "uk") + + 
assert result is not None + assert len(result.by_local_authority) == 3 + assert "Hartlepool" in result.by_local_authority + assert "Aberdeen City" in result.by_local_authority + assert "Isle of Anglesey" in result.by_local_authority + + @patch( + "policyengine_api.endpoints.economy.compare.download_huggingface_dataset" + ) + @patch("policyengine_api.endpoints.economy.compare.h5py.File") + @patch("policyengine_api.endpoints.economy.compare.pd.read_csv") + def test__given_no_region__returns_all_las( + self, mock_read_csv, mock_h5py_file, mock_download + ): + """When no region specified (None), all local authorities should be returned.""" + mock_download.side_effect = [ + "/path/to/weights.h5", + "/path/to/names.csv", + ] + + mock_weights = np.ones((3, 10)) + mock_h5py_context = MagicMock() + mock_h5py_context.__enter__ = MagicMock( + return_value={"2025": mock_weights} + ) + mock_h5py_context.__exit__ = MagicMock(return_value=False) + mock_h5py_file.return_value = mock_h5py_context + + mock_la_df = pd.DataFrame( + { + "code": ["E06000001", "S12000033", "W06000001"], + "name": ["Hartlepool", "Aberdeen City", "Isle of Anglesey"], + "x": [8.0, 5.0, 3.0], + "y": [19.0, 10.0, 15.0], + } + ) + mock_read_csv.return_value = mock_la_df + + baseline = {"household_net_income": np.array([1000.0] * 10)} + reform = {"household_net_income": np.array([1050.0] * 10)} + + result = uk_local_authority_breakdown(baseline, reform, "uk", None) + + assert result is not None + assert len(result.by_local_authority) == 3 + class TestUKConstituencyBreakdownModels: """Tests for the existing UK constituency breakdown models (for completeness).""" @@ -351,3 +538,188 @@ def test__given_non_uk_country__returns_none(self): def test__given_non_uk_country_nigeria__returns_none(self): result = uk_constituency_breakdown({}, {}, "ng") assert result is None + + def test__given_local_authority_region__returns_none(self): + """When simulating a local authority, constituency breakdown should not be 
computed.""" + result = uk_constituency_breakdown({}, {}, "uk", "local_authority/Leicester") + assert result is None + + def test__given_local_authority_region_with_code__returns_none(self): + """When simulating a local authority by code, constituency breakdown should not be computed.""" + result = uk_constituency_breakdown({}, {}, "uk", "local_authority/E06000016") + assert result is None + + @patch( + "policyengine_api.endpoints.economy.compare.download_huggingface_dataset" + ) + @patch("policyengine_api.endpoints.economy.compare.h5py.File") + @patch("policyengine_api.endpoints.economy.compare.pd.read_csv") + def test__given_specific_constituency_region__returns_only_that_constituency( + self, mock_read_csv, mock_h5py_file, mock_download + ): + """When simulating a specific constituency, only that constituency should be returned.""" + mock_download.side_effect = [ + "/path/to/weights.h5", + "/path/to/names.csv", + ] + + # Create mock weights - 3 constituencies, 10 households + mock_weights = np.ones((3, 10)) + mock_h5py_context = MagicMock() + mock_h5py_context.__enter__ = MagicMock( + return_value={"2025": mock_weights} + ) + mock_h5py_context.__exit__ = MagicMock(return_value=False) + mock_h5py_file.return_value = mock_h5py_context + + # Create mock constituency names DataFrame + mock_const_df = pd.DataFrame( + { + "code": ["E12345678", "S12345678", "W12345678"], + "name": ["Aldershot", "Edinburgh East", "Cardiff South"], + "x": [10.0, 5.0, 3.0], + "y": [20.0, 15.0, 12.0], + } + ) + mock_read_csv.return_value = mock_const_df + + baseline = {"household_net_income": np.array([1000.0] * 10)} + reform = {"household_net_income": np.array([1050.0] * 10)} + + result = uk_constituency_breakdown( + baseline, reform, "uk", "constituency/Aldershot" + ) + + assert result is not None + assert len(result.by_constituency) == 1 + assert "Aldershot" in result.by_constituency + assert "Edinburgh East" not in result.by_constituency + assert "Cardiff South" not in 
result.by_constituency + + @patch( + "policyengine_api.endpoints.economy.compare.download_huggingface_dataset" + ) + @patch("policyengine_api.endpoints.economy.compare.h5py.File") + @patch("policyengine_api.endpoints.economy.compare.pd.read_csv") + def test__given_country_scotland_region__returns_only_scottish_constituencies( + self, mock_read_csv, mock_h5py_file, mock_download + ): + """When simulating country/scotland, only Scottish constituencies should be returned.""" + mock_download.side_effect = [ + "/path/to/weights.h5", + "/path/to/names.csv", + ] + + mock_weights = np.ones((3, 10)) + mock_h5py_context = MagicMock() + mock_h5py_context.__enter__ = MagicMock( + return_value={"2025": mock_weights} + ) + mock_h5py_context.__exit__ = MagicMock(return_value=False) + mock_h5py_file.return_value = mock_h5py_context + + mock_const_df = pd.DataFrame( + { + "code": ["E12345678", "S12345678", "W12345678"], + "name": ["Aldershot", "Edinburgh East", "Cardiff South"], + "x": [10.0, 5.0, 3.0], + "y": [20.0, 15.0, 12.0], + } + ) + mock_read_csv.return_value = mock_const_df + + baseline = {"household_net_income": np.array([1000.0] * 10)} + reform = {"household_net_income": np.array([1050.0] * 10)} + + result = uk_constituency_breakdown( + baseline, reform, "uk", "country/scotland" + ) + + assert result is not None + assert len(result.by_constituency) == 1 + assert "Edinburgh East" in result.by_constituency + assert "Aldershot" not in result.by_constituency + assert "Cardiff South" not in result.by_constituency + + @patch( + "policyengine_api.endpoints.economy.compare.download_huggingface_dataset" + ) + @patch("policyengine_api.endpoints.economy.compare.h5py.File") + @patch("policyengine_api.endpoints.economy.compare.pd.read_csv") + def test__given_uk_region__returns_all_constituencies( + self, mock_read_csv, mock_h5py_file, mock_download + ): + """When simulating uk-wide, all constituencies should be returned.""" + mock_download.side_effect = [ + "/path/to/weights.h5", + 
"/path/to/names.csv", + ] + + mock_weights = np.ones((3, 10)) + mock_h5py_context = MagicMock() + mock_h5py_context.__enter__ = MagicMock( + return_value={"2025": mock_weights} + ) + mock_h5py_context.__exit__ = MagicMock(return_value=False) + mock_h5py_file.return_value = mock_h5py_context + + mock_const_df = pd.DataFrame( + { + "code": ["E12345678", "S12345678", "W12345678"], + "name": ["Aldershot", "Edinburgh East", "Cardiff South"], + "x": [10.0, 5.0, 3.0], + "y": [20.0, 15.0, 12.0], + } + ) + mock_read_csv.return_value = mock_const_df + + baseline = {"household_net_income": np.array([1000.0] * 10)} + reform = {"household_net_income": np.array([1050.0] * 10)} + + result = uk_constituency_breakdown(baseline, reform, "uk", "uk") + + assert result is not None + assert len(result.by_constituency) == 3 + assert "Aldershot" in result.by_constituency + assert "Edinburgh East" in result.by_constituency + assert "Cardiff South" in result.by_constituency + + @patch( + "policyengine_api.endpoints.economy.compare.download_huggingface_dataset" + ) + @patch("policyengine_api.endpoints.economy.compare.h5py.File") + @patch("policyengine_api.endpoints.economy.compare.pd.read_csv") + def test__given_no_region__returns_all_constituencies( + self, mock_read_csv, mock_h5py_file, mock_download + ): + """When no region specified (None), all constituencies should be returned.""" + mock_download.side_effect = [ + "/path/to/weights.h5", + "/path/to/names.csv", + ] + + mock_weights = np.ones((3, 10)) + mock_h5py_context = MagicMock() + mock_h5py_context.__enter__ = MagicMock( + return_value={"2025": mock_weights} + ) + mock_h5py_context.__exit__ = MagicMock(return_value=False) + mock_h5py_file.return_value = mock_h5py_context + + mock_const_df = pd.DataFrame( + { + "code": ["E12345678", "S12345678", "W12345678"], + "name": ["Aldershot", "Edinburgh East", "Cardiff South"], + "x": [10.0, 5.0, 3.0], + "y": [20.0, 15.0, 12.0], + } + ) + mock_read_csv.return_value = mock_const_df + + baseline 
= {"household_net_income": np.array([1000.0] * 10)} + reform = {"household_net_income": np.array([1050.0] * 10)} + + result = uk_constituency_breakdown(baseline, reform, "uk", None) + + assert result is not None + assert len(result.by_constituency) == 3 From 9f33189f7ab73675f9f9f8e64299a542e6f3c218 Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Mon, 15 Dec 2025 22:02:03 +0400 Subject: [PATCH 4/7] chore: Lint and changelog --- .DS_Store | Bin 0 -> 6148 bytes changelog_entry.yaml | 5 + policyengine_api/endpoints/economy/compare.py | 25 +- scripts/.datasets/constituencies_2024.csv | 651 ++++++++++ scripts/.datasets/local_authorities_2021.csv | 361 ++++++ scripts/BUG_REPORT_build_from_dataframe.md | 172 +++ scripts/diagnose_country_filtering.ipynb | 503 ++++++++ scripts/prove_build_from_dataframe_bug.ipynb | 841 ++++++++++++ scripts/test_local_authority_api.py | 570 ++++++++ scripts/verify_country_filtering_bug.ipynb | 1147 +++++++++++++++++ tests/unit/endpoints/economy/test_compare.py | 16 +- tests/unit/test_country.py | 4 +- 12 files changed, 4283 insertions(+), 12 deletions(-) create mode 100644 .DS_Store create mode 100644 scripts/.datasets/constituencies_2024.csv create mode 100644 scripts/.datasets/local_authorities_2021.csv create mode 100644 scripts/BUG_REPORT_build_from_dataframe.md create mode 100644 scripts/diagnose_country_filtering.ipynb create mode 100644 scripts/prove_build_from_dataframe_bug.ipynb create mode 100755 scripts/test_local_authority_api.py create mode 100644 scripts/verify_country_filtering_bug.ipynb diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..b2a6b3e6c870dde3f033c72e8cfe8a9aab8f5326 GIT binary patch literal 6148 zcmeHKOG-mQ5UkdK0XJD@IalxoLx?BH1p*O5K%yk-`mJ&$ zdAwS@eht7D?@tfF48WA`h?9r0`MLYZZYpC$I`4SJ8;&2V!~XeYRDC(&+&dhx!2!FU z{QWj>y-ZV93P=GdAO)m=6!@hA-g{}&`$R=4AO)nrw*vlsXmrP3I3&iWgCRx$;)Lli zu49%UHct?H;gHA-&5}w?s?~^LNoT%QT`wFGlMbul!|KUa6N<&td4G#?SWi@x0#e{y 
zf!o~9y#GJaugw4FB<-Yt6!=#P*krj{F8E5-TSqVFy|&TUbg%iOyKx;9hG@scXvf@m fJDx{T)-_-AycZ6ML1#YbMEwl7E;1?b*9v?AELauN literal 0 HcmV?d00001 diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29bb..fd4509d1f 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,5 @@ +- bump: minor + changes: + added: + - Metadata for UK local authorities + - Calculation of UK local authority-level outputs \ No newline at end of file diff --git a/policyengine_api/endpoints/economy/compare.py b/policyengine_api/endpoints/economy/compare.py index f82f7eaa5..c97a03f6f 100644 --- a/policyengine_api/endpoints/economy/compare.py +++ b/policyengine_api/endpoints/economy/compare.py @@ -584,7 +584,13 @@ def uk_constituency_breakdown( "by_constituency": {}, "outcomes_by_region": {}, } - for region_name in ["uk", "england", "scotland", "wales", "northern_ireland"]: + for region_name in [ + "uk", + "england", + "scotland", + "wales", + "northern_ireland", + ]: output["outcomes_by_region"][region_name] = { "Gain more than 5%": 0, "Gain less than 5%": 0, @@ -699,7 +705,13 @@ def uk_local_authority_breakdown( "by_local_authority": {}, "outcomes_by_region": {}, } - for region_name in ["uk", "england", "scotland", "wales", "northern_ireland"]: + for region_name in [ + "uk", + "england", + "scotland", + "wales", + "northern_ireland", + ]: output["outcomes_by_region"][region_name] = { "Gain more than 5%": 0, "Gain less than 5%": 0, @@ -740,8 +752,9 @@ def uk_local_authority_breakdown( continue elif selected_country == "wales" and not code.startswith("W"): continue - elif selected_country == "northern_ireland" and not code.startswith( - "N" + elif ( + selected_country == "northern_ireland" + and not code.startswith("N") ): continue @@ -828,7 +841,9 @@ def compare_economic_outputs( uk_local_authority_breakdown(baseline, reform, country_id, region) ) if local_authority_impact_data is not None: - local_authority_impact_data = local_authority_impact_data.model_dump() + 
local_authority_impact_data = ( + local_authority_impact_data.model_dump() + ) try: wealth_decile_impact_data = wealth_decile_impact(baseline, reform) intra_wealth_decile_impact_data = intra_wealth_decile_impact( diff --git a/scripts/.datasets/constituencies_2024.csv b/scripts/.datasets/constituencies_2024.csv new file mode 100644 index 000000000..bd9a1df28 --- /dev/null +++ b/scripts/.datasets/constituencies_2024.csv @@ -0,0 +1,651 @@ +code,name,x,y +E14001063,Aldershot,56,-40 +E14001064,Aldridge-Brownhills,56,-30 +E14001065,Altrincham and Sale West,52,-25 +E14001066,Amber Valley,58,-27 +E14001067,Arundel and South Downs,61,-44 +E14001068,Ashfield,60,-27 +E14001069,Ashford,72,-42 +E14001070,Ashton-under-Lyne,54,-23 +E14001071,Aylesbury,60,-35 +E14001072,Banbury,58,-33 +E14001073,Barking,68,-38 +E14001074,Barnsley North,57,-23 +E14001075,Barnsley South,58,-23 +E14001076,Barrow and Furness,54,-16 +E14001077,Basildon and Billericay,67,-34 +E14001078,Basingstoke,55,-39 +E14001079,Bassetlaw,61,-26 +E14001080,Bath,51,-40 +E14001081,Battersea,62,-41 +E14001082,Beaconsfield,57,-37 +E14001083,Beckenham and Penge,65,-43 +E14001084,Bedford,63,-32 +E14001085,Bermondsey and Old Southwark,64,-40 +E14001086,Bethnal Green and Stepney,65,-39 +E14001087,Beverley and Holderness,64,-22 +E14001088,Bexhill and Battle,70,-44 +E14001089,Bexleyheath and Crayford,67,-39 +E14001090,Bicester and Woodstock,59,-34 +E14001091,Birkenhead,49,-27 +E14001092,Birmingham Edgbaston,53,-33 +E14001093,Birmingham Erdington,54,-31 +E14001094,Birmingham Hall Green and Moseley,55,-32 +E14001095,Birmingham Hodge Hill and Solihull North,55,-31 +E14001096,Birmingham Ladywood,54,-32 +E14001097,Birmingham Northfield,54,-34 +E14001098,Birmingham Perry Barr,53,-31 +E14001099,Birmingham Selly Oak,54,-33 +E14001100,Birmingham Yardley,56,-32 +E14001101,Bishop Auckland,54,-14 +E14001102,Blackburn,53,-19 +E14001103,Blackley and Middleton South,53,-23 +E14001104,Blackpool North and Fleetwood,53,-18 +E14001105,Blackpool 
South,52,-18 +E14001106,Blaydon and Consett,55,-14 +E14001107,Blyth and Ashington,55,-12 +E14001108,Bognor Regis and Littlehampton,63,-44 +E14001109,Bolsover,60,-26 +E14001110,Bolton North East,52,-21 +E14001111,Bolton South and Walkden,52,-22 +E14001112,Bolton West,51,-21 +E14001113,Bootle,49,-22 +E14001114,Boston and Skegness,64,-26 +E14001115,Bournemouth East,52,-43 +E14001116,Bournemouth West,52,-42 +E14001117,Bracknell,56,-39 +E14001118,Bradford East,58,-20 +E14001119,Bradford South,56,-21 +E14001120,Bradford West,57,-20 +E14001121,Braintree,67,-31 +E14001122,Brent East,61,-38 +E14001123,Brent West,60,-38 +E14001124,Brentford and Isleworth,60,-40 +E14001125,Brentwood and Ongar,66,-33 +E14001126,Bridgwater,48,-41 +E14001127,Bridlington and The Wolds,63,-20 +E14001128,Brigg and Immingham,62,-24 +E14001129,Brighton Kemptown and Peacehaven,67,-45 +E14001130,Brighton Pavilion,67,-44 +E14001131,Bristol Central,51,-38 +E14001132,Bristol East,52,-38 +E14001133,Bristol North East,51,-37 +E14001134,Bristol North West,50,-38 +E14001135,Bristol South,51,-39 +E14001136,Broadland and Fakenham,66,-27 +E14001137,Bromley and Biggin Hill,67,-42 +E14001138,Bromsgrove,52,-33 +E14001139,Broxbourne,66,-35 +E14001140,Broxtowe,59,-27 +E14001141,Buckingham and Bletchley,60,-34 +E14001142,Burnley,55,-19 +E14001143,Burton and Uttoxeter,56,-28 +E14001144,Bury North,53,-21 +E14001145,Bury South,53,-22 +E14001146,Bury St Edmunds and Stowmarket,68,-31 +E14001147,Calder Valley,56,-20 +E14001148,Camborne and Redruth,43,-45 +E14001149,Cambridge,65,-30 +E14001150,Cannock Chase,54,-29 +E14001151,Canterbury,71,-41 +E14001152,Carlisle,53,-14 +E14001153,Carshalton and Wallington,62,-43 +E14001154,Castle Point,69,-36 +E14001155,Central Devon,47,-42 +E14001156,Central Suffolk and North Ipswich,68,-29 +E14001157,Chatham and Aylesford,69,-40 +E14001158,Cheadle,55,-26 +E14001159,Chelmsford,67,-33 +E14001160,Chelsea and Fulham,61,-40 +E14001161,Cheltenham,52,-36 +E14001162,Chesham and Amersham,59,-36 
+E14001163,Chester North and Neston,50,-28 +E14001164,Chester South and Eddisbury,51,-27 +E14001165,Chesterfield,59,-26 +E14001166,Chichester,60,-44 +E14001167,Chingford and Woodford Green,64,-35 +E14001168,Chippenham,52,-39 +E14001169,Chipping Barnet,62,-36 +E14001170,Chorley,53,-20 +E14001171,Christchurch,53,-42 +E14001172,Cities of London and Westminster,63,-40 +E14001173,City of Durham,55,-16 +E14001174,Clacton,69,-32 +E14001175,Clapham and Brixton Hill,62,-42 +E14001176,Colchester,68,-32 +E14001177,Colne Valley,55,-23 +E14001178,Congleton,54,-27 +E14001179,Corby and East Northamptonshire,62,-30 +E14001180,Coventry East,57,-33 +E14001181,Coventry North West,56,-33 +E14001182,Coventry South,57,-34 +E14001183,Cramlington and Killingworth,56,-12 +E14001184,Crawley,69,-44 +E14001185,Crewe and Nantwich,53,-27 +E14001186,Croydon East,65,-42 +E14001187,Croydon South,64,-43 +E14001188,Croydon West,63,-43 +E14001189,Dagenham and Rainham,67,-37 +E14001190,Darlington,55,-17 +E14001191,Dartford,68,-40 +E14001192,Daventry,60,-32 +E14001193,Derby North,58,-28 +E14001194,Derby South,57,-28 +E14001195,Derbyshire Dales,57,-26 +E14001196,Dewsbury and Batley,57,-22 +E14001197,Didcot and Wantage,54,-38 +E14001198,Doncaster Central,60,-23 +E14001199,Doncaster East and the Isle of Axholme,61,-23 +E14001200,Doncaster North,61,-22 +E14001201,Dorking and Horley,59,-43 +E14001202,Dover and Deal,72,-41 +E14001203,Droitwich and Evesham,54,-36 +E14001204,Dudley,51,-31 +E14001205,Dulwich and West Norwood,63,-42 +E14001206,Dunstable and Leighton Buzzard,62,-33 +E14001207,Ealing Central and Acton,59,-39 +E14001208,Ealing North,59,-38 +E14001209,Ealing Southall,58,-39 +E14001210,Earley and Woodley,56,-36 +E14001211,Easington,57,-16 +E14001212,East Grinstead and Uckfield,69,-43 +E14001213,East Ham,67,-38 +E14001214,East Hampshire,55,-41 +E14001215,East Surrey,67,-43 +E14001216,East Thanet,71,-39 +E14001217,East Wiltshire,53,-41 +E14001218,East Worthing and Shoreham,65,-44 
+E14001219,Eastbourne,69,-45 +E14001220,Eastleigh,54,-41 +E14001221,Edmonton and Winchmore Hill,64,-36 +E14001222,Ellesmere Port and Bromborough,50,-27 +E14001223,Eltham and Chislehurst,66,-41 +E14001224,Ely and East Cambridgeshire,66,-30 +E14001225,Enfield North,62,-35 +E14001226,Epping Forest,67,-35 +E14001227,Epsom and Ewell,60,-43 +E14001228,Erewash,59,-28 +E14001229,Erith and Thamesmead,67,-40 +E14001230,Esher and Walton,58,-42 +E14001231,Exeter,48,-42 +E14001232,Exmouth and Exeter East,48,-43 +E14001233,Fareham and Waterlooville,55,-43 +E14001234,Farnham and Bordon,56,-42 +E14001235,Faversham and Mid Kent,71,-40 +E14001236,Feltham and Heston,59,-40 +E14001237,Filton and Bradley Stoke,50,-37 +E14001238,Finchley and Golders Green,61,-37 +E14001239,Folkestone and Hythe,71,-42 +E14001240,Forest of Dean,50,-35 +E14001241,Frome and East Somerset,50,-41 +E14001242,Fylde,51,-19 +E14001243,Gainsborough,61,-25 +E14001244,Gateshead Central and Whickham,56,-15 +E14001245,Gedling,61,-28 +E14001246,Gillingham and Rainham,70,-40 +E14001247,Glastonbury and Somerton,49,-41 +E14001248,Gloucester,51,-35 +E14001249,Godalming and Ash,57,-42 +E14001250,Goole and Pocklington,61,-21 +E14001251,Gorton and Denton,55,-24 +E14001252,Gosport,57,-43 +E14001253,Grantham and Bourne,63,-28 +E14001254,Gravesham,68,-39 +E14001255,Great Grimsby and Cleethorpes,63,-24 +E14001256,Great Yarmouth,67,-27 +E14001257,Greenwich and Woolwich,66,-40 +E14001258,Guildford,56,-41 +E14001259,Hackney North and Stoke Newington,64,-38 +E14001260,Hackney South and Shoreditch,64,-39 +E14001261,Halesowen,51,-33 +E14001262,Halifax,55,-21 +E14001263,Hamble Valley,56,-43 +E14001264,Hammersmith and Chiswick,60,-39 +E14001265,Hampstead and Highgate,62,-38 +E14001266,"Harborough, Oadby and Wigston",61,-31 +E14001267,Harlow,67,-32 +E14001268,Harpenden and Berkhamsted,62,-34 +E14001269,Harrogate and Knaresborough,59,-18 +E14001270,Harrow East,60,-37 +E14001271,Harrow West,59,-37 +E14001272,Hartlepool,59,-16 
+E14001273,Harwich and North Essex,69,-31 +E14001274,Hastings and Rye,70,-43 +E14001275,Havant,59,-44 +E14001276,Hayes and Harlington,58,-38 +E14001277,Hazel Grove,55,-25 +E14001278,Hemel Hempstead,64,-34 +E14001279,Hendon,61,-36 +E14001280,Henley and Thame,58,-35 +E14001281,Hereford and South Herefordshire,51,-34 +E14001282,Herne Bay and Sandwich,72,-40 +E14001283,Hertford and Stortford,66,-32 +E14001284,Hertsmere,66,-34 +E14001285,Hexham,53,-13 +E14001286,Heywood and Middleton North,54,-20 +E14001287,High Peak,56,-25 +E14001288,Hinckley and Bosworth,58,-30 +E14001289,Hitchin,64,-32 +E14001290,Holborn and St Pancras,62,-39 +E14001291,Honiton and Sidmouth,49,-43 +E14001292,Hornchurch and Upminster,66,-37 +E14001293,Hornsey and Friern Barnet,63,-36 +E14001294,Horsham,62,-44 +E14001295,Houghton and Sunderland South,57,-15 +E14001296,Hove and Portslade,66,-44 +E14001297,Huddersfield,56,-22 +E14001298,Huntingdon,63,-31 +E14001299,Hyndburn,54,-19 +E14001300,Ilford North,65,-36 +E14001301,Ilford South,65,-37 +E14001302,Ipswich,68,-30 +E14001303,Isle of Wight East,54,-45 +E14001304,Isle of Wight West,53,-45 +E14001305,Islington North,63,-38 +E14001306,Islington South and Finsbury,63,-39 +E14001307,Jarrow and Gateshead East,57,-14 +E14001308,Keighley and Ilkley,56,-19 +E14001309,Kenilworth and Southam,56,-34 +E14001310,Kensington and Bayswater,61,-39 +E14001311,Kettering,61,-30 +E14001312,Kingston and Surbiton,59,-42 +E14001313,Kingston upon Hull East,63,-22 +E14001314,Kingston upon Hull North and Cottingham,62,-21 +E14001315,Kingston upon Hull West and Haltemprice,62,-22 +E14001316,Kingswinford and South Staffordshire,52,-30 +E14001317,Knowsley,50,-23 +E14001318,Lancaster and Wyre,54,-18 +E14001319,Leeds Central and Headingley,60,-20 +E14001320,Leeds East,61,-20 +E14001321,Leeds North East,59,-19 +E14001322,Leeds North West,58,-19 +E14001323,Leeds South,59,-21 +E14001324,Leeds South West and Morley,58,-21 +E14001325,Leeds West and Pudsey,59,-20 +E14001326,Leicester 
East,60,-30 +E14001327,Leicester South,60,-31 +E14001328,Leicester West,59,-31 +E14001329,Leigh and Atherton,51,-25 +E14001330,Lewes,68,-45 +E14001331,Lewisham East,66,-42 +E14001332,Lewisham North,65,-40 +E14001333,Lewisham West and East Dulwich,65,-41 +E14001334,Leyton and Wanstead,64,-37 +E14001335,Lichfield,56,-29 +E14001336,Lincoln,62,-25 +E14001337,Liverpool Garston,50,-25 +E14001338,Liverpool Riverside,49,-24 +E14001339,Liverpool Walton,49,-23 +E14001340,Liverpool Wavertree,49,-25 +E14001341,Liverpool West Derby,50,-24 +E14001342,Loughborough,59,-30 +E14001343,Louth and Horncastle,63,-25 +E14001344,Lowestoft,68,-28 +E14001345,Luton North,63,-33 +E14001346,Luton South and South Bedfordshire,63,-34 +E14001347,Macclesfield,56,-26 +E14001348,Maidenhead,57,-36 +E14001349,Maidstone and Malling,69,-41 +E14001350,Makerfield,51,-22 +E14001351,Maldon,69,-33 +E14001352,Manchester Central,54,-24 +E14001353,Manchester Rusholme,53,-25 +E14001354,Manchester Withington,54,-26 +E14001355,Mansfield,61,-27 +E14001356,Melksham and Devizes,52,-40 +E14001357,Melton and Syston,61,-29 +E14001358,Meriden and Solihull East,55,-33 +E14001359,Mid Bedfordshire,62,-32 +E14001360,Mid Buckinghamshire,59,-35 +E14001361,Mid Cheshire,52,-27 +E14001362,Mid Derbyshire,57,-27 +E14001363,Mid Dorset and North Poole,50,-43 +E14001364,Mid Leicestershire,58,-31 +E14001365,Mid Norfolk,65,-28 +E14001366,Mid Sussex,68,-43 +E14001367,Middlesbrough and Thornaby East,57,-17 +E14001368,Middlesbrough South and East Cleveland,59,-17 +E14001369,Milton Keynes Central,61,-34 +E14001370,Milton Keynes North,61,-33 +E14001371,Mitcham and Morden,61,-43 +E14001372,Morecambe and Lunesdale,54,-17 +E14001373,New Forest East,54,-43 +E14001374,New Forest West,53,-43 +E14001375,Newark,62,-26 +E14001376,Newbury,54,-37 +E14001377,Newcastle upon Tyne Central and West,54,-13 +E14001378,Newcastle upon Tyne East and Wallsend,56,-14 +E14001379,Newcastle upon Tyne North,55,-13 +E14001380,Newcastle-under-Lyme,52,-28 
+E14001381,Newton Abbot,47,-43 +E14001382,Newton Aycliffe and Spennymoor,56,-16 +E14001383,Normanton and Hemsworth,59,-23 +E14001384,North Bedfordshire,62,-31 +E14001385,North Cornwall,45,-43 +E14001386,North Cotswolds,53,-37 +E14001387,North Devon,46,-41 +E14001388,North Dorset,51,-42 +E14001389,North Durham,54,-15 +E14001390,North East Cambridgeshire,64,-29 +E14001391,North East Derbyshire,58,-26 +E14001392,North East Hampshire,56,-38 +E14001393,North East Hertfordshire,65,-32 +E14001394,North East Somerset and Hanham,50,-39 +E14001395,North Herefordshire,52,-34 +E14001396,North Norfolk,65,-27 +E14001397,North Northumberland,54,-12 +E14001398,North Shropshire,50,-29 +E14001399,North Somerset,49,-39 +E14001400,North Warwickshire and Bedworth,57,-32 +E14001401,North West Cambridgeshire,64,-30 +E14001402,North West Essex,66,-31 +E14001403,North West Hampshire,54,-39 +E14001404,North West Leicestershire,58,-29 +E14001405,North West Norfolk,64,-28 +E14001406,Northampton North,61,-32 +E14001407,Northampton South,60,-33 +E14001408,Norwich North,66,-28 +E14001409,Norwich South,66,-29 +E14001410,Nottingham East,60,-29 +E14001411,Nottingham North and Kimberley,60,-28 +E14001412,Nottingham South,59,-29 +E14001413,Nuneaton,57,-31 +E14001414,Old Bexley and Sidcup,67,-41 +E14001415,Oldham East and Saddleworth,55,-22 +E14001416,"Oldham West, Chadderton and Royton",54,-22 +E14001417,Orpington,66,-43 +E14001418,Ossett and Denby Dale,58,-22 +E14001419,Oxford East,58,-34 +E14001420,Oxford West and Abingdon,57,-35 +E14001421,Peckham,64,-41 +E14001422,Pendle and Clitheroe,56,-18 +E14001423,Penistone and Stocksbridge,56,-23 +E14001424,Penrith and Solway,52,-15 +E14001425,Peterborough,63,-29 +E14001426,Plymouth Moor View,46,-43 +E14001427,Plymouth Sutton and Devonport,47,-44 +E14001428,"Pontefract, Castleford and Knottingley",60,-22 +E14001429,Poole,51,-43 +E14001430,Poplar and Limehouse,66,-39 +E14001431,Portsmouth North,58,-43 +E14001432,Portsmouth South,58,-44 
+E14001433,Preston,52,-19 +E14001434,Putney,61,-41 +E14001435,Queen's Park and Maida Vale,62,-40 +E14001436,Rawmarsh and Conisbrough,60,-24 +E14001437,Rayleigh and Wickford,68,-34 +E14001438,Reading Central,55,-37 +E14001439,Reading West and Mid Berkshire,55,-36 +E14001440,Redcar,58,-17 +E14001441,Redditch,53,-35 +E14001442,Reigate,68,-44 +E14001443,Ribble Valley,55,-18 +E14001444,Richmond and Northallerton,57,-18 +E14001445,Richmond Park,59,-41 +E14001446,Rochdale,54,-21 +E14001447,Rochester and Strood,69,-39 +E14001448,Romford,66,-36 +E14001449,Romsey and Southampton North,54,-40 +E14001450,Rossendale and Darwen,55,-20 +E14001451,Rother Valley,60,-25 +E14001452,Rotherham,59,-24 +E14001453,Rugby,58,-32 +E14001454,"Ruislip, Northwood and Pinner",60,-36 +E14001455,Runcorn and Helsby,51,-28 +E14001456,Runnymede and Weybridge,57,-41 +E14001457,Rushcliffe,62,-28 +E14001458,Rutland and Stamford,62,-29 +E14001459,Salford,53,-24 +E14001460,Salisbury,52,-41 +E14001461,Scarborough and Whitby,61,-19 +E14001462,Scunthorpe,61,-24 +E14001463,Sefton Central,50,-20 +E14001464,Selby,60,-21 +E14001465,Sevenoaks,68,-42 +E14001466,Sheffield Brightside and Hillsborough,58,-24 +E14001467,Sheffield Central,58,-25 +E14001468,Sheffield Hallam,57,-24 +E14001469,Sheffield Heeley,57,-25 +E14001470,Sheffield South East,59,-25 +E14001471,Sherwood Forest,62,-27 +E14001472,Shipley,57,-19 +E14001473,Shrewsbury,51,-30 +E14001474,Sittingbourne and Sheppey,70,-39 +E14001475,Skipton and Ripon,58,-18 +E14001476,Sleaford and North Hykeham,63,-26 +E14001477,Slough,56,-37 +E14001478,Smethwick,53,-32 +E14001479,Solihull West and Shirley,55,-34 +E14001480,South Basildon and East Thurrock,68,-36 +E14001481,South Cambridgeshire,65,-31 +E14001482,South Cotswolds,53,-38 +E14001483,South Derbyshire,57,-29 +E14001484,South Devon,48,-45 +E14001485,South Dorset,51,-44 +E14001486,South East Cornwall,46,-44 +E14001487,South Holland and The Deepings,63,-27 +E14001488,South Leicestershire,59,-32 +E14001489,South 
Norfolk,67,-29 +E14001490,South Northamptonshire,59,-33 +E14001491,South Ribble,52,-20 +E14001492,South Shields,58,-14 +E14001493,South Shropshire,50,-31 +E14001494,South Suffolk,69,-30 +E14001495,South West Devon,47,-45 +E14001496,South West Hertfordshire,61,-35 +E14001497,South West Norfolk,65,-29 +E14001498,South West Wiltshire,51,-41 +E14001499,Southampton Itchen,55,-42 +E14001500,Southampton Test,54,-42 +E14001501,Southend East and Rochford,69,-34 +E14001502,Southend West and Leigh,68,-35 +E14001503,Southgate and Wood Green,63,-35 +E14001504,Southport,50,-19 +E14001505,Spelthorne,58,-40 +E14001506,Spen Valley,57,-21 +E14001507,St Albans,65,-34 +E14001508,St Austell and Newquay,45,-44 +E14001509,St Helens North,50,-21 +E14001510,St Helens South and Whiston,50,-22 +E14001511,St Ives,43,-46 +E14001512,St Neots and Mid Cambridgeshire,64,-31 +E14001513,Stafford,54,-28 +E14001514,Staffordshire Moorlands,56,-27 +E14001515,Stalybridge and Hyde,56,-24 +E14001516,Stevenage,64,-33 +E14001517,Stockport,54,-25 +E14001518,Stockton North,58,-16 +E14001519,Stockton West,56,-17 +E14001520,Stoke-on-Trent Central,55,-28 +E14001521,Stoke-on-Trent North,55,-27 +E14001522,Stoke-on-Trent South,55,-29 +E14001523,"Stone, Great Wyrley and Penkridge",53,-28 +E14001524,Stourbridge,51,-32 +E14001525,Stratford and Bow,65,-38 +E14001526,Stratford-on-Avon,54,-35 +E14001527,Streatham and Croydon North,64,-42 +E14001528,Stretford and Urmston,52,-24 +E14001529,Stroud,52,-37 +E14001530,Suffolk Coastal,69,-29 +E14001531,Sunderland Central,58,-15 +E14001532,Surrey Heath,57,-39 +E14001533,Sussex Weald,70,-42 +E14001534,Sutton and Cheam,60,-42 +E14001535,Sutton Coldfield,56,-31 +E14001536,Swindon North,53,-39 +E14001537,Swindon South,53,-40 +E14001538,Tamworth,57,-30 +E14001539,Tatton,52,-26 +E14001540,Taunton and Wellington,49,-42 +E14001541,Telford,52,-29 +E14001542,Tewkesbury,53,-36 +E14001543,The Wrekin,51,-29 +E14001544,Thirsk and Malton,60,-18 +E14001545,Thornbury and Yate,51,-36 
+E14001546,Thurrock,67,-36 +E14001547,Tipton and Wednesbury,52,-31 +E14001548,Tiverton and Minehead,47,-41 +E14001549,Tonbridge,68,-41 +E14001550,Tooting,61,-42 +E14001551,Torbay,48,-44 +E14001552,Torridge and Tavistock,46,-42 +E14001553,Tottenham,62,-37 +E14001554,Truro and Falmouth,44,-45 +E14001555,Tunbridge Wells,69,-42 +E14001556,Twickenham,58,-41 +E14001557,Tynemouth,56,-13 +E14001558,Uxbridge and South Ruislip,58,-37 +E14001559,Vauxhall and Camberwell Green,63,-41 +E14001560,Wakefield and Rothwell,59,-22 +E14001561,Wallasey,48,-27 +E14001562,Walsall and Bloxwich,55,-30 +E14001563,Walthamstow,63,-37 +E14001564,Warrington North,51,-23 +E14001565,Warrington South,51,-24 +E14001566,Warwick and Leamington,55,-35 +E14001567,Washington and Gateshead South,55,-15 +E14001568,Watford,65,-35 +E14001569,Waveney Valley,67,-28 +E14001570,Weald of Kent,70,-41 +E14001571,Wellingborough and Rushden,63,-30 +E14001572,Wells and Mendip Hills,50,-40 +E14001573,Welwyn Hatfield,65,-33 +E14001574,West Bromwich,52,-32 +E14001575,West Dorset,50,-44 +E14001576,West Ham and Beckton,66,-38 +E14001577,West Lancashire,49,-21 +E14001578,West Suffolk,67,-30 +E14001579,West Worcestershire,52,-35 +E14001580,Westmorland and Lonsdale,53,-15 +E14001581,Weston-super-Mare,49,-40 +E14001582,Wetherby and Easingwold,62,-20 +E14001583,Whitehaven and Workington,53,-16 +E14001584,Widnes and Halewood,51,-26 +E14001585,Wigan,51,-20 +E14001586,Wimbledon,60,-41 +E14001587,Winchester,55,-40 +E14001588,Windsor,57,-38 +E14001589,Wirral West,49,-28 +E14001590,Witham,68,-33 +E14001591,Witney,56,-35 +E14001592,Woking,57,-40 +E14001593,Wokingham,55,-38 +E14001594,Wolverhampton North East,53,-29 +E14001595,Wolverhampton South East,54,-30 +E14001596,Wolverhampton West,53,-30 +E14001597,Worcester,53,-34 +E14001598,Worsley and Eccles,52,-23 +E14001599,Worthing West,64,-44 +E14001600,Wycombe,58,-36 +E14001601,Wyre Forest,50,-33 +E14001602,Wythenshawe and Sale East,53,-26 +E14001603,Yeovil,50,-42 +E14001604,York 
Central,60,-19 +E14001605,York Outer,61,-18 +N05000001,Belfast East,45,-17 +N05000002,Belfast North,45,-16 +N05000003,Belfast South and Mid Down,45,-18 +N05000004,Belfast West,44,-17 +N05000005,East Antrim,45,-15 +N05000006,East Londonderry,43,-15 +N05000007,Fermanagh and South Tyrone,42,-17 +N05000008,Foyle,42,-15 +N05000009,Lagan Valley,44,-18 +N05000010,Mid Ulster,43,-16 +N05000011,Newry and Armagh,44,-19 +N05000012,North Antrim,44,-15 +N05000013,North Down,46,-16 +N05000014,South Antrim,44,-16 +N05000015,South Down,46,-18 +N05000016,Strangford,46,-17 +N05000017,Upper Bann,43,-18 +N05000018,West Tyrone,42,-16 +S14000021,East Renfrewshire,48,-11 +S14000027,Na h-Eileanan an Iar,47,-2 +S14000045,Midlothian,52,-11 +S14000048,North Ayrshire and Arran,48,-10 +S14000051,Orkney and Shetland,51,0 +S14000060,Aberdeen North,52,-3 +S14000061,Aberdeen South,52,-4 +S14000062,Aberdeenshire North and Moray East,51,-3 +S14000063,Airdrie and Shotts,50,-11 +S14000064,Alloa and Grangemouth,50,-7 +S14000065,Angus and Perthshire Glens,50,-5 +S14000066,Arbroath and Broughty Ferry,52,-5 +S14000067,"Argyll, Bute and South Lochaber",49,-5 +S14000068,Bathgate and Linlithgow,51,-9 +S14000069,"Caithness, Sutherland and Easter Ross",50,-2 +S14000070,Coatbridge and Bellshill,50,-12 +S14000071,Cowdenbeath and Kirkcaldy,52,-7 +S14000072,Cumbernauld and Kirkintilloch,50,-8 +S14000073,Dumfries and Galloway,51,-13 +S14000074,"Dumfriesshire, Clydesdale and Tweeddale",52,-13 +S14000075,Dundee Central,50,-6 +S14000076,Dunfermline and Dollar,51,-7 +S14000077,East Kilbride and Strathaven,48,-13 +S14000078,Edinburgh East and Musselburgh,54,-10 +S14000079,Edinburgh North and Leith,53,-9 +S14000080,Edinburgh South,53,-10 +S14000081,Edinburgh South West,52,-10 +S14000082,Edinburgh West,52,-9 +S14000083,Falkirk,51,-8 +S14000084,Glasgow East,51,-10 +S14000085,Glasgow North,49,-9 +S14000086,Glasgow North East,50,-9 +S14000087,Glasgow South,49,-11 +S14000088,Glasgow South West,50,-10 +S14000089,Glasgow 
West,49,-8 +S14000090,Glenrothes and Mid Fife,52,-6 +S14000091,Gordon and Buchan,50,-4 +S14000092,Hamilton and Clyde Valley,51,-12 +S14000093,Inverclyde and Renfrewshire West,48,-8 +S14000094,"Inverness, Skye and West Ross-shire",49,-3 +S14000095,Livingston,51,-11 +S14000096,Lothian East,53,-11 +S14000097,Mid Dunbartonshire,49,-7 +S14000098,"Moray West, Nairn and Strathspey",49,-4 +S14000099,"Motherwell, Wishaw and Carluke",52,-12 +S14000100,North East Fife,51,-6 +S14000101,Paisley and Renfrewshire North,48,-9 +S14000102,Paisley and Renfrewshire South,49,-10 +S14000103,Perth and Kinross-shire,51,-5 +S14000104,Rutherglen,49,-12 +S14000105,Stirling and Strathallan,49,-6 +S14000106,West Dunbartonshire,48,-7 +S14000107,"Ayr, Carrick and Cumnock",49,-13 +S14000108,"Berwickshire, Roxburgh and Selkirk",53,-12 +S14000109,Central Ayrshire,48,-12 +S14000110,Kilmarnock and Loudoun,50,-13 +S14000111,West Aberdeenshire and Kincardine,51,-4 +W07000081,Aberafan Maesteg,46,-36 +W07000082,Alyn and Deeside,49,-29 +W07000083,Bangor Aberconwy,47,-31 +W07000084,Blaenau Gwent and Rhymney,49,-33 +W07000085,"Brecon, Radnor and Cwm Tawe",50,-32 +W07000086,Bridgend,46,-37 +W07000087,Caerfyrddin,49,-32 +W07000088,Caerphilly,49,-35 +W07000089,Cardiff East,48,-37 +W07000090,Cardiff North,48,-36 +W07000091,Cardiff South and Penarth,48,-38 +W07000092,Cardiff West,47,-37 +W07000093,Ceredigion Preseli,48,-34 +W07000094,Clwyd East,49,-30 +W07000095,Clwyd North,48,-30 +W07000096,Dwyfor Meirionnydd,48,-31 +W07000097,Gower,44,-37 +W07000098,Llanelli,45,-36 +W07000099,Merthyr Tydfil and Aberdare,49,-34 +W07000100,Mid and South Pembrokeshire,44,-36 +W07000101,Monmouthshire,50,-36 +W07000102,Montgomeryshire and Glyndwr,49,-31 +W07000103,Neath and Swansea East,47,-35 +W07000104,Newport East,49,-37 +W07000105,Newport West and Islwyn,49,-36 +W07000106,Pontypridd,48,-35 +W07000107,Rhondda and Ogmore,47,-36 +W07000108,Swansea West,45,-37 +W07000109,Torfaen,50,-34 +W07000110,Vale of Glamorgan,47,-38 
+W07000111,Wrexham,50,-30 +W07000112,Ynys Môn,46,-29 diff --git a/scripts/.datasets/local_authorities_2021.csv b/scripts/.datasets/local_authorities_2021.csv new file mode 100644 index 000000000..9fcf922ed --- /dev/null +++ b/scripts/.datasets/local_authorities_2021.csv @@ -0,0 +1,361 @@ +code,x,y,name +E06000001,8.0,19.0,Hartlepool +E06000002,9.0,18.0,Middlesbrough +E06000003,9.0,19.0,Redcar and Cleveland +E06000004,8.0,18.0,Stockton-on-Tees +E06000005,7.0,18.0,Darlington +E06000006,1.0,11.0,Halton +E06000007,2.0,11.0,Warrington +E06000008,4.0,15.0,Blackburn with Darwen +E06000009,2.0,15.0,Blackpool +E06000010,10.0,15.0,"Kingston upon Hull, City of" +E06000011,11.0,16.0,East Riding of Yorkshire +E06000012,11.0,14.0,North East Lincolnshire +E06000013,10.0,14.0,North Lincolnshire +E06000014,9.0,17.0,York +E06000015,6.0,11.0,Derby +E06000016,8.0,8.0,Leicester +E06000017,10.0,9.0,Rutland +E06000018,8.0,10.0,Nottingham +E06000019,0.0,8.0,"Herefordshire, County of" +E06000020,2.0,9.0,Telford and Wrekin +E06000021,3.0,10.0,Stoke-on-Trent +E06000022,1.0,3.0,Bath and North East Somerset +E06000023,0.0,3.0,"Bristol, City of" +E06000024,0.0,2.0,North Somerset +E06000025,1.0,4.0,South Gloucestershire +E06000026,-4.0,-2.0,Plymouth +E06000027,-3.0,-2.0,Torbay +E06000030,2.0,4.0,Swindon +E06000031,11.0,9.0,Peterborough +E06000032,10.0,7.0,Luton +E06000033,16.0,6.0,Southend-on-Sea +E06000034,15.0,4.0,Thurrock +E06000035,15.0,1.0,Medway +E06000036,4.0,2.0,Bracknell Forest +E06000037,2.0,2.0,West Berkshire +E06000038,2.0,3.0,Reading +E06000039,6.0,4.0,Slough +E06000040,4.0,3.0,Windsor and Maidenhead +E06000041,3.0,3.0,Wokingham +E06000042,6.0,5.0,Milton Keynes +E06000043,9.0,-2.0,Brighton and Hove +E06000044,4.0,-1.0,Portsmouth +E06000045,2.0,0.0,Southampton +E06000046,1.0,-2.0,Isle of Wight +E06000047,6.0,18.0,County Durham +E06000049,4.0,11.0,Cheshire East +E06000050,3.0,11.0,Cheshire West and Chester +E06000051,1.0,9.0,Shropshire +E06000052,-5.0,-2.0,Cornwall 
+E06000053,-7.0,-3.0,Isles of Scilly +E06000054,1.0,2.0,Wiltshire +E06000055,9.0,7.0,Bedford +E06000056,9.0,6.0,Central Bedfordshire +E06000057,5.0,20.0,Northumberland +E06000058,0.0,0.0,"Bournemouth, Christchurch and Poole" +E06000059,-1.0,0.0,Dorset +E06000060,5.0,5.0,Buckinghamshire +E06000061,9.0,9.0,North Northamptonshire +E06000062,7.0,6.0,West Northamptonshire +E06000063,0.0,0.0,Cumberland +E06000064,0.0,0.0,Westmorland and Furness +E06000065,0.0,0.0,North Yorkshire +E06000066,0.0,0.0,Somerset +E07000008,12.0,8.0,Cambridge +E07000009,12.0,9.0,East Cambridgeshire +E07000010,13.0,10.0,Fenland +E07000011,10.0,8.0,Huntingdonshire +E07000012,11.0,8.0,South Cambridgeshire +E07000032,7.0,11.0,Amber Valley +E07000033,10.0,12.0,Bolsover +E07000034,9.0,12.0,Chesterfield +E07000035,7.0,12.0,Derbyshire Dales +E07000036,7.0,9.0,Erewash +E07000037,7.0,13.0,High Peak +E07000038,8.0,12.0,North East Derbyshire +E07000039,6.0,10.0,South Derbyshire +E07000040,-2.0,-1.0,East Devon +E07000041,-3.0,-1.0,Exeter +E07000042,-2.0,0.0,Mid Devon +E07000043,-3.0,1.0,North Devon +E07000044,-4.0,-3.0,South Hams +E07000045,-2.0,-2.0,Teignbridge +E07000046,-4.0,-1.0,Torridge +E07000047,-3.0,0.0,West Devon +E07000061,10.0,-2.0,Eastbourne +E07000062,13.0,-2.0,Hastings +E07000063,10.0,-1.0,Lewes +E07000064,12.0,-2.0,Rother +E07000065,11.0,-2.0,Wealden +E07000066,14.0,5.0,Basildon +E07000067,14.0,7.0,Braintree +E07000068,13.0,5.0,Brentwood +E07000069,15.0,5.0,Castle Point +E07000070,14.0,6.0,Chelmsford +E07000071,15.0,8.0,Colchester +E07000072,12.0,5.0,Epping Forest +E07000073,13.0,6.0,Harlow +E07000074,15.0,7.0,Maldon +E07000075,15.0,6.0,Rochford +E07000076,16.0,8.0,Tendring +E07000077,13.0,7.0,Uttlesford +E07000078,1.0,5.0,Cheltenham +E07000079,2.0,5.0,Cotswold +E07000080,-1.0,6.0,Forest of Dean +E07000081,0.0,6.0,Gloucester +E07000082,0.0,5.0,Stroud +E07000083,1.0,6.0,Tewkesbury +E07000084,2.0,1.0,Basingstoke and Deane +E07000085,4.0,0.0,East Hampshire +E07000086,3.0,0.0,Eastleigh 
+E07000087,2.0,-1.0,Fareham +E07000088,3.0,-1.0,Gosport +E07000089,3.0,2.0,Hart +E07000090,5.0,0.0,Havant +E07000091,1.0,0.0,New Forest +E07000092,4.0,1.0,Rushmoor +E07000093,1.0,1.0,Test Valley +E07000094,3.0,1.0,Winchester +E07000095,12.0,6.0,Broxbourne +E07000096,8.0,6.0,Dacorum +E07000098,9.0,5.0,Hertsmere +E07000099,11.0,7.0,North Hertfordshire +E07000102,7.0,5.0,Three Rivers +E07000103,8.0,5.0,Watford +E07000105,12.0,-1.0,Ashford +E07000106,15.0,0.0,Canterbury +E07000107,13.0,1.0,Dartford +E07000108,14.0,-1.0,Dover +E07000109,14.0,1.0,Gravesham +E07000110,14.0,0.0,Maidstone +E07000111,12.0,0.0,Sevenoaks +E07000112,13.0,-1.0,Folkestone and Hythe +E07000113,16.0,0.0,Swale +E07000114,15.0,-1.0,Thanet +E07000115,13.0,0.0,Tonbridge and Malling +E07000116,11.0,-1.0,Tunbridge Wells +E07000117,6.0,15.0,Burnley +E07000118,3.0,14.0,Chorley +E07000119,4.0,16.0,Fylde +E07000120,5.0,15.0,Hyndburn +E07000121,3.0,17.0,Lancaster +E07000122,6.0,16.0,Pendle +E07000123,5.0,16.0,Preston +E07000124,5.0,17.0,Ribble Valley +E07000125,6.0,14.0,Rossendale +E07000126,3.0,15.0,South Ribble +E07000127,2.0,13.0,West Lancashire +E07000128,3.0,16.0,Wyre +E07000129,7.0,7.0,Blaby +E07000130,8.0,9.0,Charnwood +E07000131,8.0,7.0,Harborough +E07000132,7.0,8.0,Hinckley and Bosworth +E07000133,11.0,10.0,Melton +E07000134,6.0,9.0,North West Leicestershire +E07000135,9.0,8.0,Oadby and Wigston +E07000136,12.0,12.0,Boston +E07000137,12.0,13.0,East Lindsey +E07000138,11.0,12.0,Lincoln +E07000139,11.0,11.0,North Kesteven +E07000140,12.0,11.0,South Holland +E07000141,12.0,10.0,South Kesteven +E07000142,11.0,13.0,West Lindsey +E07000143,14.0,10.0,Breckland +E07000144,15.0,12.0,Broadland +E07000145,15.0,11.0,Great Yarmouth +E07000146,13.0,11.0,King's Lynn and West Norfolk +E07000147,14.0,12.0,North Norfolk +E07000148,14.0,11.0,Norwich +E07000149,15.0,10.0,South Norfolk +E07000170,8.0,11.0,Ashfield +E07000171,10.0,13.0,Bassetlaw +E07000172,7.0,10.0,Broxtowe +E07000173,9.0,10.0,Gedling 
+E07000174,9.0,11.0,Mansfield +E07000175,10.0,11.0,Newark and Sherwood +E07000176,10.0,10.0,Rushcliffe +E07000177,4.0,5.0,Cherwell +E07000178,4.0,4.0,Oxford +E07000179,5.0,4.0,South Oxfordshire +E07000180,3.0,4.0,Vale of White Horse +E07000181,3.0,5.0,West Oxfordshire +E07000192,3.0,9.0,Cannock Chase +E07000193,5.0,11.0,East Staffordshire +E07000194,4.0,9.0,Lichfield +E07000195,2.0,10.0,Newcastle-under-Lyme +E07000196,2.0,8.0,South Staffordshire +E07000197,4.0,10.0,Stafford +E07000198,5.0,10.0,Staffordshire Moorlands +E07000199,5.0,9.0,Tamworth +E07000200,14.0,8.0,Babergh +E07000202,15.0,9.0,Ipswich +E07000203,14.0,9.0,Mid Suffolk +E07000207,7.0,2.0,Elmbridge +E07000208,8.0,0.0,Epsom and Ewell +E07000209,5.0,1.0,Guildford +E07000210,6.0,1.0,Mole Valley +E07000211,7.0,0.0,Reigate and Banstead +E07000212,5.0,3.0,Runnymede +E07000213,6.0,3.0,Spelthorne +E07000214,5.0,2.0,Surrey Heath +E07000215,9.0,-1.0,Tandridge +E07000216,6.0,0.0,Waverley +E07000217,6.0,2.0,Woking +E07000218,6.0,8.0,North Warwickshire +E07000219,6.0,7.0,Nuneaton and Bedworth +E07000220,6.0,6.0,Rugby +E07000221,3.0,6.0,Stratford-on-Avon +E07000222,4.0,6.0,Warwick +E07000223,8.0,-2.0,Adur +E07000224,6.0,-2.0,Arun +E07000225,5.0,-1.0,Chichester +E07000226,8.0,-1.0,Crawley +E07000227,6.0,-1.0,Horsham +E07000228,7.0,-1.0,Mid Sussex +E07000229,7.0,-2.0,Worthing +E07000234,2.0,7.0,Bromsgrove +E07000235,-1.0,7.0,Malvern Hills +E07000236,4.0,7.0,Redditch +E07000237,0.0,7.0,Worcester +E07000238,2.0,6.0,Wychavon +E07000239,1.0,8.0,Wyre Forest +E07000240,10.0,6.0,St Albans +E07000241,11.0,6.0,Welwyn Hatfield +E07000242,13.0,8.0,East Hertfordshire +E07000243,12.0,7.0,Stevenage +E07000244,16.0,10.0,East Suffolk +E07000245,13.0,9.0,West Suffolk +E08000001,4.0,14.0,Bolton +E08000002,5.0,14.0,Bury +E08000003,5.0,12.0,Manchester +E08000004,5.0,13.0,Oldham +E08000005,7.0,14.0,Rochdale +E08000006,4.0,13.0,Salford +E08000007,6.0,12.0,Stockport +E08000008,6.0,13.0,Tameside +E08000009,4.0,12.0,Trafford 
+E08000010,3.0,13.0,Wigan +E08000011,2.0,12.0,Knowsley +E08000012,1.0,13.0,Liverpool +E08000013,3.0,12.0,St. Helens +E08000014,2.0,14.0,Sefton +E08000015,1.0,12.0,Wirral +E08000016,8.0,14.0,Barnsley +E08000017,9.0,14.0,Doncaster +E08000018,9.0,13.0,Rotherham +E08000019,8.0,13.0,Sheffield +E08000021,5.0,19.0,Newcastle upon Tyne +E08000022,6.0,20.0,North Tyneside +E08000023,7.0,20.0,South Tyneside +E08000024,7.0,19.0,Sunderland +E08000025,5.0,8.0,Birmingham +E08000026,5.0,6.0,Coventry +E08000027,1.0,7.0,Dudley +E08000028,3.0,7.0,Sandwell +E08000029,5.0,7.0,Solihull +E08000030,4.0,8.0,Walsall +E08000031,3.0,8.0,Wolverhampton +E08000032,7.0,16.0,Bradford +E08000033,7.0,15.0,Calderdale +E08000034,8.0,15.0,Kirklees +E08000035,8.0,16.0,Leeds +E08000036,9.0,15.0,Wakefield +E08000037,6.0,19.0,Gateshead +E09000001,11.0,2.0,City of London +E09000002,13.0,3.0,Barking and Dagenham +E09000003,10.0,5.0,Barnet +E09000004,12.0,1.0,Bexley +E09000005,10.0,4.0,Brent +E09000006,11.0,0.0,Bromley +E09000007,11.0,4.0,Camden +E09000008,10.0,0.0,Croydon +E09000009,9.0,4.0,Ealing +E09000010,11.0,5.0,Enfield +E09000011,11.0,1.0,Greenwich +E09000012,12.0,3.0,Hackney +E09000013,8.0,3.0,Hammersmith and Fulham +E09000014,12.0,4.0,Haringey +E09000015,8.0,4.0,Harrow +E09000016,14.0,3.0,Havering +E09000017,7.0,4.0,Hillingdon +E09000018,7.0,3.0,Hounslow +E09000019,11.0,3.0,Islington +E09000020,9.0,3.0,Kensington and Chelsea +E09000021,7.0,1.0,Kingston upon Thames +E09000022,10.0,2.0,Lambeth +E09000023,10.0,1.0,Lewisham +E09000024,8.0,1.0,Merton +E09000025,13.0,2.0,Newham +E09000026,14.0,4.0,Redbridge +E09000027,8.0,2.0,Richmond upon Thames +E09000028,9.0,1.0,Southwark +E09000029,9.0,0.0,Sutton +E09000030,12.0,2.0,Tower Hamlets +E09000031,13.0,4.0,Waltham Forest +E09000032,9.0,2.0,Wandsworth +E09000033,10.0,3.0,Westminster +N09000001,-4.0,16.0,Antrim and Newtownabbey +N09000002,-5.0,16.0,"Armagh City, Banbridge and Craigavon" +N09000003,-4.0,17.0,Belfast +N09000004,-5.0,18.0,Causeway Coast and Glens 
+N09000005,-6.0,17.0,Derry City and Strabane +N09000006,-6.0,16.0,Fermanagh and Omagh +N09000007,-5.0,15.0,Lisburn and Castlereagh +N09000008,-4.0,18.0,Mid and East Antrim +N09000009,-5.0,17.0,Mid Ulster +N09000010,-4.0,15.0,"Newry, Mourne and Down" +S12000005,2.0,24.0,Clackmannanshire +S12000006,4.0,20.0,Dumfries and Galloway +S12000008,3.0,20.0,East Ayrshire +S12000010,5.0,22.0,East Lothian +S12000011,2.0,20.0,East Renfrewshire +S12000013,-1.0,27.0,Na h-Eileanan Siar +S12000014,2.0,23.0,Falkirk +S12000017,1.0,26.0,Highland +S12000018,0.0,21.0,Inverclyde +S12000019,3.0,21.0,Midlothian +S12000020,2.0,26.0,Moray +S12000021,1.0,20.0,North Ayrshire +S12000023,4.0,28.0,Orkney Islands +S12000026,4.0,21.0,Scottish Borders +S12000027,5.0,30.0,Shetland Islands +S12000028,1.0,19.0,South Ayrshire +S12000029,2.0,21.0,South Lanarkshire +S12000030,1.0,24.0,Stirling +S12000033,4.0,26.0,Aberdeen City +S12000034,3.0,26.0,Aberdeenshire +S12000035,0.0,24.0,Argyll and Bute +S12000036,4.0,22.0,City of Edinburgh +S12000038,1.0,22.0,Renfrewshire +S12000039,0.0,23.0,West Dunbartonshire +S12000040,3.0,22.0,West Lothian +S12000041,2.0,25.0,Angus +S12000042,3.0,25.0,Dundee City +S12000045,1.0,23.0,East Dunbartonshire +S12000047,3.0,24.0,Fife +S12000048,1.0,25.0,Perth and Kinross +S12000049,1.0,21.0,Glasgow City +S12000050,2.0,22.0,North Lanarkshire +W06000001,-2.0,12.0,Isle of Anglesey +W06000002,-2.0,10.0,Gwynedd +W06000003,-1.0,10.0,Conwy +W06000004,0.0,10.0,Denbighshire +W06000005,0.0,11.0,Flintshire +W06000006,1.0,10.0,Wrexham +W06000008,-2.0,9.0,Ceredigion +W06000009,-5.0,6.0,Pembrokeshire +W06000010,-4.0,6.0,Carmarthenshire +W06000011,-4.0,5.0,Swansea +W06000012,-3.0,5.0,Neath Port Talbot +W06000013,-3.0,6.0,Bridgend +W06000014,-2.0,4.0,Vale of Glamorgan +W06000015,-2.0,5.0,Cardiff +W06000016,-3.0,7.0,Rhondda Cynon Taf +W06000018,-2.0,6.0,Caerphilly +W06000019,0.0,9.0,Blaenau Gwent +W06000020,-2.0,7.0,Torfaen +W06000021,-1.0,8.0,Monmouthshire +W06000022,-1.0,5.0,Newport 
+W06000023,-1.0,9.0,Powys +W06000024,-2.0,8.0,Merthyr Tydfil diff --git a/scripts/BUG_REPORT_build_from_dataframe.md b/scripts/BUG_REPORT_build_from_dataframe.md new file mode 100644 index 000000000..503557e56 --- /dev/null +++ b/scripts/BUG_REPORT_build_from_dataframe.md @@ -0,0 +1,172 @@ +# Bug Report: Entity-Level Aggregation Missing in `build_from_dataframe` + +## Summary + +The `build_from_dataframe` method in `policyengine_uk` does not aggregate person-level data to entity-level before calling `set_input()`, causing UK country filtering (e.g., Wales) to fail with array length mismatch errors. + +## Affected Repository + +**Repository:** `policyengine-uk` +**File:** `policyengine_uk/simulation.py` +**Method:** `build_from_dataframe()` +**Approximate Lines:** 281-286 (may vary by version) + +## Symptoms + +When running a UK simulation filtered to a specific country (e.g., Wales), the following error occurs: + +``` +ValueError: Unable to set value "[ True True True ... False False False]" +for variable "would_evade_tv_licence_fee", as its length is 8470 +while there are 4108 households in the simulation. +``` + +The error occurs because: +- 8,470 = number of Welsh **persons** in the dataset +- 4,108 = number of Welsh **households** in the dataset +- The code tries to assign person-level arrays to household-level variables + +## Root Cause + +### The Bug Location + +```python +# In policyengine_uk/simulation.py, build_from_dataframe method: + +# Set input values for each variable and time period +for column in df: + variable, time_period = column.split("__") + if variable not in self.tax_benefit_system.variables: + continue + self.set_input(variable, time_period, df[column]) # <-- BUG HERE +``` + +### Why This Fails + +1. **`to_input_dataframe()`** exports ALL variables at **person level** (one row per person), regardless of the variable's native entity. This is by design - it creates a flat DataFrame where each row represents a person. + +2. 
**`build_from_dataframe()`** correctly builds the entity structure: + - Extracts `person_household_id` to determine household membership + - Creates the correct number of households (e.g., 4,108 for Wales) + - Sets up person-to-household relationships properly + +3. **BUT** the loop that sets variable values does NOT check if aggregation is needed. It passes person-level arrays (8,470 values) directly to `set_input()` for household-level variables that only have 4,108 entities. + +### The Correct Approach + +The `policyengine_core` library's `build_from_dataset()` method handles this correctly in `policyengine_core/simulations/simulation.py`: + +```python +# From policyengine_core/simulations/simulation.py, build_from_dataset method: + +if len(data[variable]) != len(population.ids): + population: GroupPopulation + entity_level_data = population.value_from_first_person(data[variable]) +else: + entity_level_data = data[variable] + +self.set_input(variable_name, time_period, entity_level_data) +``` + +## Required Fix + +### Current Buggy Code + +```python +# Set input values for each variable and time period +for column in df: + variable, time_period = column.split("__") + if variable not in self.tax_benefit_system.variables: + continue + self.set_input(variable, time_period, df[column]) +``` + +### Fixed Code + +```python +# Set input values for each variable and time period +for column in df: + variable, time_period = column.split("__") + if variable not in self.tax_benefit_system.variables: + continue + + # Get variable metadata and target population + var_meta = self.tax_benefit_system.get_variable(variable) + entity = var_meta.entity + population = self.get_population(entity.plural) + + data = df[column].values + + # Check if aggregation is needed (data is person-level but variable is group-level) + if len(data) != population.count: + # Aggregate from person-level to entity-level using first person's value + data = population.value_from_first_person(data) + + 
self.set_input(variable, time_period, data) +``` + +## Technical Details + +### What `value_from_first_person()` Does + +This method aggregates person-level data to group-level by taking the value from the first person in each group. For household-level variables (like `would_evade_tv_licence_fee`), all persons in a household share the same value, so taking the first person's value is correct. + +The method is defined in `policyengine_core` on `GroupPopulation` objects. + +### Why This Pattern Works + +- Person-level variables: `len(data) == population.count` (no aggregation needed) +- Group-level variables exported at person level: `len(data) != population.count` (aggregation needed) + +### Entity Structure in UK Model + +The UK tax-benefit system has these entities: +- `person` - Individual people +- `benunit` - Benefit units (roughly: nuclear families) +- `household` - Households (one or more benefit units sharing accommodation) + +When filtering to Wales: +- ~8,470 persons +- ~4,108 households +- The person-to-household ratio varies with household composition + +## Reproduction Steps + +1. Create a UK macro simulation: `Simulation(country="uk", scope="macro")` +2. Filter to a UK country: `Simulation(country="uk", scope="macro", region="country/wales")` +3. The filtering process: + - Calls `to_input_dataframe()` on the baseline simulation + - Filters the DataFrame to Welsh persons only + - Calls `Microsimulation(dataset=filtered_df)` which invokes `build_from_dataframe()` +4. Error occurs when `build_from_dataframe()` tries to set household-level variables + +## Verification + +A Jupyter notebook that proves this bug is located at: +`policyengine-api/scripts/prove_build_from_dataframe_bug.ipynb` + +The notebook: +1. Creates a UK simulation and exports to DataFrame +2. Filters to Wales (8,470 persons, 4,108 households) +3. Manually traces through `build_from_dataframe()` step by step +4. Shows entity structure is correctly built (4,108 households) +5. 
Demonstrates the `set_input()` call fails with length mismatch +6. Shows the fix (aggregation) works correctly + +## Impact + +This bug affects: +- UK country filtering (`country/wales`, `country/scotland`, `country/northern_ireland`, `country/england`) +- Any code path that uses `build_from_dataframe()` with a filtered DataFrame + +This bug does NOT affect: +- Constituency filtering (uses weight adjustment, not DataFrame subsetting) +- Local authority filtering (uses weight adjustment, not DataFrame subsetting) +- UK-wide simulations (no filtering needed) + +## Notes for Implementation + +1. The fix is minimal - just wrap the existing `set_input()` call with a length check and aggregation +2. No new dependencies are needed - `value_from_first_person()` is already available on population objects +3. The fix matches the existing pattern in `policyengine_core`'s `build_from_dataset()` method +4. Consider adding a unit test that creates a simulation from a filtered DataFrame and verifies household-level variables work correctly diff --git a/scripts/diagnose_country_filtering.ipynb b/scripts/diagnose_country_filtering.ipynb new file mode 100644 index 000000000..e9d2b1498 --- /dev/null +++ b/scripts/diagnose_country_filtering.ipynb @@ -0,0 +1,503 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Diagnosing UK Country Filtering Issue in policyengine.py\n", + "\n", + "This notebook tests whether `policyengine.py` properly filters simulations by UK country (e.g., Wales).\n", + "\n", + "## The Issue\n", + "When running a simulation filtered to a specific UK country (e.g., `country/wales`), we get:\n", + "```\n", + "ValueError: Unable to set value \"[ True True True ... 
False False False]\" for variable \n", + "\"would_evade_tv_licence_fee\", as its length is 8470 while there are 4108 households in the simulation.\n", + "```\n", + "\n", + "## Hypothesis\n", + "The `to_input_dataframe()` method doesn't export `person_household_id`, causing the filtered simulation\n", + "to lose entity relationship information and incorrectly set up household counts." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 1: Setup and Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from policyengine import Simulation\n", + "\n", + "# Check policyengine version\n", + "import policyengine\n", + "print(f\"policyengine version: {policyengine.__version__}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 2: Create a Baseline UK Simulation\n", + "\n", + "First, let's create a standard UK-wide simulation and examine its structure." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a UK-wide simulation (no region filter)\n", + "print(\"Creating UK-wide simulation...\")\n", + "sim_uk = Simulation(country=\"uk\", scope=\"macro\")\n", + "\n", + "# Access the underlying country simulation\n", + "underlying_sim = sim_uk.baseline_simulation\n", + "\n", + "print(f\"\\n=== UK-Wide Simulation Structure ===\")\n", + "print(f\"Person count: {underlying_sim.persons.count}\")\n", + "print(f\"Household count: {underlying_sim.household.count}\")\n", + "print(f\"BenUnit count: {underlying_sim.benunit.count}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check the country distribution in the UK simulation\n", + "country_values = sim_uk.calculate(\"country\")\n", + "print(\"\\n=== Country Distribution (Household Level) ===\")\n", + "print(country_values.value_counts())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check person-level country distribution\n", + "country_person = underlying_sim.calculate(\"country\", map_to=\"person\")\n", + "unique, counts = np.unique(country_person, return_counts=True)\n", + "print(\"\\n=== Country Distribution (Person Level) ===\")\n", + "for u, c in zip(unique, counts):\n", + " print(f\" {u}: {c} persons\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 3: Test `to_input_dataframe()` Export\n", + "\n", + "Let's examine what columns are exported by `to_input_dataframe()` to see if entity linkage variables are included." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Export the simulation to a dataframe\n", + "print(\"Exporting simulation to DataFrame...\")\n", + "df = underlying_sim.to_input_dataframe()\n", + "\n", + "print(f\"\\n=== Exported DataFrame ===\")\n", + "print(f\"Shape: {df.shape}\")\n", + "print(f\"Number of columns: {len(df.columns)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check for entity ID and linkage columns\n", + "print(\"\\n=== Entity-Related Columns ===\")\n", + "\n", + "id_columns = [c for c in df.columns if '_id' in c.lower()]\n", + "print(f\"\\nColumns containing '_id': {len(id_columns)}\")\n", + "for col in sorted(id_columns):\n", + " print(f\" - {col}\")\n", + "\n", + "# Specifically check for critical columns\n", + "critical_cols = ['person_id', 'household_id', 'person_household_id', 'benunit_id', 'person_benunit_id']\n", + "print(f\"\\n=== Critical Entity Linkage Columns ===\")\n", + "for col_base in critical_cols:\n", + " matching = [c for c in df.columns if c.startswith(col_base)]\n", + " if matching:\n", + " print(f\" {col_base}: FOUND -> {matching}\")\n", + " else:\n", + " print(f\" {col_base}: MISSING!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check if person_household_id has known periods in the simulation\n", + "print(\"\\n=== Checking Known Periods for Entity Linkage Variables ===\")\n", + "\n", + "for var_name in ['person_id', 'household_id', 'person_household_id', 'person_benunit_id']:\n", + " try:\n", + " holder = underlying_sim.get_holder(var_name)\n", + " known_periods = holder.get_known_periods()\n", + " print(f\" {var_name}: known_periods = {list(known_periods)}\")\n", + " except Exception as e:\n", + " print(f\" {var_name}: ERROR - {e}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + 
"## Step 4: Simulate Country Filtering (Wales)\n", + "\n", + "Now let's create a Wales-filtered simulation and see what happens." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a Wales simulation\n", + "print(\"Creating Wales simulation...\")\n", + "print(\"(This may trigger the error we're diagnosing)\")\n", + "print()\n", + "\n", + "try:\n", + " sim_wales = Simulation(country=\"uk\", scope=\"macro\", region=\"country/wales\")\n", + " wales_underlying = sim_wales.baseline_simulation\n", + " \n", + " print(f\"\\n=== Wales Simulation Structure ===\")\n", + " print(f\"Person count: {wales_underlying.persons.count}\")\n", + " print(f\"Household count: {wales_underlying.household.count}\")\n", + " print(f\"BenUnit count: {wales_underlying.benunit.count}\")\n", + " \n", + " # Check if counts make sense\n", + " if wales_underlying.household.count == wales_underlying.persons.count:\n", + " print(\"\\n*** WARNING: Household count equals person count! ***\")\n", + " print(\"This suggests entity linkage was lost during filtering.\")\n", + " \n", + "except Exception as e:\n", + " print(f\"\\n*** ERROR creating Wales simulation ***\")\n", + " print(f\"Error type: {type(e).__name__}\")\n", + " print(f\"Error message: {e}\")\n", + " import traceback\n", + " traceback.print_exc()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 5: Manual Reproduction of the Filtering Process\n", + "\n", + "Let's manually reproduce what `_apply_region_to_simulation` does to understand where it breaks." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Step-by-step reproduction of the filtering logic\n", + "print(\"=== Manual Reproduction of Country Filtering ===\")\n", + "\n", + "# Step 1: Export to DataFrame\n", + "print(\"\\n[Step 1] Exporting to DataFrame...\")\n", + "df = underlying_sim.to_input_dataframe()\n", + "print(f\" DataFrame shape: {df.shape}\")\n", + "print(f\" Columns with 'household': {[c for c in df.columns if 'household' in c.lower()][:10]}...\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Step 2: Calculate country at person level\n", + "print(\"\\n[Step 2] Calculating country at person level...\")\n", + "country_person_level = underlying_sim.calculate(\"country\", map_to=\"person\").values\n", + "print(f\" Country array shape: {country_person_level.shape}\")\n", + "print(f\" Unique values: {np.unique(country_person_level)}\")\n", + "\n", + "# Count Welsh persons\n", + "wales_mask = country_person_level == \"WALES\"\n", + "print(f\" Welsh persons: {wales_mask.sum()}\")\n", + "print(f\" Non-Welsh persons: {(~wales_mask).sum()}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Step 3: Filter DataFrame to Wales\n", + "print(\"\\n[Step 3] Filtering DataFrame to Wales...\")\n", + "df_wales = df[wales_mask]\n", + "print(f\" Filtered DataFrame shape: {df_wales.shape}\")\n", + "\n", + "# Check what person_household_id looks like in filtered data\n", + "phh_cols = [c for c in df_wales.columns if 'person_household_id' in c]\n", + "if phh_cols:\n", + " print(f\" person_household_id columns: {phh_cols}\")\n", + " for col in phh_cols:\n", + " vals = df_wales[col].values\n", + " print(f\" {col}: {len(np.unique(vals))} unique values\")\n", + "else:\n", + " print(\" person_household_id: NOT IN DATAFRAME!\")\n", + " print(\" This is likely the root cause 
of the issue.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Step 4: Try to create a new simulation from filtered DataFrame\n", + "print(\"\\n[Step 4] Creating new simulation from filtered DataFrame...\")\n", + "\n", + "from policyengine_uk import Microsimulation\n", + "\n", + "try:\n", + " new_sim = Microsimulation(dataset=df_wales)\n", + " \n", + " print(f\" New simulation created!\")\n", + " print(f\" Person count: {new_sim.persons.count}\")\n", + " print(f\" Household count: {new_sim.household.count}\")\n", + " \n", + " # Critical check\n", + " if new_sim.household.count == new_sim.persons.count:\n", + " print(\"\\n *** CONFIRMED: Household count equals person count! ***\")\n", + " print(\" The entity linkage was lost because person_household_id is missing.\")\n", + " elif new_sim.household.count == len(np.unique(df_wales.iloc[:, 0])):\n", + " print(\"\\n *** Household count matches first column's unique values ***\")\n", + " print(\" This confirms the fallback behavior in build_from_dataset()\")\n", + " \n", + "except Exception as e:\n", + " print(f\" Error creating simulation: {e}\")\n", + " import traceback\n", + " traceback.print_exc()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Step 5: Try to calculate would_evade_tv_licence_fee (this should trigger the error)\n", + "print(\"\\n[Step 5] Attempting to calculate would_evade_tv_licence_fee...\")\n", + "\n", + "try:\n", + " # This calculation uses random(household), which will fail if household count is wrong\n", + " result = new_sim.calculate(\"would_evade_tv_licence_fee\")\n", + " print(f\" Calculation succeeded!\")\n", + " print(f\" Result shape: {result.shape}\")\n", + " print(f\" Result dtype: {result.dtype}\")\n", + "except ValueError as e:\n", + " print(f\" *** ValueError (expected): ***\")\n", + " print(f\" {e}\")\n", + " \n", + " # Parse the error to 
understand the mismatch\n", + " error_str = str(e)\n", + " if \"length is\" in error_str and \"while there are\" in error_str:\n", + " print(f\"\\n This confirms the array size mismatch issue.\")\n", + "except Exception as e:\n", + " print(f\" Unexpected error: {type(e).__name__}: {e}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 6: Deeper Investigation - What Does household_id Return?\n", + "\n", + "Let's check what `household_id` returns in the broken simulation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check household_id in the new (potentially broken) simulation\n", + "print(\"=== Investigating household_id in Filtered Simulation ===\")\n", + "\n", + "try:\n", + " # This is what random() calls internally\n", + " hh_ids = new_sim.calculate(\"household_id\", 2025)\n", + " print(f\"household_id result length: {len(hh_ids)}\")\n", + " print(f\"household_id unique count: {len(np.unique(hh_ids))}\")\n", + " print(f\"Expected household count: {new_sim.household.count}\")\n", + " \n", + " if len(hh_ids) != new_sim.household.count:\n", + " print(f\"\\n*** MISMATCH: household_id has {len(hh_ids)} values but simulation has {new_sim.household.count} households ***\")\n", + "except Exception as e:\n", + " print(f\"Error: {e}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check the holder for household_id\n", + "print(\"\\n=== Checking household_id Holder ===\")\n", + "try:\n", + " holder = new_sim.get_holder(\"household_id\")\n", + " known_periods = holder.get_known_periods()\n", + " print(f\"Known periods: {list(known_periods)}\")\n", + " \n", + " for period in known_periods:\n", + " arr = holder.get_array(period)\n", + " print(f\" Period {period}: array shape = {arr.shape if arr is not None else 'None'}\")\n", + "except Exception as e:\n", + " print(f\"Error: {e}\")" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 7: Compare with Working Approaches (Constituency/LA)\n", + "\n", + "Constituency and LA filtering use weight adjustment instead of DataFrame subsetting. Let's verify this works." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Test constituency filtering (should work)\n", + "print(\"=== Testing Constituency Filtering (Should Work) ===\")\n", + "\n", + "try:\n", + " sim_constituency = Simulation(country=\"uk\", scope=\"macro\", region=\"constituency/Cardiff South and Penarth\")\n", + " const_underlying = sim_constituency.baseline_simulation\n", + " \n", + " print(f\"Constituency simulation created successfully!\")\n", + " print(f\" Person count: {const_underlying.persons.count}\")\n", + " print(f\" Household count: {const_underlying.household.count}\")\n", + " \n", + " # Try the problematic calculation\n", + " result = sim_constituency.calculate(\"would_evade_tv_licence_fee\")\n", + " print(f\" would_evade_tv_licence_fee calculated successfully!\")\n", + " print(f\" Result length: {len(result)}\")\n", + " \n", + "except Exception as e:\n", + " print(f\"Error: {type(e).__name__}: {e}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Test local authority filtering (should work)\n", + "print(\"\\n=== Testing Local Authority Filtering (Should Work) ===\")\n", + "\n", + "try:\n", + " sim_la = Simulation(country=\"uk\", scope=\"macro\", region=\"local_authority/Cardiff\")\n", + " la_underlying = sim_la.baseline_simulation\n", + " \n", + " print(f\"LA simulation created successfully!\")\n", + " print(f\" Person count: {la_underlying.persons.count}\")\n", + " print(f\" Household count: {la_underlying.household.count}\")\n", + " \n", + " # Try the problematic calculation\n", + " result = sim_la.calculate(\"would_evade_tv_licence_fee\")\n", + " print(f\" 
would_evade_tv_licence_fee calculated successfully!\")\n", + " print(f\" Result length: {len(result)}\")\n", + " \n", + "except Exception as e:\n", + " print(f\"Error: {type(e).__name__}: {e}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary and Conclusions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"=\"*70)\n", + "print(\"DIAGNOSIS SUMMARY\")\n", + "print(\"=\"*70)\n", + "\n", + "print(\"\"\"\n", + "Based on the tests above:\n", + "\n", + "1. COUNTRY FILTERING (country/wales):\n", + " - Uses to_input_dataframe() + DataFrame subsetting + new Microsimulation()\n", + " - FAILS because person_household_id is not exported\n", + " - Results in household count = person count (entity linkage lost)\n", + "\n", + "2. CONSTITUENCY FILTERING (constituency/...):\n", + " - Uses weight adjustment on existing simulation\n", + " - WORKS because entity structure is preserved\n", + "\n", + "3. 
LOCAL AUTHORITY FILTERING (local_authority/...):\n", + " - Uses weight adjustment on existing simulation \n", + " - WORKS because entity structure is preserved\n", + "\n", + "ROOT CAUSE:\n", + "- to_input_dataframe() only exports variables with known periods\n", + "- person_household_id doesn't have known periods (it's derived from dataset structure)\n", + "- When building from filtered DataFrame, the fallback creates 1 household per person\n", + "\n", + "RECOMMENDED FIX:\n", + "- Option A: Fix to_input_dataframe() to always export entity linkage variables\n", + "- Option B: Use weight-zeroing for country filtering (like constituency/LA)\n", + "\"\"\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/scripts/prove_build_from_dataframe_bug.ipynb b/scripts/prove_build_from_dataframe_bug.ipynb new file mode 100644 index 000000000..a65202fc9 --- /dev/null +++ b/scripts/prove_build_from_dataframe_bug.ipynb @@ -0,0 +1,841 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "cell-0", + "metadata": {}, + "source": [ + "# Proving the Bug in policyengine_uk's build_from_dataframe Method\n", + "\n", + "This notebook proves that the UK country filtering bug is caused by `policyengine_uk`'s \n", + "`build_from_dataframe` method not handling entity-level aggregation.\n", + "\n", + "## The Bug Location\n", + "**File:** `policyengine_uk/simulation.py` \n", + "**Method:** `build_from_dataframe()` \n", + "**Lines:** 281-286\n", + "\n", + "```python\n", + "# Set input values for each variable and time period\n", + "for column in df:\n", + " variable, time_period = column.split(\"__\")\n", + " if variable not in self.tax_benefit_system.variables:\n", + " continue\n", + " self.set_input(variable, time_period, df[column]) # <-- BUG: No entity-level check!\n", + "```\n", + "\n", + 
"## The Problem\n", + "1. `to_input_dataframe()` exports ALL variables at **person level** (one row per person)\n", + "2. `build_from_dataframe()` correctly builds entity structure with proper counts\n", + "3. BUT it then tries to `set_input()` with person-level arrays for household-level variables\n", + "4. This causes a length mismatch error" + ] + }, + { + "cell_type": "markdown", + "id": "cell-1", + "metadata": {}, + "source": [ + "## Step 1: Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "cell-2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "policyengine_uk version: unknown\n", + "policyengine_uk location: /opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_uk/__init__.py\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import traceback\n", + "import inspect\n", + "\n", + "from policyengine import Simulation\n", + "from policyengine_uk import Simulation as UKSimulation\n", + "\n", + "# Show where policyengine_uk is loaded from\n", + "import policyengine_uk\n", + "version = getattr(policyengine_uk, '__version__', 'unknown')\n", + "print(f\"policyengine_uk version: {version}\")\n", + "print(f\"policyengine_uk location: {policyengine_uk.__file__}\")" + ] + }, + { + "cell_type": "markdown", + "id": "cell-3", + "metadata": {}, + "source": [ + "## Step 2: Examine the Buggy Code\n", + "\n", + "Let's look at the actual `build_from_dataframe` method to confirm the bug." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "cell-4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== build_from_dataframe source code ===\n", + " def build_from_dataframe(self, df: pd.DataFrame) -> None:\n", + " \"\"\"Build simulation from a pandas DataFrame.\n", + "\n", + " Args:\n", + " df: DataFrame with columns in format \"variable_name__time_period\"\n", + " \"\"\"\n", + "\n", + " def get_first_array(variable_name: str) -> pd.Series:\n", + " \"\"\"Extract the first array for a given variable name pattern.\"\"\"\n", + " columns = df.columns[df.columns.str.contains(variable_name + \"__\")]\n", + " return df[columns[0]]\n", + "\n", + " # Extract ID columns\n", + " (\n", + " person_id,\n", + " person_benunit_id,\n", + " person_household_id,\n", + " benunit_id,\n", + " household_id,\n", + " ) = map(\n", + " get_first_array,\n", + " [\n", + " \"person_id\",\n", + " \"person_benunit_id\",\n", + " \"person_household_id\",\n", + " \"benunit_id\",\n", + " \"household_id\",\n", + " ],\n", + " )\n", + "\n", + " # Build entity structure\n", + " self.build_from_ids(\n", + " person_id,\n", + " person_benunit_id,\n", + " person_household_id,\n", + " benunit_id,\n", + " household_id,\n", + " )\n", + "\n", + " # Set input values for each variable and time period\n", + " for column in df:\n", + " variable, time_period = column.split(\"__\")\n", + " if variable not in self.tax_benefit_system.variables:\n", + " continue\n", + " self.set_input(variable, time_period, df[column])\n", + "\n" + ] + } + ], + "source": [ + "# Show the source code of build_from_dataframe\n", + "print(\"=== build_from_dataframe source code ===\")\n", + "print(inspect.getsource(UKSimulation.build_from_dataframe))" + ] + }, + { + "cell_type": "markdown", + "id": "cell-5", + "metadata": {}, + "source": [ + "## Step 3: Create Test Data\n", + "\n", + "Create a UK simulation and export to DataFrame, then filter to Wales." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "cell-6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Creating UK-wide simulation...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "No data provided, using default dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n", + "Using dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n", + "Downloading enhanced_frs_2023_24.h5 from bucket policyengine-uk-data-private\n", + "WARNING:root:No version specified for policyengine-uk-data-private, enhanced_frs_2023_24.h5. Using latest version: 1.29.4\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "UK-wide entity counts:\n", + " Persons: 115,612\n", + " Households: 53,508\n" + ] + } + ], + "source": [ + "# Create UK-wide simulation\n", + "print(\"Creating UK-wide simulation...\")\n", + "sim_uk = Simulation(country=\"uk\", scope=\"macro\")\n", + "underlying_sim = sim_uk.baseline_simulation\n", + "\n", + "print(f\"\\nUK-wide entity counts:\")\n", + "print(f\" Persons: {underlying_sim.persons.count:,}\")\n", + "print(f\" Households: {underlying_sim.household.count:,}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cell-7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Exporting to DataFrame...\n", + "\n", + "Filtered DataFrame:\n", + " Rows (Welsh persons): 8,470\n", + " Columns: 1,127\n" + ] + } + ], + "source": [ + "# Export to DataFrame and filter to Wales\n", + "print(\"Exporting to DataFrame...\")\n", + "df = underlying_sim.to_input_dataframe()\n", + "\n", + "# Filter to Wales\n", + "country_person = underlying_sim.calculate(\"country\", map_to=\"person\").values\n", + "wales_mask = country_person == \"WALES\"\n", + "df_wales = df[wales_mask]\n", + "\n", + "print(f\"\\nFiltered DataFrame:\")\n", + "print(f\" Rows (Welsh persons): 
{len(df_wales):,}\")\n", + "print(f\" Columns: {len(df_wales.columns):,}\")" + ] + }, + { + "cell_type": "markdown", + "id": "cell-8", + "metadata": {}, + "source": [ + "## Step 4: Prove the DataFrame Has Person-Level Data for Household Variables\n", + "\n", + "This is the key insight: `to_input_dataframe()` exports EVERYTHING at person level." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "cell-9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== Household-Level Variables in DataFrame ===\n", + "Found 392 household-level variable columns in DataFrame\n", + "\n", + "First 10 household variables:\n", + " - corporate_wealth__2023\n", + " - corporate_wealth__2024\n", + " - corporate_wealth__2025\n", + " - corporate_wealth__2026\n", + " - corporate_wealth__2027\n", + " - corporate_wealth__2028\n", + " - corporate_wealth__2029\n", + " - corporate_wealth__2030\n", + " - non_residential_property_value__2023\n", + " - non_residential_property_value__2024\n" + ] + } + ], + "source": [ + "# Find household-level variables in the DataFrame\n", + "print(\"=== Household-Level Variables in DataFrame ===\")\n", + "\n", + "tax_benefit_system = underlying_sim.tax_benefit_system\n", + "household_vars_in_df = []\n", + "\n", + "for col in df_wales.columns:\n", + " var_name = col.split(\"__\")[0]\n", + " if var_name in tax_benefit_system.variables:\n", + " var_meta = tax_benefit_system.get_variable(var_name)\n", + " if var_meta.entity.key == \"household\":\n", + " household_vars_in_df.append((col, var_name))\n", + "\n", + "print(f\"Found {len(household_vars_in_df)} household-level variable columns in DataFrame\")\n", + "print(f\"\\nFirst 10 household variables:\")\n", + "for col, var_name in household_vars_in_df[:10]:\n", + " print(f\" - {col}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "cell-10", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "=== THE CRITICAL MISMATCH ===\n", + "\n", + "DataFrame rows (person-level): 8,470\n", + "Expected Welsh households: 4,108\n", + "\n", + "Example: 'corporate_wealth__2025'\n", + " Data length in DataFrame: 8,470\n", + " Should be (household count): 4,108\n", + "\n", + " MISMATCH: 8,470 != 4,108\n", + "\n", + "This is why set_input() fails!\n" + ] + } + ], + "source": [ + "# Show the mismatch: DataFrame rows vs expected household count\n", + "print(\"=== THE CRITICAL MISMATCH ===\")\n", + "print()\n", + "\n", + "# Get expected Welsh household count from person_household_id\n", + "phh_col = [c for c in df_wales.columns if c.startswith('person_household_id__')][0]\n", + "welsh_household_count = df_wales[phh_col].nunique()\n", + "\n", + "print(f\"DataFrame rows (person-level): {len(df_wales):,}\")\n", + "print(f\"Expected Welsh households: {welsh_household_count:,}\")\n", + "print()\n", + "\n", + "# Show a specific household variable\n", + "example_var = \"corporate_wealth__2025\" if \"corporate_wealth__2025\" in df_wales.columns else household_vars_in_df[0][0]\n", + "print(f\"Example: '{example_var}'\")\n", + "print(f\" Data length in DataFrame: {len(df_wales[example_var]):,}\")\n", + "print(f\" Should be (household count): {welsh_household_count:,}\")\n", + "print()\n", + "print(f\" MISMATCH: {len(df_wales[example_var]):,} != {welsh_household_count:,}\")\n", + "print()\n", + "print(\"This is why set_input() fails!\")" + ] + }, + { + "cell_type": "markdown", + "id": "cell-11", + "metadata": {}, + "source": [ + "## Step 5: Trace Through build_from_dataframe Step-by-Step\n", + "\n", + "Let's manually execute what `build_from_dataframe` does to see exactly where it fails." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "cell-12", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== Step 5a: Extract ID columns ===\n", + "person_id length: 8470\n", + "person_household_id length: 8470\n", + "person_household_id unique values: 4108\n", + "household_id length: 8470\n", + "household_id unique values: 4108\n" + ] + } + ], + "source": [ + "# Step 5a: Extract ID columns (lines 249-270 of build_from_dataframe)\n", + "print(\"=== Step 5a: Extract ID columns ===\")\n", + "\n", + "def get_first_array(df, variable_name):\n", + " columns = df.columns[df.columns.str.contains(variable_name + \"__\")]\n", + " return df[columns[0]]\n", + "\n", + "person_id = get_first_array(df_wales, \"person_id\")\n", + "person_benunit_id = get_first_array(df_wales, \"person_benunit_id\")\n", + "person_household_id = get_first_array(df_wales, \"person_household_id\")\n", + "benunit_id = get_first_array(df_wales, \"benunit_id\")\n", + "household_id = get_first_array(df_wales, \"household_id\")\n", + "\n", + "print(f\"person_id length: {len(person_id)}\")\n", + "print(f\"person_household_id length: {len(person_household_id)}\")\n", + "print(f\"person_household_id unique values: {person_household_id.nunique()}\")\n", + "print(f\"household_id length: {len(household_id)}\")\n", + "print(f\"household_id unique values: {household_id.nunique()}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "cell-13", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "=== Step 5b: Build entity structure (build_from_ids) ===\n", + "Person entity count: 8470\n", + "Benunit entity count: 4664\n", + "Household entity count: 4108\n", + "\n", + "Entity structure is CORRECT! 
4108 households were created.\n" + ] + } + ], + "source": [ + "# Step 5b: Build entity structure (lines 273-279 - build_from_ids)\n", + "print(\"\\n=== Step 5b: Build entity structure (build_from_ids) ===\")\n", + "\n", + "from policyengine_core.simulations.simulation_builder import SimulationBuilder\n", + "from policyengine_uk.tax_benefit_system import CountryTaxBenefitSystem\n", + "\n", + "# Create a fresh simulation to test\n", + "test_tbs = CountryTaxBenefitSystem()\n", + "builder = SimulationBuilder()\n", + "builder.populations = test_tbs.instantiate_entities()\n", + "\n", + "# Declare entities - this is what build_from_ids does\n", + "builder.declare_person_entity(\"person\", person_id.values)\n", + "builder.declare_entity(\"benunit\", np.unique(benunit_id.values))\n", + "builder.declare_entity(\"household\", np.unique(household_id.values))\n", + "\n", + "print(f\"Person entity count: {len(builder.populations['person'].ids)}\")\n", + "print(f\"Benunit entity count: {len(builder.populations['benunit'].ids)}\")\n", + "print(f\"Household entity count: {len(builder.populations['household'].ids)}\")\n", + "print()\n", + "print(\"Entity structure is CORRECT! 
4108 households were created.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "cell-14", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "=== Step 5c: Complete entity setup ===\n", + "Test simulation created:\n", + " Persons: 8470\n", + " Households: 4108\n", + "\n", + "Entity counts are CORRECT at this point!\n" + ] + } + ], + "source": [ + "# Step 5c: Complete entity setup with joins\n", + "print(\"\\n=== Step 5c: Complete entity setup ===\")\n", + "\n", + "builder.join_with_persons(\n", + " builder.populations[\"benunit\"],\n", + " person_benunit_id.values,\n", + " np.array([\"member\"] * len(person_benunit_id)),\n", + ")\n", + "builder.join_with_persons(\n", + " builder.populations[\"household\"],\n", + " person_household_id.values,\n", + " np.array([\"member\"] * len(person_household_id)),\n", + ")\n", + "\n", + "# Create simulation with these populations\n", + "from policyengine_core.simulations import Simulation as CoreSimulation\n", + "from policyengine_core.tracers import SimpleTracer\n", + "\n", + "class TestSimulation(CoreSimulation):\n", + " default_input_period = 2025\n", + " default_calculation_period = 2025\n", + "\n", + "test_sim = TestSimulation.__new__(TestSimulation)\n", + "test_sim.tax_benefit_system = test_tbs\n", + "test_sim.branch_name = \"default\"\n", + "test_sim.invalidated_caches = set()\n", + "test_sim.branches = {}\n", + "\n", + "# Initialize required attributes that build_from_populations expects\n", + "test_sim.debug = False\n", + "test_sim.trace = False\n", + "test_sim.tracer = SimpleTracer()\n", + "test_sim.opt_out_cache = False\n", + "test_sim.max_spiral_loops = 10\n", + "test_sim.memory_config = None\n", + "test_sim._data_storage_dir = None\n", + "\n", + "test_sim.build_from_populations(builder.populations)\n", + "\n", + "print(f\"Test simulation created:\")\n", + "print(f\" Persons: {test_sim.persons.count}\")\n", + "print(f\" Households: 
{test_sim.household.count}\")\n", + "print()\n", + "print(\"Entity counts are CORRECT at this point!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "cell-15", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "=== Step 5d: THE BUG - set_input without aggregation ===\n", + "\n", + "Attempting to set 'corporate_wealth' for period 2025\n", + " Variable entity: household\n", + " Data length: 8470\n", + " Household count: 4108\n", + "\n", + "ERROR (expected): Unable to set value \"[ 42531.723 42531.723 42531.723 ... 145237.94 145237.94\n", + " 6483.3296]\" for variable \"corporate_wealth\", as its length is 8470 while there are 4108 households in the simulation.\n", + "\n", + "============================================================\n", + "BUG PROVEN!\n", + "============================================================\n", + "\n", + "The build_from_dataframe method calls set_input() with\n", + "person-level data (8470 values) for a household-level\n", + "variable, but there are only 4108 households.\n" + ] + } + ], + "source": [ + "# Step 5d: THE BUG - Try to set_input for a household variable with person-level data\n", + "print(\"\\n=== Step 5d: THE BUG - set_input without aggregation ===\")\n", + "print()\n", + "\n", + "# This is what build_from_dataframe does at lines 281-286:\n", + "# for column in df:\n", + "# variable, time_period = column.split(\"__\")\n", + "# if variable not in self.tax_benefit_system.variables:\n", + "# continue\n", + "# self.set_input(variable, time_period, df[column]) # <-- BUG!\n", + "\n", + "# Let's simulate this for a household variable\n", + "test_column = example_var\n", + "variable_name, time_period = test_column.split(\"__\")\n", + "\n", + "print(f\"Attempting to set '{variable_name}' for period {time_period}\")\n", + "print(f\" Variable entity: {test_tbs.get_variable(variable_name).entity.key}\")\n", + "print(f\" Data length: 
{len(df_wales[test_column])}\")\n", + "print(f\" Household count: {test_sim.household.count}\")\n", + "print()\n", + "\n", + "try:\n", + " test_sim.set_input(variable_name, time_period, df_wales[test_column].values)\n", + " print(\"SUCCESS - No error (unexpected!)\")\n", + "except ValueError as e:\n", + " print(f\"ERROR (expected): {e}\")\n", + " print()\n", + " print(\"=\"*60)\n", + " print(\"BUG PROVEN!\")\n", + " print(\"=\"*60)\n", + " print()\n", + " print(\"The build_from_dataframe method calls set_input() with\")\n", + " print(\"person-level data (8470 values) for a household-level\")\n", + " print(f\"variable, but there are only {test_sim.household.count} households.\")" + ] + }, + { + "cell_type": "markdown", + "id": "cell-16", + "metadata": {}, + "source": [ + "## Step 6: Show What the Fix Should Look Like\n", + "\n", + "The fix needs to check if aggregation is required before calling `set_input()`." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "cell-17", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== The Fix: Aggregate Before set_input ===\n", + "\n", + "Variable: corporate_wealth\n", + "Entity: household\n", + "Data length: 8470\n", + "Population count: 4108\n", + "\n", + "Aggregation needed: 8470 != 4108\n", + "\n", + "After aggregation: 4108 values\n", + "\n", + "SUCCESS! 
set_input worked with aggregated data.\n" + ] + } + ], + "source": [ + "# Demonstrate the correct approach: aggregate before set_input\n", + "print(\"=== The Fix: Aggregate Before set_input ===\")\n", + "print()\n", + "\n", + "variable_name, time_period = example_var.split(\"__\")\n", + "var_meta = test_tbs.get_variable(variable_name)\n", + "entity = var_meta.entity\n", + "population = test_sim.get_population(entity.plural)\n", + "\n", + "data = df_wales[example_var].values\n", + "\n", + "print(f\"Variable: {variable_name}\")\n", + "print(f\"Entity: {entity.key}\")\n", + "print(f\"Data length: {len(data)}\")\n", + "print(f\"Population count: {population.count}\")\n", + "print()\n", + "\n", + "# Check if aggregation is needed\n", + "if len(data) != population.count:\n", + " print(f\"Aggregation needed: {len(data)} != {population.count}\")\n", + " print()\n", + " \n", + " # Use value_from_first_person to aggregate\n", + " aggregated_data = population.value_from_first_person(data)\n", + " print(f\"After aggregation: {len(aggregated_data)} values\")\n", + " print()\n", + " \n", + " # Now set_input should work\n", + " try:\n", + " test_sim.set_input(variable_name, time_period, aggregated_data)\n", + " print(f\"SUCCESS! set_input worked with aggregated data.\")\n", + " except Exception as e:\n", + " print(f\"Still failed: {e}\")\n", + "else:\n", + " print(\"No aggregation needed\")" + ] + }, + { + "cell_type": "markdown", + "id": "cell-18", + "metadata": {}, + "source": [ + "## Step 7: Show the Required Code Fix\n", + "\n", + "Here's what the fixed `build_from_dataframe` method should look like." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "cell-19", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== Required Fix for build_from_dataframe ===\n", + "\n", + "CURRENT CODE (buggy):\n", + "```python\n", + "# Set input values for each variable and time period\n", + "for column in df:\n", + " variable, time_period = column.split(\"__\")\n", + " if variable not in self.tax_benefit_system.variables:\n", + " continue\n", + " self.set_input(variable, time_period, df[column])\n", + "```\n", + "\n", + "FIXED CODE:\n", + "```python\n", + "# Set input values for each variable and time period\n", + "for column in df:\n", + " variable, time_period = column.split(\"__\")\n", + " if variable not in self.tax_benefit_system.variables:\n", + " continue\n", + " \n", + " # Get variable metadata and target population\n", + " var_meta = self.tax_benefit_system.get_variable(variable)\n", + " entity = var_meta.entity\n", + " population = self.get_population(entity.plural)\n", + " \n", + " data = df[column].values\n", + " \n", + " # Check if aggregation is needed (data is person-level but variable is group-level)\n", + " if len(data) != population.count:\n", + " # Aggregate from person-level to entity-level\n", + " data = population.value_from_first_person(data)\n", + " \n", + " self.set_input(variable, time_period, data)\n", + "```\n", + "\n" + ] + } + ], + "source": [ + "print(\"=== Required Fix for build_from_dataframe ===\")\n", + "print()\n", + "print(\"\"\"CURRENT CODE (buggy):\n", + "```python\n", + "# Set input values for each variable and time period\n", + "for column in df:\n", + " variable, time_period = column.split(\"__\")\n", + " if variable not in self.tax_benefit_system.variables:\n", + " continue\n", + " self.set_input(variable, time_period, df[column])\n", + "```\n", + "\n", + "FIXED CODE:\n", + "```python\n", + "# Set input values for each variable and time period\n", + "for column in 
df:\n", + " variable, time_period = column.split(\"__\")\n", + " if variable not in self.tax_benefit_system.variables:\n", + " continue\n", + " \n", + " # Get variable metadata and target population\n", + " var_meta = self.tax_benefit_system.get_variable(variable)\n", + " entity = var_meta.entity\n", + " population = self.get_population(entity.plural)\n", + " \n", + " data = df[column].values\n", + " \n", + " # Check if aggregation is needed (data is person-level but variable is group-level)\n", + " if len(data) != population.count:\n", + " # Aggregate from person-level to entity-level\n", + " data = population.value_from_first_person(data)\n", + " \n", + " self.set_input(variable, time_period, data)\n", + "```\n", + "\"\"\")" + ] + }, + { + "cell_type": "markdown", + "id": "cell-20", + "metadata": {}, + "source": [ + "## Summary" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "cell-21", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "======================================================================\n", + "SUMMARY: BUG PROVEN\n", + "======================================================================\n", + "\n", + "LOCATION:\n", + " File: policyengine_uk/simulation.py\n", + " Method: build_from_dataframe()\n", + " Lines: 281-286\n", + "\n", + "ROOT CAUSE:\n", + " The method iterates through DataFrame columns and calls set_input()\n", + " without checking if the data length matches the target entity count.\n", + " \n", + " - to_input_dataframe() exports ALL variables at PERSON level\n", + " - build_from_ids() correctly creates entity structure (e.g., 4108 households)\n", + " - BUT the loop then tries to set 8470 person-level values for \n", + " household-level variables that only have 4108 entities\n", + "\n", + "THE FIX:\n", + " Before calling set_input(), check if len(data) != population.count.\n", + " If so, aggregate using population.value_from_first_person(data).\n", + "\n", + 
"NOTE:\n", + " This is the same aggregation logic that policyengine_core's\n", + " build_from_dataset() method uses (simulation.py lines 406-414).\n", + " The policyengine_uk version simply forgot to include it.\n", + "\n" + ] + } + ], + "source": [ + "print(\"=\"*70)\n", + "print(\"SUMMARY: BUG PROVEN\")\n", + "print(\"=\"*70)\n", + "print(\"\"\"\n", + "LOCATION:\n", + " File: policyengine_uk/simulation.py\n", + " Method: build_from_dataframe()\n", + " Lines: 281-286\n", + "\n", + "ROOT CAUSE:\n", + " The method iterates through DataFrame columns and calls set_input()\n", + " without checking if the data length matches the target entity count.\n", + " \n", + " - to_input_dataframe() exports ALL variables at PERSON level\n", + " - build_from_ids() correctly creates entity structure (e.g., 4108 households)\n", + " - BUT the loop then tries to set 8470 person-level values for \n", + " household-level variables that only have 4108 entities\n", + "\n", + "THE FIX:\n", + " Before calling set_input(), check if len(data) != population.count.\n", + " If so, aggregate using population.value_from_first_person(data).\n", + "\n", + "NOTE:\n", + " This is the same aggregation logic that policyengine_core's\n", + " build_from_dataset() method uses (simulation.py lines 406-414).\n", + " The policyengine_uk version simply forgot to include it.\n", + "\"\"\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py-3.13", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/scripts/test_local_authority_api.py b/scripts/test_local_authority_api.py new file mode 100755 index 000000000..81eeb8575 --- /dev/null +++ b/scripts/test_local_authority_api.py @@ -0,0 +1,570 
@@ +#!/usr/bin/env python3 +""" +Test script for UK Local Authority API functionality. + +This script tests the economy-wide simulation API for: +1. A specific UK local authority (e.g., Leicester) +2. UK-wide calculation (to confirm local_authority_impact is returned) +3. Wales country filter (to confirm authorities are filtered by country) + +SETUP INSTRUCTIONS: +=================== + +You need THREE terminal windows: + +Terminal 1 - Start Redis: + redis-server + +Terminal 2 - Start the API worker (handles economy calculations): + FLASK_DEBUG=1 python policyengine_api/worker.py + +Terminal 3 - Start the API server: + make debug + +Then run this script in a 4th terminal: + python scripts/test_local_authority_api.py + +NOTE: UK calculations require access to the policyengine-uk-data-private +HuggingFace repo. Make sure HUGGING_FACE_TOKEN is set in your environment. +""" + +import requests +import json +import time +import sqlite3 +from pathlib import Path + +# Configuration +API_BASE_URL = "http://127.0.0.1:5000" +COUNTRY_ID = "uk" +BASELINE_POLICY_ID = 1 # UK current law +TIME_PERIOD = 2025 +DATASET = "default" + +# Raise the UK income tax base rate by 6 percentage points (20% -> 26%) +SAMPLE_REFORM = { + "gov.hmrc.income_tax.rates.uk[0].rate": {"2025-01-01.2100-12-31": 0.26} +} + + +def print_header(title: str): + """Print a formatted header.""" + print("\n" + "=" * 70) + print(f" {title}") + print("=" * 70) + + +def print_step(step_num: int, description: str): + """Print a step description.""" + print(f"\n[Step {step_num}] {description}") + print("-" * 50) + + +def wait_for_confirmation(message: str = "Press Enter to continue..."): + """Wait for user confirmation before proceeding.""" + input(f"\n>>> {message}") + + +def check_api_health(): + """Check if the API is running and healthy.""" + print_step(0, "Checking API Health") + + try: + response = requests.get(f"{API_BASE_URL}/liveness-check", timeout=5) + if response.status_code == 200: + print(f" [OK] API 
is running at {API_BASE_URL}") + return True + else: + print(f" [ERROR] API returned status {response.status_code}") + return False + except requests.exceptions.ConnectionError: + print(f" [ERROR] Cannot connect to API at {API_BASE_URL}") + print(" Make sure the API server is running. You need 3 terminals:") + print("") + print(" Terminal 1 - Start Redis:") + print(" redis-server") + print("") + print(" Terminal 2 - Start the API worker:") + print(" FLASK_DEBUG=1 python policyengine_api/worker.py") + print("") + print(" Terminal 3 - Start the API server:") + print(" make debug") + return False + + +def create_reform_policy(): + """Create a reform policy and return its ID.""" + print_step(1, "Creating Reform Policy") + + print(f" Reform to be created:") + print(f" {json.dumps(SAMPLE_REFORM, indent=4)}") + + wait_for_confirmation("Press Enter to create the reform policy...") + + payload = { + "label": "Test LA Reform - UC Standard Allowance Increase", + "data": SAMPLE_REFORM, + } + + response = requests.post( + f"{API_BASE_URL}/{COUNTRY_ID}/policy", + json=payload, + headers={"Content-Type": "application/json"}, + ) + + print(f" Response status: {response.status_code}") + result = response.json() + print(f" Response body: {json.dumps(result, indent=4)}") + + if response.status_code in [200, 201]: + policy_id = result["result"]["policy_id"] + print(f" [OK] Reform policy created/found with ID: {policy_id}") + return policy_id + else: + print(f" [ERROR] Failed to create policy") + return None + + +def verify_baseline_policy_exists(): + """Verify the baseline (current law) policy exists.""" + print_step(2, "Verifying Baseline Policy Exists") + + print(f" Checking policy ID: {BASELINE_POLICY_ID}") + + response = requests.get( + f"{API_BASE_URL}/{COUNTRY_ID}/policy/{BASELINE_POLICY_ID}" + ) + + print(f" Response status: {response.status_code}") + + if response.status_code == 200: + result = response.json() + policy_data = result.get("result", {}) + print(f" Policy label: 
{policy_data.get('label', 'N/A')}") + print(f" [OK] Baseline policy exists") + return True + else: + print(f" [ERROR] Baseline policy not found") + print( + " You may need to initialize the database with the current law policy" + ) + return False + + +def poll_economy_endpoint( + region: str, reform_policy_id: int, description: str +): + """ + Poll the economy endpoint until the calculation is complete. + + Returns the result data or None if failed. + """ + print(f"\n Polling for: {description}") + print(f" Region: {region}") + print(f" Reform Policy ID: {reform_policy_id}") + print(f" Baseline Policy ID: {BASELINE_POLICY_ID}") + print(f" Time Period: {TIME_PERIOD}") + + url = f"{API_BASE_URL}/{COUNTRY_ID}/economy/{reform_policy_id}/over/{BASELINE_POLICY_ID}" + params = { + "region": region, + "dataset": DATASET, + "time_period": TIME_PERIOD, + "target": "general", + } + + print(f"\n Full URL: {url}") + print(f" Query params: {params}") + + wait_for_confirmation("Press Enter to start polling the API...") + + max_attempts = 60 # 5 minutes with 5-second intervals + attempt = 0 + + while attempt < max_attempts: + attempt += 1 + print(f"\n Attempt {attempt}/{max_attempts}...") + + try: + response = requests.get(url, params=params, timeout=30) + result = response.json() + + status = result.get("status") + print(f" Status: {status}") + + if status == "ok": + print(f" [OK] Calculation complete!") + return result.get("result") + elif status == "computing": + print(f" Calculation in progress... 
waiting 5 seconds") + time.sleep(5) + elif status == "error": + print(f" [ERROR] Calculation failed") + print(f" Message: {result.get('message')}") + return None + else: + print(f" Unknown status: {status}") + time.sleep(5) + + except requests.exceptions.Timeout: + print(f" Request timed out, retrying...") + time.sleep(5) + except Exception as e: + print(f" Error: {e}") + time.sleep(5) + + print(f" [ERROR] Timed out waiting for calculation") + return None + + +def display_results(result: dict, description: str): + """Display key results from the economy calculation.""" + print(f"\n Results for: {description}") + print(" " + "-" * 40) + + if result is None: + print(" No results available") + return + + # Budgetary impact + budget = result.get("budget") + if budget: + print(f"\n BUDGETARY IMPACT:") + for key, value in budget.items(): + if isinstance(value, (int, float)): + print(f" {key}: {value:,.2f}") + else: + print(f" {key}: {value}") + + # Decile impact summary + decile = result.get("decile") + if decile: + print(f"\n DECILE IMPACT (sample):") + relative = decile.get("relative", {}) + if relative: + for d in ["1", "5", "10"]: + if d in relative: + print(f" Decile {d}: {relative[d]*100:.2f}%") + + # Poverty impact + poverty = result.get("poverty") + if poverty: + print(f"\n POVERTY IMPACT:") + deep_poverty = poverty.get("deep_poverty", {}) + regular_poverty = poverty.get("poverty", {}) + if deep_poverty: + print( + f" Deep poverty change: {deep_poverty.get('change', 'N/A')}" + ) + if regular_poverty: + print( + f" Poverty change: {regular_poverty.get('change', 'N/A')}" + ) + + # Local Authority Impact (if present) + la_impact = result.get("local_authority_impact") + if la_impact: + print(f"\n LOCAL AUTHORITY IMPACT:") + by_la = la_impact.get("by_local_authority", {}) + print(f" Number of local authorities: {len(by_la)}") + + # Show first 5 local authorities + print(f" Sample local authorities:") + for i, (name, data) in enumerate(list(by_la.items())[:5]): + 
avg_change = data.get("average_household_income_change", 0) + rel_change = data.get("relative_household_income_change", 0) + print( + f" {name}: avg={avg_change:.2f}, rel={rel_change*100:.3f}%" + ) + + # Outcomes by region + outcomes = la_impact.get("outcomes_by_region", {}) + if outcomes: + print(f"\n Outcomes by UK region:") + for region, buckets in outcomes.items(): + total = sum(buckets.values()) + print(f" {region}: {total} LAs") + for bucket, count in buckets.items(): + if count > 0: + print(f" - {bucket}: {count}") + else: + print(f"\n LOCAL AUTHORITY IMPACT: Not present in response") + + # Constituency Impact (if present) + const_impact = result.get("constituency_impact") + if const_impact: + by_const = const_impact.get("by_constituency", {}) + print(f"\n CONSTITUENCY IMPACT:") + print(f" Number of constituencies: {len(by_const)}") + + +def test_local_authority_simulation(reform_policy_id: int): + """Test 1: Run simulation for a specific local authority.""" + print_header("TEST 1: Local Authority Simulation (Leicester)") + + print( + """ + This test runs an economy simulation for a specific UK local authority. + We're using Leicester as it's a well-known unitary authority. + + Expected: The API should accept the local_authority/Leicester region + and return economic impact results. + """ + ) + + wait_for_confirmation( + "Press Enter to run the local authority simulation..." 
+ ) + + region = "local_authority/Leicester" + result = poll_economy_endpoint( + region, reform_policy_id, "Leicester Local Authority" + ) + + if result: + display_results(result, "Leicester Local Authority") + print( + "\n [TEST 1 PASSED] Local authority simulation completed successfully" + ) + return True + else: + print("\n [TEST 1 FAILED] Local authority simulation failed") + return False + + +def test_uk_wide_simulation(reform_policy_id: int): + """Test 2: Run UK-wide simulation and check for local_authority_impact.""" + print_header("TEST 2: UK-Wide Simulation (Check local_authority_impact)") + + print( + """ + This test runs an economy simulation for the entire UK. + + Expected: The API should return results that include: + - Standard budgetary/poverty/decile impacts + - constituency_impact (existing feature) + - local_authority_impact (NEW feature we just added) + + We'll verify that local_authority_impact is present and contains + data for all 360 UK local authorities. + """ + ) + + wait_for_confirmation("Press Enter to run the UK-wide simulation...") + + region = "uk" + result = poll_economy_endpoint(region, reform_policy_id, "UK-wide") + + if result: + display_results(result, "UK-wide") + + # Verify local_authority_impact is present + la_impact = result.get("local_authority_impact") + if la_impact: + by_la = la_impact.get("by_local_authority", {}) + if len(by_la) == 360: + print( + f"\n [OK] local_authority_impact contains all 360 local authorities" + ) + else: + print( + f"\n [WARNING] Expected 360 local authorities, got {len(by_la)}" + ) + + # Check outcomes_by_region has all UK nations + outcomes = la_impact.get("outcomes_by_region", {}) + expected_regions = [ + "uk", + "england", + "scotland", + "wales", + "northern_ireland", + ] + for r in expected_regions: + if r in outcomes: + print(f" [OK] {r} region present in outcomes") + else: + print(f" [MISSING] {r} region not in outcomes") + + print( + "\n [TEST 2 PASSED] UK-wide simulation includes 
local_authority_impact" + ) + return True + else: + print( + "\n [TEST 2 FAILED] local_authority_impact not present in response" + ) + return False + else: + print("\n [TEST 2 FAILED] UK-wide simulation failed") + return False + + +def test_wales_simulation(reform_policy_id: int): + """Test 3: Run Wales simulation and check local authorities are filtered.""" + print_header("TEST 3: Wales Simulation (Filter Check)") + + print( + """ + This test runs an economy simulation for Wales only. + + Expected: The API should return results where: + - The simulation is filtered to Wales + - If local_authority_impact is present, it should only contain + Welsh local authorities (codes starting with 'W') + - Wales has exactly 22 principal areas + + Note: The local_authority_impact breakdown may only be calculated + for UK-wide simulations. This test will verify the behavior. + """ + ) + + wait_for_confirmation("Press Enter to run the Wales simulation...") + + region = "country/wales" + result = poll_economy_endpoint(region, reform_policy_id, "Wales") + + if result: + display_results(result, "Wales") + + la_impact = result.get("local_authority_impact") + if la_impact: + by_la = la_impact.get("by_local_authority", {}) + print(f"\n Local authorities in response: {len(by_la)}") + + # If filtering is implemented, we'd expect 22 Welsh LAs + if len(by_la) == 22: + print( + f" [OK] Correctly filtered to 22 Welsh local authorities" + ) + elif len(by_la) == 360: + print( + f" [INFO] All 360 LAs returned (filtering not applied at LA level)" + ) + else: + print(f" [INFO] Got {len(by_la)} local authorities") + + print("\n [TEST 3 PASSED] Wales simulation completed") + return True + else: + print( + f"\n [INFO] local_authority_impact not present for country-level simulation" + ) + print( + " This may be expected behavior - LA breakdown may only be for UK-wide" + ) + print( + "\n [TEST 3 PASSED] Wales simulation completed (no LA breakdown)" + ) + return True + else: + print("\n [TEST 3 FAILED] 
Wales simulation failed") + return False + + +def main(): + """Main test runner.""" + print_header("UK Local Authority API Test Script") + + print( + """ + This script tests the UK Local Authority feature in the PolicyEngine API. + + It will: + 1. Check API health + 2. Create a test reform policy + 3. Verify baseline policy exists + 4. Run TEST 1: Local Authority simulation (Leicester) + 5. Run TEST 2: UK-wide simulation (check local_authority_impact) + 6. Run TEST 3: Wales simulation (filter check) + + Prerequisites (you need 3 other terminals running): + - Terminal 1: redis-server + - Terminal 2: FLASK_DEBUG=1 python policyengine_api/worker.py + - Terminal 3: make debug + - HUGGING_FACE_TOKEN environment variable set (for UK data access) + + You will be prompted before each major step. + """ + ) + + wait_for_confirmation("Press Enter to begin testing...") + + # Step 0: Check API health + if not check_api_health(): + print("\n[ABORT] API is not available. Please start the server first.") + return + + wait_for_confirmation("API is healthy. Press Enter to continue...") + + # Step 1: Create reform policy + reform_policy_id = create_reform_policy() + if reform_policy_id is None: + print("\n[ABORT] Failed to create reform policy.") + return + + # Step 2: Verify baseline policy + if not verify_baseline_policy_exists(): + print("\n[WARNING] Baseline policy not found. 
Tests may fail.") + wait_for_confirmation("Press Enter to continue anyway...") + + print_header("Setup Complete - Ready to Run Tests") + print( + f""" + Configuration: + - API Base URL: {API_BASE_URL} + - Country: {COUNTRY_ID} + - Reform Policy ID: {reform_policy_id} + - Baseline Policy ID: {BASELINE_POLICY_ID} + - Time Period: {TIME_PERIOD} + - Dataset: {DATASET} + """ + ) + + wait_for_confirmation("Press Enter to start running tests...") + + # Run tests + results = [] + + # Test 1: Local Authority simulation + results.append( + ( + "Local Authority (Leicester)", + test_local_authority_simulation(reform_policy_id), + ) + ) + wait_for_confirmation( + "Test 1 complete. Press Enter to continue to Test 2..." + ) + + # Test 2: UK-wide simulation + results.append( + ("UK-Wide with LA Impact", test_uk_wide_simulation(reform_policy_id)) + ) + wait_for_confirmation( + "Test 2 complete. Press Enter to continue to Test 3..." + ) + + # Test 3: Wales simulation + results.append(("Wales Filter", test_wales_simulation(reform_policy_id))) + + # Summary + print_header("Test Summary") + print("\n Results:") + for test_name, passed in results: + status = "[PASSED]" if passed else "[FAILED]" + print(f" {status} {test_name}") + + all_passed = all(r[1] for r in results) + if all_passed: + print("\n All tests passed!") + else: + print("\n Some tests failed. 
Review output above for details.") + + print("\n" + "=" * 70) + print(" Testing complete.") + print("=" * 70 + "\n") + + +if __name__ == "__main__": + main() diff --git a/scripts/verify_country_filtering_bug.ipynb b/scripts/verify_country_filtering_bug.ipynb new file mode 100644 index 000000000..73c71e701 --- /dev/null +++ b/scripts/verify_country_filtering_bug.ipynb @@ -0,0 +1,1147 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "cell-0", + "metadata": {}, + "source": [ + "# Verifying UK Country Filtering Bug in policyengine.py\n", + "\n", + "This notebook verifies the bug that occurs when filtering simulations by UK country (e.g., Wales).\n", + "\n", + "## The Bug\n", + "When running a simulation filtered to a specific UK country (e.g., `region=\"country/wales\"`), we get:\n", + "```\n", + "ValueError: Unable to set value \"[ True True True ... False False False]\" for variable \n", + "\"would_evade_tv_licence_fee\", as its length is 8470 while there are 4108 households in the simulation.\n", + "```\n", + "\n", + "## Root Cause Hypothesis\n", + "The country filtering code in `policyengine/simulation.py` uses DataFrame subsetting:\n", + "1. Exports simulation to DataFrame via `to_input_dataframe()`\n", + "2. Filters DataFrame rows by country\n", + "3. Creates new simulation from filtered DataFrame\n", + "\n", + "The issue is that entity linkage variables (like `household_id`) may not be properly \n", + "exported/imported, causing entity count mismatches during variable calculations." 
+ ] + }, + { + "cell_type": "markdown", + "id": "cell-1", + "metadata": {}, + "source": [ + "## Step 1: Setup and Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "cell-2", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import traceback\n", + "\n", + "# Import policyengine (the high-level package)\n", + "import policyengine\n", + "from policyengine import Simulation\n", + "\n", + "# Also import the underlying UK simulation for manual testing\n", + "from policyengine_uk import Microsimulation as UKMicrosimulation" + ] + }, + { + "cell_type": "markdown", + "id": "cell-3", + "metadata": {}, + "source": [ + "## Step 2: Create UK-Wide Baseline Simulation\n", + "\n", + "First, create a standard UK-wide simulation to understand the data structure." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "cell-4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Creating UK-wide simulation...\n", + "(This may take a minute to download data)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "No data provided, using default dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n", + "Using dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n", + "Downloading enhanced_frs_2023_24.h5 from bucket policyengine-uk-data-private\n", + "WARNING:root:No version specified for policyengine-uk-data-private, enhanced_frs_2023_24.h5. 
Using latest version: 1.29.4\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "=== UK-Wide Simulation Structure ===\n", + "Person count: 115612\n", + "Household count: 53508\n", + "BenUnit count: 61858\n" + ] + } + ], + "source": [ + "# Create UK-wide simulation using policyengine.Simulation\n", + "print(\"Creating UK-wide simulation...\")\n", + "print(\"(This may take a minute to download data)\")\n", + "\n", + "sim_uk = Simulation(country=\"uk\", scope=\"macro\")\n", + "\n", + "# Get the underlying microsimulation\n", + "underlying_sim = sim_uk.baseline_simulation\n", + "\n", + "print(f\"\\n=== UK-Wide Simulation Structure ===\")\n", + "print(f\"Person count: {underlying_sim.persons.count}\")\n", + "print(f\"Household count: {underlying_sim.household.count}\")\n", + "print(f\"BenUnit count: {underlying_sim.benunit.count}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "cell-5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Creating UK-wide simulation...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "No data provided, using default dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n", + "Using dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n", + "Downloading enhanced_frs_2023_24.h5 from bucket policyengine-uk-data-private\n", + "WARNING:root:No version specified for policyengine-uk-data-private, enhanced_frs_2023_24.h5. 
Using latest version: 1.29.4\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "=== UK-Wide Simulation Structure ===\n", + "Person count: 115612\n", + "Household count: 53508\n", + "BenUnit count: 61858\n" + ] + } + ], + "source": [ + "# Create a UK-wide simulation (no region filter)\n", + "print(\"Creating UK-wide simulation...\")\n", + "sim_uk = Simulation(country=\"uk\", scope=\"macro\")\n", + "\n", + "# Access the underlying country simulation\n", + "underlying_sim = sim_uk.baseline_simulation\n", + "\n", + "print(f\"\\n=== UK-Wide Simulation Structure ===\")\n", + "print(f\"Person count: {underlying_sim.persons.count}\")\n", + "print(f\"Household count: {underlying_sim.household.count}\")\n", + "print(f\"BenUnit count: {underlying_sim.benunit.count}\")" + ] + }, + { + "cell_type": "markdown", + "id": "cell-6", + "metadata": {}, + "source": [ + "## Step 3: Examine to_input_dataframe() Export\n", + "\n", + "This is what `_apply_region_to_simulation` uses to get the data before filtering." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cell-7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Exporting simulation to DataFrame...\n", + "\n", + "=== Exported DataFrame ===\n", + "Shape: (115612, 1127)\n", + "Number of rows (should be person count): 115612\n", + "Number of columns: 1127\n" + ] + } + ], + "source": [ + "# Export the simulation to DataFrame\n", + "print(\"Exporting simulation to DataFrame...\")\n", + "df = underlying_sim.to_input_dataframe()\n", + "\n", + "print(f\"\\n=== Exported DataFrame ===\")\n", + "print(f\"Shape: {df.shape}\")\n", + "print(f\"Number of rows (should be person count): {len(df)}\")\n", + "print(f\"Number of columns: {len(df.columns)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "cell-8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "=== Critical Entity Columns ===\n", + " person_id: FOUND (8 columns)\n", + " - person_id__2023\n", + " - person_id__2024\n", + " - person_id__2025\n", + " ... and 5 more\n", + " household_id: FOUND (8 columns)\n", + " - household_id__2023\n", + " - household_id__2024\n", + " - household_id__2025\n", + " ... and 5 more\n", + " person_household_id: FOUND (8 columns)\n", + " - person_household_id__2023\n", + " - person_household_id__2024\n", + " - person_household_id__2025\n", + " ... and 5 more\n", + " benunit_id: FOUND (8 columns)\n", + " - benunit_id__2023\n", + " - benunit_id__2024\n", + " - benunit_id__2025\n", + " ... and 5 more\n", + " person_benunit_id: FOUND (8 columns)\n", + " - person_benunit_id__2023\n", + " - person_benunit_id__2024\n", + " - person_benunit_id__2025\n", + " ... 
and 5 more\n" + ] + } + ], + "source": [ + "# Check for critical entity linkage columns\n", + "print(\"\\n=== Critical Entity Columns ===\")\n", + "\n", + "critical_patterns = [\n", + " 'person_id',\n", + " 'household_id', \n", + " 'person_household_id',\n", + " 'benunit_id',\n", + " 'person_benunit_id'\n", + "]\n", + "\n", + "for pattern in critical_patterns:\n", + " matching = [c for c in df.columns if c.startswith(pattern)]\n", + " if matching:\n", + " print(f\" {pattern}: FOUND ({len(matching)} columns)\")\n", + " for col in matching[:3]: # Show first 3\n", + " print(f\" - {col}\")\n", + " if len(matching) > 3:\n", + " print(f\" ... and {len(matching) - 3} more\")\n", + " else:\n", + " print(f\" {pattern}: MISSING!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "cell-9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "=== household_id Export Analysis ===\n", + "Column: household_id__2023\n", + " Length: 115612\n", + " Unique values: 53508\n", + " Min: 1, Max: 67019\n", + " Sample values: [2 1 2 2 2 2 3 6 6 3]\n" + ] + } + ], + "source": [ + "# Check if household_id is exported and examine its values\n", + "hh_id_cols = [c for c in df.columns if c.startswith('household_id__')]\n", + "\n", + "print(\"\\n=== household_id Export Analysis ===\")\n", + "if hh_id_cols:\n", + " col = hh_id_cols[0]\n", + " print(f\"Column: {col}\")\n", + " print(f\" Length: {len(df[col])}\")\n", + " print(f\" Unique values: {df[col].nunique()}\")\n", + " print(f\" Min: {df[col].min()}, Max: {df[col].max()}\")\n", + " print(f\" Sample values: {df[col].values[:10]}\")\n", + "else:\n", + " print(\"household_id NOT exported!\")\n", + " print(\"This could be the root cause of the bug.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "cell-10", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "=== person_household_id Export 
Analysis ===\n", + "Column: person_household_id__2023\n", + " Length: 115612\n", + " Unique values (should match household count): 53508\n", + " Expected household count: 53508\n", + " [OK] Unique count matches household count\n" + ] + } + ], + "source": [ + "# Check person_household_id linkage\n", + "phh_id_cols = [c for c in df.columns if c.startswith('person_household_id__')]\n", + "\n", + "print(\"\\n=== person_household_id Export Analysis ===\")\n", + "if phh_id_cols:\n", + " col = phh_id_cols[0]\n", + " print(f\"Column: {col}\")\n", + " print(f\" Length: {len(df[col])}\")\n", + " print(f\" Unique values (should match household count): {df[col].nunique()}\")\n", + " print(f\" Expected household count: {underlying_sim.household.count}\")\n", + " \n", + " if df[col].nunique() == underlying_sim.household.count:\n", + " print(\" [OK] Unique count matches household count\")\n", + " else:\n", + " print(\" [WARNING] Mismatch!\")\n", + "else:\n", + " print(\"person_household_id NOT exported! This is critical.\")" + ] + }, + { + "cell_type": "markdown", + "id": "cell-11", + "metadata": {}, + "source": [ + "## Step 4: Manually Reproduce the Wales Filtering\n", + "\n", + "Let's manually do what `_apply_region_to_simulation` does to identify where it breaks." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "cell-12", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== Step 4a: Calculate country at person level ===\n", + "Country array shape: (115612,)\n", + "\n", + "Welsh persons: 8,470\n", + "Non-Welsh persons: 107,142\n" + ] + } + ], + "source": [ + "# Step 4a: Get country at person level (same as policyengine.py:296-298)\n", + "print(\"=== Step 4a: Calculate country at person level ===\")\n", + "country_person = underlying_sim.calculate(\"country\", map_to=\"person\").values\n", + "print(f\"Country array shape: {country_person.shape}\")\n", + "\n", + "# Create Wales mask\n", + "wales_mask = country_person == \"WALES\"\n", + "print(f\"\\nWelsh persons: {wales_mask.sum():,}\")\n", + "print(f\"Non-Welsh persons: {(~wales_mask).sum():,}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "cell-13", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "=== Step 4b: Filter DataFrame to Wales ===\n", + "Filtered DataFrame shape: (8470, 1127)\n", + "Number of Welsh persons: 8470\n" + ] + } + ], + "source": [ + "# Step 4b: Filter DataFrame to Wales (same as policyengine.py:299-300)\n", + "print(\"\\n=== Step 4b: Filter DataFrame to Wales ===\")\n", + "df_wales = df[wales_mask]\n", + "print(f\"Filtered DataFrame shape: {df_wales.shape}\")\n", + "print(f\"Number of Welsh persons: {len(df_wales)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "cell-14", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "=== Step 4c: Examine person_household_id in filtered data ===\n", + "Column: person_household_id__2023\n", + " Length: 8470\n", + " Unique households in Wales: 4108\n", + " Min household ID: 2.0\n", + " Max household ID: 66996.0\n", + " Sample values: [2. 2. 2. 2. 2. 6. 6. 6. 6. 
7.]\n", + " [INFO] Household IDs are NOT contiguous (gaps from filtering)\n", + " This is expected - they're original UK-wide IDs\n" + ] + } + ], + "source": [ + "# Check what person_household_id looks like in filtered data\n", + "print(\"\\n=== Step 4c: Examine person_household_id in filtered data ===\")\n", + "if phh_id_cols:\n", + " col = phh_id_cols[0]\n", + " welsh_phh = df_wales[col].values\n", + " print(f\"Column: {col}\")\n", + " print(f\" Length: {len(welsh_phh)}\")\n", + " print(f\" Unique households in Wales: {len(np.unique(welsh_phh))}\")\n", + " print(f\" Min household ID: {welsh_phh.min()}\")\n", + " print(f\" Max household ID: {welsh_phh.max()}\")\n", + " print(f\" Sample values: {welsh_phh[:10]}\")\n", + " \n", + " # Check if IDs are contiguous\n", + " unique_hh = np.unique(welsh_phh)\n", + " if np.array_equal(unique_hh, np.arange(len(unique_hh))):\n", + " print(\" [INFO] Household IDs are contiguous 0-based\")\n", + " else:\n", + " print(\" [INFO] Household IDs are NOT contiguous (gaps from filtering)\")\n", + " print(f\" This is expected - they're original UK-wide IDs\")" + ] + }, + { + "cell_type": "markdown", + "id": "cell-15", + "metadata": {}, + "source": [ + "## Step 5: Try to Create Simulation from Filtered DataFrame\n", + "\n", + "This is where the error should occur." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "cell-16", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== Step 5a: Create simulation from filtered DataFrame ===\n", + "(This is what policyengine.py:299-300 does)\n", + "\n", + "[ERROR] Failed to create simulation: Unable to set value \"[ 39361. 39361. 39361. ... 134410.5 134410.5 6000. 
]\" for variable \"corporate_wealth\", as its length is 8470 while there are 4108 households in the simulation.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\n", + " File \"/var/folders/qf/7fglpnr94wsc5xgbqkp80xbw0000gn/T/ipykernel_19066/2037714397.py\", line 7, in \n", + " new_sim = UKMicrosimulation(dataset=df_wales)\n", + " File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_uk/simulation.py\", line 100, in __init__\n", + " self.build_from_dataframe(dataset)\n", + " ~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^\n", + " File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_uk/simulation.py\", line 286, in build_from_dataframe\n", + " self.set_input(variable, time_period, df[column])\n", + " ~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/simulations/simulation.py\", line 1241, in set_input\n", + " self.get_holder(variable_name).set_input(\n", + " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^\n", + " period, value, self.branch_name\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " )\n", + " ^\n", + " File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/holders/holder.py\", line 244, in set_input\n", + " return self._set(period, array, branch_name)\n", + " ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/holders/holder.py\", line 286, in _set\n", + " value = self._to_array(value)\n", + " File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/holders/holder.py\", line 253, in _to_array\n", + " raise ValueError(\n", + " ...<7 lines>...\n", + " )\n", + "ValueError: Unable to set value \"[ 39361. 39361. 39361. ... 134410.5 134410.5 6000. 
]\" for variable \"corporate_wealth\", as its length is 8470 while there are 4108 households in the simulation.\n" + ] + } + ], + "source": [ + "# Step 5a: Create new simulation from filtered DataFrame\n", + "print(\"=== Step 5a: Create simulation from filtered DataFrame ===\")\n", + "print(\"(This is what policyengine.py:299-300 does)\")\n", + "print()\n", + "\n", + "try:\n", + " new_sim = UKMicrosimulation(dataset=df_wales)\n", + " \n", + " print(f\"New simulation created successfully!\")\n", + " print(f\" Person count: {new_sim.persons.count}\")\n", + " print(f\" Household count: {new_sim.household.count}\")\n", + " print(f\" BenUnit count: {new_sim.benunit.count}\")\n", + " \n", + " # Critical check\n", + " if new_sim.household.count == new_sim.persons.count:\n", + " print(\"\\n [ERROR] Household count equals person count!\")\n", + " print(\" Entity linkage was lost during filtering.\")\n", + " \n", + "except Exception as e:\n", + " print(f\"[ERROR] Failed to create simulation: {e}\")\n", + " traceback.print_exc()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "cell-17", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "=== Step 5b: Check household_id holder ===\n", + "Error checking household_id: name 'new_sim' is not defined\n" + ] + } + ], + "source": [ + "# Step 5b: Check if household_id holder has data\n", + "print(\"\\n=== Step 5b: Check household_id holder ===\")\n", + "\n", + "try:\n", + " hh_id_holder = new_sim.get_holder(\"household_id\")\n", + " known_periods = list(hh_id_holder.get_known_periods())\n", + " print(f\"household_id known periods: {known_periods}\")\n", + " \n", + " if known_periods:\n", + " period = known_periods[0]\n", + " arr = hh_id_holder.get_array(period)\n", + " print(f\" Period {period}: array shape = {arr.shape if arr is not None else 'None'}\")\n", + " if arr is not None:\n", + " print(f\" Values sample: {arr[:10]}\")\n", + " else:\n", + " print(\" 
No known periods - household_id was not set as input!\")\n", + "except Exception as e:\n", + " print(f\"Error checking household_id: {e}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "cell-18", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "=== Step 5c: Calculate household_id ===\n", + "Error calculating household_id: name 'new_sim' is not defined\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\n", + " File \"/var/folders/qf/7fglpnr94wsc5xgbqkp80xbw0000gn/T/ipykernel_19066/1284064109.py\", line 5, in \n", + " hh_ids = new_sim.calculate(\"household_id\", 2025)\n", + " ^^^^^^^\n", + "NameError: name 'new_sim' is not defined\n" + ] + } + ], + "source": [ + "# Step 5c: Try to calculate household_id\n", + "print(\"\\n=== Step 5c: Calculate household_id ===\")\n", + "\n", + "try:\n", + " hh_ids = new_sim.calculate(\"household_id\", 2025)\n", + " print(f\"household_id calculation result:\")\n", + " print(f\" Length: {len(hh_ids)}\")\n", + " print(f\" Expected (household count): {new_sim.household.count}\")\n", + " \n", + " if len(hh_ids) == new_sim.household.count:\n", + " print(\" [OK] Length matches household count\")\n", + " else:\n", + " print(f\" [ERROR] Length mismatch! Got {len(hh_ids)}, expected {new_sim.household.count}\")\n", + " \n", + "except Exception as e:\n", + " print(f\"Error calculating household_id: {e}\")\n", + " traceback.print_exc()" + ] + }, + { + "cell_type": "markdown", + "id": "cell-19", + "metadata": {}, + "source": [ + "## Step 6: Try to Calculate would_evade_tv_licence_fee\n", + "\n", + "This is the variable that triggers the error in production." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "cell-20", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== Step 6: Calculate would_evade_tv_licence_fee ===\n", + "(This calculation uses random(household) internally)\n", + "\n", + "Unexpected error: NameError: name 'new_sim' is not defined\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\n", + " File \"/var/folders/qf/7fglpnr94wsc5xgbqkp80xbw0000gn/T/ipykernel_19066/1304269510.py\", line 7, in \n", + " result = new_sim.calculate(\"would_evade_tv_licence_fee\", 2025)\n", + " ^^^^^^^\n", + "NameError: name 'new_sim' is not defined\n" + ] + } + ], + "source": [ + "# Step 6: Calculate the problematic variable\n", + "print(\"=== Step 6: Calculate would_evade_tv_licence_fee ===\")\n", + "print(\"(This calculation uses random(household) internally)\")\n", + "print()\n", + "\n", + "try:\n", + " result = new_sim.calculate(\"would_evade_tv_licence_fee\", 2025)\n", + " print(f\"Calculation succeeded!\")\n", + " print(f\" Result length: {len(result)}\")\n", + " print(f\" Expected (household count): {new_sim.household.count}\")\n", + " print(f\" Result dtype: {result.dtype}\")\n", + " \n", + "except ValueError as e:\n", + " print(f\"[EXPECTED ERROR] ValueError:\")\n", + " print(f\" {e}\")\n", + " print()\n", + " print(\"This confirms the bug!\")\n", + " \n", + " # Parse the error message\n", + " error_str = str(e)\n", + " if \"length is\" in error_str and \"while there are\" in error_str:\n", + " print(\"\\nThe error indicates:\")\n", + " print(\" - The formula returned an array sized for persons\")\n", + " print(\" - But the variable is household-level\")\n", + " print(\" - This means random(household) returned wrong-sized array\")\n", + " \n", + "except Exception as e:\n", + " print(f\"Unexpected error: {type(e).__name__}: {e}\")\n", + " traceback.print_exc()" + ] + }, + { + "cell_type": 
"markdown", + "id": "cell-21", + "metadata": {}, + "source": [ + "## Step 7: Test Using policyengine.Simulation Directly\n", + "\n", + "Now let's test using the high-level API to confirm the bug occurs there too." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "cell-22", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== Step 7: Test with policyengine.Simulation ===\n", + "Creating Simulation with region='country/wales'...\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "No data provided, using default dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n", + "Using dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n", + "Downloading enhanced_frs_2023_24.h5 from bucket policyengine-uk-data-private\n", + "WARNING:root:No version specified for policyengine-uk-data-private, enhanced_frs_2023_24.h5. Using latest version: 1.29.4\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DataFrame columns: ['miscellaneous_income__2023', 'miscellaneous_income__2024', 'miscellaneous_income__2025', 'miscellaneous_income__2026', 'miscellaneous_income__2027', 'miscellaneous_income__2028', 'miscellaneous_income__2029', 'miscellaneous_income__2030', 'corporate_wealth__2023', 'corporate_wealth__2024', 'corporate_wealth__2025', 'corporate_wealth__2026', 'corporate_wealth__2027', 'corporate_wealth__2028', 'corporate_wealth__2029', 'corporate_wealth__2030', 'non_residential_property_value__2023', 'non_residential_property_value__2024', 'non_residential_property_value__2025', 'non_residential_property_value__2026', 'non_residential_property_value__2027', 'non_residential_property_value__2028', 'non_residential_property_value__2029', 'non_residential_property_value__2030', 'employment_income_before_lsr__2023', 'employment_income_before_lsr__2024', 'employment_income_before_lsr__2025', 'employment_income_before_lsr__2026', 
'employment_income_before_lsr__2027', 'employment_income_before_lsr__2028', 'employment_income_before_lsr__2029', 'employment_income_before_lsr__2030', 'property_income__2023', 'property_income__2024', 'property_income__2025', 'property_income__2026', 'property_income__2027', 'property_income__2028', 'property_income__2029', 'property_income__2030', 'savings_interest_income__2023', 'savings_interest_income__2024', 'savings_interest_income__2025', 'savings_interest_income__2026', 'savings_interest_income__2027', 'savings_interest_income__2028', 'savings_interest_income__2029', 'savings_interest_income__2030', 'main_residence_value__2023', 'main_residence_value__2024', 'main_residence_value__2025', 'main_residence_value__2026', 'main_residence_value__2027', 'main_residence_value__2028', 'main_residence_value__2029', 'main_residence_value__2030', 'rent__2023', 'rent__2024', 'rent__2025', 'rent__2026', 'rent__2027', 'rent__2028', 'rent__2029', 'rent__2030', 'private_pension_income__2023', 'private_pension_income__2024', 'private_pension_income__2025', 'private_pension_income__2026', 'private_pension_income__2027', 'private_pension_income__2028', 'private_pension_income__2029', 'private_pension_income__2030', 'self_employment_income__2023', 'self_employment_income__2024', 'self_employment_income__2025', 'self_employment_income__2026', 'self_employment_income__2027', 'self_employment_income__2028', 'self_employment_income__2029', 'self_employment_income__2030', 'private_transfer_income__2023', 'private_transfer_income__2024', 'private_transfer_income__2025', 'private_transfer_income__2026', 'private_transfer_income__2027', 'private_transfer_income__2028', 'private_transfer_income__2029', 'private_transfer_income__2030', 'age__2023', 'age__2024', 'age__2025', 'age__2026', 'age__2027', 'age__2028', 'age__2029', 'age__2030', 'owned_land__2023', 'owned_land__2024', 'owned_land__2025', 'owned_land__2026', 'owned_land__2027', 'owned_land__2028', 'owned_land__2029', 
'owned_land__2030', 'lump_sum_income__2023', 'lump_sum_income__2024', 'lump_sum_income__2025', 'lump_sum_income__2026', 'lump_sum_income__2027', 'lump_sum_income__2028', 'lump_sum_income__2029', 'lump_sum_income__2030', 'council_tax_band__2023', 'council_tax_band__2024', 'council_tax_band__2025', 'council_tax_band__2026', 'council_tax_band__2027', 'council_tax_band__2028', 'council_tax_band__2029', 'council_tax_band__2030', 'other_residential_property_value__2023', 'other_residential_property_value__2024', 'other_residential_property_value__2025', 'other_residential_property_value__2026', 'other_residential_property_value__2027', 'other_residential_property_value__2028', 'other_residential_property_value__2029', 'other_residential_property_value__2030', 'dividend_income__2023', 'dividend_income__2024', 'dividend_income__2025', 'dividend_income__2026', 'dividend_income__2027', 'dividend_income__2028', 'dividend_income__2029', 'dividend_income__2030', 'maintenance_income__2023', 'maintenance_income__2024', 'maintenance_income__2025', 'maintenance_income__2026', 'maintenance_income__2027', 'maintenance_income__2028', 'maintenance_income__2029', 'maintenance_income__2030', 'petrol_spending__2023', 'petrol_spending__2024', 'petrol_spending__2025', 'petrol_spending__2026', 'petrol_spending__2027', 'petrol_spending__2028', 'petrol_spending__2029', 'petrol_spending__2030', 'health_consumption__2023', 'health_consumption__2024', 'health_consumption__2025', 'health_consumption__2026', 'health_consumption__2027', 'health_consumption__2028', 'health_consumption__2029', 'health_consumption__2030', 'household_furnishings_consumption__2023', 'household_furnishings_consumption__2024', 'household_furnishings_consumption__2025', 'household_furnishings_consumption__2026', 'household_furnishings_consumption__2027', 'household_furnishings_consumption__2028', 'household_furnishings_consumption__2029', 'household_furnishings_consumption__2030', 'restaurants_and_hotels_consumption__2023', 
'restaurants_and_hotels_consumption__2024', 'restaurants_and_hotels_consumption__2025', 'restaurants_and_hotels_consumption__2026', 'restaurants_and_hotels_consumption__2027', 'restaurants_and_hotels_consumption__2028', 'restaurants_and_hotels_consumption__2029', 'restaurants_and_hotels_consumption__2030', 'miscellaneous_consumption__2023', 'miscellaneous_consumption__2024', 'miscellaneous_consumption__2025', 'miscellaneous_consumption__2026', 'miscellaneous_consumption__2027', 'miscellaneous_consumption__2028', 'miscellaneous_consumption__2029', 'miscellaneous_consumption__2030', 'recreation_consumption__2023', 'recreation_consumption__2024', 'recreation_consumption__2025', 'recreation_consumption__2026', 'recreation_consumption__2027', 'recreation_consumption__2028', 'recreation_consumption__2029', 'recreation_consumption__2030', 'domestic_energy_consumption__2023', 'domestic_energy_consumption__2024', 'domestic_energy_consumption__2025', 'domestic_energy_consumption__2026', 'domestic_energy_consumption__2027', 'domestic_energy_consumption__2028', 'domestic_energy_consumption__2029', 'domestic_energy_consumption__2030', 'alcohol_and_tobacco_consumption__2023', 'alcohol_and_tobacco_consumption__2024', 'alcohol_and_tobacco_consumption__2025', 'alcohol_and_tobacco_consumption__2026', 'alcohol_and_tobacco_consumption__2027', 'alcohol_and_tobacco_consumption__2028', 'alcohol_and_tobacco_consumption__2029', 'alcohol_and_tobacco_consumption__2030', 'clothing_and_footwear_consumption__2023', 'clothing_and_footwear_consumption__2024', 'clothing_and_footwear_consumption__2025', 'clothing_and_footwear_consumption__2026', 'clothing_and_footwear_consumption__2027', 'clothing_and_footwear_consumption__2028', 'clothing_and_footwear_consumption__2029', 'clothing_and_footwear_consumption__2030', 'education_consumption__2023', 'education_consumption__2024', 'education_consumption__2025', 'education_consumption__2026', 'education_consumption__2027', 'education_consumption__2028', 
'education_consumption__2029', 'education_consumption__2030', 'communication_consumption__2023', 'communication_consumption__2024', 'communication_consumption__2025', 'communication_consumption__2026', 'communication_consumption__2027', 'communication_consumption__2028', 'communication_consumption__2029', 'communication_consumption__2030', 'housing_water_and_electricity_consumption__2023', 'housing_water_and_electricity_consumption__2024', 'housing_water_and_electricity_consumption__2025', 'housing_water_and_electricity_consumption__2026', 'housing_water_and_electricity_consumption__2027', 'housing_water_and_electricity_consumption__2028', 'housing_water_and_electricity_consumption__2029', 'housing_water_and_electricity_consumption__2030', 'diesel_spending__2023', 'diesel_spending__2024', 'diesel_spending__2025', 'diesel_spending__2026', 'diesel_spending__2027', 'diesel_spending__2028', 'diesel_spending__2029', 'diesel_spending__2030', 'food_and_non_alcoholic_beverages_consumption__2023', 'food_and_non_alcoholic_beverages_consumption__2024', 'food_and_non_alcoholic_beverages_consumption__2025', 'food_and_non_alcoholic_beverages_consumption__2026', 'food_and_non_alcoholic_beverages_consumption__2027', 'food_and_non_alcoholic_beverages_consumption__2028', 'food_and_non_alcoholic_beverages_consumption__2029', 'food_and_non_alcoholic_beverages_consumption__2030', 'transport_consumption__2023', 'transport_consumption__2024', 'transport_consumption__2025', 'transport_consumption__2026', 'transport_consumption__2027', 'transport_consumption__2028', 'transport_consumption__2029', 'transport_consumption__2030', 'childcare_expenses__2023', 'childcare_expenses__2024', 'childcare_expenses__2025', 'childcare_expenses__2026', 'childcare_expenses__2027', 'childcare_expenses__2028', 'childcare_expenses__2029', 'childcare_expenses__2030', 'water_and_sewerage_charges__2023', 'water_and_sewerage_charges__2024', 'water_and_sewerage_charges__2025', 'water_and_sewerage_charges__2026', 
'water_and_sewerage_charges__2027', 'water_and_sewerage_charges__2028', 'water_and_sewerage_charges__2029', 'water_and_sewerage_charges__2030', 'maintenance_expenses__2023', 'maintenance_expenses__2024', 'maintenance_expenses__2025', 'maintenance_expenses__2026', 'maintenance_expenses__2027', 'maintenance_expenses__2028', 'maintenance_expenses__2029', 'maintenance_expenses__2030', 'employee_pension_contributions_reported__2023', 'employee_pension_contributions_reported__2024', 'employee_pension_contributions_reported__2025', 'employee_pension_contributions_reported__2026', 'employee_pension_contributions_reported__2027', 'employee_pension_contributions_reported__2028', 'employee_pension_contributions_reported__2029', 'employee_pension_contributions_reported__2030', 'mortgage_capital_repayment__2023', 'mortgage_capital_repayment__2024', 'mortgage_capital_repayment__2025', 'mortgage_capital_repayment__2026', 'mortgage_capital_repayment__2027', 'mortgage_capital_repayment__2028', 'mortgage_capital_repayment__2029', 'mortgage_capital_repayment__2030', 'pension_contributions_via_salary_sacrifice__2023', 'pension_contributions_via_salary_sacrifice__2024', 'pension_contributions_via_salary_sacrifice__2025', 'pension_contributions_via_salary_sacrifice__2026', 'pension_contributions_via_salary_sacrifice__2027', 'pension_contributions_via_salary_sacrifice__2028', 'pension_contributions_via_salary_sacrifice__2029', 'pension_contributions_via_salary_sacrifice__2030', 'council_tax__2023', 'council_tax__2024', 'council_tax__2025', 'council_tax__2026', 'council_tax__2027', 'council_tax__2028', 'council_tax__2029', 'council_tax__2030', 'mortgage_interest_repayment__2023', 'mortgage_interest_repayment__2024', 'mortgage_interest_repayment__2025', 'mortgage_interest_repayment__2026', 'mortgage_interest_repayment__2027', 'mortgage_interest_repayment__2028', 'mortgage_interest_repayment__2029', 'mortgage_interest_repayment__2030', 'housing_service_charges__2023', 
'housing_service_charges__2024', 'housing_service_charges__2025', 'housing_service_charges__2026', 'housing_service_charges__2027', 'housing_service_charges__2028', 'housing_service_charges__2029', 'housing_service_charges__2030', 'employer_pension_contributions__2023', 'employer_pension_contributions__2024', 'employer_pension_contributions__2025', 'employer_pension_contributions__2026', 'employer_pension_contributions__2027', 'employer_pension_contributions__2028', 'employer_pension_contributions__2029', 'employer_pension_contributions__2030', 'personal_pension_contributions__2023', 'personal_pension_contributions__2024', 'personal_pension_contributions__2025', 'personal_pension_contributions__2026', 'personal_pension_contributions__2027', 'personal_pension_contributions__2028', 'personal_pension_contributions__2029', 'personal_pension_contributions__2030', 'attends_private_school__2023', 'attends_private_school__2024', 'attends_private_school__2025', 'attends_private_school__2026', 'attends_private_school__2027', 'attends_private_school__2028', 'attends_private_school__2029', 'attends_private_school__2030', 'region__2023', 'region__2024', 'region__2025', 'region__2026', 'region__2027', 'region__2028', 'region__2029', 'region__2030', 'brma__2023', 'brma__2024', 'brma__2025', 'brma__2026', 'brma__2027', 'brma__2028', 'brma__2029', 'brma__2030', 'net_financial_wealth__2023', 'net_financial_wealth__2024', 'net_financial_wealth__2025', 'net_financial_wealth__2026', 'net_financial_wealth__2027', 'net_financial_wealth__2028', 'net_financial_wealth__2029', 'net_financial_wealth__2030', 'property_wealth__2023', 'property_wealth__2024', 'property_wealth__2025', 'property_wealth__2026', 'property_wealth__2027', 'property_wealth__2028', 'property_wealth__2029', 'property_wealth__2030', 'savings__2023', 'savings__2024', 'savings__2025', 'savings__2026', 'savings__2027', 'savings__2028', 'savings__2029', 'savings__2030', 'num_vehicles__2023', 'num_vehicles__2024', 
'num_vehicles__2025', 'num_vehicles__2026', 'num_vehicles__2027', 'num_vehicles__2028', 'num_vehicles__2029', 'num_vehicles__2030', 'gross_financial_wealth__2023', 'gross_financial_wealth__2024', 'gross_financial_wealth__2025', 'gross_financial_wealth__2026', 'gross_financial_wealth__2027', 'gross_financial_wealth__2028', 'gross_financial_wealth__2029', 'gross_financial_wealth__2030', 'relation_type__2025', 'is_enhanced_disabled_for_benefits__2023', 'is_enhanced_disabled_for_benefits__2024', 'is_enhanced_disabled_for_benefits__2025', 'is_enhanced_disabled_for_benefits__2026', 'is_enhanced_disabled_for_benefits__2027', 'is_enhanced_disabled_for_benefits__2028', 'is_enhanced_disabled_for_benefits__2029', 'is_enhanced_disabled_for_benefits__2030', 'is_higher_earner__2023', 'is_higher_earner__2024', 'is_higher_earner__2025', 'is_higher_earner__2026', 'is_higher_earner__2027', 'is_higher_earner__2028', 'is_higher_earner__2029', 'is_higher_earner__2030', 'gender__2023', 'gender__2024', 'gender__2025', 'gender__2026', 'gender__2027', 'gender__2028', 'gender__2029', 'gender__2030', 'person_id__2023', 'person_id__2024', 'person_id__2025', 'person_id__2026', 'person_id__2027', 'person_id__2028', 'person_id__2029', 'person_id__2030', 'household_weight__2023', 'household_weight__2024', 'household_weight__2025', 'household_weight__2026', 'household_weight__2027', 'household_weight__2028', 'household_weight__2029', 'household_weight__2030', 'is_benunit_head__2023', 'is_benunit_head__2024', 'is_benunit_head__2025', 'is_benunit_head__2026', 'is_benunit_head__2027', 'is_benunit_head__2028', 'is_benunit_head__2029', 'is_benunit_head__2030', 'person_weight__2025', 'person_weight__2026', 'person_weight__2027', 'person_weight__2028', 'person_weight__2029', 'person_weight__2023', 'person_weight__2024', 'person_weight__2030', 'person_benunit_id__2023', 'person_benunit_id__2024', 'person_benunit_id__2025', 'person_benunit_id__2026', 'person_benunit_id__2027', 'person_benunit_id__2028', 
'person_benunit_id__2029', 'person_benunit_id__2030', 'person_household_id__2023', 'person_household_id__2024', 'person_household_id__2025', 'person_household_id__2026', 'person_household_id__2027', 'person_household_id__2028', 'person_household_id__2029', 'person_household_id__2030', 'tenure_type__2023', 'tenure_type__2024', 'tenure_type__2025', 'tenure_type__2026', 'tenure_type__2027', 'tenure_type__2028', 'tenure_type__2029', 'tenure_type__2030', 'marital_status__2023', 'marital_status__2024', 'marital_status__2025', 'marital_status__2026', 'marital_status__2027', 'marital_status__2028', 'marital_status__2029', 'marital_status__2030', 'is_household_head__2023', 'is_household_head__2024', 'is_household_head__2025', 'is_household_head__2026', 'is_household_head__2027', 'is_household_head__2028', 'is_household_head__2029', 'is_household_head__2030', 'current_education__2023', 'current_education__2024', 'current_education__2025', 'current_education__2026', 'current_education__2027', 'current_education__2028', 'current_education__2029', 'current_education__2030', 'household_owns_tv__2023', 'household_owns_tv__2024', 'household_owns_tv__2025', 'household_owns_tv__2026', 'household_owns_tv__2027', 'household_owns_tv__2028', 'household_owns_tv__2029', 'household_owns_tv__2030', 'is_severely_disabled_for_benefits__2023', 'is_severely_disabled_for_benefits__2024', 'is_severely_disabled_for_benefits__2025', 'is_severely_disabled_for_benefits__2026', 'is_severely_disabled_for_benefits__2027', 'is_severely_disabled_for_benefits__2028', 'is_severely_disabled_for_benefits__2029', 'is_severely_disabled_for_benefits__2030', 'accommodation_type__2023', 'accommodation_type__2024', 'accommodation_type__2025', 'accommodation_type__2026', 'accommodation_type__2027', 'accommodation_type__2028', 'accommodation_type__2029', 'accommodation_type__2030', 'is_married__2023', 'is_married__2024', 'is_married__2025', 'is_married__2026', 'is_married__2027', 'is_married__2028', 
'is_married__2029', 'is_married__2030', 'benunit_id__2023', 'benunit_id__2024', 'benunit_id__2025', 'benunit_id__2026', 'benunit_id__2027', 'benunit_id__2028', 'benunit_id__2029', 'benunit_id__2030', 'is_disabled_for_benefits__2023', 'is_disabled_for_benefits__2024', 'is_disabled_for_benefits__2025', 'is_disabled_for_benefits__2026', 'is_disabled_for_benefits__2027', 'is_disabled_for_benefits__2028', 'is_disabled_for_benefits__2029', 'is_disabled_for_benefits__2030', 'eldest_adult_age__2025', 'is_adult__2025', 'benunit_weight__2025', 'benunit_weight__2026', 'benunit_weight__2027', 'benunit_weight__2028', 'benunit_weight__2029', 'household_id__2023', 'household_id__2024', 'household_id__2025', 'household_id__2026', 'household_id__2027', 'household_id__2028', 'household_id__2029', 'household_id__2030', 'structural_insurance_payments__2023', 'structural_insurance_payments__2024', 'structural_insurance_payments__2025', 'structural_insurance_payments__2026', 'structural_insurance_payments__2027', 'structural_insurance_payments__2028', 'structural_insurance_payments__2029', 'structural_insurance_payments__2030', 'main_residential_property_purchased_is_first_home__2023', 'main_residential_property_purchased_is_first_home__2024', 'main_residential_property_purchased_is_first_home__2025', 'main_residential_property_purchased_is_first_home__2026', 'main_residential_property_purchased_is_first_home__2027', 'main_residential_property_purchased_is_first_home__2028', 'main_residential_property_purchased_is_first_home__2029', 'main_residential_property_purchased_is_first_home__2030', 'full_rate_vat_expenditure_rate__2023', 'full_rate_vat_expenditure_rate__2024', 'full_rate_vat_expenditure_rate__2025', 'full_rate_vat_expenditure_rate__2026', 'full_rate_vat_expenditure_rate__2027', 'full_rate_vat_expenditure_rate__2028', 'full_rate_vat_expenditure_rate__2029', 'full_rate_vat_expenditure_rate__2030', 'external_child_payments__2023', 'external_child_payments__2024', 
'external_child_payments__2025', 'external_child_payments__2026', 'external_child_payments__2027', 'external_child_payments__2028', 'external_child_payments__2029', 'external_child_payments__2030', 'statutory_maternity_pay__2023', 'statutory_maternity_pay__2024', 'statutory_maternity_pay__2025', 'statutory_maternity_pay__2026', 'statutory_maternity_pay__2027', 'statutory_maternity_pay__2028', 'statutory_maternity_pay__2029', 'statutory_maternity_pay__2030', 'employment_status__2023', 'employment_status__2024', 'employment_status__2025', 'employment_status__2026', 'employment_status__2027', 'employment_status__2028', 'employment_status__2029', 'employment_status__2030', 'is_single__2025', 'statutory_sick_pay__2023', 'statutory_sick_pay__2024', 'statutory_sick_pay__2025', 'statutory_sick_pay__2026', 'statutory_sick_pay__2027', 'statutory_sick_pay__2028', 'statutory_sick_pay__2029', 'statutory_sick_pay__2030', 'hours_worked__2023', 'hours_worked__2024', 'hours_worked__2025', 'hours_worked__2026', 'hours_worked__2027', 'hours_worked__2028', 'hours_worked__2029', 'hours_worked__2030', 'rail_usage__2023', 'rail_usage__2024', 'rail_usage__2025', 'rail_usage__2026', 'rail_usage__2027', 'rail_usage__2028', 'rail_usage__2029', 'rail_usage__2030', 'rail_subsidy_spending__2023', 'rail_subsidy_spending__2024', 'rail_subsidy_spending__2025', 'rail_subsidy_spending__2026', 'rail_subsidy_spending__2027', 'rail_subsidy_spending__2028', 'rail_subsidy_spending__2029', 'rail_subsidy_spending__2030', 'bus_subsidy_spending__2023', 'bus_subsidy_spending__2024', 'bus_subsidy_spending__2025', 'bus_subsidy_spending__2026', 'bus_subsidy_spending__2027', 'bus_subsidy_spending__2028', 'bus_subsidy_spending__2029', 'bus_subsidy_spending__2030', 'outpatient_visits__2023', 'outpatient_visits__2024', 'outpatient_visits__2025', 'outpatient_visits__2026', 'outpatient_visits__2027', 'outpatient_visits__2028', 'outpatient_visits__2029', 'outpatient_visits__2030', 'nhs_outpatient_spending__2023', 
'nhs_outpatient_spending__2024', 'nhs_outpatient_spending__2025', 'nhs_outpatient_spending__2026', 'nhs_outpatient_spending__2027', 'nhs_outpatient_spending__2028', 'nhs_outpatient_spending__2029', 'nhs_outpatient_spending__2030', 'nhs_a_and_e_spending__2023', 'nhs_a_and_e_spending__2024', 'nhs_a_and_e_spending__2025', 'nhs_a_and_e_spending__2026', 'nhs_a_and_e_spending__2027', 'nhs_a_and_e_spending__2028', 'nhs_a_and_e_spending__2029', 'nhs_a_and_e_spending__2030', 'a_and_e_visits__2023', 'a_and_e_visits__2024', 'a_and_e_visits__2025', 'a_and_e_visits__2026', 'a_and_e_visits__2027', 'a_and_e_visits__2028', 'a_and_e_visits__2029', 'a_and_e_visits__2030', 'admitted_patient_visits__2023', 'admitted_patient_visits__2024', 'admitted_patient_visits__2025', 'admitted_patient_visits__2026', 'admitted_patient_visits__2027', 'admitted_patient_visits__2028', 'admitted_patient_visits__2029', 'admitted_patient_visits__2030', 'nhs_admitted_patient_spending__2023', 'nhs_admitted_patient_spending__2024', 'nhs_admitted_patient_spending__2025', 'nhs_admitted_patient_spending__2026', 'nhs_admitted_patient_spending__2027', 'nhs_admitted_patient_spending__2028', 'nhs_admitted_patient_spending__2029', 'nhs_admitted_patient_spending__2030', 'healthy_start_vouchers__2023', 'healthy_start_vouchers__2024', 'healthy_start_vouchers__2025', 'healthy_start_vouchers__2026', 'healthy_start_vouchers__2027', 'healthy_start_vouchers__2028', 'healthy_start_vouchers__2029', 'healthy_start_vouchers__2030', 'education_grants__2023', 'education_grants__2024', 'education_grants__2025', 'education_grants__2026', 'education_grants__2027', 'education_grants__2028', 'education_grants__2029', 'education_grants__2030', 'jsa_contrib_reported__2023', 'jsa_contrib_reported__2024', 'jsa_contrib_reported__2025', 'jsa_contrib_reported__2026', 'jsa_contrib_reported__2027', 'jsa_contrib_reported__2028', 'jsa_contrib_reported__2029', 'jsa_contrib_reported__2030', 'sda_reported__2023', 'sda_reported__2024', 
'sda_reported__2025', 'sda_reported__2026', 'sda_reported__2027', 'sda_reported__2028', 'sda_reported__2029', 'sda_reported__2030', 'adult_ema__2023', 'adult_ema__2024', 'adult_ema__2025', 'adult_ema__2026', 'adult_ema__2027', 'adult_ema__2028', 'adult_ema__2029', 'adult_ema__2030', 'winter_fuel_allowance_reported__2023', 'winter_fuel_allowance_reported__2024', 'winter_fuel_allowance_reported__2025', 'winter_fuel_allowance_reported__2026', 'winter_fuel_allowance_reported__2027', 'winter_fuel_allowance_reported__2028', 'winter_fuel_allowance_reported__2029', 'winter_fuel_allowance_reported__2030', 'child_tax_credit_reported__2023', 'child_tax_credit_reported__2024', 'child_tax_credit_reported__2025', 'child_tax_credit_reported__2026', 'child_tax_credit_reported__2027', 'child_tax_credit_reported__2028', 'child_tax_credit_reported__2029', 'child_tax_credit_reported__2030', 'working_tax_credit_reported__2023', 'working_tax_credit_reported__2024', 'working_tax_credit_reported__2025', 'working_tax_credit_reported__2026', 'working_tax_credit_reported__2027', 'working_tax_credit_reported__2028', 'working_tax_credit_reported__2029', 'working_tax_credit_reported__2030', 'bsp_reported__2023', 'bsp_reported__2024', 'bsp_reported__2025', 'bsp_reported__2026', 'bsp_reported__2027', 'bsp_reported__2028', 'bsp_reported__2029', 'bsp_reported__2030', 'carers_allowance_reported__2023', 'carers_allowance_reported__2024', 'carers_allowance_reported__2025', 'carers_allowance_reported__2026', 'carers_allowance_reported__2027', 'carers_allowance_reported__2028', 'carers_allowance_reported__2029', 'carers_allowance_reported__2030', 'access_fund__2023', 'access_fund__2024', 'access_fund__2025', 'access_fund__2026', 'access_fund__2027', 'access_fund__2028', 'access_fund__2029', 'access_fund__2030', 'ssmg_reported__2023', 'ssmg_reported__2024', 'ssmg_reported__2025', 'ssmg_reported__2026', 'ssmg_reported__2027', 'ssmg_reported__2028', 'ssmg_reported__2029', 'ssmg_reported__2030', 
'incapacity_benefit_reported__2023', 'incapacity_benefit_reported__2024', 'incapacity_benefit_reported__2025', 'incapacity_benefit_reported__2026', 'incapacity_benefit_reported__2027', 'incapacity_benefit_reported__2028', 'incapacity_benefit_reported__2029', 'incapacity_benefit_reported__2030', 'iidb_reported__2023', 'iidb_reported__2024', 'iidb_reported__2025', 'iidb_reported__2026', 'iidb_reported__2027', 'iidb_reported__2028', 'iidb_reported__2029', 'iidb_reported__2030', 'attendance_allowance_reported__2023', 'attendance_allowance_reported__2024', 'attendance_allowance_reported__2025', 'attendance_allowance_reported__2026', 'attendance_allowance_reported__2027', 'attendance_allowance_reported__2028', 'attendance_allowance_reported__2029', 'attendance_allowance_reported__2030', 'student_loans__2023', 'student_loans__2024', 'student_loans__2025', 'student_loans__2026', 'student_loans__2027', 'student_loans__2028', 'student_loans__2029', 'student_loans__2030', 'esa_income_reported__2023', 'esa_income_reported__2024', 'esa_income_reported__2025', 'esa_income_reported__2026', 'esa_income_reported__2027', 'esa_income_reported__2028', 'esa_income_reported__2029', 'esa_income_reported__2030', 'state_pension_reported__2023', 'state_pension_reported__2024', 'state_pension_reported__2025', 'state_pension_reported__2026', 'state_pension_reported__2027', 'state_pension_reported__2028', 'state_pension_reported__2029', 'state_pension_reported__2030', 'afcs_reported__2023', 'afcs_reported__2024', 'afcs_reported__2025', 'afcs_reported__2026', 'afcs_reported__2027', 'afcs_reported__2028', 'afcs_reported__2029', 'afcs_reported__2030', 'council_tax_benefit_reported__2023', 'council_tax_benefit_reported__2024', 'council_tax_benefit_reported__2025', 'council_tax_benefit_reported__2026', 'council_tax_benefit_reported__2027', 'council_tax_benefit_reported__2028', 'council_tax_benefit_reported__2029', 'council_tax_benefit_reported__2030', 'income_support_reported__2023', 
'income_support_reported__2024', 'income_support_reported__2025', 'income_support_reported__2026', 'income_support_reported__2027', 'income_support_reported__2028', 'income_support_reported__2029', 'income_support_reported__2030', 'esa_contrib_reported__2023', 'esa_contrib_reported__2024', 'esa_contrib_reported__2025', 'esa_contrib_reported__2026', 'esa_contrib_reported__2027', 'esa_contrib_reported__2028', 'esa_contrib_reported__2029', 'esa_contrib_reported__2030', 'jsa_income_reported__2023', 'jsa_income_reported__2024', 'jsa_income_reported__2025', 'jsa_income_reported__2026', 'jsa_income_reported__2027', 'jsa_income_reported__2028', 'jsa_income_reported__2029', 'jsa_income_reported__2030', 'child_ema__2023', 'child_ema__2024', 'child_ema__2025', 'child_ema__2026', 'child_ema__2027', 'child_ema__2028', 'child_ema__2029', 'child_ema__2030', 'dla_sc_reported__2023', 'dla_sc_reported__2024', 'dla_sc_reported__2025', 'dla_sc_reported__2026', 'dla_sc_reported__2027', 'dla_sc_reported__2028', 'dla_sc_reported__2029', 'dla_sc_reported__2030', 'dla_m_reported__2023', 'dla_m_reported__2024', 'dla_m_reported__2025', 'dla_m_reported__2026', 'dla_m_reported__2027', 'dla_m_reported__2028', 'dla_m_reported__2029', 'dla_m_reported__2030', 'housing_benefit_reported__2023', 'housing_benefit_reported__2024', 'housing_benefit_reported__2025', 'housing_benefit_reported__2026', 'housing_benefit_reported__2027', 'housing_benefit_reported__2028', 'housing_benefit_reported__2029', 'housing_benefit_reported__2030', 'would_claim_uc__2023', 'would_claim_uc__2024', 'would_claim_uc__2025', 'would_claim_uc__2026', 'would_claim_uc__2027', 'would_claim_uc__2028', 'would_claim_uc__2029', 'would_claim_uc__2030', 'universal_credit_reported__2023', 'universal_credit_reported__2024', 'universal_credit_reported__2025', 'universal_credit_reported__2026', 'universal_credit_reported__2027', 'universal_credit_reported__2028', 'universal_credit_reported__2029', 'universal_credit_reported__2030', 
'uc_standard_allowance_claimant_type__2025', 'uc_standard_allowance__2025', 'uc_standard_allowance__2026', 'uc_standard_allowance__2027', 'uc_standard_allowance__2028', 'uc_standard_allowance__2029', 'uc_limited_capability_for_WRA__2026', 'uc_limited_capability_for_WRA__2027', 'uc_limited_capability_for_WRA__2028', 'uc_limited_capability_for_WRA__2029', 'uc_LCWRA_element__2026', 'uc_LCWRA_element__2027', 'uc_LCWRA_element__2028', 'uc_LCWRA_element__2029', 'pip_m_reported__2023', 'pip_m_reported__2024', 'pip_m_reported__2025', 'pip_m_reported__2026', 'pip_m_reported__2027', 'pip_m_reported__2028', 'pip_m_reported__2029', 'pip_m_reported__2030', 'pip_dl_reported__2023', 'pip_dl_reported__2024', 'pip_dl_reported__2025', 'pip_dl_reported__2026', 'pip_dl_reported__2027', 'pip_dl_reported__2028', 'pip_dl_reported__2029', 'pip_dl_reported__2030', 'pension_credit_reported__2023', 'pension_credit_reported__2024', 'pension_credit_reported__2025', 'pension_credit_reported__2026', 'pension_credit_reported__2027', 'pension_credit_reported__2028', 'pension_credit_reported__2029', 'pension_credit_reported__2030', 'would_claim_pc__2023', 'would_claim_pc__2024', 'would_claim_pc__2025', 'would_claim_pc__2026', 'would_claim_pc__2027', 'would_claim_pc__2028', 'would_claim_pc__2029', 'would_claim_pc__2030', 'would_evade_tv_licence_fee__2023', 'would_evade_tv_licence_fee__2024', 'would_evade_tv_licence_fee__2025', 'would_evade_tv_licence_fee__2026', 'would_evade_tv_licence_fee__2027', 'would_evade_tv_licence_fee__2028', 'would_evade_tv_licence_fee__2029', 'would_evade_tv_licence_fee__2030', 'free_school_fruit_veg__2023', 'free_school_fruit_veg__2024', 'free_school_fruit_veg__2025', 'free_school_fruit_veg__2026', 'free_school_fruit_veg__2027', 'free_school_fruit_veg__2028', 'free_school_fruit_veg__2029', 'free_school_fruit_veg__2030', 'dfe_education_spending__2023', 'dfe_education_spending__2024', 'dfe_education_spending__2025', 'dfe_education_spending__2026', 
'dfe_education_spending__2027', 'dfe_education_spending__2028', 'dfe_education_spending__2029', 'dfe_education_spending__2030', 'free_school_meals__2023', 'free_school_meals__2024', 'free_school_meals__2025', 'free_school_meals__2026', 'free_school_meals__2027', 'free_school_meals__2028', 'free_school_meals__2029', 'free_school_meals__2030', 'would_claim_extended_childcare__2023', 'would_claim_extended_childcare__2024', 'would_claim_extended_childcare__2025', 'would_claim_extended_childcare__2026', 'would_claim_extended_childcare__2027', 'would_claim_extended_childcare__2028', 'would_claim_extended_childcare__2029', 'would_claim_extended_childcare__2030', 'maximum_extended_childcare_hours_usage__2023', 'maximum_extended_childcare_hours_usage__2024', 'maximum_extended_childcare_hours_usage__2025', 'maximum_extended_childcare_hours_usage__2026', 'maximum_extended_childcare_hours_usage__2027', 'maximum_extended_childcare_hours_usage__2028', 'maximum_extended_childcare_hours_usage__2029', 'maximum_extended_childcare_hours_usage__2030', 'would_claim_targeted_childcare__2023', 'would_claim_targeted_childcare__2024', 'would_claim_targeted_childcare__2025', 'would_claim_targeted_childcare__2026', 'would_claim_targeted_childcare__2027', 'would_claim_targeted_childcare__2028', 'would_claim_targeted_childcare__2029', 'would_claim_targeted_childcare__2030', 'would_claim_universal_childcare__2023', 'would_claim_universal_childcare__2024', 'would_claim_universal_childcare__2025', 'would_claim_universal_childcare__2026', 'would_claim_universal_childcare__2027', 'would_claim_universal_childcare__2028', 'would_claim_universal_childcare__2029', 'would_claim_universal_childcare__2030', 'student_loan_repayments__2023', 'student_loan_repayments__2024', 'student_loan_repayments__2025', 'student_loan_repayments__2026', 'student_loan_repayments__2027', 'student_loan_repayments__2028', 'student_loan_repayments__2029', 'student_loan_repayments__2030', 'would_claim_child_benefit__2023', 
'would_claim_child_benefit__2024', 'would_claim_child_benefit__2025', 'would_claim_child_benefit__2026', 'would_claim_child_benefit__2027', 'would_claim_child_benefit__2028', 'would_claim_child_benefit__2029', 'would_claim_child_benefit__2030', 'child_benefit_reported__2023', 'child_benefit_reported__2024', 'child_benefit_reported__2025', 'child_benefit_reported__2026', 'child_benefit_reported__2027', 'child_benefit_reported__2028', 'child_benefit_reported__2029', 'child_benefit_reported__2030', 'capital_gains_before_response__2023', 'capital_gains_before_response__2024', 'capital_gains_before_response__2025', 'capital_gains_before_response__2026', 'capital_gains_before_response__2027', 'capital_gains_before_response__2028', 'capital_gains_before_response__2029', 'capital_gains_before_response__2030', 'tax_free_savings_income__2023', 'tax_free_savings_income__2024', 'tax_free_savings_income__2025', 'tax_free_savings_income__2026', 'tax_free_savings_income__2027', 'tax_free_savings_income__2028', 'tax_free_savings_income__2029', 'tax_free_savings_income__2030', 'would_claim_tfc__2023', 'would_claim_tfc__2024', 'would_claim_tfc__2025', 'would_claim_tfc__2026', 'would_claim_tfc__2027', 'would_claim_tfc__2028', 'would_claim_tfc__2029', 'would_claim_tfc__2030', 'student_loan_plan__2023', 'student_loan_plan__2024', 'student_loan_plan__2025', 'student_loan_plan__2026', 'student_loan_plan__2027', 'student_loan_plan__2028', 'student_loan_plan__2029', 'student_loan_plan__2030', 'domestic_rates__2023', 'domestic_rates__2024', 'domestic_rates__2025', 'domestic_rates__2026', 'domestic_rates__2027', 'domestic_rates__2028', 'domestic_rates__2029', 'domestic_rates__2030']\n", + "DataFrame shape: (115612, 1127)\n", + "'person_household_id' columns: ['person_household_id__2023', 'person_household_id__2024', 'person_household_id__2025', 'person_household_id__2026', 'person_household_id__2027', 'person_household_id__2028', 'person_household_id__2029', 'person_household_id__2030']\n", 
+ "Filtered DataFrame shape: (8470, 1127)\n", + "[ERROR] ValueError: Unable to set value \"[ 39361. 39361. 39361. ... 134410.5 134410.5 6000. ]\" for variable \"corporate_wealth\", as its length is 8470 while there are 4108 households in the simulation.\n", + "\n", + "This confirms the bug exists in the high-level API.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\n", + " File \"/var/folders/qf/7fglpnr94wsc5xgbqkp80xbw0000gn/T/ipykernel_19066/3661659745.py\", line 7, in \n", + " sim_wales = Simulation(country=\"uk\", scope=\"macro\", region=\"country/wales\")\n", + " File \"/Users/administrator/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py\", line 110, in __init__\n", + " self._initialise_simulations()\n", + " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^\n", + " File \"/Users/administrator/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py\", line 202, in _initialise_simulations\n", + " self.baseline_simulation = self._initialise_simulation(\n", + " ~~~~~~~~~~~~~~~~~~~~~~~~~~~^\n", + " scope=self.options.scope,\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " ...<5 lines>...\n", + " subsample=self.options.subsample,\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " )\n", + " ^\n", + " File \"/Users/administrator/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py\", line 260, in _initialise_simulation\n", + " simulation = self._apply_region_to_simulation(\n", + " country=country,\n", + " ...<4 lines>...\n", + " time_period=time_period,\n", + " )\n", + " File \"/Users/administrator/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py\", line 307, in _apply_region_to_simulation\n", + " simulation = simulation_type(\n", + " dataset=filtered_df, reform=reform\n", + " )\n", + " File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_uk/simulation.py\", line 100, in __init__\n", + " self.build_from_dataframe(dataset)\n", + " 
~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^\n", + " File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_uk/simulation.py\", line 286, in build_from_dataframe\n", + " self.set_input(variable, time_period, df[column])\n", + " ~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/simulations/simulation.py\", line 1241, in set_input\n", + " self.get_holder(variable_name).set_input(\n", + " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^\n", + " period, value, self.branch_name\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " )\n", + " ^\n", + " File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/holders/holder.py\", line 244, in set_input\n", + " return self._set(period, array, branch_name)\n", + " ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/holders/holder.py\", line 286, in _set\n", + " value = self._to_array(value)\n", + " File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/holders/holder.py\", line 253, in _to_array\n", + " raise ValueError(\n", + " ...<7 lines>...\n", + " )\n", + "ValueError: Unable to set value \"[ 39361. 39361. 39361. ... 134410.5 134410.5 6000. 
]\" for variable \"corporate_wealth\", as its length is 8470 while there are 4108 households in the simulation.\n" + ] + } + ], + "source": [ + "# Test with policyengine.Simulation using region=\"country/wales\"\n", + "print(\"=== Step 7: Test with policyengine.Simulation ===\")\n", + "print(\"Creating Simulation with region='country/wales'...\")\n", + "print()\n", + "\n", + "try:\n", + " sim_wales = Simulation(country=\"uk\", scope=\"macro\", region=\"country/wales\")\n", + " \n", + " wales_underlying = sim_wales.baseline_simulation\n", + " print(f\"Wales simulation created!\")\n", + " print(f\" Person count: {wales_underlying.persons.count}\")\n", + " print(f\" Household count: {wales_underlying.household.count}\")\n", + " \n", + " # Try calculating the problematic variable\n", + " print(\"\\nCalculating would_evade_tv_licence_fee...\")\n", + " result = sim_wales.calculate(\"would_evade_tv_licence_fee\")\n", + " print(f\" Result length: {len(result)}\")\n", + " print(\" [OK] No error!\")\n", + " \n", + "except Exception as e:\n", + " print(f\"[ERROR] {type(e).__name__}: {e}\")\n", + " print()\n", + " print(\"This confirms the bug exists in the high-level API.\")\n", + " traceback.print_exc()" + ] + }, + { + "cell_type": "markdown", + "id": "cell-23", + "metadata": {}, + "source": [ + "## Step 8: Compare with Constituency Filtering (Should Work)\n", + "\n", + "Constituency filtering uses weight adjustment instead of DataFrame subsetting." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "cell-24", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "No data provided, using default dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n", + "Using dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n", + "Downloading enhanced_frs_2023_24.h5 from bucket policyengine-uk-data-private\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== Step 8: Test Constituency Filtering ===\n", + "Creating Simulation with region='constituency/Cardiff South and Penarth'...\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:No version specified for policyengine-uk-data-private, enhanced_frs_2023_24.h5. Using latest version: 1.29.4\n", + "WARNING:root:No metadata found for blob policyengine-uk-data-private, constituencies_2024.csv, so it has no version attached.\n", + "WARNING:root:No version specified for policyengine-uk-data-private, constituencies_2024.csv. Using latest version: None\n", + "WARNING:root:No version specified for policyengine-uk-data-private, parliamentary_constituency_weights.h5. 
Using latest version: 1.29.4\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Constituency simulation created!\n", + " Person count: 115612\n", + " Household count: 53508\n", + " (Full UK counts, but weights adjusted for constituency)\n", + "\n", + "Calculating would_evade_tv_licence_fee...\n", + "[ERROR] AttributeError: 'Simulation' object has no attribute 'calculate'\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\n", + " File \"/var/folders/qf/7fglpnr94wsc5xgbqkp80xbw0000gn/T/ipykernel_19066/2462177757.py\", line 21, in \n", + " result = sim_const.calculate(\"would_evade_tv_licence_fee\")\n", + " ^^^^^^^^^^^^^^^^^^^\n", + "AttributeError: 'Simulation' object has no attribute 'calculate'\n" + ] + } + ], + "source": [ + "# Test constituency filtering\n", + "print(\"=== Step 8: Test Constituency Filtering ===\")\n", + "print(\"Creating Simulation with region='constituency/Cardiff South and Penarth'...\")\n", + "print()\n", + "\n", + "try:\n", + " sim_const = Simulation(\n", + " country=\"uk\", \n", + " scope=\"macro\", \n", + " region=\"constituency/Cardiff South and Penarth\"\n", + " )\n", + " \n", + " const_underlying = sim_const.baseline_simulation\n", + " print(f\"Constituency simulation created!\")\n", + " print(f\" Person count: {const_underlying.persons.count}\")\n", + " print(f\" Household count: {const_underlying.household.count}\")\n", + " print(\" (Full UK counts, but weights adjusted for constituency)\")\n", + " \n", + " # Try calculating the problematic variable\n", + " print(\"\\nCalculating would_evade_tv_licence_fee...\")\n", + " result = sim_const.calculate(\"would_evade_tv_licence_fee\")\n", + " print(f\" Result length: {len(result)}\")\n", + " print(\" [OK] Constituency filtering works!\")\n", + " \n", + "except Exception as e:\n", + " print(f\"[ERROR] {type(e).__name__}: {e}\")\n", + " traceback.print_exc()" + ] + }, + { + "cell_type": "code", + 
"execution_count": 17, + "id": "cell-25", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "=== Step 8b: Test Local Authority Filtering ===\n", + "Creating Simulation with region='local_authority/Cardiff'...\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "No data provided, using default dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n", + "Using dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n", + "Downloading enhanced_frs_2023_24.h5 from bucket policyengine-uk-data-private\n", + "WARNING:root:No version specified for policyengine-uk-data-private, enhanced_frs_2023_24.h5. Using latest version: 1.29.4\n", + "WARNING:root:No metadata found for blob policyengine-uk-data-private, local_authorities_2021.csv, so it has no version attached.\n", + "WARNING:root:No version specified for policyengine-uk-data-private, local_authorities_2021.csv. Using latest version: None\n", + "WARNING:root:No version specified for policyengine-uk-data-private, local_authority_weights.h5. 
Using latest version: 1.29.4\n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[17], line 7\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28mprint\u001b[39m()\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m----> 7\u001b[0m sim_la \u001b[38;5;241m=\u001b[39m \u001b[43mSimulation\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43mcountry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muk\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mscope\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmacro\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43mregion\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlocal_authority/Cardiff\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 11\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 13\u001b[0m la_underlying \u001b[38;5;241m=\u001b[39m sim_la\u001b[38;5;241m.\u001b[39mbaseline_simulation\n\u001b[1;32m 14\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mLocal Authority simulation created!\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py:110\u001b[0m, in \u001b[0;36mSimulation.__init__\u001b[0;34m(self, **options)\u001b[0m\n\u001b[1;32m 108\u001b[0m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_set_data(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mdata)\n\u001b[1;32m 109\u001b[0m logging\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mData loaded\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 110\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_initialise_simulations\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 111\u001b[0m logging\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSimulations initialised\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 112\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcheck_data_version()\n", + "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py:202\u001b[0m, in \u001b[0;36mSimulation._initialise_simulations\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 201\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_initialise_simulations\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m--> 202\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbaseline_simulation \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_initialise_simulation\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 203\u001b[0m \u001b[43m \u001b[49m\u001b[43mscope\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mscope\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 204\u001b[0m \u001b[43m \u001b[49m\u001b[43mcountry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcountry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 205\u001b[0m 
\u001b[43m \u001b[49m\u001b[43mreform\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbaseline\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 206\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 207\u001b[0m \u001b[43m \u001b[49m\u001b[43mtime_period\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtime_period\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 208\u001b[0m \u001b[43m \u001b[49m\u001b[43mregion\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mregion\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 209\u001b[0m \u001b[43m \u001b[49m\u001b[43msubsample\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msubsample\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 210\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 212\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mreform \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 213\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreform_simulation \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_initialise_simulation(\n\u001b[1;32m 214\u001b[0m 
scope\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mscope,\n\u001b[1;32m 215\u001b[0m country\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mcountry,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 220\u001b[0m subsample\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39msubsample,\n\u001b[1;32m 221\u001b[0m )\n", + "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py:260\u001b[0m, in \u001b[0;36mSimulation._initialise_simulation\u001b[0;34m(self, country, scope, reform, data, time_period, region, subsample)\u001b[0m\n\u001b[1;32m 257\u001b[0m simulation\u001b[38;5;241m.\u001b[39mdefault_calculation_period \u001b[38;5;241m=\u001b[39m time_period\n\u001b[1;32m 259\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m region \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 260\u001b[0m simulation \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_apply_region_to_simulation\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 261\u001b[0m \u001b[43m \u001b[49m\u001b[43mcountry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcountry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 262\u001b[0m \u001b[43m \u001b[49m\u001b[43msimulation\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msimulation\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 263\u001b[0m \u001b[43m \u001b[49m\u001b[43msimulation_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_simulation_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 264\u001b[0m \u001b[43m \u001b[49m\u001b[43mregion\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mregion\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 265\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mreform\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreform\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 266\u001b[0m \u001b[43m \u001b[49m\u001b[43mtime_period\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtime_period\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 267\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 269\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m subsample \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 270\u001b[0m simulation \u001b[38;5;241m=\u001b[39m simulation\u001b[38;5;241m.\u001b[39msubsample(subsample)\n", + "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py:366\u001b[0m, in \u001b[0;36mSimulation._apply_region_to_simulation\u001b[0;34m(self, country, simulation, simulation_type, region, reform, time_period)\u001b[0m\n\u001b[1;32m 362\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 363\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 364\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mLocal authority \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mla\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m not found. 
See \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mla_names_local_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m for the list of available local authorities.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 365\u001b[0m )\n\u001b[0;32m--> 366\u001b[0m weights_local_path \u001b[38;5;241m=\u001b[39m \u001b[43mdownload\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 367\u001b[0m \u001b[43m \u001b[49m\u001b[43mgcs_bucket\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpolicyengine-uk-data-private\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 368\u001b[0m \u001b[43m \u001b[49m\u001b[43mgcs_key\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlocal_authority_weights.h5\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 369\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 371\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m h5py\u001b[38;5;241m.\u001b[39mFile(weights_local_path, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[1;32m 372\u001b[0m weights \u001b[38;5;241m=\u001b[39m f[\u001b[38;5;28mstr\u001b[39m(time_period)][\u001b[38;5;241m.\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;241m.\u001b[39m]\n", + "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/utils/data_download.py:38\u001b[0m, in \u001b[0;36mdownload\u001b[0;34m(gcs_key, gcs_bucket, version, return_version)\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 22\u001b[0m \u001b[38;5;124;03mDownload a file from Google Cloud Storage.\u001b[39;00m\n\u001b[1;32m 23\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 35\u001b[0m \u001b[38;5;124;03m Otherwise: just the local_path string\u001b[39;00m\n\u001b[1;32m 36\u001b[0m 
\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 37\u001b[0m logging\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUsing Google Cloud Storage for download.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 38\u001b[0m local_path, downloaded_version \u001b[38;5;241m=\u001b[39m \u001b[43mdownload_file_from_gcs\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 39\u001b[0m \u001b[43m \u001b[49m\u001b[43mbucket_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgcs_bucket\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 40\u001b[0m \u001b[43m \u001b[49m\u001b[43mgcs_key\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgcs_key\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 41\u001b[0m \u001b[43m \u001b[49m\u001b[43mversion\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mversion\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 42\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 43\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m return_version:\n\u001b[1;32m 44\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m local_path, downloaded_version\n", + "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/utils/google_cloud_bucket.py:75\u001b[0m, in \u001b[0;36mdownload_file_from_gcs\u001b[0;34m(bucket_name, gcs_key, version)\u001b[0m\n\u001b[1;32m 72\u001b[0m local_path \u001b[38;5;241m=\u001b[39m DATASETS_DIR \u001b[38;5;241m/\u001b[39m gcs_key\n\u001b[1;32m 73\u001b[0m local_path\u001b[38;5;241m.\u001b[39mparent\u001b[38;5;241m.\u001b[39mmkdir(parents\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, exist_ok\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m---> 75\u001b[0m version \u001b[38;5;241m=\u001b[39m \u001b[43m_get_client\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdownload\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 76\u001b[0m \u001b[43m \u001b[49m\u001b[43mbucket_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 
77\u001b[0m \u001b[43m \u001b[49m\u001b[43mgcs_key\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 78\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocal_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 79\u001b[0m \u001b[43m \u001b[49m\u001b[43mversion\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mversion\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 80\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_version\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 81\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 82\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mstr\u001b[39m(local_path), version\n", + "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/utils/data/caching_google_storage_client.py:64\u001b[0m, in \u001b[0;36mCachingGoogleStorageClient.download\u001b[0;34m(self, bucket, key, target, version, return_version)\u001b[0m\n\u001b[1;32m 60\u001b[0m version \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclient\u001b[38;5;241m.\u001b[39m_get_latest_version(bucket, key)\n\u001b[1;32m 61\u001b[0m logging\u001b[38;5;241m.\u001b[39mwarning(\n\u001b[1;32m 62\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo version specified for \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbucket\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m, \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkey\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m. 
Using latest version: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mversion\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 63\u001b[0m )\n\u001b[0;32m---> 64\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msync\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbucket\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mversion\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 65\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcache\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_data_key(bucket, key, version))\n\u001b[1;32m 66\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(data) \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28mbytes\u001b[39m:\n", + "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/utils/data/caching_google_storage_client.py:106\u001b[0m, in \u001b[0;36mCachingGoogleStorageClient.sync\u001b[0;34m(self, bucket, key, version)\u001b[0m\n\u001b[1;32m 104\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCache exists and crc is unchanged for \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mid_string\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m .\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 105\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[0;32m--> 106\u001b[0m [content, downloaded_crc] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdownload\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 107\u001b[0m \u001b[43m \u001b[49m\u001b[43mbucket\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mversion\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mversion\u001b[49m\n\u001b[1;32m 108\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 109\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\n\u001b[1;32m 110\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDownloaded new version of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mid_string\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m with crc \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdownloaded_crc\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 111\u001b[0m )\n\u001b[1;32m 113\u001b[0m \u001b[38;5;66;03m# atomic transaction to update both the data and the metadata\u001b[39;00m\n\u001b[1;32m 114\u001b[0m \u001b[38;5;66;03m# at the same time.\u001b[39;00m\n", + "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/utils/data/version_aware_storage_client.py:171\u001b[0m, in \u001b[0;36mVersionAwareStorageClient.download\u001b[0;34m(self, bucket_name, key, version)\u001b[0m\n\u001b[1;32m 166\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\n\u001b[1;32m 167\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDownloading \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbucket_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkey\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 168\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, version: \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;250m \u001b[39m\u001b[38;5;241m+\u001b[39m\u001b[38;5;250m \u001b[39mversion\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mif\u001b[39;00m\u001b[38;5;250m \u001b[39mversion\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01melse\u001b[39;00m\u001b[38;5;250m 
\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 169\u001b[0m )\n\u001b[1;32m 170\u001b[0m blob \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_blob(bucket_name, key, version)\n\u001b[0;32m--> 171\u001b[0m content \u001b[38;5;241m=\u001b[39m \u001b[43mblob\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdownload_as_bytes\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 172\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\n\u001b[1;32m 173\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDownloaded \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbucket_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkey\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 174\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, version: \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;250m \u001b[39m\u001b[38;5;241m+\u001b[39m\u001b[38;5;250m \u001b[39mversion\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mif\u001b[39;00m\u001b[38;5;250m \u001b[39mversion\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01melse\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 175\u001b[0m )\n\u001b[1;32m 176\u001b[0m \u001b[38;5;66;03m# According to documentation, blob.crc32c is updated as a side effect of\u001b[39;00m\n\u001b[1;32m 177\u001b[0m \u001b[38;5;66;03m# downloading the content. 
This should be the CRC of the downloaded\u001b[39;00m\n\u001b[1;32m 178\u001b[0m \u001b[38;5;66;03m# content (avoiding race conditions with the cloud).\u001b[39;00m\n", + "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/cloud/storage/blob.py:1530\u001b[0m, in \u001b[0;36mBlob.download_as_bytes\u001b[0;34m(self, client, start, end, raw_download, if_etag_match, if_etag_not_match, if_generation_match, if_generation_not_match, if_metageneration_match, if_metageneration_not_match, timeout, checksum, retry, single_shot_download)\u001b[0m\n\u001b[1;32m 1527\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m create_trace_span(name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mStorage.Blob.downloadAsBytes\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m 1528\u001b[0m string_buffer \u001b[38;5;241m=\u001b[39m BytesIO()\n\u001b[0;32m-> 1530\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_prep_and_do_download\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1531\u001b[0m \u001b[43m \u001b[49m\u001b[43mstring_buffer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1532\u001b[0m \u001b[43m \u001b[49m\u001b[43mclient\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mclient\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1533\u001b[0m \u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1534\u001b[0m \u001b[43m \u001b[49m\u001b[43mend\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mend\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1535\u001b[0m \u001b[43m \u001b[49m\u001b[43mraw_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mraw_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1536\u001b[0m \u001b[43m \u001b[49m\u001b[43mif_etag_match\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mif_etag_match\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1537\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mif_etag_not_match\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mif_etag_not_match\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1538\u001b[0m \u001b[43m \u001b[49m\u001b[43mif_generation_match\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mif_generation_match\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1539\u001b[0m \u001b[43m \u001b[49m\u001b[43mif_generation_not_match\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mif_generation_not_match\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1540\u001b[0m \u001b[43m \u001b[49m\u001b[43mif_metageneration_match\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mif_metageneration_match\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1541\u001b[0m \u001b[43m \u001b[49m\u001b[43mif_metageneration_not_match\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mif_metageneration_not_match\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1542\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1543\u001b[0m \u001b[43m \u001b[49m\u001b[43mchecksum\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchecksum\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1544\u001b[0m \u001b[43m \u001b[49m\u001b[43mretry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1545\u001b[0m \u001b[43m \u001b[49m\u001b[43msingle_shot_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msingle_shot_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1546\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1547\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m string_buffer\u001b[38;5;241m.\u001b[39mgetvalue()\n", + "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/cloud/storage/blob.py:4659\u001b[0m, in \u001b[0;36mBlob._prep_and_do_download\u001b[0;34m(self, file_obj, client, start, end, raw_download, if_etag_match, 
if_etag_not_match, if_generation_match, if_generation_not_match, if_metageneration_match, if_metageneration_not_match, timeout, checksum, retry, single_shot_download, command)\u001b[0m\n\u001b[1;32m 4656\u001b[0m transport \u001b[38;5;241m=\u001b[39m client\u001b[38;5;241m.\u001b[39m_http\n\u001b[1;32m 4658\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 4659\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_do_download\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 4660\u001b[0m \u001b[43m \u001b[49m\u001b[43mtransport\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4661\u001b[0m \u001b[43m \u001b[49m\u001b[43mfile_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4662\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_url\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4663\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4664\u001b[0m \u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4665\u001b[0m \u001b[43m \u001b[49m\u001b[43mend\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4666\u001b[0m \u001b[43m \u001b[49m\u001b[43mraw_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4667\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4668\u001b[0m \u001b[43m \u001b[49m\u001b[43mchecksum\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchecksum\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4669\u001b[0m \u001b[43m \u001b[49m\u001b[43mretry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4670\u001b[0m \u001b[43m \u001b[49m\u001b[43msingle_shot_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msingle_shot_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4671\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4672\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m 
InvalidResponse \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[1;32m 4673\u001b[0m _raise_from_invalid_response(exc)\n", + "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/cloud/storage/blob.py:1094\u001b[0m, in \u001b[0;36mBlob._do_download\u001b[0;34m(self, transport, file_obj, download_url, headers, start, end, raw_download, timeout, checksum, retry, single_shot_download)\u001b[0m\n\u001b[1;32m 1076\u001b[0m download \u001b[38;5;241m=\u001b[39m klass(\n\u001b[1;32m 1077\u001b[0m download_url,\n\u001b[1;32m 1078\u001b[0m stream\u001b[38;5;241m=\u001b[39mfile_obj,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1087\u001b[0m single_shot_download\u001b[38;5;241m=\u001b[39msingle_shot_download,\n\u001b[1;32m 1088\u001b[0m )\n\u001b[1;32m 1089\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m create_trace_span(\n\u001b[1;32m 1090\u001b[0m name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mStorage.\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdownload_class\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/consume\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 1091\u001b[0m attributes\u001b[38;5;241m=\u001b[39mextra_attributes,\n\u001b[1;32m 1092\u001b[0m api_request\u001b[38;5;241m=\u001b[39margs,\n\u001b[1;32m 1093\u001b[0m ):\n\u001b[0;32m-> 1094\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mdownload\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconsume\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtransport\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1095\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_extract_headers_from_download(response)\n\u001b[1;32m 1096\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "File 
\u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/cloud/storage/_media/requests/download.py:280\u001b[0m, in \u001b[0;36mDownload.consume\u001b[0;34m(self, transport, timeout)\u001b[0m\n\u001b[1;32m 276\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_write_to_stream(result)\n\u001b[1;32m 278\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m result\n\u001b[0;32m--> 280\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_request_helpers\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwait_and_retry\u001b[49m\u001b[43m(\u001b[49m\u001b[43mretriable_request\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_strategy\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/cloud/storage/_media/requests/_request_helpers.py:107\u001b[0m, in \u001b[0;36mwait_and_retry\u001b[0;34m(func, retry_strategy)\u001b[0m\n\u001b[1;32m 105\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m retry_strategy:\n\u001b[1;32m 106\u001b[0m func \u001b[38;5;241m=\u001b[39m retry_strategy(func)\n\u001b[0;32m--> 107\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:294\u001b[0m, in \u001b[0;36mRetry.__call__..retry_wrapped_func\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 290\u001b[0m target \u001b[38;5;241m=\u001b[39m functools\u001b[38;5;241m.\u001b[39mpartial(func, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 291\u001b[0m sleep_generator \u001b[38;5;241m=\u001b[39m exponential_sleep_generator(\n\u001b[1;32m 292\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_initial, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_maximum, 
multiplier\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_multiplier\n\u001b[1;32m 293\u001b[0m )\n\u001b[0;32m--> 294\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mretry_target\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 295\u001b[0m \u001b[43m \u001b[49m\u001b[43mtarget\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 296\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_predicate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 297\u001b[0m \u001b[43m \u001b[49m\u001b[43msleep_generator\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 298\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 299\u001b[0m \u001b[43m \u001b[49m\u001b[43mon_error\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mon_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 300\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:147\u001b[0m, in \u001b[0;36mretry_target\u001b[0;34m(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)\u001b[0m\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[1;32m 146\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 147\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mtarget\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 148\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m inspect\u001b[38;5;241m.\u001b[39misawaitable(result):\n\u001b[1;32m 149\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(_ASYNC_RETRY_WARNING)\n", + "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/cloud/storage/_media/requests/download.py:276\u001b[0m, in 
\u001b[0;36mDownload.consume..retriable_request\u001b[0;34m()\u001b[0m\n\u001b[1;32m 273\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(msg) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mexc\u001b[39;00m\n\u001b[1;32m 274\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_bytes_downloaded \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m--> 276\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_write_to_stream\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresult\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 278\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m result\n", + "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/cloud/storage/_media/requests/download.py:149\u001b[0m, in \u001b[0;36mDownload._write_to_stream\u001b[0;34m(self, response)\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 145\u001b[0m body_iter \u001b[38;5;241m=\u001b[39m response\u001b[38;5;241m.\u001b[39miter_content(\n\u001b[1;32m 146\u001b[0m chunk_size\u001b[38;5;241m=\u001b[39m_request_helpers\u001b[38;5;241m.\u001b[39m_SINGLE_GET_CHUNK_SIZE,\n\u001b[1;32m 147\u001b[0m decode_unicode\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 148\u001b[0m )\n\u001b[0;32m--> 149\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mchunk\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mbody_iter\u001b[49m\u001b[43m:\u001b[49m\n\u001b[1;32m 150\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_stream\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwrite\u001b[49m\u001b[43m(\u001b[49m\u001b[43mchunk\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 151\u001b[0m \u001b[43m 
\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_bytes_downloaded\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mchunk\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/requests/models.py:820\u001b[0m, in \u001b[0;36mResponse.iter_content..generate\u001b[0;34m()\u001b[0m\n\u001b[1;32m 818\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraw, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstream\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m 819\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 820\u001b[0m \u001b[38;5;28;01myield from\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraw\u001b[38;5;241m.\u001b[39mstream(chunk_size, decode_content\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 821\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ProtocolError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 822\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ChunkedEncodingError(e)\n", + "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/urllib3/response.py:1253\u001b[0m, in \u001b[0;36mHTTPResponse.stream\u001b[0;34m(self, amt, decode_content)\u001b[0m\n\u001b[1;32m 1247\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1248\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m (\n\u001b[1;32m 1249\u001b[0m \u001b[38;5;129;01mnot\u001b[39;00m is_fp_closed(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp)\n\u001b[1;32m 1250\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoded_buffer) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 1251\u001b[0m 
\u001b[38;5;129;01mor\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoder \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoder\u001b[38;5;241m.\u001b[39mhas_unconsumed_tail)\n\u001b[1;32m 1252\u001b[0m ):\n\u001b[0;32m-> 1253\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mamt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_content\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1255\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data:\n\u001b[1;32m 1256\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m data\n", + "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/urllib3/response.py:1108\u001b[0m, in \u001b[0;36mHTTPResponse.read\u001b[0;34m(self, amt, decode_content, cache_content)\u001b[0m\n\u001b[1;32m 1105\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoded_buffer) \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m amt:\n\u001b[1;32m 1106\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoded_buffer\u001b[38;5;241m.\u001b[39mget(amt)\n\u001b[0;32m-> 1108\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_raw_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1110\u001b[0m flush_decoder \u001b[38;5;241m=\u001b[39m amt \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mor\u001b[39;00m (amt \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m 
data)\n\u001b[1;32m 1112\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 1113\u001b[0m \u001b[38;5;129;01mnot\u001b[39;00m data\n\u001b[1;32m 1114\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoded_buffer) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 1115\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoder \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoder\u001b[38;5;241m.\u001b[39mhas_unconsumed_tail)\n\u001b[1;32m 1116\u001b[0m ):\n", + "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/urllib3/response.py:1024\u001b[0m, in \u001b[0;36mHTTPResponse._raw_read\u001b[0;34m(self, amt, read1)\u001b[0m\n\u001b[1;32m 1021\u001b[0m fp_closed \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mclosed\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 1023\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_error_catcher():\n\u001b[0;32m-> 1024\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fp_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mread1\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mread1\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m fp_closed \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1025\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m amt \u001b[38;5;129;01mis\u001b[39;00m 
\u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m amt \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m data:\n\u001b[1;32m 1026\u001b[0m \u001b[38;5;66;03m# Platform-specific: Buggy versions of Python.\u001b[39;00m\n\u001b[1;32m 1027\u001b[0m \u001b[38;5;66;03m# Close the connection when no data is returned\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1032\u001b[0m \u001b[38;5;66;03m# not properly close the connection in all cases. There is\u001b[39;00m\n\u001b[1;32m 1033\u001b[0m \u001b[38;5;66;03m# no harm in redundantly calling close.\u001b[39;00m\n\u001b[1;32m 1034\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mclose()\n", + "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/urllib3/response.py:1007\u001b[0m, in \u001b[0;36mHTTPResponse._fp_read\u001b[0;34m(self, amt, read1)\u001b[0m\n\u001b[1;32m 1004\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mread1(amt) \u001b[38;5;28;01mif\u001b[39;00m amt \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mread1()\n\u001b[1;32m 1005\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1006\u001b[0m \u001b[38;5;66;03m# StringIO doesn't like amt=None\u001b[39;00m\n\u001b[0;32m-> 1007\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m amt \u001b[38;5;129;01mis\u001b[39;00m 
\u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mread()\n", + "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/http/client.py:479\u001b[0m, in \u001b[0;36mHTTPResponse.read\u001b[0;34m(self, amt)\u001b[0m\n\u001b[1;32m 476\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlength \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m amt \u001b[38;5;241m>\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlength:\n\u001b[1;32m 477\u001b[0m \u001b[38;5;66;03m# clip the read to the \"end of response\"\u001b[39;00m\n\u001b[1;32m 478\u001b[0m amt \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlength\n\u001b[0;32m--> 479\u001b[0m s \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 480\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m s \u001b[38;5;129;01mand\u001b[39;00m amt:\n\u001b[1;32m 481\u001b[0m \u001b[38;5;66;03m# Ideally, we would raise IncompleteRead if the content-length\u001b[39;00m\n\u001b[1;32m 482\u001b[0m \u001b[38;5;66;03m# wasn't satisfied, but it might break compatibility.\u001b[39;00m\n\u001b[1;32m 483\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_close_conn()\n", + "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/socket.py:719\u001b[0m, in \u001b[0;36mSocketIO.readinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 717\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m 
\u001b[38;5;167;01mOSError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcannot read from timed out object\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 718\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 719\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrecv_into\u001b[49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 720\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m timeout:\n\u001b[1;32m 721\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_timeout_occurred \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n", + "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/ssl.py:1304\u001b[0m, in \u001b[0;36mSSLSocket.recv_into\u001b[0;34m(self, buffer, nbytes, flags)\u001b[0m\n\u001b[1;32m 1300\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m flags \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 1301\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 1302\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnon-zero flags not allowed in calls to recv_into() on \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m\n\u001b[1;32m 1303\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m)\n\u001b[0;32m-> 1304\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnbytes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1305\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1306\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39mrecv_into(buffer, nbytes, flags)\n", + "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/ssl.py:1138\u001b[0m, in \u001b[0;36mSSLSocket.read\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m 1136\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1137\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m buffer \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1138\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sslobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1139\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1140\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_sslobj\u001b[38;5;241m.\u001b[39mread(\u001b[38;5;28mlen\u001b[39m)\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "# Test local authority filtering\n", + "print(\"\\n=== Step 8b: Test Local Authority Filtering ===\")\n", + "print(\"Creating Simulation with region='local_authority/Cardiff'...\")\n", + "print()\n", + "\n", + "try:\n", + " sim_la = Simulation(\n", + " country=\"uk\", \n", + " scope=\"macro\", \n", + " region=\"local_authority/Cardiff\"\n", + " )\n", + " \n", + " la_underlying = sim_la.baseline_simulation\n", + " print(f\"Local Authority simulation created!\")\n", + " print(f\" Person count: {la_underlying.persons.count}\")\n", + " print(f\" Household count: {la_underlying.household.count}\")\n", + " print(\" (Full UK counts, but weights adjusted for LA)\")\n", + " \n", + " # Try calculating the problematic variable\n", + " print(\"\\nCalculating would_evade_tv_licence_fee...\")\n", + " result = 
sim_la.calculate(\"would_evade_tv_licence_fee\")\n", + " print(f\" Result length: {len(result)}\")\n", + " print(\" [OK] Local authority filtering works!\")\n", + " \n", + "except Exception as e:\n", + " print(f\"[ERROR] {type(e).__name__}: {e}\")\n", + " traceback.print_exc()" + ] + }, + { + "cell_type": "markdown", + "id": "cell-26", + "metadata": {}, + "source": [ + "## Step 9: Deep Dive - Check random() Function Behavior" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cell-27", + "metadata": {}, + "outputs": [], + "source": [ + "# Check what random(household) would return in the broken simulation\n", + "print(\"=== Step 9: Investigate random() function behavior ===\")\n", + "\n", + "# Import the random function\n", + "from policyengine_core.commons.formulas import random\n", + "\n", + "try:\n", + " # Get household population from the new (potentially broken) simulation\n", + " hh_pop = new_sim.household\n", + " print(f\"Household population count: {hh_pop.count}\")\n", + " \n", + " # Check what household_id returns when calculated via population\n", + " print(\"\\nCalling hh_pop('household_id', 2025)...\")\n", + " hh_ids_from_pop = hh_pop(\"household_id\", 2025)\n", + " print(f\" Result length: {len(hh_ids_from_pop)}\")\n", + " print(f\" Expected: {hh_pop.count}\")\n", + " \n", + " if len(hh_ids_from_pop) != hh_pop.count:\n", + " print(f\"\\n [BUG CONFIRMED] household_id returned {len(hh_ids_from_pop)} values\")\n", + " print(f\" but household population only has {hh_pop.count} entities!\")\n", + " print(\" This is why random(household) fails.\")\n", + " \n", + "except Exception as e:\n", + " print(f\"Error: {e}\")\n", + " traceback.print_exc()" + ] + }, + { + "cell_type": "markdown", + "id": "cell-28", + "metadata": {}, + "source": [ + "## Summary and Conclusions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cell-29", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"=\"*70)\n", + 
"print(\"DIAGNOSTIC SUMMARY\")\n", + "print(\"=\"*70)\n", + "\n", + "print(\"\"\"\n", + "FINDINGS:\n", + "\n", + "1. COUNTRY FILTERING (country/wales):\n", + " - Uses to_input_dataframe() + DataFrame subsetting + new Simulation()\n", + " - Creates entity count mismatch between persons and households\n", + " - Breaks when calculating variables that use random(household)\n", + "\n", + "2. CONSTITUENCY/LA FILTERING:\n", + " - Uses weight adjustment on existing simulation\n", + " - Preserves entity structure\n", + " - Works correctly\n", + "\n", + "ROOT CAUSE:\n", + " - The to_input_dataframe() -> filter -> new Simulation() approach\n", + " doesn't properly preserve entity relationships\n", + " - Either household_id isn't properly exported/imported, OR\n", + " - The entity membership mapping gets corrupted during rebuild\n", + "\n", + "RECOMMENDED FIX:\n", + " - Use weight-based filtering for country filtering (like constituency/LA)\n", + " - Zero out weights for households not in the target country\n", + " - This preserves entity structure and avoids the export/import complexity\n", + "\n", + "Example fix for policyengine/simulation.py:\n", + "\n", + " if \"country/\" in region:\n", + " country_name = region.split(\"/\")[1]\n", + " country = simulation.calculate(\"country\", map_to=\"household\").values\n", + " is_in_country = (country == country_name.upper())\n", + " current_weights = simulation.calculate(\n", + " \"household_weight\", simulation.default_calculation_period\n", + " )\n", + " simulation.set_input(\n", + " \"household_weight\",\n", + " simulation.default_calculation_period,\n", + " current_weights * is_in_country # Zero out non-matching\n", + " )\n", + "\"\"\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py-3.13", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + 
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/unit/endpoints/economy/test_compare.py b/tests/unit/endpoints/economy/test_compare.py index 0b8cf5f94..17ff66275 100644 --- a/tests/unit/endpoints/economy/test_compare.py +++ b/tests/unit/endpoints/economy/test_compare.py @@ -307,9 +307,13 @@ def test__downloads_from_correct_repos( # Verify correct repos are used calls = mock_download.call_args_list - assert calls[0][1]["repo"] == "policyengine/policyengine-uk-data-private" + assert ( + calls[0][1]["repo"] == "policyengine/policyengine-uk-data-private" + ) assert calls[0][1]["repo_filename"] == "local_authority_weights.h5" - assert calls[1][1]["repo"] == "policyengine/policyengine-uk-data-public" + assert ( + calls[1][1]["repo"] == "policyengine/policyengine-uk-data-public" + ) assert calls[1][1]["repo_filename"] == "local_authorities_2021.csv" def test__given_constituency_region__returns_none(self): @@ -541,12 +545,16 @@ def test__given_non_uk_country_nigeria__returns_none(self): def test__given_local_authority_region__returns_none(self): """When simulating a local authority, constituency breakdown should not be computed.""" - result = uk_constituency_breakdown({}, {}, "uk", "local_authority/Leicester") + result = uk_constituency_breakdown( + {}, {}, "uk", "local_authority/Leicester" + ) assert result is None def test__given_local_authority_region_with_code__returns_none(self): """When simulating a local authority by code, constituency breakdown should not be computed.""" - result = uk_constituency_breakdown({}, {}, "uk", "local_authority/E06000016") + result = uk_constituency_breakdown( + {}, {}, "uk", "local_authority/E06000016" + ) assert result is None @patch( diff --git a/tests/unit/test_country.py b/tests/unit/test_country.py index 1b597ec0a..b57e8ceee 100644 --- a/tests/unit/test_country.py +++ b/tests/unit/test_country.py @@ -60,9 +60,7 @@ def 
test__local_authority_regions_have_type_field(self, uk_regions): def test__specific_local_authorities_present(self, uk_regions): """Verify specific local authorities are present in metadata.""" local_authority_names = [ - r["name"] - for r in uk_regions - if r.get("type") == "local_authority" + r["name"] for r in uk_regions if r.get("type") == "local_authority" ] # Check some well-known local authorities assert "local_authority/Hartlepool" in local_authority_names From 945cf040db810392b3826d9c92dc20bc02efe7f9 Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Tue, 16 Dec 2025 13:31:29 +0400 Subject: [PATCH 5/7] fix: Explicitly sort parliamentary constituency and local authority lists --- policyengine_api/country.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/policyengine_api/country.py b/policyengine_api/country.py index 4c602b347..29f64fbbe 100644 --- a/policyengine_api/country.py +++ b/policyengine_api/country.py @@ -73,6 +73,7 @@ def build_microsimulation_options(self) -> dict: Path(__file__).parent / "data" / "constituencies_2024.csv" ) constituency_names = pd.read_csv(constituency_names_path) + constituency_names = constituency_names.sort_values("name") region = [ dict(name="uk", label="the UK", type="national"), dict(name="country/england", label="England", type="country"), @@ -96,6 +97,7 @@ def build_microsimulation_options(self) -> dict: Path(__file__).parent / "data" / "local_authorities_2021.csv" ) local_authority_names = pd.read_csv(local_authority_names_path) + local_authority_names = local_authority_names.sort_values("name") for i in range(len(local_authority_names)): region.append( dict( From 80e676cfdf9bbaae8b93e55e7255871e646981bb Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Tue, 16 Dec 2025 14:11:19 +0400 Subject: [PATCH 6/7] fix: Un-merge accidentally merged testing code --- scripts/.datasets/constituencies_2024.csv | 651 ---------- scripts/.datasets/local_authorities_2021.csv | 361 ------ scripts/BUG_REPORT_build_from_dataframe.md | 
172 --- scripts/diagnose_country_filtering.ipynb | 503 -------- scripts/prove_build_from_dataframe_bug.ipynb | 841 ------------- scripts/test_local_authority_api.py | 570 --------- scripts/verify_country_filtering_bug.ipynb | 1147 ------------------ 7 files changed, 4245 deletions(-) delete mode 100644 scripts/.datasets/constituencies_2024.csv delete mode 100644 scripts/.datasets/local_authorities_2021.csv delete mode 100644 scripts/BUG_REPORT_build_from_dataframe.md delete mode 100644 scripts/diagnose_country_filtering.ipynb delete mode 100644 scripts/prove_build_from_dataframe_bug.ipynb delete mode 100755 scripts/test_local_authority_api.py delete mode 100644 scripts/verify_country_filtering_bug.ipynb diff --git a/scripts/.datasets/constituencies_2024.csv b/scripts/.datasets/constituencies_2024.csv deleted file mode 100644 index bd9a1df28..000000000 --- a/scripts/.datasets/constituencies_2024.csv +++ /dev/null @@ -1,651 +0,0 @@ -code,name,x,y -E14001063,Aldershot,56,-40 -E14001064,Aldridge-Brownhills,56,-30 -E14001065,Altrincham and Sale West,52,-25 -E14001066,Amber Valley,58,-27 -E14001067,Arundel and South Downs,61,-44 -E14001068,Ashfield,60,-27 -E14001069,Ashford,72,-42 -E14001070,Ashton-under-Lyne,54,-23 -E14001071,Aylesbury,60,-35 -E14001072,Banbury,58,-33 -E14001073,Barking,68,-38 -E14001074,Barnsley North,57,-23 -E14001075,Barnsley South,58,-23 -E14001076,Barrow and Furness,54,-16 -E14001077,Basildon and Billericay,67,-34 -E14001078,Basingstoke,55,-39 -E14001079,Bassetlaw,61,-26 -E14001080,Bath,51,-40 -E14001081,Battersea,62,-41 -E14001082,Beaconsfield,57,-37 -E14001083,Beckenham and Penge,65,-43 -E14001084,Bedford,63,-32 -E14001085,Bermondsey and Old Southwark,64,-40 -E14001086,Bethnal Green and Stepney,65,-39 -E14001087,Beverley and Holderness,64,-22 -E14001088,Bexhill and Battle,70,-44 -E14001089,Bexleyheath and Crayford,67,-39 -E14001090,Bicester and Woodstock,59,-34 -E14001091,Birkenhead,49,-27 -E14001092,Birmingham Edgbaston,53,-33 
-E14001093,Birmingham Erdington,54,-31 -E14001094,Birmingham Hall Green and Moseley,55,-32 -E14001095,Birmingham Hodge Hill and Solihull North,55,-31 -E14001096,Birmingham Ladywood,54,-32 -E14001097,Birmingham Northfield,54,-34 -E14001098,Birmingham Perry Barr,53,-31 -E14001099,Birmingham Selly Oak,54,-33 -E14001100,Birmingham Yardley,56,-32 -E14001101,Bishop Auckland,54,-14 -E14001102,Blackburn,53,-19 -E14001103,Blackley and Middleton South,53,-23 -E14001104,Blackpool North and Fleetwood,53,-18 -E14001105,Blackpool South,52,-18 -E14001106,Blaydon and Consett,55,-14 -E14001107,Blyth and Ashington,55,-12 -E14001108,Bognor Regis and Littlehampton,63,-44 -E14001109,Bolsover,60,-26 -E14001110,Bolton North East,52,-21 -E14001111,Bolton South and Walkden,52,-22 -E14001112,Bolton West,51,-21 -E14001113,Bootle,49,-22 -E14001114,Boston and Skegness,64,-26 -E14001115,Bournemouth East,52,-43 -E14001116,Bournemouth West,52,-42 -E14001117,Bracknell,56,-39 -E14001118,Bradford East,58,-20 -E14001119,Bradford South,56,-21 -E14001120,Bradford West,57,-20 -E14001121,Braintree,67,-31 -E14001122,Brent East,61,-38 -E14001123,Brent West,60,-38 -E14001124,Brentford and Isleworth,60,-40 -E14001125,Brentwood and Ongar,66,-33 -E14001126,Bridgwater,48,-41 -E14001127,Bridlington and The Wolds,63,-20 -E14001128,Brigg and Immingham,62,-24 -E14001129,Brighton Kemptown and Peacehaven,67,-45 -E14001130,Brighton Pavilion,67,-44 -E14001131,Bristol Central,51,-38 -E14001132,Bristol East,52,-38 -E14001133,Bristol North East,51,-37 -E14001134,Bristol North West,50,-38 -E14001135,Bristol South,51,-39 -E14001136,Broadland and Fakenham,66,-27 -E14001137,Bromley and Biggin Hill,67,-42 -E14001138,Bromsgrove,52,-33 -E14001139,Broxbourne,66,-35 -E14001140,Broxtowe,59,-27 -E14001141,Buckingham and Bletchley,60,-34 -E14001142,Burnley,55,-19 -E14001143,Burton and Uttoxeter,56,-28 -E14001144,Bury North,53,-21 -E14001145,Bury South,53,-22 -E14001146,Bury St Edmunds and Stowmarket,68,-31 -E14001147,Calder 
Valley,56,-20 -E14001148,Camborne and Redruth,43,-45 -E14001149,Cambridge,65,-30 -E14001150,Cannock Chase,54,-29 -E14001151,Canterbury,71,-41 -E14001152,Carlisle,53,-14 -E14001153,Carshalton and Wallington,62,-43 -E14001154,Castle Point,69,-36 -E14001155,Central Devon,47,-42 -E14001156,Central Suffolk and North Ipswich,68,-29 -E14001157,Chatham and Aylesford,69,-40 -E14001158,Cheadle,55,-26 -E14001159,Chelmsford,67,-33 -E14001160,Chelsea and Fulham,61,-40 -E14001161,Cheltenham,52,-36 -E14001162,Chesham and Amersham,59,-36 -E14001163,Chester North and Neston,50,-28 -E14001164,Chester South and Eddisbury,51,-27 -E14001165,Chesterfield,59,-26 -E14001166,Chichester,60,-44 -E14001167,Chingford and Woodford Green,64,-35 -E14001168,Chippenham,52,-39 -E14001169,Chipping Barnet,62,-36 -E14001170,Chorley,53,-20 -E14001171,Christchurch,53,-42 -E14001172,Cities of London and Westminster,63,-40 -E14001173,City of Durham,55,-16 -E14001174,Clacton,69,-32 -E14001175,Clapham and Brixton Hill,62,-42 -E14001176,Colchester,68,-32 -E14001177,Colne Valley,55,-23 -E14001178,Congleton,54,-27 -E14001179,Corby and East Northamptonshire,62,-30 -E14001180,Coventry East,57,-33 -E14001181,Coventry North West,56,-33 -E14001182,Coventry South,57,-34 -E14001183,Cramlington and Killingworth,56,-12 -E14001184,Crawley,69,-44 -E14001185,Crewe and Nantwich,53,-27 -E14001186,Croydon East,65,-42 -E14001187,Croydon South,64,-43 -E14001188,Croydon West,63,-43 -E14001189,Dagenham and Rainham,67,-37 -E14001190,Darlington,55,-17 -E14001191,Dartford,68,-40 -E14001192,Daventry,60,-32 -E14001193,Derby North,58,-28 -E14001194,Derby South,57,-28 -E14001195,Derbyshire Dales,57,-26 -E14001196,Dewsbury and Batley,57,-22 -E14001197,Didcot and Wantage,54,-38 -E14001198,Doncaster Central,60,-23 -E14001199,Doncaster East and the Isle of Axholme,61,-23 -E14001200,Doncaster North,61,-22 -E14001201,Dorking and Horley,59,-43 -E14001202,Dover and Deal,72,-41 -E14001203,Droitwich and Evesham,54,-36 -E14001204,Dudley,51,-31 
-E14001205,Dulwich and West Norwood,63,-42 -E14001206,Dunstable and Leighton Buzzard,62,-33 -E14001207,Ealing Central and Acton,59,-39 -E14001208,Ealing North,59,-38 -E14001209,Ealing Southall,58,-39 -E14001210,Earley and Woodley,56,-36 -E14001211,Easington,57,-16 -E14001212,East Grinstead and Uckfield,69,-43 -E14001213,East Ham,67,-38 -E14001214,East Hampshire,55,-41 -E14001215,East Surrey,67,-43 -E14001216,East Thanet,71,-39 -E14001217,East Wiltshire,53,-41 -E14001218,East Worthing and Shoreham,65,-44 -E14001219,Eastbourne,69,-45 -E14001220,Eastleigh,54,-41 -E14001221,Edmonton and Winchmore Hill,64,-36 -E14001222,Ellesmere Port and Bromborough,50,-27 -E14001223,Eltham and Chislehurst,66,-41 -E14001224,Ely and East Cambridgeshire,66,-30 -E14001225,Enfield North,62,-35 -E14001226,Epping Forest,67,-35 -E14001227,Epsom and Ewell,60,-43 -E14001228,Erewash,59,-28 -E14001229,Erith and Thamesmead,67,-40 -E14001230,Esher and Walton,58,-42 -E14001231,Exeter,48,-42 -E14001232,Exmouth and Exeter East,48,-43 -E14001233,Fareham and Waterlooville,55,-43 -E14001234,Farnham and Bordon,56,-42 -E14001235,Faversham and Mid Kent,71,-40 -E14001236,Feltham and Heston,59,-40 -E14001237,Filton and Bradley Stoke,50,-37 -E14001238,Finchley and Golders Green,61,-37 -E14001239,Folkestone and Hythe,71,-42 -E14001240,Forest of Dean,50,-35 -E14001241,Frome and East Somerset,50,-41 -E14001242,Fylde,51,-19 -E14001243,Gainsborough,61,-25 -E14001244,Gateshead Central and Whickham,56,-15 -E14001245,Gedling,61,-28 -E14001246,Gillingham and Rainham,70,-40 -E14001247,Glastonbury and Somerton,49,-41 -E14001248,Gloucester,51,-35 -E14001249,Godalming and Ash,57,-42 -E14001250,Goole and Pocklington,61,-21 -E14001251,Gorton and Denton,55,-24 -E14001252,Gosport,57,-43 -E14001253,Grantham and Bourne,63,-28 -E14001254,Gravesham,68,-39 -E14001255,Great Grimsby and Cleethorpes,63,-24 -E14001256,Great Yarmouth,67,-27 -E14001257,Greenwich and Woolwich,66,-40 -E14001258,Guildford,56,-41 -E14001259,Hackney North and 
Stoke Newington,64,-38 -E14001260,Hackney South and Shoreditch,64,-39 -E14001261,Halesowen,51,-33 -E14001262,Halifax,55,-21 -E14001263,Hamble Valley,56,-43 -E14001264,Hammersmith and Chiswick,60,-39 -E14001265,Hampstead and Highgate,62,-38 -E14001266,"Harborough, Oadby and Wigston",61,-31 -E14001267,Harlow,67,-32 -E14001268,Harpenden and Berkhamsted,62,-34 -E14001269,Harrogate and Knaresborough,59,-18 -E14001270,Harrow East,60,-37 -E14001271,Harrow West,59,-37 -E14001272,Hartlepool,59,-16 -E14001273,Harwich and North Essex,69,-31 -E14001274,Hastings and Rye,70,-43 -E14001275,Havant,59,-44 -E14001276,Hayes and Harlington,58,-38 -E14001277,Hazel Grove,55,-25 -E14001278,Hemel Hempstead,64,-34 -E14001279,Hendon,61,-36 -E14001280,Henley and Thame,58,-35 -E14001281,Hereford and South Herefordshire,51,-34 -E14001282,Herne Bay and Sandwich,72,-40 -E14001283,Hertford and Stortford,66,-32 -E14001284,Hertsmere,66,-34 -E14001285,Hexham,53,-13 -E14001286,Heywood and Middleton North,54,-20 -E14001287,High Peak,56,-25 -E14001288,Hinckley and Bosworth,58,-30 -E14001289,Hitchin,64,-32 -E14001290,Holborn and St Pancras,62,-39 -E14001291,Honiton and Sidmouth,49,-43 -E14001292,Hornchurch and Upminster,66,-37 -E14001293,Hornsey and Friern Barnet,63,-36 -E14001294,Horsham,62,-44 -E14001295,Houghton and Sunderland South,57,-15 -E14001296,Hove and Portslade,66,-44 -E14001297,Huddersfield,56,-22 -E14001298,Huntingdon,63,-31 -E14001299,Hyndburn,54,-19 -E14001300,Ilford North,65,-36 -E14001301,Ilford South,65,-37 -E14001302,Ipswich,68,-30 -E14001303,Isle of Wight East,54,-45 -E14001304,Isle of Wight West,53,-45 -E14001305,Islington North,63,-38 -E14001306,Islington South and Finsbury,63,-39 -E14001307,Jarrow and Gateshead East,57,-14 -E14001308,Keighley and Ilkley,56,-19 -E14001309,Kenilworth and Southam,56,-34 -E14001310,Kensington and Bayswater,61,-39 -E14001311,Kettering,61,-30 -E14001312,Kingston and Surbiton,59,-42 -E14001313,Kingston upon Hull East,63,-22 -E14001314,Kingston upon Hull 
North and Cottingham,62,-21 -E14001315,Kingston upon Hull West and Haltemprice,62,-22 -E14001316,Kingswinford and South Staffordshire,52,-30 -E14001317,Knowsley,50,-23 -E14001318,Lancaster and Wyre,54,-18 -E14001319,Leeds Central and Headingley,60,-20 -E14001320,Leeds East,61,-20 -E14001321,Leeds North East,59,-19 -E14001322,Leeds North West,58,-19 -E14001323,Leeds South,59,-21 -E14001324,Leeds South West and Morley,58,-21 -E14001325,Leeds West and Pudsey,59,-20 -E14001326,Leicester East,60,-30 -E14001327,Leicester South,60,-31 -E14001328,Leicester West,59,-31 -E14001329,Leigh and Atherton,51,-25 -E14001330,Lewes,68,-45 -E14001331,Lewisham East,66,-42 -E14001332,Lewisham North,65,-40 -E14001333,Lewisham West and East Dulwich,65,-41 -E14001334,Leyton and Wanstead,64,-37 -E14001335,Lichfield,56,-29 -E14001336,Lincoln,62,-25 -E14001337,Liverpool Garston,50,-25 -E14001338,Liverpool Riverside,49,-24 -E14001339,Liverpool Walton,49,-23 -E14001340,Liverpool Wavertree,49,-25 -E14001341,Liverpool West Derby,50,-24 -E14001342,Loughborough,59,-30 -E14001343,Louth and Horncastle,63,-25 -E14001344,Lowestoft,68,-28 -E14001345,Luton North,63,-33 -E14001346,Luton South and South Bedfordshire,63,-34 -E14001347,Macclesfield,56,-26 -E14001348,Maidenhead,57,-36 -E14001349,Maidstone and Malling,69,-41 -E14001350,Makerfield,51,-22 -E14001351,Maldon,69,-33 -E14001352,Manchester Central,54,-24 -E14001353,Manchester Rusholme,53,-25 -E14001354,Manchester Withington,54,-26 -E14001355,Mansfield,61,-27 -E14001356,Melksham and Devizes,52,-40 -E14001357,Melton and Syston,61,-29 -E14001358,Meriden and Solihull East,55,-33 -E14001359,Mid Bedfordshire,62,-32 -E14001360,Mid Buckinghamshire,59,-35 -E14001361,Mid Cheshire,52,-27 -E14001362,Mid Derbyshire,57,-27 -E14001363,Mid Dorset and North Poole,50,-43 -E14001364,Mid Leicestershire,58,-31 -E14001365,Mid Norfolk,65,-28 -E14001366,Mid Sussex,68,-43 -E14001367,Middlesbrough and Thornaby East,57,-17 -E14001368,Middlesbrough South and East 
Cleveland,59,-17 -E14001369,Milton Keynes Central,61,-34 -E14001370,Milton Keynes North,61,-33 -E14001371,Mitcham and Morden,61,-43 -E14001372,Morecambe and Lunesdale,54,-17 -E14001373,New Forest East,54,-43 -E14001374,New Forest West,53,-43 -E14001375,Newark,62,-26 -E14001376,Newbury,54,-37 -E14001377,Newcastle upon Tyne Central and West,54,-13 -E14001378,Newcastle upon Tyne East and Wallsend,56,-14 -E14001379,Newcastle upon Tyne North,55,-13 -E14001380,Newcastle-under-Lyme,52,-28 -E14001381,Newton Abbot,47,-43 -E14001382,Newton Aycliffe and Spennymoor,56,-16 -E14001383,Normanton and Hemsworth,59,-23 -E14001384,North Bedfordshire,62,-31 -E14001385,North Cornwall,45,-43 -E14001386,North Cotswolds,53,-37 -E14001387,North Devon,46,-41 -E14001388,North Dorset,51,-42 -E14001389,North Durham,54,-15 -E14001390,North East Cambridgeshire,64,-29 -E14001391,North East Derbyshire,58,-26 -E14001392,North East Hampshire,56,-38 -E14001393,North East Hertfordshire,65,-32 -E14001394,North East Somerset and Hanham,50,-39 -E14001395,North Herefordshire,52,-34 -E14001396,North Norfolk,65,-27 -E14001397,North Northumberland,54,-12 -E14001398,North Shropshire,50,-29 -E14001399,North Somerset,49,-39 -E14001400,North Warwickshire and Bedworth,57,-32 -E14001401,North West Cambridgeshire,64,-30 -E14001402,North West Essex,66,-31 -E14001403,North West Hampshire,54,-39 -E14001404,North West Leicestershire,58,-29 -E14001405,North West Norfolk,64,-28 -E14001406,Northampton North,61,-32 -E14001407,Northampton South,60,-33 -E14001408,Norwich North,66,-28 -E14001409,Norwich South,66,-29 -E14001410,Nottingham East,60,-29 -E14001411,Nottingham North and Kimberley,60,-28 -E14001412,Nottingham South,59,-29 -E14001413,Nuneaton,57,-31 -E14001414,Old Bexley and Sidcup,67,-41 -E14001415,Oldham East and Saddleworth,55,-22 -E14001416,"Oldham West, Chadderton and Royton",54,-22 -E14001417,Orpington,66,-43 -E14001418,Ossett and Denby Dale,58,-22 -E14001419,Oxford East,58,-34 -E14001420,Oxford West and 
Abingdon,57,-35 -E14001421,Peckham,64,-41 -E14001422,Pendle and Clitheroe,56,-18 -E14001423,Penistone and Stocksbridge,56,-23 -E14001424,Penrith and Solway,52,-15 -E14001425,Peterborough,63,-29 -E14001426,Plymouth Moor View,46,-43 -E14001427,Plymouth Sutton and Devonport,47,-44 -E14001428,"Pontefract, Castleford and Knottingley",60,-22 -E14001429,Poole,51,-43 -E14001430,Poplar and Limehouse,66,-39 -E14001431,Portsmouth North,58,-43 -E14001432,Portsmouth South,58,-44 -E14001433,Preston,52,-19 -E14001434,Putney,61,-41 -E14001435,Queen's Park and Maida Vale,62,-40 -E14001436,Rawmarsh and Conisbrough,60,-24 -E14001437,Rayleigh and Wickford,68,-34 -E14001438,Reading Central,55,-37 -E14001439,Reading West and Mid Berkshire,55,-36 -E14001440,Redcar,58,-17 -E14001441,Redditch,53,-35 -E14001442,Reigate,68,-44 -E14001443,Ribble Valley,55,-18 -E14001444,Richmond and Northallerton,57,-18 -E14001445,Richmond Park,59,-41 -E14001446,Rochdale,54,-21 -E14001447,Rochester and Strood,69,-39 -E14001448,Romford,66,-36 -E14001449,Romsey and Southampton North,54,-40 -E14001450,Rossendale and Darwen,55,-20 -E14001451,Rother Valley,60,-25 -E14001452,Rotherham,59,-24 -E14001453,Rugby,58,-32 -E14001454,"Ruislip, Northwood and Pinner",60,-36 -E14001455,Runcorn and Helsby,51,-28 -E14001456,Runnymede and Weybridge,57,-41 -E14001457,Rushcliffe,62,-28 -E14001458,Rutland and Stamford,62,-29 -E14001459,Salford,53,-24 -E14001460,Salisbury,52,-41 -E14001461,Scarborough and Whitby,61,-19 -E14001462,Scunthorpe,61,-24 -E14001463,Sefton Central,50,-20 -E14001464,Selby,60,-21 -E14001465,Sevenoaks,68,-42 -E14001466,Sheffield Brightside and Hillsborough,58,-24 -E14001467,Sheffield Central,58,-25 -E14001468,Sheffield Hallam,57,-24 -E14001469,Sheffield Heeley,57,-25 -E14001470,Sheffield South East,59,-25 -E14001471,Sherwood Forest,62,-27 -E14001472,Shipley,57,-19 -E14001473,Shrewsbury,51,-30 -E14001474,Sittingbourne and Sheppey,70,-39 -E14001475,Skipton and Ripon,58,-18 -E14001476,Sleaford and North 
Hykeham,63,-26 -E14001477,Slough,56,-37 -E14001478,Smethwick,53,-32 -E14001479,Solihull West and Shirley,55,-34 -E14001480,South Basildon and East Thurrock,68,-36 -E14001481,South Cambridgeshire,65,-31 -E14001482,South Cotswolds,53,-38 -E14001483,South Derbyshire,57,-29 -E14001484,South Devon,48,-45 -E14001485,South Dorset,51,-44 -E14001486,South East Cornwall,46,-44 -E14001487,South Holland and The Deepings,63,-27 -E14001488,South Leicestershire,59,-32 -E14001489,South Norfolk,67,-29 -E14001490,South Northamptonshire,59,-33 -E14001491,South Ribble,52,-20 -E14001492,South Shields,58,-14 -E14001493,South Shropshire,50,-31 -E14001494,South Suffolk,69,-30 -E14001495,South West Devon,47,-45 -E14001496,South West Hertfordshire,61,-35 -E14001497,South West Norfolk,65,-29 -E14001498,South West Wiltshire,51,-41 -E14001499,Southampton Itchen,55,-42 -E14001500,Southampton Test,54,-42 -E14001501,Southend East and Rochford,69,-34 -E14001502,Southend West and Leigh,68,-35 -E14001503,Southgate and Wood Green,63,-35 -E14001504,Southport,50,-19 -E14001505,Spelthorne,58,-40 -E14001506,Spen Valley,57,-21 -E14001507,St Albans,65,-34 -E14001508,St Austell and Newquay,45,-44 -E14001509,St Helens North,50,-21 -E14001510,St Helens South and Whiston,50,-22 -E14001511,St Ives,43,-46 -E14001512,St Neots and Mid Cambridgeshire,64,-31 -E14001513,Stafford,54,-28 -E14001514,Staffordshire Moorlands,56,-27 -E14001515,Stalybridge and Hyde,56,-24 -E14001516,Stevenage,64,-33 -E14001517,Stockport,54,-25 -E14001518,Stockton North,58,-16 -E14001519,Stockton West,56,-17 -E14001520,Stoke-on-Trent Central,55,-28 -E14001521,Stoke-on-Trent North,55,-27 -E14001522,Stoke-on-Trent South,55,-29 -E14001523,"Stone, Great Wyrley and Penkridge",53,-28 -E14001524,Stourbridge,51,-32 -E14001525,Stratford and Bow,65,-38 -E14001526,Stratford-on-Avon,54,-35 -E14001527,Streatham and Croydon North,64,-42 -E14001528,Stretford and Urmston,52,-24 -E14001529,Stroud,52,-37 -E14001530,Suffolk Coastal,69,-29 -E14001531,Sunderland 
Central,58,-15 -E14001532,Surrey Heath,57,-39 -E14001533,Sussex Weald,70,-42 -E14001534,Sutton and Cheam,60,-42 -E14001535,Sutton Coldfield,56,-31 -E14001536,Swindon North,53,-39 -E14001537,Swindon South,53,-40 -E14001538,Tamworth,57,-30 -E14001539,Tatton,52,-26 -E14001540,Taunton and Wellington,49,-42 -E14001541,Telford,52,-29 -E14001542,Tewkesbury,53,-36 -E14001543,The Wrekin,51,-29 -E14001544,Thirsk and Malton,60,-18 -E14001545,Thornbury and Yate,51,-36 -E14001546,Thurrock,67,-36 -E14001547,Tipton and Wednesbury,52,-31 -E14001548,Tiverton and Minehead,47,-41 -E14001549,Tonbridge,68,-41 -E14001550,Tooting,61,-42 -E14001551,Torbay,48,-44 -E14001552,Torridge and Tavistock,46,-42 -E14001553,Tottenham,62,-37 -E14001554,Truro and Falmouth,44,-45 -E14001555,Tunbridge Wells,69,-42 -E14001556,Twickenham,58,-41 -E14001557,Tynemouth,56,-13 -E14001558,Uxbridge and South Ruislip,58,-37 -E14001559,Vauxhall and Camberwell Green,63,-41 -E14001560,Wakefield and Rothwell,59,-22 -E14001561,Wallasey,48,-27 -E14001562,Walsall and Bloxwich,55,-30 -E14001563,Walthamstow,63,-37 -E14001564,Warrington North,51,-23 -E14001565,Warrington South,51,-24 -E14001566,Warwick and Leamington,55,-35 -E14001567,Washington and Gateshead South,55,-15 -E14001568,Watford,65,-35 -E14001569,Waveney Valley,67,-28 -E14001570,Weald of Kent,70,-41 -E14001571,Wellingborough and Rushden,63,-30 -E14001572,Wells and Mendip Hills,50,-40 -E14001573,Welwyn Hatfield,65,-33 -E14001574,West Bromwich,52,-32 -E14001575,West Dorset,50,-44 -E14001576,West Ham and Beckton,66,-38 -E14001577,West Lancashire,49,-21 -E14001578,West Suffolk,67,-30 -E14001579,West Worcestershire,52,-35 -E14001580,Westmorland and Lonsdale,53,-15 -E14001581,Weston-super-Mare,49,-40 -E14001582,Wetherby and Easingwold,62,-20 -E14001583,Whitehaven and Workington,53,-16 -E14001584,Widnes and Halewood,51,-26 -E14001585,Wigan,51,-20 -E14001586,Wimbledon,60,-41 -E14001587,Winchester,55,-40 -E14001588,Windsor,57,-38 -E14001589,Wirral West,49,-28 
-E14001590,Witham,68,-33 -E14001591,Witney,56,-35 -E14001592,Woking,57,-40 -E14001593,Wokingham,55,-38 -E14001594,Wolverhampton North East,53,-29 -E14001595,Wolverhampton South East,54,-30 -E14001596,Wolverhampton West,53,-30 -E14001597,Worcester,53,-34 -E14001598,Worsley and Eccles,52,-23 -E14001599,Worthing West,64,-44 -E14001600,Wycombe,58,-36 -E14001601,Wyre Forest,50,-33 -E14001602,Wythenshawe and Sale East,53,-26 -E14001603,Yeovil,50,-42 -E14001604,York Central,60,-19 -E14001605,York Outer,61,-18 -N05000001,Belfast East,45,-17 -N05000002,Belfast North,45,-16 -N05000003,Belfast South and Mid Down,45,-18 -N05000004,Belfast West,44,-17 -N05000005,East Antrim,45,-15 -N05000006,East Londonderry,43,-15 -N05000007,Fermanagh and South Tyrone,42,-17 -N05000008,Foyle,42,-15 -N05000009,Lagan Valley,44,-18 -N05000010,Mid Ulster,43,-16 -N05000011,Newry and Armagh,44,-19 -N05000012,North Antrim,44,-15 -N05000013,North Down,46,-16 -N05000014,South Antrim,44,-16 -N05000015,South Down,46,-18 -N05000016,Strangford,46,-17 -N05000017,Upper Bann,43,-18 -N05000018,West Tyrone,42,-16 -S14000021,East Renfrewshire,48,-11 -S14000027,Na h-Eileanan an Iar,47,-2 -S14000045,Midlothian,52,-11 -S14000048,North Ayrshire and Arran,48,-10 -S14000051,Orkney and Shetland,51,0 -S14000060,Aberdeen North,52,-3 -S14000061,Aberdeen South,52,-4 -S14000062,Aberdeenshire North and Moray East,51,-3 -S14000063,Airdrie and Shotts,50,-11 -S14000064,Alloa and Grangemouth,50,-7 -S14000065,Angus and Perthshire Glens,50,-5 -S14000066,Arbroath and Broughty Ferry,52,-5 -S14000067,"Argyll, Bute and South Lochaber",49,-5 -S14000068,Bathgate and Linlithgow,51,-9 -S14000069,"Caithness, Sutherland and Easter Ross",50,-2 -S14000070,Coatbridge and Bellshill,50,-12 -S14000071,Cowdenbeath and Kirkcaldy,52,-7 -S14000072,Cumbernauld and Kirkintilloch,50,-8 -S14000073,Dumfries and Galloway,51,-13 -S14000074,"Dumfriesshire, Clydesdale and Tweeddale",52,-13 -S14000075,Dundee Central,50,-6 -S14000076,Dunfermline and 
Dollar,51,-7 -S14000077,East Kilbride and Strathaven,48,-13 -S14000078,Edinburgh East and Musselburgh,54,-10 -S14000079,Edinburgh North and Leith,53,-9 -S14000080,Edinburgh South,53,-10 -S14000081,Edinburgh South West,52,-10 -S14000082,Edinburgh West,52,-9 -S14000083,Falkirk,51,-8 -S14000084,Glasgow East,51,-10 -S14000085,Glasgow North,49,-9 -S14000086,Glasgow North East,50,-9 -S14000087,Glasgow South,49,-11 -S14000088,Glasgow South West,50,-10 -S14000089,Glasgow West,49,-8 -S14000090,Glenrothes and Mid Fife,52,-6 -S14000091,Gordon and Buchan,50,-4 -S14000092,Hamilton and Clyde Valley,51,-12 -S14000093,Inverclyde and Renfrewshire West,48,-8 -S14000094,"Inverness, Skye and West Ross-shire",49,-3 -S14000095,Livingston,51,-11 -S14000096,Lothian East,53,-11 -S14000097,Mid Dunbartonshire,49,-7 -S14000098,"Moray West, Nairn and Strathspey",49,-4 -S14000099,"Motherwell, Wishaw and Carluke",52,-12 -S14000100,North East Fife,51,-6 -S14000101,Paisley and Renfrewshire North,48,-9 -S14000102,Paisley and Renfrewshire South,49,-10 -S14000103,Perth and Kinross-shire,51,-5 -S14000104,Rutherglen,49,-12 -S14000105,Stirling and Strathallan,49,-6 -S14000106,West Dunbartonshire,48,-7 -S14000107,"Ayr, Carrick and Cumnock",49,-13 -S14000108,"Berwickshire, Roxburgh and Selkirk",53,-12 -S14000109,Central Ayrshire,48,-12 -S14000110,Kilmarnock and Loudoun,50,-13 -S14000111,West Aberdeenshire and Kincardine,51,-4 -W07000081,Aberafan Maesteg,46,-36 -W07000082,Alyn and Deeside,49,-29 -W07000083,Bangor Aberconwy,47,-31 -W07000084,Blaenau Gwent and Rhymney,49,-33 -W07000085,"Brecon, Radnor and Cwm Tawe",50,-32 -W07000086,Bridgend,46,-37 -W07000087,Caerfyrddin,49,-32 -W07000088,Caerphilly,49,-35 -W07000089,Cardiff East,48,-37 -W07000090,Cardiff North,48,-36 -W07000091,Cardiff South and Penarth,48,-38 -W07000092,Cardiff West,47,-37 -W07000093,Ceredigion Preseli,48,-34 -W07000094,Clwyd East,49,-30 -W07000095,Clwyd North,48,-30 -W07000096,Dwyfor Meirionnydd,48,-31 -W07000097,Gower,44,-37 
-W07000098,Llanelli,45,-36 -W07000099,Merthyr Tydfil and Aberdare,49,-34 -W07000100,Mid and South Pembrokeshire,44,-36 -W07000101,Monmouthshire,50,-36 -W07000102,Montgomeryshire and Glyndwr,49,-31 -W07000103,Neath and Swansea East,47,-35 -W07000104,Newport East,49,-37 -W07000105,Newport West and Islwyn,49,-36 -W07000106,Pontypridd,48,-35 -W07000107,Rhondda and Ogmore,47,-36 -W07000108,Swansea West,45,-37 -W07000109,Torfaen,50,-34 -W07000110,Vale of Glamorgan,47,-38 -W07000111,Wrexham,50,-30 -W07000112,Ynys Môn,46,-29 diff --git a/scripts/.datasets/local_authorities_2021.csv b/scripts/.datasets/local_authorities_2021.csv deleted file mode 100644 index 9fcf922ed..000000000 --- a/scripts/.datasets/local_authorities_2021.csv +++ /dev/null @@ -1,361 +0,0 @@ -code,x,y,name -E06000001,8.0,19.0,Hartlepool -E06000002,9.0,18.0,Middlesbrough -E06000003,9.0,19.0,Redcar and Cleveland -E06000004,8.0,18.0,Stockton-on-Tees -E06000005,7.0,18.0,Darlington -E06000006,1.0,11.0,Halton -E06000007,2.0,11.0,Warrington -E06000008,4.0,15.0,Blackburn with Darwen -E06000009,2.0,15.0,Blackpool -E06000010,10.0,15.0,"Kingston upon Hull, City of" -E06000011,11.0,16.0,East Riding of Yorkshire -E06000012,11.0,14.0,North East Lincolnshire -E06000013,10.0,14.0,North Lincolnshire -E06000014,9.0,17.0,York -E06000015,6.0,11.0,Derby -E06000016,8.0,8.0,Leicester -E06000017,10.0,9.0,Rutland -E06000018,8.0,10.0,Nottingham -E06000019,0.0,8.0,"Herefordshire, County of" -E06000020,2.0,9.0,Telford and Wrekin -E06000021,3.0,10.0,Stoke-on-Trent -E06000022,1.0,3.0,Bath and North East Somerset -E06000023,0.0,3.0,"Bristol, City of" -E06000024,0.0,2.0,North Somerset -E06000025,1.0,4.0,South Gloucestershire -E06000026,-4.0,-2.0,Plymouth -E06000027,-3.0,-2.0,Torbay -E06000030,2.0,4.0,Swindon -E06000031,11.0,9.0,Peterborough -E06000032,10.0,7.0,Luton -E06000033,16.0,6.0,Southend-on-Sea -E06000034,15.0,4.0,Thurrock -E06000035,15.0,1.0,Medway -E06000036,4.0,2.0,Bracknell Forest -E06000037,2.0,2.0,West Berkshire 
-E06000038,2.0,3.0,Reading -E06000039,6.0,4.0,Slough -E06000040,4.0,3.0,Windsor and Maidenhead -E06000041,3.0,3.0,Wokingham -E06000042,6.0,5.0,Milton Keynes -E06000043,9.0,-2.0,Brighton and Hove -E06000044,4.0,-1.0,Portsmouth -E06000045,2.0,0.0,Southampton -E06000046,1.0,-2.0,Isle of Wight -E06000047,6.0,18.0,County Durham -E06000049,4.0,11.0,Cheshire East -E06000050,3.0,11.0,Cheshire West and Chester -E06000051,1.0,9.0,Shropshire -E06000052,-5.0,-2.0,Cornwall -E06000053,-7.0,-3.0,Isles of Scilly -E06000054,1.0,2.0,Wiltshire -E06000055,9.0,7.0,Bedford -E06000056,9.0,6.0,Central Bedfordshire -E06000057,5.0,20.0,Northumberland -E06000058,0.0,0.0,"Bournemouth, Christchurch and Poole" -E06000059,-1.0,0.0,Dorset -E06000060,5.0,5.0,Buckinghamshire -E06000061,9.0,9.0,North Northamptonshire -E06000062,7.0,6.0,West Northamptonshire -E06000063,0.0,0.0,Cumberland -E06000064,0.0,0.0,Westmorland and Furness -E06000065,0.0,0.0,North Yorkshire -E06000066,0.0,0.0,Somerset -E07000008,12.0,8.0,Cambridge -E07000009,12.0,9.0,East Cambridgeshire -E07000010,13.0,10.0,Fenland -E07000011,10.0,8.0,Huntingdonshire -E07000012,11.0,8.0,South Cambridgeshire -E07000032,7.0,11.0,Amber Valley -E07000033,10.0,12.0,Bolsover -E07000034,9.0,12.0,Chesterfield -E07000035,7.0,12.0,Derbyshire Dales -E07000036,7.0,9.0,Erewash -E07000037,7.0,13.0,High Peak -E07000038,8.0,12.0,North East Derbyshire -E07000039,6.0,10.0,South Derbyshire -E07000040,-2.0,-1.0,East Devon -E07000041,-3.0,-1.0,Exeter -E07000042,-2.0,0.0,Mid Devon -E07000043,-3.0,1.0,North Devon -E07000044,-4.0,-3.0,South Hams -E07000045,-2.0,-2.0,Teignbridge -E07000046,-4.0,-1.0,Torridge -E07000047,-3.0,0.0,West Devon -E07000061,10.0,-2.0,Eastbourne -E07000062,13.0,-2.0,Hastings -E07000063,10.0,-1.0,Lewes -E07000064,12.0,-2.0,Rother -E07000065,11.0,-2.0,Wealden -E07000066,14.0,5.0,Basildon -E07000067,14.0,7.0,Braintree -E07000068,13.0,5.0,Brentwood -E07000069,15.0,5.0,Castle Point -E07000070,14.0,6.0,Chelmsford -E07000071,15.0,8.0,Colchester 
-E07000072,12.0,5.0,Epping Forest -E07000073,13.0,6.0,Harlow -E07000074,15.0,7.0,Maldon -E07000075,15.0,6.0,Rochford -E07000076,16.0,8.0,Tendring -E07000077,13.0,7.0,Uttlesford -E07000078,1.0,5.0,Cheltenham -E07000079,2.0,5.0,Cotswold -E07000080,-1.0,6.0,Forest of Dean -E07000081,0.0,6.0,Gloucester -E07000082,0.0,5.0,Stroud -E07000083,1.0,6.0,Tewkesbury -E07000084,2.0,1.0,Basingstoke and Deane -E07000085,4.0,0.0,East Hampshire -E07000086,3.0,0.0,Eastleigh -E07000087,2.0,-1.0,Fareham -E07000088,3.0,-1.0,Gosport -E07000089,3.0,2.0,Hart -E07000090,5.0,0.0,Havant -E07000091,1.0,0.0,New Forest -E07000092,4.0,1.0,Rushmoor -E07000093,1.0,1.0,Test Valley -E07000094,3.0,1.0,Winchester -E07000095,12.0,6.0,Broxbourne -E07000096,8.0,6.0,Dacorum -E07000098,9.0,5.0,Hertsmere -E07000099,11.0,7.0,North Hertfordshire -E07000102,7.0,5.0,Three Rivers -E07000103,8.0,5.0,Watford -E07000105,12.0,-1.0,Ashford -E07000106,15.0,0.0,Canterbury -E07000107,13.0,1.0,Dartford -E07000108,14.0,-1.0,Dover -E07000109,14.0,1.0,Gravesham -E07000110,14.0,0.0,Maidstone -E07000111,12.0,0.0,Sevenoaks -E07000112,13.0,-1.0,Folkestone and Hythe -E07000113,16.0,0.0,Swale -E07000114,15.0,-1.0,Thanet -E07000115,13.0,0.0,Tonbridge and Malling -E07000116,11.0,-1.0,Tunbridge Wells -E07000117,6.0,15.0,Burnley -E07000118,3.0,14.0,Chorley -E07000119,4.0,16.0,Fylde -E07000120,5.0,15.0,Hyndburn -E07000121,3.0,17.0,Lancaster -E07000122,6.0,16.0,Pendle -E07000123,5.0,16.0,Preston -E07000124,5.0,17.0,Ribble Valley -E07000125,6.0,14.0,Rossendale -E07000126,3.0,15.0,South Ribble -E07000127,2.0,13.0,West Lancashire -E07000128,3.0,16.0,Wyre -E07000129,7.0,7.0,Blaby -E07000130,8.0,9.0,Charnwood -E07000131,8.0,7.0,Harborough -E07000132,7.0,8.0,Hinckley and Bosworth -E07000133,11.0,10.0,Melton -E07000134,6.0,9.0,North West Leicestershire -E07000135,9.0,8.0,Oadby and Wigston -E07000136,12.0,12.0,Boston -E07000137,12.0,13.0,East Lindsey -E07000138,11.0,12.0,Lincoln -E07000139,11.0,11.0,North Kesteven -E07000140,12.0,11.0,South 
Holland -E07000141,12.0,10.0,South Kesteven -E07000142,11.0,13.0,West Lindsey -E07000143,14.0,10.0,Breckland -E07000144,15.0,12.0,Broadland -E07000145,15.0,11.0,Great Yarmouth -E07000146,13.0,11.0,King's Lynn and West Norfolk -E07000147,14.0,12.0,North Norfolk -E07000148,14.0,11.0,Norwich -E07000149,15.0,10.0,South Norfolk -E07000170,8.0,11.0,Ashfield -E07000171,10.0,13.0,Bassetlaw -E07000172,7.0,10.0,Broxtowe -E07000173,9.0,10.0,Gedling -E07000174,9.0,11.0,Mansfield -E07000175,10.0,11.0,Newark and Sherwood -E07000176,10.0,10.0,Rushcliffe -E07000177,4.0,5.0,Cherwell -E07000178,4.0,4.0,Oxford -E07000179,5.0,4.0,South Oxfordshire -E07000180,3.0,4.0,Vale of White Horse -E07000181,3.0,5.0,West Oxfordshire -E07000192,3.0,9.0,Cannock Chase -E07000193,5.0,11.0,East Staffordshire -E07000194,4.0,9.0,Lichfield -E07000195,2.0,10.0,Newcastle-under-Lyme -E07000196,2.0,8.0,South Staffordshire -E07000197,4.0,10.0,Stafford -E07000198,5.0,10.0,Staffordshire Moorlands -E07000199,5.0,9.0,Tamworth -E07000200,14.0,8.0,Babergh -E07000202,15.0,9.0,Ipswich -E07000203,14.0,9.0,Mid Suffolk -E07000207,7.0,2.0,Elmbridge -E07000208,8.0,0.0,Epsom and Ewell -E07000209,5.0,1.0,Guildford -E07000210,6.0,1.0,Mole Valley -E07000211,7.0,0.0,Reigate and Banstead -E07000212,5.0,3.0,Runnymede -E07000213,6.0,3.0,Spelthorne -E07000214,5.0,2.0,Surrey Heath -E07000215,9.0,-1.0,Tandridge -E07000216,6.0,0.0,Waverley -E07000217,6.0,2.0,Woking -E07000218,6.0,8.0,North Warwickshire -E07000219,6.0,7.0,Nuneaton and Bedworth -E07000220,6.0,6.0,Rugby -E07000221,3.0,6.0,Stratford-on-Avon -E07000222,4.0,6.0,Warwick -E07000223,8.0,-2.0,Adur -E07000224,6.0,-2.0,Arun -E07000225,5.0,-1.0,Chichester -E07000226,8.0,-1.0,Crawley -E07000227,6.0,-1.0,Horsham -E07000228,7.0,-1.0,Mid Sussex -E07000229,7.0,-2.0,Worthing -E07000234,2.0,7.0,Bromsgrove -E07000235,-1.0,7.0,Malvern Hills -E07000236,4.0,7.0,Redditch -E07000237,0.0,7.0,Worcester -E07000238,2.0,6.0,Wychavon -E07000239,1.0,8.0,Wyre Forest -E07000240,10.0,6.0,St Albans 
-E07000241,11.0,6.0,Welwyn Hatfield -E07000242,13.0,8.0,East Hertfordshire -E07000243,12.0,7.0,Stevenage -E07000244,16.0,10.0,East Suffolk -E07000245,13.0,9.0,West Suffolk -E08000001,4.0,14.0,Bolton -E08000002,5.0,14.0,Bury -E08000003,5.0,12.0,Manchester -E08000004,5.0,13.0,Oldham -E08000005,7.0,14.0,Rochdale -E08000006,4.0,13.0,Salford -E08000007,6.0,12.0,Stockport -E08000008,6.0,13.0,Tameside -E08000009,4.0,12.0,Trafford -E08000010,3.0,13.0,Wigan -E08000011,2.0,12.0,Knowsley -E08000012,1.0,13.0,Liverpool -E08000013,3.0,12.0,St. Helens -E08000014,2.0,14.0,Sefton -E08000015,1.0,12.0,Wirral -E08000016,8.0,14.0,Barnsley -E08000017,9.0,14.0,Doncaster -E08000018,9.0,13.0,Rotherham -E08000019,8.0,13.0,Sheffield -E08000021,5.0,19.0,Newcastle upon Tyne -E08000022,6.0,20.0,North Tyneside -E08000023,7.0,20.0,South Tyneside -E08000024,7.0,19.0,Sunderland -E08000025,5.0,8.0,Birmingham -E08000026,5.0,6.0,Coventry -E08000027,1.0,7.0,Dudley -E08000028,3.0,7.0,Sandwell -E08000029,5.0,7.0,Solihull -E08000030,4.0,8.0,Walsall -E08000031,3.0,8.0,Wolverhampton -E08000032,7.0,16.0,Bradford -E08000033,7.0,15.0,Calderdale -E08000034,8.0,15.0,Kirklees -E08000035,8.0,16.0,Leeds -E08000036,9.0,15.0,Wakefield -E08000037,6.0,19.0,Gateshead -E09000001,11.0,2.0,City of London -E09000002,13.0,3.0,Barking and Dagenham -E09000003,10.0,5.0,Barnet -E09000004,12.0,1.0,Bexley -E09000005,10.0,4.0,Brent -E09000006,11.0,0.0,Bromley -E09000007,11.0,4.0,Camden -E09000008,10.0,0.0,Croydon -E09000009,9.0,4.0,Ealing -E09000010,11.0,5.0,Enfield -E09000011,11.0,1.0,Greenwich -E09000012,12.0,3.0,Hackney -E09000013,8.0,3.0,Hammersmith and Fulham -E09000014,12.0,4.0,Haringey -E09000015,8.0,4.0,Harrow -E09000016,14.0,3.0,Havering -E09000017,7.0,4.0,Hillingdon -E09000018,7.0,3.0,Hounslow -E09000019,11.0,3.0,Islington -E09000020,9.0,3.0,Kensington and Chelsea -E09000021,7.0,1.0,Kingston upon Thames -E09000022,10.0,2.0,Lambeth -E09000023,10.0,1.0,Lewisham -E09000024,8.0,1.0,Merton -E09000025,13.0,2.0,Newham 
-E09000026,14.0,4.0,Redbridge -E09000027,8.0,2.0,Richmond upon Thames -E09000028,9.0,1.0,Southwark -E09000029,9.0,0.0,Sutton -E09000030,12.0,2.0,Tower Hamlets -E09000031,13.0,4.0,Waltham Forest -E09000032,9.0,2.0,Wandsworth -E09000033,10.0,3.0,Westminster -N09000001,-4.0,16.0,Antrim and Newtownabbey -N09000002,-5.0,16.0,"Armagh City, Banbridge and Craigavon" -N09000003,-4.0,17.0,Belfast -N09000004,-5.0,18.0,Causeway Coast and Glens -N09000005,-6.0,17.0,Derry City and Strabane -N09000006,-6.0,16.0,Fermanagh and Omagh -N09000007,-5.0,15.0,Lisburn and Castlereagh -N09000008,-4.0,18.0,Mid and East Antrim -N09000009,-5.0,17.0,Mid Ulster -N09000010,-4.0,15.0,"Newry, Mourne and Down" -S12000005,2.0,24.0,Clackmannanshire -S12000006,4.0,20.0,Dumfries and Galloway -S12000008,3.0,20.0,East Ayrshire -S12000010,5.0,22.0,East Lothian -S12000011,2.0,20.0,East Renfrewshire -S12000013,-1.0,27.0,Na h-Eileanan Siar -S12000014,2.0,23.0,Falkirk -S12000017,1.0,26.0,Highland -S12000018,0.0,21.0,Inverclyde -S12000019,3.0,21.0,Midlothian -S12000020,2.0,26.0,Moray -S12000021,1.0,20.0,North Ayrshire -S12000023,4.0,28.0,Orkney Islands -S12000026,4.0,21.0,Scottish Borders -S12000027,5.0,30.0,Shetland Islands -S12000028,1.0,19.0,South Ayrshire -S12000029,2.0,21.0,South Lanarkshire -S12000030,1.0,24.0,Stirling -S12000033,4.0,26.0,Aberdeen City -S12000034,3.0,26.0,Aberdeenshire -S12000035,0.0,24.0,Argyll and Bute -S12000036,4.0,22.0,City of Edinburgh -S12000038,1.0,22.0,Renfrewshire -S12000039,0.0,23.0,West Dunbartonshire -S12000040,3.0,22.0,West Lothian -S12000041,2.0,25.0,Angus -S12000042,3.0,25.0,Dundee City -S12000045,1.0,23.0,East Dunbartonshire -S12000047,3.0,24.0,Fife -S12000048,1.0,25.0,Perth and Kinross -S12000049,1.0,21.0,Glasgow City -S12000050,2.0,22.0,North Lanarkshire -W06000001,-2.0,12.0,Isle of Anglesey -W06000002,-2.0,10.0,Gwynedd -W06000003,-1.0,10.0,Conwy -W06000004,0.0,10.0,Denbighshire -W06000005,0.0,11.0,Flintshire -W06000006,1.0,10.0,Wrexham -W06000008,-2.0,9.0,Ceredigion 
-W06000009,-5.0,6.0,Pembrokeshire -W06000010,-4.0,6.0,Carmarthenshire -W06000011,-4.0,5.0,Swansea -W06000012,-3.0,5.0,Neath Port Talbot -W06000013,-3.0,6.0,Bridgend -W06000014,-2.0,4.0,Vale of Glamorgan -W06000015,-2.0,5.0,Cardiff -W06000016,-3.0,7.0,Rhondda Cynon Taf -W06000018,-2.0,6.0,Caerphilly -W06000019,0.0,9.0,Blaenau Gwent -W06000020,-2.0,7.0,Torfaen -W06000021,-1.0,8.0,Monmouthshire -W06000022,-1.0,5.0,Newport -W06000023,-1.0,9.0,Powys -W06000024,-2.0,8.0,Merthyr Tydfil diff --git a/scripts/BUG_REPORT_build_from_dataframe.md b/scripts/BUG_REPORT_build_from_dataframe.md deleted file mode 100644 index 503557e56..000000000 --- a/scripts/BUG_REPORT_build_from_dataframe.md +++ /dev/null @@ -1,172 +0,0 @@ -# Bug Report: Entity-Level Aggregation Missing in `build_from_dataframe` - -## Summary - -The `build_from_dataframe` method in `policyengine_uk` does not aggregate person-level data to entity-level before calling `set_input()`, causing UK country filtering (e.g., Wales) to fail with array length mismatch errors. - -## Affected Repository - -**Repository:** `policyengine-uk` -**File:** `policyengine_uk/simulation.py` -**Method:** `build_from_dataframe()` -**Approximate Lines:** 281-286 (may vary by version) - -## Symptoms - -When running a UK simulation filtered to a specific country (e.g., Wales), the following error occurs: - -``` -ValueError: Unable to set value "[ True True True ... False False False]" -for variable "would_evade_tv_licence_fee", as its length is 8470 -while there are 4108 households in the simulation. 
-``` - -The error occurs because: -- 8,470 = number of Welsh **persons** in the dataset -- 4,108 = number of Welsh **households** in the dataset -- The code tries to assign person-level arrays to household-level variables - -## Root Cause - -### The Bug Location - -```python -# In policyengine_uk/simulation.py, build_from_dataframe method: - -# Set input values for each variable and time period -for column in df: - variable, time_period = column.split("__") - if variable not in self.tax_benefit_system.variables: - continue - self.set_input(variable, time_period, df[column]) # <-- BUG HERE -``` - -### Why This Fails - -1. **`to_input_dataframe()`** exports ALL variables at **person level** (one row per person), regardless of the variable's native entity. This is by design - it creates a flat DataFrame where each row represents a person. - -2. **`build_from_dataframe()`** correctly builds the entity structure: - - Extracts `person_household_id` to determine household membership - - Creates the correct number of households (e.g., 4,108 for Wales) - - Sets up person-to-household relationships properly - -3. **BUT** the loop that sets variable values does NOT check if aggregation is needed. It passes person-level arrays (8,470 values) directly to `set_input()` for household-level variables that only have 4,108 entities. 
- -### The Correct Approach - -The `policyengine_core` library's `build_from_dataset()` method handles this correctly in `policyengine_core/simulations/simulation.py`: - -```python -# From policyengine_core/simulations/simulation.py, build_from_dataset method: - -if len(data[variable]) != len(population.ids): - population: GroupPopulation - entity_level_data = population.value_from_first_person(data[variable]) -else: - entity_level_data = data[variable] - -self.set_input(variable_name, time_period, entity_level_data) -``` - -## Required Fix - -### Current Buggy Code - -```python -# Set input values for each variable and time period -for column in df: - variable, time_period = column.split("__") - if variable not in self.tax_benefit_system.variables: - continue - self.set_input(variable, time_period, df[column]) -``` - -### Fixed Code - -```python -# Set input values for each variable and time period -for column in df: - variable, time_period = column.split("__") - if variable not in self.tax_benefit_system.variables: - continue - - # Get variable metadata and target population - var_meta = self.tax_benefit_system.get_variable(variable) - entity = var_meta.entity - population = self.get_population(entity.plural) - - data = df[column].values - - # Check if aggregation is needed (data is person-level but variable is group-level) - if len(data) != population.count: - # Aggregate from person-level to entity-level using first person's value - data = population.value_from_first_person(data) - - self.set_input(variable, time_period, data) -``` - -## Technical Details - -### What `value_from_first_person()` Does - -This method aggregates person-level data to group-level by taking the value from the first person in each group. For household-level variables (like `would_evade_tv_licence_fee`), all persons in a household share the same value, so taking the first person's value is correct. - -The method is defined in `policyengine_core` on `GroupPopulation` objects. 
- -### Why This Pattern Works - -- Person-level variables: `len(data) == population.count` (no aggregation needed) -- Group-level variables exported at person level: `len(data) != population.count` (aggregation needed) - -### Entity Structure in UK Model - -The UK tax-benefit system has these entities: -- `person` - Individual people -- `benunit` - Benefit units (roughly: nuclear families) -- `household` - Households (one or more benefit units sharing accommodation) - -When filtering to Wales: -- ~8,470 persons -- ~4,108 households -- Variable ratio depending on household composition - -## Reproduction Steps - -1. Create a UK macro simulation: `Simulation(country="uk", scope="macro")` -2. Filter to a UK country: `Simulation(country="uk", scope="macro", region="country/wales")` -3. The filtering process: - - Calls `to_input_dataframe()` on the baseline simulation - - Filters the DataFrame to Welsh persons only - - Calls `Microsimulation(dataset=filtered_df)` which invokes `build_from_dataframe()` -4. Error occurs when `build_from_dataframe()` tries to set household-level variables - -## Verification - -A Jupyter notebook proving this bug exists at: -`policyengine-api/scripts/prove_build_from_dataframe_bug.ipynb` - -The notebook: -1. Creates a UK simulation and exports to DataFrame -2. Filters to Wales (8,470 persons, 4,108 households) -3. Manually traces through `build_from_dataframe()` step by step -4. Shows entity structure is correctly built (4,108 households) -5. Demonstrates the `set_input()` call fails with length mismatch -6. 
Shows the fix (aggregation) works correctly - -## Impact - -This bug affects: -- UK country filtering (`country/wales`, `country/scotland`, `country/northern_ireland`, `country/england`) -- Any code path that uses `build_from_dataframe()` with a filtered DataFrame - -This bug does NOT affect: -- Constituency filtering (uses weight adjustment, not DataFrame subsetting) -- Local authority filtering (uses weight adjustment, not DataFrame subsetting) -- UK-wide simulations (no filtering needed) - -## Notes for Implementation - -1. The fix is minimal - just wrap the existing `set_input()` call with a length check and aggregation -2. No new dependencies are needed - `value_from_first_person()` is already available on population objects -3. The fix matches the existing pattern in `policyengine_core`'s `build_from_dataset()` method -4. Consider adding a unit test that creates a simulation from a filtered DataFrame and verifies household-level variables work correctly diff --git a/scripts/diagnose_country_filtering.ipynb b/scripts/diagnose_country_filtering.ipynb deleted file mode 100644 index e9d2b1498..000000000 --- a/scripts/diagnose_country_filtering.ipynb +++ /dev/null @@ -1,503 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Diagnosing UK Country Filtering Issue in policyengine.py\n", - "\n", - "This notebook tests whether `policyengine.py` properly filters simulations by UK country (e.g., Wales).\n", - "\n", - "## The Issue\n", - "When running a simulation filtered to a specific UK country (e.g., `country/wales`), we get:\n", - "```\n", - "ValueError: Unable to set value \"[ True True True ... 
False False False]\" for variable \n", - "\"would_evade_tv_licence_fee\", as its length is 8470 while there are 4108 households in the simulation.\n", - "```\n", - "\n", - "## Hypothesis\n", - "The `to_input_dataframe()` method doesn't export `person_household_id`, causing the filtered simulation\n", - "to lose entity relationship information and incorrectly set up household counts." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Step 1: Setup and Imports" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "from policyengine import Simulation\n", - "\n", - "# Check policyengine version\n", - "import policyengine\n", - "print(f\"policyengine version: {policyengine.__version__}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Step 2: Create a Baseline UK Simulation\n", - "\n", - "First, let's create a standard UK-wide simulation and examine its structure." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Create a UK-wide simulation (no region filter)\n", - "print(\"Creating UK-wide simulation...\")\n", - "sim_uk = Simulation(country=\"uk\", scope=\"macro\")\n", - "\n", - "# Access the underlying country simulation\n", - "underlying_sim = sim_uk.baseline_simulation\n", - "\n", - "print(f\"\\n=== UK-Wide Simulation Structure ===\")\n", - "print(f\"Person count: {underlying_sim.persons.count}\")\n", - "print(f\"Household count: {underlying_sim.household.count}\")\n", - "print(f\"BenUnit count: {underlying_sim.benunit.count}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Check the country distribution in the UK simulation\n", - "country_values = sim_uk.calculate(\"country\")\n", - "print(\"\\n=== Country Distribution (Household Level) ===\")\n", - "print(country_values.value_counts())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Check person-level country distribution\n", - "country_person = underlying_sim.calculate(\"country\", map_to=\"person\")\n", - "unique, counts = np.unique(country_person, return_counts=True)\n", - "print(\"\\n=== Country Distribution (Person Level) ===\")\n", - "for u, c in zip(unique, counts):\n", - " print(f\" {u}: {c} persons\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Step 3: Test `to_input_dataframe()` Export\n", - "\n", - "Let's examine what columns are exported by `to_input_dataframe()` to see if entity linkage variables are included." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Export the simulation to a dataframe\n", - "print(\"Exporting simulation to DataFrame...\")\n", - "df = underlying_sim.to_input_dataframe()\n", - "\n", - "print(f\"\\n=== Exported DataFrame ===\")\n", - "print(f\"Shape: {df.shape}\")\n", - "print(f\"Number of columns: {len(df.columns)}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Check for entity ID and linkage columns\n", - "print(\"\\n=== Entity-Related Columns ===\")\n", - "\n", - "id_columns = [c for c in df.columns if '_id' in c.lower()]\n", - "print(f\"\\nColumns containing '_id': {len(id_columns)}\")\n", - "for col in sorted(id_columns):\n", - " print(f\" - {col}\")\n", - "\n", - "# Specifically check for critical columns\n", - "critical_cols = ['person_id', 'household_id', 'person_household_id', 'benunit_id', 'person_benunit_id']\n", - "print(f\"\\n=== Critical Entity Linkage Columns ===\")\n", - "for col_base in critical_cols:\n", - " matching = [c for c in df.columns if c.startswith(col_base)]\n", - " if matching:\n", - " print(f\" {col_base}: FOUND -> {matching}\")\n", - " else:\n", - " print(f\" {col_base}: MISSING!\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Check if person_household_id has known periods in the simulation\n", - "print(\"\\n=== Checking Known Periods for Entity Linkage Variables ===\")\n", - "\n", - "for var_name in ['person_id', 'household_id', 'person_household_id', 'person_benunit_id']:\n", - " try:\n", - " holder = underlying_sim.get_holder(var_name)\n", - " known_periods = holder.get_known_periods()\n", - " print(f\" {var_name}: known_periods = {list(known_periods)}\")\n", - " except Exception as e:\n", - " print(f\" {var_name}: ERROR - {e}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - 
"## Step 4: Simulate Country Filtering (Wales)\n", - "\n", - "Now let's create a Wales-filtered simulation and see what happens." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Create a Wales simulation\n", - "print(\"Creating Wales simulation...\")\n", - "print(\"(This may trigger the error we're diagnosing)\")\n", - "print()\n", - "\n", - "try:\n", - " sim_wales = Simulation(country=\"uk\", scope=\"macro\", region=\"country/wales\")\n", - " wales_underlying = sim_wales.baseline_simulation\n", - " \n", - " print(f\"\\n=== Wales Simulation Structure ===\")\n", - " print(f\"Person count: {wales_underlying.persons.count}\")\n", - " print(f\"Household count: {wales_underlying.household.count}\")\n", - " print(f\"BenUnit count: {wales_underlying.benunit.count}\")\n", - " \n", - " # Check if counts make sense\n", - " if wales_underlying.household.count == wales_underlying.persons.count:\n", - " print(\"\\n*** WARNING: Household count equals person count! ***\")\n", - " print(\"This suggests entity linkage was lost during filtering.\")\n", - " \n", - "except Exception as e:\n", - " print(f\"\\n*** ERROR creating Wales simulation ***\")\n", - " print(f\"Error type: {type(e).__name__}\")\n", - " print(f\"Error message: {e}\")\n", - " import traceback\n", - " traceback.print_exc()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Step 5: Manual Reproduction of the Filtering Process\n", - "\n", - "Let's manually reproduce what `_apply_region_to_simulation` does to understand where it breaks." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Step-by-step reproduction of the filtering logic\n", - "print(\"=== Manual Reproduction of Country Filtering ===\")\n", - "\n", - "# Step 1: Export to DataFrame\n", - "print(\"\\n[Step 1] Exporting to DataFrame...\")\n", - "df = underlying_sim.to_input_dataframe()\n", - "print(f\" DataFrame shape: {df.shape}\")\n", - "print(f\" Columns with 'household': {[c for c in df.columns if 'household' in c.lower()][:10]}...\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Step 2: Calculate country at person level\n", - "print(\"\\n[Step 2] Calculating country at person level...\")\n", - "country_person_level = underlying_sim.calculate(\"country\", map_to=\"person\").values\n", - "print(f\" Country array shape: {country_person_level.shape}\")\n", - "print(f\" Unique values: {np.unique(country_person_level)}\")\n", - "\n", - "# Count Welsh persons\n", - "wales_mask = country_person_level == \"WALES\"\n", - "print(f\" Welsh persons: {wales_mask.sum()}\")\n", - "print(f\" Non-Welsh persons: {(~wales_mask).sum()}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Step 3: Filter DataFrame to Wales\n", - "print(\"\\n[Step 3] Filtering DataFrame to Wales...\")\n", - "df_wales = df[wales_mask]\n", - "print(f\" Filtered DataFrame shape: {df_wales.shape}\")\n", - "\n", - "# Check what person_household_id looks like in filtered data\n", - "phh_cols = [c for c in df_wales.columns if 'person_household_id' in c]\n", - "if phh_cols:\n", - " print(f\" person_household_id columns: {phh_cols}\")\n", - " for col in phh_cols:\n", - " vals = df_wales[col].values\n", - " print(f\" {col}: {len(np.unique(vals))} unique values\")\n", - "else:\n", - " print(\" person_household_id: NOT IN DATAFRAME!\")\n", - " print(\" This is likely the root cause 
of the issue.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Step 4: Try to create a new simulation from filtered DataFrame\n", - "print(\"\\n[Step 4] Creating new simulation from filtered DataFrame...\")\n", - "\n", - "from policyengine_uk import Microsimulation\n", - "\n", - "try:\n", - " new_sim = Microsimulation(dataset=df_wales)\n", - " \n", - " print(f\" New simulation created!\")\n", - " print(f\" Person count: {new_sim.persons.count}\")\n", - " print(f\" Household count: {new_sim.household.count}\")\n", - " \n", - " # Critical check\n", - " if new_sim.household.count == new_sim.persons.count:\n", - " print(\"\\n *** CONFIRMED: Household count equals person count! ***\")\n", - " print(\" The entity linkage was lost because person_household_id is missing.\")\n", - " elif new_sim.household.count == len(np.unique(df_wales.iloc[:, 0])):\n", - " print(\"\\n *** Household count matches first column's unique values ***\")\n", - " print(\" This confirms the fallback behavior in build_from_dataset()\")\n", - " \n", - "except Exception as e:\n", - " print(f\" Error creating simulation: {e}\")\n", - " import traceback\n", - " traceback.print_exc()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Step 5: Try to calculate would_evade_tv_licence_fee (this should trigger the error)\n", - "print(\"\\n[Step 5] Attempting to calculate would_evade_tv_licence_fee...\")\n", - "\n", - "try:\n", - " # This calculation uses random(household), which will fail if household count is wrong\n", - " result = new_sim.calculate(\"would_evade_tv_licence_fee\")\n", - " print(f\" Calculation succeeded!\")\n", - " print(f\" Result shape: {result.shape}\")\n", - " print(f\" Result dtype: {result.dtype}\")\n", - "except ValueError as e:\n", - " print(f\" *** ValueError (expected): ***\")\n", - " print(f\" {e}\")\n", - " \n", - " # Parse the error to 
understand the mismatch\n", - " error_str = str(e)\n", - " if \"length is\" in error_str and \"while there are\" in error_str:\n", - " print(f\"\\n This confirms the array size mismatch issue.\")\n", - "except Exception as e:\n", - " print(f\" Unexpected error: {type(e).__name__}: {e}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Step 6: Deeper Investigation - What Does household_id Return?\n", - "\n", - "Let's check what `household_id` returns in the broken simulation." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Check household_id in the new (potentially broken) simulation\n", - "print(\"=== Investigating household_id in Filtered Simulation ===\")\n", - "\n", - "try:\n", - " # This is what random() calls internally\n", - " hh_ids = new_sim.calculate(\"household_id\", 2025)\n", - " print(f\"household_id result length: {len(hh_ids)}\")\n", - " print(f\"household_id unique count: {len(np.unique(hh_ids))}\")\n", - " print(f\"Expected household count: {new_sim.household.count}\")\n", - " \n", - " if len(hh_ids) != new_sim.household.count:\n", - " print(f\"\\n*** MISMATCH: household_id has {len(hh_ids)} values but simulation has {new_sim.household.count} households ***\")\n", - "except Exception as e:\n", - " print(f\"Error: {e}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Check the holder for household_id\n", - "print(\"\\n=== Checking household_id Holder ===\")\n", - "try:\n", - " holder = new_sim.get_holder(\"household_id\")\n", - " known_periods = holder.get_known_periods()\n", - " print(f\"Known periods: {list(known_periods)}\")\n", - " \n", - " for period in known_periods:\n", - " arr = holder.get_array(period)\n", - " print(f\" Period {period}: array shape = {arr.shape if arr is not None else 'None'}\")\n", - "except Exception as e:\n", - " print(f\"Error: {e}\")" - ] - }, - { - 
"cell_type": "markdown", - "metadata": {}, - "source": [ - "## Step 7: Compare with Working Approaches (Constituency/LA)\n", - "\n", - "Constituency and LA filtering use weight adjustment instead of DataFrame subsetting. Let's verify this works." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Test constituency filtering (should work)\n", - "print(\"=== Testing Constituency Filtering (Should Work) ===\")\n", - "\n", - "try:\n", - " sim_constituency = Simulation(country=\"uk\", scope=\"macro\", region=\"constituency/Cardiff South and Penarth\")\n", - " const_underlying = sim_constituency.baseline_simulation\n", - " \n", - " print(f\"Constituency simulation created successfully!\")\n", - " print(f\" Person count: {const_underlying.persons.count}\")\n", - " print(f\" Household count: {const_underlying.household.count}\")\n", - " \n", - " # Try the problematic calculation\n", - " result = sim_constituency.calculate(\"would_evade_tv_licence_fee\")\n", - " print(f\" would_evade_tv_licence_fee calculated successfully!\")\n", - " print(f\" Result length: {len(result)}\")\n", - " \n", - "except Exception as e:\n", - " print(f\"Error: {type(e).__name__}: {e}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Test local authority filtering (should work)\n", - "print(\"\\n=== Testing Local Authority Filtering (Should Work) ===\")\n", - "\n", - "try:\n", - " sim_la = Simulation(country=\"uk\", scope=\"macro\", region=\"local_authority/Cardiff\")\n", - " la_underlying = sim_la.baseline_simulation\n", - " \n", - " print(f\"LA simulation created successfully!\")\n", - " print(f\" Person count: {la_underlying.persons.count}\")\n", - " print(f\" Household count: {la_underlying.household.count}\")\n", - " \n", - " # Try the problematic calculation\n", - " result = sim_la.calculate(\"would_evade_tv_licence_fee\")\n", - " print(f\" 
would_evade_tv_licence_fee calculated successfully!\")\n", - " print(f\" Result length: {len(result)}\")\n", - " \n", - "except Exception as e:\n", - " print(f\"Error: {type(e).__name__}: {e}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Summary and Conclusions" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(\"=\"*70)\n", - "print(\"DIAGNOSIS SUMMARY\")\n", - "print(\"=\"*70)\n", - "\n", - "print(\"\"\"\n", - "Based on the tests above:\n", - "\n", - "1. COUNTRY FILTERING (country/wales):\n", - " - Uses to_input_dataframe() + DataFrame subsetting + new Microsimulation()\n", - " - FAILS because person_household_id is not exported\n", - " - Results in household count = person count (entity linkage lost)\n", - "\n", - "2. CONSTITUENCY FILTERING (constituency/...):\n", - " - Uses weight adjustment on existing simulation\n", - " - WORKS because entity structure is preserved\n", - "\n", - "3. 
LOCAL AUTHORITY FILTERING (local_authority/...):\n", - " - Uses weight adjustment on existing simulation \n", - " - WORKS because entity structure is preserved\n", - "\n", - "ROOT CAUSE:\n", - "- to_input_dataframe() only exports variables with known periods\n", - "- person_household_id doesn't have known periods (it's derived from dataset structure)\n", - "- When building from filtered DataFrame, the fallback creates 1 household per person\n", - "\n", - "RECOMMENDED FIX:\n", - "- Option A: Fix to_input_dataframe() to always export entity linkage variables\n", - "- Option B: Use weight-zeroing for country filtering (like constituency/LA)\n", - "\"\"\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.11.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/scripts/prove_build_from_dataframe_bug.ipynb b/scripts/prove_build_from_dataframe_bug.ipynb deleted file mode 100644 index a65202fc9..000000000 --- a/scripts/prove_build_from_dataframe_bug.ipynb +++ /dev/null @@ -1,841 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "cell-0", - "metadata": {}, - "source": [ - "# Proving the Bug in policyengine_uk's build_from_dataframe Method\n", - "\n", - "This notebook proves that the UK country filtering bug is caused by `policyengine_uk`'s \n", - "`build_from_dataframe` method not handling entity-level aggregation.\n", - "\n", - "## The Bug Location\n", - "**File:** `policyengine_uk/simulation.py` \n", - "**Method:** `build_from_dataframe()` \n", - "**Lines:** 281-286\n", - "\n", - "```python\n", - "# Set input values for each variable and time period\n", - "for column in df:\n", - " variable, time_period = column.split(\"__\")\n", - " if variable not in self.tax_benefit_system.variables:\n", - " continue\n", - " self.set_input(variable, time_period, df[column]) # <-- BUG: No entity-level check!\n", - "```\n", - 
"\n", - "## The Problem\n", - "1. `to_input_dataframe()` exports ALL variables at **person level** (one row per person)\n", - "2. `build_from_dataframe()` correctly builds entity structure with proper counts\n", - "3. BUT it then tries to `set_input()` with person-level arrays for household-level variables\n", - "4. This causes a length mismatch error" - ] - }, - { - "cell_type": "markdown", - "id": "cell-1", - "metadata": {}, - "source": [ - "## Step 1: Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "cell-2", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "policyengine_uk version: unknown\n", - "policyengine_uk location: /opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_uk/__init__.py\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "import traceback\n", - "import inspect\n", - "\n", - "from policyengine import Simulation\n", - "from policyengine_uk import Simulation as UKSimulation\n", - "\n", - "# Show where policyengine_uk is loaded from\n", - "import policyengine_uk\n", - "version = getattr(policyengine_uk, '__version__', 'unknown')\n", - "print(f\"policyengine_uk version: {version}\")\n", - "print(f\"policyengine_uk location: {policyengine_uk.__file__}\")" - ] - }, - { - "cell_type": "markdown", - "id": "cell-3", - "metadata": {}, - "source": [ - "## Step 2: Examine the Buggy Code\n", - "\n", - "Let's look at the actual `build_from_dataframe` method to confirm the bug." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "cell-4", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "=== build_from_dataframe source code ===\n", - " def build_from_dataframe(self, df: pd.DataFrame) -> None:\n", - " \"\"\"Build simulation from a pandas DataFrame.\n", - "\n", - " Args:\n", - " df: DataFrame with columns in format \"variable_name__time_period\"\n", - " \"\"\"\n", - "\n", - " def get_first_array(variable_name: str) -> pd.Series:\n", - " \"\"\"Extract the first array for a given variable name pattern.\"\"\"\n", - " columns = df.columns[df.columns.str.contains(variable_name + \"__\")]\n", - " return df[columns[0]]\n", - "\n", - " # Extract ID columns\n", - " (\n", - " person_id,\n", - " person_benunit_id,\n", - " person_household_id,\n", - " benunit_id,\n", - " household_id,\n", - " ) = map(\n", - " get_first_array,\n", - " [\n", - " \"person_id\",\n", - " \"person_benunit_id\",\n", - " \"person_household_id\",\n", - " \"benunit_id\",\n", - " \"household_id\",\n", - " ],\n", - " )\n", - "\n", - " # Build entity structure\n", - " self.build_from_ids(\n", - " person_id,\n", - " person_benunit_id,\n", - " person_household_id,\n", - " benunit_id,\n", - " household_id,\n", - " )\n", - "\n", - " # Set input values for each variable and time period\n", - " for column in df:\n", - " variable, time_period = column.split(\"__\")\n", - " if variable not in self.tax_benefit_system.variables:\n", - " continue\n", - " self.set_input(variable, time_period, df[column])\n", - "\n" - ] - } - ], - "source": [ - "# Show the source code of build_from_dataframe\n", - "print(\"=== build_from_dataframe source code ===\")\n", - "print(inspect.getsource(UKSimulation.build_from_dataframe))" - ] - }, - { - "cell_type": "markdown", - "id": "cell-5", - "metadata": {}, - "source": [ - "## Step 3: Create Test Data\n", - "\n", - "Create a UK simulation and export to DataFrame, then filter to Wales." 
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "cell-6", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Creating UK-wide simulation...\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "No data provided, using default dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n", - "Using dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n", - "Downloading enhanced_frs_2023_24.h5 from bucket policyengine-uk-data-private\n", - "WARNING:root:No version specified for policyengine-uk-data-private, enhanced_frs_2023_24.h5. Using latest version: 1.29.4\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "UK-wide entity counts:\n", - " Persons: 115,612\n", - " Households: 53,508\n" - ] - } - ], - "source": [ - "# Create UK-wide simulation\n", - "print(\"Creating UK-wide simulation...\")\n", - "sim_uk = Simulation(country=\"uk\", scope=\"macro\")\n", - "underlying_sim = sim_uk.baseline_simulation\n", - "\n", - "print(f\"\\nUK-wide entity counts:\")\n", - "print(f\" Persons: {underlying_sim.persons.count:,}\")\n", - "print(f\" Households: {underlying_sim.household.count:,}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cell-7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Exporting to DataFrame...\n", - "\n", - "Filtered DataFrame:\n", - " Rows (Welsh persons): 8,470\n", - " Columns: 1,127\n" - ] - } - ], - "source": [ - "# Export to DataFrame and filter to Wales\n", - "print(\"Exporting to DataFrame...\")\n", - "df = underlying_sim.to_input_dataframe()\n", - "\n", - "# Filter to Wales\n", - "country_person = underlying_sim.calculate(\"country\", map_to=\"person\").values\n", - "wales_mask = country_person == \"WALES\"\n", - "df_wales = df[wales_mask]\n", - "\n", - "print(f\"\\nFiltered DataFrame:\")\n", - "print(f\" Rows (Welsh persons): 
{len(df_wales):,}\")\n", - "print(f\" Columns: {len(df_wales.columns):,}\")" - ] - }, - { - "cell_type": "markdown", - "id": "cell-8", - "metadata": {}, - "source": [ - "## Step 4: Prove the DataFrame Has Person-Level Data for Household Variables\n", - "\n", - "This is the key insight: `to_input_dataframe()` exports EVERYTHING at person level." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "cell-9", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "=== Household-Level Variables in DataFrame ===\n", - "Found 392 household-level variable columns in DataFrame\n", - "\n", - "First 10 household variables:\n", - " - corporate_wealth__2023\n", - " - corporate_wealth__2024\n", - " - corporate_wealth__2025\n", - " - corporate_wealth__2026\n", - " - corporate_wealth__2027\n", - " - corporate_wealth__2028\n", - " - corporate_wealth__2029\n", - " - corporate_wealth__2030\n", - " - non_residential_property_value__2023\n", - " - non_residential_property_value__2024\n" - ] - } - ], - "source": [ - "# Find household-level variables in the DataFrame\n", - "print(\"=== Household-Level Variables in DataFrame ===\")\n", - "\n", - "tax_benefit_system = underlying_sim.tax_benefit_system\n", - "household_vars_in_df = []\n", - "\n", - "for col in df_wales.columns:\n", - " var_name = col.split(\"__\")[0]\n", - " if var_name in tax_benefit_system.variables:\n", - " var_meta = tax_benefit_system.get_variable(var_name)\n", - " if var_meta.entity.key == \"household\":\n", - " household_vars_in_df.append((col, var_name))\n", - "\n", - "print(f\"Found {len(household_vars_in_df)} household-level variable columns in DataFrame\")\n", - "print(f\"\\nFirst 10 household variables:\")\n", - "for col, var_name in household_vars_in_df[:10]:\n", - " print(f\" - {col}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "cell-10", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - 
"text": [ - "=== THE CRITICAL MISMATCH ===\n", - "\n", - "DataFrame rows (person-level): 8,470\n", - "Expected Welsh households: 4,108\n", - "\n", - "Example: 'corporate_wealth__2025'\n", - " Data length in DataFrame: 8,470\n", - " Should be (household count): 4,108\n", - "\n", - " MISMATCH: 8,470 != 4,108\n", - "\n", - "This is why set_input() fails!\n" - ] - } - ], - "source": [ - "# Show the mismatch: DataFrame rows vs expected household count\n", - "print(\"=== THE CRITICAL MISMATCH ===\")\n", - "print()\n", - "\n", - "# Get expected Welsh household count from person_household_id\n", - "phh_col = [c for c in df_wales.columns if c.startswith('person_household_id__')][0]\n", - "welsh_household_count = df_wales[phh_col].nunique()\n", - "\n", - "print(f\"DataFrame rows (person-level): {len(df_wales):,}\")\n", - "print(f\"Expected Welsh households: {welsh_household_count:,}\")\n", - "print()\n", - "\n", - "# Show a specific household variable\n", - "example_var = \"corporate_wealth__2025\" if \"corporate_wealth__2025\" in df_wales.columns else household_vars_in_df[0][0]\n", - "print(f\"Example: '{example_var}'\")\n", - "print(f\" Data length in DataFrame: {len(df_wales[example_var]):,}\")\n", - "print(f\" Should be (household count): {welsh_household_count:,}\")\n", - "print()\n", - "print(f\" MISMATCH: {len(df_wales[example_var]):,} != {welsh_household_count:,}\")\n", - "print()\n", - "print(\"This is why set_input() fails!\")" - ] - }, - { - "cell_type": "markdown", - "id": "cell-11", - "metadata": {}, - "source": [ - "## Step 5: Trace Through build_from_dataframe Step-by-Step\n", - "\n", - "Let's manually execute what `build_from_dataframe` does to see exactly where it fails." 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "cell-12", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "=== Step 5a: Extract ID columns ===\n", - "person_id length: 8470\n", - "person_household_id length: 8470\n", - "person_household_id unique values: 4108\n", - "household_id length: 8470\n", - "household_id unique values: 4108\n" - ] - } - ], - "source": [ - "# Step 5a: Extract ID columns (lines 249-270 of build_from_dataframe)\n", - "print(\"=== Step 5a: Extract ID columns ===\")\n", - "\n", - "def get_first_array(df, variable_name):\n", - " columns = df.columns[df.columns.str.contains(variable_name + \"__\")]\n", - " return df[columns[0]]\n", - "\n", - "person_id = get_first_array(df_wales, \"person_id\")\n", - "person_benunit_id = get_first_array(df_wales, \"person_benunit_id\")\n", - "person_household_id = get_first_array(df_wales, \"person_household_id\")\n", - "benunit_id = get_first_array(df_wales, \"benunit_id\")\n", - "household_id = get_first_array(df_wales, \"household_id\")\n", - "\n", - "print(f\"person_id length: {len(person_id)}\")\n", - "print(f\"person_household_id length: {len(person_household_id)}\")\n", - "print(f\"person_household_id unique values: {person_household_id.nunique()}\")\n", - "print(f\"household_id length: {len(household_id)}\")\n", - "print(f\"household_id unique values: {household_id.nunique()}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "cell-13", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "=== Step 5b: Build entity structure (build_from_ids) ===\n", - "Person entity count: 8470\n", - "Benunit entity count: 4664\n", - "Household entity count: 4108\n", - "\n", - "Entity structure is CORRECT! 
4108 households were created.\n" - ] - } - ], - "source": [ - "# Step 5b: Build entity structure (lines 273-279 - build_from_ids)\n", - "print(\"\\n=== Step 5b: Build entity structure (build_from_ids) ===\")\n", - "\n", - "from policyengine_core.simulations.simulation_builder import SimulationBuilder\n", - "from policyengine_uk.tax_benefit_system import CountryTaxBenefitSystem\n", - "\n", - "# Create a fresh simulation to test\n", - "test_tbs = CountryTaxBenefitSystem()\n", - "builder = SimulationBuilder()\n", - "builder.populations = test_tbs.instantiate_entities()\n", - "\n", - "# Declare entities - this is what build_from_ids does\n", - "builder.declare_person_entity(\"person\", person_id.values)\n", - "builder.declare_entity(\"benunit\", np.unique(benunit_id.values))\n", - "builder.declare_entity(\"household\", np.unique(household_id.values))\n", - "\n", - "print(f\"Person entity count: {len(builder.populations['person'].ids)}\")\n", - "print(f\"Benunit entity count: {len(builder.populations['benunit'].ids)}\")\n", - "print(f\"Household entity count: {len(builder.populations['household'].ids)}\")\n", - "print()\n", - "print(\"Entity structure is CORRECT! 
4108 households were created.\")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "cell-14", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "=== Step 5c: Complete entity setup ===\n", - "Test simulation created:\n", - " Persons: 8470\n", - " Households: 4108\n", - "\n", - "Entity counts are CORRECT at this point!\n" - ] - } - ], - "source": [ - "# Step 5c: Complete entity setup with joins\n", - "print(\"\\n=== Step 5c: Complete entity setup ===\")\n", - "\n", - "builder.join_with_persons(\n", - " builder.populations[\"benunit\"],\n", - " person_benunit_id.values,\n", - " np.array([\"member\"] * len(person_benunit_id)),\n", - ")\n", - "builder.join_with_persons(\n", - " builder.populations[\"household\"],\n", - " person_household_id.values,\n", - " np.array([\"member\"] * len(person_household_id)),\n", - ")\n", - "\n", - "# Create simulation with these populations\n", - "from policyengine_core.simulations import Simulation as CoreSimulation\n", - "from policyengine_core.tracers import SimpleTracer\n", - "\n", - "class TestSimulation(CoreSimulation):\n", - " default_input_period = 2025\n", - " default_calculation_period = 2025\n", - "\n", - "test_sim = TestSimulation.__new__(TestSimulation)\n", - "test_sim.tax_benefit_system = test_tbs\n", - "test_sim.branch_name = \"default\"\n", - "test_sim.invalidated_caches = set()\n", - "test_sim.branches = {}\n", - "\n", - "# Initialize required attributes that build_from_populations expects\n", - "test_sim.debug = False\n", - "test_sim.trace = False\n", - "test_sim.tracer = SimpleTracer()\n", - "test_sim.opt_out_cache = False\n", - "test_sim.max_spiral_loops = 10\n", - "test_sim.memory_config = None\n", - "test_sim._data_storage_dir = None\n", - "\n", - "test_sim.build_from_populations(builder.populations)\n", - "\n", - "print(f\"Test simulation created:\")\n", - "print(f\" Persons: {test_sim.persons.count}\")\n", - "print(f\" Households: 
{test_sim.household.count}\")\n", - "print()\n", - "print(\"Entity counts are CORRECT at this point!\")" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "cell-15", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "=== Step 5d: THE BUG - set_input without aggregation ===\n", - "\n", - "Attempting to set 'corporate_wealth' for period 2025\n", - " Variable entity: household\n", - " Data length: 8470\n", - " Household count: 4108\n", - "\n", - "ERROR (expected): Unable to set value \"[ 42531.723 42531.723 42531.723 ... 145237.94 145237.94\n", - " 6483.3296]\" for variable \"corporate_wealth\", as its length is 8470 while there are 4108 households in the simulation.\n", - "\n", - "============================================================\n", - "BUG PROVEN!\n", - "============================================================\n", - "\n", - "The build_from_dataframe method calls set_input() with\n", - "person-level data (8470 values) for a household-level\n", - "variable, but there are only 4108 households.\n" - ] - } - ], - "source": [ - "# Step 5d: THE BUG - Try to set_input for a household variable with person-level data\n", - "print(\"\\n=== Step 5d: THE BUG - set_input without aggregation ===\")\n", - "print()\n", - "\n", - "# This is what build_from_dataframe does at lines 281-286:\n", - "# for column in df:\n", - "# variable, time_period = column.split(\"__\")\n", - "# if variable not in self.tax_benefit_system.variables:\n", - "# continue\n", - "# self.set_input(variable, time_period, df[column]) # <-- BUG!\n", - "\n", - "# Let's simulate this for a household variable\n", - "test_column = example_var\n", - "variable_name, time_period = test_column.split(\"__\")\n", - "\n", - "print(f\"Attempting to set '{variable_name}' for period {time_period}\")\n", - "print(f\" Variable entity: {test_tbs.get_variable(variable_name).entity.key}\")\n", - "print(f\" Data length: 
{len(df_wales[test_column])}\")\n", - "print(f\" Household count: {test_sim.household.count}\")\n", - "print()\n", - "\n", - "try:\n", - " test_sim.set_input(variable_name, time_period, df_wales[test_column].values)\n", - " print(\"SUCCESS - No error (unexpected!)\")\n", - "except ValueError as e:\n", - " print(f\"ERROR (expected): {e}\")\n", - " print()\n", - " print(\"=\"*60)\n", - " print(\"BUG PROVEN!\")\n", - " print(\"=\"*60)\n", - " print()\n", - " print(\"The build_from_dataframe method calls set_input() with\")\n", - " print(\"person-level data (8470 values) for a household-level\")\n", - " print(f\"variable, but there are only {test_sim.household.count} households.\")" - ] - }, - { - "cell_type": "markdown", - "id": "cell-16", - "metadata": {}, - "source": [ - "## Step 6: Show What the Fix Should Look Like\n", - "\n", - "The fix needs to check if aggregation is required before calling `set_input()`." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "cell-17", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "=== The Fix: Aggregate Before set_input ===\n", - "\n", - "Variable: corporate_wealth\n", - "Entity: household\n", - "Data length: 8470\n", - "Population count: 4108\n", - "\n", - "Aggregation needed: 8470 != 4108\n", - "\n", - "After aggregation: 4108 values\n", - "\n", - "SUCCESS! 
set_input worked with aggregated data.\n" - ] - } - ], - "source": [ - "# Demonstrate the correct approach: aggregate before set_input\n", - "print(\"=== The Fix: Aggregate Before set_input ===\")\n", - "print()\n", - "\n", - "variable_name, time_period = example_var.split(\"__\")\n", - "var_meta = test_tbs.get_variable(variable_name)\n", - "entity = var_meta.entity\n", - "population = test_sim.get_population(entity.plural)\n", - "\n", - "data = df_wales[example_var].values\n", - "\n", - "print(f\"Variable: {variable_name}\")\n", - "print(f\"Entity: {entity.key}\")\n", - "print(f\"Data length: {len(data)}\")\n", - "print(f\"Population count: {population.count}\")\n", - "print()\n", - "\n", - "# Check if aggregation is needed\n", - "if len(data) != population.count:\n", - " print(f\"Aggregation needed: {len(data)} != {population.count}\")\n", - " print()\n", - " \n", - " # Use value_from_first_person to aggregate\n", - " aggregated_data = population.value_from_first_person(data)\n", - " print(f\"After aggregation: {len(aggregated_data)} values\")\n", - " print()\n", - " \n", - " # Now set_input should work\n", - " try:\n", - " test_sim.set_input(variable_name, time_period, aggregated_data)\n", - " print(f\"SUCCESS! set_input worked with aggregated data.\")\n", - " except Exception as e:\n", - " print(f\"Still failed: {e}\")\n", - "else:\n", - " print(\"No aggregation needed\")" - ] - }, - { - "cell_type": "markdown", - "id": "cell-18", - "metadata": {}, - "source": [ - "## Step 7: Show the Required Code Fix\n", - "\n", - "Here's what the fixed `build_from_dataframe` method should look like." 
- ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "cell-19", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "=== Required Fix for build_from_dataframe ===\n", - "\n", - "CURRENT CODE (buggy):\n", - "```python\n", - "# Set input values for each variable and time period\n", - "for column in df:\n", - " variable, time_period = column.split(\"__\")\n", - " if variable not in self.tax_benefit_system.variables:\n", - " continue\n", - " self.set_input(variable, time_period, df[column])\n", - "```\n", - "\n", - "FIXED CODE:\n", - "```python\n", - "# Set input values for each variable and time period\n", - "for column in df:\n", - " variable, time_period = column.split(\"__\")\n", - " if variable not in self.tax_benefit_system.variables:\n", - " continue\n", - " \n", - " # Get variable metadata and target population\n", - " var_meta = self.tax_benefit_system.get_variable(variable)\n", - " entity = var_meta.entity\n", - " population = self.get_population(entity.plural)\n", - " \n", - " data = df[column].values\n", - " \n", - " # Check if aggregation is needed (data is person-level but variable is group-level)\n", - " if len(data) != population.count:\n", - " # Aggregate from person-level to entity-level\n", - " data = population.value_from_first_person(data)\n", - " \n", - " self.set_input(variable, time_period, data)\n", - "```\n", - "\n" - ] - } - ], - "source": [ - "print(\"=== Required Fix for build_from_dataframe ===\")\n", - "print()\n", - "print(\"\"\"CURRENT CODE (buggy):\n", - "```python\n", - "# Set input values for each variable and time period\n", - "for column in df:\n", - " variable, time_period = column.split(\"__\")\n", - " if variable not in self.tax_benefit_system.variables:\n", - " continue\n", - " self.set_input(variable, time_period, df[column])\n", - "```\n", - "\n", - "FIXED CODE:\n", - "```python\n", - "# Set input values for each variable and time period\n", - "for column in 
df:\n", - " variable, time_period = column.split(\"__\")\n", - " if variable not in self.tax_benefit_system.variables:\n", - " continue\n", - " \n", - " # Get variable metadata and target population\n", - " var_meta = self.tax_benefit_system.get_variable(variable)\n", - " entity = var_meta.entity\n", - " population = self.get_population(entity.plural)\n", - " \n", - " data = df[column].values\n", - " \n", - " # Check if aggregation is needed (data is person-level but variable is group-level)\n", - " if len(data) != population.count:\n", - " # Aggregate from person-level to entity-level\n", - " data = population.value_from_first_person(data)\n", - " \n", - " self.set_input(variable, time_period, data)\n", - "```\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "cell-20", - "metadata": {}, - "source": [ - "## Summary" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "cell-21", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "======================================================================\n", - "SUMMARY: BUG PROVEN\n", - "======================================================================\n", - "\n", - "LOCATION:\n", - " File: policyengine_uk/simulation.py\n", - " Method: build_from_dataframe()\n", - " Lines: 281-286\n", - "\n", - "ROOT CAUSE:\n", - " The method iterates through DataFrame columns and calls set_input()\n", - " without checking if the data length matches the target entity count.\n", - " \n", - " - to_input_dataframe() exports ALL variables at PERSON level\n", - " - build_from_ids() correctly creates entity structure (e.g., 4108 households)\n", - " - BUT the loop then tries to set 8470 person-level values for \n", - " household-level variables that only have 4108 entities\n", - "\n", - "THE FIX:\n", - " Before calling set_input(), check if len(data) != population.count.\n", - " If so, aggregate using population.value_from_first_person(data).\n", - "\n", - 
"NOTE:\n", - " This is the same aggregation logic that policyengine_core's\n", - " build_from_dataset() method uses (simulation.py lines 406-414).\n", - " The policyengine_uk version simply forgot to include it.\n", - "\n" - ] - } - ], - "source": [ - "print(\"=\"*70)\n", - "print(\"SUMMARY: BUG PROVEN\")\n", - "print(\"=\"*70)\n", - "print(\"\"\"\n", - "LOCATION:\n", - " File: policyengine_uk/simulation.py\n", - " Method: build_from_dataframe()\n", - " Lines: 281-286\n", - "\n", - "ROOT CAUSE:\n", - " The method iterates through DataFrame columns and calls set_input()\n", - " without checking if the data length matches the target entity count.\n", - " \n", - " - to_input_dataframe() exports ALL variables at PERSON level\n", - " - build_from_ids() correctly creates entity structure (e.g., 4108 households)\n", - " - BUT the loop then tries to set 8470 person-level values for \n", - " household-level variables that only have 4108 entities\n", - "\n", - "THE FIX:\n", - " Before calling set_input(), check if len(data) != population.count.\n", - " If so, aggregate using population.value_from_first_person(data).\n", - "\n", - "NOTE:\n", - " This is the same aggregation logic that policyengine_core's\n", - " build_from_dataset() method uses (simulation.py lines 406-414).\n", - " The policyengine_uk version simply forgot to include it.\n", - "\"\"\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "py-3.13", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.5" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/scripts/test_local_authority_api.py b/scripts/test_local_authority_api.py deleted file mode 100755 index 81eeb8575..000000000 --- a/scripts/test_local_authority_api.py +++ /dev/null @@ -1,570 
+0,0 @@ -#!/usr/bin/env python3 -""" -Test script for UK Local Authority API functionality. - -This script tests the economy-wide simulation API for: -1. A specific UK local authority (e.g., Leicester) -2. UK-wide calculation (to confirm local_authority_impact is returned) -3. Scotland country filter (to confirm authorities are filtered by country) - -SETUP INSTRUCTIONS: -=================== - -You need THREE terminal windows: - -Terminal 1 - Start Redis: - redis-server - -Terminal 2 - Start the API worker (handles economy calculations): - FLASK_DEBUG=1 python policyengine_api/worker.py - -Terminal 3 - Start the API server: - make debug - -Then run this script in a 4th terminal: - python scripts/test_local_authority_api.py - -NOTE: UK calculations require access to the policyengine-uk-data-private -HuggingFace repo. Make sure HUGGING_FACE_TOKEN is set in your environment. -""" - -import requests -import json -import time -import sqlite3 -from pathlib import Path - -# Configuration -API_BASE_URL = "http://127.0.0.1:5000" -COUNTRY_ID = "uk" -BASELINE_POLICY_ID = 1 # UK current law -TIME_PERIOD = 2025 -DATASET = "default" - -# Raise the UK income tax base rate by 6 percentage points (20% -> 26%) -SAMPLE_REFORM = { - "gov.hmrc.income_tax.rates.uk[0].rate": {"2025-01-01.2100-12-31": 0.26} -} - - -def print_header(title: str): - """Print a formatted header.""" - print("\n" + "=" * 70) - print(f" {title}") - print("=" * 70) - - -def print_step(step_num: int, description: str): - """Print a step description.""" - print(f"\n[Step {step_num}] {description}") - print("-" * 50) - - -def wait_for_confirmation(message: str = "Press Enter to continue..."): - """Wait for user confirmation before proceeding.""" - input(f"\n>>> {message}") - - -def check_api_health(): - """Check if the API is running and healthy.""" - print_step(0, "Checking API Health") - - try: - response = requests.get(f"{API_BASE_URL}/liveness-check", timeout=5) - if response.status_code == 200: - print(f" [OK] 
API is running at {API_BASE_URL}") - return True - else: - print(f" [ERROR] API returned status {response.status_code}") - return False - except requests.exceptions.ConnectionError: - print(f" [ERROR] Cannot connect to API at {API_BASE_URL}") - print(" Make sure the API server is running. You need 3 terminals:") - print("") - print(" Terminal 1 - Start Redis:") - print(" redis-server") - print("") - print(" Terminal 2 - Start the API worker:") - print(" FLASK_DEBUG=1 python policyengine_api/worker.py") - print("") - print(" Terminal 3 - Start the API server:") - print(" make debug") - return False - - -def create_reform_policy(): - """Create a reform policy and return its ID.""" - print_step(1, "Creating Reform Policy") - - print(f" Reform to be created:") - print(f" {json.dumps(SAMPLE_REFORM, indent=4)}") - - wait_for_confirmation("Press Enter to create the reform policy...") - - payload = { - "label": "Test LA Reform - UC Standard Allowance Increase", - "data": SAMPLE_REFORM, - } - - response = requests.post( - f"{API_BASE_URL}/{COUNTRY_ID}/policy", - json=payload, - headers={"Content-Type": "application/json"}, - ) - - print(f" Response status: {response.status_code}") - result = response.json() - print(f" Response body: {json.dumps(result, indent=4)}") - - if response.status_code in [200, 201]: - policy_id = result["result"]["policy_id"] - print(f" [OK] Reform policy created/found with ID: {policy_id}") - return policy_id - else: - print(f" [ERROR] Failed to create policy") - return None - - -def verify_baseline_policy_exists(): - """Verify the baseline (current law) policy exists.""" - print_step(2, "Verifying Baseline Policy Exists") - - print(f" Checking policy ID: {BASELINE_POLICY_ID}") - - response = requests.get( - f"{API_BASE_URL}/{COUNTRY_ID}/policy/{BASELINE_POLICY_ID}" - ) - - print(f" Response status: {response.status_code}") - - if response.status_code == 200: - result = response.json() - policy_data = result.get("result", {}) - print(f" Policy 
label: {policy_data.get('label', 'N/A')}") - print(f" [OK] Baseline policy exists") - return True - else: - print(f" [ERROR] Baseline policy not found") - print( - " You may need to initialize the database with the current law policy" - ) - return False - - -def poll_economy_endpoint( - region: str, reform_policy_id: int, description: str -): - """ - Poll the economy endpoint until the calculation is complete. - - Returns the result data or None if failed. - """ - print(f"\n Polling for: {description}") - print(f" Region: {region}") - print(f" Reform Policy ID: {reform_policy_id}") - print(f" Baseline Policy ID: {BASELINE_POLICY_ID}") - print(f" Time Period: {TIME_PERIOD}") - - url = f"{API_BASE_URL}/{COUNTRY_ID}/economy/{reform_policy_id}/over/{BASELINE_POLICY_ID}" - params = { - "region": region, - "dataset": DATASET, - "time_period": TIME_PERIOD, - "target": "general", - } - - print(f"\n Full URL: {url}") - print(f" Query params: {params}") - - wait_for_confirmation("Press Enter to start polling the API...") - - max_attempts = 60 # 5 minutes with 5-second intervals - attempt = 0 - - while attempt < max_attempts: - attempt += 1 - print(f"\n Attempt {attempt}/{max_attempts}...") - - try: - response = requests.get(url, params=params, timeout=30) - result = response.json() - - status = result.get("status") - print(f" Status: {status}") - - if status == "ok": - print(f" [OK] Calculation complete!") - return result.get("result") - elif status == "computing": - print(f" Calculation in progress... 
waiting 5 seconds") - time.sleep(5) - elif status == "error": - print(f" [ERROR] Calculation failed") - print(f" Message: {result.get('message')}") - return None - else: - print(f" Unknown status: {status}") - time.sleep(5) - - except requests.exceptions.Timeout: - print(f" Request timed out, retrying...") - time.sleep(5) - except Exception as e: - print(f" Error: {e}") - time.sleep(5) - - print(f" [ERROR] Timed out waiting for calculation") - return None - - -def display_results(result: dict, description: str): - """Display key results from the economy calculation.""" - print(f"\n Results for: {description}") - print(" " + "-" * 40) - - if result is None: - print(" No results available") - return - - # Budgetary impact - budget = result.get("budget") - if budget: - print(f"\n BUDGETARY IMPACT:") - for key, value in budget.items(): - if isinstance(value, (int, float)): - print(f" {key}: {value:,.2f}") - else: - print(f" {key}: {value}") - - # Decile impact summary - decile = result.get("decile") - if decile: - print(f"\n DECILE IMPACT (sample):") - relative = decile.get("relative", {}) - if relative: - for d in ["1", "5", "10"]: - if d in relative: - print(f" Decile {d}: {relative[d]*100:.2f}%") - - # Poverty impact - poverty = result.get("poverty") - if poverty: - print(f"\n POVERTY IMPACT:") - deep_poverty = poverty.get("deep_poverty", {}) - regular_poverty = poverty.get("poverty", {}) - if deep_poverty: - print( - f" Deep poverty change: {deep_poverty.get('change', 'N/A')}" - ) - if regular_poverty: - print( - f" Poverty change: {regular_poverty.get('change', 'N/A')}" - ) - - # Local Authority Impact (if present) - la_impact = result.get("local_authority_impact") - if la_impact: - print(f"\n LOCAL AUTHORITY IMPACT:") - by_la = la_impact.get("by_local_authority", {}) - print(f" Number of local authorities: {len(by_la)}") - - # Show first 5 local authorities - print(f" Sample local authorities:") - for i, (name, data) in enumerate(list(by_la.items())[:5]): - 
avg_change = data.get("average_household_income_change", 0) - rel_change = data.get("relative_household_income_change", 0) - print( - f" {name}: avg={avg_change:.2f}, rel={rel_change*100:.3f}%" - ) - - # Outcomes by region - outcomes = la_impact.get("outcomes_by_region", {}) - if outcomes: - print(f"\n Outcomes by UK region:") - for region, buckets in outcomes.items(): - total = sum(buckets.values()) - print(f" {region}: {total} LAs") - for bucket, count in buckets.items(): - if count > 0: - print(f" - {bucket}: {count}") - else: - print(f"\n LOCAL AUTHORITY IMPACT: Not present in response") - - # Constituency Impact (if present) - const_impact = result.get("constituency_impact") - if const_impact: - by_const = const_impact.get("by_constituency", {}) - print(f"\n CONSTITUENCY IMPACT:") - print(f" Number of constituencies: {len(by_const)}") - - -def test_local_authority_simulation(reform_policy_id: int): - """Test 1: Run simulation for a specific local authority.""" - print_header("TEST 1: Local Authority Simulation (Leicester)") - - print( - """ - This test runs an economy simulation for a specific UK local authority. - We're using Leicester as it's a well-known unitary authority. - - Expected: The API should accept the local_authority/Leicester region - and return economic impact results. - """ - ) - - wait_for_confirmation( - "Press Enter to run the local authority simulation..." 
- ) - - region = "local_authority/Leicester" - result = poll_economy_endpoint( - region, reform_policy_id, "Leicester Local Authority" - ) - - if result: - display_results(result, "Leicester Local Authority") - print( - "\n [TEST 1 PASSED] Local authority simulation completed successfully" - ) - return True - else: - print("\n [TEST 1 FAILED] Local authority simulation failed") - return False - - -def test_uk_wide_simulation(reform_policy_id: int): - """Test 2: Run UK-wide simulation and check for local_authority_impact.""" - print_header("TEST 2: UK-Wide Simulation (Check local_authority_impact)") - - print( - """ - This test runs an economy simulation for the entire UK. - - Expected: The API should return results that include: - - Standard budgetary/poverty/decile impacts - - constituency_impact (existing feature) - - local_authority_impact (NEW feature we just added) - - We'll verify that local_authority_impact is present and contains - data for all 360 UK local authorities. - """ - ) - - wait_for_confirmation("Press Enter to run the UK-wide simulation...") - - region = "uk" - result = poll_economy_endpoint(region, reform_policy_id, "UK-wide") - - if result: - display_results(result, "UK-wide") - - # Verify local_authority_impact is present - la_impact = result.get("local_authority_impact") - if la_impact: - by_la = la_impact.get("by_local_authority", {}) - if len(by_la) == 360: - print( - f"\n [OK] local_authority_impact contains all 360 local authorities" - ) - else: - print( - f"\n [WARNING] Expected 360 local authorities, got {len(by_la)}" - ) - - # Check outcomes_by_region has all UK nations - outcomes = la_impact.get("outcomes_by_region", {}) - expected_regions = [ - "uk", - "england", - "scotland", - "wales", - "northern_ireland", - ] - for r in expected_regions: - if r in outcomes: - print(f" [OK] {r} region present in outcomes") - else: - print(f" [MISSING] {r} region not in outcomes") - - print( - "\n [TEST 2 PASSED] UK-wide simulation includes 
local_authority_impact" - ) - return True - else: - print( - "\n [TEST 2 FAILED] local_authority_impact not present in response" - ) - return False - else: - print("\n [TEST 2 FAILED] UK-wide simulation failed") - return False - - -def test_wales_simulation(reform_policy_id: int): - """Test 3: Run Wales simulation and check local authorities are filtered.""" - print_header("TEST 3: Wales Simulation (Filter Check)") - - print( - """ - This test runs an economy simulation for Wales only. - - Expected: The API should return results where: - - The simulation is filtered to Wales - - If local_authority_impact is present, it should only contain - Welsh local authorities (codes starting with 'W') - - Wales has exactly 22 principal areas - - Note: The local_authority_impact breakdown may only be calculated - for UK-wide simulations. This test will verify the behavior. - """ - ) - - wait_for_confirmation("Press Enter to run the Wales simulation...") - - region = "country/wales" - result = poll_economy_endpoint(region, reform_policy_id, "Wales") - - if result: - display_results(result, "Wales") - - la_impact = result.get("local_authority_impact") - if la_impact: - by_la = la_impact.get("by_local_authority", {}) - print(f"\n Local authorities in response: {len(by_la)}") - - # If filtering is implemented, we'd expect 22 Welsh LAs - if len(by_la) == 22: - print( - f" [OK] Correctly filtered to 22 Welsh local authorities" - ) - elif len(by_la) == 360: - print( - f" [INFO] All 360 LAs returned (filtering not applied at LA level)" - ) - else: - print(f" [INFO] Got {len(by_la)} local authorities") - - print("\n [TEST 3 PASSED] Wales simulation completed") - return True - else: - print( - f"\n [INFO] local_authority_impact not present for country-level simulation" - ) - print( - " This may be expected behavior - LA breakdown may only be for UK-wide" - ) - print( - "\n [TEST 3 PASSED] Wales simulation completed (no LA breakdown)" - ) - return True - else: - print("\n [TEST 3 FAILED] 
Wales simulation failed") - return False - - -def main(): - """Main test runner.""" - print_header("UK Local Authority API Test Script") - - print( - """ - This script tests the UK Local Authority feature in the PolicyEngine API. - - It will: - 1. Check API health - 2. Create a test reform policy - 3. Verify baseline policy exists - 4. Run TEST 1: Local Authority simulation (Leicester) - 5. Run TEST 2: UK-wide simulation (check local_authority_impact) - 6. Run TEST 3: Wales simulation (filter check) - - Prerequisites (you need 3 other terminals running): - - Terminal 1: redis-server - - Terminal 2: FLASK_DEBUG=1 python policyengine_api/worker.py - - Terminal 3: make debug - - HUGGING_FACE_TOKEN environment variable set (for UK data access) - - You will be prompted before each major step. - """ - ) - - wait_for_confirmation("Press Enter to begin testing...") - - # Step 0: Check API health - if not check_api_health(): - print("\n[ABORT] API is not available. Please start the server first.") - return - - wait_for_confirmation("API is healthy. Press Enter to continue...") - - # Step 1: Create reform policy - reform_policy_id = create_reform_policy() - if reform_policy_id is None: - print("\n[ABORT] Failed to create reform policy.") - return - - # Step 2: Verify baseline policy - if not verify_baseline_policy_exists(): - print("\n[WARNING] Baseline policy not found. 
Tests may fail.") - wait_for_confirmation("Press Enter to continue anyway...") - - print_header("Setup Complete - Ready to Run Tests") - print( - f""" - Configuration: - - API Base URL: {API_BASE_URL} - - Country: {COUNTRY_ID} - - Reform Policy ID: {reform_policy_id} - - Baseline Policy ID: {BASELINE_POLICY_ID} - - Time Period: {TIME_PERIOD} - - Dataset: {DATASET} - """ - ) - - wait_for_confirmation("Press Enter to start running tests...") - - # Run tests - results = [] - - # Test 1: Local Authority simulation - results.append( - ( - "Local Authority (Leicester)", - test_local_authority_simulation(reform_policy_id), - ) - ) - wait_for_confirmation( - "Test 1 complete. Press Enter to continue to Test 2..." - ) - - # Test 2: UK-wide simulation - results.append( - ("UK-Wide with LA Impact", test_uk_wide_simulation(reform_policy_id)) - ) - wait_for_confirmation( - "Test 2 complete. Press Enter to continue to Test 3..." - ) - - # Test 3: Wales simulation - results.append(("Wales Filter", test_wales_simulation(reform_policy_id))) - - # Summary - print_header("Test Summary") - print("\n Results:") - for test_name, passed in results: - status = "[PASSED]" if passed else "[FAILED]" - print(f" {status} {test_name}") - - all_passed = all(r[1] for r in results) - if all_passed: - print("\n All tests passed!") - else: - print("\n Some tests failed. 
Review output above for details.") - - print("\n" + "=" * 70) - print(" Testing complete.") - print("=" * 70 + "\n") - - -if __name__ == "__main__": - main() diff --git a/scripts/verify_country_filtering_bug.ipynb b/scripts/verify_country_filtering_bug.ipynb deleted file mode 100644 index 73c71e701..000000000 --- a/scripts/verify_country_filtering_bug.ipynb +++ /dev/null @@ -1,1147 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "cell-0", - "metadata": {}, - "source": [ - "# Verifying UK Country Filtering Bug in policyengine.py\n", - "\n", - "This notebook verifies the bug that occurs when filtering simulations by UK country (e.g., Wales).\n", - "\n", - "## The Bug\n", - "When running a simulation filtered to a specific UK country (e.g., `region=\"country/wales\"`), we get:\n", - "```\n", - "ValueError: Unable to set value \"[ True True True ... False False False]\" for variable \n", - "\"would_evade_tv_licence_fee\", as its length is 8470 while there are 4108 households in the simulation.\n", - "```\n", - "\n", - "## Root Cause Hypothesis\n", - "The country filtering code in `policyengine/simulation.py` uses DataFrame subsetting:\n", - "1. Exports simulation to DataFrame via `to_input_dataframe()`\n", - "2. Filters DataFrame rows by country\n", - "3. Creates new simulation from filtered DataFrame\n", - "\n", - "The issue is that entity linkage variables (like `household_id`) may not be properly \n", - "exported/imported, causing entity count mismatches during variable calculations." 
- ] - }, - { - "cell_type": "markdown", - "id": "cell-1", - "metadata": {}, - "source": [ - "## Step 1: Setup and Version Check" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "cell-2", - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "import traceback\n", - "\n", - "# Import policyengine (the high-level package)\n", - "import policyengine\n", - "from policyengine import Simulation\n", - "\n", - "# Also import the underlying UK simulation for manual testing\n", - "from policyengine_uk import Microsimulation as UKMicrosimulation" - ] - }, - { - "cell_type": "markdown", - "id": "cell-3", - "metadata": {}, - "source": [ - "## Step 2: Create UK-Wide Baseline Simulation\n", - "\n", - "First, create a standard UK-wide simulation to understand the data structure." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "cell-4", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Creating UK-wide simulation...\n", - "(This may take a minute to download data)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "No data provided, using default dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n", - "Using dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n", - "Downloading enhanced_frs_2023_24.h5 from bucket policyengine-uk-data-private\n", - "WARNING:root:No version specified for policyengine-uk-data-private, enhanced_frs_2023_24.h5. 
Using latest version: 1.29.4\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "=== UK-Wide Simulation Structure ===\n", - "Person count: 115612\n", - "Household count: 53508\n", - "BenUnit count: 61858\n" - ] - } - ], - "source": [ - "# Create UK-wide simulation using policyengine.Simulation\n", - "print(\"Creating UK-wide simulation...\")\n", - "print(\"(This may take a minute to download data)\")\n", - "\n", - "sim_uk = Simulation(country=\"uk\", scope=\"macro\")\n", - "\n", - "# Get the underlying microsimulation\n", - "underlying_sim = sim_uk.baseline_simulation\n", - "\n", - "print(f\"\\n=== UK-Wide Simulation Structure ===\")\n", - "print(f\"Person count: {underlying_sim.persons.count}\")\n", - "print(f\"Household count: {underlying_sim.household.count}\")\n", - "print(f\"BenUnit count: {underlying_sim.benunit.count}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "cell-5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Creating UK-wide simulation...\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "No data provided, using default dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n", - "Using dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n", - "Downloading enhanced_frs_2023_24.h5 from bucket policyengine-uk-data-private\n", - "WARNING:root:No version specified for policyengine-uk-data-private, enhanced_frs_2023_24.h5. 
Using latest version: 1.29.4\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "=== UK-Wide Simulation Structure ===\n", - "Person count: 115612\n", - "Household count: 53508\n", - "BenUnit count: 61858\n" - ] - } - ], - "source": [ - "# Create a UK-wide simulation (no region filter)\n", - "print(\"Creating UK-wide simulation...\")\n", - "sim_uk = Simulation(country=\"uk\", scope=\"macro\")\n", - "\n", - "# Access the underlying country simulation\n", - "underlying_sim = sim_uk.baseline_simulation\n", - "\n", - "print(f\"\\n=== UK-Wide Simulation Structure ===\")\n", - "print(f\"Person count: {underlying_sim.persons.count}\")\n", - "print(f\"Household count: {underlying_sim.household.count}\")\n", - "print(f\"BenUnit count: {underlying_sim.benunit.count}\")" - ] - }, - { - "cell_type": "markdown", - "id": "cell-6", - "metadata": {}, - "source": [ - "## Step 3: Examine to_input_dataframe() Export\n", - "\n", - "This is what `_apply_region_to_simulation` uses to get the data before filtering." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cell-7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Exporting simulation to DataFrame...\n", - "\n", - "=== Exported DataFrame ===\n", - "Shape: (115612, 1127)\n", - "Number of rows (should be person count): 115612\n", - "Number of columns: 1127\n" - ] - } - ], - "source": [ - "# Export the simulation to DataFrame\n", - "print(\"Exporting simulation to DataFrame...\")\n", - "df = underlying_sim.to_input_dataframe()\n", - "\n", - "print(f\"\\n=== Exported DataFrame ===\")\n", - "print(f\"Shape: {df.shape}\")\n", - "print(f\"Number of rows (should be person count): {len(df)}\")\n", - "print(f\"Number of columns: {len(df.columns)}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "cell-8", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "=== Critical Entity Columns ===\n", - " person_id: FOUND (8 columns)\n", - " - person_id__2023\n", - " - person_id__2024\n", - " - person_id__2025\n", - " ... and 5 more\n", - " household_id: FOUND (8 columns)\n", - " - household_id__2023\n", - " - household_id__2024\n", - " - household_id__2025\n", - " ... and 5 more\n", - " person_household_id: FOUND (8 columns)\n", - " - person_household_id__2023\n", - " - person_household_id__2024\n", - " - person_household_id__2025\n", - " ... and 5 more\n", - " benunit_id: FOUND (8 columns)\n", - " - benunit_id__2023\n", - " - benunit_id__2024\n", - " - benunit_id__2025\n", - " ... and 5 more\n", - " person_benunit_id: FOUND (8 columns)\n", - " - person_benunit_id__2023\n", - " - person_benunit_id__2024\n", - " - person_benunit_id__2025\n", - " ... 
and 5 more\n" - ] - } - ], - "source": [ - "# Check for critical entity linkage columns\n", - "print(\"\\n=== Critical Entity Columns ===\")\n", - "\n", - "critical_patterns = [\n", - " 'person_id',\n", - " 'household_id', \n", - " 'person_household_id',\n", - " 'benunit_id',\n", - " 'person_benunit_id'\n", - "]\n", - "\n", - "for pattern in critical_patterns:\n", - " matching = [c for c in df.columns if c.startswith(pattern)]\n", - " if matching:\n", - " print(f\" {pattern}: FOUND ({len(matching)} columns)\")\n", - " for col in matching[:3]: # Show first 3\n", - " print(f\" - {col}\")\n", - " if len(matching) > 3:\n", - " print(f\" ... and {len(matching) - 3} more\")\n", - " else:\n", - " print(f\" {pattern}: MISSING!\")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "cell-9", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "=== household_id Export Analysis ===\n", - "Column: household_id__2023\n", - " Length: 115612\n", - " Unique values: 53508\n", - " Min: 1, Max: 67019\n", - " Sample values: [2 1 2 2 2 2 3 6 6 3]\n" - ] - } - ], - "source": [ - "# Check if household_id is exported and examine its values\n", - "hh_id_cols = [c for c in df.columns if c.startswith('household_id__')]\n", - "\n", - "print(\"\\n=== household_id Export Analysis ===\")\n", - "if hh_id_cols:\n", - " col = hh_id_cols[0]\n", - " print(f\"Column: {col}\")\n", - " print(f\" Length: {len(df[col])}\")\n", - " print(f\" Unique values: {df[col].nunique()}\")\n", - " print(f\" Min: {df[col].min()}, Max: {df[col].max()}\")\n", - " print(f\" Sample values: {df[col].values[:10]}\")\n", - "else:\n", - " print(\"household_id NOT exported!\")\n", - " print(\"This could be the root cause of the bug.\")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "cell-10", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "=== person_household_id Export 
Analysis ===\n", - "Column: person_household_id__2023\n", - " Length: 115612\n", - " Unique values (should match household count): 53508\n", - " Expected household count: 53508\n", - " [OK] Unique count matches household count\n" - ] - } - ], - "source": [ - "# Check person_household_id linkage\n", - "phh_id_cols = [c for c in df.columns if c.startswith('person_household_id__')]\n", - "\n", - "print(\"\\n=== person_household_id Export Analysis ===\")\n", - "if phh_id_cols:\n", - " col = phh_id_cols[0]\n", - " print(f\"Column: {col}\")\n", - " print(f\" Length: {len(df[col])}\")\n", - " print(f\" Unique values (should match household count): {df[col].nunique()}\")\n", - " print(f\" Expected household count: {underlying_sim.household.count}\")\n", - " \n", - " if df[col].nunique() == underlying_sim.household.count:\n", - " print(\" [OK] Unique count matches household count\")\n", - " else:\n", - " print(\" [WARNING] Mismatch!\")\n", - "else:\n", - " print(\"person_household_id NOT exported! This is critical.\")" - ] - }, - { - "cell_type": "markdown", - "id": "cell-11", - "metadata": {}, - "source": [ - "## Step 4: Manually Reproduce the Wales Filtering\n", - "\n", - "Let's manually do what `_apply_region_to_simulation` does to identify where it breaks." 
- ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "cell-12", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "=== Step 4a: Calculate country at person level ===\n", - "Country array shape: (115612,)\n", - "\n", - "Welsh persons: 8,470\n", - "Non-Welsh persons: 107,142\n" - ] - } - ], - "source": [ - "# Step 4a: Get country at person level (same as policyengine.py:296-298)\n", - "print(\"=== Step 4a: Calculate country at person level ===\")\n", - "country_person = underlying_sim.calculate(\"country\", map_to=\"person\").values\n", - "print(f\"Country array shape: {country_person.shape}\")\n", - "\n", - "# Create Wales mask\n", - "wales_mask = country_person == \"WALES\"\n", - "print(f\"\\nWelsh persons: {wales_mask.sum():,}\")\n", - "print(f\"Non-Welsh persons: {(~wales_mask).sum():,}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "cell-13", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "=== Step 4b: Filter DataFrame to Wales ===\n", - "Filtered DataFrame shape: (8470, 1127)\n", - "Number of Welsh persons: 8470\n" - ] - } - ], - "source": [ - "# Step 4b: Filter DataFrame to Wales (same as policyengine.py:299-300)\n", - "print(\"\\n=== Step 4b: Filter DataFrame to Wales ===\")\n", - "df_wales = df[wales_mask]\n", - "print(f\"Filtered DataFrame shape: {df_wales.shape}\")\n", - "print(f\"Number of Welsh persons: {len(df_wales)}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "cell-14", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "=== Step 4c: Examine person_household_id in filtered data ===\n", - "Column: person_household_id__2023\n", - " Length: 8470\n", - " Unique households in Wales: 4108\n", - " Min household ID: 2.0\n", - " Max household ID: 66996.0\n", - " Sample values: [2. 2. 2. 2. 2. 6. 6. 6. 6. 
7.]\n", - " [INFO] Household IDs are NOT contiguous (gaps from filtering)\n", - " This is expected - they're original UK-wide IDs\n" - ] - } - ], - "source": [ - "# Check what person_household_id looks like in filtered data\n", - "print(\"\\n=== Step 4c: Examine person_household_id in filtered data ===\")\n", - "if phh_id_cols:\n", - " col = phh_id_cols[0]\n", - " welsh_phh = df_wales[col].values\n", - " print(f\"Column: {col}\")\n", - " print(f\" Length: {len(welsh_phh)}\")\n", - " print(f\" Unique households in Wales: {len(np.unique(welsh_phh))}\")\n", - " print(f\" Min household ID: {welsh_phh.min()}\")\n", - " print(f\" Max household ID: {welsh_phh.max()}\")\n", - " print(f\" Sample values: {welsh_phh[:10]}\")\n", - " \n", - " # Check if IDs are contiguous\n", - " unique_hh = np.unique(welsh_phh)\n", - " if np.array_equal(unique_hh, np.arange(len(unique_hh))):\n", - " print(\" [INFO] Household IDs are contiguous 0-based\")\n", - " else:\n", - " print(\" [INFO] Household IDs are NOT contiguous (gaps from filtering)\")\n", - " print(f\" This is expected - they're original UK-wide IDs\")" - ] - }, - { - "cell_type": "markdown", - "id": "cell-15", - "metadata": {}, - "source": [ - "## Step 5: Try to Create Simulation from Filtered DataFrame\n", - "\n", - "This is where the error should occur." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "cell-16", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "=== Step 5a: Create simulation from filtered DataFrame ===\n", - "(This is what policyengine.py:299-300 does)\n", - "\n", - "[ERROR] Failed to create simulation: Unable to set value \"[ 39361. 39361. 39361. ... 134410.5 134410.5 6000. 
]\" for variable \"corporate_wealth\", as its length is 8470 while there are 4108 households in the simulation.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Traceback (most recent call last):\n", - " File \"/var/folders/qf/7fglpnr94wsc5xgbqkp80xbw0000gn/T/ipykernel_19066/2037714397.py\", line 7, in \n", - " new_sim = UKMicrosimulation(dataset=df_wales)\n", - " File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_uk/simulation.py\", line 100, in __init__\n", - " self.build_from_dataframe(dataset)\n", - " ~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^\n", - " File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_uk/simulation.py\", line 286, in build_from_dataframe\n", - " self.set_input(variable, time_period, df[column])\n", - " ~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - " File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/simulations/simulation.py\", line 1241, in set_input\n", - " self.get_holder(variable_name).set_input(\n", - " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^\n", - " period, value, self.branch_name\n", - " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - " )\n", - " ^\n", - " File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/holders/holder.py\", line 244, in set_input\n", - " return self._set(period, array, branch_name)\n", - " ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - " File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/holders/holder.py\", line 286, in _set\n", - " value = self._to_array(value)\n", - " File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/holders/holder.py\", line 253, in _to_array\n", - " raise ValueError(\n", - " ...<7 lines>...\n", - " )\n", - "ValueError: Unable to set value \"[ 39361. 39361. 39361. ... 134410.5 134410.5 6000. 
]\" for variable \"corporate_wealth\", as its length is 8470 while there are 4108 households in the simulation.\n" - ] - } - ], - "source": [ - "# Step 5a: Create new simulation from filtered DataFrame\n", - "print(\"=== Step 5a: Create simulation from filtered DataFrame ===\")\n", - "print(\"(This is what policyengine.py:299-300 does)\")\n", - "print()\n", - "\n", - "try:\n", - " new_sim = UKMicrosimulation(dataset=df_wales)\n", - " \n", - " print(f\"New simulation created successfully!\")\n", - " print(f\" Person count: {new_sim.persons.count}\")\n", - " print(f\" Household count: {new_sim.household.count}\")\n", - " print(f\" BenUnit count: {new_sim.benunit.count}\")\n", - " \n", - " # Critical check\n", - " if new_sim.household.count == new_sim.persons.count:\n", - " print(\"\\n [ERROR] Household count equals person count!\")\n", - " print(\" Entity linkage was lost during filtering.\")\n", - " \n", - "except Exception as e:\n", - " print(f\"[ERROR] Failed to create simulation: {e}\")\n", - " traceback.print_exc()" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "cell-17", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "=== Step 5b: Check household_id holder ===\n", - "Error checking household_id: name 'new_sim' is not defined\n" - ] - } - ], - "source": [ - "# Step 5b: Check if household_id holder has data\n", - "print(\"\\n=== Step 5b: Check household_id holder ===\")\n", - "\n", - "try:\n", - " hh_id_holder = new_sim.get_holder(\"household_id\")\n", - " known_periods = list(hh_id_holder.get_known_periods())\n", - " print(f\"household_id known periods: {known_periods}\")\n", - " \n", - " if known_periods:\n", - " period = known_periods[0]\n", - " arr = hh_id_holder.get_array(period)\n", - " print(f\" Period {period}: array shape = {arr.shape if arr is not None else 'None'}\")\n", - " if arr is not None:\n", - " print(f\" Values sample: {arr[:10]}\")\n", - " else:\n", - " print(\" 
No known periods - household_id was not set as input!\")\n", - "except Exception as e:\n", - " print(f\"Error checking household_id: {e}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "cell-18", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "=== Step 5c: Calculate household_id ===\n", - "Error calculating household_id: name 'new_sim' is not defined\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Traceback (most recent call last):\n", - " File \"/var/folders/qf/7fglpnr94wsc5xgbqkp80xbw0000gn/T/ipykernel_19066/1284064109.py\", line 5, in \n", - " hh_ids = new_sim.calculate(\"household_id\", 2025)\n", - " ^^^^^^^\n", - "NameError: name 'new_sim' is not defined\n" - ] - } - ], - "source": [ - "# Step 5c: Try to calculate household_id\n", - "print(\"\\n=== Step 5c: Calculate household_id ===\")\n", - "\n", - "try:\n", - " hh_ids = new_sim.calculate(\"household_id\", 2025)\n", - " print(f\"household_id calculation result:\")\n", - " print(f\" Length: {len(hh_ids)}\")\n", - " print(f\" Expected (household count): {new_sim.household.count}\")\n", - " \n", - " if len(hh_ids) == new_sim.household.count:\n", - " print(\" [OK] Length matches household count\")\n", - " else:\n", - " print(f\" [ERROR] Length mismatch! Got {len(hh_ids)}, expected {new_sim.household.count}\")\n", - " \n", - "except Exception as e:\n", - " print(f\"Error calculating household_id: {e}\")\n", - " traceback.print_exc()" - ] - }, - { - "cell_type": "markdown", - "id": "cell-19", - "metadata": {}, - "source": [ - "## Step 6: Try to Calculate would_evade_tv_licence_fee\n", - "\n", - "This is the variable that triggers the error in production." 
- ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "cell-20", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "=== Step 6: Calculate would_evade_tv_licence_fee ===\n", - "(This calculation uses random(household) internally)\n", - "\n", - "Unexpected error: NameError: name 'new_sim' is not defined\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Traceback (most recent call last):\n", - " File \"/var/folders/qf/7fglpnr94wsc5xgbqkp80xbw0000gn/T/ipykernel_19066/1304269510.py\", line 7, in \n", - " result = new_sim.calculate(\"would_evade_tv_licence_fee\", 2025)\n", - " ^^^^^^^\n", - "NameError: name 'new_sim' is not defined\n" - ] - } - ], - "source": [ - "# Step 6: Calculate the problematic variable\n", - "print(\"=== Step 6: Calculate would_evade_tv_licence_fee ===\")\n", - "print(\"(This calculation uses random(household) internally)\")\n", - "print()\n", - "\n", - "try:\n", - " result = new_sim.calculate(\"would_evade_tv_licence_fee\", 2025)\n", - " print(f\"Calculation succeeded!\")\n", - " print(f\" Result length: {len(result)}\")\n", - " print(f\" Expected (household count): {new_sim.household.count}\")\n", - " print(f\" Result dtype: {result.dtype}\")\n", - " \n", - "except ValueError as e:\n", - " print(f\"[EXPECTED ERROR] ValueError:\")\n", - " print(f\" {e}\")\n", - " print()\n", - " print(\"This confirms the bug!\")\n", - " \n", - " # Parse the error message\n", - " error_str = str(e)\n", - " if \"length is\" in error_str and \"while there are\" in error_str:\n", - " print(\"\\nThe error indicates:\")\n", - " print(\" - The formula returned an array sized for persons\")\n", - " print(\" - But the variable is household-level\")\n", - " print(\" - This means random(household) returned wrong-sized array\")\n", - " \n", - "except Exception as e:\n", - " print(f\"Unexpected error: {type(e).__name__}: {e}\")\n", - " traceback.print_exc()" - ] - }, - { - "cell_type": 
"markdown", - "id": "cell-21", - "metadata": {}, - "source": [ - "## Step 7: Test Using policyengine.Simulation Directly\n", - "\n", - "Now let's test using the high-level API to confirm the bug occurs there too." - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "cell-22", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "=== Step 7: Test with policyengine.Simulation ===\n", - "Creating Simulation with region='country/wales'...\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "No data provided, using default dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n", - "Using dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n", - "Downloading enhanced_frs_2023_24.h5 from bucket policyengine-uk-data-private\n", - "WARNING:root:No version specified for policyengine-uk-data-private, enhanced_frs_2023_24.h5. Using latest version: 1.29.4\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "DataFrame columns: ['miscellaneous_income__2023', 'miscellaneous_income__2024', 'miscellaneous_income__2025', 'miscellaneous_income__2026', 'miscellaneous_income__2027', 'miscellaneous_income__2028', 'miscellaneous_income__2029', 'miscellaneous_income__2030', 'corporate_wealth__2023', 'corporate_wealth__2024', 'corporate_wealth__2025', 'corporate_wealth__2026', 'corporate_wealth__2027', 'corporate_wealth__2028', 'corporate_wealth__2029', 'corporate_wealth__2030', 'non_residential_property_value__2023', 'non_residential_property_value__2024', 'non_residential_property_value__2025', 'non_residential_property_value__2026', 'non_residential_property_value__2027', 'non_residential_property_value__2028', 'non_residential_property_value__2029', 'non_residential_property_value__2030', 'employment_income_before_lsr__2023', 'employment_income_before_lsr__2024', 'employment_income_before_lsr__2025', 'employment_income_before_lsr__2026', 
'employment_income_before_lsr__2027', 'employment_income_before_lsr__2028', 'employment_income_before_lsr__2029', 'employment_income_before_lsr__2030', 'property_income__2023', 'property_income__2024', 'property_income__2025', 'property_income__2026', 'property_income__2027', 'property_income__2028', 'property_income__2029', 'property_income__2030', 'savings_interest_income__2023', 'savings_interest_income__2024', 'savings_interest_income__2025', 'savings_interest_income__2026', 'savings_interest_income__2027', 'savings_interest_income__2028', 'savings_interest_income__2029', 'savings_interest_income__2030', 'main_residence_value__2023', 'main_residence_value__2024', 'main_residence_value__2025', 'main_residence_value__2026', 'main_residence_value__2027', 'main_residence_value__2028', 'main_residence_value__2029', 'main_residence_value__2030', 'rent__2023', 'rent__2024', 'rent__2025', 'rent__2026', 'rent__2027', 'rent__2028', 'rent__2029', 'rent__2030', 'private_pension_income__2023', 'private_pension_income__2024', 'private_pension_income__2025', 'private_pension_income__2026', 'private_pension_income__2027', 'private_pension_income__2028', 'private_pension_income__2029', 'private_pension_income__2030', 'self_employment_income__2023', 'self_employment_income__2024', 'self_employment_income__2025', 'self_employment_income__2026', 'self_employment_income__2027', 'self_employment_income__2028', 'self_employment_income__2029', 'self_employment_income__2030', 'private_transfer_income__2023', 'private_transfer_income__2024', 'private_transfer_income__2025', 'private_transfer_income__2026', 'private_transfer_income__2027', 'private_transfer_income__2028', 'private_transfer_income__2029', 'private_transfer_income__2030', 'age__2023', 'age__2024', 'age__2025', 'age__2026', 'age__2027', 'age__2028', 'age__2029', 'age__2030', 'owned_land__2023', 'owned_land__2024', 'owned_land__2025', 'owned_land__2026', 'owned_land__2027', 'owned_land__2028', 'owned_land__2029', 
'owned_land__2030', 'lump_sum_income__2023', 'lump_sum_income__2024', 'lump_sum_income__2025', 'lump_sum_income__2026', 'lump_sum_income__2027', 'lump_sum_income__2028', 'lump_sum_income__2029', 'lump_sum_income__2030', 'council_tax_band__2023', 'council_tax_band__2024', 'council_tax_band__2025', 'council_tax_band__2026', 'council_tax_band__2027', 'council_tax_band__2028', 'council_tax_band__2029', 'council_tax_band__2030', 'other_residential_property_value__2023', 'other_residential_property_value__2024', 'other_residential_property_value__2025', 'other_residential_property_value__2026', 'other_residential_property_value__2027', 'other_residential_property_value__2028', 'other_residential_property_value__2029', 'other_residential_property_value__2030', 'dividend_income__2023', 'dividend_income__2024', 'dividend_income__2025', 'dividend_income__2026', 'dividend_income__2027', 'dividend_income__2028', 'dividend_income__2029', 'dividend_income__2030', 'maintenance_income__2023', 'maintenance_income__2024', 'maintenance_income__2025', 'maintenance_income__2026', 'maintenance_income__2027', 'maintenance_income__2028', 'maintenance_income__2029', 'maintenance_income__2030', 'petrol_spending__2023', 'petrol_spending__2024', 'petrol_spending__2025', 'petrol_spending__2026', 'petrol_spending__2027', 'petrol_spending__2028', 'petrol_spending__2029', 'petrol_spending__2030', 'health_consumption__2023', 'health_consumption__2024', 'health_consumption__2025', 'health_consumption__2026', 'health_consumption__2027', 'health_consumption__2028', 'health_consumption__2029', 'health_consumption__2030', 'household_furnishings_consumption__2023', 'household_furnishings_consumption__2024', 'household_furnishings_consumption__2025', 'household_furnishings_consumption__2026', 'household_furnishings_consumption__2027', 'household_furnishings_consumption__2028', 'household_furnishings_consumption__2029', 'household_furnishings_consumption__2030', 'restaurants_and_hotels_consumption__2023', 
'restaurants_and_hotels_consumption__2024', 'restaurants_and_hotels_consumption__2025', 'restaurants_and_hotels_consumption__2026', 'restaurants_and_hotels_consumption__2027', 'restaurants_and_hotels_consumption__2028', 'restaurants_and_hotels_consumption__2029', 'restaurants_and_hotels_consumption__2030', 'miscellaneous_consumption__2023', 'miscellaneous_consumption__2024', 'miscellaneous_consumption__2025', 'miscellaneous_consumption__2026', 'miscellaneous_consumption__2027', 'miscellaneous_consumption__2028', 'miscellaneous_consumption__2029', 'miscellaneous_consumption__2030', 'recreation_consumption__2023', 'recreation_consumption__2024', 'recreation_consumption__2025', 'recreation_consumption__2026', 'recreation_consumption__2027', 'recreation_consumption__2028', 'recreation_consumption__2029', 'recreation_consumption__2030', 'domestic_energy_consumption__2023', 'domestic_energy_consumption__2024', 'domestic_energy_consumption__2025', 'domestic_energy_consumption__2026', 'domestic_energy_consumption__2027', 'domestic_energy_consumption__2028', 'domestic_energy_consumption__2029', 'domestic_energy_consumption__2030', 'alcohol_and_tobacco_consumption__2023', 'alcohol_and_tobacco_consumption__2024', 'alcohol_and_tobacco_consumption__2025', 'alcohol_and_tobacco_consumption__2026', 'alcohol_and_tobacco_consumption__2027', 'alcohol_and_tobacco_consumption__2028', 'alcohol_and_tobacco_consumption__2029', 'alcohol_and_tobacco_consumption__2030', 'clothing_and_footwear_consumption__2023', 'clothing_and_footwear_consumption__2024', 'clothing_and_footwear_consumption__2025', 'clothing_and_footwear_consumption__2026', 'clothing_and_footwear_consumption__2027', 'clothing_and_footwear_consumption__2028', 'clothing_and_footwear_consumption__2029', 'clothing_and_footwear_consumption__2030', 'education_consumption__2023', 'education_consumption__2024', 'education_consumption__2025', 'education_consumption__2026', 'education_consumption__2027', 'education_consumption__2028', 
'education_consumption__2029', 'education_consumption__2030', 'communication_consumption__2023', 'communication_consumption__2024', 'communication_consumption__2025', 'communication_consumption__2026', 'communication_consumption__2027', 'communication_consumption__2028', 'communication_consumption__2029', 'communication_consumption__2030', 'housing_water_and_electricity_consumption__2023', 'housing_water_and_electricity_consumption__2024', 'housing_water_and_electricity_consumption__2025', 'housing_water_and_electricity_consumption__2026', 'housing_water_and_electricity_consumption__2027', 'housing_water_and_electricity_consumption__2028', 'housing_water_and_electricity_consumption__2029', 'housing_water_and_electricity_consumption__2030', 'diesel_spending__2023', 'diesel_spending__2024', 'diesel_spending__2025', 'diesel_spending__2026', 'diesel_spending__2027', 'diesel_spending__2028', 'diesel_spending__2029', 'diesel_spending__2030', 'food_and_non_alcoholic_beverages_consumption__2023', 'food_and_non_alcoholic_beverages_consumption__2024', 'food_and_non_alcoholic_beverages_consumption__2025', 'food_and_non_alcoholic_beverages_consumption__2026', 'food_and_non_alcoholic_beverages_consumption__2027', 'food_and_non_alcoholic_beverages_consumption__2028', 'food_and_non_alcoholic_beverages_consumption__2029', 'food_and_non_alcoholic_beverages_consumption__2030', 'transport_consumption__2023', 'transport_consumption__2024', 'transport_consumption__2025', 'transport_consumption__2026', 'transport_consumption__2027', 'transport_consumption__2028', 'transport_consumption__2029', 'transport_consumption__2030', 'childcare_expenses__2023', 'childcare_expenses__2024', 'childcare_expenses__2025', 'childcare_expenses__2026', 'childcare_expenses__2027', 'childcare_expenses__2028', 'childcare_expenses__2029', 'childcare_expenses__2030', 'water_and_sewerage_charges__2023', 'water_and_sewerage_charges__2024', 'water_and_sewerage_charges__2025', 'water_and_sewerage_charges__2026', 
'water_and_sewerage_charges__2027', 'water_and_sewerage_charges__2028', 'water_and_sewerage_charges__2029', 'water_and_sewerage_charges__2030', 'maintenance_expenses__2023', 'maintenance_expenses__2024', 'maintenance_expenses__2025', 'maintenance_expenses__2026', 'maintenance_expenses__2027', 'maintenance_expenses__2028', 'maintenance_expenses__2029', 'maintenance_expenses__2030', 'employee_pension_contributions_reported__2023', 'employee_pension_contributions_reported__2024', 'employee_pension_contributions_reported__2025', 'employee_pension_contributions_reported__2026', 'employee_pension_contributions_reported__2027', 'employee_pension_contributions_reported__2028', 'employee_pension_contributions_reported__2029', 'employee_pension_contributions_reported__2030', 'mortgage_capital_repayment__2023', 'mortgage_capital_repayment__2024', 'mortgage_capital_repayment__2025', 'mortgage_capital_repayment__2026', 'mortgage_capital_repayment__2027', 'mortgage_capital_repayment__2028', 'mortgage_capital_repayment__2029', 'mortgage_capital_repayment__2030', 'pension_contributions_via_salary_sacrifice__2023', 'pension_contributions_via_salary_sacrifice__2024', 'pension_contributions_via_salary_sacrifice__2025', 'pension_contributions_via_salary_sacrifice__2026', 'pension_contributions_via_salary_sacrifice__2027', 'pension_contributions_via_salary_sacrifice__2028', 'pension_contributions_via_salary_sacrifice__2029', 'pension_contributions_via_salary_sacrifice__2030', 'council_tax__2023', 'council_tax__2024', 'council_tax__2025', 'council_tax__2026', 'council_tax__2027', 'council_tax__2028', 'council_tax__2029', 'council_tax__2030', 'mortgage_interest_repayment__2023', 'mortgage_interest_repayment__2024', 'mortgage_interest_repayment__2025', 'mortgage_interest_repayment__2026', 'mortgage_interest_repayment__2027', 'mortgage_interest_repayment__2028', 'mortgage_interest_repayment__2029', 'mortgage_interest_repayment__2030', 'housing_service_charges__2023', 
'housing_service_charges__2024', 'housing_service_charges__2025', 'housing_service_charges__2026', 'housing_service_charges__2027', 'housing_service_charges__2028', 'housing_service_charges__2029', 'housing_service_charges__2030', 'employer_pension_contributions__2023', 'employer_pension_contributions__2024', 'employer_pension_contributions__2025', 'employer_pension_contributions__2026', 'employer_pension_contributions__2027', 'employer_pension_contributions__2028', 'employer_pension_contributions__2029', 'employer_pension_contributions__2030', 'personal_pension_contributions__2023', 'personal_pension_contributions__2024', 'personal_pension_contributions__2025', 'personal_pension_contributions__2026', 'personal_pension_contributions__2027', 'personal_pension_contributions__2028', 'personal_pension_contributions__2029', 'personal_pension_contributions__2030', 'attends_private_school__2023', 'attends_private_school__2024', 'attends_private_school__2025', 'attends_private_school__2026', 'attends_private_school__2027', 'attends_private_school__2028', 'attends_private_school__2029', 'attends_private_school__2030', 'region__2023', 'region__2024', 'region__2025', 'region__2026', 'region__2027', 'region__2028', 'region__2029', 'region__2030', 'brma__2023', 'brma__2024', 'brma__2025', 'brma__2026', 'brma__2027', 'brma__2028', 'brma__2029', 'brma__2030', 'net_financial_wealth__2023', 'net_financial_wealth__2024', 'net_financial_wealth__2025', 'net_financial_wealth__2026', 'net_financial_wealth__2027', 'net_financial_wealth__2028', 'net_financial_wealth__2029', 'net_financial_wealth__2030', 'property_wealth__2023', 'property_wealth__2024', 'property_wealth__2025', 'property_wealth__2026', 'property_wealth__2027', 'property_wealth__2028', 'property_wealth__2029', 'property_wealth__2030', 'savings__2023', 'savings__2024', 'savings__2025', 'savings__2026', 'savings__2027', 'savings__2028', 'savings__2029', 'savings__2030', 'num_vehicles__2023', 'num_vehicles__2024', 
'num_vehicles__2025', 'num_vehicles__2026', 'num_vehicles__2027', 'num_vehicles__2028', 'num_vehicles__2029', 'num_vehicles__2030', 'gross_financial_wealth__2023', 'gross_financial_wealth__2024', 'gross_financial_wealth__2025', 'gross_financial_wealth__2026', 'gross_financial_wealth__2027', 'gross_financial_wealth__2028', 'gross_financial_wealth__2029', 'gross_financial_wealth__2030', 'relation_type__2025', 'is_enhanced_disabled_for_benefits__2023', 'is_enhanced_disabled_for_benefits__2024', 'is_enhanced_disabled_for_benefits__2025', 'is_enhanced_disabled_for_benefits__2026', 'is_enhanced_disabled_for_benefits__2027', 'is_enhanced_disabled_for_benefits__2028', 'is_enhanced_disabled_for_benefits__2029', 'is_enhanced_disabled_for_benefits__2030', 'is_higher_earner__2023', 'is_higher_earner__2024', 'is_higher_earner__2025', 'is_higher_earner__2026', 'is_higher_earner__2027', 'is_higher_earner__2028', 'is_higher_earner__2029', 'is_higher_earner__2030', 'gender__2023', 'gender__2024', 'gender__2025', 'gender__2026', 'gender__2027', 'gender__2028', 'gender__2029', 'gender__2030', 'person_id__2023', 'person_id__2024', 'person_id__2025', 'person_id__2026', 'person_id__2027', 'person_id__2028', 'person_id__2029', 'person_id__2030', 'household_weight__2023', 'household_weight__2024', 'household_weight__2025', 'household_weight__2026', 'household_weight__2027', 'household_weight__2028', 'household_weight__2029', 'household_weight__2030', 'is_benunit_head__2023', 'is_benunit_head__2024', 'is_benunit_head__2025', 'is_benunit_head__2026', 'is_benunit_head__2027', 'is_benunit_head__2028', 'is_benunit_head__2029', 'is_benunit_head__2030', 'person_weight__2025', 'person_weight__2026', 'person_weight__2027', 'person_weight__2028', 'person_weight__2029', 'person_weight__2023', 'person_weight__2024', 'person_weight__2030', 'person_benunit_id__2023', 'person_benunit_id__2024', 'person_benunit_id__2025', 'person_benunit_id__2026', 'person_benunit_id__2027', 'person_benunit_id__2028', 
'person_benunit_id__2029', 'person_benunit_id__2030', 'person_household_id__2023', 'person_household_id__2024', 'person_household_id__2025', 'person_household_id__2026', 'person_household_id__2027', 'person_household_id__2028', 'person_household_id__2029', 'person_household_id__2030', 'tenure_type__2023', 'tenure_type__2024', 'tenure_type__2025', 'tenure_type__2026', 'tenure_type__2027', 'tenure_type__2028', 'tenure_type__2029', 'tenure_type__2030', 'marital_status__2023', 'marital_status__2024', 'marital_status__2025', 'marital_status__2026', 'marital_status__2027', 'marital_status__2028', 'marital_status__2029', 'marital_status__2030', 'is_household_head__2023', 'is_household_head__2024', 'is_household_head__2025', 'is_household_head__2026', 'is_household_head__2027', 'is_household_head__2028', 'is_household_head__2029', 'is_household_head__2030', 'current_education__2023', 'current_education__2024', 'current_education__2025', 'current_education__2026', 'current_education__2027', 'current_education__2028', 'current_education__2029', 'current_education__2030', 'household_owns_tv__2023', 'household_owns_tv__2024', 'household_owns_tv__2025', 'household_owns_tv__2026', 'household_owns_tv__2027', 'household_owns_tv__2028', 'household_owns_tv__2029', 'household_owns_tv__2030', 'is_severely_disabled_for_benefits__2023', 'is_severely_disabled_for_benefits__2024', 'is_severely_disabled_for_benefits__2025', 'is_severely_disabled_for_benefits__2026', 'is_severely_disabled_for_benefits__2027', 'is_severely_disabled_for_benefits__2028', 'is_severely_disabled_for_benefits__2029', 'is_severely_disabled_for_benefits__2030', 'accommodation_type__2023', 'accommodation_type__2024', 'accommodation_type__2025', 'accommodation_type__2026', 'accommodation_type__2027', 'accommodation_type__2028', 'accommodation_type__2029', 'accommodation_type__2030', 'is_married__2023', 'is_married__2024', 'is_married__2025', 'is_married__2026', 'is_married__2027', 'is_married__2028', 
'is_married__2029', 'is_married__2030', 'benunit_id__2023', 'benunit_id__2024', 'benunit_id__2025', 'benunit_id__2026', 'benunit_id__2027', 'benunit_id__2028', 'benunit_id__2029', 'benunit_id__2030', 'is_disabled_for_benefits__2023', 'is_disabled_for_benefits__2024', 'is_disabled_for_benefits__2025', 'is_disabled_for_benefits__2026', 'is_disabled_for_benefits__2027', 'is_disabled_for_benefits__2028', 'is_disabled_for_benefits__2029', 'is_disabled_for_benefits__2030', 'eldest_adult_age__2025', 'is_adult__2025', 'benunit_weight__2025', 'benunit_weight__2026', 'benunit_weight__2027', 'benunit_weight__2028', 'benunit_weight__2029', 'household_id__2023', 'household_id__2024', 'household_id__2025', 'household_id__2026', 'household_id__2027', 'household_id__2028', 'household_id__2029', 'household_id__2030', 'structural_insurance_payments__2023', 'structural_insurance_payments__2024', 'structural_insurance_payments__2025', 'structural_insurance_payments__2026', 'structural_insurance_payments__2027', 'structural_insurance_payments__2028', 'structural_insurance_payments__2029', 'structural_insurance_payments__2030', 'main_residential_property_purchased_is_first_home__2023', 'main_residential_property_purchased_is_first_home__2024', 'main_residential_property_purchased_is_first_home__2025', 'main_residential_property_purchased_is_first_home__2026', 'main_residential_property_purchased_is_first_home__2027', 'main_residential_property_purchased_is_first_home__2028', 'main_residential_property_purchased_is_first_home__2029', 'main_residential_property_purchased_is_first_home__2030', 'full_rate_vat_expenditure_rate__2023', 'full_rate_vat_expenditure_rate__2024', 'full_rate_vat_expenditure_rate__2025', 'full_rate_vat_expenditure_rate__2026', 'full_rate_vat_expenditure_rate__2027', 'full_rate_vat_expenditure_rate__2028', 'full_rate_vat_expenditure_rate__2029', 'full_rate_vat_expenditure_rate__2030', 'external_child_payments__2023', 'external_child_payments__2024', 
'external_child_payments__2025', 'external_child_payments__2026', 'external_child_payments__2027', 'external_child_payments__2028', 'external_child_payments__2029', 'external_child_payments__2030', 'statutory_maternity_pay__2023', 'statutory_maternity_pay__2024', 'statutory_maternity_pay__2025', 'statutory_maternity_pay__2026', 'statutory_maternity_pay__2027', 'statutory_maternity_pay__2028', 'statutory_maternity_pay__2029', 'statutory_maternity_pay__2030', 'employment_status__2023', 'employment_status__2024', 'employment_status__2025', 'employment_status__2026', 'employment_status__2027', 'employment_status__2028', 'employment_status__2029', 'employment_status__2030', 'is_single__2025', 'statutory_sick_pay__2023', 'statutory_sick_pay__2024', 'statutory_sick_pay__2025', 'statutory_sick_pay__2026', 'statutory_sick_pay__2027', 'statutory_sick_pay__2028', 'statutory_sick_pay__2029', 'statutory_sick_pay__2030', 'hours_worked__2023', 'hours_worked__2024', 'hours_worked__2025', 'hours_worked__2026', 'hours_worked__2027', 'hours_worked__2028', 'hours_worked__2029', 'hours_worked__2030', 'rail_usage__2023', 'rail_usage__2024', 'rail_usage__2025', 'rail_usage__2026', 'rail_usage__2027', 'rail_usage__2028', 'rail_usage__2029', 'rail_usage__2030', 'rail_subsidy_spending__2023', 'rail_subsidy_spending__2024', 'rail_subsidy_spending__2025', 'rail_subsidy_spending__2026', 'rail_subsidy_spending__2027', 'rail_subsidy_spending__2028', 'rail_subsidy_spending__2029', 'rail_subsidy_spending__2030', 'bus_subsidy_spending__2023', 'bus_subsidy_spending__2024', 'bus_subsidy_spending__2025', 'bus_subsidy_spending__2026', 'bus_subsidy_spending__2027', 'bus_subsidy_spending__2028', 'bus_subsidy_spending__2029', 'bus_subsidy_spending__2030', 'outpatient_visits__2023', 'outpatient_visits__2024', 'outpatient_visits__2025', 'outpatient_visits__2026', 'outpatient_visits__2027', 'outpatient_visits__2028', 'outpatient_visits__2029', 'outpatient_visits__2030', 'nhs_outpatient_spending__2023', 
'nhs_outpatient_spending__2024', 'nhs_outpatient_spending__2025', 'nhs_outpatient_spending__2026', 'nhs_outpatient_spending__2027', 'nhs_outpatient_spending__2028', 'nhs_outpatient_spending__2029', 'nhs_outpatient_spending__2030', 'nhs_a_and_e_spending__2023', 'nhs_a_and_e_spending__2024', 'nhs_a_and_e_spending__2025', 'nhs_a_and_e_spending__2026', 'nhs_a_and_e_spending__2027', 'nhs_a_and_e_spending__2028', 'nhs_a_and_e_spending__2029', 'nhs_a_and_e_spending__2030', 'a_and_e_visits__2023', 'a_and_e_visits__2024', 'a_and_e_visits__2025', 'a_and_e_visits__2026', 'a_and_e_visits__2027', 'a_and_e_visits__2028', 'a_and_e_visits__2029', 'a_and_e_visits__2030', 'admitted_patient_visits__2023', 'admitted_patient_visits__2024', 'admitted_patient_visits__2025', 'admitted_patient_visits__2026', 'admitted_patient_visits__2027', 'admitted_patient_visits__2028', 'admitted_patient_visits__2029', 'admitted_patient_visits__2030', 'nhs_admitted_patient_spending__2023', 'nhs_admitted_patient_spending__2024', 'nhs_admitted_patient_spending__2025', 'nhs_admitted_patient_spending__2026', 'nhs_admitted_patient_spending__2027', 'nhs_admitted_patient_spending__2028', 'nhs_admitted_patient_spending__2029', 'nhs_admitted_patient_spending__2030', 'healthy_start_vouchers__2023', 'healthy_start_vouchers__2024', 'healthy_start_vouchers__2025', 'healthy_start_vouchers__2026', 'healthy_start_vouchers__2027', 'healthy_start_vouchers__2028', 'healthy_start_vouchers__2029', 'healthy_start_vouchers__2030', 'education_grants__2023', 'education_grants__2024', 'education_grants__2025', 'education_grants__2026', 'education_grants__2027', 'education_grants__2028', 'education_grants__2029', 'education_grants__2030', 'jsa_contrib_reported__2023', 'jsa_contrib_reported__2024', 'jsa_contrib_reported__2025', 'jsa_contrib_reported__2026', 'jsa_contrib_reported__2027', 'jsa_contrib_reported__2028', 'jsa_contrib_reported__2029', 'jsa_contrib_reported__2030', 'sda_reported__2023', 'sda_reported__2024', 
'sda_reported__2025', 'sda_reported__2026', 'sda_reported__2027', 'sda_reported__2028', 'sda_reported__2029', 'sda_reported__2030', 'adult_ema__2023', 'adult_ema__2024', 'adult_ema__2025', 'adult_ema__2026', 'adult_ema__2027', 'adult_ema__2028', 'adult_ema__2029', 'adult_ema__2030', 'winter_fuel_allowance_reported__2023', 'winter_fuel_allowance_reported__2024', 'winter_fuel_allowance_reported__2025', 'winter_fuel_allowance_reported__2026', 'winter_fuel_allowance_reported__2027', 'winter_fuel_allowance_reported__2028', 'winter_fuel_allowance_reported__2029', 'winter_fuel_allowance_reported__2030', 'child_tax_credit_reported__2023', 'child_tax_credit_reported__2024', 'child_tax_credit_reported__2025', 'child_tax_credit_reported__2026', 'child_tax_credit_reported__2027', 'child_tax_credit_reported__2028', 'child_tax_credit_reported__2029', 'child_tax_credit_reported__2030', 'working_tax_credit_reported__2023', 'working_tax_credit_reported__2024', 'working_tax_credit_reported__2025', 'working_tax_credit_reported__2026', 'working_tax_credit_reported__2027', 'working_tax_credit_reported__2028', 'working_tax_credit_reported__2029', 'working_tax_credit_reported__2030', 'bsp_reported__2023', 'bsp_reported__2024', 'bsp_reported__2025', 'bsp_reported__2026', 'bsp_reported__2027', 'bsp_reported__2028', 'bsp_reported__2029', 'bsp_reported__2030', 'carers_allowance_reported__2023', 'carers_allowance_reported__2024', 'carers_allowance_reported__2025', 'carers_allowance_reported__2026', 'carers_allowance_reported__2027', 'carers_allowance_reported__2028', 'carers_allowance_reported__2029', 'carers_allowance_reported__2030', 'access_fund__2023', 'access_fund__2024', 'access_fund__2025', 'access_fund__2026', 'access_fund__2027', 'access_fund__2028', 'access_fund__2029', 'access_fund__2030', 'ssmg_reported__2023', 'ssmg_reported__2024', 'ssmg_reported__2025', 'ssmg_reported__2026', 'ssmg_reported__2027', 'ssmg_reported__2028', 'ssmg_reported__2029', 'ssmg_reported__2030', 
'incapacity_benefit_reported__2023', 'incapacity_benefit_reported__2024', 'incapacity_benefit_reported__2025', 'incapacity_benefit_reported__2026', 'incapacity_benefit_reported__2027', 'incapacity_benefit_reported__2028', 'incapacity_benefit_reported__2029', 'incapacity_benefit_reported__2030', 'iidb_reported__2023', 'iidb_reported__2024', 'iidb_reported__2025', 'iidb_reported__2026', 'iidb_reported__2027', 'iidb_reported__2028', 'iidb_reported__2029', 'iidb_reported__2030', 'attendance_allowance_reported__2023', 'attendance_allowance_reported__2024', 'attendance_allowance_reported__2025', 'attendance_allowance_reported__2026', 'attendance_allowance_reported__2027', 'attendance_allowance_reported__2028', 'attendance_allowance_reported__2029', 'attendance_allowance_reported__2030', 'student_loans__2023', 'student_loans__2024', 'student_loans__2025', 'student_loans__2026', 'student_loans__2027', 'student_loans__2028', 'student_loans__2029', 'student_loans__2030', 'esa_income_reported__2023', 'esa_income_reported__2024', 'esa_income_reported__2025', 'esa_income_reported__2026', 'esa_income_reported__2027', 'esa_income_reported__2028', 'esa_income_reported__2029', 'esa_income_reported__2030', 'state_pension_reported__2023', 'state_pension_reported__2024', 'state_pension_reported__2025', 'state_pension_reported__2026', 'state_pension_reported__2027', 'state_pension_reported__2028', 'state_pension_reported__2029', 'state_pension_reported__2030', 'afcs_reported__2023', 'afcs_reported__2024', 'afcs_reported__2025', 'afcs_reported__2026', 'afcs_reported__2027', 'afcs_reported__2028', 'afcs_reported__2029', 'afcs_reported__2030', 'council_tax_benefit_reported__2023', 'council_tax_benefit_reported__2024', 'council_tax_benefit_reported__2025', 'council_tax_benefit_reported__2026', 'council_tax_benefit_reported__2027', 'council_tax_benefit_reported__2028', 'council_tax_benefit_reported__2029', 'council_tax_benefit_reported__2030', 'income_support_reported__2023', 
'income_support_reported__2024', 'income_support_reported__2025', 'income_support_reported__2026', 'income_support_reported__2027', 'income_support_reported__2028', 'income_support_reported__2029', 'income_support_reported__2030', 'esa_contrib_reported__2023', 'esa_contrib_reported__2024', 'esa_contrib_reported__2025', 'esa_contrib_reported__2026', 'esa_contrib_reported__2027', 'esa_contrib_reported__2028', 'esa_contrib_reported__2029', 'esa_contrib_reported__2030', 'jsa_income_reported__2023', 'jsa_income_reported__2024', 'jsa_income_reported__2025', 'jsa_income_reported__2026', 'jsa_income_reported__2027', 'jsa_income_reported__2028', 'jsa_income_reported__2029', 'jsa_income_reported__2030', 'child_ema__2023', 'child_ema__2024', 'child_ema__2025', 'child_ema__2026', 'child_ema__2027', 'child_ema__2028', 'child_ema__2029', 'child_ema__2030', 'dla_sc_reported__2023', 'dla_sc_reported__2024', 'dla_sc_reported__2025', 'dla_sc_reported__2026', 'dla_sc_reported__2027', 'dla_sc_reported__2028', 'dla_sc_reported__2029', 'dla_sc_reported__2030', 'dla_m_reported__2023', 'dla_m_reported__2024', 'dla_m_reported__2025', 'dla_m_reported__2026', 'dla_m_reported__2027', 'dla_m_reported__2028', 'dla_m_reported__2029', 'dla_m_reported__2030', 'housing_benefit_reported__2023', 'housing_benefit_reported__2024', 'housing_benefit_reported__2025', 'housing_benefit_reported__2026', 'housing_benefit_reported__2027', 'housing_benefit_reported__2028', 'housing_benefit_reported__2029', 'housing_benefit_reported__2030', 'would_claim_uc__2023', 'would_claim_uc__2024', 'would_claim_uc__2025', 'would_claim_uc__2026', 'would_claim_uc__2027', 'would_claim_uc__2028', 'would_claim_uc__2029', 'would_claim_uc__2030', 'universal_credit_reported__2023', 'universal_credit_reported__2024', 'universal_credit_reported__2025', 'universal_credit_reported__2026', 'universal_credit_reported__2027', 'universal_credit_reported__2028', 'universal_credit_reported__2029', 'universal_credit_reported__2030', 
'uc_standard_allowance_claimant_type__2025', 'uc_standard_allowance__2025', 'uc_standard_allowance__2026', 'uc_standard_allowance__2027', 'uc_standard_allowance__2028', 'uc_standard_allowance__2029', 'uc_limited_capability_for_WRA__2026', 'uc_limited_capability_for_WRA__2027', 'uc_limited_capability_for_WRA__2028', 'uc_limited_capability_for_WRA__2029', 'uc_LCWRA_element__2026', 'uc_LCWRA_element__2027', 'uc_LCWRA_element__2028', 'uc_LCWRA_element__2029', 'pip_m_reported__2023', 'pip_m_reported__2024', 'pip_m_reported__2025', 'pip_m_reported__2026', 'pip_m_reported__2027', 'pip_m_reported__2028', 'pip_m_reported__2029', 'pip_m_reported__2030', 'pip_dl_reported__2023', 'pip_dl_reported__2024', 'pip_dl_reported__2025', 'pip_dl_reported__2026', 'pip_dl_reported__2027', 'pip_dl_reported__2028', 'pip_dl_reported__2029', 'pip_dl_reported__2030', 'pension_credit_reported__2023', 'pension_credit_reported__2024', 'pension_credit_reported__2025', 'pension_credit_reported__2026', 'pension_credit_reported__2027', 'pension_credit_reported__2028', 'pension_credit_reported__2029', 'pension_credit_reported__2030', 'would_claim_pc__2023', 'would_claim_pc__2024', 'would_claim_pc__2025', 'would_claim_pc__2026', 'would_claim_pc__2027', 'would_claim_pc__2028', 'would_claim_pc__2029', 'would_claim_pc__2030', 'would_evade_tv_licence_fee__2023', 'would_evade_tv_licence_fee__2024', 'would_evade_tv_licence_fee__2025', 'would_evade_tv_licence_fee__2026', 'would_evade_tv_licence_fee__2027', 'would_evade_tv_licence_fee__2028', 'would_evade_tv_licence_fee__2029', 'would_evade_tv_licence_fee__2030', 'free_school_fruit_veg__2023', 'free_school_fruit_veg__2024', 'free_school_fruit_veg__2025', 'free_school_fruit_veg__2026', 'free_school_fruit_veg__2027', 'free_school_fruit_veg__2028', 'free_school_fruit_veg__2029', 'free_school_fruit_veg__2030', 'dfe_education_spending__2023', 'dfe_education_spending__2024', 'dfe_education_spending__2025', 'dfe_education_spending__2026', 
'dfe_education_spending__2027', 'dfe_education_spending__2028', 'dfe_education_spending__2029', 'dfe_education_spending__2030', 'free_school_meals__2023', 'free_school_meals__2024', 'free_school_meals__2025', 'free_school_meals__2026', 'free_school_meals__2027', 'free_school_meals__2028', 'free_school_meals__2029', 'free_school_meals__2030', 'would_claim_extended_childcare__2023', 'would_claim_extended_childcare__2024', 'would_claim_extended_childcare__2025', 'would_claim_extended_childcare__2026', 'would_claim_extended_childcare__2027', 'would_claim_extended_childcare__2028', 'would_claim_extended_childcare__2029', 'would_claim_extended_childcare__2030', 'maximum_extended_childcare_hours_usage__2023', 'maximum_extended_childcare_hours_usage__2024', 'maximum_extended_childcare_hours_usage__2025', 'maximum_extended_childcare_hours_usage__2026', 'maximum_extended_childcare_hours_usage__2027', 'maximum_extended_childcare_hours_usage__2028', 'maximum_extended_childcare_hours_usage__2029', 'maximum_extended_childcare_hours_usage__2030', 'would_claim_targeted_childcare__2023', 'would_claim_targeted_childcare__2024', 'would_claim_targeted_childcare__2025', 'would_claim_targeted_childcare__2026', 'would_claim_targeted_childcare__2027', 'would_claim_targeted_childcare__2028', 'would_claim_targeted_childcare__2029', 'would_claim_targeted_childcare__2030', 'would_claim_universal_childcare__2023', 'would_claim_universal_childcare__2024', 'would_claim_universal_childcare__2025', 'would_claim_universal_childcare__2026', 'would_claim_universal_childcare__2027', 'would_claim_universal_childcare__2028', 'would_claim_universal_childcare__2029', 'would_claim_universal_childcare__2030', 'student_loan_repayments__2023', 'student_loan_repayments__2024', 'student_loan_repayments__2025', 'student_loan_repayments__2026', 'student_loan_repayments__2027', 'student_loan_repayments__2028', 'student_loan_repayments__2029', 'student_loan_repayments__2030', 'would_claim_child_benefit__2023', 
'would_claim_child_benefit__2024', 'would_claim_child_benefit__2025', 'would_claim_child_benefit__2026', 'would_claim_child_benefit__2027', 'would_claim_child_benefit__2028', 'would_claim_child_benefit__2029', 'would_claim_child_benefit__2030', 'child_benefit_reported__2023', 'child_benefit_reported__2024', 'child_benefit_reported__2025', 'child_benefit_reported__2026', 'child_benefit_reported__2027', 'child_benefit_reported__2028', 'child_benefit_reported__2029', 'child_benefit_reported__2030', 'capital_gains_before_response__2023', 'capital_gains_before_response__2024', 'capital_gains_before_response__2025', 'capital_gains_before_response__2026', 'capital_gains_before_response__2027', 'capital_gains_before_response__2028', 'capital_gains_before_response__2029', 'capital_gains_before_response__2030', 'tax_free_savings_income__2023', 'tax_free_savings_income__2024', 'tax_free_savings_income__2025', 'tax_free_savings_income__2026', 'tax_free_savings_income__2027', 'tax_free_savings_income__2028', 'tax_free_savings_income__2029', 'tax_free_savings_income__2030', 'would_claim_tfc__2023', 'would_claim_tfc__2024', 'would_claim_tfc__2025', 'would_claim_tfc__2026', 'would_claim_tfc__2027', 'would_claim_tfc__2028', 'would_claim_tfc__2029', 'would_claim_tfc__2030', 'student_loan_plan__2023', 'student_loan_plan__2024', 'student_loan_plan__2025', 'student_loan_plan__2026', 'student_loan_plan__2027', 'student_loan_plan__2028', 'student_loan_plan__2029', 'student_loan_plan__2030', 'domestic_rates__2023', 'domestic_rates__2024', 'domestic_rates__2025', 'domestic_rates__2026', 'domestic_rates__2027', 'domestic_rates__2028', 'domestic_rates__2029', 'domestic_rates__2030']\n", - "DataFrame shape: (115612, 1127)\n", - "'person_household_id' columns: ['person_household_id__2023', 'person_household_id__2024', 'person_household_id__2025', 'person_household_id__2026', 'person_household_id__2027', 'person_household_id__2028', 'person_household_id__2029', 'person_household_id__2030']\n", 
- "Filtered DataFrame shape: (8470, 1127)\n", - "[ERROR] ValueError: Unable to set value \"[ 39361. 39361. 39361. ... 134410.5 134410.5 6000. ]\" for variable \"corporate_wealth\", as its length is 8470 while there are 4108 households in the simulation.\n", - "\n", - "This confirms the bug exists in the high-level API.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Traceback (most recent call last):\n", - " File \"/var/folders/qf/7fglpnr94wsc5xgbqkp80xbw0000gn/T/ipykernel_19066/3661659745.py\", line 7, in \n", - " sim_wales = Simulation(country=\"uk\", scope=\"macro\", region=\"country/wales\")\n", - " File \"/Users/administrator/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py\", line 110, in __init__\n", - " self._initialise_simulations()\n", - " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^\n", - " File \"/Users/administrator/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py\", line 202, in _initialise_simulations\n", - " self.baseline_simulation = self._initialise_simulation(\n", - " ~~~~~~~~~~~~~~~~~~~~~~~~~~~^\n", - " scope=self.options.scope,\n", - " ^^^^^^^^^^^^^^^^^^^^^^^^^\n", - " ...<5 lines>...\n", - " subsample=self.options.subsample,\n", - " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - " )\n", - " ^\n", - " File \"/Users/administrator/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py\", line 260, in _initialise_simulation\n", - " simulation = self._apply_region_to_simulation(\n", - " country=country,\n", - " ...<4 lines>...\n", - " time_period=time_period,\n", - " )\n", - " File \"/Users/administrator/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py\", line 307, in _apply_region_to_simulation\n", - " simulation = simulation_type(\n", - " dataset=filtered_df, reform=reform\n", - " )\n", - " File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_uk/simulation.py\", line 100, in __init__\n", - " self.build_from_dataframe(dataset)\n", - " 
~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^\n", - " File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_uk/simulation.py\", line 286, in build_from_dataframe\n", - " self.set_input(variable, time_period, df[column])\n", - " ~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - " File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/simulations/simulation.py\", line 1241, in set_input\n", - " self.get_holder(variable_name).set_input(\n", - " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^\n", - " period, value, self.branch_name\n", - " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - " )\n", - " ^\n", - " File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/holders/holder.py\", line 244, in set_input\n", - " return self._set(period, array, branch_name)\n", - " ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - " File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/holders/holder.py\", line 286, in _set\n", - " value = self._to_array(value)\n", - " File \"/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/policyengine_core/holders/holder.py\", line 253, in _to_array\n", - " raise ValueError(\n", - " ...<7 lines>...\n", - " )\n", - "ValueError: Unable to set value \"[ 39361. 39361. 39361. ... 134410.5 134410.5 6000. 
]\" for variable \"corporate_wealth\", as its length is 8470 while there are 4108 households in the simulation.\n" - ] - } - ], - "source": [ - "# Test with policyengine.Simulation using region=\"country/wales\"\n", - "print(\"=== Step 7: Test with policyengine.Simulation ===\")\n", - "print(\"Creating Simulation with region='country/wales'...\")\n", - "print()\n", - "\n", - "try:\n", - " sim_wales = Simulation(country=\"uk\", scope=\"macro\", region=\"country/wales\")\n", - " \n", - " wales_underlying = sim_wales.baseline_simulation\n", - " print(f\"Wales simulation created!\")\n", - " print(f\" Person count: {wales_underlying.persons.count}\")\n", - " print(f\" Household count: {wales_underlying.household.count}\")\n", - " \n", - " # Try calculating the problematic variable\n", - " print(\"\\nCalculating would_evade_tv_licence_fee...\")\n", - " result = sim_wales.calculate(\"would_evade_tv_licence_fee\")\n", - " print(f\" Result length: {len(result)}\")\n", - " print(\" [OK] No error!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"[ERROR] {type(e).__name__}: {e}\")\n", - " print()\n", - " print(\"This confirms the bug exists in the high-level API.\")\n", - " traceback.print_exc()" - ] - }, - { - "cell_type": "markdown", - "id": "cell-23", - "metadata": {}, - "source": [ - "## Step 8: Compare with Constituency Filtering (Should Work)\n", - "\n", - "Constituency filtering uses weight adjustment instead of DataFrame subsetting." 
- ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "cell-24", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "No data provided, using default dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n", - "Using dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n", - "Downloading enhanced_frs_2023_24.h5 from bucket policyengine-uk-data-private\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "=== Step 8: Test Constituency Filtering ===\n", - "Creating Simulation with region='constituency/Cardiff South and Penarth'...\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:root:No version specified for policyengine-uk-data-private, enhanced_frs_2023_24.h5. Using latest version: 1.29.4\n", - "WARNING:root:No metadata found for blob policyengine-uk-data-private, constituencies_2024.csv, so it has no version attached.\n", - "WARNING:root:No version specified for policyengine-uk-data-private, constituencies_2024.csv. Using latest version: None\n", - "WARNING:root:No version specified for policyengine-uk-data-private, parliamentary_constituency_weights.h5. 
Using latest version: 1.29.4\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Constituency simulation created!\n", - " Person count: 115612\n", - " Household count: 53508\n", - " (Full UK counts, but weights adjusted for constituency)\n", - "\n", - "Calculating would_evade_tv_licence_fee...\n", - "[ERROR] AttributeError: 'Simulation' object has no attribute 'calculate'\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Traceback (most recent call last):\n", - " File \"/var/folders/qf/7fglpnr94wsc5xgbqkp80xbw0000gn/T/ipykernel_19066/2462177757.py\", line 21, in \n", - " result = sim_const.calculate(\"would_evade_tv_licence_fee\")\n", - " ^^^^^^^^^^^^^^^^^^^\n", - "AttributeError: 'Simulation' object has no attribute 'calculate'\n" - ] - } - ], - "source": [ - "# Test constituency filtering\n", - "print(\"=== Step 8: Test Constituency Filtering ===\")\n", - "print(\"Creating Simulation with region='constituency/Cardiff South and Penarth'...\")\n", - "print()\n", - "\n", - "try:\n", - " sim_const = Simulation(\n", - " country=\"uk\", \n", - " scope=\"macro\", \n", - " region=\"constituency/Cardiff South and Penarth\"\n", - " )\n", - " \n", - " const_underlying = sim_const.baseline_simulation\n", - " print(f\"Constituency simulation created!\")\n", - " print(f\" Person count: {const_underlying.persons.count}\")\n", - " print(f\" Household count: {const_underlying.household.count}\")\n", - " print(\" (Full UK counts, but weights adjusted for constituency)\")\n", - " \n", - " # Try calculating the problematic variable\n", - " print(\"\\nCalculating would_evade_tv_licence_fee...\")\n", - " result = sim_const.calculate(\"would_evade_tv_licence_fee\")\n", - " print(f\" Result length: {len(result)}\")\n", - " print(\" [OK] Constituency filtering works!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"[ERROR] {type(e).__name__}: {e}\")\n", - " traceback.print_exc()" - ] - }, - { - "cell_type": "code", - 
"execution_count": 17, - "id": "cell-25", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "=== Step 8b: Test Local Authority Filtering ===\n", - "Creating Simulation with region='local_authority/Cardiff'...\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "No data provided, using default dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n", - "Using dataset: gs://policyengine-uk-data-private/enhanced_frs_2023_24.h5\n", - "Downloading enhanced_frs_2023_24.h5 from bucket policyengine-uk-data-private\n", - "WARNING:root:No version specified for policyengine-uk-data-private, enhanced_frs_2023_24.h5. Using latest version: 1.29.4\n", - "WARNING:root:No metadata found for blob policyengine-uk-data-private, local_authorities_2021.csv, so it has no version attached.\n", - "WARNING:root:No version specified for policyengine-uk-data-private, local_authorities_2021.csv. Using latest version: None\n", - "WARNING:root:No version specified for policyengine-uk-data-private, local_authority_weights.h5. 
Using latest version: 1.29.4\n" - ] - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[17], line 7\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28mprint\u001b[39m()\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m----> 7\u001b[0m sim_la \u001b[38;5;241m=\u001b[39m \u001b[43mSimulation\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43mcountry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muk\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mscope\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmacro\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43mregion\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlocal_authority/Cardiff\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 11\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 13\u001b[0m la_underlying \u001b[38;5;241m=\u001b[39m sim_la\u001b[38;5;241m.\u001b[39mbaseline_simulation\n\u001b[1;32m 14\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mLocal Authority simulation created!\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py:110\u001b[0m, in \u001b[0;36mSimulation.__init__\u001b[0;34m(self, **options)\u001b[0m\n\u001b[1;32m 108\u001b[0m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_set_data(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mdata)\n\u001b[1;32m 109\u001b[0m logging\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mData loaded\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 110\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_initialise_simulations\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 111\u001b[0m logging\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSimulations initialised\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 112\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcheck_data_version()\n", - "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py:202\u001b[0m, in \u001b[0;36mSimulation._initialise_simulations\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 201\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_initialise_simulations\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m--> 202\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbaseline_simulation \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_initialise_simulation\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 203\u001b[0m \u001b[43m \u001b[49m\u001b[43mscope\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mscope\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 204\u001b[0m \u001b[43m \u001b[49m\u001b[43mcountry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcountry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 205\u001b[0m 
\u001b[43m \u001b[49m\u001b[43mreform\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbaseline\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 206\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 207\u001b[0m \u001b[43m \u001b[49m\u001b[43mtime_period\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtime_period\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 208\u001b[0m \u001b[43m \u001b[49m\u001b[43mregion\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mregion\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 209\u001b[0m \u001b[43m \u001b[49m\u001b[43msubsample\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msubsample\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 210\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 212\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mreform \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 213\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreform_simulation \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_initialise_simulation(\n\u001b[1;32m 214\u001b[0m 
scope\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mscope,\n\u001b[1;32m 215\u001b[0m country\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mcountry,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 220\u001b[0m subsample\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39msubsample,\n\u001b[1;32m 221\u001b[0m )\n", - "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py:260\u001b[0m, in \u001b[0;36mSimulation._initialise_simulation\u001b[0;34m(self, country, scope, reform, data, time_period, region, subsample)\u001b[0m\n\u001b[1;32m 257\u001b[0m simulation\u001b[38;5;241m.\u001b[39mdefault_calculation_period \u001b[38;5;241m=\u001b[39m time_period\n\u001b[1;32m 259\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m region \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 260\u001b[0m simulation \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_apply_region_to_simulation\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 261\u001b[0m \u001b[43m \u001b[49m\u001b[43mcountry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcountry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 262\u001b[0m \u001b[43m \u001b[49m\u001b[43msimulation\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msimulation\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 263\u001b[0m \u001b[43m \u001b[49m\u001b[43msimulation_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_simulation_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 264\u001b[0m \u001b[43m \u001b[49m\u001b[43mregion\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mregion\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 265\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mreform\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreform\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 266\u001b[0m \u001b[43m \u001b[49m\u001b[43mtime_period\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtime_period\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 267\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 269\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m subsample \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 270\u001b[0m simulation \u001b[38;5;241m=\u001b[39m simulation\u001b[38;5;241m.\u001b[39msubsample(subsample)\n", - "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/simulation.py:366\u001b[0m, in \u001b[0;36mSimulation._apply_region_to_simulation\u001b[0;34m(self, country, simulation, simulation_type, region, reform, time_period)\u001b[0m\n\u001b[1;32m 362\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 363\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 364\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mLocal authority \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mla\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m not found. 
See \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mla_names_local_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m for the list of available local authorities.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 365\u001b[0m )\n\u001b[0;32m--> 366\u001b[0m weights_local_path \u001b[38;5;241m=\u001b[39m \u001b[43mdownload\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 367\u001b[0m \u001b[43m \u001b[49m\u001b[43mgcs_bucket\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpolicyengine-uk-data-private\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 368\u001b[0m \u001b[43m \u001b[49m\u001b[43mgcs_key\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlocal_authority_weights.h5\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 369\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 371\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m h5py\u001b[38;5;241m.\u001b[39mFile(weights_local_path, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[1;32m 372\u001b[0m weights \u001b[38;5;241m=\u001b[39m f[\u001b[38;5;28mstr\u001b[39m(time_period)][\u001b[38;5;241m.\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;241m.\u001b[39m]\n", - "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/utils/data_download.py:38\u001b[0m, in \u001b[0;36mdownload\u001b[0;34m(gcs_key, gcs_bucket, version, return_version)\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 22\u001b[0m \u001b[38;5;124;03mDownload a file from Google Cloud Storage.\u001b[39;00m\n\u001b[1;32m 23\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 35\u001b[0m \u001b[38;5;124;03m Otherwise: just the local_path string\u001b[39;00m\n\u001b[1;32m 36\u001b[0m 
\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 37\u001b[0m logging\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUsing Google Cloud Storage for download.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 38\u001b[0m local_path, downloaded_version \u001b[38;5;241m=\u001b[39m \u001b[43mdownload_file_from_gcs\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 39\u001b[0m \u001b[43m \u001b[49m\u001b[43mbucket_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgcs_bucket\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 40\u001b[0m \u001b[43m \u001b[49m\u001b[43mgcs_key\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgcs_key\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 41\u001b[0m \u001b[43m \u001b[49m\u001b[43mversion\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mversion\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 42\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 43\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m return_version:\n\u001b[1;32m 44\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m local_path, downloaded_version\n", - "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/utils/google_cloud_bucket.py:75\u001b[0m, in \u001b[0;36mdownload_file_from_gcs\u001b[0;34m(bucket_name, gcs_key, version)\u001b[0m\n\u001b[1;32m 72\u001b[0m local_path \u001b[38;5;241m=\u001b[39m DATASETS_DIR \u001b[38;5;241m/\u001b[39m gcs_key\n\u001b[1;32m 73\u001b[0m local_path\u001b[38;5;241m.\u001b[39mparent\u001b[38;5;241m.\u001b[39mmkdir(parents\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, exist_ok\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m---> 75\u001b[0m version \u001b[38;5;241m=\u001b[39m \u001b[43m_get_client\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdownload\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 76\u001b[0m \u001b[43m \u001b[49m\u001b[43mbucket_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 
77\u001b[0m \u001b[43m \u001b[49m\u001b[43mgcs_key\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 78\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocal_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 79\u001b[0m \u001b[43m \u001b[49m\u001b[43mversion\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mversion\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 80\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_version\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 81\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 82\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mstr\u001b[39m(local_path), version\n", - "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/utils/data/caching_google_storage_client.py:64\u001b[0m, in \u001b[0;36mCachingGoogleStorageClient.download\u001b[0;34m(self, bucket, key, target, version, return_version)\u001b[0m\n\u001b[1;32m 60\u001b[0m version \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclient\u001b[38;5;241m.\u001b[39m_get_latest_version(bucket, key)\n\u001b[1;32m 61\u001b[0m logging\u001b[38;5;241m.\u001b[39mwarning(\n\u001b[1;32m 62\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo version specified for \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbucket\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m, \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkey\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m. 
Using latest version: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mversion\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 63\u001b[0m )\n\u001b[0;32m---> 64\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msync\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbucket\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mversion\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 65\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcache\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_data_key(bucket, key, version))\n\u001b[1;32m 66\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(data) \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28mbytes\u001b[39m:\n", - "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/utils/data/caching_google_storage_client.py:106\u001b[0m, in \u001b[0;36mCachingGoogleStorageClient.sync\u001b[0;34m(self, bucket, key, version)\u001b[0m\n\u001b[1;32m 104\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCache exists and crc is unchanged for \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mid_string\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m .\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 105\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[0;32m--> 106\u001b[0m [content, downloaded_crc] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdownload\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 107\u001b[0m \u001b[43m \u001b[49m\u001b[43mbucket\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mversion\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mversion\u001b[49m\n\u001b[1;32m 108\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 109\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\n\u001b[1;32m 110\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDownloaded new version of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mid_string\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m with crc \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdownloaded_crc\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 111\u001b[0m )\n\u001b[1;32m 113\u001b[0m \u001b[38;5;66;03m# atomic transaction to update both the data and the metadata\u001b[39;00m\n\u001b[1;32m 114\u001b[0m \u001b[38;5;66;03m# at the same time.\u001b[39;00m\n", - "File \u001b[0;32m~/Documents/PolicyEngine/policyengine.py/policyengine/utils/data/version_aware_storage_client.py:171\u001b[0m, in \u001b[0;36mVersionAwareStorageClient.download\u001b[0;34m(self, bucket_name, key, version)\u001b[0m\n\u001b[1;32m 166\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\n\u001b[1;32m 167\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDownloading \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbucket_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkey\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 168\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, version: \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;250m \u001b[39m\u001b[38;5;241m+\u001b[39m\u001b[38;5;250m \u001b[39mversion\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mif\u001b[39;00m\u001b[38;5;250m \u001b[39mversion\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01melse\u001b[39;00m\u001b[38;5;250m 
\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 169\u001b[0m )\n\u001b[1;32m 170\u001b[0m blob \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_blob(bucket_name, key, version)\n\u001b[0;32m--> 171\u001b[0m content \u001b[38;5;241m=\u001b[39m \u001b[43mblob\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdownload_as_bytes\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 172\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\n\u001b[1;32m 173\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDownloaded \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbucket_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkey\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 174\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, version: \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;250m \u001b[39m\u001b[38;5;241m+\u001b[39m\u001b[38;5;250m \u001b[39mversion\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mif\u001b[39;00m\u001b[38;5;250m \u001b[39mversion\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01melse\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 175\u001b[0m )\n\u001b[1;32m 176\u001b[0m \u001b[38;5;66;03m# According to documentation, blob.crc32c is updated as a side effect of\u001b[39;00m\n\u001b[1;32m 177\u001b[0m \u001b[38;5;66;03m# downloading the content. 
This should be the CRC of the downloaded\u001b[39;00m\n\u001b[1;32m 178\u001b[0m \u001b[38;5;66;03m# content (avoiding race conditions with the cloud).\u001b[39;00m\n", - "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/cloud/storage/blob.py:1530\u001b[0m, in \u001b[0;36mBlob.download_as_bytes\u001b[0;34m(self, client, start, end, raw_download, if_etag_match, if_etag_not_match, if_generation_match, if_generation_not_match, if_metageneration_match, if_metageneration_not_match, timeout, checksum, retry, single_shot_download)\u001b[0m\n\u001b[1;32m 1527\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m create_trace_span(name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mStorage.Blob.downloadAsBytes\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m 1528\u001b[0m string_buffer \u001b[38;5;241m=\u001b[39m BytesIO()\n\u001b[0;32m-> 1530\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_prep_and_do_download\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1531\u001b[0m \u001b[43m \u001b[49m\u001b[43mstring_buffer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1532\u001b[0m \u001b[43m \u001b[49m\u001b[43mclient\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mclient\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1533\u001b[0m \u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1534\u001b[0m \u001b[43m \u001b[49m\u001b[43mend\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mend\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1535\u001b[0m \u001b[43m \u001b[49m\u001b[43mraw_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mraw_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1536\u001b[0m \u001b[43m \u001b[49m\u001b[43mif_etag_match\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mif_etag_match\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1537\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mif_etag_not_match\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mif_etag_not_match\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1538\u001b[0m \u001b[43m \u001b[49m\u001b[43mif_generation_match\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mif_generation_match\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1539\u001b[0m \u001b[43m \u001b[49m\u001b[43mif_generation_not_match\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mif_generation_not_match\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1540\u001b[0m \u001b[43m \u001b[49m\u001b[43mif_metageneration_match\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mif_metageneration_match\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1541\u001b[0m \u001b[43m \u001b[49m\u001b[43mif_metageneration_not_match\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mif_metageneration_not_match\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1542\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1543\u001b[0m \u001b[43m \u001b[49m\u001b[43mchecksum\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchecksum\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1544\u001b[0m \u001b[43m \u001b[49m\u001b[43mretry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1545\u001b[0m \u001b[43m \u001b[49m\u001b[43msingle_shot_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msingle_shot_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1546\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1547\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m string_buffer\u001b[38;5;241m.\u001b[39mgetvalue()\n", - "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/cloud/storage/blob.py:4659\u001b[0m, in \u001b[0;36mBlob._prep_and_do_download\u001b[0;34m(self, file_obj, client, start, end, raw_download, if_etag_match, 
if_etag_not_match, if_generation_match, if_generation_not_match, if_metageneration_match, if_metageneration_not_match, timeout, checksum, retry, single_shot_download, command)\u001b[0m\n\u001b[1;32m 4656\u001b[0m transport \u001b[38;5;241m=\u001b[39m client\u001b[38;5;241m.\u001b[39m_http\n\u001b[1;32m 4658\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 4659\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_do_download\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 4660\u001b[0m \u001b[43m \u001b[49m\u001b[43mtransport\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4661\u001b[0m \u001b[43m \u001b[49m\u001b[43mfile_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4662\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_url\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4663\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4664\u001b[0m \u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4665\u001b[0m \u001b[43m \u001b[49m\u001b[43mend\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4666\u001b[0m \u001b[43m \u001b[49m\u001b[43mraw_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4667\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4668\u001b[0m \u001b[43m \u001b[49m\u001b[43mchecksum\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchecksum\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4669\u001b[0m \u001b[43m \u001b[49m\u001b[43mretry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4670\u001b[0m \u001b[43m \u001b[49m\u001b[43msingle_shot_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msingle_shot_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4671\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4672\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m 
InvalidResponse \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[1;32m 4673\u001b[0m _raise_from_invalid_response(exc)\n", - "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/cloud/storage/blob.py:1094\u001b[0m, in \u001b[0;36mBlob._do_download\u001b[0;34m(self, transport, file_obj, download_url, headers, start, end, raw_download, timeout, checksum, retry, single_shot_download)\u001b[0m\n\u001b[1;32m 1076\u001b[0m download \u001b[38;5;241m=\u001b[39m klass(\n\u001b[1;32m 1077\u001b[0m download_url,\n\u001b[1;32m 1078\u001b[0m stream\u001b[38;5;241m=\u001b[39mfile_obj,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1087\u001b[0m single_shot_download\u001b[38;5;241m=\u001b[39msingle_shot_download,\n\u001b[1;32m 1088\u001b[0m )\n\u001b[1;32m 1089\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m create_trace_span(\n\u001b[1;32m 1090\u001b[0m name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mStorage.\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdownload_class\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/consume\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 1091\u001b[0m attributes\u001b[38;5;241m=\u001b[39mextra_attributes,\n\u001b[1;32m 1092\u001b[0m api_request\u001b[38;5;241m=\u001b[39margs,\n\u001b[1;32m 1093\u001b[0m ):\n\u001b[0;32m-> 1094\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mdownload\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconsume\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtransport\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1095\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_extract_headers_from_download(response)\n\u001b[1;32m 1096\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", - "File 
\u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/cloud/storage/_media/requests/download.py:280\u001b[0m, in \u001b[0;36mDownload.consume\u001b[0;34m(self, transport, timeout)\u001b[0m\n\u001b[1;32m 276\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_write_to_stream(result)\n\u001b[1;32m 278\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m result\n\u001b[0;32m--> 280\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_request_helpers\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwait_and_retry\u001b[49m\u001b[43m(\u001b[49m\u001b[43mretriable_request\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_strategy\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/cloud/storage/_media/requests/_request_helpers.py:107\u001b[0m, in \u001b[0;36mwait_and_retry\u001b[0;34m(func, retry_strategy)\u001b[0m\n\u001b[1;32m 105\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m retry_strategy:\n\u001b[1;32m 106\u001b[0m func \u001b[38;5;241m=\u001b[39m retry_strategy(func)\n\u001b[0;32m--> 107\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:294\u001b[0m, in \u001b[0;36mRetry.__call__..retry_wrapped_func\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 290\u001b[0m target \u001b[38;5;241m=\u001b[39m functools\u001b[38;5;241m.\u001b[39mpartial(func, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 291\u001b[0m sleep_generator \u001b[38;5;241m=\u001b[39m exponential_sleep_generator(\n\u001b[1;32m 292\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_initial, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_maximum, 
multiplier\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_multiplier\n\u001b[1;32m 293\u001b[0m )\n\u001b[0;32m--> 294\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mretry_target\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 295\u001b[0m \u001b[43m \u001b[49m\u001b[43mtarget\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 296\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_predicate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 297\u001b[0m \u001b[43m \u001b[49m\u001b[43msleep_generator\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 298\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 299\u001b[0m \u001b[43m \u001b[49m\u001b[43mon_error\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mon_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 300\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/api_core/retry/retry_unary.py:147\u001b[0m, in \u001b[0;36mretry_target\u001b[0;34m(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)\u001b[0m\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[1;32m 146\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 147\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mtarget\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 148\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m inspect\u001b[38;5;241m.\u001b[39misawaitable(result):\n\u001b[1;32m 149\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(_ASYNC_RETRY_WARNING)\n", - "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/cloud/storage/_media/requests/download.py:276\u001b[0m, in 
\u001b[0;36mDownload.consume..retriable_request\u001b[0;34m()\u001b[0m\n\u001b[1;32m 273\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(msg) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mexc\u001b[39;00m\n\u001b[1;32m 274\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_bytes_downloaded \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m--> 276\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_write_to_stream\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresult\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 278\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m result\n", - "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/google/cloud/storage/_media/requests/download.py:149\u001b[0m, in \u001b[0;36mDownload._write_to_stream\u001b[0;34m(self, response)\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 145\u001b[0m body_iter \u001b[38;5;241m=\u001b[39m response\u001b[38;5;241m.\u001b[39miter_content(\n\u001b[1;32m 146\u001b[0m chunk_size\u001b[38;5;241m=\u001b[39m_request_helpers\u001b[38;5;241m.\u001b[39m_SINGLE_GET_CHUNK_SIZE,\n\u001b[1;32m 147\u001b[0m decode_unicode\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 148\u001b[0m )\n\u001b[0;32m--> 149\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mchunk\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mbody_iter\u001b[49m\u001b[43m:\u001b[49m\n\u001b[1;32m 150\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_stream\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwrite\u001b[49m\u001b[43m(\u001b[49m\u001b[43mchunk\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 151\u001b[0m \u001b[43m 
\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_bytes_downloaded\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mchunk\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/requests/models.py:820\u001b[0m, in \u001b[0;36mResponse.iter_content..generate\u001b[0;34m()\u001b[0m\n\u001b[1;32m 818\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraw, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstream\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m 819\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 820\u001b[0m \u001b[38;5;28;01myield from\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraw\u001b[38;5;241m.\u001b[39mstream(chunk_size, decode_content\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 821\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ProtocolError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 822\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ChunkedEncodingError(e)\n", - "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/urllib3/response.py:1253\u001b[0m, in \u001b[0;36mHTTPResponse.stream\u001b[0;34m(self, amt, decode_content)\u001b[0m\n\u001b[1;32m 1247\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1248\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m (\n\u001b[1;32m 1249\u001b[0m \u001b[38;5;129;01mnot\u001b[39;00m is_fp_closed(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp)\n\u001b[1;32m 1250\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoded_buffer) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 1251\u001b[0m 
\u001b[38;5;129;01mor\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoder \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoder\u001b[38;5;241m.\u001b[39mhas_unconsumed_tail)\n\u001b[1;32m 1252\u001b[0m ):\n\u001b[0;32m-> 1253\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mamt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_content\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1255\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data:\n\u001b[1;32m 1256\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m data\n", - "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/urllib3/response.py:1108\u001b[0m, in \u001b[0;36mHTTPResponse.read\u001b[0;34m(self, amt, decode_content, cache_content)\u001b[0m\n\u001b[1;32m 1105\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoded_buffer) \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m amt:\n\u001b[1;32m 1106\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoded_buffer\u001b[38;5;241m.\u001b[39mget(amt)\n\u001b[0;32m-> 1108\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_raw_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1110\u001b[0m flush_decoder \u001b[38;5;241m=\u001b[39m amt \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mor\u001b[39;00m (amt \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m 
data)\n\u001b[1;32m 1112\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 1113\u001b[0m \u001b[38;5;129;01mnot\u001b[39;00m data\n\u001b[1;32m 1114\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoded_buffer) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 1115\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoder \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_decoder\u001b[38;5;241m.\u001b[39mhas_unconsumed_tail)\n\u001b[1;32m 1116\u001b[0m ):\n", - "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/urllib3/response.py:1024\u001b[0m, in \u001b[0;36mHTTPResponse._raw_read\u001b[0;34m(self, amt, read1)\u001b[0m\n\u001b[1;32m 1021\u001b[0m fp_closed \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mclosed\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 1023\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_error_catcher():\n\u001b[0;32m-> 1024\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fp_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mread1\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mread1\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m fp_closed \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1025\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m amt \u001b[38;5;129;01mis\u001b[39;00m 
\u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m amt \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m data:\n\u001b[1;32m 1026\u001b[0m \u001b[38;5;66;03m# Platform-specific: Buggy versions of Python.\u001b[39;00m\n\u001b[1;32m 1027\u001b[0m \u001b[38;5;66;03m# Close the connection when no data is returned\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1032\u001b[0m \u001b[38;5;66;03m# not properly close the connection in all cases. There is\u001b[39;00m\n\u001b[1;32m 1033\u001b[0m \u001b[38;5;66;03m# no harm in redundantly calling close.\u001b[39;00m\n\u001b[1;32m 1034\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mclose()\n", - "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/site-packages/urllib3/response.py:1007\u001b[0m, in \u001b[0;36mHTTPResponse._fp_read\u001b[0;34m(self, amt, read1)\u001b[0m\n\u001b[1;32m 1004\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mread1(amt) \u001b[38;5;28;01mif\u001b[39;00m amt \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mread1()\n\u001b[1;32m 1005\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1006\u001b[0m \u001b[38;5;66;03m# StringIO doesn't like amt=None\u001b[39;00m\n\u001b[0;32m-> 1007\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m amt \u001b[38;5;129;01mis\u001b[39;00m 
\u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fp\u001b[38;5;241m.\u001b[39mread()\n", - "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/http/client.py:479\u001b[0m, in \u001b[0;36mHTTPResponse.read\u001b[0;34m(self, amt)\u001b[0m\n\u001b[1;32m 476\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlength \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m amt \u001b[38;5;241m>\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlength:\n\u001b[1;32m 477\u001b[0m \u001b[38;5;66;03m# clip the read to the \"end of response\"\u001b[39;00m\n\u001b[1;32m 478\u001b[0m amt \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlength\n\u001b[0;32m--> 479\u001b[0m s \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mamt\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 480\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m s \u001b[38;5;129;01mand\u001b[39;00m amt:\n\u001b[1;32m 481\u001b[0m \u001b[38;5;66;03m# Ideally, we would raise IncompleteRead if the content-length\u001b[39;00m\n\u001b[1;32m 482\u001b[0m \u001b[38;5;66;03m# wasn't satisfied, but it might break compatibility.\u001b[39;00m\n\u001b[1;32m 483\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_close_conn()\n", - "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/socket.py:719\u001b[0m, in \u001b[0;36mSocketIO.readinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 717\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m 
\u001b[38;5;167;01mOSError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcannot read from timed out object\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 718\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 719\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrecv_into\u001b[49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 720\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m timeout:\n\u001b[1;32m 721\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_timeout_occurred \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n", - "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/ssl.py:1304\u001b[0m, in \u001b[0;36mSSLSocket.recv_into\u001b[0;34m(self, buffer, nbytes, flags)\u001b[0m\n\u001b[1;32m 1300\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m flags \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 1301\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 1302\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnon-zero flags not allowed in calls to recv_into() on \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m\n\u001b[1;32m 1303\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m)\n\u001b[0;32m-> 1304\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnbytes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1305\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1306\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39mrecv_into(buffer, nbytes, flags)\n", - "File \u001b[0;32m/opt/miniconda3/envs/py-3.13/lib/python3.13/ssl.py:1138\u001b[0m, in \u001b[0;36mSSLSocket.read\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m 1136\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1137\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m buffer \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1138\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sslobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbuffer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1139\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1140\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_sslobj\u001b[38;5;241m.\u001b[39mread(\u001b[38;5;28mlen\u001b[39m)\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], - "source": [ - "# Test local authority filtering\n", - "print(\"\\n=== Step 8b: Test Local Authority Filtering ===\")\n", - "print(\"Creating Simulation with region='local_authority/Cardiff'...\")\n", - "print()\n", - "\n", - "try:\n", - " sim_la = Simulation(\n", - " country=\"uk\", \n", - " scope=\"macro\", \n", - " region=\"local_authority/Cardiff\"\n", - " )\n", - " \n", - " la_underlying = sim_la.baseline_simulation\n", - " print(f\"Local Authority simulation created!\")\n", - " print(f\" Person count: {la_underlying.persons.count}\")\n", - " print(f\" Household count: {la_underlying.household.count}\")\n", - " print(\" (Full UK counts, but weights adjusted for LA)\")\n", - " \n", - " # Try calculating the problematic variable\n", - " print(\"\\nCalculating would_evade_tv_licence_fee...\")\n", - " result = 
sim_la.calculate(\"would_evade_tv_licence_fee\")\n", - " print(f\" Result length: {len(result)}\")\n", - " print(\" [OK] Local authority filtering works!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"[ERROR] {type(e).__name__}: {e}\")\n", - " traceback.print_exc()" - ] - }, - { - "cell_type": "markdown", - "id": "cell-26", - "metadata": {}, - "source": [ - "## Step 9: Deep Dive - Check random() Function Behavior" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cell-27", - "metadata": {}, - "outputs": [], - "source": [ - "# Check what random(household) would return in the broken simulation\n", - "print(\"=== Step 9: Investigate random() function behavior ===\")\n", - "\n", - "# Import the random function\n", - "from policyengine_core.commons.formulas import random\n", - "\n", - "try:\n", - " # Get household population from the new (potentially broken) simulation\n", - " hh_pop = new_sim.household\n", - " print(f\"Household population count: {hh_pop.count}\")\n", - " \n", - " # Check what household_id returns when calculated via population\n", - " print(\"\\nCalling hh_pop('household_id', 2025)...\")\n", - " hh_ids_from_pop = hh_pop(\"household_id\", 2025)\n", - " print(f\" Result length: {len(hh_ids_from_pop)}\")\n", - " print(f\" Expected: {hh_pop.count}\")\n", - " \n", - " if len(hh_ids_from_pop) != hh_pop.count:\n", - " print(f\"\\n [BUG CONFIRMED] household_id returned {len(hh_ids_from_pop)} values\")\n", - " print(f\" but household population only has {hh_pop.count} entities!\")\n", - " print(\" This is why random(household) fails.\")\n", - " \n", - "except Exception as e:\n", - " print(f\"Error: {e}\")\n", - " traceback.print_exc()" - ] - }, - { - "cell_type": "markdown", - "id": "cell-28", - "metadata": {}, - "source": [ - "## Summary and Conclusions" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cell-29", - "metadata": {}, - "outputs": [], - "source": [ - "print(\"=\"*70)\n", - 
"print(\"DIAGNOSTIC SUMMARY\")\n", - "print(\"=\"*70)\n", - "\n", - "print(\"\"\"\n", - "FINDINGS:\n", - "\n", - "1. COUNTRY FILTERING (country/wales):\n", - " - Uses to_input_dataframe() + DataFrame subsetting + new Simulation()\n", - " - Creates entity count mismatch between persons and households\n", - " - Breaks when calculating variables that use random(household)\n", - "\n", - "2. CONSTITUENCY/LA FILTERING:\n", - " - Uses weight adjustment on existing simulation\n", - " - Preserves entity structure\n", - " - Works correctly\n", - "\n", - "ROOT CAUSE:\n", - " - The to_input_dataframe() -> filter -> new Simulation() approach\n", - " doesn't properly preserve entity relationships\n", - " - Either household_id isn't properly exported/imported, OR\n", - " - The entity membership mapping gets corrupted during rebuild\n", - "\n", - "RECOMMENDED FIX:\n", - " - Use weight-based filtering for country filtering (like constituency/LA)\n", - " - Zero out weights for households not in the target country\n", - " - This preserves entity structure and avoids the export/import complexity\n", - "\n", - "Example fix for policyengine/simulation.py:\n", - "\n", - " if \"country/\" in region:\n", - " country_name = region.split(\"/\")[1]\n", - " country = simulation.calculate(\"country\", map_to=\"household\").values\n", - " is_in_country = (country == country_name.upper())\n", - " current_weights = simulation.calculate(\n", - " \"household_weight\", simulation.default_calculation_period\n", - " )\n", - " simulation.set_input(\n", - " \"household_weight\",\n", - " simulation.default_calculation_period,\n", - " current_weights * is_in_country # Zero out non-matching\n", - " )\n", - "\"\"\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "py-3.13", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - 
"nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.5" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From 76ec6827c76a45091bdde612a827ea3d1fd30d88 Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Tue, 16 Dec 2025 14:13:26 +0400 Subject: [PATCH 7/7] fix: Gitignore .DS_Store --- .DS_Store | Bin 6148 -> 0 bytes .gitignore | 1 + 2 files changed, 1 insertion(+) delete mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index b2a6b3e6c870dde3f033c72e8cfe8a9aab8f5326..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKOG-mQ5UkdK0XJD@IalxoLx?BH1p*O5K%yk-`mJ&$ zdAwS@eht7D?@tfF48WA`h?9r0`MLYZZYpC$I`4SJ8;&2V!~XeYRDC(&+&dhx!2!FU z{QWj>y-ZV93P=GdAO)m=6!@hA-g{}&`$R=4AO)nrw*vlsXmrP3I3&iWgCRx$;)Lli zu49%UHct?H;gHA-&5}w?s?~^LNoT%QT`wFGlMbul!|KUa6N<&td4G#?SWi@x0#e{y zf!o~9y#GJaugw4FB<-Yt6!=#P*krj{F8E5-TSqVFy|&TUbg%iOyKx;9hG@scXvf@m fJDx{T)-_-AycZ6ML1#YbMEwl7E;1?b*9v?AELauN diff --git a/.gitignore b/.gitignore index 91af8c3c1..ff41b4b5a 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ dist/* **/*.h5 **/*.csv.gz .env +.DS_Store # Ignore generated credentials from google-github-actions/auth gha-creds-*.json