From f5bdfa8c1cf554470a29678835b58d16d985b628 Mon Sep 17 00:00:00 2001
From: nicolefindstar
Date: Thu, 1 Aug 2024 17:04:30 +0200
Subject: [PATCH] Updated EIG, formatted Stata, added SPSS, R and Python

---
Reviewer note: the R script in this patch was revised during review, so the
post-image blob id on its `index` line (f45acd4) is stale.

 .../EIG-indicator-tidyverse.R                 | 72 ++++++++++++++
 .../EIG-indicator.do                          | 97 ++++++++++---------
 .../EIG-indicator.py                          | 39 ++++++++
 .../EIG-indicator.sps                         | 81 ++++++++++++++++
 4 files changed, 244 insertions(+), 45 deletions(-)
 create mode 100644 Indicators/Engagement-in-income-generating-activities/EIG-indicator-tidyverse.R
 create mode 100644 Indicators/Engagement-in-income-generating-activities/EIG-indicator.py
 create mode 100644 Indicators/Engagement-in-income-generating-activities/EIG-indicator.sps

diff --git a/Indicators/Engagement-in-income-generating-activities/EIG-indicator-tidyverse.R b/Indicators/Engagement-in-income-generating-activities/EIG-indicator-tidyverse.R
new file mode 100644
index 0000000..f45acd4
--- /dev/null
+++ b/Indicators/Engagement-in-income-generating-activities/EIG-indicator-tidyverse.R
@@ -0,0 +1,72 @@
+#------------------------------------------------------------------------------
+# WFP Standardized Scripts
+# Engagement in Income Generation Activities (EIG) Calculation
+#------------------------------------------------------------------------------
+
+# This script calculates the Engagement in Income Generation Activities (EIG)
+# using standard variable names and sample data.
+# Detailed guidelines can be found in the WFP documentation.
+#
+# NOTE(review): the code below assumes one row per training participant, a
+# `household_id` key and 0/1 columns PTrainingTypes1-9, PPostTrainingEmpl and
+# PPostTrainingIncome - confirm against the sample file.
+
+library(tidyverse)
+library(labelled)
+library(expss)
+
+# Add sample data. "n/a" is read as missing so the 0/1 columns stay numeric
+# (the Stata script does the same through `destring ..., i("n/a")`).
+data <- read_csv(
+  "~/GitHub/RAMResourcesScripts/Static/EIG_Sample_Survey.csv",
+  na = c("", "NA", "n/a")
+)
+
+# Rearrange variable names to ensure consistency in the dataset
+data <- data %>%
+  rename_with(~ gsub("/", "", .), starts_with("v"))
+
+# Up to 9 training types are supported. Only columns that exist are touched
+# (any_of), and an unanswered type counts as "did not attend" (0).
+training_cols <- paste0("PTrainingTypes", 1:9)
+data <- data %>%
+  mutate(across(any_of(training_cols), ~ replace_na(as.numeric(.x), 0)))
+
+# Calculate engagement in income generation activities
+data <- data %>%
+  mutate(
+    across(starts_with("PPostTrainingEmpl"), as.numeric),
+    across(starts_with("PPostTrainingIncome"), as.numeric),
+    # Engaged if employed or self-employed after the training
+    PostTrainingEngagement = pmax(PPostTrainingEmpl, PPostTrainingIncome,
+                                  na.rm = TRUE),
+    # Participant flag (attended at least one training type). A flag rather
+    # than a sum, so several trainings do not count one person several times;
+    # this matches `rowmax` in the Stata script.
+    PTrainingPart = as.integer(
+      rowSums(select(., starts_with("PTrainingTypes")), na.rm = TRUE) > 0
+    )
+  )
+
+# Calculate household level variables
+household_data <- data %>%
+  group_by(household_id) %>%
+  summarise(
+    PostTrainingEngagement = sum(PostTrainingEngagement, na.rm = TRUE),
+    PTrainingPartNb = sum(PTrainingPart, na.rm = TRUE)
+  )
+
+# Share of participants engaged; households without participants get NA
+household_data <- household_data %>%
+  mutate(
+    EIG = if_else(
+      PTrainingPartNb > 0,
+      PostTrainingEngagement / PTrainingPartNb,
+      NA_real_
+    )
+  )
+
+# Summary statistics for full sample
+summary(household_data$EIG)
+
+# End of Scripts
\ No newline at end of file
diff --git a/Indicators/Engagement-in-income-generating-activities/EIG-indicator.do b/Indicators/Engagement-in-income-generating-activities/EIG-indicator.do
index dbb6c71..e6fd7c7 100644
--- a/Indicators/Engagement-in-income-generating-activities/EIG-indicator.do
+++ b/Indicators/Engagement-in-income-generating-activities/EIG-indicator.do
@@ -1,66 +1,73 @@
-***Create Engagement in Income Generation Activities [EIG] using standard variables names
+*------------------------------------------------------------------------------
+* WFP Standardized Scripts
+* Engagement in Income Generation Activities (EIG) Calculation
+*------------------------------------------------------------------------------
 
-/* Import static sample data */
+* This script calculates the Engagement in Income Generation Activities (EIG)
+* using standard variable names and sample data.
+* Detailed guidelines can be found in the WFP documentation.
+
+* Import static sample data
 import delim using "../../Static/EIG_Sample_Survey.csv", clear case(preserve) bindquotes(strict) varn(1)
 
-/* Rearrange variable names and codes to ensure consistency in the dataset*/
-* in particular, variables within repeats are imported with progressive integer names (v1-v2-v3...) as they would be all assigned the same name otherwise
-* the loop below names variables as [Variablename]+[_number of option]+[_number of repetition]
+* Rearrange variable names and codes to ensure consistency in the dataset
+* In particular, variables within repeats are imported with progressive integer names (v1-v2-v3...) as they would be all assigned the same name otherwise
+* The loop below names variables as [Variablename]+[_number of option]+[_number of repetition]
 qui su RepeatPAsstEIG_count
-loc RepeatNum=`r(max)'
+loc RepeatNum = `r(max)'
 
-local num1=1
+local num1 = 1
 foreach var of varlist v* {
-	local `var'_lab: variable label `var'
-	loc `var'_lab=subinstr(`"``var'_lab'"',"/","",.)
-	di `"``var'_lab'"'
-	cap rename `var' ``var'_lab'
-	if _rc==110 {
-		cap rename ``var'_lab' ``var'_lab'_1
-		cap rename `var' ``var'_lab'_`num1'
-		if _rc==110 {
-			local num1=`num1'+1
-			cap rename `var' ``var'_lab'_`num1'
-		}
-	}
+    local `var'_lab : variable label `var'
+    loc `var'_lab = subinstr(`"``var'_lab'"', "/", "", .)
+    di `"``var'_lab'"'
+    cap rename `var' ``var'_lab'
+    if _rc == 110 {
+        cap rename ``var'_lab' ``var'_lab'_1
+        cap rename `var' ``var'_lab'_`num1'
+        if _rc == 110 {
+            local num1 = `num1' + 1
+            cap rename `var' ``var'_lab'_`num1'
+        }
+    }
 }
-assert `num1'==`RepeatNum' // check if all household members have been accounted in the loop
+assert `num1' == `RepeatNum' // check if all household members have been accounted in the loop
 
 qui foreach var of varlist * {
-	cap destring `var', replace i("n/a") // destring variables that have "n/a", which will be replaced with "." (as per Stata convention)
+    cap destring `var', replace i("n/a") // destring variables that have "n/a", which will be replaced with "." (as per Stata convention)
 }
 
-// repeat for the number of HH members participating.
-forval i=1(1)`RepeatNum' {
-	local PTrainingPart
-	// loop is set to account for up to 9 training types, with 0 as no training.
-	forval j=1(1)9 {
-		cap confirm var PTrainingTypes`j'_`i'
-		if _rc==0 {
-			local PTrainingPart `PTrainingPart' PTrainingTypes`j'_`i'
-		}
-	}
-	
-	egen PostTrainingEngagement_`i'=rowmax(PPostTrainingEmpl_`i' PPostTrainingIncome_`i') // if individual was either employed or started a self-employment
-	local PostTrainingEngagement `PostTrainingEngagement' PostTrainingEngagement_`i'
-	
-	egen PTrainingPart_`i'=rowmax(`PTrainingPart') // if individual participated at least a training activity in the list
-	local PTrainingPartNb `PTrainingPartNb' PTrainingPart_`i'
-	
+* Repeat for the number of HH members participating
+forval i = 1(1)`RepeatNum' {
+    local PTrainingPart
+    * Loop is set to account for up to 9 training types, with 0 as no training
+    forval j = 1(1)9 {
+        cap confirm var PTrainingTypes`j'_`i'
+        if _rc == 0 {
+            local PTrainingPart `PTrainingPart' PTrainingTypes`j'_`i'
+        }
+    }
+
+    egen PostTrainingEngagement_`i' = rowmax(PPostTrainingEmpl_`i' PPostTrainingIncome_`i') // if individual was either employed or started a self-employment
+    local PostTrainingEngagement `PostTrainingEngagement' PostTrainingEngagement_`i'
+
+    egen PTrainingPart_`i' = rowmax(`PTrainingPart') // if individual participated at least a training activity in the list
+    local PTrainingPartNb `PTrainingPartNb' PTrainingPart_`i'
 }
-// variables (counts and shares) are still at household level
-egen PostTrainingEngagement=rowtotal(`PostTrainingEngagement')
+
+* Variables (counts and shares) are still at household level
+egen PostTrainingEngagement = rowtotal(`PostTrainingEngagement')
 label var PostTrainingEngagement "Number of training participants engaging in income generating activities (self-employed or salaried)"
 
-egen PTrainingPartNb=rowtotal(`PTrainingPartNb')
+egen PTrainingPartNb = rowtotal(`PTrainingPartNb')
 label var PTrainingPartNb "Number of training participants"
 
-gen EIG=PostTrainingEngagement/PTrainingPartNb
+gen EIG = PostTrainingEngagement / PTrainingPartNb
 label var EIG "Share of training participants who were able to engage in income generating activities post-training"
 
 cap drop `PTrainingPartNb' `PostTrainingEngagement'
 
-// example of summary statistic for full sample, more analysis code is provided in the dedicated repository
-su EIG
+* Example of summary statistic for full sample, more analysis code is provided in the dedicated repository
+sum EIG
 
-/* END OF DO-FILE */
\ No newline at end of file
+* End of Scripts
\ No newline at end of file
diff --git a/Indicators/Engagement-in-income-generating-activities/EIG-indicator.py b/Indicators/Engagement-in-income-generating-activities/EIG-indicator.py
new file mode 100644
index 0000000..a2ff214
--- /dev/null
+++ b/Indicators/Engagement-in-income-generating-activities/EIG-indicator.py
@@ -0,0 +1,39 @@
+#------------------------------------------------------------------------------
+# WFP Standardized Scripts
+# Engagement in Income Generation Activities (EIG) Calculation
+#------------------------------------------------------------------------------
+
+# This script calculates the Engagement in Income Generation Activities (EIG)
+# using standard variable names and sample data.
+# Detailed guidelines can be found in the WFP documentation.
+
+import pandas as pd
+
+# Add sample data
+data = pd.read_csv("~/GitHub/RAMResourcesScripts/Static/EIG_Sample_Survey.csv")
+
+# Rearrange variable names to ensure consistency in the dataset
+data.columns = [col.replace("/", "") for col in data.columns]
+
+# Coerce up to 9 training-type columns to numeric; "n/a" or other text becomes NaN
+for i in range(1, 10):
+    training_col = f'PTrainingTypes{i}'
+    if training_col in data.columns:
+        data[training_col] = pd.to_numeric(data[training_col], errors='coerce')
+
+# Engagement = employed or self-employed; participation = any training type (a flag, not a count)
+data['PostTrainingEngagement'] = data[['PPostTrainingEmpl', 'PPostTrainingIncome']].max(axis=1)
+data['PTrainingPart'] = data[[col for col in data.columns if 'PTrainingTypes' in col]].max(axis=1)
+
+# Calculate household level variables
+household_data = data.groupby('household_id').agg(
+    PostTrainingEngagement=('PostTrainingEngagement', 'sum'),
+    PTrainingPartNb=('PTrainingPart', 'sum')
+).reset_index()
+
+household_data['EIG'] = household_data['PostTrainingEngagement'] / household_data['PTrainingPartNb']
+
+# Summary statistics for full sample
+print(household_data['EIG'].describe())
+
+# End of Scripts
\ No newline at end of file
diff --git a/Indicators/Engagement-in-income-generating-activities/EIG-indicator.sps b/Indicators/Engagement-in-income-generating-activities/EIG-indicator.sps
new file mode 100644
index 0000000..6c76dc5
--- /dev/null
+++ b/Indicators/Engagement-in-income-generating-activities/EIG-indicator.sps
@@ -0,0 +1,81 @@
+*------------------------------------------------------------------------------
+* WFP Standardized Scripts
+* Engagement in Income Generation Activities (EIG) Calculation
+*------------------------------------------------------------------------------
+
+* This script calculates the Engagement in Income Generation Activities (EIG)
+* using standard variable names and sample data.
+* Detailed guidelines can be found in the WFP documentation.
+
+* Import dataset
+PRESERVE.
+SET DECIMAL DOT.
+
+GET DATA /TYPE=TXT
+  /FILE="C:\Users\b\Desktop\demo\EIG_Sample_Survey.csv"
+  /ENCODING='UTF8'
+  /DELCASE=LINE
+  /DELIMITERS=","
+  /ARRANGEMENT=DELIMITED
+  /FIRSTCASE=2
+  /VARIABLES=
+    v1 AUTO
+    v2 AUTO
+    v3 AUTO
+    * Add additional variables as needed
+  /MAP.
+RESTORE.
+
+CACHE.
+EXECUTE.
+DATASET NAME DataSet1 WINDOW=FRONT.
+
+* Rearrange variable names and codes to ensure consistency in the dataset.
+* In particular, variables within repeats are imported with progressive integer names (v1, v2, v3, ...).
+* The loop below names variables as [VariableName]+[_number of option]+[_number of repetition].
+
+* Get the maximum number of repeats.
+FREQUENCIES VARIABLES=v1 /FORMAT=NOTABLE /STATISTICS=MAXIMUM.
+
+* Rename variables to ensure consistency.
+* This assumes variable labels follow a specific pattern and may need adjustment based on actual data.
+DO REPEAT oldvar=v1 TO v9 /index=1 TO 9.
+    RENAME VARIABLES (oldvar = Variable_!index).
+END REPEAT.
+
+* Convert "n/a" to missing values and destring variables.
+DO REPEAT var=Variable_1 TO Variable_9.
+    RECODE var ('n/a' = SYSMIS) INTO var.
+    EXECUTE.
+END REPEAT.
+
+* Calculate indicators for each repeat.
+DO REPEAT i=1 TO 9.
+    * Check participation in training activities.
+    COMPUTE PTrainingPart_!i = MAX(PTrainingTypes1_!i, PTrainingTypes2_!i, PTrainingTypes3_!i, PTrainingTypes4_!i, PTrainingTypes5_!i, PTrainingTypes6_!i, PTrainingTypes7_!i, PTrainingTypes8_!i, PTrainingTypes9_!i).
+    * Check engagement in income generating activities post-training.
+    COMPUTE PostTrainingEngagement_!i = MAX(PPostTrainingEmpl_!i, PPostTrainingIncome_!i).
+    EXECUTE.
+END REPEAT.
+
+* Aggregate indicators to the household level.
+AGGREGATE
+  /OUTFILE=* MODE=ADDVARIABLES
+  /BREAK=
+  /PostTrainingEngagement = SUM(PostTrainingEngagement_1 TO PostTrainingEngagement_9)
+  /PTrainingPartNb = SUM(PTrainingPart_1 TO PTrainingPart_9).
+
+* Calculate the EIG indicator.
+COMPUTE EIG = PostTrainingEngagement / PTrainingPartNb.
+VARIABLE LABELS EIG "Share of training participants who were able to engage in income generating activities post-training".
+EXECUTE.
+
+* Drop unnecessary variables.
+DELETE VARIABLES PostTrainingEngagement_1 TO PostTrainingEngagement_9 PTrainingPart_1 TO PTrainingPart_9.
+EXECUTE.
+
+* Example of summary statistics for the full sample.
+FREQUENCIES VARIABLES=EIG /STATISTICS=MEAN.
+EXECUTE.
+
+* End of Scripts
\ No newline at end of file