Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#------------------------------------------------------------------------------
# WFP Standardized Scripts
# Engagement in Income Generation Activities (EIG) Calculation
#------------------------------------------------------------------------------

# This script calculates the Engagement in Income Generation Activities (EIG)
# using standard variable names and sample data.
# Detailed guidelines can be found in the WFP documentation.

library(tidyverse)
library(labelled)
library(expss)

# Add sample data
# "n/a" is declared as a missing-value marker so that affected columns are
# parsed as numeric instead of character (read_csv only treats "" and "NA"
# as missing by default).
data <- read_csv("~/GitHub/RAMResourcesScripts/Static/EIG_Sample_Survey.csv",
                 na = c("", "NA", "n/a"))

# Rearrange variable names to ensure consistency in the dataset
data <- data %>%
  rename_with(~ gsub("/", "", .), starts_with("v"))

# Loop to account for up to 9 training types
# Only columns that actually exist are touched; a missing training type is
# skipped instead of raising an assignment error.
for (i in 1:9) {
  training_col <- paste0("PTrainingTypes", i)
  if (training_col %in% names(data)) {
    training_values <- as.numeric(data[[training_col]])
    data[[training_col]] <- ifelse(is.na(training_values), 0, training_values)
  }
}

# Calculate engagement in income generation activities
# PostTrainingEngagement: 1 if the individual was either employed or started
#   a self-employment activity after the training.
# PTrainingPart: 1 if the individual took part in at least one training type
#   (a flag, not the number of training types attended), so that the
#   household total below counts participants rather than trainings.
data <- data %>%
  mutate(across(starts_with("PPostTrainingEmpl"), as.numeric),
         across(starts_with("PPostTrainingIncome"), as.numeric),
         PostTrainingEngagement = pmax(PPostTrainingEmpl, PPostTrainingIncome, na.rm = TRUE),
         PTrainingPart = as.integer(
           rowSums(across(starts_with("PTrainingTypes")), na.rm = TRUE) > 0
         ))

# Calculate household level variables
household_data <- data %>%
  group_by(household_id) %>%
  summarise(PostTrainingEngagement = sum(PostTrainingEngagement, na.rm = TRUE),
            PTrainingPartNb = sum(PTrainingPart, na.rm = TRUE))

# Share of training participants engaged in income generation.
# Households without any training participant get NA instead of NaN/Inf.
household_data <- household_data %>%
  mutate(EIG = if_else(PTrainingPartNb > 0,
                       PostTrainingEngagement / PTrainingPartNb,
                       NA_real_))

# Summary statistics for full sample
summary(household_data$EIG)

# End of Scripts
Original file line number Diff line number Diff line change
@@ -1,66 +1,73 @@
*------------------------------------------------------------------------------
* WFP Standardized Scripts
* Engagement in Income Generation Activities (EIG) Calculation
*------------------------------------------------------------------------------

* This script calculates the Engagement in Income Generation Activities (EIG)
* using standard variable names and sample data.
* Detailed guidelines can be found in the WFP documentation.

* Import static sample data
import delim using "../../Static/EIG_Sample_Survey.csv", clear case(preserve) bindquotes(strict) varn(1)

* Rearrange variable names and codes to ensure consistency in the dataset
* In particular, variables within repeats are imported with progressive integer names (v1-v2-v3...) as they would be all assigned the same name otherwise
* The loop below names variables as [Variablename]+[_number of option]+[_number of repetition]

* Highest value of the repeat counter, used below as the number of repetitions
qui su RepeatPAsstEIG_count
loc RepeatNum = `r(max)'

local num1 = 1
foreach var of varlist v* {
    * Use the variable label (with "/" stripped) as the new variable name
    local `var'_lab : variable label `var'
    loc `var'_lab = subinstr(`"``var'_lab'"', "/", "", .)
    di `"``var'_lab'"'
    cap rename `var' ``var'_lab'
    * _rc 110 = a variable with that name already exists, i.e. this is a further repetition
    if _rc == 110 {
        * Suffix the first occurrence with _1 (no effect once it has already been renamed)
        cap rename ``var'_lab' ``var'_lab'_1
        cap rename `var' ``var'_lab'_`num1'
        if _rc == 110 {
            * Suffix already taken: move on to the next repetition number
            local num1 = `num1' + 1
            cap rename `var' ``var'_lab'_`num1'
        }
    }
}

assert `num1' == `RepeatNum' // check if all household members have been accounted in the loop

qui foreach var of varlist * {
    cap destring `var', replace i("n/a") // destring variables that have "n/a", which will be replaced with "." (as per Stata convention)
}

* Repeat for the number of HH members participating
forval i = 1(1)`RepeatNum' {
    * Reset the list of training-type variables for this member
    local PTrainingPart
    * Loop is set to account for up to 9 training types, with 0 as no training
    forval j = 1(1)9 {
        cap confirm var PTrainingTypes`j'_`i'
        if _rc == 0 {
            local PTrainingPart `PTrainingPart' PTrainingTypes`j'_`i'
        }
    }

    egen PostTrainingEngagement_`i' = rowmax(PPostTrainingEmpl_`i' PPostTrainingIncome_`i') // if individual was either employed or started a self-employment
    local PostTrainingEngagement `PostTrainingEngagement' PostTrainingEngagement_`i'

    egen PTrainingPart_`i' = rowmax(`PTrainingPart') // if individual participated at least a training activity in the list
    local PTrainingPartNb `PTrainingPartNb' PTrainingPart_`i'
}

* Variables (counts and shares) are still at household level
egen PostTrainingEngagement = rowtotal(`PostTrainingEngagement')
label var PostTrainingEngagement "Number of training participants engaging in income generating activities (self-employed or salaried)"
egen PTrainingPartNb = rowtotal(`PTrainingPartNb')
label var PTrainingPartNb "Number of training participants"

* Households with no training participant get a missing EIG (division by zero)
gen EIG = PostTrainingEngagement / PTrainingPartNb
label var EIG "Share of training participants who were able to engage in income generating activities post-training"
* Remove the per-member helper variables collected in the two locals
cap drop `PTrainingPartNb' `PostTrainingEngagement'

* Example of summary statistic for full sample, more analysis code is provided in the dedicated repository
sum EIG

* End of Scripts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#------------------------------------------------------------------------------
# WFP Standardized Scripts
# Engagement in Income Generation Activities (EIG) Calculation
#------------------------------------------------------------------------------

# This script calculates the Engagement in Income Generation Activities (EIG)
# using standard variable names and sample data.
# Detailed guidelines can be found in the WFP documentation.

import pandas as pd

# Add sample data
SAMPLE_DATA_PATH = "~/GitHub/RAMResourcesScripts/Static/EIG_Sample_Survey.csv"


def calculate_eig(data):
    """Compute the household-level EIG indicator from individual-level records.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per individual. Must contain ``household_id``,
        ``PPostTrainingEmpl`` and ``PPostTrainingIncome``; every column whose
        name contains ``PTrainingTypes`` is treated as a training-type flag.
        Non-numeric entries such as ``"n/a"`` are treated as missing.

    Returns
    -------
    pandas.DataFrame
        One row per household with ``household_id``,
        ``PostTrainingEngagement`` (participants engaged in income generation),
        ``PTrainingPartNb`` (number of training participants) and ``EIG``
        (their ratio; NaN when the household has no training participant).

    Raises
    ------
    KeyError
        If ``household_id`` or one of the two post-training columns is absent.
    """
    # Work on a copy so the caller's frame is not modified
    data = data.copy()

    # Rearrange variable names to ensure consistency in the dataset
    data.columns = [col.replace("/", "") for col in data.columns]

    # Up to 9 training types are expected; use whichever columns are present
    training_cols = [col for col in data.columns if 'PTrainingTypes' in col]
    outcome_cols = ['PPostTrainingEmpl', 'PPostTrainingIncome']

    # Coerce to numeric; anything unparseable (e.g. "n/a") becomes NaN
    for col in training_cols + outcome_cols:
        data[col] = pd.to_numeric(data[col], errors='coerce')

    # Calculate engagement in income generation activities:
    # individual was either employed or started a self-employment activity
    data['PostTrainingEngagement'] = data[outcome_cols].max(axis=1)

    # Individual participated in at least one training type. The row maximum
    # (not the sum) is used so that a person attending several trainings is
    # still counted as a single participant.
    if training_cols:
        data['PTrainingPart'] = data[training_cols].max(axis=1)
    else:
        data['PTrainingPart'] = 0.0

    # Calculate household level variables
    household_data = data.groupby('household_id').agg(
        PostTrainingEngagement=('PostTrainingEngagement', 'sum'),
        PTrainingPartNb=('PTrainingPart', 'sum')
    ).reset_index()

    # Avoid inf from division by zero: households without participants get NaN
    participants = household_data['PTrainingPartNb']
    household_data['EIG'] = (
        household_data['PostTrainingEngagement'] / participants.where(participants > 0)
    )
    return household_data


if __name__ == "__main__":
    data = pd.read_csv(SAMPLE_DATA_PATH)
    household_data = calculate_eig(data)

    # Summary statistics for full sample
    print(household_data['EIG'].describe())

# End of Scripts
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
*------------------------------------------------------------------------------
* WFP Standardized Scripts
* Engagement in Income Generation Activities (EIG) Calculation
*------------------------------------------------------------------------------

* This script calculates the Engagement in Income Generation Activities (EIG)
* using standard variable names and sample data.
* Detailed guidelines can be found in the WFP documentation.

* Import dataset.
* NOTE(review): the /VARIABLES list is a template, add one "name AUTO" line per CSV column.
* Comments are kept outside GET DATA because a comment line inside the command breaks it.
* NOTE(review): the file path below is machine-specific and must be adapted before running.
PRESERVE.
SET DECIMAL DOT.

GET DATA /TYPE=TXT
  /FILE="C:\Users\b\Desktop\demo\EIG_Sample_Survey.csv"
  /ENCODING='UTF8'
  /DELCASE=LINE
  /DELIMITERS=","
  /ARRANGEMENT=DELIMITED
  /FIRSTCASE=2
  /VARIABLES=
  v1 AUTO
  v2 AUTO
  v3 AUTO
  /MAP.
RESTORE.

CACHE.
EXECUTE.
DATASET NAME DataSet1 WINDOW=FRONT.

* Rearrange variable names and codes to ensure consistency in the dataset.
* In particular, variables within repeats are imported with progressive integer names (v1, v2, v3, ...).
* The renaming below names variables as [VariableName]+[_number of option]+[_number of repetition].

* Get the maximum number of repeats.
FREQUENCIES VARIABLES=v1 /FORMAT=NOTABLE /STATISTICS=MAXIMUM.

* Rename variables to ensure consistency.
* This assumes variable labels follow a specific pattern and may need adjustment based on actual data.
* RENAME VARIABLES is not a transformation and cannot run inside DO REPEAT, so the lists are mapped in one command.
* NOTE(review): Variable_1 to Variable_9 are placeholder names, the calculation further down expects the survey names.
RENAME VARIABLES (v1 TO v9 = Variable_1 TO Variable_9).

* Convert "n/a" to missing values and destring variables.
* Strings that cannot be read as numbers (such as "n/a") become system-missing.
ALTER TYPE Variable_1 TO Variable_9 (F8.2).

* Calculate indicators for each repeat.
* A macro loop is used because variable names cannot be built from an index inside DO REPEAT.
* PTrainingPart_i is 1 if member i took part in at least one of the 9 training types.
* PostTrainingEngagement_i is 1 if member i was employed or self-employed after the training.
* The two families are created in separate loops so that each is contiguous for the TO keyword.
* NOTE(review): assumes all 9 PTrainingTypes variables exist for every repeat, confirm against the data.
DEFINE !EIGPerMember (nrep = !TOKENS(1))
!DO !i = 1 !TO !nrep
COMPUTE !CONCAT('PTrainingPart_', !i) = MAX(
    !CONCAT('PTrainingTypes1_', !i), !CONCAT('PTrainingTypes2_', !i), !CONCAT('PTrainingTypes3_', !i),
    !CONCAT('PTrainingTypes4_', !i), !CONCAT('PTrainingTypes5_', !i), !CONCAT('PTrainingTypes6_', !i),
    !CONCAT('PTrainingTypes7_', !i), !CONCAT('PTrainingTypes8_', !i), !CONCAT('PTrainingTypes9_', !i)).
!DOEND
!DO !i = 1 !TO !nrep
COMPUTE !CONCAT('PostTrainingEngagement_', !i) = MAX(
    !CONCAT('PPostTrainingEmpl_', !i), !CONCAT('PPostTrainingIncome_', !i)).
!DOEND
!ENDDEFINE.

!EIGPerMember nrep = 9.
EXECUTE.

* Aggregate indicators to the household level.
* The data hold one row per household, so the totals are row-wise sums across members.
* AGGREGATE is not used because it sums across cases rather than across variables.
COMPUTE PostTrainingEngagement = SUM(PostTrainingEngagement_1 TO PostTrainingEngagement_9).
VARIABLE LABELS PostTrainingEngagement "Number of training participants engaging in income generating activities (self-employed or salaried)".
COMPUTE PTrainingPartNb = SUM(PTrainingPart_1 TO PTrainingPart_9).
VARIABLE LABELS PTrainingPartNb "Number of training participants".

* Calculate the EIG indicator.
* Households without any training participant are left system-missing.
IF (PTrainingPartNb > 0) EIG = PostTrainingEngagement / PTrainingPartNb.
VARIABLE LABELS EIG "Share of training participants who were able to engage in income generating activities post-training".
EXECUTE.

* Drop unnecessary variables.
DELETE VARIABLES PostTrainingEngagement_1 TO PostTrainingEngagement_9 PTrainingPart_1 TO PTrainingPart_9.
EXECUTE.

* Example of summary statistics for the full sample.
FREQUENCIES VARIABLES=EIG /STATISTICS=MEAN.
EXECUTE.

* End of Scripts