6 Composite indicators
composeR dplyr::case_when
6.1 Composite with 1 categorical variable - select one
This example will look at creating an improved source of water variable. The indicator of interest is f11_dinking_water_source and the options for un-improved source of water are:
- trucked_in_water_truck_with_a_tank_etc
- drinking_water_from_water_kiosk_booth_with_water_for_bottling
- bottled_water_water_purchased_in_bottles
- other_specify
# Answer options that count as an un-improved source of water.
unimproved_source <- c(
  "trucked_in_water_truck_with_a_tank_etc",
  "drinking_water_from_water_kiosk_booth_with_water_for_bottling",
  "bottled_water_water_purchased_in_bottles",
  "other_specify"
)

# Using base R: ifelse() labels every row in one vectorised call.
# `%in%` never returns NA, so a missing answer would be labelled "improved".
main_dataset$wash_drinkingwater_improved_source_baser <- ifelse(
  main_dataset$f11_dinking_water_source %in% unimproved_source,
  "not_improved",
  "improved"
)

# Cross-tabulate the original answers against the new indicator.
table(
  main_dataset$f11_dinking_water_source,
  main_dataset$wash_drinkingwater_improved_source_baser,
  useNA = "ifany"
)
##
## improved
## bottled_water_water_purchased_in_bottles 0
## drinking_water_from_water_kiosk_booth_with_water_for_bottling 0
## other_specify 0
## personal_well 414
## public_well_or_boreholes_shared_access 187
## tap_drinking_water_centralized_water_supply 479
## technical_piped_water 2
## trucked_in_water_truck_with_a_tank_etc 0
##
## not_improved
## bottled_water_water_purchased_in_bottles 68
## drinking_water_from_water_kiosk_booth_with_water_for_bottling 366
## other_specify 11
## personal_well 0
## public_well_or_boreholes_shared_access 0
## tap_drinking_water_centralized_water_supply 0
## technical_piped_water 0
## trucked_in_water_truck_with_a_tank_etc 90
# Using base R, alternative: start from an all-NA column and fill it with
# logical indexing instead of ifelse(). The two masks are complementary, so
# every row receives exactly one label.
main_dataset$wash_drinkingwater_improved_source_baser2 <- NA
main_dataset$wash_drinkingwater_improved_source_baser2[
  !(main_dataset$f11_dinking_water_source %in% unimproved_source)
] <- "improved"
main_dataset$wash_drinkingwater_improved_source_baser2[
  main_dataset$f11_dinking_water_source %in% unimproved_source
] <- "not_improved"

# Same same: both base R approaches should agree on every row.
table(
  main_dataset$wash_drinkingwater_improved_source_baser,
  main_dataset$wash_drinkingwater_improved_source_baser2,
  useNA = "ifany"
)
##
## improved not_improved
## improved 1082 0
## not_improved 0 535
# Using case_when: conditions are checked in order, and the final TRUE
# catches every row that was not matched before.
main_dataset <- main_dataset %>%
  mutate(
    wash_improved_source_dplyr = case_when(
      f11_dinking_water_source %in% unimproved_source ~ "not_improved",
      TRUE ~ "improved"
    )
  )

# Cross-tabulate the original answers against the dplyr indicator.
table(
  main_dataset$f11_dinking_water_source,
  main_dataset$wash_improved_source_dplyr,
  useNA = "ifany"
)
##
## improved
## bottled_water_water_purchased_in_bottles 0
## drinking_water_from_water_kiosk_booth_with_water_for_bottling 0
## other_specify 0
## personal_well 414
## public_well_or_boreholes_shared_access 187
## tap_drinking_water_centralized_water_supply 479
## technical_piped_water 2
## trucked_in_water_truck_with_a_tank_etc 0
##
## not_improved
## bottled_water_water_purchased_in_bottles 68
## drinking_water_from_water_kiosk_booth_with_water_for_bottling 366
## other_specify 11
## personal_well 0
## public_well_or_boreholes_shared_access 0
## tap_drinking_water_centralized_water_supply 0
## technical_piped_water 0
## trucked_in_water_truck_with_a_tank_etc 90
# Same same: compare the base R and dplyr indicators row by row.
# useNA = "ifany" keeps missing values in the comparison, so a mismatch
# involving NA cannot be dropped silently.
table(
  main_dataset$wash_drinkingwater_improved_source_baser,
  main_dataset$wash_improved_source_dplyr,
  useNA = "ifany"
)
##
## improved not_improved
## improved 1082 0
## not_improved 0 535
In the previous example, there were no missing values. For this example, the indicator to be built will turn a yes/no question into a dummy variable (1 and 0). The variable of interest is b16_hohh_pension_eligible.
##
## no yes <NA>
## 6 870 741
# Using base R: `==` returns NA for a missing answer and ifelse() passes that
# NA through, so missing answers stay missing in the dummy.
main_dataset$hohh_pension_eligible_dummy_baser <- ifelse(
  main_dataset$b16_hohh_pension_eligible == "yes",
  1,
  0
)

# Check the dummy against the original answers, NA included.
table(
  main_dataset$b16_hohh_pension_eligible,
  main_dataset$hohh_pension_eligible_dummy_baser,
  useNA = "ifany"
)
##
## 0 1 <NA>
## no 6 0 0
## yes 0 870 0
## <NA> 0 0 741
# Using case_when: both answers are listed explicitly and there is no
# catch-all, so rows matching neither condition (the missing answers) get NA.
main_dataset <- main_dataset %>%
  mutate(
    hohh_pension_eligible_dummy_dplyr = case_when(
      b16_hohh_pension_eligible == "yes" ~ 1,
      b16_hohh_pension_eligible == "no" ~ 0
    )
  )

# Check the dummy against the original answers, NA included.
table(
  main_dataset$b16_hohh_pension_eligible,
  main_dataset$hohh_pension_eligible_dummy_dplyr,
  useNA = "ifany"
)
##
## 0 1 <NA>
## no 6 0 0
## yes 0 870 0
## <NA> 0 0 741
# Same same: the base R and dplyr dummies should agree, NA included.
table(
  main_dataset$hohh_pension_eligible_dummy_baser,
  main_dataset$hohh_pension_eligible_dummy_dplyr,
  useNA = "ifany"
)
##
## 0 1 <NA>
## 0 6 0 0
## 1 0 870 0
## <NA> 0 0 741
# Watch out for NA. This way is not correct: the catch-all TRUE also matches
# the rows where the answer is missing, so they are coded 0 instead of NA.
main_dataset <- main_dataset %>%
  mutate(
    hohh_pension_eligible_dummy_dplyr2 = case_when(
      b16_hohh_pension_eligible == "yes" ~ 1,
      TRUE ~ 0
    )
  )

# The <NA> row of the original answers now lands in the 0 column.
table(
  main_dataset$b16_hohh_pension_eligible,
  main_dataset$hohh_pension_eligible_dummy_dplyr2,
  useNA = "ifany"
)
##
## 0 1
## no 6 0
## yes 0 870
## <NA> 741 0
# Not same same: the base R dummy keeps NA where the catch-all version has 0.
table(
  main_dataset$hohh_pension_eligible_dummy_baser,
  main_dataset$hohh_pension_eligible_dummy_dplyr2,
  useNA = "ifany"
)
##
## 0 1
## 0 6 0
## 1 0 870
## <NA> 741 0
6.2 Composite with 2 categorical variables
This example will look at creating an indicator whether or not the sources for drinking and for cooking, cleaning and non-drinking purposes are both improved. The indicators of interest are f11_dinking_water_source (and more specifically wash_drinkingwater_improved_source_baser from previous paragraph) and f14_technical_water_source (F14_What is your HH’s main source of water for cooking, cleaning, and non-drinking purposes).
First, a new variable has to be created, wash_otherwater_improved_source_baser.
# Using base R
# Step 1: classify the source used for cooking, cleaning and non-drinking
# purposes with the same list of un-improved options.
main_dataset$wash_otherwater_improved_source_baser <- ifelse(
  main_dataset$f14_technical_water_source %in% unimproved_source,
  "not_improved",
  "improved"
)

# Step 2: "both_improved" only when the drinking AND the other-use source are
# improved; every other combination is "not_both_improved".
main_dataset$wash_bothwater_improved_source_baser <- ifelse(
  main_dataset$wash_drinkingwater_improved_source_baser == "improved" &
    main_dataset$wash_otherwater_improved_source_baser == "improved",
  "both_improved",
  "not_both_improved"
)

# Three-way table: drinking source x other-use source x combined indicator.
table(
  main_dataset$wash_drinkingwater_improved_source_baser,
  main_dataset$wash_otherwater_improved_source_baser,
  main_dataset$wash_bothwater_improved_source_baser,
  useNA = "ifany"
)
## , , = both_improved
##
##
## improved not_improved
## improved 1074 0
## not_improved 0 0
##
## , , = not_both_improved
##
##
## improved not_improved
## improved 0 8
## not_improved 480 55
Now, the variable will be coded to have 3 categories instead: both improved, at least drinking water and not improved.
# Three categories with nested ifelse(): the inner call is only reached by
# rows where the two sources are not both improved.
main_dataset$wash_bothwater_improved_source_baser2 <- ifelse(
  main_dataset$wash_drinkingwater_improved_source_baser == "improved" &
    main_dataset$wash_otherwater_improved_source_baser == "improved",
  "both_improved",
  ifelse(
    main_dataset$wash_drinkingwater_improved_source_baser == "improved",
    "at_least_drinking",
    "not_both_improved"
  )
)

# Three-way table: drinking source x other-use source x 3-category indicator.
table(
  main_dataset$wash_drinkingwater_improved_source_baser,
  main_dataset$wash_otherwater_improved_source_baser,
  main_dataset$wash_bothwater_improved_source_baser2,
  useNA = "ifany"
)
## , , = at_least_drinking
##
##
## improved not_improved
## improved 0 8
## not_improved 0 0
##
## , , = both_improved
##
##
## improved not_improved
## improved 1074 0
## not_improved 0 0
##
## , , = not_both_improved
##
##
## improved not_improved
## improved 0 0
## not_improved 480 55
# Using dplyr: case_when() replaces the nested ifelse(). Conditions are tried
# in order, so the second one only applies to rows the first did not match.
main_dataset <- main_dataset %>%
  mutate(
    wash_bothwater_improved_source_dplyr = case_when(
      wash_drinkingwater_improved_source_baser == "improved" &
        wash_otherwater_improved_source_baser == "improved" ~ "both_improved",
      wash_drinkingwater_improved_source_baser == "improved" ~ "at_least_drinking",
      TRUE ~ "not_both_improved"
    )
  )

# The dplyr and base R versions should agree on every row.
table(
  main_dataset$wash_bothwater_improved_source_dplyr,
  main_dataset$wash_bothwater_improved_source_baser2,
  useNA = "ifany"
)
##
## at_least_drinking both_improved not_both_improved
## at_least_drinking 8 0 0
## both_improved 0 1074 0
## not_both_improved 0 0 535
6.3 Composite with 1 categorical variable - select multiple
In this example we are creating an indicator to score whether or not drinking water is being processed or purified before usage. The indicator is calculated based on one categorical question f12_drinking_water_treat
# Using dplyr
# Score 3 when "do not process/purify" is ticked, score 1 when at least one of
# the listed treatment options is ticked. Rows matching neither condition
# stay NA because case_when() has no catch-all here.
main_dataset <- main_dataset %>%
  mutate(
    wash_indicator1 = case_when(
      f12_drinking_water_treat.do_not_process_purify == 1 ~ 3,
      f12_drinking_water_treat.cleaning_with_chemicals_chlorination == 1 |
        f12_drinking_water_treat.water_precipitation == 1 |
        f12_drinking_water_treat.filtering_the_water_pitcher_filter == 1 |
        f12_drinking_water_treat.filtering_the_water_reverse_osmosis_filter == 1 |
        f12_drinking_water_treat.boiling == 1 |
        f12_drinking_water_treat.percolation == 1 ~ 1
    )
  )
# Using base R
# Same scoring as the dplyr version, written with nested ifelse():
# 3 when "do not process/purify" is ticked, 1 when at least one of the listed
# treatment options is ticked, NA otherwise.
# Assignment uses `<-` (not `=`) to match the rest of the chapter.
main_dataset$wash_indicator2 <- ifelse(
  main_dataset$f12_drinking_water_treat.do_not_process_purify == 1,
  3,
  ifelse(
    main_dataset$f12_drinking_water_treat.cleaning_with_chemicals_chlorination == 1 |
      main_dataset$f12_drinking_water_treat.water_precipitation == 1 |
      main_dataset$f12_drinking_water_treat.filtering_the_water_pitcher_filter == 1 |
      main_dataset$f12_drinking_water_treat.filtering_the_water_reverse_osmosis_filter == 1 |
      main_dataset$f12_drinking_water_treat.boiling == 1 |
      main_dataset$f12_drinking_water_treat.percolation == 1,
    1,
    NA
  )
)
6.4 Composite with 1 numerical variable
This example will look at creating a categorical variable based on a numeric one, f6_how_many_wood_hh_consumed_last_winter, with three categories: ‘less than 5’, ‘between 5 (included) and 10 (excluded)’ and ‘10 and above’.
6.5 Composite with 2 numerical variables
This example will look at creating one of the indicators necessary to compute the FCS. In some cases, we need to check whether the sum of the number of days for 2 types of food is above 7. If the sum is above 7, the indicator has to return 7; otherwise, it returns the sum of both variables.
# Combine cereals/roots and meat/eggs, capping each sum at a maximum of 7 days
## Using base R: with() evaluates the expression inside main_dataset, so the
## columns can be referenced by name.
main_dataset$fcs_cereal_roots <- with(
  main_dataset,
  ifelse(
    g1_cereals_consumption + g2_roots_consumption > 7,
    7,
    g1_cereals_consumption + g2_roots_consumption
  )
)
## Using dplyr
main_dataset <- main_dataset %>%
  mutate(
    fcs_meat_eggs = ifelse(
      g5_meat_consumption + g6_eggs_consumption > 7,
      7,
      g5_meat_consumption + g6_eggs_consumption
    )
  )
6.6 Composite with 2 or more numerical variables
This example will look at creating the food consumption score.
# Food consumption score: weighted sum of the food group columns, using the
# capped cereal/roots and meat/eggs columns created earlier.
## Using dplyr
main_dataset <- main_dataset %>%
  mutate(
    FCS_score_dplyr =
      fcs_cereal_roots * 2 +
      g3_vegetables_consumption * 1 +
      g4_fruits_consumption * 1 +
      fcs_meat_eggs * 4 +
      g7_pulses_consumption * 3 +
      g8_dairy_consumption * 4 +
      g9_oil_consumption * 0.5 +
      g10_sugar_consumption * 0.5
  )
## Using base R: same weights, evaluated inside main_dataset with with().
main_dataset$FCS_score_baser <- with(
  main_dataset,
  (fcs_cereal_roots * 2) +
    (g3_vegetables_consumption * 1) +
    (g4_fruits_consumption * 1) +
    (fcs_meat_eggs * 4) +
    (g7_pulses_consumption * 3) +
    (g8_dairy_consumption * 4) +
    (g9_oil_consumption * 0.5) +
    (g10_sugar_consumption * 0.5)
)
6.7 Household Hunger Score (HHS)
For this example we will use the addindicators package to compute the HHS. The package is available at https://github.com/impact-initiatives/addindicators and can be installed using devtools::install_github("impact-initiatives/addindicators")
# Align the coding of the frequencies, then compute the HHS in one pipeline.
main_dataset <- main_dataset %>%
  mutate(
    # "1_rarely_12" is rewritten to "1_rarely_1_2", the value passed as
    # rarely_answer below; every other value is kept as it is.
    h3_1_how_often_day_and_night_hungry = if_else(
      h3_1_how_often_day_and_night_hungry == "1_rarely_12",
      "1_rarely_1_2",
      h3_1_how_often_day_and_night_hungry
    )
  ) %>%
  # Each hhs_* argument names the dataset column holding that question; the
  # *_answer arguments give the answer codes used in this dataset.
  addindicators::add_hhs(
    hhs_nofoodhh_1 = "h1_nofood_noresources",
    hhs_nofoodhh_1a = "h1_1_how_often_nofood_noresources",
    hhs_sleephungry_2 = "h2_sleep_hungry",
    hhs_sleephungry_2a = "h2_1_how_often_sleep_hungry",
    hhs_alldaynight_3 = "h3_day_and_night_hungry",
    hhs_alldaynight_3a = "h3_1_how_often_day_and_night_hungry",
    yes_answer = "1_yes",
    no_answer = "0_no",
    rarely_answer = "1_rarely_1_2",
    sometimes_answer = "2_sometimes_3_10",
    often_answer = "3_often_10plus_times"
  )

# Preview the resulting category column.
head(select(main_dataset, hhs_cat))
## hhs_cat
## 1 No or Little
## 2 No or Little
## 3 No or Little
## 4 No or Little
## 5 No or Little
## 6 No or Little
6.8 Food Consumption Score (FCS)
For this example we will use the addindicators package to compute the FCS. The package is available at https://github.com/impact-initiatives/addindicators and can be installed using devtools::install_github("impact-initiatives/addindicators")
# Compute the FCS with addindicators. Each fcs_* argument names the dataset
# column used for that food group.
# NOTE(review): re-render this chapter after this change; any printed output
# that follows was produced with the previous legumes mapping.
# NOTE(review): roots (g2) and eggs (g6) are not passed here, unlike the manual
# score, which uses the capped fcs_cereal_roots / fcs_meat_eggs columns.
# TODO confirm which columns should feed fcs_cereal and fcs_meat.
main_dataset <- addindicators::add_fcs(
  main_dataset,
  cutoffs = "normal 21.5-35", # "normal 21.5-35", or "alternative 28-42"
  fcs_cereal = "g1_cereals_consumption",
  # Legumes/pulses are stored in g7_pulses_consumption; the previous mapping
  # pointed at the roots column (g2_roots_consumption) by mistake.
  fcs_legumes = "g7_pulses_consumption",
  fcs_veg = "g3_vegetables_consumption",
  fcs_fruit = "g4_fruits_consumption",
  fcs_meat = "g5_meat_consumption",
  fcs_dairy = "g8_dairy_consumption",
  fcs_sugar = "g10_sugar_consumption",
  fcs_oil = "g9_oil_consumption"
)

# Preview the score and its category.
main_dataset %>%
  select(fcs_score, fcs_cat) %>%
  head()
## fcs_score fcs_cat
## 1 56.0 Acceptable
## 2 42.0 Acceptable
## 3 112.0 Acceptable
## 4 112.0 Acceptable
## 5 38.5 Acceptable
## 6 75.0 Acceptable
6.9 Livelihood coping strategy index (LCSI)
For this example we will use the addindicators package to compute the LCSI. The package is available at https://github.com/impact-initiatives/addindicators and can be installed using devtools::install_github("impact-initiatives/addindicators")
# Compute the LCSI with addindicators. The three *_vars arguments list the
# strategy columns by severity level; the *_val arguments give the answer
# codes used in this dataset.
main_dataset <- main_dataset %>%
  addindicators::add_lcsi(
    lcsi_stress_vars = c(
      "i1_coping_strategies",
      "i2_spent_savings",
      "i3_borrowed_food",
      "i4_live_another_family"
    ),
    lcsi_crisis_vars = c(
      "i6_removed_children_from_school_kindergarten",
      "i7_reduced_health_care",
      "i8_reduced_education"
    ),
    lcsi_emergency_vars = c(
      "i9_sold_house",
      "i10_hh_migrated",
      "i11_moved_for_work"
    ),
    yes_val = "yes",
    no_val = "no_but_needed",
    exhausted_val = "no_already_done_so_cant_do_it_anymore",
    not_applicable_val = "no_was_not_needed"
  )

# Preview the resulting category column.
head(select(main_dataset, lcsi_cat))
## lcsi_cat
## 1 None
## 2 Crisis
## 3 Crisis
## 4 None
## 5 Crisis
## 6 Crisis
6.10 New indicators from a loop to main dataset
e.g. aggregating the number of children going to school from a loop
6.11 The co-occurrence of needs in an MSNI
The following example will first create a multiple-choice (select multiple) question and then create a parent column which can be used for calculating the co-occurrence of needs.
# Needs for which a "most_common_profile.<need>" flag column is built.
lsg_cols <- c("wash", "education", "health", "food", "protection")

most_common_data <- most_common_data %>%
  mutate(
    # One logical column per need: TRUE when the need's value, converted to
    # numeric, is greater than 2.
    across(
      all_of(lsg_cols),
      ~ as.numeric(.x) > 2,
      .names = "most_common_profile.{.col}"
    ),
    # Parent column starts as NA; presumably rebuilt from the flag columns by
    # the illuminate helper below (TODO confirm).
    most_common_profile = NA
  ) %>%
  illuminate::recalculate_concerted_col_for_select_multiple(uuid = "uuid")