6 Composite indicators

composeR dplyr::case_when

6.1 Composite with 1 categorical variable - select one

This example will look at creating an improved source of water variable. The indicator of interest is f11_dinking_water_source and the options for un-improved source of water are:

trucked_in_water_truck_with_a_tank_etc
drinking_water_from_water_kiosk_booth_with_water_for_bottling
bottled_water_water_purchased_in_bottles
other_specify

# Answer options of the drinking water question that are treated as
# un-improved sources
unimproved_source <- c(
  "trucked_in_water_truck_with_a_tank_etc",
  "drinking_water_from_water_kiosk_booth_with_water_for_bottling",
  "bottled_water_water_purchased_in_bottles",
  "other_specify"
)

# Base R: vectorised ifelse() over the whole column
main_dataset[["wash_drinkingwater_improved_source_baser"]] <- ifelse(
  main_dataset[["f11_dinking_water_source"]] %in% unimproved_source,
  "not_improved",
  "improved"
)
# Cross-tabulate the original answers against the new variable
table(
  main_dataset[["f11_dinking_water_source"]],
  main_dataset[["wash_drinkingwater_improved_source_baser"]],
  useNA = "ifany"
)

##                                                                
##                                                                 improved
##   bottled_water_water_purchased_in_bottles                             0
##   drinking_water_from_water_kiosk_booth_with_water_for_bottling        0
##   other_specify                                                        0
##   personal_well                                                      414
##   public_well_or_boreholes_shared_access                             187
##   tap_drinking_water_centralized_water_supply                        479
##   technical_piped_water                                                2
##   trucked_in_water_truck_with_a_tank_etc                               0
##                                                                
##                                                                 not_improved
##   bottled_water_water_purchased_in_bottles                                68
##   drinking_water_from_water_kiosk_booth_with_water_for_bottling          366
##   other_specify                                                           11
##   personal_well                                                            0
##   public_well_or_boreholes_shared_access                                   0
##   tap_drinking_water_centralized_water_supply                              0
##   technical_piped_water                                                    0
##   trucked_in_water_truck_with_a_tank_etc                                  90

# Base R, second approach: create an empty column, then fill it by subsetting
main_dataset[["wash_drinkingwater_improved_source_baser2"]] <- NA
main_dataset[["wash_drinkingwater_improved_source_baser2"]][
  main_dataset[["f11_dinking_water_source"]] %in% unimproved_source
] <- "not_improved"
main_dataset[["wash_drinkingwater_improved_source_baser2"]][
  !(main_dataset[["f11_dinking_water_source"]] %in% unimproved_source)
] <- "improved"

# Both base R approaches give the same classification
table(
  main_dataset[["wash_drinkingwater_improved_source_baser"]],
  main_dataset[["wash_drinkingwater_improved_source_baser2"]],
  useNA = "ifany"
)

##               
##                improved not_improved
##   improved         1082            0
##   not_improved        0          535

# dplyr: the same recoding with case_when(); the TRUE branch catches every
# row not matched by the first condition
main_dataset <- mutate(
  main_dataset,
  wash_improved_source_dplyr = case_when(
    f11_dinking_water_source %in% unimproved_source ~ "not_improved",
    TRUE ~ "improved"
  )
)
table(
  main_dataset[["f11_dinking_water_source"]],
  main_dataset[["wash_improved_source_dplyr"]],
  useNA = "ifany"
)

##                                                                
##                                                                 improved
##   bottled_water_water_purchased_in_bottles                             0
##   drinking_water_from_water_kiosk_booth_with_water_for_bottling        0
##   other_specify                                                        0
##   personal_well                                                      414
##   public_well_or_boreholes_shared_access                             187
##   tap_drinking_water_centralized_water_supply                        479
##   technical_piped_water                                                2
##   trucked_in_water_truck_with_a_tank_etc                               0
##                                                                
##                                                                 not_improved
##   bottled_water_water_purchased_in_bottles                                68
##   drinking_water_from_water_kiosk_booth_with_water_for_bottling          366
##   other_specify                                                           11
##   personal_well                                                            0
##   public_well_or_boreholes_shared_access                                   0
##   tap_drinking_water_centralized_water_supply                              0
##   technical_piped_water                                                    0
##   trucked_in_water_truck_with_a_tank_etc                                  90

# Same same: the base R and the dplyr variables must agree on every row.
# useNA = "ifany" keeps rows with a missing value in either variable in the
# table, so a disagreement cannot hide in silently dropped NA rows.
table(
  main_dataset$wash_drinkingwater_improved_source_baser,
  main_dataset$wash_improved_source_dplyr,
  useNA = "ifany"
)

##               
##                improved not_improved
##   improved         1082            0
##   not_improved        0          535

In the previous example, there was no missing value. For this example, the indicator to be built will turn a yes/no question into a dummy variable (1 and 0). The variable of interest is b16_hohh_pension_eligible

# Frequency of each answer; useNA = "ifany" also counts the missing values
table(main_dataset$b16_hohh_pension_eligible, useNA = "ifany")

## 
##   no  yes <NA> 
##    6  870  741

# Base R: 1 when the answer is "yes", 0 for any other answer.
# A missing answer stays NA because NA == "yes" evaluates to NA.
main_dataset[["hohh_pension_eligible_dummy_baser"]] <- ifelse(
  main_dataset[["b16_hohh_pension_eligible"]] == "yes",
  1,
  0
)

table(
  main_dataset[["b16_hohh_pension_eligible"]],
  main_dataset[["hohh_pension_eligible_dummy_baser"]],
  useNA = "ifany"
)

##       
##          0   1 <NA>
##   no     6   0    0
##   yes    0 870    0
##   <NA>   0   0  741

# dplyr: each answer is listed explicitly, so rows matching neither
# condition (the missing answers) are left as NA
main_dataset <- mutate(
  main_dataset,
  hohh_pension_eligible_dummy_dplyr = case_when(
    b16_hohh_pension_eligible == "yes" ~ 1,
    b16_hohh_pension_eligible == "no" ~ 0
  )
)

table(
  main_dataset[["b16_hohh_pension_eligible"]],
  main_dataset[["hohh_pension_eligible_dummy_dplyr"]],
  useNA = "ifany"
)

##       
##          0   1 <NA>
##   no     6   0    0
##   yes    0 870    0
##   <NA>   0   0  741

# The base R and the dplyr dummies agree, including on the missing values
table(
  main_dataset[["hohh_pension_eligible_dummy_baser"]],
  main_dataset[["hohh_pension_eligible_dummy_dplyr"]],
  useNA = "ifany"
)

##       
##          0   1 <NA>
##   0      6   0    0
##   1      0 870    0
##   <NA>   0   0  741

# Watch out for NA. This one is not correct: the TRUE catch-all branch also
# applies to the rows where the answer is missing, so NA is coded as 0.
main_dataset <- main_dataset %>%
  mutate(hohh_pension_eligible_dummy_dplyr2 = case_when(b16_hohh_pension_eligible == "yes" ~ 1,
                                                TRUE ~ 0))
table(main_dataset$b16_hohh_pension_eligible, main_dataset$hohh_pension_eligible_dummy_dplyr2, useNA = "ifany")

##       
##          0   1
##   no     6   0
##   yes    0 870
##   <NA> 741   0

# Not the same: the missing values of the base R dummy are coded 0 in the
# catch-all version
table(
  main_dataset[["hohh_pension_eligible_dummy_baser"]],
  main_dataset[["hohh_pension_eligible_dummy_dplyr2"]],
  useNA = "ifany"
)

##       
##          0   1
##   0      6   0
##   1      0 870
##   <NA> 741   0

6.2 Composite with 2 categorical variables

This example will look at creating an indicator whether or not the sources for drinking and for cooking, cleaning and non-drinking purposes are both improved. The indicators of interest are f11_dinking_water_source (and more specifically wash_drinkingwater_improved_source_baser from previous paragraph) and f14_technical_water_source (F14_What is your HH’s main source of water for cooking, cleaning, and non-drinking purposes).

First, a new variable has to be created, wash_otherwater_improved_source_baser.

# Base R: classify the source of water used for cooking, cleaning and other
# non-drinking purposes with the same list of un-improved sources
main_dataset[["wash_otherwater_improved_source_baser"]] <- ifelse(
  main_dataset[["f14_technical_water_source"]] %in% unimproved_source,
  "not_improved",
  "improved"
)

# "both_improved" only when the drinking AND the other water source are improved
main_dataset[["wash_bothwater_improved_source_baser"]] <- ifelse(
  main_dataset[["wash_drinkingwater_improved_source_baser"]] == "improved" &
    main_dataset[["wash_otherwater_improved_source_baser"]] == "improved",
  "both_improved",
  "not_both_improved"
)

# Three-way table: drinking source x other source x combined indicator
table(
  main_dataset[["wash_drinkingwater_improved_source_baser"]],
  main_dataset[["wash_otherwater_improved_source_baser"]],
  main_dataset[["wash_bothwater_improved_source_baser"]],
  useNA = "ifany"
)

## , ,  = both_improved
## 
##               
##                improved not_improved
##   improved         1074            0
##   not_improved        0            0
## 
## , ,  = not_both_improved
## 
##               
##                improved not_improved
##   improved            0            8
##   not_improved      480           55

Now, the variable will be coded to have 3 categories instead: both improved, at least drinking water and not improved.

# Base R: nested ifelse() for three categories. The inner ifelse() is only
# used for rows where the two sources are not both improved.
main_dataset[["wash_bothwater_improved_source_baser2"]] <- ifelse(
  main_dataset[["wash_drinkingwater_improved_source_baser"]] == "improved" &
    main_dataset[["wash_otherwater_improved_source_baser"]] == "improved",
  "both_improved",
  ifelse(
    main_dataset[["wash_drinkingwater_improved_source_baser"]] == "improved",
    "at_least_drinking",
    "not_both_improved"
  )
)

table(
  main_dataset[["wash_drinkingwater_improved_source_baser"]],
  main_dataset[["wash_otherwater_improved_source_baser"]],
  main_dataset[["wash_bothwater_improved_source_baser2"]],
  useNA = "ifany"
)

## , ,  = at_least_drinking
## 
##               
##                improved not_improved
##   improved            0            8
##   not_improved        0            0
## 
## , ,  = both_improved
## 
##               
##                improved not_improved
##   improved         1074            0
##   not_improved        0            0
## 
## , ,  = not_both_improved
## 
##               
##                improved not_improved
##   improved            0            0
##   not_improved      480           55

# dplyr: case_when() evaluates the conditions in order, so the second branch
# only receives rows where the two sources are not both improved
main_dataset <- mutate(
  main_dataset,
  wash_bothwater_improved_source_dplyr = case_when(
    wash_drinkingwater_improved_source_baser == "improved" &
      wash_otherwater_improved_source_baser == "improved" ~ "both_improved",
    wash_drinkingwater_improved_source_baser == "improved" ~ "at_least_drinking",
    TRUE ~ "not_both_improved"
  )
)

# The dplyr and the base R versions give the same three categories
table(
  main_dataset[["wash_bothwater_improved_source_dplyr"]],
  main_dataset[["wash_bothwater_improved_source_baser2"]],
  useNA = "ifany"
)

##                    
##                     at_least_drinking both_improved not_both_improved
##   at_least_drinking                 8             0                 0
##   both_improved                     0          1074                 0
##   not_both_improved                 0             0               535

6.3 Composite with 1 categorical variable - select multiple

In this example we are creating an indicator to score whether or not drinking water is being processed or purified before usage. The indicator is calculated based on one categorical question f12_drinking_water_treat

# dplyr: score the select-multiple question from its dummy (0/1) columns.
# 3 = "do not process/purify" is selected, 1 = at least one treatment method
# is selected; rows matching neither condition are left as NA.
main_dataset <- mutate(
  main_dataset,
  wash_indicator1 = case_when(
    f12_drinking_water_treat.do_not_process_purify == 1 ~ 3,
    f12_drinking_water_treat.cleaning_with_chemicals_chlorination == 1 |
      f12_drinking_water_treat.water_precipitation == 1 |
      f12_drinking_water_treat.filtering_the_water_pitcher_filter == 1 |
      f12_drinking_water_treat.filtering_the_water_reverse_osmosis_filter == 1 |
      f12_drinking_water_treat.boiling == 1 |
      f12_drinking_water_treat.percolation == 1 ~ 1
  )
)


# Base R counterpart of wash_indicator1, written with nested ifelse().
# 3 = "do not process/purify" is selected, 1 = at least one treatment method
# is selected, NA otherwise.
# Assignment uses `<-` like the rest of the file (`=` is reserved for
# function arguments).
main_dataset$wash_indicator2 <- ifelse(
  main_dataset$f12_drinking_water_treat.do_not_process_purify == 1,
  3,
  ifelse(
    main_dataset$f12_drinking_water_treat.cleaning_with_chemicals_chlorination == 1 |
      main_dataset$f12_drinking_water_treat.water_precipitation == 1 |
      main_dataset$f12_drinking_water_treat.filtering_the_water_pitcher_filter == 1 |
      main_dataset$f12_drinking_water_treat.filtering_the_water_reverse_osmosis_filter == 1 |
      main_dataset$f12_drinking_water_treat.boiling == 1 |
      main_dataset$f12_drinking_water_treat.percolation == 1,
    1,
    NA
  )
)

6.4 Composite with 1 numerical variable

This example will look at creating a categorical variable from a numerical one, f6_how_many_wood_hh_consumed_last_winter, with three categories: ‘less than 5’, ‘between 5 (included) and 10 (excluded)’ and ‘10 and above’.

# Bin the numeric answer into three categories. The conditions are evaluated
# in order, so the second branch only receives values of 5 and above.
# Missing values match no condition and stay NA.
main_dataset <- mutate(
  main_dataset,
  wood_consumed_categories = case_when(
    f6_how_many_wood_hh_consumed_last_winter < 5 ~ "less_than_5",
    f6_how_many_wood_hh_consumed_last_winter < 10 ~ "between5_and10",
    f6_how_many_wood_hh_consumed_last_winter >= 10 ~ "ten_above"
  )
)

6.5 Composite with 2 numerical variables

This example will look at creating one of the indicators necessary to compute the FCS. In some cases, we need to check whether the sum of the number of days for 2 types of food is above 7. If the sum is above 7, the indicator has to return 7; otherwise, it returns the sum of both variables.

# Cereals/roots and meat/eggs are each combined into one food group whose
# number of days cannot exceed 7
# Base R version: cereals + roots
main_dataset[["fcs_cereal_roots"]] <- ifelse(
  main_dataset[["g1_cereals_consumption"]] +
    main_dataset[["g2_roots_consumption"]] > 7,
  7,
  main_dataset[["g1_cereals_consumption"]] +
    main_dataset[["g2_roots_consumption"]]
)

# dplyr version: meat + eggs, capped at 7 days
main_dataset <- mutate(
  main_dataset,
  fcs_meat_eggs = ifelse(
    (g5_meat_consumption + g6_eggs_consumption) > 7,
    7,
    g5_meat_consumption + g6_eggs_consumption
  )
)

6.6 Composite with 2 or more numerical variables

This example will look at creating the food consumption score.

# dplyr: weighted sum of the number of days each food group was consumed
main_dataset <- mutate(
  main_dataset,
  FCS_score_dplyr = fcs_cereal_roots * 2 +
    g3_vegetables_consumption * 1 +
    g4_fruits_consumption * 1 +
    fcs_meat_eggs * 4 +
    g7_pulses_consumption * 3 +
    g8_dairy_consumption * 4 +
    g9_oil_consumption * 0.5 +
    g10_sugar_consumption * 0.5
)

# Base R: the same weighted sum, evaluated inside the data frame with with()
main_dataset[["FCS_score_baser"]] <- with(
  main_dataset,
  (fcs_cereal_roots * 2) +
    (g3_vegetables_consumption * 1) +
    (g4_fruits_consumption * 1) +
    (fcs_meat_eggs * 4) +
    (g7_pulses_consumption * 3) +
    (g8_dairy_consumption * 4) +
    (g9_oil_consumption * 0.5) +
    (g10_sugar_consumption * 0.5)
)

6.7 Household Hunger Score (HHS)

For this example we will use the addindicators package (https://github.com/impact-initiatives/addindicators) to compute the HHS. The package can be installed using devtools::install_github("impact-initiatives/addindicators")

# Harmonise the frequency coding: this question uses "1_rarely_12" where the
# value passed to add_hhs() as rarely_answer is "1_rarely_1_2"
main_dataset <- mutate(
  main_dataset,
  h3_1_how_often_day_and_night_hungry = if_else(
    h3_1_how_often_day_and_night_hungry == "1_rarely_12",
    "1_rarely_1_2",
    h3_1_how_often_day_and_night_hungry
  )
)


# Compute the Household Hunger Score: map the three yes/no questions, their
# frequency follow-ups and the answer codes used in this dataset
main_dataset <- main_dataset %>%
  addindicators::add_hhs(
    # yes/no question and its frequency follow-up, per item
    hhs_nofoodhh_1 = "h1_nofood_noresources",
    hhs_nofoodhh_1a = "h1_1_how_often_nofood_noresources",
    hhs_sleephungry_2 = "h2_sleep_hungry",
    hhs_sleephungry_2a = "h2_1_how_often_sleep_hungry",
    hhs_alldaynight_3 = "h3_day_and_night_hungry",
    hhs_alldaynight_3a = "h3_1_how_often_day_and_night_hungry",
    # answer codes
    yes_answer = "1_yes",
    no_answer = "0_no",
    rarely_answer = "1_rarely_1_2",
    sometimes_answer = "2_sometimes_3_10",
    often_answer = "3_often_10plus_times"
  )

# Show the first rows of the hhs_cat column
main_dataset %>% select(hhs_cat) %>% head()

##        hhs_cat
## 1 No or Little
## 2 No or Little
## 3 No or Little
## 4 No or Little
## 5 No or Little
## 6 No or Little

6.8 Food Consumption Score (FCS)

For this example we will use the addindicators package (https://github.com/impact-initiatives/addindicators) to compute the FCS. The package can be installed using devtools::install_github("impact-initiatives/addindicators")

# Compute the Food Consumption Score with addindicators::add_fcs().
# Each fcs_* argument names the column holding the number of days the food
# group was consumed:
# - legumes must point to the pulses column (g7_pulses_consumption), not to
#   the roots column;
# - cereals and meat use the composites built in section 6.5 (cereals + roots
#   and meat + eggs, each capped at 7), so the score matches the manual
#   computation of section 6.6.
main_dataset <- addindicators::add_fcs(
  main_dataset,
  cutoffs = "normal 21.5-35", # "normal 21.5-35", or "alternative 28-42"
  fcs_cereal = "fcs_cereal_roots",
  fcs_legumes = "g7_pulses_consumption",
  fcs_veg = "g3_vegetables_consumption",
  fcs_fruit = "g4_fruits_consumption",
  fcs_meat = "fcs_meat_eggs",
  fcs_dairy = "g8_dairy_consumption",
  fcs_sugar = "g10_sugar_consumption",
  fcs_oil = "g9_oil_consumption"
)

# Show the first rows of the fcs_score and fcs_cat columns
main_dataset %>% select(fcs_score,fcs_cat) %>% head()

##   fcs_score    fcs_cat
## 1      56.0 Acceptable
## 2      42.0 Acceptable
## 3     112.0 Acceptable
## 4     112.0 Acceptable
## 5      38.5 Acceptable
## 6      75.0 Acceptable

6.9 Livelihood coping strategy index (LCSI)

For this example we will use the addindicators package (https://github.com/impact-initiatives/addindicators) to compute the LCSI. The package can be installed using devtools::install_github("impact-initiatives/addindicators")

# Compute the Livelihood Coping Strategy Index: the strategies are passed in
# three groups (stress, crisis, emergency) together with the answer codes
# used in this dataset
main_dataset <- main_dataset %>%
  addindicators::add_lcsi(
    lcsi_stress_vars = c(
      "i1_coping_strategies",
      "i2_spent_savings",
      "i3_borrowed_food",
      "i4_live_another_family"
    ),
    lcsi_crisis_vars = c(
      "i6_removed_children_from_school_kindergarten",
      "i7_reduced_health_care",
      "i8_reduced_education"
    ),
    lcsi_emergency_vars = c(
      "i9_sold_house",
      "i10_hh_migrated",
      "i11_moved_for_work"
    ),
    yes_val = "yes",
    no_val = "no_but_needed",
    exhausted_val = "no_already_done_so_cant_do_it_anymore",
    not_applicable_val = "no_was_not_needed"
  )
  
# Show the first rows of the lcsi_cat column
main_dataset %>% select(lcsi_cat) %>% head()

##   lcsi_cat
## 1     None
## 2   Crisis
## 3   Crisis
## 4     None
## 5   Crisis
## 6   Crisis

6.10 New indicators from a loop to main dataset

e.g. aggregating the number of children going to school from a loop

6.11 The co-occurrence of needs in an MSNI

The following example will first create a select multiple question and then a parent column which can be used for calculating the co-occurrence of needs.

# Sector columns to combine
lsg_cols <- c("wash", "education", "health", "food", "protection")

# For every sector, add a logical child column "most_common_profile.<sector>"
# that is TRUE when the sector value, converted to numeric, is above 2.
# across() replaces the explicit loop with !!sym() injection and leaves no
# loop variables behind in the workspace.
# The empty parent column most_common_profile is added in the same step.
most_common_data <- most_common_data %>%
  mutate(
    across(
      all_of(lsg_cols),
      ~ as.numeric(.x) > 2,
      .names = "most_common_profile.{.col}"
    ),
    most_common_profile = NA
  )

# NOTE(review): presumably rebuilds the parent column from its
# "most_common_profile.*" child columns - confirm against the illuminate
# documentation
most_common_data <- most_common_data %>%
  illuminate::recalculate_concerted_col_for_select_multiple(uuid = "uuid")