4 Pre analysis

This section presents steps and tools that can be used before the analysis takes place, such as checking your KOBO tool or following up on your data collection.

4.1 Checking the XLS kobo tools for constraints errors

This function checks the relevant column in the questionnaire and flags issues with constraints of type selected(question_name, question_answer).

library(stringr)
library(purrr)
library(readxl)
library(qdapRegex)
library(tidyr)
library(magrittr)
library(dplyr)

# Load the two sheets of the XLSForm workbook that the checks below rely on:
# "survey" holds the questions, "choices" holds the answer lists.
questions <- read_xlsx(
  path = "inputs/UKR2007_MSNA20_HH_Questionnaire_24JUL2020.xlsx",
  sheet = "survey"
)
choices <- read_xlsx(
  path = "inputs/UKR2007_MSNA20_HH_Questionnaire_24JUL2020.xlsx",
  sheet = "choices"
)

#' Check that the answer used in one `selected()` call is a valid choice
#'
#' @param constraint A single string of the form
#'   `selected(${question_name}, 'question_answer')`.
#' @param survey Survey sheet with `name` and `type` columns. Defaults to the
#'   `questions` object visible from where this function is defined, which is
#'   what the one-argument form has always used.
#' @param choice_list Choices sheet with `list_name` and `name` columns.
#'   Defaults to the `choices` object visible from where this function is
#'   defined.
#' @return A single logical. `TRUE` when the answer belongs to the choice list
#'   of the question, or when the call cannot be checked (no comma, no
#'   `${...}` reference, or no quoted answer). `FALSE` when the answer is not
#'   in the list or the question is not found in `survey`.
check_answer_in_list <- function(constraint,
                                 survey = questions,
                                 choice_list = choices) {
  # Nothing to validate without a "question, answer" pair.
  if (is.na(constraint) || !str_detect(constraint, ",")) {
    return(TRUE)
  }

  # Lazy quantifiers so that only the first ${...} / '...' token is captured,
  # even if the call contains several of them.
  question <- str_match(constraint, "\\{([^()]+?)\\}")[, 2]
  answer <- str_match(constraint, "\\'([^()]+?)\\'")[, 2]

  # Without a question reference and a quoted literal there is no answer to
  # look up, so the call is treated like the "no comma" case above.
  if (is.na(question) || is.na(answer)) {
    return(TRUE)
  }

  # Group rows can share a name with a question; ignore them.
  is_target <- survey$name %in% question &
    !grepl("^(begin|end)\\s+group$", survey$type)
  question_type <- survey$type[is_target]

  # Unknown question: flag the constraint instead of failing on an empty
  # comparison further down.
  if (length(question_type) == 0) {
    return(FALSE)
  }

  # The type reads "select_one <list>" or "select_multiple <list>", optionally
  # followed by "or_other"; drop that suffix before taking the last token,
  # otherwise "or_other" would be mistaken for the list name.
  has_other <- grepl("\\s+or_other$", question_type)
  listname <- gsub("^.*\\s", "", gsub("\\s+or_other$", "", question_type))

  valid_answers <- choice_list$name[choice_list$list_name %in% listname]
  if (any(has_other)) {
    # XLSForm adds an implicit "other" choice to or_other questions.
    valid_answers <- c(valid_answers, "other")
  }

  answer %in% valid_answers
}

#' List the `selected()` calls of the `relevant` column that use an unknown answer
#'
#' @param questions Survey sheet with `name`, `type` and `relevant` columns.
#' @param choices Choices sheet with `list_name` and `name` columns.
#' @return A character vector with the distinct
#'   `selected(${question}, 'answer')` calls for which
#'   `check_answer_in_list()` returns `FALSE`, in order of first appearance,
#'   or `NULL` when there are none.
check_constraints <- function(questions, choices) {
  # Stray whitespace in the sheets must not produce false positives.
  questions <- mutate_at(questions, c("name", "type"), ~str_trim(.))
  choices <- mutate_at(choices, c("list_name", "name"), ~str_trim(.))

  all_constraints <- questions %>%
    filter(grepl("selected", relevant)) %>%
    pull(relevant)
  # Normalise double quotes so the answer literal is always single-quoted.
  all_constraints <- gsub('"', "'", all_constraints)

  # Every selected(...) call, checked once each. str_extract_all() returns an
  # empty vector (not NA) for an expression without a well-formed call.
  selected_calls <- unique(unlist(
    str_extract_all(all_constraints, "selected\\s*\\([^\\)]*\\)")
  ))
  if (length(selected_calls) == 0) {
    return(NULL)
  }

  # Pass the trimmed sheets explicitly so the check runs against the arguments
  # of this function rather than against same-named global objects.
  is_valid <- map_lgl(
    selected_calls,
    function(one_call) check_answer_in_list(one_call, questions, choices)
  )

  invalid <- selected_calls[!is_valid]
  if (length(invalid) == 0) {
    return(NULL)
  }
  invalid
}

Call this function by passing the questionnaire and choices sheets; the output is the list of wrong constraints of type selected(question_name, question_answer), if any. A constraint is reported when its answer does not exist in the choices sheet.

# Run the check on the sheets loaded above; the flagged calls are printed.
check_constraints(questions = questions, choices = choices)
## [1] "selected(${b10_hohh_vulnerability}, 'older_person_60plus')"

4.2 Sampling

4.2.1 Simple Random sampling

4.2.2 Stratified Random sampling

4.2.3 Cluster sampling

4.2.4 2 stages random sampling

4.2.5 Sample distribution using population raster