Skip to contents

Apply a cleaning log to a raw data set.

Usage

create_clean_data(
  raw_dataset,
  raw_data_uuid_column = "uuid",
  cleaning_log,
  cleaning_log_uuid_column = "uuid",
  cleaning_log_question_column,
  cleaning_log_new_value_column,
  cleaning_log_change_type_column,
  change_response_value = "change_response",
  NA_response_value = "blank_response",
  no_change_value = "no_action",
  remove_survey_value = "remove_survey"
)

Arguments

raw_dataset

Raw dataset

raw_data_uuid_column

Name of the uuid column in the raw dataset. Default is "uuid".

cleaning_log

Combined cleaning and deletion log (data.frame).

cleaning_log_uuid_column

uuid column in the cleaning log. Default is "uuid".

cleaning_log_question_column

Column in the cleaning log which specifies the data set column to change.

cleaning_log_new_value_column

Column in the cleaning log which specifies the new, corrected value.

cleaning_log_change_type_column

Column in the cleaning log which specifies the type of change to be made.

change_response_value

Value(s) in the change type column indicating that the response should be changed to the new value. Default is "change_response".

NA_response_value

Value(s) in the change type column indicating that the response should be set to blank (NA). Default is "blank_response".

no_change_value

Value(s) in the change type column indicating that the response should NOT be changed. Default is "no_action".

remove_survey_value

Value(s) in the change type column indicating that the survey should be deleted from the data. Default is "remove_survey".

Value

The clean data set, with the changes from the cleaning log applied.

Examples

cleaning_log_test <- data.frame(
  uuid = paste0("uuid", 1:4),
  question = c("age", "gender", "pop_group", "strata"),
  change_type = c("blank_response", "no_change", "Delete", "change_res"),
  new_value = c(NA_character_, NA_character_, NA_character_, "st-a")
)
test_data <- data.frame(
  uuid = paste0("uuid", 1:4),
  age = c(180, 23, 45, 67),
  gender = c("male", "female", "male", "female"),
  pop_group = c("idp", "refugee", "host", "idp"),
  strata = c("a", "b", "c", "d")
)


review_cleaning_log(
  raw_dataset = test_data,
  raw_data_uuid_column = "uuid",
  cleaning_log = cleaning_log_test,
  cleaning_log_change_type_column = "change_type",
  change_response_value = "change_res",
  cleaning_log_question_column = "question",
  cleaning_log_uuid_column = "uuid",
  cleaning_log_new_value_column = "new_value"
)
#> [1] "no issues in cleaning log found"

create_clean_data(
  raw_dataset = test_data, raw_data_uuid_column = "uuid", cleaning_log = cleaning_log_test,
  cleaning_log_change_type_column = "change_type",
  change_response_value = "change_res",
  NA_response_value = "blank_response",
  no_change_value = "no_change",
  remove_survey_value = "Delete",
  cleaning_log_question_column = "question",
  cleaning_log_uuid_column = "uuid",
  cleaning_log_new_value_column = "new_value"
)
#> [1] "age"
#> [1] "strata"
#>    uuid age gender pop_group strata
#> 1 uuid1  NA   male       idp      a
#> 2 uuid2  23 female   refugee      b
#> 3 uuid4  67 female       idp   st-a