8 Outputs

8.1 From long to large table

How to move from a tidy format to a large format

8.2 Merge file

How to create a merge file

8.3 Graphs

8.3.1 spider graphs

8.3.2 prison graphs

8.3.3 Bar graphs

Loading libraries and the main dataset

library(tidyverse)
library(ggplot2)
library(plotly)
library(openxlsx)
library(data.table)
library(reshape)

main_dataset <- read.csv("inputs/UKR2007_MSNA20_HH_dataset_main_rcop.csv", na.strings = "")

Choosing the needed indicator for building bar graph. The indicators in the questionnaire could be in 2 types: Only one answer can be selected (select_one question) and multiple answers can be selected (select_multiple question). Will review it separately.

  1. For select one questions:

Preparing values for visualization (replacing xml values to lables)

# Loading questionnaire
questions <- read.xlsx("inputs/UKR2007_MSNA20_HH_Questionnaire_24JUL2020.xlsx", sheet = "survey", na.strings = "")
choices <- read.xlsx("inputs/UKR2007_MSNA20_HH_Questionnaire_24JUL2020.xlsx", sheet = "choices", na.strings = "")

# Based on value we get option type
q.list_name <- str_split(questions[questions$name == "b9_hohh_marital_status" & !is.na(questions$name), "type"], " ")[[1]][2]

# New table with xml and labels
labels <- choices %>%
  filter(list_name == q.list_name) %>%
  select(name, "label::English") %>%
  dplyr::rename(b9_hohh_marital_status = name, b9_hohh_marital_status_label = "label::English")

# Add a column with English labels to the main dataset
main_dataset <- merge(labels, main_dataset, by = 'b9_hohh_marital_status')

Building a bar graph

ggplot((main_dataset %>%
          filter(!is.na(b9_hohh_marital_status_label)) %>%
          dplyr::group_by(b9_hohh_marital_status_label) %>%
          dplyr::summarize(weight_sum = round(sum(stratum.weight), 2))), aes(y = reorder(b9_hohh_marital_status_label, weight_sum), x = weight_sum/sum(weight_sum))) +
  geom_bar(stat = "identity", fill = "#EE5859") +
  geom_text(aes(label = paste(round((weight_sum/sum(weight_sum)*100),0),"%")), color = "#58585A", size = 4, hjust = -0.1) +
  scale_x_continuous(labels = scales::percent) +
  theme(axis.title.x = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.y = element_blank(), axis.ticks.y = element_blank(), axis.text.y = element_text(color = "#58585A", size = 12),
  panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank())

Removing temporary data

rm(labels, q.list_name)
  1. For select multiple questions:

Preparing values for visualization (replacing xml values to lables; calculating results)

# Getting needed columns from the main dataset (indicator and weight)
visual_dataset <- main_dataset[,grepl("b10_hohh_vulnerability.|stratum.weight", names(main_dataset))]

# Reshaping the dataset
visual_dataset <- melt(visual_dataset, id.vars = "stratum.weight")

# Grouping by choices and getting sum of weights
visual_dataset <- visual_dataset %>%
  mutate(weight = stratum.weight * value) %>%
  group_by(variable) %>% 
  summarise(weight_sum = sum(as.numeric(weight))) %>%
  mutate(percentage = round(weight_sum / sum(main_dataset$stratum.weight)*100)) %>%
  dplyr::rename(b10_hohh_vulnerability = variable)

# Based on value we get option type and replacing xmls to the labels and 
q.list_name <- str_split(questions[questions$name == "b10_hohh_vulnerability" & !is.na(questions$name), "type"], " ")[[1]][2]

# New table with xml and labels
labels <- choices %>%
  filter(list_name == q.list_name) %>%
  select(name, "label::English") %>%
  dplyr::rename(b10_hohh_vulnerability = name, b10_hohh_vulnerability_label = "label::English") %>%
  mutate(b10_hohh_vulnerability = paste0("b10_hohh_vulnerability.", b10_hohh_vulnerability))

# Add a column with English labels to the visualization dataset
visual_dataset <- merge(labels, visual_dataset, by = 'b10_hohh_vulnerability')

Building a bar graph

ggplot(visual_dataset, aes(y = reorder(b10_hohh_vulnerability_label, percentage), x = percentage)) +
  geom_bar(stat = "identity", fill = "#EE5859") +
  geom_text(aes(label = paste(percentage, "%")), color = "#58585A", size = 4, hjust = -0.1) +
  scale_x_continuous(labels = scales::percent) +
  theme(axis.title.x = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.y = element_blank(), axis.ticks.y = element_blank(), axis.text.y = element_text(color = "#58585A", size = 12),
  panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank())

Removing temporary data

rm(labels, q.list_name)

8.3.4 Euler diagram

An Euler diagram is a diagrammatic means of representing sets and their relationships. They are particularly useful for explaining complex hierarchies and overlapping definitions. They are similar to another set diagramming technique, Venn diagrams. Unlike Venn diagrams, which show all possible relations between different sets, the Euler diagram shows only relevant relationships. Source

First let’s load Eulerr library and our dataset

main_dataset <- read.csv("inputs/UKR2007_MSNA20_HH_dataset_main_rcop.csv", na.strings = "")

Euler diagrams are particularly useful to visualize overlaps between HH characteristics that help better understand the demographic profile of the area. For example, let’s visualize overlaps between such HH characteristics as age, displacement status, income level, disability, and employment status.

library(eulerr)
library(magrittr)
library(dplyr)
library(tidyr)
library(purrr)
library(stringr)
library(tibble)
library(UpSetR)

vulnerability_data <- main_dataset %>%
    select(b10_hohh_vulnerability.disability_not_including_chronic_illness,b10_hohh_vulnerability.older_person,
           b10_hohh_vulnerability.unemployed,d1_hh_displacement_status,b15_hohh_income,X_uuid, strata, stratum.weight)%>%
  #select all necessary columns
    mutate(displaced = ifelse(d1_hh_displacement_status == "no", 0, 1),
           low_income = ifelse(as.numeric(b15_hohh_income) >= 2189, 0, 1))%>% #2189 UAH(83$) is minimal monthly income in 2020
  #calculate binary indicators in case they are not available in the dataset directly from select multiple questions
    select(-d1_hh_displacement_status, -b15_hohh_income)%>%
    setNames(c("disability", "60+", "unemployed", "uuid", "strata", "weight", "displaced", "low_income"))
  #remove unnecessary columns and rename other column

vulnerability_data <- #create column that will combine all binary columns into one (same approach as in ### Re-generate text                             column  for select multiple questions section of Data Cleaning chapter)
    map2_df(vulnerability_data, names(vulnerability_data), ~  replace(.x, .x==1, .y) %>% 
    replace(. == 0, NA)) %>%
    unite(combined, disability, `60+`, displaced, low_income, unemployed, remove = FALSE, na.rm = TRUE, sep = '&')%>%
    filter(!combined == "")

#calculate weighted summary statistics for each combination
vulnerability_summary <- vulnerability_data %>%
  select(strata, weight, combined)%>% 
  group_by(combined) %>% 
  summarise(weight_sum = sum(as.numeric(weight)))%>%
  mutate(per = weight_sum / sum(weight_sum) * 100)%>%
  filter(per > 1)%>%
  select(-weight_sum)

#convert data frame into named numeric vector that is used by plot function
vulnerability_input <-  vulnerability_summary %>%
                        deframe()

In this block we will build euler diagram using REACH color palette and some basic styling.

plot(euler(vulnerability_input),
     edges = FALSE,
     quantities = list(type = "percent", fontsize = 8),
     labels = c("60+", "Displaced", "Low income", "Disability", "Unemployed"),
     legend = list(labels = c("60+", "Displaced", "Low income", "Disability", "Unemployed")),
     fills = c("#7CB6C4","#B6C8B1","#F6E3E3","#D1CAB8","#D1D3D4")
     )

As an alternative, we also can build a Venn diagram that shows each relation (even not possible in reality). As you can see Venn diagram in this case is less readable and usable.

plot(venn(vulnerability_input),
     edges = FALSE,
     quantities = list(type = "percent", fontsize = 8), 
     labels = c("60+", "Displaced", "Low income", "Disability", "Unemployed"),
     legend = list(labels = c("60+", "Displaced", "Low income", "Disability", "Unemployed")),
     fills = c("#7CB6C4","#B6C8B1","#F6E3E3","#D1CAB8","#D1D3D4"))

Another popular type of diagram that shows relation is UpSetR diagram. Even with default styling, it’s quite good in the visualization of intersections between different HH characteristics.

upset(fromExpression(vulnerability_input), order.by = "freq")

8.3.5 Venn diagram

8.3.6 UpSet plots

8.3.7 boxplots

8.3.8 Circular barplots

For creating circular barplots, frequency table with selected indicators should be prepared first.

For the current example, let’s create the table on everyday consumption of different kinds of food in a HH depending on the area of living urban/rural

library(dplyr)
library(tidyr)

main_dataset <- separate(main_dataset, strata, into = c("zone", "type"), "_")

dnames <- names(main_dataset)

cons_names <- dnames[grepl("consumption", dnames)]

tidy_names <- c("Cereals", "Roots", "Vegetables", "Fruits", "Meat", "Eggs", "Pulses", "Dairy", "Oil", "Sugar", "Condiments")

table_food <- as.data.frame(matrix(nrow = 0, ncol = 5))

i <- 1
for (i in 1:length(cons_names)) {
ni <- cons_names[i]

rur <- main_dataset |> filter(type == "rural") |> filter(!is.na(ni)) |> count(!!sym(ni)) |> mutate(base = sum(n), prop = round((n/base)*100, 0), group = tidy_names[i], individual = "Rural")

urb <- main_dataset |> filter(type == "urban") |> filter(!is.na(ni)) |> count(!!sym(ni)) |> mutate(base = sum(n), prop = round((n/base)*100, 0), group = tidy_names[i], individual = "Urban")

names(rur) <- c("fre_cons", "n", "base", "prop", "group", "individual")
names(urb) <- c("fre_cons", "n", "base", "prop", "group", "individual")

table_food <- rbind(table_food, rur, urb)
}

table_food$value <- table_food$prop

For the current chart, the everyday consumption of certain products will be selected, so frequency of consumption should be filtered so it is == 7

table_food <- table_food |> filter(fre_cons == 7)

Adding “filler” field to make the bottom layer (unfilled space) with light grey. Filler should be equal to the “end of the scale”, e.g 100%

table_food$filler <- 100

Preparing grid data for the chart

table_food$group <- as.factor(table_food$group)
# Set a number of 'empty bar' to add at the end of each group
empty_bar <- 1
to_add <- data.frame( matrix(NA, empty_bar*nlevels(table_food$group), ncol(table_food)))
colnames(to_add) <- colnames(table_food)
to_add$group <- rep(levels(table_food$group), each=empty_bar)
table_food <- rbind(table_food, to_add)
table_food <- table_food %>% arrange(group)
table_food$id <- seq(1, nrow(table_food))

#one more empty bar
to_add <- data.frame( matrix(NA, empty_bar*nlevels(table_food$group), ncol(table_food)))
colnames(to_add) <- colnames(table_food)
to_add$group <- rep(levels(table_food$group), each=empty_bar)
table_food <- rbind(table_food, to_add)
table_food <- table_food %>% arrange(group)
table_food$id <- seq(1, nrow(table_food))
 
# Get the name and the y position of each label
label_data <- table_food
number_of_bar <- nrow(label_data)

# Subtract 0.5 because the letter must have the angle of the center of the bars. Not extreme right(1) or extreme left (0)
angle <- 90 - 360 * (label_data$id-0.5) / number_of_bar 
label_data$hjust <- ifelse(angle <- 85, 1, 0)
label_data$angle <- ifelse(angle <- 85, angle + 180, angle)

# prepare a data frame for base lines
base_data <- table_food %>% 
  group_by(group) %>% 
  dplyr::summarize(start=min(id), end=max(id) - empty_bar) %>% 
  rowwise() %>% 
  mutate(title=mean(c(start, end)))
 
# prepare a data frame for grid (scales)
grid_data <- base_data
grid_data$end <- grid_data$end[ c( nrow(grid_data), 1:nrow(grid_data)-1)] + 1
grid_data$start <- grid_data$start - 1
grid_data <- grid_data[-1,]
 
# fixing grid data
grid_data$start = grid_data$start - 0.2
grid_data$end = grid_data$end + 0.2

#label color - to put text within bars and placement of the label

label_data$lab_color = "black"
label_data$lab_placement= table_food$value-(table_food$value-1)

Picking the threshold for highlighting a bar in different color. Let’s put <= 10% in red, 11% - 50% in grey an 51% - 100% in green

table_food <- table_food |> mutate(highlight = case_when(table_food$value <= 10 ~ "bad",
                                                         table_food$value >= 11 & table_food$value <= 50 ~ "moderate",
                                                         table_food$value >= 51 ~ "good"))
table_food$highlight = as.factor(table_food$highlight)

8.3.8.1 Adding thermometer visual elemant (if needed)

cat_low = rep("bad", 10)
cat_med = rep("moderate", 41)
cat_high = rep("good", 49)
thermometer = data.frame(cat  = c(cat_low, cat_med, cat_high),
                        number = c(rep(1,100)),
                        pos  = c(rep(1,100)))

thermometer$cat = ordered(thermometer$cat, levels=c("bad", "moderate", "good"))
ball = data.frame(v = 0,
                  h = 1)
                        
ggplot(thermometer)+
geom_col(aes(x = pos, y = number, fill = cat), position = position_stack(reverse = T), width = 0.1)+
xlim(0,2)+
ylim(-5, 105)+
scale_fill_manual(breaks = c('bad', "moderate", "good"), values = c("moderate" = "dimgray", "good" = "seagreen", "bad" = "firebrick"))+
geom_point(data = ball, aes(x = h, y = v-5), color = "firebrick", size = 20)

8.3.8.2 Creating the plot itself

p <- ggplot(table_food, aes(x=as.factor(id), y=value), width = 0.3) +       
  # Note that id is a factor. If x is numeric, there is some space between the first bar
  # Placing base layer of bars
  geom_bar(aes(x=as.factor(id), y=filler), fill="#E0E2E2", stat="identity", alpha=1) + 
  geom_bar(aes(x=as.factor(id), y=value, fill=highlight), stat="identity", alpha=1) + 
  scale_fill_manual(breaks = c("moderate", 'good', "bad"), values = c("moderate" = "#93A0A9", "good" = "#A5C9A1", "bad" = "#EE5859")) +
  # Add a val=100/75/50/25 lines. Do it at the beginning to make sure barplots are OVER it.
  geom_segment(data=grid_data, aes(x = end, y = 75, xend = start, yend = 75), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
  geom_segment(data=grid_data, aes(x = end, y = 50, xend = start, yend = 50), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
  geom_segment(data=grid_data, aes(x = end, y = 25, xend = start, yend = 25), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
  
  theme_minimal(base_family = "Arial") +
  theme(
    legend.position = "none",
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank()
  ) +
  coord_polar() + 
  
  # Add text showing the value of each 100/75/50/25 lines
  annotate("text", x = rep(max(table_food$id),3), y = c(25, 50, 75), label = c("25%", "50%", "75%") , color="grey", size=3 , angle=0, fontface="bold", hjust=1) +
  # Labels inside bars
  geom_text(data=label_data, aes(x=id, y=lab_placement+50, label=individual, hjust=hjust, ), color = "black", fontface="bold",alpha=0.8, size=2, inherit.aes = FALSE, family = "Arial Narrow") +
  # Value numbers at the beginning of each bar
  geom_text(data=label_data, aes(x=id, y=lab_placement+75, label=value, hjust=hjust, ), color = "black", fontface="bold",alpha=0.8, size=2, inherit.aes = FALSE, family = "Arial Narrow")+
  # labels of groups
  geom_text(data=base_data, aes(x = title, y = 25, label=group), size = 2, inherit.aes = FALSE)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2
## 3.4.0.
## ℹ Please use `linewidth` instead.
p
## Warning: Removed 22 rows containing missing values
## (`position_stack()`).
## Warning: Removed 22 rows containing missing values
## (`position_stack()`).
## Warning: Removed 22 rows containing missing values (`geom_text()`).
## Removed 22 rows containing missing values (`geom_text()`).
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

8.3.8.3 Adding a text in the center of the plot

p + geom_text(x = 0, aes(y = -65, label = "7 days / week"), size = 4.5, fontface = "bold", family = "Arial Narrow", color = "gray28")
## Warning: Removed 22 rows containing missing values
## (`position_stack()`).
## Removed 22 rows containing missing values
## (`position_stack()`).
## Warning: Removed 22 rows containing missing values (`geom_text()`).
## Removed 22 rows containing missing values (`geom_text()`).
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

8.3.8.4 Adding a thermometer object to the plot

p +  geom_col(data = thermometer, aes(x = 43.5, y = number, fill = cat), position = position_stack(reverse = T), width = 0.3, angle = 0)+
geom_point(data = ball, aes(x = 43.5, y = v-2), color = "#EE5859", size = 5)+
geom_text(aes(x = 42.8, y = 5, label = "Freq. of consumption"), color = "black", fontface="bold",alpha=0.6, size=2.3, angle= 100, hjust = 0, family = "Arial Narrow")
## Warning in geom_col(data = thermometer, aes(x = 43.5, y = number, fill = cat), :
## Ignoring unknown parameters: `angle`
## Warning: Removed 22 rows containing missing values
## (`position_stack()`).
## Removed 22 rows containing missing values
## (`position_stack()`).
## Warning: Removed 22 rows containing missing values (`geom_text()`).
## Removed 22 rows containing missing values (`geom_text()`).
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

8.4 Labels

8.4.1 Xml to Label

The following code transform column headers, select_one and select_multiple values from XML to Label.

Loading Libraries

library(tidyverse)

Dataset column headers function

xml2label_question <- function(tool_survey, tool_choices, col){
  # for each column check if it is a select multiple
  if (str_detect(col, "/")) {
    q.name <- str_split(col, "/")[[1]][1]
    c.name <- paste0(tail(str_split(col, "/")[[1]], -1), collapse="/")
  } else {
    q.name <- col
    c.name <- NA
  }
  
  # returning the label and make sure to include the label of multiple choices after /
  if (q.name %in% tool_survey$name){
    q <- tool_survey[tool_survey$name==q.name,]
    q.label <- q$`label::english`
    if (is.na(q.label) | q$type %in% c("note")) q.label <- q.name
    if (!is.na(c.name)){
      q.list_name=ifelse(q$list_name=="NA", NA, q$list_name)
      c.label <- tool_choices[tool_choices$list_name==q.list_name & tool_choices$name==c.name, "label::english"]
    } else c.label <- NA
    label <- ifelse(is.na(c.label), q.label, paste0(q.label, "/", c.label))
  } else label <- q.name
  return(label)
}

Select_one values function

xml2label_choices_one <- function(tool_survey, tool_choices, data, col) {
  # select the type column from each select_one question
  q.type <- tool_survey$type[tool_survey$name==col]
  
  # take the id of the choices to get the list name
  q.list_name <- str_split(q.type, " ")[[1]][2]
  
  # export the choices relevant to each select_one question
  choices <- tool_choices %>% filter(list_name == q.list_name) %>% 
      select(name, `label::english`) %>% rename(label=`label::english`)
  
  # replace the xml with label using left_join
  d.join <- data.frame(col=as.character(data[[col]])) %>% 
    left_join(choices, by=c("col"="name")) %>% select(label)
  
  # return only the new label column and replace it in the for loop using vectors 
  return(d.join$label)
}

Select_multiple values function

xml2label_choices_multiple <- function(tool_survey, tool_choices, data, col) {
  # select all the columns with all the options for each select_multiple
  d.join <- data %>% 
    select(contains(paste0(col,"/")))
  col_internal <- colnames(d.join)
  
  # for each column with options
  for(j in 1:length(col_internal)){
    # change all 1's to the xml answer
    xml_answer <- str_split(col_internal[j], "/")[[1]][2]
    d.join <- d.join %>% 
      mutate(!!sym(col_internal[j]) := ifelse(!!sym(col_internal[j]) == "1", xml_answer, NA))
    
    # get the list of the xml and label options for each select multiple questions
    choice_id <- filter(tool_survey, str_starts(name, str_split(col_internal[j],"/")[[1]][1])) %>% 
      select(list_name)
    choice_id <- choice_id$list_name
    t.choices <- tool_choices %>% 
      filter(list_name == choice_id) %>% 
      select(name, `label::english`) %>% rename(label = `label::english`)
    
    # replace the xml with label using left_join
    d.new.join <- data.frame(col=as.character(d.join[[col_internal[j]]])) %>%
      left_join(t.choices, by=c("col"="name")) %>% select(label)
    d.join[col_internal[j]] <- d.new.join$label
  }
  
  # concatenate all the answers, removing NAs in one cell and separated by a ';' 
  d.join <- d.join %>% 
    unite("Merged", everything(), sep= ";", na.rm = T)
  
  # return only the new label column and replace it in the for loop using vectors
  return(d.join$Merged)
}

Example

filename_tool <- "enter your tool link here"
filename_cleaned_data <- "enter your data hear"

# loading tool
tool_survey <- read_excel(filename_tool, sheet = "survey", col_types = "text") %>% 
  filter(!is.na(name)) %>% 
  # adding two columns showing the question type and the id of the list_name choices separately
  mutate(q.type = as.character(lapply(type, function(x) str_split(x, " ")[[1]][1])),
         list_name = as.character(lapply(type, function(x) str_split(x, " ")[[1]][2])))

tool_choices <- read_excel(filename_tool, sheet = "choices", col_types = "text") %>% 
  filter(!is.na(list_name))


# loading the data
main_dataset <- read_excel(filename_cleaned_data, col_types = "text")

# creating another data frame with changes

data_labeled <- main_dataset

# select_one values - XML to Label

tool_select_one <- tool_survey %>% 
  filter(str_starts(type, "select_one "))

col_select_one <- tool_select_one$name

for (i in 1:length(col_select_one)){
  if(!is.null(data_labeled[[col_select_one[i]]])){
    data_labeled[[col_select_one[i]]] <- xml2label_choices_one(tool_survey,tool_choices,main_dataset,col_select_one[i])
  }
}

# select_multiple values - XML to Label

tool_select_multi <- tool_survey %>% 
  filter(str_starts(type, "select_multiple "))
col_select_multi <- tool_select_multi$name

for (i in 1:length(col_select_multi)){
  if(!is.null(data_labeled[[col_select_multi[i]]])){
    data_labeled[[col_select_multi[i]]] <- xml2label_choices_multiple(tool_survey,tool_choices,main_dataset,col_select_multi[i])
  }
}

# Column headers - XML to Label
col_names <- colnames(main_dataset)

for (i in 1:length(col_names)) {
  colnames(data_labeled)[i] <- xml2label_question(tool_survey, tool_choices, col_names[i])
  
}

8.4.2 change from label to xml

The following code transform column headers, select_one and select_multiple values from Label to XML.

Conditions required for the code to function correctly: 1- The “label::English” column in KOBO Tool must contain unique values. 2- The labeled data should be saved in the Excel file format. 3- When selecting options for select_multiple questions, ensure to determine if the options are separated by a forward slash (“/”) or (“.”).

Loading Libraries

library(tidyverse)
library(readxl)

Dataset column headers function

label2xml_question <- function(tool_survey, tool_choices, label_colname = "label::English", col, sm_separator = "/"){
  
  # for each column check if it is a select multiple
  if (str_detect(col, sm_separator)) {
    q.label <- str_split(col, sm_separator)[[1]][1]
    c.label <- paste0(tail(str_split(col, sm_separator)[[1]], -1), collapse=sm_separator)
  } else {
    q.label <- col
    c.label <- NA
  }
  
  # returning the xml and make sure to include the xml of multiple choices after sm_separator
  if (q.label %in% tool_survey[[label_colname]]){
    q <- tool_survey[tool_survey[[label_colname]] %in% q.label, ]
    q.name <- q$name
    if (is.na(q.name) | q$type %in% c("note")) q.name <- q.label
    if (!is.na(c.label)){
      q.list_name = ifelse(q$list_name == "NA", NA, q$list_name)
      c.name <- tool_choices[tool_choices$list_name %in% q.list_name & tool_choices[[label_colname]]%in% c.label, "name"]
    } else c.name <- NA
    name <- ifelse(is.na(c.name), q.name, paste0(q.name, sm_separator, c.name))
  } else name <- q.label
  return(name)
}

Select_one values function

label2xml_choices_one <- function(tool_survey, tool_choices, label_colname = "label::English", data, col) {
  
  # select the type column from each select_one question
  q.type <- tool_survey$type[tool_survey[[label_colname]] %in% col]
  
  # take the id of the choices to get the list name
  q.list_name <- str_split(q.type, " ")[[1]][2]
  
  # export the choices relevant to each select_one question
  choices <- tool_choices %>% filter(list_name == q.list_name) %>% 
    select(name, !!sym(label_colname)) %>% rename(label=label_colname)
  
  # replace the label with xml using left_join
  d.join <- data.frame(col = as.character(data[[col]])) %>% 
    left_join(choices, by = c("col"="label")) %>% select(name)
  
  # return only the new name column and replace it in the for loop using vectors 
  return(d.join$name)
}

Select_multiple values function

label2xml_choices_multiple <- function(tool_survey, tool_choices, data, label_colname = "label::English", col, sm_separator = "/") {
  
  # select all the columns with all the options for each select_multiple
  d.join <- data %>% 
    select(contains(paste0(col, sm_separator)))
  col_internal <- colnames(d.join)
  
  # for each column with options
  for(j in 1:length(col_internal)){
    
    # change all 1's to the label answer
    label_answer <- str_split(col_internal[j], sm_separator)[[1]][2]
    d.join <- d.join %>% 
      mutate(!!sym(col_internal[j]) := ifelse(!!sym(col_internal[j]) == "1", label_answer, NA))
    
    # get the list of the label and xml options for each select multiple questions
    choice_id <- tool_survey %>%
      filter(!!sym(label_colname) %in% str_split(col_internal[j], sm_separator)[[1]][1]) %>% 
      select(list_name)
    choice_id <- choice_id$list_name
    t.choices <- tool_choices %>% 
      filter(list_name == choice_id) %>% 
      select(name, !!sym(label_colname)) %>% rename(label = label_colname)
    
    # replace the label with xml using left_join
    d.new.join <- data.frame(col = as.character(d.join[[col_internal[j]]])) %>%
      left_join(t.choices, by=c("col"="label")) %>% select(name)
    d.join[col_internal[j]] <- d.new.join$name
  }
  
  # concatenate all the answers, removing NAs in one cell and separated by a ';' 
  d.join <- d.join %>% 
    unite("Merged", everything(), sep= ";", na.rm = T)
  
  # return only the new xml column and replace it in the for loop using vectors
  return(d.join$Merged)
}

Example

# input parameters
filename_tool <- "enter your tool here"
filename_labeled_data <- "enter your data here"
label_colname <- "label::English"
sm_separator <- "/"

# loading tool
tool_survey <- read_excel(filename_tool, sheet = "survey", col_types = "text") %>% 
  filter(!is.na(name)) %>% 
  # adding two columns showing the question type and the id of the list_name choices separately
  mutate(q.type = as.character(lapply(type, function(x) str_split(x, " ")[[1]][1])),
         list_name = as.character(lapply(type, function(x) str_split(x, " ")[[1]][2])))

tool_choices <- read_excel(filename_tool, sheet = "choices", col_types = "text") %>% 
  filter(!is.na(list_name))

# loading your dataset with labels 
data_labeled <- read_excel(filename_labeled_data, col_types = "text")

# creating another data frame with changes
data_xml <- data_labeled

# select_one values - Label to XML
tool_select_one <- tool_survey %>% 
  filter(str_starts(type, "select_one "))

col_select_one <- tool_select_one[[label_colname]]

for (i in 1:length(col_select_one)){
  if(!is.null(data_xml[[col_select_one[i]]])){
    data_xml[[col_select_one[i]]] <- label2xml_choices_one(tool_survey, tool_choices, label_colname,  data = data_labeled, col = col_select_one[i])
  }
}

# select_multiple values - Label to XML
tool_select_multiple <- tool_survey %>% 
  filter(str_starts(type, "select_multiple "))
col_select_multiple <- tool_select_multiple[[label_colname]]

for (i in 1:length(col_select_multiple)){
  if(!is.null(data_xml[[col_select_multiple[i]]])){
    data_xml[[col_select_multiple[i]]] <- label2xml_choices_multiple(tool_survey, tool_choices, label_colname, 
                                                                     data = data_labeled, col = col_select_multiple[i], sm_separator)
  }
}

# Column headers - Label to XML
col_label <- colnames(data_labeled)

for (i in 1:length(col_label)) {
  colnames(data_xml)[i] <- label2xml_question(tool_survey, tool_choices, label_colname, col = col_label[i], sm_separator)
}

8.5 Dashboarding - Sharing information

Html files Tableau Power BI Shiny

8.6 Outputs with hypothesis testing results