Part 1: Understand the current diet

Load the aggregated food groups and their attributes. We have 28 food groups.

foods <- read.csv('data/foods.csv', sep = ',')
data.table::setDT(foods) # use data.table format
foods[1:10,] # show the first 10
                   food intake energy protein   fat carbs sugar alcohol  ghge
                 <char>  <num>  <num>   <num> <num> <num> <num>   <num> <num>
 1:               Bread  175.4 10.696   0.091 0.030 0.441 0.002       0 0.001
 2:        Other grains   45.0 14.022   0.100 0.042 0.607 0.011       0 0.002
 3:               Cakes   35.6 14.185   0.067 0.152 0.424 0.185       0 0.002
 4:            Potatoes   67.8  3.791   0.021 0.007 0.178 0.000       0 0.000
 5:          Vegetables  154.6  1.565   0.015 0.008 0.050 0.005       0 0.001
 6:             Legumes    3.5  8.571   0.143 0.029 0.286 0.000       0 0.001
 7:      Fruit, berries  171.5  2.729   0.008 0.004 0.134 0.029       0 0.001
 8:               Juice  111.0  1.928   0.005 0.002 0.103 0.000       0 0.001
 9:                Nuts    4.3 25.581   0.209 0.535 0.116 0.000       0 0.005
10: Vegetarian products    0.7  4.286   0.143 0.000 0.000 0.000       0 0.003

Define the constraints on

constraints <- data.frame(
  constraint = c('lower', 'upper'), 
  energy = c(9000, 10000), 
  protein = c(55, 111.5), 
  fat = c(61.8, 98.8), 
  carbs = c(250, 334.6), 
  sugar = c(0, 54.8), 
  alcohol = c(0, 10),
  ghge = c(0, 4.7)
)
constraints
  constraint energy protein  fat carbs sugar alcohol ghge
1      lower   9000    55.0 61.8 250.0   0.0       0  0.0
2      upper  10000   111.5 98.8 334.6  54.8      10  4.7

Exploratory data analysis on current diet

Before we construct the optimization problem, we should always understand the data. This helps us picking the important food groups, as well as making sense of the constraints.

# compute the contribution (indiv * intake) for 28 foods
ftotal <- apply(X = foods[, c('energy', 'protein', 'fat', 
                              'carbs', 'sugar', 'alcohol', 'ghge')], 
                MARGIN = 2, 
                FUN = function(x){x*foods$intake})
rownames(ftotal) <- foods$food # name the rows
head(ftotal)
                energy protein    fat   carbs  sugar alcohol   ghge
Bread        1876.0784 15.9614 5.2620 77.3514 0.3508       0 0.1754
Other grains  630.9900  4.5000 1.8900 27.3150 0.4950       0 0.0900
Cakes         504.9860  2.3852 5.4112 15.0944 6.5860       0 0.0712
Potatoes      257.0298  1.4238 0.4746 12.0684 0.0000       0 0.0000
Vegetables    241.9490  2.3190 1.2368  7.7300 0.7730       0 0.1546
Legumes        29.9985  0.5005 0.1015  1.0010 0.0000       0 0.0035

We can also examine whether the current intake satisfy the constraints from above. For example, the energy contribution from bread is \(175.4 \times 10.696\), which is the intake times per unit energy.

t(as.matrix(foods$intake)) %*% as.matrix(foods[, .(energy, protein, fat, carbs, sugar, alcohol, ghge)])
       energy protein     fat    carbs   sugar alcohol   ghge
[1,] 9314.278 98.2159 85.7644 234.7172 39.2148  8.6498 3.7807

It looks like all categories but carbs fall within the expected range. Carb is slighter lower than the lower threshold.

Now we can compute the percentage of each one of the 28 food group contribution towards the total.

# divide by total of all 28 (upper constraints)
fsum <- apply(ftotal, 2, sum)
fsum
   energy   protein       fat     carbs     sugar   alcohol      ghge 
9314.2778   98.2159   85.7644  234.7172   39.2148    8.6498    3.7807 
fprop <- t(apply(X = ftotal, MARGIN = 1, FUN = function(x){x/fsum}))
fprop <- round(fprop, digits = 3) # keep 3 digits
rownames(fprop) <- foods$food
head(fprop)
             energy protein   fat carbs sugar alcohol  ghge
Bread         0.201   0.163 0.061 0.330 0.009       0 0.046
Other grains  0.068   0.046 0.022 0.116 0.013       0 0.024
Cakes         0.054   0.024 0.063 0.064 0.168       0 0.019
Potatoes      0.028   0.014 0.006 0.051 0.000       0 0.000
Vegetables    0.026   0.024 0.014 0.033 0.020       0 0.041
Legumes       0.003   0.005 0.001 0.004 0.000       0 0.001

For example, bread contributes to 20% towards the total energy, and 16.3% of the total protein.

Visualization

Note

In this section we are mostly focused on energy, intake, ghge. It is easy to extend to other macronutrient categories.

We need some more data manipulation before plotting.

Show code
# first define big groups
grain <- c('Bread', 'Other grains', 'Cakes')
fruit_vege <- c('Potatoes', 'Vegetables', 'Legumes', 'Fruit, berries', 
                'Juice', 'Nuts', 'Vegetarian products')
meat <- c('Red meat', 'White meat')
fish_egg <- c('Fish', 'Eggs')
dairy <- c('Cream, cream desserts', 'Milk, yoghurt', 'Cheese')
fats <- c('Butter, margarine, oil')
beverages <- c('Coffee, tea', 'Soda, saft', 'Water', 
               'Alcoholic beverages', 'Non-dairy milk')
sugar_other <- c('Sugar, sweets', 'Snacks', 'Sauces', 'Spices', 'Other')

# reorder food names to make the plot easier to read
names_ordered <- c(grain, fruit_vege, meat, fish_egg, 
                   dairy, fats, beverages, sugar_other)

names_ordered
 [1] "Bread"                  "Other grains"           "Cakes"                 
 [4] "Potatoes"               "Vegetables"             "Legumes"               
 [7] "Fruit, berries"         "Juice"                  "Nuts"                  
[10] "Vegetarian products"    "Red meat"               "White meat"            
[13] "Fish"                   "Eggs"                   "Cream, cream desserts" 
[16] "Milk, yoghurt"          "Cheese"                 "Butter, margarine, oil"
[19] "Coffee, tea"            "Soda, saft"             "Water"                 
[22] "Alcoholic beverages"    "Non-dairy milk"         "Sugar, sweets"         
[25] "Snacks"                 "Sauces"                 "Spices"                
[28] "Other"                 
Show code
# require some data manip
# need big food group, food name (smaller food group)
pdt <- data.frame(ftotal) # total 
pdt$food_name <- row.names(pdt)
pdt <- data.table::setDT(pdt)

# attach big group
pdt[food_name %in% grain, big_group := 'grain']
pdt[food_name %in% fruit_vege, big_group := 'fruit_vege']
pdt[food_name %in% meat, big_group := 'meat']
pdt[food_name %in% fish_egg, big_group := 'fish_egg']
pdt[food_name %in% dairy, big_group := 'dairy']
pdt[food_name %in% fats, big_group := 'fats']
pdt[food_name %in% beverages, big_group := 'beverages']
pdt[food_name %in% sugar_other, big_group := 'sugar_other']

# make long format
pdt_long <- tidyr::pivot_longer(pdt, cols = -c(food_name, big_group),
                                names_to = 'category')
pdt_long <- data.table::setDT(pdt_long)

# new variable, food_name_order
pdt_long$food_name_order <- factor(pdt_long$food_name, 
                                   levels = names_ordered, 
                                   labels = names_ordered)

Energy contribution from 28 food groups

In total these 28 food groups contribute to 9314kJ. Here is a breakdown of each food groups, colored by different types of food (crude).

library(ggplot2)
library(ggrepel)
library(RColorBrewer)

p1 <- ggplot(data = pdt_long[category == 'energy'], 
             aes(x = food_name_order, y = value, fill = big_group))
p1 <- p1 + geom_bar(stat = 'identity')
p1 <- p1 + coord_flip()
p1 <- p1 + theme_bw()
p1 <- p1 + scale_fill_brewer(palette = 'Dark2')
p1 <- p1 + labs(title = 'Energy (multiplied by food intake)', 
                x = 'Food groups', 
                y = 'Energy')
p1 <- p1 + theme(axis.text = element_text(size = 12), 
                   axis.title = element_text(size = 12), 
                   plot.title = element_text(size = 20))
p1

GHGE contribution from 28 food groups

We can also plot a different metric, say GHGE. We can see that red meat is the largest contributor, followed by cheese and milk (dairy products).

Show code
p2 <- ggplot(data = pdt_long[category == 'ghge'], 
             aes(x = food_name_order, y = value, fill = big_group))
p2 <- p2 + geom_bar(stat = 'identity')
p2 <- p2 + coord_flip()
p2 <- p2 + theme_bw()
p2 <- p2 + scale_fill_brewer(palette = 'Dark2')
p2 <- p2 + labs(title = 'GHGE (multiplied by food intake)', 
                x = 'Food groups', 
                y = 'GHGE')
p2 <- p2 + theme(axis.text = element_text(size = 12), 
                 axis.title = element_text(size = 12), 
                 plot.title = element_text(size = 20))

p2

Energy vs GHGE

We can also show the per unit contribution to energy and GHGE. The size of the bubbles are the amount of consumption: the bigger the more consumed.

Show code
pdfd <- data.table::setDT(foods)
# remove water, outlier
pdfd <- pdfd[food != 'Water']


# attach label 
pdfd[food %in% grain, big_group := 'grain']
pdfd[food %in% fruit_vege, big_group := 'fruit_vege']
pdfd[food %in% meat, big_group := 'meat']
pdfd[food %in% fish_egg, big_group := 'fish_egg']
pdfd[food %in% dairy, big_group := 'dairy']
pdfd[food %in% fats, big_group := 'fats']
pdfd[food %in% beverages, big_group := 'beverages']
pdfd[food %in% sugar_other, big_group := 'sugar_other']


p3 <- ggplot(data = pdfd, 
             aes(x = energy, y = ghge, size = intake, 
                 label = food, color = big_group))
p3 <- p3 + geom_point(alpha = 0.3) + xlim(-3, 30) + ylim(-0.002, 0.015)
p3 <- p3 + scale_size(range = c(0.1, 20))
p3 <- p3 + geom_text_repel(size = 4, max.overlaps = 15)
# p3 <- p3 + geom_text(size = 3, check_overlap = T)
p3 <- p3 + theme_bw()
p3 <- p3 + scale_color_brewer(palette = 'Dark2')
p3 <- p3 + labs(title = 'Energy vs GHGE per unit food (group)', 
                x = 'Energy (per unit)', 
                y = 'GHGE (per unit)')
p3

Proportion for 5 metrics

Finally we present the percentage contribution of 28 food groups towards 5 categories.

Show code
pdfp <- data.frame(fprop) # total 
pdfp$food_name <- row.names(pdfp)

# attach big group
data.table::setDT(pdfp)
pdfp[food_name %in% grain, big_group := 'grain']
pdfp[food_name %in% fruit_vege, big_group := 'fruit_vege']
pdfp[food_name %in% meat, big_group := 'meat']
pdfp[food_name %in% fish_egg, big_group := 'fish_egg']
pdfp[food_name %in% dairy, big_group := 'dairy']
pdfp[food_name %in% fats, big_group := 'fats']
pdfp[food_name %in% beverages, big_group := 'beverages']
pdfp[food_name %in% sugar_other, big_group := 'sugar_other']

pdfp_long <- tidyr::pivot_longer(pdfp, cols = -c(food_name, big_group), names_to = 'category')
pdfp_long <- data.table::setDT(pdfp_long)

# also add orders here
pdfp_long$food_name_order <- factor(pdfp_long$food_name, 
                                   levels = names_ordered, 
                                   labels = names_ordered)

# plot 
p4 <- ggplot(data = pdfp_long[category %in% c('energy', 'protein', 'fat', 'carbs','ghge')], 
             aes(x = food_name_order, y = value, fill = big_group))
p4 <- p4 + geom_bar(stat = 'identity')
p4 <- p4 + coord_flip()
p4 <- p4 + facet_wrap(~category, ncol = 5)
p4 <- p4 + scale_fill_brewer(palette = 'Dark2')
p4 <- p4 + labs(title = 'Proportion of food contribution',
                subtitle = 'Each category sums up to 1',
                x = 'Food groups', 
                y = 'Percentage')
p4 <- p4 + theme_bw()
p4 <- p4 + theme(axis.text = element_text(size = 10), 
                 axis.title = element_text(size = 10), 
                 plot.title = element_text(size = 15), 
                 strip.text = element_text(size = 12), 
                 legend.position = 'none')
p4