Libraries

library(haven)
library(tidyverse)
library(table1)
library(ggplot2)
library(Metrics)
library(ggtext)
library(corrplot)

Data

# Download individual datasets from the NHANES website:   
# https://wwwn.cdc.gov/nchs/nhanes/continuousnhanes/default.aspx?BeginYear=1999  
# https://wwwn.cdc.gov/nchs/nhanes/continuousnhanes/default.aspx?BeginYear=2001

# Epigenetic clock dataset
clocks <- read_sas("dnmepi.sas7bdat")
# Participants who were eligible but did not provide a blood specimen for DNA,
# did not have a sufficient volume of DNA, or did not consent to future
# research use are kept in the file with a sample weight of 0.
# Cross-tabulate zero weight against a missing Horvath clock value:
table(
  clocks$WTDN4YR == 0,
  is.na(clocks$HorvathAge)
)
#         FALSE TRUE
#   FALSE  2532    0
#   TRUE      0 1917
# Keep only rows that have clock values (2532 rows; the rest are blank).
clocks <- filter(clocks, !is.na(HorvathAge))
dim(clocks)
# 2532   32

# Read the 1999-2000 and 2001-2002 releases of one NHANES component and stack
# them, keeping only the columns that are present in both releases.
#   file_1999, file_2001: paths to the two XPT files.
#   Returns the row-bound data from both files.
read_nhanes_cycles <- function(file_1999, file_2001) {
  cycle_1999 <- read_xpt(file_1999)
  cycle_2001 <- read_xpt(file_2001)
  shared_cols <- intersect(colnames(cycle_1999), colnames(cycle_2001))
  rbind(
    dplyr::select(cycle_1999, all_of(shared_cols)),
    dplyr::select(cycle_2001, all_of(shared_cols))
  )
}

# demographics
df <- left_join(
  clocks,
  read_nhanes_cycles("DEMO.XPT", "DEMO_B.XPT"),
  by = "SEQN"
)
# Only the clock table is no longer needed; remove it explicitly instead of
# wiping every other object in the workspace.
rm(clocks)

# smoking
df <- left_join(
  df,
  read_nhanes_cycles("SMQ.XPT", "SMQ_B.XPT"),
  by = "SEQN"
)

# BMI
df <- left_join(
  df,
  read_nhanes_cycles("BMX.XPT", "BMX_B.XPT"),
  by = "SEQN"
)

# add physical activity
df <- left_join(
  df,
  read_nhanes_cycles("PAQ.XPT", "PAQ_B.XPT"),
  by = "SEQN"
)

# add recoded occupation
occ_1999 <- read_xpt("OCQ.XPT")
occ_2001 <- read_xpt("OCQ_B.XPT")
subs <- intersect(colnames(occ_1999), colnames(occ_2001))
occ <- rbind(
  dplyr::select(occ_1999, all_of(subs)),
  dplyr::select(occ_2001, all_of(subs))
)

# creating 5 category occupational classification as used in Rehkopf et al,
# BMC Public Health 2006; based on two dimensions of occupation, white-collar
# versus blue-collar and technical skill; based conceptually on the NS-SEC job
# classification.
#
# The classification uses OCD390 when it is recorded and falls back to OCD240
# (current job) only when OCD390 is missing. Codes outside 1:41 and 98 end up
# as NA, exactly as with the former set of 0/1 indicator columns.
occ_code <- ifelse(is.na(occ$OCD390), occ$OCD240, occ$OCD390)

# %in% never returns NA, so missing codes simply fall through to NA below.
occ$OCCUPATION <- dplyr::case_when(
  occ_code %in% c(1:7, 9, 25) ~ "hiwhite",
  occ_code %in% c(8, 10, 12:16, 22) ~ "lowwhite",
  occ_code %in% c(28:31, 41) ~ "hiblue",
  occ_code %in% c(11, 17:21, 23, 24, 26, 27, 32:40) ~ "lowblue",
  occ_code %in% 98 ~ "nowork",
  TRUE ~ NA_character_
)

occ <- dplyr::select(occ, SEQN, OCCUPATION)
df <- left_join(df, occ, by = "SEQN")

# add dietary data
diet_1999 <- read_xpt("DRXTOT.XPT")
# The 2001 release names the recall-status column DRDDRSTZ; rename it so it
# lines up with DRDDRSTS from the 1999 release before the columns are matched.
diet_2001 <- dplyr::rename(read_xpt("DRXTOT_B.XPT"), DRDDRSTS = DRDDRSTZ)
subs <- intersect(colnames(diet_1999), colnames(diet_2001))
diet <- rbind(
  dplyr::select(diet_1999, all_of(subs)),
  dplyr::select(diet_2001, all_of(subs))
)
df <- left_join(df, diet, by = "SEQN")

# add alcohol intake (joined on SEQN from a separately prepared CSV)
df <- left_join(
  df,
  read.csv("drinkvol_update_11_06_2024.csv"),
  by = "SEQN"
)
# drop every intermediate object, keeping only the merged data
rm(list = setdiff(ls(), "df"))

# indicator for participants < 85: 0 when RIDAGEYR is exactly 85, 1 otherwise
df$lt85 <- ifelse(df$RIDAGEYR == 85, 0, 1)
table(df$lt85)
#    0    1
#  130 2402

# indicator for sex mismatches
# NOTE(review): a row is flagged (1) when RIAGENDR and XY_Estimation hold the
# SAME code, so this is only a mismatch flag if XY_Estimation uses the opposite
# numeric coding to RIAGENDR -- confirm against the codebook.
df$sexMismatch <- ifelse(df$RIAGENDR == df$XY_Estimation, 1, 0)
table(df$sexMismatch)
#    0    1
# 2472   60

# add variable for participants < 85 and without a sex mismatch
# (a missing sexMismatch value also excludes the participant)
df$use <- TRUE
df$use[df$lt85 == 0] <- FALSE
df$use[df$sexMismatch == 1 | is.na(df$sexMismatch)] <- FALSE
table(df$use)
# FALSE  TRUE
#   186  2346
# sort so that the participants to use come first
df <- df[order(df$use, decreasing = TRUE), ]
# Row positions of the participants to use. These are derived from the flag
# itself rather than from a hard-coded row count, so the scaling below stays
# correct if the sample size or the row order changes.
use_rows <- which(df$use)
table(df$use[use_rows])
# TRUE
# 2346

# calculate scaled version of YangCell (centred and scaled among the
# participants to use; everyone else stays NA)
df$YangCell_scale <- NA_real_
df$YangCell_scale[use_rows] <- as.numeric(scale(df$YangCell[use_rows]))

# calculate scaled version of DunedinPoAm (same approach)
df$DunedinPoAm_scale <- NA_real_
df$DunedinPoAm_scale[use_rows] <- as.numeric(scale(df$DunedinPoAm[use_rows]))

# recoding covariates
# Smoking status from SMQ020 and SMQ040.
# The "Current" rule is written as SMQ020 == 1 AND SMQ040 in {1, 2}. The
# earlier form `SMQ020 == 1 & SMQ040 == 1 | SMQ040 == 2` was parsed as
# `(SMQ020 == 1 & SMQ040 == 1) | SMQ040 == 2` because & binds tighter than |,
# so SMQ040 == 2 counted as current regardless of SMQ020.
# Rows matching no rule (including missing answers) are NA.
df <- df %>%
  mutate(
    # three levels: never / former / current
    SMOKE_3cat = case_when(
      SMQ020 == 2 ~ "Never",
      SMQ020 == 1 & SMQ040 %in% c(1, 2) ~ "Current",
      SMQ020 == 1 & SMQ040 == 3 ~ "Former",
      TRUE ~ NA_character_
    ),
    # two levels: never / ever
    SMOKE_2cat = case_when(
      SMQ020 == 2 ~ "Never",
      SMQ020 == 1 ~ "Ever",
      TRUE ~ NA_character_
    ),
    # two levels: not currently smoking (never or former) / current
    SMOKE_2cat_current = case_when(
      SMQ020 == 2 | (SMQ020 == 1 & SMQ040 == 3) ~ "No",
      SMQ020 == 1 & SMQ040 %in% c(1, 2) ~ "Current",
      TRUE ~ NA_character_
    )
  )
df$SMOKE_3cat <- factor(df$SMOKE_3cat, levels = c("Never", "Former", "Current"))
df$SMOKE_2cat <- factor(df$SMOKE_2cat, levels = c("Never", "Ever"))
df$SMOKE_2cat_current <- factor(df$SMOKE_2cat_current, levels = c("No", "Current"))

# Education label looked up from DMDEDUC codes 1-3; any other code (or a
# missing value) has no match and becomes NA.
df$EDUCATION <- c(
  "Less Than High School",
  "High School Diploma (including GED)",
  "More Than High School"
)[match(df$DMDEDUC, c(1, 2, 3))]

# yes/no moderate physical activity (PAD320: 1 -> Yes; 2 or 3 -> No; else NA)
df$active_mod <- factor(
  c("Yes", "No", "No")[match(df$PAD320, c(1, 2, 3))],
  levels = c("No", "Yes")
)

# yes/no vigorous physical activity (PAD200: 1 -> Yes; 2 or 3 -> No; else NA)
df$active_vig <- factor(
  c("Yes", "No", "No")[match(df$PAD200, c(1, 2, 3))],
  levels = c("No", "Yes")
)

# yes/no moderate or vigorous physical activity: Yes if either is Yes,
# No only when both are No, otherwise NA
df$active_modvig <- factor(
  dplyr::case_when(
    df$active_mod == "Yes" | df$active_vig == "Yes" ~ "Yes",
    df$active_mod == "No" & df$active_vig == "No" ~ "No"
  ),
  levels = c("No", "Yes")
)
table(df$active_modvig)
#   No  Yes
# 1415 1115

# participants with reliable dietary data
table(df$DRDDRSTS[df$use])
#    1    2    3    5
# 2260   13    4   69

# variable for participants to use with reliable dietary data
# (%in% keeps the flag strictly TRUE/FALSE when DRDDRSTS is missing; the flag
# is used as a row index below, where an NA would create all-NA rows)
df$use_fat <- df$use & df$DRDDRSTS %in% 1
table(df$use_fat)
# FALSE  TRUE
#   272  2260

# filter individuals with extreme energy intake:
# < 500 or > 8000 kcal/day for men, < 500 or > 5000 kcal/day for women
table(df[df$RIAGENDR == 1 & df$use_fat, ]$DRXTKCAL < 500)
# FALSE  TRUE
#  1142    12
table(df[df$RIAGENDR == 1 & df$use_fat, ]$DRXTKCAL > 8000)
# FALSE  TRUE
#  1153     1
table(df[df$RIAGENDR == 2 & df$use_fat, ]$DRXTKCAL < 500)
# FALSE  TRUE
#  1079    27
table(df[df$RIAGENDR == 2 & df$use_fat, ]$DRXTKCAL > 5000)
# FALSE
#  1106

# variable for participants to use without extreme energy intake
# The upper bound for women (5000) is now enforced rather than only checked;
# per the table above no woman exceeds it, so the counts are unchanged.
# A missing energy value excludes the participant instead of yielding NA.
df$use_fat <- df$use_fat &
  !is.na(df$DRXTKCAL) &
  df$DRXTKCAL >= 500 &
  (
    (df$RIAGENDR %in% 1 & df$DRXTKCAL <= 8000) |
      (df$RIAGENDR %in% 2 & df$DRXTKCAL <= 5000)
  )
table(df$use_fat)
# FALSE  TRUE
#   312  2220

# variable for participants to use and with complete covariate data:
# use_fat must hold and none of the listed covariates may be missing
df$use_fat_covar <- df$use_fat & complete.cases(
  df[, c(
    "RIDAGEYR", "RIAGENDR", "RIDRETH1", "BMXBMI", "DRXTKCAL", "EDUCATION",
    "OCCUPATION", "INDFMPIR", "SMOKE_3cat", "drinkvol", "active_modvig"
  )]
)
table(df$use_fat_covar)
# FALSE  TRUE
#  761  1771

# Derived fat variables:
#   psatfatRatio - polyunsaturated fat to saturated fat ratio
#   omega6       - 18:2 (octadecadienoic) + 20:4 (eicosatetraenoic)
#   omega3       - 18:3 (octadecatrienoic) + 18:4 (octadecatetraenoic) +
#                  20:5 (eicosapentaenoic) + 22:5 (docosapentaenoic) +
#                  22:6 (docosahexaenoic)
# Classification follows
# https://pmc.ncbi.nlm.nih.gov/articles/PMC7103069/#Sec2 and
# https://pmc.ncbi.nlm.nih.gov/articles/PMC6627798/
df <- df %>%
  mutate(
    psatfatRatio = DRXTPFAT / DRXTSFAT,
    omega6 = DRXTP182 + DRXTP204,
    omega3 = DRXTP183 + DRXTP184 + DRXTP205 + DRXTP225 + DRXTP226
  )

# log2 transformations
# Each variable in `fats` gets a companion column "<name>_log2".
fats <- c(
  "DRXTTFAT", "DRXTSFAT", "DRXTMFAT", "DRXTPFAT",
  "DRXTS040", "DRXTS060", "DRXTS080", "DRXTS100", "DRXTS120", "DRXTS140",
  "DRXTS160", "DRXTS180",
  "DRXTM161", "DRXTM181", "DRXTM201", "DRXTM221",
  "omega6", "DRXTP182", "DRXTP204",
  "omega3", "DRXTP183", "DRXTP184", "DRXTP205", "DRXTP225", "DRXTP226"
)
for (fat in fats) {
  intake <- df[[fat]]
  # log2(0) is -Inf: when any participant flagged by use_fat has an intake of
  # exactly 0, shift the whole variable by a small constant first.
  # na.rm = TRUE keeps a missing intake from aborting the loop.
  if (any(intake[df$use_fat] == 0, na.rm = TRUE)) {
    intake <- intake + 0.0001
  }
  # index by the column name directly (a plain string, not a symbol)
  df[[paste0(fat, "_log2")]] <- log2(intake)
}

# add variables for calories from fat
# cal_*      : grams of fat multiplied by 9 (presumably kcal per gram of fat)
# cal_*_perc : that value divided by total energy (DRXTKCAL); note that this
#              is a fraction and is not multiplied by 100
df <- df %>%
  mutate(
    cal_fat = DRXTTFAT * 9,
    cal_fat_perc = cal_fat / DRXTKCAL,
    cal_sfat = DRXTSFAT * 9,
    cal_sfat_perc = cal_sfat / DRXTKCAL,
    cal_mfat = DRXTMFAT * 9,
    cal_mfat_perc = cal_mfat / DRXTKCAL,
    cal_pfat = DRXTPFAT * 9,
    cal_pfat_perc = cal_pfat / DRXTKCAL
  )

# save the cleaned analysis data set
save(list = "df", file = "nhanes_fattyAcids_clean_data.RData")

# keep only the data set in the workspace
rm(list = setdiff(ls(), "df"))

Descriptive statistics: Tables 1 and S1

# function to calculate p-values for differences between strata
# (intended for the `extra.col` argument of table1)
#
#   x   : list with one vector of values per stratum (one variable at a time)
#   ... : ignored; accepted so extra arguments can be passed without error
#
# Returns a character vector of length 2: an empty string (so the p-value is
# placed on the line below the variable label) followed by the formatted
# p-value. Numeric variables use a Kruskal-Wallis test, all others a
# chi-squared test.
pvalue <- function(x, ...) {
    # A pooled "overall" stratum is not a comparison group: leaving it in
    # counts every observation twice and adds a spurious group (extra degrees
    # of freedom) to the test, so it is dropped before testing.
    # NOTE(review): assumes the pooled stratum is named "overall"
    # (case-insensitive) -- confirm against the installed table1 version.
    strata_names <- names(x)
    if (!is.null(strata_names)) {
        x <- x[!(tolower(strata_names) %in% "overall")]
    }
    # Construct vectors of data y, and groups (strata) g
    y <- unlist(x)
    g <- factor(rep(seq_along(x), times = lengths(x)))
    if (is.numeric(y)) {
        # For numeric variables
        p <- kruskal.test(y, g)$p.value
    } else {
        # For categorical variables
        p <- chisq.test(table(y, g))$p.value
    }
    # Format the p-value, using an HTML entity for the less-than sign.
    # The initial empty string places the output on the line below the variable label.
    c("", sub("<", "&lt;", format.pval(p, digits = 3, eps = 0.001)))
}

# complete cases: descriptive table for participants with complete covariates
table1(
    ~ factor(RIAGENDR) + RIDAGEYR + factor(RIDRETH1) + BMXBMI +
        factor(SMOKE_3cat) + drinkvol +
        factor(EDUCATION) + factor(OCCUPATION) + INDFMPIR + DRXTKCAL +
        DRXTTFAT + cal_fat_perc +
        DRXTSFAT + cal_sfat_perc +
        DRXTMFAT + cal_mfat_perc +
        DRXTPFAT + cal_pfat_perc +
        psatfatRatio + omega6 + omega3,
    data = df[df$use_fat_covar, ]
)
Overall
(N=1771)
factor(RIAGENDR)
1 952 (53.8%)
2 819 (46.2%)
Age at Screening Adjudicated - Recode
Mean (SD) 64.8 (9.28)
Median [Min, Max] 64.0 [50.0, 84.0]
factor(RIDRETH1)
1 493 (27.8%)
2 100 (5.6%)
3 754 (42.6%)
4 366 (20.7%)
5 58 (3.3%)
Body Mass Index (kg/m**2)
Mean (SD) 28.9 (5.92)
Median [Min, Max] 28.1 [16.0, 62.5]
factor(SMOKE_3cat)
Never 780 (44.0%)
Former 713 (40.3%)
Current 278 (15.7%)
drinkvol
Mean (SD) 0.354 (0.937)
Median [Min, Max] 0.00821 [0, 12.0]
factor(EDUCATION)
High School Diploma (including GED) 388 (21.9%)
Less Than High School 728 (41.1%)
More Than High School 655 (37.0%)
factor(OCCUPATION)
hiblue 250 (14.1%)
hiwhite 452 (25.5%)
lowblue 704 (39.8%)
lowwhite 323 (18.2%)
nowork 42 (2.4%)
Family PIR
Mean (SD) 2.72 (1.60)
Median [Min, Max] 2.40 [0, 5.00]
Energy (kcal)
Mean (SD) 1860 (764)
Median [Min, Max] 1750 [512, 6200]
Total fat (gm)
Mean (SD) 69.2 (37.3)
Median [Min, Max] 62.5 [7.08, 295]
Total fat (gm)
Mean (SD) 0.329 (0.0925)
Median [Min, Max] 0.330 [0.0701, 0.705]
Total saturated fatty acids (gm)
Mean (SD) 21.8 (13.3)
Median [Min, Max] 19.2 [1.45, 116]
Total saturated fatty acids (gm)
Mean (SD) 0.103 (0.0385)
Median [Min, Max] 0.0993 [0.0158, 0.304]
Total monounsaturated fatty acids (gm)
Mean (SD) 25.7 (15.0)
Median [Min, Max] 22.6 [2.22, 110]
Total monounsaturated fatty acids (gm)
Mean (SD) 0.121 (0.0407)
Median [Min, Max] 0.120 [0.0174, 0.327]
Total polyunsaturated fatty acids (gm)
Mean (SD) 14.6 (9.31)
Median [Min, Max] 12.6 [0.570, 78.6]
Total polyunsaturated fatty acids (gm)
Mean (SD) 0.0700 (0.0320)
Median [Min, Max] 0.0644 [0.00722, 0.231]
Total polyunsaturated fatty acids (gm)
Mean (SD) 0.774 (0.451)
Median [Min, Max] 0.672 [0.0599, 3.71]
PFA 18:2 (Octadecadienoic) (gm)
Mean (SD) 13.0 (8.42)
Median [Min, Max] 11.1 [0.340, 67.5]
PFA 18:3 (Octadecatrienoic) (gm)
Mean (SD) 1.44 (1.03)
Median [Min, Max] 1.18 [0.100, 9.11]
# participants for imputation analyses: same table for everyone flagged by
# use_fat (missing covariate values are reported per variable)
table1(
    ~ factor(RIAGENDR) + RIDAGEYR + factor(RIDRETH1) + BMXBMI +
        factor(SMOKE_3cat) + drinkvol +
        factor(EDUCATION) + factor(OCCUPATION) + INDFMPIR + DRXTKCAL +
        DRXTTFAT + cal_fat_perc +
        DRXTSFAT + cal_sfat_perc +
        DRXTMFAT + cal_mfat_perc +
        DRXTPFAT + cal_pfat_perc +
        psatfatRatio + omega6 + omega3,
    data = df[df$use_fat, ]
)
Overall
(N=2220)
factor(RIAGENDR)
1 1141 (51.4%)
2 1079 (48.6%)
Age at Screening Adjudicated - Recode
Mean (SD) 65.0 (9.28)
Median [Min, Max] 64.0 [50.0, 84.0]
factor(RIDRETH1)
1 648 (29.2%)
2 143 (6.4%)
3 889 (40.0%)
4 467 (21.0%)
5 73 (3.3%)
Body Mass Index (kg/m**2)
Mean (SD) 28.8 (5.82)
Median [Min, Max] 28.1 [16.0, 62.5]
Missing 61 (2.7%)
factor(SMOKE_3cat)
Never 1004 (45.2%)
Former 863 (38.9%)
Current 349 (15.7%)
Missing 4 (0.2%)
drinkvol
Mean (SD) 0.351 (0.936)
Median [Min, Max] 0.00548 [0, 12.0]
Missing 70 (3.2%)
factor(EDUCATION)
High School Diploma (including GED) 455 (20.5%)
Less Than High School 995 (44.8%)
More Than High School 770 (34.7%)
factor(OCCUPATION)
hiblue 301 (13.6%)
hiwhite 505 (22.7%)
lowblue 857 (38.6%)
lowwhite 376 (16.9%)
nowork 55 (2.5%)
Missing 126 (5.7%)
Family PIR
Mean (SD) 2.63 (1.60)
Median [Min, Max] 2.27 [0, 5.00]
Missing 246 (11.1%)
Energy (kcal)
Mean (SD) 1830 (771)
Median [Min, Max] 1710 [512, 6200]
Total fat (gm)
Mean (SD) 68.1 (37.5)
Median [Min, Max] 61.2 [6.91, 295]
Total fat (gm)
Mean (SD) 0.328 (0.0932)
Median [Min, Max] 0.329 [0.0701, 0.705]
Total saturated fatty acids (gm)
Mean (SD) 21.5 (13.3)
Median [Min, Max] 18.6 [1.45, 116]
Total saturated fatty acids (gm)
Mean (SD) 0.103 (0.0384)
Median [Min, Max] 0.0992 [0.0153, 0.304]
Total monounsaturated fatty acids (gm)
Mean (SD) 25.3 (15.1)
Median [Min, Max] 22.1 [2.22, 110]
Total monounsaturated fatty acids (gm)
Mean (SD) 0.121 (0.0412)
Median [Min, Max] 0.120 [0.0174, 0.336]
Total polyunsaturated fatty acids (gm)
Mean (SD) 14.5 (9.53)
Median [Min, Max] 12.3 [0.570, 87.1]
Total polyunsaturated fatty acids (gm)
Mean (SD) 0.0704 (0.0325)
Median [Min, Max] 0.0650 [0.00722, 0.233]
Total polyunsaturated fatty acids (gm)
Mean (SD) 0.785 (0.473)
Median [Min, Max] 0.677 [0.0599, 3.97]
PFA 18:2 (Octadecadienoic) (gm)
Mean (SD) 13.0 (8.61)
Median [Min, Max] 11.0 [0.340, 78.1]
PFA 18:3 (Octadecatrienoic) (gm)
Mean (SD) 1.44 (1.07)
Median [Min, Max] 1.15 [0.0700, 9.11]
# stratified by sex
# table1 expects the stratifying term to be a factor with meaningful labels
# (it warns otherwise), so stratify on a labelled copy of RIAGENDR
# (1 = Male, 2 = Female, matching the men/women coding used elsewhere in this
# script) instead of the raw numeric code.
complete_cases <- df[df$use_fat_covar, ]
complete_cases$sex_label <- factor(
    complete_cases$RIAGENDR,
    levels = c(1, 2),
    labels = c("Male", "Female")
)
table1(
    ~ RIDAGEYR + factor(RIDRETH1) + BMXBMI + factor(SMOKE_3cat) + drinkvol +
        factor(EDUCATION) + factor(OCCUPATION) + INDFMPIR + DRXTKCAL +
        DRXTTFAT + cal_fat_perc + DRXTSFAT + cal_sfat_perc +
        DRXTMFAT + cal_mfat_perc + DRXTPFAT + cal_pfat_perc +
        psatfatRatio + omega6 + omega3 | sex_label,
    data = complete_cases,
    extra.col = list(`P-value` = pvalue)
)
## Warning in table1.formula(~RIDAGEYR + factor(RIDRETH1) + BMXBMI +
## factor(SMOKE_3cat) + : Terms to the right of '|' in formula 'x' define table
## columns and are expected to be factors with meaningful labels.
1
(N=952)
2
(N=819)
Overall
(N=1771)
P-value
Age at Screening Adjudicated - Recode
Mean (SD) 64.8 (9.18) 64.8 (9.40) 64.8 (9.28) 0.999
Median [Min, Max] 64.0 [50.0, 84.0] 64.0 [50.0, 84.0] 64.0 [50.0, 84.0]
factor(RIDRETH1)
1 279 (29.3%) 214 (26.1%) 493 (27.8%) 0.722
2 59 (6.2%) 41 (5.0%) 100 (5.6%)
3 394 (41.4%) 360 (44.0%) 754 (42.6%)
4 194 (20.4%) 172 (21.0%) 366 (20.7%)
5 26 (2.7%) 32 (3.9%) 58 (3.3%)
Body Mass Index (kg/m**2)
Mean (SD) 28.3 (5.03) 29.6 (6.75) 28.9 (5.92) 0.00626
Median [Min, Max] 27.8 [16.9, 54.5] 28.7 [16.0, 62.5] 28.1 [16.0, 62.5]
factor(SMOKE_3cat)
Never 309 (32.5%) 471 (57.5%) 780 (44.0%) <0.001
Former 478 (50.2%) 235 (28.7%) 713 (40.3%)
Current 165 (17.3%) 113 (13.8%) 278 (15.7%)
drinkvol
Mean (SD) 0.525 (1.13) 0.156 (0.582) 0.354 (0.937) <0.001
Median [Min, Max] 0.0329 [0, 12.0] 0 [0, 12.0] 0.00821 [0, 12.0]
factor(EDUCATION)
High School Diploma (including GED) 176 (18.5%) 212 (25.9%) 388 (21.9%) 0.00635
Less Than High School 404 (42.4%) 324 (39.6%) 728 (41.1%)
More Than High School 372 (39.1%) 283 (34.6%) 655 (37.0%)
factor(OCCUPATION)
hiblue 213 (22.4%) 37 (4.5%) 250 (14.1%) <0.001
hiwhite 251 (26.4%) 201 (24.5%) 452 (25.5%)
lowblue 383 (40.2%) 321 (39.2%) 704 (39.8%)
lowwhite 86 (9.0%) 237 (28.9%) 323 (18.2%)
nowork 19 (2.0%) 23 (2.8%) 42 (2.4%)
Family PIR
Mean (SD) 2.81 (1.60) 2.62 (1.59) 2.72 (1.60) 0.0598
Median [Min, Max] 2.52 [0, 5.00] 2.33 [0, 5.00] 2.40 [0, 5.00]
Energy (kcal)
Mean (SD) 2090 (822) 1600 (589) 1860 (764) <0.001
Median [Min, Max] 2010 [512, 6200] 1520 [546, 4300] 1750 [512, 6200]
Total fat (gm)
Mean (SD) 78.1 (41.0) 58.9 (29.3) 69.2 (37.3) <0.001
Median [Min, Max] 71.9 [7.08, 295] 54.6 [8.95, 223] 62.5 [7.08, 295]
Total fat (gm)
Mean (SD) 0.330 (0.0918) 0.328 (0.0932) 0.329 (0.0925) 0.912
Median [Min, Max] 0.330 [0.0740, 0.666] 0.331 [0.0701, 0.705] 0.330 [0.0701, 0.705]
Total saturated fatty acids (gm)
Mean (SD) 24.6 (14.7) 18.6 (10.5) 21.8 (13.3) <0.001
Median [Min, Max] 22.2 [2.07, 116] 16.6 [1.45, 82.8] 19.2 [1.45, 116]
Total saturated fatty acids (gm)
Mean (SD) 0.104 (0.0384) 0.103 (0.0387) 0.103 (0.0385) 0.917
Median [Min, Max] 0.100 [0.0164, 0.304] 0.0986 [0.0158, 0.254] 0.0993 [0.0158, 0.304]
Total monounsaturated fatty acids (gm)
Mean (SD) 29.4 (16.6) 21.5 (11.6) 25.7 (15.0) <0.001
Median [Min, Max] 26.3 [2.34, 110] 19.2 [2.22, 90.5] 22.6 [2.22, 110]
Total monounsaturated fatty acids (gm)
Mean (SD) 0.124 (0.0409) 0.119 (0.0404) 0.121 (0.0407) 0.068
Median [Min, Max] 0.120 [0.0182, 0.327] 0.120 [0.0174, 0.308] 0.120 [0.0174, 0.327]
Total polyunsaturated fatty acids (gm)
Mean (SD) 16.0 (10.1) 12.9 (7.92) 14.6 (9.31) <0.001
Median [Min, Max] 14.0 [0.570, 78.6] 10.9 [1.30, 52.1] 12.6 [0.570, 78.6]
Total polyunsaturated fatty acids (gm)
Mean (SD) 0.0684 (0.0320) 0.0718 (0.0320) 0.0700 (0.0320) 0.0803
Median [Min, Max] 0.0638 [0.00722, 0.231] 0.0660 [0.00878, 0.201] 0.0644 [0.00722, 0.231]
Total polyunsaturated fatty acids (gm)
Mean (SD) 0.756 (0.450) 0.794 (0.452) 0.774 (0.451) 0.0699
Median [Min, Max] 0.644 [0.0599, 3.40] 0.704 [0.0938, 3.71] 0.672 [0.0599, 3.71]
PFA 18:2 (Octadecadienoic) (gm)
Mean (SD) 14.3 (9.17) 11.6 (7.18) 13.0 (8.42) <0.001
Median [Min, Max] 12.6 [0.340, 67.5] 9.69 [1.11, 46.9] 11.1 [0.340, 67.5]
PFA 18:3 (Octadecatrienoic) (gm)
Mean (SD) 1.59 (1.13) 1.27 (0.866) 1.44 (1.03) <0.001
Median [Min, Max] 1.31 [0.107, 9.11] 1.01 [0.100, 5.75] 1.18 [0.100, 9.11]

Fatty acid subtypes: Table S2

# complete cases: intake of each fatty acid subtype
table1(
    reformulate(c(
        "DRXTS040", "DRXTS060", "DRXTS080", "DRXTS100", "DRXTS120",
        "DRXTS140", "DRXTS160", "DRXTS180",
        "DRXTM161", "DRXTM181", "DRXTM201", "DRXTM221",
        "DRXTP182", "DRXTP204", "DRXTP183", "DRXTP184", "DRXTP205",
        "DRXTP225", "DRXTP226"
    )),
    data = df[df$use_fat_covar, ]
)
Overall
(N=1771)
SFA 4:0 (Butanoic) (gm)
Mean (SD) 0.403 (0.456)
Median [Min, Max] 0.260 [0, 3.61]
SFA 6:0 (Hexanoic) (gm)
Mean (SD) 0.219 (0.250)
Median [Min, Max] 0.140 [0, 2.05]
SFA 8:0 (Octanoic) (gm)
Mean (SD) 0.177 (0.206)
Median [Min, Max] 0.120 [0, 1.70]
SFA 10:0 (Decanoic) (gm)
Mean (SD) 0.336 (0.343)
Median [Min, Max] 0.240 [0, 2.67]
SFA 12:0 (Dodecanoic) (gm)
Mean (SD) 0.610 (0.894)
Median [Min, Max] 0.350 [0, 9.64]
SFA 14:0 (Tetradecanoic) (gm)
Mean (SD) 1.76 (1.53)
Median [Min, Max] 1.36 [0.0110, 11.8]
SFA 16:0 (Hexadecanoic) (gm)
Mean (SD) 12.1 (7.02)
Median [Min, Max] 10.8 [1.07, 61.6]
SFA 18:0 (Octadecanoic) (gm)
Mean (SD) 5.63 (3.48)
Median [Min, Max] 4.97 [0.319, 28.8]
MFA 16:1 (Hexadecenoic) (gm)
Mean (SD) 1.19 (0.859)
Median [Min, Max] 0.980 [0.0130, 7.20]
MFA 18:1 (Octadecenoic) (gm)
Mean (SD) 24.0 (14.1)
Median [Min, Max] 21.1 [1.90, 101]
MFA 20:1 (Eicosenoic) (gm)
Mean (SD) 0.176 (0.392)
Median [Min, Max] 0.120 [0, 10.8]
MFA 22:1 (Docosenoic) (gm)
Mean (SD) 0.0388 (0.137)
Median [Min, Max] 0.0100 [0, 2.77]
PFA 18:2 (Octadecadienoic) (gm)
Mean (SD) 12.9 (8.38)
Median [Min, Max] 11.0 [0.340, 67.0]
PFA 20:4 (Eicosatetraenoic) (gm)
Mean (SD) 0.135 (0.124)
Median [Min, Max] 0.104 [0, 1.64]
PFA 18:3 (Octadecatrienoic) (gm)
Mean (SD) 1.30 (0.891)
Median [Min, Max] 1.09 [0.0400, 9.07]
PFA 18:4 (Octadecatetraenoic) (gm)
Mean (SD) 0.00638 (0.0294)
Median [Min, Max] 0 [0, 0.367]
PFA 20:5 (Eicosapentaenoic) (gm)
Mean (SD) 0.0417 (0.128)
Median [Min, Max] 0.00400 [0, 1.67]
PFA 22:5 (Docosapentaenoic) (gm)
Mean (SD) 0.0183 (0.0480)
Median [Min, Max] 0 [0, 0.600]
PFA 22:6 (Docosahexaenoic) (gm)
Mean (SD) 0.0817 (0.196)
Median [Min, Max] 0.0270 [0, 2.34]
# participants for imputation analyses: intake of each fatty acid subtype
table1(
    reformulate(c(
        "DRXTS040", "DRXTS060", "DRXTS080", "DRXTS100", "DRXTS120",
        "DRXTS140", "DRXTS160", "DRXTS180",
        "DRXTM161", "DRXTM181", "DRXTM201", "DRXTM221",
        "DRXTP182", "DRXTP204", "DRXTP183", "DRXTP184", "DRXTP205",
        "DRXTP225", "DRXTP226"
    )),
    data = df[df$use_fat, ]
)
Overall
(N=2220)
SFA 4:0 (Butanoic) (gm)
Mean (SD) 0.395 (0.448)
Median [Min, Max] 0.260 [0, 3.61]
SFA 6:0 (Hexanoic) (gm)
Mean (SD) 0.216 (0.247)
Median [Min, Max] 0.140 [0, 2.05]
SFA 8:0 (Octanoic) (gm)
Mean (SD) 0.177 (0.240)
Median [Min, Max] 0.115 [0, 6.05]
SFA 10:0 (Decanoic) (gm)
Mean (SD) 0.334 (0.353)
Median [Min, Max] 0.230 [0, 4.32]
SFA 12:0 (Dodecanoic) (gm)
Mean (SD) 0.607 (0.899)
Median [Min, Max] 0.345 [0, 10.4]
SFA 14:0 (Tetradecanoic) (gm)
Mean (SD) 1.73 (1.52)
Median [Min, Max] 1.31 [0.0100, 11.8]
SFA 16:0 (Hexadecanoic) (gm)
Mean (SD) 11.9 (7.02)
Median [Min, Max] 10.5 [1.07, 61.6]
SFA 18:0 (Octadecanoic) (gm)
Mean (SD) 5.52 (3.49)
Median [Min, Max] 4.81 [0.319, 28.8]
MFA 16:1 (Hexadecenoic) (gm)
Mean (SD) 1.17 (0.849)
Median [Min, Max] 0.963 [0, 7.20]
MFA 18:1 (Octadecenoic) (gm)
Mean (SD) 23.5 (14.2)
Median [Min, Max] 20.4 [1.90, 101]
MFA 20:1 (Eicosenoic) (gm)
Mean (SD) 0.175 (0.363)
Median [Min, Max] 0.116 [0, 10.8]
MFA 22:1 (Docosenoic) (gm)
Mean (SD) 0.0397 (0.139)
Median [Min, Max] 0.0100 [0, 2.77]
PFA 18:2 (Octadecadienoic) (gm)
Mean (SD) 12.8 (8.57)
Median [Min, Max] 10.8 [0.340, 77.7]
PFA 20:4 (Eicosatetraenoic) (gm)
Mean (SD) 0.132 (0.122)
Median [Min, Max] 0.100 [0, 1.64]
PFA 18:3 (Octadecatrienoic) (gm)
Mean (SD) 1.29 (0.924)
Median [Min, Max] 1.06 [0.0400, 9.07]
PFA 18:4 (Octadecatetraenoic) (gm)
Mean (SD) 0.00693 (0.0360)
Median [Min, Max] 0 [0, 0.750]
PFA 20:5 (Eicosapentaenoic) (gm)
Mean (SD) 0.0434 (0.148)
Median [Min, Max] 0.00400 [0, 2.66]
PFA 22:5 (Docosapentaenoic) (gm)
Mean (SD) 0.0181 (0.0474)
Median [Min, Max] 0 [0, 0.600]
PFA 22:6 (Docosahexaenoic) (gm)
Mean (SD) 0.0827 (0.209)
Median [Min, Max] 0.0240 [0, 2.78]
# stratified by sex
# table1 expects the stratifying term to be a factor with meaningful labels
# (it warns otherwise), so stratify on a labelled copy of RIAGENDR
# (1 = Male, 2 = Female, matching the males/females coding used elsewhere in
# this script) instead of the raw numeric code.
complete_cases <- df[df$use_fat_covar, ]
complete_cases$sex_label <- factor(
    complete_cases$RIAGENDR,
    levels = c(1, 2),
    labels = c("Male", "Female")
)
table1(
    ~ DRXTS040 + DRXTS060 + DRXTS080 + DRXTS100 + DRXTS120 + DRXTS140 +
        DRXTS160 + DRXTS180 + DRXTM161 + DRXTM181 + DRXTM201 + DRXTM221 +
        DRXTP182 + DRXTP204 + DRXTP183 + DRXTP184 + DRXTP205 + DRXTP225 +
        DRXTP226 | sex_label,
    data = complete_cases,
    extra.col = list(`P-value` = pvalue)
)
## Warning in table1.formula(~DRXTS040 + DRXTS060 + DRXTS080 + DRXTS100 + DRXTS120
## + : Terms to the right of '|' in formula 'x' define table columns and are
## expected to be factors with meaningful labels.
1
(N=952)
2
(N=819)
Overall
(N=1771)
P-value
SFA 4:0 (Butanoic) (gm)
Mean (SD) 0.437 (0.502) 0.364 (0.392) 0.403 (0.456) 0.17
Median [Min, Max] 0.290 [0, 3.61] 0.241 [0, 2.71] 0.260 [0, 3.61]
SFA 6:0 (Hexanoic) (gm)
Mean (SD) 0.240 (0.277) 0.195 (0.211) 0.219 (0.250) 0.069
Median [Min, Max] 0.159 [0, 2.05] 0.134 [0, 1.61] 0.140 [0, 2.05]
SFA 8:0 (Octanoic) (gm)
Mean (SD) 0.188 (0.227) 0.164 (0.180) 0.177 (0.206) 0.624
Median [Min, Max] 0.122 [0, 1.69] 0.110 [0, 1.70] 0.120 [0, 1.70]
SFA 10:0 (Decanoic) (gm)
Mean (SD) 0.364 (0.380) 0.303 (0.292) 0.336 (0.343) 0.0573
Median [Min, Max] 0.263 [0, 2.67] 0.211 [0, 2.10] 0.240 [0, 2.67]
SFA 12:0 (Dodecanoic) (gm)
Mean (SD) 0.623 (0.898) 0.596 (0.889) 0.610 (0.894) 0.243
Median [Min, Max] 0.365 [0, 9.62] 0.327 [0, 9.64] 0.350 [0, 9.64]
SFA 14:0 (Tetradecanoic) (gm)
Mean (SD) 1.95 (1.69) 1.54 (1.28) 1.76 (1.53) <0.001
Median [Min, Max] 1.53 [0.0250, 11.8] 1.20 [0.0110, 8.52] 1.36 [0.0110, 11.8]
SFA 16:0 (Hexadecanoic) (gm)
Mean (SD) 13.7 (7.74) 10.2 (5.50) 12.1 (7.02) <0.001
Median [Min, Max] 12.6 [1.43, 61.6] 9.30 [1.07, 48.2] 10.8 [1.07, 61.6]
SFA 18:0 (Octadecanoic) (gm)
Mean (SD) 6.43 (3.86) 4.70 (2.69) 5.63 (3.48) <0.001
Median [Min, Max] 5.86 [0.500, 28.8] 4.17 [0.319, 22.7] 4.97 [0.319, 28.8]
MFA 16:1 (Hexadecenoic) (gm)
Mean (SD) 1.38 (0.954) 0.973 (0.669) 1.19 (0.859) <0.001
Median [Min, Max] 1.21 [0.0130, 7.09] 0.830 [0.0200, 7.20] 0.980 [0.0130, 7.20]
MFA 18:1 (Octadecenoic) (gm)
Mean (SD) 27.4 (15.5) 20.0 (10.9) 24.0 (14.1) <0.001
Median [Min, Max] 24.7 [2.02, 101] 17.8 [1.90, 80.3] 21.1 [1.90, 101]
MFA 20:1 (Eicosenoic) (gm)
Mean (SD) 0.199 (0.390) 0.149 (0.393) 0.176 (0.392) <0.001
Median [Min, Max] 0.136 [0, 10.8] 0.100 [0, 10.7] 0.120 [0, 10.8]
MFA 22:1 (Docosenoic) (gm)
Mean (SD) 0.0457 (0.163) 0.0308 (0.0958) 0.0388 (0.137) 0.00992
Median [Min, Max] 0.0100 [0, 2.77] 0.00900 [0, 1.48] 0.0100 [0, 2.77]
PFA 18:2 (Octadecadienoic) (gm)
Mean (SD) 14.2 (9.12) 11.4 (7.15) 12.9 (8.38) <0.001
Median [Min, Max] 12.4 [0.340, 67.0] 9.61 [1.11, 46.8] 11.0 [0.340, 67.0]
PFA 20:4 (Eicosatetraenoic) (gm)
Mean (SD) 0.155 (0.140) 0.111 (0.0974) 0.135 (0.124) <0.001
Median [Min, Max] 0.128 [0, 1.64] 0.0870 [0, 0.610] 0.104 [0, 1.64]
PFA 18:3 (Octadecatrienoic) (gm)
Mean (SD) 1.42 (0.980) 1.15 (0.749) 1.30 (0.891) <0.001
Median [Min, Max] 1.20 [0.107, 9.07] 0.936 [0.0400, 4.75] 1.09 [0.0400, 9.07]
PFA 18:4 (Octadecatetraenoic) (gm)
Mean (SD) 0.00715 (0.0307) 0.00549 (0.0278) 0.00638 (0.0294) 0.511
Median [Min, Max] 0 [0, 0.356] 0 [0, 0.367] 0 [0, 0.367]
PFA 20:5 (Eicosapentaenoic) (gm)
Mean (SD) 0.0481 (0.138) 0.0343 (0.115) 0.0417 (0.128) 0.0464
Median [Min, Max] 0.00550 [0, 1.67] 0.00300 [0, 1.41] 0.00400 [0, 1.67]
PFA 22:5 (Docosapentaenoic) (gm)
Mean (SD) 0.0206 (0.0558) 0.0157 (0.0367) 0.0183 (0.0480) 0.864
Median [Min, Max] 0 [0, 0.600] 0 [0, 0.380] 0 [0, 0.600]
PFA 22:6 (Docosahexaenoic) (gm)
Mean (SD) 0.0921 (0.212) 0.0696 (0.174) 0.0817 (0.196) 0.017
Median [Min, Max] 0.0300 [0, 2.34] 0.0200 [0, 2.01] 0.0270 [0, 2.34]
# participants with 0 values
subtypes <- c(
    "DRXTS040", "DRXTS060", "DRXTS080", "DRXTS100", "DRXTS120", "DRXTS140",
    "DRXTS160", "DRXTS180", "DRXTM161", "DRXTM181", "DRXTM201", "DRXTM221",
    "DRXTP182", "DRXTP204", "DRXTP183", "DRXTP184", "DRXTP205", "DRXTP225",
    "DRXTP226"
)

# For each column in `columns`, print the column name, the number of selected
# participants whose value is / is not exactly 0, and the same counts divided
# by the number of selected participants.
#   data    : data frame holding the columns
#   columns : character vector of column names
#   rows    : logical vector selecting the participants to summarise
# The denominator is computed from `rows`, so it cannot drift out of sync
# with the selection (previously 1771, 2220, 952 and 819 were typed in).
print_zero_counts <- function(data, columns, rows) {
    n_selected <- sum(rows, na.rm = TRUE)
    for (column in columns) {
        print(column)
        zero_counts <- table(data[[column]][rows] == 0)
        print(zero_counts)
        print(zero_counts / n_selected)
    }
    invisible(NULL)
}

# primary analyses
print_zero_counts(df, subtypes, df$use_fat_covar)

# imputation analyses
print_zero_counts(df, subtypes, df$use_fat)

# males
print_zero_counts(df, subtypes, df$use_fat_covar & df$RIAGENDR == 1)

# females
print_zero_counts(df, subtypes, df$use_fat_covar & df$RIAGENDR == 2)