PBMCs

Data processing and DNAm age calculations

Normalization using funnorm

# path = file.path()

# load RGset
load(paste0(path, "/pmbcs_RGset.rdata"))

# Funnorm
funnorm = preprocessFunnorm(RGset)
betas_fun = minfi::getBeta(funnorm)

pheno = pData(funnorm)
pheno$sample_name = rownames(pheno)
# adding Sentrix ID for subsequent batch correction in downstream analysis
sentrix_id = as.factor(
  stringr::str_extract_all(pheno$sample_name, "^([0-9]{12})",
                           simplify = TRUE))
pheno = data.frame(pheno, sentrix_id)
# remove features without any variation
pheno = pheno[, apply(pheno, 2, function(x) length(unique(x)) > 1)]

# one sample with sex mismatch
table(pheno$sex, pheno$predictedSex)
     # F  M
  # F 19  0
  # M  1 20  
  
# save the processed betas and pheno data
save(betas_fun, pheno, file = paste0(path, "/PBMC_betas_funnorm_pheno.RData"))

Process data for dnamage

# load data
# load('PBMC_betas_funnorm_pheno.RData')

# filtering beta values using CpGs from https://dnamage.genetics.ucla.edu/new
datMiniAnnotation=read.csv(paste0(path, "/datMiniAnnotation3.csv"))
dim(datMiniAnnotation)
# 30084     7

match1=match(datMiniAnnotation[,1], rownames(betas_fun))

# filter probes
betas_fun_reduced = betas_fun[match1,]
dim(betas_fun_reduced)
# 30084    40

betas_fun_reduced = data.frame(ProbeID = datMiniAnnotation$Name, betas_fun_reduced)
dim(betas_fun_reduced)
# 30084    41

betas_fun_reduced[,1]=as.character(betas_fun_reduced[,1])

betas_fun_reduced = data.frame(betas_fun_reduced)

for (i in 2:dim(betas_fun_reduced)[[2]] ){betas_fun_reduced[,i]=
as.numeric(as.character(gsub(x= betas_fun_reduced[,i],pattern="\"",replacement=""))) }

colNames = colnames(betas_fun_reduced)[2:ncol(betas_fun_reduced)]
colNames = substr(colNames, 2, nchar(colNames))
colnames(betas_fun_reduced)[2:ncol(betas_fun_reduced)] = colNames

# number of missing probes
sum(is.na(betas_fun_reduced[,2]))
# 2558

# save filtered probes
write.table(betas_fun_reduced, paste0(path, "/PBMC_Betas_funnorm_reduced_for_clock.csv"), row.names=F, sep="," )

# pheno/annotation file
pheno$Female[pheno$sex == 'F'] = 1
pheno$Female[pheno$sex == 'M'] = 0

ageAnno = data.frame(ID = rownames(pheno), Age = pheno$age, Female = pheno$Female, Tissue = 'Blood PBMC')

all(ageAnno$ID == colnames(betas_fun_reduced)[-1])
# TRUE
identical(ageAnno$ID, colnames(betas_fun_reduced)[-1])
# TRUE

# save annotation file
write.table(ageAnno, paste0(path, "/PBMCs_anno_for_clock.csv"), row.names=F, sep="," )

DNA methylation age calculator

https://dnamage.genetics.ucla.edu/submit

upload and select “normalize data” and “advanced analysis”

Six clocks: (1) DNAmAge = Horvath clock; (2) DNAmAgeHannum = Hannum clock; (3) DNAmPhenoAge = PhenoAge Clock; (4) DNAmAgeSkinBloodClock = Skin and blood clock; (5) DNAmGrimAge = GrimAge; DNAmTL = (6) DNAm telomere length

Acceleration measurements:(1) DNAmAge = AgeAccelerationResidual; (2) DNAmAgeHannum = AgeAccelerationResidualHannum; (3) DNAmPhenoAge = AgeAccelPheno; (4) DNAmAgeSkinBloodClock = DNAmAgeSkinBloodClockAdjAge; (5) DNAmGrimAge = AgeAccelGrim; (6) DNAmTL = DNAmTLAdjAge

IEAA = intrinsic epigenetic age (derived from Horvath’s acceleration; adjusting for cell-types); EAA = extrinsic epigenetic age (derived from Hannum’s clock)

# load result
PBMC_age = read.csv(paste0(path, '/PBMC_Betas_funnorm_reduced_for_clock.output.csv'))
PBMC_age$SampleID = substr(PBMC_age$SampleID, 2, nchar(PBMC_age$SampleID))

all(PBMC_age$SampleID == pheno$sample_name)
# TRUE
identical(PBMC_age$SampleID, pheno$sample_name)
# TRUE

# merge result with pheno data
pheno = cbind(pheno, PBMC_age[,-1])

# predicted tissue
table(pheno$predictedTissue)
# Blood PBMC 
        # 40 
        
# drop duplicated column
pheno = pheno[,-94]

Save pheno data with clocks

# drop sample with sex mismatch 
pheno = pheno[pheno$sex == pheno$predictedSex,]
dim(pheno)
# 39 154

table(pheno$sex, pheno$predictedSex)
     # F  M
  # F 19  0
  # M  0 20

pheno$ID2 = as.numeric(substring(pheno$study_id, 3, 9))

# add additional exposure variables and rename pheno
all_pheno = read.sas7bdat(paste0(path, '/AC13_052018.sas7bdat'))

# participants with DNAm data
all_pheno_DNam = all_pheno[all_pheno$ID2 %in% pheno$ID2,]

# subset As variables of interest
all_pheno_DNam = all_pheno_DNam[,c('ID2', 'CUM_AGE0_10', 'CUM_AGE20ABOVE', 'CUM_LAG0', 'LIFEALAG0', 'LIFEAAGE0_10', 'LIFEAAGE20ABOVE', 'as_birth', 'cum_as_an')]
  
PBMC_pheno = merge(pheno, all_pheno_DNam, by = 'ID2', all.x = T)

# variables as factors
PBMC_pheno$exposed = factor(PBMC_pheno$exposed, levels = c('no', 'yes'))
PBMC_pheno$predictedSex = factor(PBMC_pheno$predictedSex, levels = c('F', 'M'))
PBMC_pheno$smoking = factor(PBMC_pheno$smoking, levels = c('no', 'yes'))

write.csv(PBMC_pheno, file = paste0(path, '/PBMC_fun_pheno_withAges.csv'))

Scatter plots of Horvath clocks and age

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

Linear models of associations between age acceleration and exposure (dichotomized)

Adjusted robust linear regression results

B_uadj lower_unadj upper_unadj p_unadj B_adj lower_adj upper_adj p_adj
AgeAccelerationResidual 1.1029684 -1.8035370 4.0094737 0.4570142 0.7840953 -2.5681020 4.1362925 0.6466327
AgeAccelerationResidualHannum 1.8664940 -1.3401143 5.0731023 0.2539321 2.5068961 -0.8139845 5.8277768 0.1389920
AgeAccelPheno 6.3986801 2.2569740 10.5403863 0.0024616 6.0100062 2.5997605 9.4202520 0.0005521
DNAmAgeSkinBloodClockAdjAge 0.8566773 -0.9926463 2.7060009 0.3639151 0.7415603 -1.4841599 2.9672805 0.5137458
AgeAccelGrim 0.7696887 -1.8396811 3.3790585 0.5631745 0.6278798 -1.9556222 3.2113817 0.6338332
DNAmTLAdjAge 0.0272048 -0.1267176 0.1811273 0.7290334 0.0362969 -0.1042872 0.1768811 0.6128310
IEAA 0.5293092 -1.8449489 2.9035673 0.6621492 0.1169399 -2.4061635 2.6400433 0.9276200
EEAA 3.3738615 -1.2370242 7.9847472 0.1515329 3.6568485 -0.9121843 8.2258813 0.1167255

Plotting

Adjusted robust linear regression results

Robust linear models of associations between age acceleration and As concentration at birth (continuous)

B_uadj lower_unadj upper_unadj p_unadj B_adj lower_adj upper_adj p_adj
AgeAccelerationResidual 0.0823272 -0.2862256 0.4508801 0.6615195 0.0670134 -0.3289081 0.4629350 0.7400838
AgeAccelerationResidualHannum 0.1733740 -0.0802910 0.4270390 0.1803790 0.2190451 -0.0445277 0.4826178 0.1033448
AgeAccelPheno 0.2435845 -0.0545365 0.5417055 0.1092840 0.3178199 0.0025806 0.6330592 0.0481538
DNAmAgeSkinBloodClockAdjAge 0.0791054 -0.1188209 0.2770318 0.4334271 0.0712736 -0.1368320 0.2793793 0.5020527
AgeAccelGrim -0.0069677 -0.1501505 0.1362151 0.9240151 0.0362655 -0.1768427 0.2493736 0.7387307
DNAmTLAdjAge 0.0054715 -0.0037311 0.0146741 0.2438881 0.0044017 -0.0090040 0.0178074 0.5198703
IEAA 0.0489152 -0.2117025 0.3095330 0.7129740 0.0165341 -0.2558606 0.2889288 0.9053006
EEAA 0.2041484 -0.1691494 0.5774462 0.2837823 0.2867282 -0.0916870 0.6651433 0.1375226

Rogust linear models of dichotomous exposure adjusting for sex, smoking, and lifetime average As concentrations

B_uadj lower_unadj upper_unadj p_unadj B_adj lower_adj upper_adj p_adj
AgeAccelerationResidual 1.1029684 -1.8035370 4.0094737 0.4570142 0.7107540 -4.4597419 5.8812499 0.7876039
AgeAccelerationResidualHannum 1.8664940 -1.3401143 5.0731023 0.2539321 1.5824725 -2.7975631 5.9625081 0.4788710
AgeAccelPheno 6.3986801 2.2569740 10.5403863 0.0024616 6.6478627 1.9091587 11.3865668 0.0059667
DNAmAgeSkinBloodClockAdjAge 0.8566773 -0.9926463 2.7060009 0.3639151 1.2887800 -0.9237954 3.5013554 0.2536041
AgeAccelGrim 0.7696887 -1.8396811 3.3790585 0.5631745 0.8077982 -3.8267358 5.4423323 0.7326359
DNAmTLAdjAge 0.0272048 -0.1267176 0.1811273 0.7290334 0.0128747 -0.1981477 0.2238971 0.9048163
IEAA 0.5293092 -1.8449489 2.9035673 0.6621492 0.3626017 -4.2765122 5.0017155 0.8782450
EEAA 3.3738615 -1.2370242 7.9847472 0.1515329 2.0306519 -3.6639896 7.7252935 0.4846124

Rogust linear models of dichotomous exposure adjusting for sex, smoking, and cell type proportions

B_uadj lower_unadj upper_unadj p_unadj B_adj lower_adj upper_adj p_adj
AgeAccelerationResidual 1.1029684 -1.8035370 4.0094737 0.4570142 0.1000837 -2.6873074 2.8874747 0.9438958
AgeAccelerationResidualHannum 1.8664940 -1.3401143 5.0731023 0.2539321 3.1106589 0.1250439 6.0962738 0.0411464
AgeAccelPheno 6.3986801 2.2569740 10.5403863 0.0024616 4.9355903 1.8840838 7.9870967 0.0015239
DNAmAgeSkinBloodClockAdjAge 0.8566773 -0.9926463 2.7060009 0.3639151 1.7705859 0.5145553 3.0266165 0.0057291
AgeAccelGrim 0.7696887 -1.8396811 3.3790585 0.5631745 0.3162269 -2.0886371 2.7210908 0.7966192
DNAmTLAdjAge 0.0272048 -0.1267176 0.1811273 0.7290334 0.0022695 -0.1561092 0.1606481 0.9775943
IEAA 0.5293092 -1.8449489 2.9035673 0.6621492 -0.4336447 -3.3027906 2.4355012 0.7670542
EEAA 3.3738615 -1.2370242 7.9847472 0.1515329 4.8981260 1.2230517 8.5732004 0.0089953

Plotting

Buccal cells

Data processing and DNAm age calculations

Normalization using funnorm

# remove PBMC objects
rm(ageAnno, betas_fun, betas_fun_reduced, funnorm, pheno, RGset)

# load RGset
load(paste0(path, "/buccal_RGset.rdata"))

# B14's detection p-values are significantly larger
RGset = RGset[,-26]
pheno = pData(RGset)
dim(pheno)
# 39 34

# Funnorm
funnorm = preprocessFunnorm(RGset)
betas_fun = minfi::getBeta(funnorm)

pheno = pData(funnorm)
pheno$sample_name = rownames(pheno)
# adding Sentrix ID for subsequent batch correction in downstream analysis
sentrix_id = as.factor(
  stringr::str_extract_all(pheno$sample_name, "^([0-9]{12})",
                           simplify = TRUE))
pheno = data.frame(pheno, sentrix_id)
# remove features without any variation
pheno = pheno[, apply(pheno, 2, function(x) length(unique(x)) > 1)]

# one sample with sex mismatch
table(pheno$sex, pheno$predictedSex)
     # F  M
  # F 19  0
  # M  1 20  d

# save the processed betas and pheno data
save(betas_fun, pheno, file = paste0(path, "/buccalCells_betas_funnorm_pheno.RData"))

###$ Process data for dnamage

# load data
# load('buccalCells_betas_funnorm_pheno.RData')

match1=match(datMiniAnnotation[,1], rownames(betas_fun))

# filter probes
betas_fun_reduced = betas_fun[match1,]
dim(betas_fun_reduced)
# 30084    39

betas_fun_reduced = data.frame(ProbeID = datMiniAnnotation$Name, betas_fun_reduced)
dim(betas_fun_reduced)
# 30084    41

betas_fun_reduced[,1]=as.character(betas_fun_reduced[,1])

betas_fun_reduced = data.frame(betas_fun_reduced)

for (i in 2:dim(betas_fun_reduced)[[2]] ){betas_fun_reduced[,i]=
as.numeric(as.character(gsub(x= betas_fun_reduced[,i],pattern="\"",replacement=""))) }

colNames = colnames(betas_fun_reduced)[2:ncol(betas_fun_reduced)]
colNames = substr(colNames, 2, nchar(colNames))
colnames(betas_fun_reduced)[2:ncol(betas_fun_reduced)] = colNames

# number of missing probes
sum(is.na(betas_fun_reduced[,2]))
# 2558

# save filtered probes
write.table(betas_fun_reduced, paste0(path, "/buccalCells_Betas_funnorm_reduced_for_clock.csv"), row.names=F, sep="," )

# pheno/annotation file
pheno$Female[pheno$sex == 'F'] = 1
pheno$Female[pheno$sex == 'M'] = 0

ageAnno = data.frame(ID = rownames(pheno), Age = pheno$age, Female = pheno$Female, Tissue = 'Blood PBMC')

all(ageAnno$ID == colnames(betas_fun_reduced)[-1])
# TRUE
identical(ageAnno$ID, colnames(betas_fun_reduced)[-1])
# TRUE

# save annotation file
write.table(ageAnno, paste0(path, "/buccalCells_anno_for_clock.csv"), row.names=F, sep="," )

DNA methylation age calculator

# load result
buccalCell_age = read.csv(paste0(path, '/buccalCells_Betas_funnorm_reduced_for_clock.output.csv'))
buccalCell_age$SampleID = substr(buccalCell_age$SampleID, 2, nchar(buccalCell_age$SampleID))

all(buccalCell_age$SampleID == pheno$sample_name)
# TRUE
identical(buccalCell_age$SampleID, pheno$sample_name)
# TRUE

# merge result with pheno data
pheno = cbind(pheno, buccalCell_age[,-1])

# predicted tissue
table(pheno$predictedTissue)
# Buccal Saliva 
    # 22     17 
        
# drop duplicated column
pheno = pheno[,-92]

Save pheno data with clocks

# drop sample with sex mismatch 
pheno = pheno[pheno$sex == pheno$predictedSex,]
dim(pheno)
# 38 142

table(pheno$sex, pheno$predictedSex)
     # F  M
  # F 19  0
  # M  0 19

pheno$ID2 = as.numeric(substring(pheno$study_id, 3, 9))

# add additional exposure variables and rename pheno
all_pheno = read.sas7bdat(paste0(path, '/AC13_052018.sas7bdat'))

# participants with DNAm data
all_pheno_DNam = all_pheno[all_pheno$ID2 %in% pheno$ID2,]

# subset As variables of interest
all_pheno_DNam = all_pheno_DNam[,c('ID2', 'CUM_AGE0_10', 'CUM_AGE20ABOVE', 'CUM_LAG0', 'LIFEALAG0', 'LIFEAAGE0_10', 'LIFEAAGE20ABOVE', 'as_birth', 'cum_as_an')]
  
buccalCell_pheno = merge(pheno, all_pheno_DNam, by = 'ID2', all.x = T)

# variables as factors
buccalCell_pheno$exposed = factor(buccalCell_pheno$exposed, levels = c('no', 'yes'))
buccalCell_pheno$predictedSex = factor(buccalCell_pheno$predictedSex, levels = c('F', 'M'))
buccalCell_pheno$smoking = factor(buccalCell_pheno$smoking, levels = c('no', 'yes'))

write.csv(buccalCell_pheno, paste0(path, '/buccalCell_fun_pheno_withAges.csv'))

Scatter plots of Horvath clocks and age

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

Linear models of associations between age acceleration and exposure

Adjusted robust linear regression results

B_uadj lower_unadj upper_unadj p_unadj B_adj lower_adj upper_adj p_adj
AgeAccelerationResidual 3.4872988 -0.2372958 7.2118933 0.0664918 2.3060280 -1.0380417 5.6500978 0.1765141
AgeAccelerationResidualHannum -0.4947023 -3.9491455 2.9597410 0.7789546 -1.6745771 -6.4217628 3.0726085 0.4893266
AgeAccelPheno 6.0728322 0.1562629 11.9894015 0.0442485 4.8796976 -1.6007878 11.3601830 0.1399922
DNAmAgeSkinBloodClockAdjAge 0.7004360 -1.4718879 2.8727600 0.5274107 0.6239160 -2.1449018 3.3927338 0.6587413
AgeAccelGrim -1.4091434 -5.1208658 2.3025790 0.4568196 -1.6090914 -5.4533169 2.2351340 0.4119942
DNAmTLAdjAge 0.1584270 -0.1426494 0.4595034 0.3023828 0.1833202 -0.1783693 0.5450097 0.3205168
IEAA 3.2436850 -0.2037145 6.6910844 0.0651620 2.3841868 -1.2896274 6.0580010 0.2033897
EEAA -0.2471809 -3.9197674 3.4254056 0.8950523 -1.8100192 -7.2590805 3.6390421 0.5150188

Plotting

Adjusted robust linear regression results

Combining PBMC and buccal cell plots

# scatter plots 
titles1 = ggplot(data.frame(x = c(1:3), y = 1)) + annotate('text', label = 'Horvath', x = 1.1, y = 1, size = 3.5) + annotate('text', label = 'Hannum', x = 2, y = 1, size = 3.5) + annotate('text', label = 'Skin and blood', x = 2.92, y = 1, size = 3.5) + xlim(c(0.75,3.25)) + theme_void() 
titles2 = ggplot(data.frame(x = c(1:3), y = 1)) + annotate('text', label = 'PhenoAge', x = 1.18, y = 1, size = 3.5) + annotate('text', label = 'GrimAge', x = 2, y = 1, size = 3.5) + annotate('text', label = 'DNAmTL', x = 2.95, y = 1, size = 3.5)+ xlim(c(0.75,3.25)) + theme_void()

header1 = ggplot(data.frame(x = c(1:3), y = 1)) + annotate('text', label = 'PBMCs', x = 2, y = 1, size = 5) + xlim(c(0.75,3.25)) + theme_void() 
header2 = ggplot(data.frame(x = c(1:3), y = 1)) + annotate('text', label = 'Buccal cells', x = 2, y = 1, size = 5) + xlim(c(0.75,3.25)) + theme_void() 

blank = ggplot(mtcars, aes(x = wt, y = mpg)) + geom_blank()

scatter_DNAmAge_PBMC2 = scatter_DNAmAge_PBMC + theme(axis.title.x = element_blank()) + labs(y = 'DNAm age')
scatter_DNAmAgeHannum_PBMC2 = scatter_DNAmAgeHannum_PBMC + theme(axis.title.x = element_blank(), axis.title.y = element_blank())
scatter_DNAmPhenoAge_PBMC2 = scatter_DNAmPhenoAge_PBMC + theme(axis.title.x = element_blank()) + labs(y = 'DNAm age')
scatter_DNAmAgeSkinBloodClock_PBMC2 = scatter_DNAmAgeSkinBloodClock_PBMC + theme(axis.title.x = element_blank(), axis.title.y = element_blank()) 
scatter_DNAmGrimAge_PBMC2 = scatter_DNAmGrimAge_PBMC + theme(axis.title.x = element_blank(), axis.title.y = element_blank())
scatter_DNAmTL_PBMC2 = scatter_DNAmTL_PBMC + theme(axis.title.x = element_blank(), axis.title.y = element_blank())

scatter_DNAmAge_buccalCell2 = scatter_DNAmAge_buccalCell + labs(y = 'DNAm age') + theme(axis.title.x = element_blank())
scatter_DNAmAgeHannum_buccalCell2 = scatter_DNAmAgeHannum_buccalCell + theme(axis.title.x = element_blank(), axis.title.y = element_blank())
scatter_DNAmPhenoAge_buccalCell2 = scatter_DNAmPhenoAge_buccalCell + labs(x = 'Chronological age', y = 'DNAm age')
scatter_DNAmAgeSkinBloodClock_buccalCell2 = scatter_DNAmAgeSkinBloodClock_buccalCell + theme(axis.title.x = element_blank(), axis.title.y = element_blank())
scatter_DNAmGrimAge_buccalCell2 = scatter_DNAmGrimAge_buccalCell + theme(axis.title.y = element_blank()) + labs(x = 'Chronological age')
scatter_DNAmTL_buccalCell2 = scatter_DNAmTL_buccalCell + theme(axis.title.y = element_blank()) + labs(x = 'Chronological age')

g1 <- arrangeGrob(scatter_DNAmAge_PBMC2, scatter_DNAmAgeHannum_PBMC2, scatter_DNAmAgeSkinBloodClock_PBMC2, nrow = 1)
g2 <- arrangeGrob(scatter_DNAmPhenoAge_PBMC2, scatter_DNAmGrimAge_PBMC2, scatter_DNAmTL_PBMC2, nrow = 1)
g3 <- arrangeGrob(scatter_DNAmAge_buccalCell2,  scatter_DNAmAgeHannum_buccalCell2, scatter_DNAmAgeSkinBloodClock_buccalCell2, nrow = 1)
g4 <- arrangeGrob(scatter_DNAmPhenoAge_buccalCell2, scatter_DNAmGrimAge_buccalCell2, scatter_DNAmTL_buccalCell2, nrow = 1)

grid.arrange(header1,titles1,g1,titles2,g2,header2,titles1,g3,titles2,g4,nrow=10, heights=unit(c(0.6,0.4,5.25,0.4,5.25,0.6,0.4,5.25,0.4,5.4), c("cm")))
quartz.save(file = 'all_clocks_scatterPlots_long.png', type = 'png', dpi = 300)

# forest plots
fp_buccalcells_robust2 = fp_buccalcells_robust + theme(axis.title.y = element_blank())
grid.arrange(fp_PBMCs_robust, fp_buccalcells_robust2, ncol = 2)
# quartz.save(file = paste0(path, '/all_ageAccelAssoc_forestPlots.png'), type = 'png', dpi = 300)

# forest plots
fp_buccalcells_robust2 = fp_buccalcells_robust + theme(axis.title.y = element_blank())
grid.arrange(fp_PBMCs_robust, fp_buccalcells_robust2, ncol = 2)
# quartz.save(file = paste0(path, '/all_ageAccelAssoc_forestPlots.png'), type = 'png', dpi = 300)