Chile Arsenic Study, epigenetic clocks

PBMCs

Data processing and DNAm age calculations

Normalization using funnorm

# path = file.path()

# load RGset
load(paste0(path, "/pmbcs_RGset.rdata"))

# Funnorm
funnorm = preprocessFunnorm(RGset)
betas_fun = minfi::getBeta(funnorm)

pheno = pData(funnorm)
pheno$sample_name = rownames(pheno)
# adding Sentrix ID for subsequent batch correction in downstream analysis
sentrix_id = as.factor(
  stringr::str_extract_all(pheno$sample_name, "^([0-9]{12})",
                           simplify = TRUE))
pheno = data.frame(pheno, sentrix_id)
# remove features without any variation
pheno = pheno[, apply(pheno, 2, function(x) length(unique(x)) > 1)]

# one sample with sex mismatch
table(pheno$sex, pheno$predictedSex)
     # F  M
  # F 19  0
  # M  1 20  
  
# save the processed betas and pheno data
save(betas_fun, pheno, file = paste0(path, "/PBMC_betas_funnorm_pheno.RData"))

Process data for dnamage

# load data
# load('PBMC_betas_funnorm_pheno.RData')

# filtering beta values using CpGs from https://dnamage.genetics.ucla.edu/new
datMiniAnnotation=read.csv(paste0(path, "/datMiniAnnotation3.csv"))
dim(datMiniAnnotation)
# 30084     7

match1=match(datMiniAnnotation[,1], rownames(betas_fun))

# filter probes
betas_fun_reduced = betas_fun[match1,]
dim(betas_fun_reduced)
# 30084    40

betas_fun_reduced = data.frame(ProbeID = datMiniAnnotation$Name, betas_fun_reduced)
dim(betas_fun_reduced)
# 30084    41

betas_fun_reduced[,1]=as.character(betas_fun_reduced[,1])

betas_fun_reduced = data.frame(betas_fun_reduced)

for (i in 2:dim(betas_fun_reduced)[[2]] ){betas_fun_reduced[,i]=
as.numeric(as.character(gsub(x= betas_fun_reduced[,i],pattern="\"",replacement=""))) }

colNames = colnames(betas_fun_reduced)[2:ncol(betas_fun_reduced)]
colNames = substr(colNames, 2, nchar(colNames))
colnames(betas_fun_reduced)[2:ncol(betas_fun_reduced)] = colNames

# number of missing probes
sum(is.na(betas_fun_reduced[,2]))
# 2558

# save filtered probes
write.table(betas_fun_reduced, paste0(path, "/PBMC_Betas_funnorm_reduced_for_clock.csv"), row.names=F, sep="," )

# pheno/annotation file
pheno$Female[pheno$sex == 'F'] = 1
pheno$Female[pheno$sex == 'M'] = 0

ageAnno = data.frame(ID = rownames(pheno), Age = pheno$age, Female = pheno$Female, Tissue = 'Blood PBMC')

all(ageAnno$ID == colnames(betas_fun_reduced)[-1])
# TRUE
identical(ageAnno$ID, colnames(betas_fun_reduced)[-1])
# TRUE

# save annotation file
write.table(ageAnno, paste0(path, "/PBMCs_anno_for_clock.csv"), row.names=F, sep="," )

DNA methylation age calculator

https://dnamage.genetics.ucla.edu/submit

upload and select “normalize data” and “advanced analysis”

Six clocks: (1) DNAmAge = Horvath clock; (2) DNAmAgeHannum = Hannum clock; (3) DNAmPhenoAge = PhenoAge Clock; (4) DNAmAgeSkinBloodClock = Skin and blood clock; (5) DNAmGrimAge = GrimAge; DNAmTL = (6) DNAm telomere length

Acceleration measurements:(1) DNAmAge = AgeAccelerationResidual; (2) DNAmAgeHannum = AgeAccelerationResidualHannum; (3) DNAmPhenoAge = AgeAccelPheno; (4) DNAmAgeSkinBloodClock = DNAmAgeSkinBloodClockAdjAge; (5) DNAmGrimAge = AgeAccelGrim; (6) DNAmTL = DNAmTLAdjAge

IEAA = intrinsic epigenetic age (derived from Horvath’s acceleration; adjusting for cell-types); EAA = extrinsic epigenetic age (derived from Hannum’s clock)

# load result
PBMC_age = read.csv(paste0(path, '/PBMC_Betas_funnorm_reduced_for_clock.output.csv'))
PBMC_age$SampleID = substr(PBMC_age$SampleID, 2, nchar(PBMC_age$SampleID))

all(PBMC_age$SampleID == pheno$sample_name)
# TRUE
identical(PBMC_age$SampleID, pheno$sample_name)
# TRUE

# merge result with pheno data
pheno = cbind(pheno, PBMC_age[,-1])

# predicted tissue
table(pheno$predictedTissue)
# Blood PBMC 
        # 40 
        
# drop duplicated column
pheno = pheno[,-94]

Save pheno data with clocks

# drop sample with sex mismatch 
pheno = pheno[pheno$sex == pheno$predictedSex,]
dim(pheno)
# 39 154

table(pheno$sex, pheno$predictedSex)
     # F  M
  # F 19  0
  # M  0 20

pheno$ID2 = as.numeric(substring(pheno$study_id, 3, 9))

# add additional exposure variables and rename pheno
all_pheno = read.sas7bdat(paste0(path, '/AC13_052018.sas7bdat'))

# participants with DNAm data
all_pheno_DNam = all_pheno[all_pheno$ID2 %in% pheno$ID2,]

# subset As variables of interest
all_pheno_DNam = all_pheno_DNam[,c('ID2', 'CUM_AGE0_10', 'CUM_AGE20ABOVE', 'CUM_LAG0', 'LIFEALAG0', 'LIFEAAGE0_10', 'LIFEAAGE20ABOVE', 'as_birth', 'cum_as_an')]
  
PBMC_pheno = merge(pheno, all_pheno_DNam, by = 'ID2', all.x = T)

# variables as factors
PBMC_pheno$exposed = factor(PBMC_pheno$exposed, levels = c('no', 'yes'))
PBMC_pheno$predictedSex = factor(PBMC_pheno$predictedSex, levels = c('F', 'M'))
PBMC_pheno$smoking = factor(PBMC_pheno$smoking, levels = c('no', 'yes'))

write.csv(PBMC_pheno, file = paste0(path, '/PBMC_fun_pheno_withAges.csv'))

Scatter plots of Horvath clocks and age

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

Linear models of associations between age acceleration and exposure (dichotomized)

Adjusted robust linear regression results

	B_uadj	lower_unadj	upper_unadj	p_unadj	B_adj	lower_adj	upper_adj	p_adj
AgeAccelerationResidual	1.1029684	-1.8035370	4.0094737	0.4570142	0.7840953	-2.5681020	4.1362925	0.6466327
AgeAccelerationResidualHannum	1.8664940	-1.3401143	5.0731023	0.2539321	2.5068961	-0.8139845	5.8277768	0.1389920
AgeAccelPheno	6.3986801	2.2569740	10.5403863	0.0024616	6.0100062	2.5997605	9.4202520	0.0005521
DNAmAgeSkinBloodClockAdjAge	0.8566773	-0.9926463	2.7060009	0.3639151	0.7415603	-1.4841599	2.9672805	0.5137458
AgeAccelGrim	0.7696887	-1.8396811	3.3790585	0.5631745	0.6278798	-1.9556222	3.2113817	0.6338332
DNAmTLAdjAge	0.0272048	-0.1267176	0.1811273	0.7290334	0.0362969	-0.1042872	0.1768811	0.6128310
IEAA	0.5293092	-1.8449489	2.9035673	0.6621492	0.1169399	-2.4061635	2.6400433	0.9276200
EEAA	3.3738615	-1.2370242	7.9847472	0.1515329	3.6568485	-0.9121843	8.2258813	0.1167255

Plotting

Adjusted robust linear regression results

Robust linear models of associations between age acceleration and As concentration at birth (continuous)

	B_uadj	lower_unadj	upper_unadj	p_unadj	B_adj	lower_adj	upper_adj	p_adj
AgeAccelerationResidual	0.0823272	-0.2862256	0.4508801	0.6615195	0.0670134	-0.3289081	0.4629350	0.7400838
AgeAccelerationResidualHannum	0.1733740	-0.0802910	0.4270390	0.1803790	0.2190451	-0.0445277	0.4826178	0.1033448
AgeAccelPheno	0.2435845	-0.0545365	0.5417055	0.1092840	0.3178199	0.0025806	0.6330592	0.0481538
DNAmAgeSkinBloodClockAdjAge	0.0791054	-0.1188209	0.2770318	0.4334271	0.0712736	-0.1368320	0.2793793	0.5020527
AgeAccelGrim	-0.0069677	-0.1501505	0.1362151	0.9240151	0.0362655	-0.1768427	0.2493736	0.7387307
DNAmTLAdjAge	0.0054715	-0.0037311	0.0146741	0.2438881	0.0044017	-0.0090040	0.0178074	0.5198703
IEAA	0.0489152	-0.2117025	0.3095330	0.7129740	0.0165341	-0.2558606	0.2889288	0.9053006
EEAA	0.2041484	-0.1691494	0.5774462	0.2837823	0.2867282	-0.0916870	0.6651433	0.1375226

Rogust linear models of dichotomous exposure adjusting for sex, smoking, and lifetime average As concentrations

	B_uadj	lower_unadj	upper_unadj	p_unadj	B_adj	lower_adj	upper_adj	p_adj
AgeAccelerationResidual	1.1029684	-1.8035370	4.0094737	0.4570142	0.7107540	-4.4597419	5.8812499	0.7876039
AgeAccelerationResidualHannum	1.8664940	-1.3401143	5.0731023	0.2539321	1.5824725	-2.7975631	5.9625081	0.4788710
AgeAccelPheno	6.3986801	2.2569740	10.5403863	0.0024616	6.6478627	1.9091587	11.3865668	0.0059667
DNAmAgeSkinBloodClockAdjAge	0.8566773	-0.9926463	2.7060009	0.3639151	1.2887800	-0.9237954	3.5013554	0.2536041
AgeAccelGrim	0.7696887	-1.8396811	3.3790585	0.5631745	0.8077982	-3.8267358	5.4423323	0.7326359
DNAmTLAdjAge	0.0272048	-0.1267176	0.1811273	0.7290334	0.0128747	-0.1981477	0.2238971	0.9048163
IEAA	0.5293092	-1.8449489	2.9035673	0.6621492	0.3626017	-4.2765122	5.0017155	0.8782450
EEAA	3.3738615	-1.2370242	7.9847472	0.1515329	2.0306519	-3.6639896	7.7252935	0.4846124

Rogust linear models of dichotomous exposure adjusting for sex, smoking, and cell type proportions

	B_uadj	lower_unadj	upper_unadj	p_unadj	B_adj	lower_adj	upper_adj	p_adj
AgeAccelerationResidual	1.1029684	-1.8035370	4.0094737	0.4570142	0.1000837	-2.6873074	2.8874747	0.9438958
AgeAccelerationResidualHannum	1.8664940	-1.3401143	5.0731023	0.2539321	3.1106589	0.1250439	6.0962738	0.0411464
AgeAccelPheno	6.3986801	2.2569740	10.5403863	0.0024616	4.9355903	1.8840838	7.9870967	0.0015239
DNAmAgeSkinBloodClockAdjAge	0.8566773	-0.9926463	2.7060009	0.3639151	1.7705859	0.5145553	3.0266165	0.0057291
AgeAccelGrim	0.7696887	-1.8396811	3.3790585	0.5631745	0.3162269	-2.0886371	2.7210908	0.7966192
DNAmTLAdjAge	0.0272048	-0.1267176	0.1811273	0.7290334	0.0022695	-0.1561092	0.1606481	0.9775943
IEAA	0.5293092	-1.8449489	2.9035673	0.6621492	-0.4336447	-3.3027906	2.4355012	0.7670542
EEAA	3.3738615	-1.2370242	7.9847472	0.1515329	4.8981260	1.2230517	8.5732004	0.0089953

Plotting

Buccal cells

Data processing and DNAm age calculations

Normalization using funnorm

# remove PBMC objects
rm(ageAnno, betas_fun, betas_fun_reduced, funnorm, pheno, RGset)

# load RGset
load(paste0(path, "/buccal_RGset.rdata"))

# B14's detection p-values are significantly larger
RGset = RGset[,-26]
pheno = pData(RGset)
dim(pheno)
# 39 34

# Funnorm
funnorm = preprocessFunnorm(RGset)
betas_fun = minfi::getBeta(funnorm)

pheno = pData(funnorm)
pheno$sample_name = rownames(pheno)
# adding Sentrix ID for subsequent batch correction in downstream analysis
sentrix_id = as.factor(
  stringr::str_extract_all(pheno$sample_name, "^([0-9]{12})",
                           simplify = TRUE))
pheno = data.frame(pheno, sentrix_id)
# remove features without any variation
pheno = pheno[, apply(pheno, 2, function(x) length(unique(x)) > 1)]

# one sample with sex mismatch
table(pheno$sex, pheno$predictedSex)
     # F  M
  # F 19  0
  # M  1 20  d

# save the processed betas and pheno data
save(betas_fun, pheno, file = paste0(path, "/buccalCells_betas_funnorm_pheno.RData"))

###$ Process data for dnamage

# load data
# load('buccalCells_betas_funnorm_pheno.RData')

match1=match(datMiniAnnotation[,1], rownames(betas_fun))

# filter probes
betas_fun_reduced = betas_fun[match1,]
dim(betas_fun_reduced)
# 30084    39

betas_fun_reduced = data.frame(ProbeID = datMiniAnnotation$Name, betas_fun_reduced)
dim(betas_fun_reduced)
# 30084    41

betas_fun_reduced[,1]=as.character(betas_fun_reduced[,1])

betas_fun_reduced = data.frame(betas_fun_reduced)

for (i in 2:dim(betas_fun_reduced)[[2]] ){betas_fun_reduced[,i]=
as.numeric(as.character(gsub(x= betas_fun_reduced[,i],pattern="\"",replacement=""))) }

colNames = colnames(betas_fun_reduced)[2:ncol(betas_fun_reduced)]
colNames = substr(colNames, 2, nchar(colNames))
colnames(betas_fun_reduced)[2:ncol(betas_fun_reduced)] = colNames

# number of missing probes
sum(is.na(betas_fun_reduced[,2]))
# 2558

# save filtered probes
write.table(betas_fun_reduced, paste0(path, "/buccalCells_Betas_funnorm_reduced_for_clock.csv"), row.names=F, sep="," )

# pheno/annotation file
pheno$Female[pheno$sex == 'F'] = 1
pheno$Female[pheno$sex == 'M'] = 0

ageAnno = data.frame(ID = rownames(pheno), Age = pheno$age, Female = pheno$Female, Tissue = 'Blood PBMC')

all(ageAnno$ID == colnames(betas_fun_reduced)[-1])
# TRUE
identical(ageAnno$ID, colnames(betas_fun_reduced)[-1])
# TRUE

# save annotation file
write.table(ageAnno, paste0(path, "/buccalCells_anno_for_clock.csv"), row.names=F, sep="," )

DNA methylation age calculator

# load result
buccalCell_age = read.csv(paste0(path, '/buccalCells_Betas_funnorm_reduced_for_clock.output.csv'))
buccalCell_age$SampleID = substr(buccalCell_age$SampleID, 2, nchar(buccalCell_age$SampleID))

all(buccalCell_age$SampleID == pheno$sample_name)
# TRUE
identical(buccalCell_age$SampleID, pheno$sample_name)
# TRUE

# merge result with pheno data
pheno = cbind(pheno, buccalCell_age[,-1])

# predicted tissue
table(pheno$predictedTissue)
# Buccal Saliva 
    # 22     17 
        
# drop duplicated column
pheno = pheno[,-92]

Save pheno data with clocks

# drop sample with sex mismatch 
pheno = pheno[pheno$sex == pheno$predictedSex,]
dim(pheno)
# 38 142

table(pheno$sex, pheno$predictedSex)
     # F  M
  # F 19  0
  # M  0 19

pheno$ID2 = as.numeric(substring(pheno$study_id, 3, 9))

# add additional exposure variables and rename pheno
all_pheno = read.sas7bdat(paste0(path, '/AC13_052018.sas7bdat'))

# participants with DNAm data
all_pheno_DNam = all_pheno[all_pheno$ID2 %in% pheno$ID2,]

# subset As variables of interest
all_pheno_DNam = all_pheno_DNam[,c('ID2', 'CUM_AGE0_10', 'CUM_AGE20ABOVE', 'CUM_LAG0', 'LIFEALAG0', 'LIFEAAGE0_10', 'LIFEAAGE20ABOVE', 'as_birth', 'cum_as_an')]
  
buccalCell_pheno = merge(pheno, all_pheno_DNam, by = 'ID2', all.x = T)

# variables as factors
buccalCell_pheno$exposed = factor(buccalCell_pheno$exposed, levels = c('no', 'yes'))
buccalCell_pheno$predictedSex = factor(buccalCell_pheno$predictedSex, levels = c('F', 'M'))
buccalCell_pheno$smoking = factor(buccalCell_pheno$smoking, levels = c('no', 'yes'))

write.csv(buccalCell_pheno, paste0(path, '/buccalCell_fun_pheno_withAges.csv'))

Scatter plots of Horvath clocks and age

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

Linear models of associations between age acceleration and exposure

Adjusted robust linear regression results

	B_uadj	lower_unadj	upper_unadj	p_unadj	B_adj	lower_adj	upper_adj	p_adj
AgeAccelerationResidual	3.4872988	-0.2372958	7.2118933	0.0664918	2.3060280	-1.0380417	5.6500978	0.1765141
AgeAccelerationResidualHannum	-0.4947023	-3.9491455	2.9597410	0.7789546	-1.6745771	-6.4217628	3.0726085	0.4893266
AgeAccelPheno	6.0728322	0.1562629	11.9894015	0.0442485	4.8796976	-1.6007878	11.3601830	0.1399922
DNAmAgeSkinBloodClockAdjAge	0.7004360	-1.4718879	2.8727600	0.5274107	0.6239160	-2.1449018	3.3927338	0.6587413
AgeAccelGrim	-1.4091434	-5.1208658	2.3025790	0.4568196	-1.6090914	-5.4533169	2.2351340	0.4119942
DNAmTLAdjAge	0.1584270	-0.1426494	0.4595034	0.3023828	0.1833202	-0.1783693	0.5450097	0.3205168
IEAA	3.2436850	-0.2037145	6.6910844	0.0651620	2.3841868	-1.2896274	6.0580010	0.2033897
EEAA	-0.2471809	-3.9197674	3.4254056	0.8950523	-1.8100192	-7.2590805	3.6390421	0.5150188

Plotting

Adjusted robust linear regression results

Combining PBMC and buccal cell plots

# scatter plots 
titles1 = ggplot(data.frame(x = c(1:3), y = 1)) + annotate('text', label = 'Horvath', x = 1.1, y = 1, size = 3.5) + annotate('text', label = 'Hannum', x = 2, y = 1, size = 3.5) + annotate('text', label = 'Skin and blood', x = 2.92, y = 1, size = 3.5) + xlim(c(0.75,3.25)) + theme_void() 
titles2 = ggplot(data.frame(x = c(1:3), y = 1)) + annotate('text', label = 'PhenoAge', x = 1.18, y = 1, size = 3.5) + annotate('text', label = 'GrimAge', x = 2, y = 1, size = 3.5) + annotate('text', label = 'DNAmTL', x = 2.95, y = 1, size = 3.5)+ xlim(c(0.75,3.25)) + theme_void()

header1 = ggplot(data.frame(x = c(1:3), y = 1)) + annotate('text', label = 'PBMCs', x = 2, y = 1, size = 5) + xlim(c(0.75,3.25)) + theme_void() 
header2 = ggplot(data.frame(x = c(1:3), y = 1)) + annotate('text', label = 'Buccal cells', x = 2, y = 1, size = 5) + xlim(c(0.75,3.25)) + theme_void() 

blank = ggplot(mtcars, aes(x = wt, y = mpg)) + geom_blank()

scatter_DNAmAge_PBMC2 = scatter_DNAmAge_PBMC + theme(axis.title.x = element_blank()) + labs(y = 'DNAm age')
scatter_DNAmAgeHannum_PBMC2 = scatter_DNAmAgeHannum_PBMC + theme(axis.title.x = element_blank(), axis.title.y = element_blank())
scatter_DNAmPhenoAge_PBMC2 = scatter_DNAmPhenoAge_PBMC + theme(axis.title.x = element_blank()) + labs(y = 'DNAm age')
scatter_DNAmAgeSkinBloodClock_PBMC2 = scatter_DNAmAgeSkinBloodClock_PBMC + theme(axis.title.x = element_blank(), axis.title.y = element_blank()) 
scatter_DNAmGrimAge_PBMC2 = scatter_DNAmGrimAge_PBMC + theme(axis.title.x = element_blank(), axis.title.y = element_blank())
scatter_DNAmTL_PBMC2 = scatter_DNAmTL_PBMC + theme(axis.title.x = element_blank(), axis.title.y = element_blank())

scatter_DNAmAge_buccalCell2 = scatter_DNAmAge_buccalCell + labs(y = 'DNAm age') + theme(axis.title.x = element_blank())
scatter_DNAmAgeHannum_buccalCell2 = scatter_DNAmAgeHannum_buccalCell + theme(axis.title.x = element_blank(), axis.title.y = element_blank())
scatter_DNAmPhenoAge_buccalCell2 = scatter_DNAmPhenoAge_buccalCell + labs(x = 'Chronological age', y = 'DNAm age')
scatter_DNAmAgeSkinBloodClock_buccalCell2 = scatter_DNAmAgeSkinBloodClock_buccalCell + theme(axis.title.x = element_blank(), axis.title.y = element_blank())
scatter_DNAmGrimAge_buccalCell2 = scatter_DNAmGrimAge_buccalCell + theme(axis.title.y = element_blank()) + labs(x = 'Chronological age')
scatter_DNAmTL_buccalCell2 = scatter_DNAmTL_buccalCell + theme(axis.title.y = element_blank()) + labs(x = 'Chronological age')

g1 <- arrangeGrob(scatter_DNAmAge_PBMC2, scatter_DNAmAgeHannum_PBMC2, scatter_DNAmAgeSkinBloodClock_PBMC2, nrow = 1)
g2 <- arrangeGrob(scatter_DNAmPhenoAge_PBMC2, scatter_DNAmGrimAge_PBMC2, scatter_DNAmTL_PBMC2, nrow = 1)
g3 <- arrangeGrob(scatter_DNAmAge_buccalCell2,  scatter_DNAmAgeHannum_buccalCell2, scatter_DNAmAgeSkinBloodClock_buccalCell2, nrow = 1)
g4 <- arrangeGrob(scatter_DNAmPhenoAge_buccalCell2, scatter_DNAmGrimAge_buccalCell2, scatter_DNAmTL_buccalCell2, nrow = 1)

grid.arrange(header1,titles1,g1,titles2,g2,header2,titles1,g3,titles2,g4,nrow=10, heights=unit(c(0.6,0.4,5.25,0.4,5.25,0.6,0.4,5.25,0.4,5.4), c("cm")))
quartz.save(file = 'all_clocks_scatterPlots_long.png', type = 'png', dpi = 300)

# forest plots
fp_buccalcells_robust2 = fp_buccalcells_robust + theme(axis.title.y = element_blank())
grid.arrange(fp_PBMCs_robust, fp_buccalcells_robust2, ncol = 2)
# quartz.save(file = paste0(path, '/all_ageAccelAssoc_forestPlots.png'), type = 'png', dpi = 300)

# forest plots
fp_buccalcells_robust2 = fp_buccalcells_robust + theme(axis.title.y = element_blank())
grid.arrange(fp_PBMCs_robust, fp_buccalcells_robust2, ncol = 2)
# quartz.save(file = paste0(path, '/all_ageAccelAssoc_forestPlots.png'), type = 'png', dpi = 300)