View on GitHub

OpenCyto


A Robust BioConductor Framework for Automated Flow Data Analysis

HVTN 080 Manual vs Automated Gating

HVTN 080 Manual vs Automated Gating

The HVTN080 study flow cytometry data has been preprocessed (loaded in R from the raw FCS files and the FlowJo workspaces). The gating set is available here, and has been gated using OpenCyto.

We have gated the data using this OpenCyto template.

The manually gated data is available here, and contains the manual gates from the FlowJo workspaces.

The raw FCS files are available from flowrepository.org/id/FR-FCM-ZZ7U.

The data in the gating sets has already been compensated and transformed using the compensation and transformation information stored in the FlowJo workspaces.

The FlowJo workspaces for this data will be posted soon.

The code to perform the gating and extract the features is here.

Examples of generating visualization of automated and manual gates is available here.

Below, we perform a comparison of visit 2 (pre-vaccine) vs. visit 5 (post-vaccine) across treatment groups in HVTN080. Manual vs Automated gating. The gating is described here, and some further analysis and visualization can be found here.

suppressPackageStartupMessages({
  require(reshape2)
  require(data.table)
  require(knitr)
  require(ggplot2)
  require(plyr)
  require(gridExtra)
  require(lme4)
  require(contrast)
  require(epiR)
  require(multcomp)
  require(MASS)
  require(GFMisc)
  require(scales)
})
opts_chunk$set(list(message=FALSE,warning=FALSE))
WORKDIR<-"/Users/gfinak/Dropbox/GoTeam/Projects/Paper-OpenCytoPipeline/HVTN080/reproducible/"
setwd(WORKDIR)

####Differences between treatments at post-vaccine and at baseline

#read the exported feature matrix
  m <- readRDS("./features2.rds")
  m<-lapply(m,as.data.frame)
  m<-lapply(m,function(x)setnames(x,"pub_id","pubid"))
  m <- do.call(rbind,llply(m,function(x)melt(x,id.vars =c("Stim","pubid","VISITNO"))))
  m<-data.frame(m,Method=gsub("\\..*","",rownames(m)))
  m<-data.table(m)
  setnames(m,c("variable","value"),c("Population","proportion"))
  rx<-read.csv("rx_v2.csv")
  rx<-data.table(rx)
  rx$pubid<-rx$pub_id
  merged<-merge(rx,m,by="pubid")
  merged<-merged[,Stim:=factor(Stim)]
  levels(merged$arm)<-c("Placebo","Placebo","Placebo","Treatment","Treatment","Treatment")
foo <- merged
fit<-ddply(foo,.(Method,Stim,Population),function(x){
  KK<-GFMisc:::optimAsinhCofactor(subset(x,rx%in%"Control")$proportion)
  x$prop_trans<-asinh(x$proportion*KK)
  lmfit<-lm(prop_trans~rx_code*VISITNO,data=subset(x,!rx%in%"Control"))
  contr<-contrast(lmfit,a=list(rx_code=c("T1","T2&T3"),VISITNO="5"),b=list(rx_code=c("T1","T2&T3"),VISITNO="2"))
  fit1<-(lmer(prop_trans~rx_code*VISITNO+(1|pub_id),data=subset(x,!rx%in%"Control")))
  sumry<-data.frame(p.value=summary(glht(fit1,linfct=contr$X,alternative="greater"))$test$pvalues,rx_code=c("Pennvax B","Pennvax B + IL12 DNA")) #divide by 2 for one sided test
})

etext<-element_text(size=12)
etext2<-element_text(size=20)

#Bonferroni adjusted p-value over all tests
fit$TCELLSUB<-factor(c("CD8","CD4")[grepl("^4\\+",fit$Population)+1])
ftable(Stim~signif+TCELLSUB+rx_code+Method,ddply(fit,.(Stim,Method,rx_code,TCELLSUB,Population),function(x)with(x,data.frame(signif=any(p.value<0.05/(nrow(fit)))))))
##                                             Stim ENV-1-PTEG GAG-1-PTEG POL-1-PTEG
## signif TCELLSUB rx_code              Method                                      
## FALSE  CD4      Pennvax B            auto                29         26         29
##                                      manual              29         28         26
##                 Pennvax B + IL12 DNA auto                28         21         20
##                                      manual              28         21         21
##        CD8      Pennvax B            auto                29         29         29
##                                      manual              29         29         29
##                 Pennvax B + IL12 DNA auto                29         29         22
##                                      manual              29         29         27
## TRUE   CD4      Pennvax B            auto                 0          3          0
##                                      manual               0          1          3
##                 Pennvax B + IL12 DNA auto                 1          8          9
##                                      manual               1          8          8
##        CD8      Pennvax B            auto                 0          0          0
##                                      manual               0          0          0
##                 Pennvax B + IL12 DNA auto                 0          0          7
##                                      manual               0          0          2

selected<-unique(subset(ddply(fit,.(Stim,Method,rx_code,TCELLSUB,Population),function(x)with(x,data.frame(signif=any(p.value<0.05/nrow(fit))))),signif==TRUE,select=c("Population","Stim")))

foo<-merge(foo,selected,by=c("Stim","Population"),all.y=TRUE,all.x=FALSE)
foo$Population<-factor(foo$Population)
levels(foo$Method)<-c("OpenCyto","Manual")
foo<-within(dcast(foo,...~VISITNO,value.var="proportion"),paired.difference<-`5`-`2`)
levels(foo$Stim)<-c("ENV","GAG","POL")

plt2<-ggplot(subset(foo,!rx%in%"Control"))+geom_boxplot(aes(y=paired.difference,x=Population,fill=rx))+theme_bw()+facet_grid(Method~Stim,scales="free_x",space="free_x")+scale_fill_brewer("Vaccine Regimen",labels=c("Pennvax B","Pennvax B + IL12 DNA"))+  scale_y_continuous("Vaccine - Baseline")+coord_trans(ytrans=asinh_trans(10000))+theme(axis.text.x=element_text(size=10,angle=45,hjust=1),strip.text.y=etext,strip.text.x=etext,axis.text.y=element_text(size=10),axis.title.x=etext2,axis.title.y=element_text(size=20))+geom_hline(yintercept=0,lty=3)+theme(axis.text.x=element_blank(),axis.title.x=element_blank(),axis.ticks.x=element_blank())

plt3<-plt2+theme(legend.position="top",legend.justification="center",plot.title=element_text(size=12,hjust=0))+ggtitle("A")

selected_2<-ddply(within(selected,{
  TCELLSUB<-factor(c("CD8","CD4")[grepl("^4\\+",Population)+1])
}),.(Stim),function(x)x[order(x$Population),])
emat<-do.call(rbind,lapply(strsplit(gsub("^[84]\\+_","",selected_2$Population),"&"),function(x){
  .pad<-function(q,n){
    r<-c(q,rep("",n-length(q)))
    if(r[1]=="IL2|IFNg"){
      rev(r)
    }else{
      r
    }
  }
  .pad(x,6)
}))
emat2<-as.matrix(t(apply(emat,1,function(x)!(grepl("[!]",x)|x==""))))
mode(emat2)<-"numeric"
colnames(emat2)<-diag(unique(gsub("!","",emat))[c(rep(1,5),2),1:6])
colnames(emat)<-colnames(emat2)
emat2 <- cbind(emat2,CD4=as.numeric(selected_2$TCELLSUB=="CD4"))
emat2 <- cbind(emat2,CD8=as.numeric(selected_2$TCELLSUB=="CD8"))
emat2<-data.frame(emat2,Stim=selected_2$Stim)
emat3<-cbind(emat2,row=1:nrow(emat2))
emat3<-melt(emat3,id=c("Stim","row"))
levels(emat3$Stim)<-c("ENV","GAG","POL")

gb<-ggplot(emat3)+geom_tile(aes(x=factor(row),y=variable,fill=factor(value)),color="black")+scale_fill_manual("",values=c("gray100","gray40")) +scale_x_discrete("Cell Population")+scale_y_discrete("")+theme_bw()+theme(axis.text.x=element_blank(),axis.ticks.x=element_blank(),axis.title.x=element_blank(),axis.text.y=element_text(size=12),legend.position="none")+facet_grid(.~Stim,space="free_x",scales="free_x")+theme(plot.margin=unit(c(-1.7,0,0,0), "lines"),strip.background=element_blank(),strip.text.x=element_blank())
gb2<-ggplot_gtable(ggplot_build(gb))
gb1<-ggplot_gtable(ggplot_build(plt3))
maxwidths<-unit.pmax(gb1$widths,gb2$widths)
gb1$widths<-maxwidths
gb2$widths<-maxwidths
gb3<-arrangeGrob(gb1,gb2,heights=c(0.8,0.2))

plt4<-ggplot(dcast(unique(foo),Population+Stim+pub_id+arm~Method,value.var="paired.difference"))+geom_point(aes(x=OpenCyto,y=Manual ))+geom_smooth(aes(x=OpenCyto,y=Manual),method="rlm")+facet_wrap(~Stim)+theme_bw()+theme(axis.text.x=element_text(size=10,angle=75,hjust=1),strip.text.y=etext,strip.text.x=etext,axis.text.y=element_text(size=8),axis.title.x=etext2,axis.title.y=element_text(size=20))+coord_trans(xtrans=asinh_trans(1000),ytrans=asinh_trans(1000))+scale_x_continuous("OpenCyto\n(Vaccine-Baseline)")+scale_y_continuous("Manual Gatingn\n(Vaccine-Baseline)")+theme(plot.title=element_text(size=12,hjust=0))+ggtitle("B")

##CCC, epi.ccc
ccc<-ldply(dlply(dcast(unique(foo),Population+Stim+pub_id+arm~Method,value.var="paired.difference"),.(Stim),function(x)with(x,epi.ccc(asinh(OpenCyto*1000),asinh(Manual*1000)))),function(x)x$rho.c["est"])
setnames(ccc,"est","rho")
plt4<-plt4+geom_text(aes(x=0.001,y=0.005,label=paste(expression(rho),signif(rho,2),sep="==")),data=ccc,parse=TRUE)


plot4<-ggplot_gtable(ggplot_build(plt4))
plot4$widths
##  [1] 0.5lines            1grobwidth+0.5lines 0.956120000000001cm
##  [4] 1null               0.127cm             0cm                
##  [7] 1null               0.127cm             0cm                
## [10] 1null               0cm                 1lines
w4<-unit(c(2,2),"lines")
plot4$widths[[1]]<-w4[1]
plot4$widths[[12]]<-w4[2]
plot5<-arrangeGrob(arrangeGrob(gb3,plot4,heights=c(2,1)),nrow=1)

grid.arrange(plot5)

plot of chunk unnamed-chunk-1

pdf(file="../../Submission/Figure3.pdf",width=8,height=9)
grid.arrange(plot5)
dev.off()
## pdf 
##   2
tiff(file="../../Submission/Figure3.tiff",width=8*150,height=9*150,res=150)
grid.arrange(plot5)
dev.off()
## pdf 
##   2

Some trends but no significant differences.

within(ddply(subset(foo,!rx%in%"Control"),.(Stim,rx,Population),function(x)with(x,data.frame(pdiff=wilcox.test(paired.difference~Method,exact=FALSE)$p.value))),signif<-pdiff<0.01)