Install packages as necessary.
dir.create("luad_rnaseq_analysis")
list.of.bioconductor.packages <- c("edgeR","limma","pathifier","KEGGREST","biomaRt","GSVA","GSEABase","qusage")
new.bioc.packages <- list.of.bioconductor.packages[!(list.of.bioconductor.packages %in% installed.packages()[,"Package"])]
if(length(new.bioc.packages)) BiocManager::install(new.bioc.packages)
list.of.packages <- c("aricode","ggplot2","mclust","Rmisc","survminer","survival")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
Data downloaded from Broad GDAC Firehose https://gdac.broadinstitute.org/. LUAD RNA-seq data from illuminahiseq_rnaseqv2-RSEM_genes_normalized and clinical data from Merge_Clinical.
header <- read.table('./input_data/LUAD.rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes_normalized__data.data.txt',nrows=1,sep="\t", quote="", header=F, row.names=1,stringsAsFactors=F)
luad_data <- read.table('./input_data/LUAD.rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes_normalized__data.data.txt',skip=2,sep="\t", quote="", header=F, row.names=1,stringsAsFactors=F)
colnames(luad_data) <- unlist(header)
names(luad_data) <- make.names(names(luad_data))
test <- row.names(luad_data)
test <- gsub ('|','.',test,fixed=T)
test <- gsub ('?','X',test, fixed=T)
row.names(luad_data) <- test
luad_data <- t(as.matrix(luad_data))
luad_data <- as.data.frame(luad_data,stringsAsFactors=F)
#Parse sample IDs to get normals
sample <- row.names(luad_data)
key <- data.frame( do.call( rbind, strsplit( sample, '.',fixed=T ) ) )
names(key) <- c("project","tss","participant","sample","portion","plate","center")
code <- character(length = dim(luad_data)[1])
code[1:length(code)] <- "LUAD"
code[key$sample=="11A" | key$sample=="11B"] <- "Normal"
table(code)
## code
## LUAD Normal
## 517 59
#Read in clinical data
df_full <- as.data.frame(code,row.names=row.names(luad_data))
df_full_row_mod <- row.names(df_full)
s <- data.frame(do.call('rbind', strsplit(df_full_row_mod,'.',fixed=TRUE)))
df_full$lookup_id <- paste(tolower(s$X1),tolower(s$X2),tolower(s$X3),sep="-")
df_full$full_id <- row.names(df_full)
brca_full <- read.table("./input_data/LUAD.clin.merged.txt",header=F,quote="",row.names=1,sep='\t')
brca_full <- as.data.frame(t(brca_full))
row.names(brca_full) <- brca_full$patient.bcr_patient_barcode
brca_full$lookup_id <- brca_full$patient.bcr_patient_barcode
clinical <- merge(df_full, brca_full, by="lookup_id", all.x=T)
clinical_normal <- clinical[clinical$code=="Normal",]
clinical_luad <- clinical[clinical$code=="LUAD",]
Filter out lowly expressed genes and use voom to convert counts to log2(cpm) with associated weights.
# Filter out lowly expressed genes
keep <- edgeR::filterByExpr(t(luad_data))
table(keep)
## keep
## FALSE TRUE
## 6293 14238
gene_exp <- luad_data[,keep]
# Apply voom
exp_voom <- limma::voom(t(gene_exp),plot=T)
exp_voom <- as.data.frame(t(exp_voom$E))
write.table(exp_voom,file="./luad_rnaseq_analysis/luad_normalized_counts.exp_filt.voom.txt",quote=F,sep="\t")
Keep only 3,000 most highly variant genes.
exp_voom <- ssnpa::filter_by_variance(exp_voom, 3000)
dim(exp_voom)
## [1] 576 3000
write.table(exp_voom,file='./luad_rnaseq_analysis/luad_normalized_counts.exp_filt.voom.var_filt.txt',quote=F, sep='\t')
ind_keep <- grep('days_to_new_tumor_event_after_initial_treatment',colnames(clinical_luad))
# here are numerous follow ups, collapse them together and keep the first value (the higher one) if more than one is available
new_tum <- as.matrix(clinical_luad[,ind_keep])
new_tum_collapsed <- c()
for (i in 1:dim(new_tum)[1]){
if ( sum ( is.na(new_tum[i,])) < dim(new_tum)[2]){
m <- min(new_tum[i,],na.rm=T)
new_tum_collapsed <- c(new_tum_collapsed,m)
} else {
new_tum_collapsed <- c(new_tum_collapsed,'NA')
}
}
# do the same to death
ind_keep <- grep('days_to_death',colnames(clinical_luad))
death <- as.matrix(clinical_luad[,ind_keep])
death_collapsed <- c()
for (i in 1:dim(death)[1]){
if ( sum ( is.na(death[i,])) < dim(death)[2]){
m <- max(death[i,],na.rm=T)
death_collapsed <- c(death_collapsed,m)
} else {
death_collapsed <- c(death_collapsed,'NA')
}
}
# and days last follow up here we take the most recent which is the max number
ind_keep <- grep('days_to_last_followup',colnames(clinical_luad))
fl <- as.matrix(clinical_luad[,ind_keep])
fl_collapsed <- c()
for (i in 1:dim(fl)[1]){
if ( sum(is.na(fl[i,])) < dim(fl)[2]){
m <- max(fl[i,],na.rm=T)
fl_collapsed <- c(fl_collapsed,m)
} else {
fl_collapsed <- c(fl_collapsed,'NA')
}
}
# and put everything together
all_clin <- data.frame(clinical_luad$code,new_tum_collapsed,death_collapsed,fl_collapsed)
colnames(all_clin) <- c("sample_code",'new_tumor_days', 'death_days', 'followUp_days')
# create vector with time to new tumor containing data to censor for new_tumor
all_clin$new_time <- c()
for (i in 1:length(as.numeric(as.character(all_clin$new_tumor_days)))){
all_clin$new_time[i] <- ifelse ( is.na(as.numeric(as.character(all_clin$new_tumor_days))[i]),
as.numeric(as.character(all_clin$followUp_days))[i],as.numeric(as.character(all_clin$new_tumor_days))[i])
}
# create vector time to death containing values to censor for death
all_clin$new_death <- c()
for (i in 1:length(as.numeric(as.character(all_clin$death_days)))){
all_clin$new_death[i] <- ifelse ( is.na(as.numeric(as.character(all_clin$death_days))[i]),
as.numeric(as.character(all_clin$followUp_days))[i],as.numeric(as.character(all_clin$death_days))[i])
}
# create vector for death censoring
table(clinical_luad$patient.vital_status)
##
## alive dead
## 391 126
all_clin$death_event <- ifelse(clinical_luad$patient.vital_status == 'alive', 0,1)
#finally add row.names to clinical
row.names(all_clin) <- row.names(clinical_luad)
dir.create("./luad_rnaseq_analysis/ssnpa_results")
for (pd in c(12,11,10,9,8,7,6,5,4)){
ssnpa_cluster <- ssnpa::run_ssnpa(gene_exp = exp_voom,
reference_samples = (code=="Normal"),
out_dir = paste0('./luad_rnaseq_analysis/ssnpa_results'),
seed_value = 1,
fgs_pd = pd,
n_pc = 10,
cluster_resolution = 0.6,
cluster_ref_samples = F)
write.table(ssnpa_cluster$clustering,file=paste0('./luad_rnaseq_analysis/ssnpa_results/','ssnpa_cluster_output_pd_',toString(pd),'_.txt'),quote = F, sep='\t')
write.table(ssnpa_cluster$loadings,file=paste0('./luad_rnaseq_analysis/ssnpa_results','/ssNPA_loading_output_pd_',toString(pd),'_.txt'),quote = F, sep='\t')
}
Run survival analysis with ssNPA clsuters
ssnpa_result <- read.table('./luad_rnaseq_analysis/ssnpa_results/ssnpa_cluster_output_pd_8_.txt')
Cluster <- ssnpa_result$Cluster
table(Cluster)
## Cluster
## 0 1 2 3
## 206 134 90 87
# run survival analysis
ssnpa_surv_full <- survminer::ggsurvplot(
survival::survfit(survival::Surv(as.numeric(as.character(all_clin$new_death)),all_clin$death_event)~Cluster), # survfit object with calculated statistics.
data = all_clin, # data used to fit survival curves.
legend = "right",
risk.table = TRUE, # show risk table.
pval = TRUE, # show p-value of log-rank test.
pval.coord = c(5000,1),
pval.size = 12,
conf.int = F, # show confidence intervals for
# point estimaes of survival curves.
xlim = c(0,7500), # present narrower X axis, but not affect
# survival estimates.
break.time.by = 500, # break X axis in time intervals by 500.
ggtheme = ggplot2::theme_minimal(), # customize plot and risk table with a theme.
risk.table.y.text.col = T, # colour risk table text annotations.
risk.table.y.text = F # show bars instead of names in text annotations
# in legend of risk table
)
ssnpa_surv_trunc <- survminer::ggsurvplot(
survival::survfit(survival::Surv(as.numeric(as.character(all_clin$new_death)),all_clin$death_event)~Cluster), # survfit object with calculated statistics.
data = all_clin, # data used to fit survival curves.
legend = "right",
risk.table = F, # show risk table.
pval = TRUE, # show p-value of log-rank test.
pval.coord = c(2500,1),
pval.size = 12,
conf.int = F, # show confidence intervals for
# point estimaes of survival curves.
xlim = c(0,3500), # present narrower X axis, but not affect
# survival estimates.
break.time.by = 500, # break X axis in time intervals by 500.
ggtheme = ggplot2::theme_minimal(), # customize plot and risk table with a theme.
)
Use ssGSEA with MSigDB pathways (v7) downloaded from http://software.broadinstitute.org/gsea/msigdb/collections.jsp#C2 to cluster these data.
dir.create(paste0('./luad_rnaseq_analysis/test_ssgsea'))
gene_list <- names(exp_voom)
entrez_ids <- sapply(strsplit(gene_list,".",fixed=T), `[`, 2)
exp_voom_entrez <- exp_voom
names(exp_voom_entrez) <- entrez_ids
hg_c2 <- qusage::read.gmt('./input_data/c2.all.v7.0.entrez.gmt')
ssgsea_features <- GSVA::gsva(as.matrix(t(exp_voom_entrez)), hg_c2, method="ssgsea")
ssgsea_cluster_input <- as.data.frame(t(ssgsea_features))
names(ssgsea_cluster_input) <- make.names(names(ssgsea_cluster_input))
ssgsea_cluster <- ssnpa::cluster_samples(features = as.data.frame(t(ssgsea_features)),
meta_data_df = clinical,
n_pc = 10,
cluster_resolution =0.6,
cluster_ref_samples = F,
ref_samples = (code=="Normal"),
seed_value = 1)$clustering
write.table(ssgsea_cluster,file=paste0('./luad_rnaseq_analysis/test_ssgsea/ssgsea_cluster_output.txt'),quote = F, sep='\t')
Run survival analysis with ssGSEA clsuters
ssgsea_result <- read.table('./luad_rnaseq_analysis/test_ssgsea/ssgsea_cluster_output.txt')
Cluster <- ssgsea_result$Cluster
table(Cluster)
## Cluster
## 0 1 2 3
## 171 162 94 90
# run survival analysis
ssgsea_surv_full <- survminer::ggsurvplot(
survival::survfit(survival::Surv(as.numeric(as.character(all_clin$new_death)),all_clin$death_event)~Cluster), # survfit object with calculated statistics.
data = all_clin, # data used to fit survival curves.
legend = "right",
risk.table = TRUE, # show risk table.
pval = TRUE, # show p-value of log-rank test.
pval.coord = c(5000,1),
pval.size = 12,
conf.int = F, # show confidence intervals for
# point estimaes of survival curves.
xlim = c(0,7500), # present narrower X axis, but not affect
# survival estimates.
break.time.by = 500, # break X axis in time intervals by 500.
ggtheme = ggplot2::theme_minimal(), # customize plot and risk table with a theme.
risk.table.y.text.col = T, # colour risk table text annotations.
risk.table.y.text = F # show bars instead of names in text annotations
# in legend of risk table
)
ssgsea_surv_trunc <- survminer::ggsurvplot(
survival::survfit(survival::Surv(as.numeric(as.character(all_clin$new_death)),all_clin$death_event)~Cluster), # survfit object with calculated statistics.
data = all_clin, # data used to fit survival curves.
legend = "right",
risk.table = F, # show risk table.
pval = TRUE, # show p-value of log-rank test.
pval.coord = c(2500,1),
pval.size = 12,
conf.int = F, # show confidence intervals for
# point estimaes of survival curves.
xlim = c(0,3500), # present narrower X axis, but not affect
# survival estimates.
break.time.by = 500, # break X axis in time intervals by 500.
ggtheme = ggplot2::theme_minimal(), # customize plot and risk table with a theme.
)
dir.create(paste0('./luad_rnaseq_analysis/test_gex'))
gex_cluster <- ssnpa::cluster_samples(features = exp_voom,
meta_data_df = clinical,
n_pc = 10,
cluster_resolution =0.6,
cluster_ref_samples = F,
ref_samples = (code=="Normal"),
seed_value = 1)$clustering
write.table(gex_cluster,file=paste0('./luad_rnaseq_analysis/test_gex/gex_cluster_output.txt'),quote = F, sep='\t')
Run survival analysis with gene expression clsuters
gex_result <- read.table('./luad_rnaseq_analysis/test_gex/gex_cluster_output.txt')
Cluster <- gex_result$Cluster
table(Cluster)
## Cluster
## 0 1 2 3 4
## 146 145 95 88 43
# run survival analysis
gex_surv_full <- survminer::ggsurvplot(
survival::survfit(survival::Surv(as.numeric(as.character(all_clin$new_death)),all_clin$death_event)~Cluster), # survfit object with calculated statistics.
data = all_clin, # data used to fit survival curves.
legend = "right",
risk.table = TRUE, # show risk table.
pval = TRUE, # show p-value of log-rank test.
pval.coord = c(5000,1),
pval.size = 12,
conf.int = F, # show confidence intervals for
# point estimaes of survival curves.
xlim = c(0,7500), # present narrower X axis, but not affect
# survival estimates.
break.time.by = 500, # break X axis in time intervals by 500.
ggtheme = ggplot2::theme_minimal(), # customize plot and risk table with a theme.
risk.table.y.text.col = T, # colour risk table text annotations.
risk.table.y.text = F # show bars instead of names in text annotations
# in legend of risk table
)
gex_surv_trunc <- survminer::ggsurvplot(
survival::survfit(survival::Surv(as.numeric(as.character(all_clin$new_death)),all_clin$death_event)~Cluster), # survfit object with calculated statistics.
data = all_clin, # data used to fit survival curves.
legend = "right",
risk.table = F, # show risk table.
pval = TRUE, # show p-value of log-rank test.
pval.coord = c(2500,1),
pval.size = 12,
conf.int = F, # show confidence intervals for
# point estimaes of survival curves.
xlim = c(0,3500), # present narrower X axis, but not affect
# survival estimates.
break.time.by = 500, # break X axis in time intervals by 500.
ggtheme = ggplot2::theme_minimal(), # customize plot and risk table with a theme.
)
Use Pathifier with KEGG pathways to cluster these data. First, read in KEGG pathways.
dir.create(paste0('./output_data'))
kegg_full <- list()
pathways <- KEGGREST::keggList("pathway", "hsa") #hsa for human
count2 <- 1
for (i in 1:length(pathways)){
x <- attr(pathways,"names")[i]
x <- gsub("path:","",x, fixed=T)
path_full <- KEGGREST::keggGet(x)
p <- path_full[[1]]$GENE
if (length(p)>0){
even_indexes<-seq(2,length(p),2)
q <- p[even_indexes]
r <- strsplit(q,";",fixed=T)
gene_names <- NULL
count <- 1
for (j in 1:length(r)){
if (length(r[[j]])>1){
gene_names[count] <- r[[j]][1]
count <- count+1
}
}
# only keep pathways with at least 10 genes
if (length(gene_names)>10){
kegg_full$gs[count2] <- list(gene_names)
kegg_full$pathwaynames[count2] <- path_full[[1]]$NAME
count2 <- count2 + 1
}
}
}
save(kegg_full,file="./output_data/kegg_full.hsa.RData")
summary(kegg_full)
kegg_full2 <- kegg_full
Convert to gene names to match KEGG.
gene_list <- names(exp_voom)
gene_names <- sapply(strsplit(gene_list,".",fixed=T), `[`, 1)
exp_voom_names <- exp_voom
names(exp_voom_names) <- gene_names
Run Pathifier.
kegg_full <- kegg_full2
data <- t(exp_voom_names)
dir.create(paste0('./luad_rnaseq_analysis/test_pathifier/'))
kegg_full[["gs"]][[40]] <- NULL
kegg_full[["gs"]][[50]] <- NULL
kegg_full[["gs"]][[54]] <- NULL
kegg_full[["gs"]][[88]] <- NULL
kegg_full[["gs"]][[185]] <- NULL
kegg_full[["gs"]][[229]] <- NULL
kegg_full[["gs"]][[233]] <- NULL
kegg_full$pathwaynames <- kegg_full$pathwaynames[c(-40)]
kegg_full$pathwaynames <- kegg_full$pathwaynames[c(-50)]
kegg_full$pathwaynames <- kegg_full$pathwaynames[c(-54)]
kegg_full$pathwaynames <- kegg_full$pathwaynames[c(-88)]
kegg_full$pathwaynames <- kegg_full$pathwaynames[c(-185)]
kegg_full$pathwaynames <- kegg_full$pathwaynames[c(-229)]
kegg_full$pathwaynames <- kegg_full$pathwaynames[c(-233)]
pathway_dereg <- pathifier::quantify_pathways_deregulation(data,allgenes = row.names(data),syms = kegg_full$gs,pathwaynames = kegg_full$pathwaynames,normals = (code=="Normal"), attempts=100)
scores <- do.call(rbind,pathway_dereg$scores)
scores <- as.data.frame(t(scores))
names(scores) <- names(pathway_dereg$scores)
names(scores) <- make.names(names(scores))
row.names(scores) <- row.names(exp_voom_names)
write.table(scores,file=paste0('./luad_rnaseq_analysis/test_pathifier/pathifier_scores.txt'),quote = F, sep='\t')
pathifier_cluster <- ssnpa::cluster_samples(features = scores,
meta_data_df = clinical,
n_pc = 10,
cluster_resolution =0.6,
cluster_ref_samples = F,
ref_samples = (code=="Normal"),
seed_value = 1)$clustering
write.table(pathifier_cluster,file=paste0('./luad_rnaseq_analysis/test_pathifier/pathifier_cluster_output.txt'),quote = F, sep='\t')
Run survival analysis with Pathifier clsuters
pathifier_result <- read.table('./luad_rnaseq_analysis/test_pathifier/pathifier_cluster_output.txt')
Cluster <- pathifier_result$Cluster
table(Cluster)
## Cluster
## 0 1 2 3 4
## 137 111 101 99 69
# run survival analysis
pathifier_surv_full <- survminer::ggsurvplot(
survival::survfit(survival::Surv(as.numeric(as.character(all_clin$new_death)),all_clin$death_event)~Cluster), # survfit object with calculated statistics.
data = all_clin, # data used to fit survival curves.
legend = "right",
risk.table = TRUE, # show risk table.
pval = TRUE, # show p-value of log-rank test.
pval.coord = c(5000,1),
pval.size = 12,
conf.int = F, # show confidence intervals for
# point estimaes of survival curves.
xlim = c(0,7500), # present narrower X axis, but not affect
# survival estimates.
break.time.by = 500, # break X axis in time intervals by 500.
ggtheme = ggplot2::theme_minimal(), # customize plot and risk table with a theme.
risk.table.y.text.col = T, # colour risk table text annotations.
risk.table.y.text = F # show bars instead of names in text annotations
# in legend of risk table
)
pathifier_surv_trunc <- survminer::ggsurvplot(
survival::survfit(survival::Surv(as.numeric(as.character(all_clin$new_death)),all_clin$death_event)~Cluster), # survfit object with calculated statistics.
data = all_clin, # data used to fit survival curves.
legend = "right",
risk.table = F, # show risk table.
pval = TRUE, # show p-value of log-rank test.
pval.coord = c(2500,1),
pval.size = 12,
conf.int = F, # show confidence intervals for
# point estimaes of survival curves.
xlim = c(0,3500), # present narrower X axis, but not affect
# survival estimates.
break.time.by = 500, # break X axis in time intervals by 500.
ggtheme = ggplot2::theme_minimal(), # customize plot and risk table with a theme.
)
ssnpa_loadings <- read.table('./luad_rnaseq_analysis/ssnpa_results/ssNPA_loading_output_pd_8_.txt', quote="", sep='\t', header=T)
ssnpa_loadings_abs <- abs(ssnpa_loadings)
top_pc_genes <- list()
top_pc_loadings <- list()
pc <- list()
top_n <- 10
for (i in 1:dim(ssnpa_loadings_abs)[2]){
loadings <- ssnpa_loadings[order(-ssnpa_loadings_abs[,i]),i]
genes <- row.names(ssnpa_loadings)[order(-ssnpa_loadings_abs[,i])]
top_pc_genes <- c(top_pc_genes, genes[1:top_n])
top_pc_loadings <- c(top_pc_loadings, loadings[1:top_n])
pc <- c(pc, rep(paste0("PC", toString(i)),top_n))
}
gene_id <- unlist(top_pc_genes)
loading <- unlist(top_pc_loadings)
PC <- unlist(pc)
ssnpa_top_loadings <- data.frame(gene_id, loading, PC)
ssnpa_top_loadings$gene_entrez <- sapply(strsplit(as.character(ssnpa_top_loadings$gene_id),".",fixed=T), `[`, 2)
ssnpa_top_loadings$gene_name <- sapply(strsplit(as.character(ssnpa_top_loadings$gene_id),".",fixed=T), `[`, 1)
ensembl <- biomaRt::useEnsembl(biomart="ensembl", dataset="hsapiens_gene_ensembl")
lookup <- biomaRt::getBM(attributes=c('entrezgene_id','name_1006'), filters ='entrezgene_id', values = ssnpa_top_loadings$gene_entrez, mart = ensembl)
t <- match(ssnpa_top_loadings$gene_entrez, lookup$entrezgene_id)
ssnpa_top_loadings$description <- lookup$name_1006[t]
length(unique(ssnpa_top_loadings$gene_name))
## [1] 98
ssnpa_top_loadings[,1:3]
## gene_id loading PC
## 1 CAPN3.825 -0.04361881 PC1
## 2 PRR11.55771 -0.04302068 PC1
## 3 FAM83D.81610 -0.04267316 PC1
## 4 AQP1.358 -0.04241908 PC1
## 5 CCL22.6367 -0.03886609 PC1
## 6 SFTA1P.207107 -0.03813977 PC1
## 7 HSF4.3299 -0.03795281 PC1
## 8 FXYD1.5348 -0.03788944 PC1
## 9 CYP4B1.1580 -0.03768650 PC1
## 10 NAPSB.256236 -0.03736376 PC1
## 11 DCBLD2.131566 0.06227584 PC2
## 12 PDCD1LG2.80380 -0.06109009 PC2
## 13 BEND6.221336 0.05921174 PC2
## 14 ARNTL2.56938 0.05742110 PC2
## 15 FIGF.2277 0.05618135 PC2
## 16 GJB2.2706 0.05576402 PC2
## 17 NT5E.4907 0.05558658 PC2
## 18 SLC16A14.151473 -0.05530914 PC2
## 19 ERO1LB.56605 -0.05451140 PC2
## 20 PLEK2.26499 0.05446294 PC2
## 21 S100P.6286 -0.07150524 PC3
## 22 SPTB.6710 0.07040618 PC3
## 23 TMEM63C.57156 0.06747394 PC3
## 24 GRAMD1B.57476 -0.06654829 PC3
## 25 GCNT3.9245 -0.06479345 PC3
## 26 SIK1.150094 -0.06369875 PC3
## 27 SDCBP2.27111 -0.06366104 PC3
## 28 S1PR1.1901 0.06110917 PC3
## 29 SLC7A11.23657 -0.06051835 PC3
## 30 LYPD3.27076 -0.05982198 PC3
## 31 TUBB2B.347733 0.07279445 PC4
## 32 TDO2.6999 -0.06318555 PC4
## 33 ACSS3.79611 -0.05995265 PC4
## 34 RET.5979 0.05981380 PC4
## 35 MLLT11.10962 0.05958970 PC4
## 36 PAX9.5083 0.05906091 PC4
## 37 NXPH4.11247 0.05864311 PC4
## 38 CYSLTR2.57105 -0.05715708 PC4
## 39 KCND2.3751 0.05621184 PC4
## 40 AQP9.366 -0.05603864 PC4
## 41 PRG4.10216 0.06541767 PC5
## 42 COL11A1.1301 0.06527480 PC5
## 43 TBX15.6913 0.06362428 PC5
## 44 SULF1.23213 0.06215068 PC5
## 45 HTRA3.94031 0.06198783 PC5
## 46 KIF26B.55083 0.06118653 PC5
## 47 VCAN.1462 0.05929002 PC5
## 48 SCML2.10389 -0.05805212 PC5
## 49 CLIC5.53405 0.05762053 PC5
## 50 PLUNC.51297 0.05655426 PC5
## 51 THBS2.7058 -0.06526240 PC6
## 52 UBASH3B.84959 -0.05915507 PC6
## 53 WISP2.8839 -0.05820800 PC6
## 54 PPP1R1B.84152 -0.05813451 PC6
## 55 SCD5.79966 -0.05771531 PC6
## 56 STX11.8676 -0.05670610 PC6
## 57 CA9.768 -0.05659262 PC6
## 58 KCNQ1.3784 0.05586271 PC6
## 59 PLEKHG6.55200 0.05563910 PC6
## 60 CLEC4E.26253 -0.05351223 PC6
## 61 PLA2G10.8399 0.07405081 PC7
## 62 KCNJ11.3767 0.06270304 PC7
## 63 GEM.2669 0.06127419 PC7
## 64 RBPMS2.348093 -0.05562228 PC7
## 65 CBR3.874 0.05483457 PC7
## 66 MNDA.4332 -0.05455973 PC7
## 67 AFF3.3899 -0.05455801 PC7
## 68 ADAMTS6.11174 0.05395366 PC7
## 69 C1orf59.113802 -0.05386096 PC7
## 70 FAM3B.54097 0.05310753 PC7
## 71 ACY3.91703 0.06954862 PC8
## 72 INPP4B.8821 0.06463832 PC8
## 73 FOXA3.3171 0.06336670 PC8
## 74 SLC22A18AS.5003 0.06245528 PC8
## 75 ANKRD22.118932 0.06226496 PC8
## 76 ATP10B.23120 0.06037694 PC8
## 77 NAPSB.256236 -0.05965919 PC8
## 78 PTPRH.5794 0.05863501 PC8
## 79 SRPX2.27286 0.05717100 PC8
## 80 RAMP1.10267 0.05670256 PC8
## 81 FER1L4.80307 0.07082680 PC9
## 82 LYVE1.10894 -0.06454227 PC9
## 83 FHL2.2274 0.06405995 PC9
## 84 F2RL1.2150 0.06323158 PC9
## 85 B3GNT6.192134 0.06309392 PC9
## 86 FLJ40330.645784 0.06184118 PC9
## 87 MYO15B.80022 0.06178080 PC9
## 88 CD38.952 0.06077413 PC9
## 89 TGFA.7039 0.06034392 PC9
## 90 NPTX2.4885 0.05879156 PC9
## 91 SCN1A.6323 -0.08124795 PC10
## 92 DPCR1.135656 -0.07832734 PC10
## 93 MATN3.4148 -0.07498220 PC10
## 94 SRPX2.27286 0.06881731 PC10
## 95 TSPAN1.10103 -0.06863391 PC10
## 96 POF1B.79983 -0.06728000 PC10
## 97 CXCL2.2920 -0.06493051 PC10
## 98 PMAIP1.5366 0.05962130 PC10
## 99 FAM101A.144347 -0.05762621 PC10
## 100 C12orf75.387882 -0.05761501 PC10
write.table(ssnpa_top_loadings,'./luad_rnaseq_analysis/ssnpa_top_loadings.txt',quote=F, sep='\t', row.names=F)
ssnpa_result <- read.table('./luad_rnaseq_analysis/ssnpa_results/ssnpa_cluster_output_pd_8_.txt', quote="", sep='\t', header = T)
Cluster <- as.factor(ssnpa_result$Cluster)
prr11 <- ggplot2::ggplot(exp_voom[code=="LUAD",],ggplot2::aes(x=as.factor(Cluster),y=PRR11.55771))+
ggplot2::geom_boxplot()+
ggplot2::theme_bw()+
ggplot2::labs(x="ssNPA Cluster",y="PRR11 Expression, log2(cpm)")+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.text.x = ggplot2::element_text(size=14),
axis.text.y = ggplot2::element_text(size=14),
axis.title=ggplot2::element_text(size=24))
fam83 <- ggplot2::ggplot(exp_voom[code=="LUAD",],ggplot2::aes(x=as.factor(Cluster),y=FAM83D.81610))+
ggplot2::geom_boxplot()+
ggplot2::theme_bw()+
ggplot2::labs(x="ssNPA Cluster",y="FAM83D Expression, log2(cpm)")+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.text.x = ggplot2::element_text(size=14),
axis.text.y = ggplot2::element_text(size=14),
axis.title=ggplot2::element_text(size=24))
aqp1 <- ggplot2::ggplot(exp_voom[code=="LUAD",],ggplot2::aes(x=as.factor(Cluster),y=AQP1.358))+
ggplot2::geom_boxplot()+
ggplot2::theme_bw()+
ggplot2::labs(x="ssNPA Cluster",y="AQP1 Expression, log2(cpm)")+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.text.x = ggplot2::element_text(size=14),
axis.text.y = ggplot2::element_text(size=14),
axis.title=ggplot2::element_text(size=24))
capn3 <- ggplot2::ggplot(exp_voom[code=="LUAD",],ggplot2::aes(x=as.factor(Cluster),y=CAPN3.825))+
ggplot2::geom_boxplot()+
ggplot2::theme_bw()+
ggplot2::labs(x="ssNPA Cluster",y="CAPN3 Expression, log2(cpm)")+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.text.x = ggplot2::element_text(size=14),
axis.text.y = ggplot2::element_text(size=14),
axis.title=ggplot2::element_text(size=24))
gex_result <- read.table('./luad_rnaseq_analysis/test_gex/gex_cluster_output.txt', quote="", sep='\t', header = T)
gex_cluster <- ggplot2::ggplot(gex_result,ggplot2::aes(tSNE_1,tSNE_2))+
ggplot2::geom_point(ggplot2::aes(colour=as.factor(Cluster)), size=4.5)+
#ggplot2::ggtitle('Gene Expression')+
ggplot2::theme_bw()+
ggplot2::labs(color="Cluster")+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=38),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
gex_surv_trunc_g <- gex_surv_trunc$plot+
ggplot2::labs(title='Gene Expression')+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.title=ggplot2::element_text(size=24))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
gex_surv_full_curve <- gex_surv_full$plot+
#ggplot2::labs(title='Gene Expression')+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.title=ggplot2::element_text(size=24))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
gex_surv_full_table <- gex_surv_full$table+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,19,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.title=ggplot2::element_text(size=24))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
ssnpa_result <- read.table('./luad_rnaseq_analysis/ssnpa_results/ssnpa_cluster_output_pd_8_.txt', quote="", sep='\t', header = T)
ssnpa_cluster <- ggplot2::ggplot(ssnpa_result,ggplot2::aes(tSNE_1,tSNE_2))+
ggplot2::geom_point(ggplot2::aes(colour=as.factor(Cluster)), size=4.5)+
ggplot2::ggtitle('ssNPA')+
ggplot2::theme_bw()+
ggplot2::labs(color="Cluster")+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=38),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
ssnpa_surv_trunc_g <- ssnpa_surv_trunc$plot+
ggplot2::labs(title='ssNPA')+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.title=ggplot2::element_text(size=24))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
ssnpa_surv_full_curve <- ssnpa_surv_full$plot+
ggplot2::labs(title='ssNPA')+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.title=ggplot2::element_text(size=24))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
ssnpa_surv_full_table <- ssnpa_surv_full$table+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,19,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.title=ggplot2::element_text(size=24))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
ssgsea_result <- read.table('./luad_rnaseq_analysis/test_ssgsea/ssgsea_cluster_output.txt', quote="", sep='\t', header = T)
ssgsea_cluster <- ggplot2::ggplot(ssgsea_result,ggplot2::aes(tSNE_1,tSNE_2))+
ggplot2::geom_point(ggplot2::aes(colour=as.factor(Cluster)), size=4.5)+
ggplot2::ggtitle('ssGSEA')+
ggplot2::theme_bw()+
ggplot2::labs(color="Cluster")+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=38),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
ssgsea_surv_trunc_g <- ssgsea_surv_trunc$plot+
ggplot2::labs(title='ssGSEA')+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.title=ggplot2::element_text(size=24))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
ssgsea_surv_full_curve <- ssgsea_surv_full$plot+
ggplot2::labs(title='ssGSEA')+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.title=ggplot2::element_text(size=24))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
ssgsea_surv_full_table <- ssgsea_surv_full$table+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,19,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.title=ggplot2::element_text(size=24))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
pathifier_result <- read.table('./luad_rnaseq_analysis/test_pathifier/pathifier_cluster_output.txt', quote="", sep='\t', header = T)
pathifier_cluster <- ggplot2::ggplot(pathifier_result,ggplot2::aes(tSNE_1,tSNE_2))+
ggplot2::geom_point(ggplot2::aes(colour=as.factor(Cluster)), size=4.5)+
ggplot2::ggtitle('Pathifier')+
ggplot2::theme_bw()+
ggplot2::labs(color="Cluster")+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=38),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
pathifier_surv_trunc_g <- pathifier_surv_trunc$plot+
ggplot2::labs(title='Pathifier')+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.title=ggplot2::element_text(size=24))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
pathifier_surv_full_curve <- pathifier_surv_full$plot+
ggplot2::labs(title='Pathifier')+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.title=ggplot2::element_text(size=24))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
pathifier_surv_full_table <- pathifier_surv_full$table+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,19,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.title=ggplot2::element_text(size=24))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
pdf("luad_cluster_and_truncated_survival.pdf", width=40,height=24)
ggpubr::ggarrange(ssnpa_cluster,pathifier_cluster,ssgsea_cluster, ssnpa_surv_trunc_g,pathifier_surv_trunc_g,ssgsea_surv_trunc_g, labels=c("A","B","C","D","E","F"),font.label = list(size = 52, face = "bold"),nrow=2,ncol=3)
dev.off()
## quartz_off_screen
## 2
pdf("luad_survival_full.pdf",width=45,height=18)
ggpubr::ggarrange(ssnpa_surv_full_curve,pathifier_surv_full_curve,ssgsea_surv_full_curve,ssnpa_surv_full_table, pathifier_surv_full_table,ssgsea_surv_full_table, labels=c("A","B","C","","",""),font.label = list(size = 52, face = "bold"),nrow=2,ncol=3,heights=c(2,1))
dev.off()
## quartz_off_screen
## 2
pdf("luad_gene_boxplots.pdf", width=24, height=24)
ggpubr::ggarrange(prr11, fam83, aqp1, capn3, labels="AUTO", font.label = list(size = 52, face = "bold"), nrow=2, ncol=2)
dev.off()
## quartz_off_screen
## 2
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.2 locfit_1.5-9.1 lattice_0.20-38
## [4] tidyr_1.0.0 prettyunits_1.0.2 zoo_1.8-6
## [7] assertthat_0.2.1 zeallot_0.1.0 digest_0.6.22
## [10] R6_2.4.0 survminer_0.4.6 backports_1.1.5
## [13] stats4_3.6.1 RSQLite_2.1.2 evaluate_0.14
## [16] httr_1.4.1 ggplot2_3.2.1 pillar_1.4.2
## [19] progress_1.2.2 rlang_0.4.1 curl_4.2
## [22] lazyeval_0.2.2 data.table_1.12.6 blob_1.2.0
## [25] S4Vectors_0.22.1 Matrix_1.2-17 rmarkdown_1.16
## [28] labeling_0.3 splines_3.6.1 stringr_1.4.0
## [31] RCurl_1.95-4.12 bit_1.1-14 biomaRt_2.40.5
## [34] munsell_0.5.0 broom_0.5.2 compiler_3.6.1
## [37] xfun_0.10 pkgconfig_2.0.3 BiocGenerics_0.30.0
## [40] htmltools_0.4.0 tidyselect_0.2.5 tibble_2.1.3
## [43] gridExtra_2.3 km.ci_0.5-2 edgeR_3.26.8
## [46] IRanges_2.18.3 XML_3.98-1.20 crayon_1.3.4
## [49] dplyr_0.8.3 ggpubr_0.2.3 bitops_1.0-6
## [52] grid_3.6.1 nlme_3.1-141 xtable_1.8-4
## [55] gtable_0.3.0 lifecycle_0.1.0 DBI_1.0.0
## [58] magrittr_1.5 KMsurv_0.1-5 scales_1.0.0
## [61] stringi_1.4.3 ggsignif_0.6.0 limma_3.40.6
## [64] survMisc_0.5.5 generics_0.0.2 vctrs_0.2.0
## [67] cowplot_1.0.0 tools_3.6.1 bit64_0.9-7
## [70] Biobase_2.44.0 glue_1.3.1 purrr_0.3.3
## [73] hms_0.5.2 parallel_3.6.1 survival_2.44-1.1
## [76] yaml_2.2.0 ssnpa_0.0.0.9003 AnnotationDbi_1.46.1
## [79] colorspace_1.4-1 memoise_1.1.0 knitr_1.25