Install packages as necessary.
dir.create("brca_rnaseq_analysis")
list.of.bioconductor.packages <- c("edgeR","limma","pathifier","KEGGREST","biomaRt","GSVA","GSEABase","qusage")
new.bioc.packages <- list.of.bioconductor.packages[!(list.of.bioconductor.packages %in% installed.packages()[,"Package"])]
if(length(new.bioc.packages)) BiocManager::install(new.bioc.packages)
list.of.packages <- c("aricode","ggplot2","mclust","Rmisc","survminer","survival")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
Data downloaded from Broad GDAC Firehose https://gdac.broadinstitute.org/. BRCA RNA-seq data from illuminahiseq_rnaseqv2-RSEM_genes_normalized and clinical data from Merge_Clinical.
header <- read.table('./input_data/BRCA.rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes_normalized__data.data.txt',nrows=1,sep="\t", quote="", header=F, row.names=1,stringsAsFactors=F)
brca_data <- read.table('./input_data/BRCA.rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes_normalized__data.data.txt',skip=2,sep="\t", quote="", header=F, row.names=1,stringsAsFactors=F)
colnames(brca_data) <- unlist(header)
names(brca_data) <- make.names(names(brca_data))
test <- row.names(brca_data)
test <- gsub ('|','.',test,fixed=T)
test <- gsub ('?','X',test, fixed=T)
row.names(brca_data) <- test
brca_data <- t(as.matrix(brca_data))
brca_data <- as.data.frame(brca_data,stringsAsFactors=F)
#Parse sample IDs to get normals
sample <- row.names(brca_data)
key <- data.frame( do.call( rbind, strsplit( sample, '.',fixed=T ) ) )
names(key) <- c("project","tss","participant","sample","portion","plate","center")
code <- character(length = dim(brca_data)[1])
code[1:length(code)] <- "BRCA"
code[key$sample=="11A" | key$sample=="11B"] <- "Normal"
table(code)
## code
## BRCA Normal
## 1100 112
#Read in clinical data
df_full <- as.data.frame(code,row.names=row.names(brca_data))
df_full_row_mod <- row.names(df_full)
s <- data.frame(do.call('rbind', strsplit(df_full_row_mod,'.',fixed=TRUE)))
df_full$lookup_id <- paste(tolower(s$X1),tolower(s$X2),tolower(s$X3),sep="-")
df_full$full_id <- row.names(df_full)
brca_full <- read.table("./input_data/BRCA.clin.merged.txt",header=F,quote="",row.names=1,sep='\t')
brca_full <- as.data.frame(t(brca_full))
row.names(brca_full) <- brca_full$patient.bcr_patient_barcode
brca_full$lookup_id <- brca_full$patient.bcr_patient_barcode
clinical <- merge(df_full, brca_full, by="lookup_id", all.x=T)
clinical$subtype <- NA
clinical$subtype[clinical$patient.breast_carcinoma_estrogen_receptor_status=="negative" & clinical$patient.breast_carcinoma_progesterone_receptor_status=="negative" & clinical$patient.lab_proc_her2_neu_immunohistochemistry_receptor_status=="negative"] <- 'Basal'
clinical$subtype[clinical$patient.breast_carcinoma_estrogen_receptor_status=="negative" & clinical$patient.breast_carcinoma_progesterone_receptor_status=="negative" & clinical$patient.lab_proc_her2_neu_immunohistochemistry_receptor_status=="positive"] <- 'HER2+'
clinical$subtype[(clinical$patient.breast_carcinoma_estrogen_receptor_status=="positive" | clinical$patient.breast_carcinoma_progesterone_receptor_status=="positive") & clinical$patient.lab_proc_her2_neu_immunohistochemistry_receptor_status=="negative"] <- 'Luminal A'
clinical$subtype[(clinical$patient.breast_carcinoma_estrogen_receptor_status=="positive" | clinical$patient.breast_carcinoma_progesterone_receptor_status=="positive") & clinical$patient.lab_proc_her2_neu_immunohistochemistry_receptor_status=="positive"] <- 'Luminal B'
table(clinical$subtype)
##
## Basal HER2+ Luminal A Luminal B
## 127 41 488 144
clinical_normal <- clinical[clinical$code=="Normal",]
clinical_brca <- clinical[clinical$code=="BRCA",]
Filter out lowly expressed genes and use voom to convert counts to log2(cpm) with associated weights.
# Filter out lowly expressed genes
keep <- edgeR::filterByExpr(t(brca_data))
table(keep)
## keep
## FALSE TRUE
## 6529 14002
gene_exp <- brca_data[,keep]
# Apply voom
exp_voom <- limma::voom(t(gene_exp),plot=T)
exp_voom <- as.data.frame(t(exp_voom$E))
write.table(exp_voom,file="./brca_rnaseq_analysis/brca_normalized_counts.exp_filt.voom.txt",quote=F,sep="\t")
Keep only 3,000 most highly variant genes.
exp_voom <- ssnpa::filter_by_variance(exp_voom, 3000)
dim(exp_voom)
## [1] 1212 3000
write.table(exp_voom,file='./brca_rnaseq_analysis/brca_normalized_counts.exp_filt.voom.var_filt.txt',quote=F, sep='\t')
ind_keep <- grep('days_to_new_tumor_event_after_initial_treatment',colnames(clinical_brca))
# here are numerous follow ups, collapse them together and keep the first value (the higher one) if more than one is available
new_tum <- as.matrix(clinical_brca[,ind_keep])
new_tum_collapsed <- c()
for (i in 1:dim(new_tum)[1]){
if ( sum ( is.na(new_tum[i,])) < dim(new_tum)[2]){
m <- min(new_tum[i,],na.rm=T)
new_tum_collapsed <- c(new_tum_collapsed,m)
} else {
new_tum_collapsed <- c(new_tum_collapsed,'NA')
}
}
# do the same to death
ind_keep <- grep('days_to_death',colnames(clinical_brca))
death <- as.matrix(clinical_brca[,ind_keep])
death_collapsed <- c()
for (i in 1:dim(death)[1]){
if ( sum ( is.na(death[i,])) < dim(death)[2]){
m <- max(death[i,],na.rm=T)
death_collapsed <- c(death_collapsed,m)
} else {
death_collapsed <- c(death_collapsed,'NA')
}
}
# and days last follow up here we take the most recent which is the max number
ind_keep <- grep('days_to_last_followup',colnames(clinical_brca))
fl <- as.matrix(clinical_brca[,ind_keep])
fl_collapsed <- c()
for (i in 1:dim(fl)[1]){
if ( sum(is.na(fl[i,])) < dim(fl)[2]){
m <- max(fl[i,],na.rm=T)
fl_collapsed <- c(fl_collapsed,m)
} else {
fl_collapsed <- c(fl_collapsed,'NA')
}
}
# and put everything together
all_clin <- data.frame(clinical_brca$code,new_tum_collapsed,death_collapsed,fl_collapsed)
colnames(all_clin) <- c("sample_code",'new_tumor_days', 'death_days', 'followUp_days')
# create vector with time to new tumor containing data to censor for new_tumor
all_clin$new_time <- c()
for (i in 1:length(as.numeric(as.character(all_clin$new_tumor_days)))){
all_clin$new_time[i] <- ifelse ( is.na(as.numeric(as.character(all_clin$new_tumor_days))[i]),
as.numeric(as.character(all_clin$followUp_days))[i],as.numeric(as.character(all_clin$new_tumor_days))[i])
}
# create vector time to death containing values to censor for death
all_clin$new_death <- c()
for (i in 1:length(as.numeric(as.character(all_clin$death_days)))){
all_clin$new_death[i] <- ifelse ( is.na(as.numeric(as.character(all_clin$death_days))[i]),
as.numeric(as.character(all_clin$followUp_days))[i],as.numeric(as.character(all_clin$death_days))[i])
}
# create vector for death censoring
table(clinical_brca$patient.vital_status)
##
## alive dead
## 993 107
all_clin$death_event <- ifelse(clinical_brca$patient.vital_status == 'alive', 0,1)
#finally add row.names to clinical
row.names(all_clin) <- row.names(clinical_brca)
dir.create("./brca_rnaseq_analysis/ssnpa_results")
for (pd in c(12,11,10,9,8,7,6,5,4)){
ssnpa_cluster <- ssnpa::run_ssnpa(gene_exp = exp_voom,
reference_samples = (code=="Normal"),
out_dir = paste0('./brca_rnaseq_analysis/ssnpa_results'),
seed_value = 1,
fgs_pd = pd,
n_pc = 10,
cluster_resolution = 0.6,
cluster_ref_samples = F)
write.table(ssnpa_cluster$clustering,file=paste0('./brca_rnaseq_analysis/ssnpa_results/','ssnpa_cluster_output_pd_',toString(pd),'_.txt'),quote = F, sep='\t')
write.table(ssnpa_cluster$loadings,file=paste0('./brca_rnaseq_analysis/ssnpa_results/','/ssNPA_loading_output_pd_',toString(pd),'_.txt'),quote = F, sep='\t')
}
Check association between ssNPA clusters and molecular subtype
ssnpa_result <- read.table('./brca_rnaseq_analysis/ssnpa_results/ssnpa_cluster_output_pd_10_.txt')
Cluster <- ssnpa_result$Cluster
cluster_x_subtype <- table(ssnpa_result$Cluster, clinical_brca$subtype)
cluster_x_subtype
##
## Basal HER2+ Luminal A Luminal B
## 0 0 0 140 31
## 1 2 0 121 33
## 2 5 1 129 16
## 3 99 9 19 2
## 4 10 27 36 44
chisq.test(cluster_x_subtype)
##
## Pearson's Chi-squared test
##
## data: cluster_x_subtype
## X-squared = 609.52, df = 12, p-value < 2.2e-16
Run survival analysis with ssNPA clsuters
# run survival analysis
ssnpa_surv_full <- survminer::ggsurvplot(
survival::survfit(survival::Surv(as.numeric(as.character(all_clin$new_death)),all_clin$death_event)~Cluster), # survfit object with calculated statistics.
data = all_clin, # data used to fit survival curves.
legend = "right",
risk.table = TRUE, # show risk table.
pval = TRUE, # show p-value of log-rank test.
pval.coord = c(6500,1),
pval.size = 12,
conf.int = F, # show confidence intervals for
# point estimaes of survival curves.
xlim = c(0,9000), # present narrower X axis, but not affect
# survival estimates.
break.time.by = 500, # break X axis in time intervals by 500.
ggtheme = ggplot2::theme_minimal(), # customize plot and risk table with a theme.
risk.table.y.text.col = T, # colour risk table text annotations.
risk.table.y.text = F # show bars instead of names in text annotations
# in legend of risk table
)
ssnpa_surv_trunc <- survminer::ggsurvplot(
survival::survfit(survival::Surv(as.numeric(as.character(all_clin$new_death)),all_clin$death_event)~Cluster), # survfit object with calculated statistics.
data = all_clin, # data used to fit survival curves.
legend = "right",
risk.table = F, # show risk table.
pval = TRUE, # show p-value of log-rank test.
pval.coord = c(4000,1),
pval.size = 12,
conf.int = F, # show confidence intervals for
# point estimaes of survival curves.
xlim = c(0,5000), # present narrower X axis, but not affect
# survival estimates.
break.time.by = 500, # break X axis in time intervals by 500.
ggtheme = ggplot2::theme_minimal(), # customize plot and risk table with a theme.
)
Use ssGSEA with MSigDB pathways (v7) downloaded from http://software.broadinstitute.org/gsea/msigdb/collections.jsp#C2 to cluster these data.
dir.create(paste0('./brca_rnaseq_analysis/test_ssgsea'))
gene_list <- names(exp_voom)
entrez_ids <- sapply(strsplit(gene_list,".",fixed=T), `[`, 2)
exp_voom_entrez <- exp_voom
names(exp_voom_entrez) <- entrez_ids
hg_c2 <- qusage::read.gmt('./input_data/c2.all.v7.0.entrez.gmt')
ssgsea_features <- GSVA::gsva(as.matrix(t(exp_voom_entrez)), hg_c2, method="ssgsea")
ssgsea_cluster_input <- as.data.frame(t(ssgsea_features))
names(ssgsea_cluster_input) <- make.names(names(ssgsea_cluster_input))
ssgsea_cluster <- ssnpa::cluster_samples(features = as.data.frame(t(ssgsea_features)),
meta_data_df = clinical,
n_pc = 10,
cluster_resolution =0.6,
cluster_ref_samples = F,
ref_samples = (code=="Normal"),
seed_value = 1)$clustering
write.table(ssgsea_cluster,file=paste0('./brca_rnaseq_analysis/test_ssgsea/ssgsea_cluster_output.txt'),quote = F, sep='\t')
Check association between ssGSEA clusters and molecular subtype
ssgsea_result <- read.table('./brca_rnaseq_analysis/test_ssgsea/ssgsea_cluster_output.txt')
Cluster <- ssgsea_result$Cluster
cluster_x_subtype <- table(ssgsea_result$Cluster, clinical_brca$subtype)
cluster_x_subtype
##
## Basal HER2+ Luminal A Luminal B
## 0 5 0 198 32
## 1 0 0 132 27
## 2 11 28 93 64
## 3 100 9 22 3
chisq.test(cluster_x_subtype)
##
## Pearson's Chi-squared test
##
## data: cluster_x_subtype
## X-squared = 535.62, df = 9, p-value < 2.2e-16
Run survival analysis with ssGSEA clsuters
# run survival analysis
ssgsea_surv_full <- survminer::ggsurvplot(
survival::survfit(survival::Surv(as.numeric(as.character(all_clin$new_death)),all_clin$death_event)~Cluster), # survfit object with calculated statistics.
data = all_clin, # data used to fit survival curves.
legend = "right",
risk.table = TRUE, # show risk table.
pval = TRUE, # show p-value of log-rank test.
pval.coord = c(6500,1),
pval.size = 12,
conf.int = F, # show confidence intervals for
# point estimaes of survival curves.
xlim = c(0,9000), # present narrower X axis, but not affect
# survival estimates.
break.time.by = 500, # break X axis in time intervals by 500.
ggtheme = ggplot2::theme_minimal(), # customize plot and risk table with a theme.
risk.table.y.text.col = T, # colour risk table text annotations.
risk.table.y.text = F # show bars instead of names in text annotations
# in legend of risk table
)
ssgsea_surv_trunc <- survminer::ggsurvplot(
survival::survfit(survival::Surv(as.numeric(as.character(all_clin$new_death)),all_clin$death_event)~Cluster), # survfit object with calculated statistics.
data = all_clin, # data used to fit survival curves.
legend = "right",
risk.table = F, # show risk table.
pval = TRUE, # show p-value of log-rank test.
pval.coord = c(4000,1),
pval.size = 12,
conf.int = F, # show confidence intervals for
# point estimaes of survival curves.
xlim = c(0,5000), # present narrower X axis, but not affect
# survival estimates.
break.time.by = 500, # break X axis in time intervals by 500.
ggtheme = ggplot2::theme_minimal(), # customize plot and risk table with a theme.
)
dir.create(paste0('./brca_rnaseq_analysis/test_gex'))
gex_cluster <- ssnpa::cluster_samples(features = exp_voom,
meta_data_df = clinical,
n_pc = 10,
cluster_resolution =0.6,
cluster_ref_samples = F,
ref_samples = (code=="Normal"),
seed_value = 1)$clustering
write.table(gex_cluster,file=paste0('./brca_rnaseq_analysis/test_gex/gex_cluster_output.txt'),quote = F, sep='\t')
Check association between gene expression clusters and molecular subtype
gex_result <- read.table('./brca_rnaseq_analysis/test_gex/gex_cluster_output.txt')
Cluster <- gex_result$Cluster
cluster_x_subtype <- table(gex_result$Cluster, clinical_brca$subtype)
cluster_x_subtype
##
## Basal HER2+ Luminal A Luminal B
## 0 12 29 119 58
## 1 0 0 125 35
## 2 2 0 149 19
## 3 101 8 21 2
## 4 1 0 31 12
chisq.test(cluster_x_subtype)
## Warning in chisq.test(cluster_x_subtype): Chi-squared approximation may be
## incorrect
##
## Pearson's Chi-squared test
##
## data: cluster_x_subtype
## X-squared = 529.58, df = 12, p-value < 2.2e-16
Run survival analysis with gene expression clsuters
# run survival analysis
gex_surv_full <- survminer::ggsurvplot(
survival::survfit(survival::Surv(as.numeric(as.character(all_clin$new_death)),all_clin$death_event)~Cluster), # survfit object with calculated statistics.
data = all_clin, # data used to fit survival curves.
legend = "right",
risk.table = TRUE, # show risk table.
pval = TRUE, # show p-value of log-rank test.
pval.coord = c(6500,1),
pval.size = 12,
conf.int = F, # show confidence intervals for
# point estimaes of survival curves.
xlim = c(0,9000), # present narrower X axis, but not affect
# survival estimates.
break.time.by = 500, # break X axis in time intervals by 500.
ggtheme = ggplot2::theme_minimal(), # customize plot and risk table with a theme.
risk.table.y.text.col = T, # colour risk table text annotations.
risk.table.y.text = F # show bars instead of names in text annotations
# in legend of risk table
)
gex_surv_trunc <- survminer::ggsurvplot(
survival::survfit(survival::Surv(as.numeric(as.character(all_clin$new_death)),all_clin$death_event)~Cluster), # survfit object with calculated statistics.
data = all_clin, # data used to fit survival curves.
legend = "right",
risk.table = F, # show risk table.
pval = TRUE, # show p-value of log-rank test.
pval.coord = c(4000,1),
pval.size = 12,
conf.int = F, # show confidence intervals for
# point estimaes of survival curves.
xlim = c(0,5000), # present narrower X axis, but not affect
# survival estimates.
break.time.by = 500, # break X axis in time intervals by 500.
ggtheme = ggplot2::theme_minimal(), # customize plot and risk table with a theme.
)
Use Pathifier with KEGG pathways to cluster these data. First, read in KEGG pathways.
dir.create(paste0('./output_data'))
kegg_full <- list()
pathways <- KEGGREST::keggList("pathway", "hsa") #hsa for human
count2 <- 1
for (i in 1:length(pathways)){
x <- attr(pathways,"names")[i]
x <- gsub("path:","",x, fixed=T)
path_full <- KEGGREST::keggGet(x)
p <- path_full[[1]]$GENE
if (length(p)>0){
even_indexes<-seq(2,length(p),2)
q <- p[even_indexes]
r <- strsplit(q,";",fixed=T)
gene_names <- NULL
count <- 1
for (j in 1:length(r)){
if (length(r[[j]])>1){
gene_names[count] <- r[[j]][1]
count <- count+1
}
}
# only keep pathways with at least 10 genes
if (length(gene_names)>10){
kegg_full$gs[count2] <- list(gene_names)
kegg_full$pathwaynames[count2] <- path_full[[1]]$NAME
count2 <- count2 + 1
}
}
}
save(kegg_full,file="./output_data/kegg_full.hsa.RData")
summary(kegg_full)
Convert to gene names to match KEGG.
gene_list <- names(exp_voom)
gene_names <- sapply(strsplit(gene_list,".",fixed=T), `[`, 1)
exp_voom_names <- exp_voom
names(exp_voom_names) <- gene_names
Run Pathifier.
data <- t(exp_voom_names)
dir.create(paste0('./brca_rnaseq_analysis/test_pathifier/'))
kegg_full[["gs"]][[40]] <- NULL
kegg_full[["gs"]][[50]] <- NULL
kegg_full[["gs"]][[54]] <- NULL
kegg_full$pathwaynames <- kegg_full$pathwaynames[c(-40)]
kegg_full$pathwaynames <- kegg_full$pathwaynames[c(-50)]
kegg_full$pathwaynames <- kegg_full$pathwaynames[c(-54)]
pathway_dereg <- pathifier::quantify_pathways_deregulation(data,allgenes = row.names(data),syms = kegg_full$gs,pathwaynames = kegg_full$pathwaynames,normals = (code=="Normal"), attempts=100)
scores <- do.call(rbind,pathway_dereg$scores)
scores <- as.data.frame(t(scores))
names(scores) <- names(pathway_dereg$scores)
names(scores) <- make.names(names(scores))
row.names(scores) <- row.names(exp_voom_names)
write.table(scores,file=paste0('./brca_rnaseq_analysis/test_pathifier/pathifier_scores.txt'),quote = F, sep='\t')
pathifier_cluster <- ssnpa::cluster_samples(features = scores,
meta_data_df = clinical,
n_pc = 10,
cluster_resolution =0.6,
cluster_ref_samples = F,
ref_samples = (code=="Normal"),
seed_value = 1)$clustering
write.table(pathifier_cluster,file=paste0('./brca_rnaseq_analysis/test_pathifier/pathifier_cluster_output.txt'),quote = F, sep='\t')
Check association between Pathifier clusters and molecular subtype
pathifier_result <- read.table('./brca_rnaseq_analysis/test_pathifier/pathifier_cluster_output.txt')
Cluster <- pathifier_result$Cluster
cluster_x_subtype <- table(pathifier_result$Cluster, clinical_brca$subtype)
cluster_x_subtype
##
## Basal HER2+ Luminal A Luminal B
## 0 1 0 135 29
## 1 100 9 16 3
## 2 0 0 83 22
## 3 6 18 32 19
## 4 0 0 58 10
## 5 4 9 26 26
## 6 0 0 36 9
## 7 1 1 38 7
## 8 4 0 21 1
chisq.test(cluster_x_subtype)
## Warning in chisq.test(cluster_x_subtype): Chi-squared approximation may be
## incorrect
##
## Pearson's Chi-squared test
##
## data: cluster_x_subtype
## X-squared = 596.23, df = 24, p-value < 2.2e-16
Run survival analysis with Pathifier clsuters
# run survival analysis
pathifier_surv_full <- survminer::ggsurvplot(
survival::survfit(survival::Surv(as.numeric(as.character(all_clin$new_death)),all_clin$death_event)~Cluster), # survfit object with calculated statistics.
data = all_clin, # data used to fit survival curves.
legend = "right",
risk.table = TRUE, # show risk table.
pval = TRUE, # show p-value of log-rank test.
pval.coord = c(6500,1),
pval.size = 12,
conf.int = F, # show confidence intervals for
# point estimaes of survival curves.
xlim = c(0,9000), # present narrower X axis, but not affect
# survival estimates.
break.time.by = 500, # break X axis in time intervals by 500.
ggtheme = ggplot2::theme_minimal(), # customize plot and risk table with a theme.
risk.table.y.text.col = T, # colour risk table text annotations.
risk.table.y.text = F # show bars instead of names in text annotations
# in legend of risk table
)
pathifier_surv_trunc <- survminer::ggsurvplot(
survival::survfit(survival::Surv(as.numeric(as.character(all_clin$new_death)),all_clin$death_event)~Cluster), # survfit object with calculated statistics.
data = all_clin, # data used to fit survival curves.
legend = "right",
risk.table = F, # show risk table.
pval = TRUE, # show p-value of log-rank test.
pval.coord = c(4000,1),
pval.size = 12,
conf.int = F, # show confidence intervals for
# point estimaes of survival curves.
xlim = c(0,5000), # present narrower X axis, but not affect
# survival estimates.
break.time.by = 500, # break X axis in time intervals by 500.
ggtheme = ggplot2::theme_minimal(), # customize plot and risk table with a theme.
)
ssnpa_loadings <- read.table('./brca_rnaseq_analysis/ssnpa_results/ssNPA_loading_output_pd_10_.txt', quote="", sep='\t', header=T)
ssnpa_loadings_abs <- abs(ssnpa_loadings)
top_pc_genes <- list()
top_pc_loadings <- list()
pc <- list()
top_n <- 10
for (i in 1:dim(ssnpa_loadings_abs)[2]){
loadings <- ssnpa_loadings[order(-ssnpa_loadings_abs[,i]),i]
genes <- row.names(ssnpa_loadings)[order(-ssnpa_loadings_abs[,i])]
top_pc_genes <- c(top_pc_genes, genes[1:top_n])
top_pc_loadings <- c(top_pc_loadings, loadings[1:top_n])
pc <- c(pc, rep(paste0("PC", toString(i)),top_n))
}
gene_id <- unlist(top_pc_genes)
loading <- unlist(top_pc_loadings)
PC <- unlist(pc)
ssnpa_top_loadings <- data.frame(gene_id, loading, PC)
ssnpa_top_loadings$gene_entrez <- sapply(strsplit(as.character(ssnpa_top_loadings$gene_id),".",fixed=T), `[`, 2)
ssnpa_top_loadings$gene_name <- sapply(strsplit(as.character(ssnpa_top_loadings$gene_id),".",fixed=T), `[`, 1)
ensembl <- biomaRt::useEnsembl(biomart="ensembl", dataset="hsapiens_gene_ensembl")
lookup <- biomaRt::getBM(attributes=c('entrezgene_id','name_1006'), filters ='entrezgene_id', values = ssnpa_top_loadings$gene_entrez, mart = ensembl)
t <- match(ssnpa_top_loadings$gene_entrez, lookup$entrezgene_id)
ssnpa_top_loadings$description <- lookup$name_1006[t]
length(unique(ssnpa_top_loadings$gene_name))
## [1] 95
ssnpa_top_loadings[,1:3]
## gene_id loading PC
## 1 ITM2A.9452 -0.04241358 PC1
## 2 TNN.63923 -0.04042540 PC1
## 3 CLDN11.5010 -0.04019018 PC1
## 4 SCN4B.6330 -0.04004075 PC1
## 5 HOXA7.3204 -0.03883170 PC1
## 6 LOC728264.728264 -0.03848648 PC1
## 7 CCDC8.83987 -0.03752029 PC1
## 8 ADAM33.80332 -0.03597111 PC1
## 9 CORO2B.10391 -0.03579380 PC1
## 10 RSPO3.84870 -0.03578246 PC1
## 11 RGMA.56963 -0.06297852 PC2
## 12 CHST3.9469 -0.06122275 PC2
## 13 FAM47E.100129583 0.05954893 PC2
## 14 GSTP1.2950 -0.05899879 PC2
## 15 ETV7.51513 0.05596822 PC2
## 16 TTC39A.22996 0.05515450 PC2
## 17 GGTA1.2681 0.05464110 PC2
## 18 DMD.1756 -0.05461720 PC2
## 19 AMOTL1.154810 -0.05388841 PC2
## 20 EGFR.1956 -0.05344762 PC2
## 21 LRRC15.131578 -0.10101857 PC3
## 22 HTRA3.94031 -0.09392926 PC3
## 23 PI16.221476 -0.09188154 PC3
## 24 TMEM90B.79953 -0.09099014 PC3
## 25 TNXB.7148 -0.08774209 PC3
## 26 COL11A1.1301 -0.08496308 PC3
## 27 COL8A1.1295 -0.08465184 PC3
## 28 AEBP1.165 -0.08424014 PC3
## 29 IGF1.3479 -0.08388008 PC3
## 30 PRG4.10216 -0.08367251 PC3
## 31 FAM176B.55194 -0.06456423 PC4
## 32 KRT23.25984 -0.06141949 PC4
## 33 HIST4H4.121504 -0.06086621 PC4
## 34 PDZK1IP1.10158 -0.05601234 PC4
## 35 ETNK2.55224 -0.05529035 PC4
## 36 CYBRD1.79901 0.05504212 PC4
## 37 C6orf155.79940 -0.05442520 PC4
## 38 CLEC5A.23601 0.05420223 PC4
## 39 S100A1.6271 -0.05355781 PC4
## 40 EPN3.55040 -0.05293997 PC4
## 41 SPRY2.10253 -0.07184161 PC5
## 42 PRUNE2.158471 -0.06814914 PC5
## 43 TFEC.22797 -0.06675750 PC5
## 44 PRKCQ.5588 0.06652540 PC5
## 45 CACNB2.783 0.06565950 PC5
## 46 DYNC1I1.1780 -0.06462310 PC5
## 47 CDC14A.8556 -0.06069796 PC5
## 48 SYN1.6853 0.06058417 PC5
## 49 MFGE8.4240 -0.05760957 PC5
## 50 SCN4B.6330 -0.05755966 PC5
## 51 CLSPN.63967 -0.07546052 PC6
## 52 GBP4.115361 0.06414826 PC6
## 53 IL6R.3570 -0.06342410 PC6
## 54 PRF1.5551 -0.06290388 PC6
## 55 TAGAP.117289 -0.06225653 PC6
## 56 UBE2C.11065 -0.06098122 PC6
## 57 TMEM130.222865 -0.05526640 PC6
## 58 AOAH.313 -0.05491570 PC6
## 59 CACNB2.783 0.05424362 PC6
## 60 PTTG1.9232 -0.05313329 PC6
## 61 GRB7.2886 -0.07001603 PC7
## 62 CREB3L1.90993 -0.06578191 PC7
## 63 ERBB2.2064 -0.06501048 PC7
## 64 TRPS1.7227 0.06200590 PC7
## 65 DST.667 0.06060793 PC7
## 66 GPRC5C.55890 0.05988186 PC7
## 67 PDZD2.23037 -0.05944198 PC7
## 68 FGF13.2258 0.05917485 PC7
## 69 MFI2.4241 -0.05908595 PC7
## 70 CYBB.1536 -0.05803922 PC7
## 71 TPRG1.285386 0.07140014 PC8
## 72 B3GNT7.93010 0.06244497 PC8
## 73 PIGR.5284 0.06099476 PC8
## 74 SFRP4.6424 -0.05869513 PC8
## 75 MFAP5.8076 -0.05830432 PC8
## 76 GBP4.115361 -0.05717884 PC8
## 77 C1S.716 -0.05601388 PC8
## 78 LPL.4023 -0.05485978 PC8
## 79 CTGF.1490 -0.05388728 PC8
## 80 MPZL2.10205 0.05354232 PC8
## 81 P2RX5.5026 0.07910823 PC9
## 82 SYP.6855 0.07810780 PC9
## 83 IQGAP2.10788 0.06778438 PC9
## 84 SYN1.6853 0.06481237 PC9
## 85 COL8A2.1296 -0.06334487 PC9
## 86 PTPRN2.5799 0.06196022 PC9
## 87 ASPN.54829 -0.06187037 PC9
## 88 MAML2.84441 0.06040511 PC9
## 89 FIBIN.387758 -0.06011679 PC9
## 90 ODZ3.55714 -0.05978657 PC9
## 91 TNFRSF4.7293 0.08729933 PC10
## 92 CLSPN.63967 0.08312917 PC10
## 93 NBPF16.728936 0.07828141 PC10
## 94 RASSF3.283349 0.07716121 PC10
## 95 FAM21A.387680 0.07485755 PC10
## 96 MAGED4.728239 0.07165218 PC10
## 97 RASEF.158158 0.07109732 PC10
## 98 WNT3.7473 0.06791188 PC10
## 99 AKAP2.11217 0.06533658 PC10
## 100 MYO9A.4649 0.06363982 PC10
write.table(ssnpa_top_loadings,'./brca_rnaseq_analysis/ssnpa_top_loadings.txt',quote=F, sep='\t', row.names=F)
ssnpa_result <- read.table('./brca_rnaseq_analysis/ssnpa_results/ssnpa_cluster_output_pd_10_.txt', quote="", sep='\t', header = T)
Cluster <- as.factor(ssnpa_result$Cluster)
itm2a <- ggplot2::ggplot(exp_voom[code=="BRCA",],ggplot2::aes(x=as.factor(Cluster),y=ITM2A.9452))+
ggplot2::geom_boxplot()+
ggplot2::theme_bw()+
ggplot2::labs(x="ssNPA Cluster",y="ITM2A Expression, log2(cpm)")+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.text.x = ggplot2::element_text(size=14),
axis.text.y = ggplot2::element_text(size=14),
axis.title=ggplot2::element_text(size=24))
tnn <- ggplot2::ggplot(exp_voom[code=="BRCA",],ggplot2::aes(x=as.factor(Cluster),y=TNN.63923))+
ggplot2::geom_boxplot()+
ggplot2::theme_bw()+
ggplot2::labs(x="ssNPA Cluster",y="TNN Expression, log2(cpm)")+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.text.x = ggplot2::element_text(size=14),
axis.text.y = ggplot2::element_text(size=14),
axis.title=ggplot2::element_text(size=24))
scn4b <- ggplot2::ggplot(exp_voom[code=="BRCA",],ggplot2::aes(x=as.factor(Cluster),y=SCN4B.6330))+
ggplot2::geom_boxplot()+
ggplot2::theme_bw()+
ggplot2::labs(x="ssNPA Cluster",y="SCN4B Expression, log2(cpm)")+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.text.x = ggplot2::element_text(size=14),
axis.text.y = ggplot2::element_text(size=14),
axis.title=ggplot2::element_text(size=24))
hoxa7 <- ggplot2::ggplot(exp_voom[code=="BRCA",],ggplot2::aes(x=as.factor(Cluster),y=HOXA7.3204))+
ggplot2::geom_boxplot()+
ggplot2::theme_bw()+
ggplot2::labs(x="ssNPA Cluster",y="HOXA7 Expression, log2(cpm)")+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.text.x = ggplot2::element_text(size=14),
axis.text.y = ggplot2::element_text(size=14),
axis.title=ggplot2::element_text(size=24))
gex_result <- read.table('./brca_rnaseq_analysis/test_gex/gex_cluster_output.txt', quote="", sep='\t', header = T)
gex_class <- ggplot2::ggplot(gex_result,ggplot2::aes(tSNE_1,tSNE_2))+
ggplot2::geom_point(ggplot2::aes(colour=clinical_brca$subtype), size=4.5)+
#ggplot2::ggtitle("Gene Expression")+
ggplot2::theme_bw()+
ggplot2::labs(color="Molecular Subtype")+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
legend.text = ggplot2::element_text(size = 36),
legend.title = ggplot2::element_text(size = 38),
legend.key.size = ggplot2::unit(3.5, "lines"),
plot.title = ggplot2::element_text(hjust = 0.5, size = 40))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size =10)))
gex_cluster <- ggplot2::ggplot(gex_result,ggplot2::aes(tSNE_1,tSNE_2))+
ggplot2::geom_point(ggplot2::aes(colour=as.factor(Cluster)), size=4.5)+
#ggplot2::ggtitle('Gene Expression')+
ggplot2::theme_bw()+
ggplot2::labs(color="Cluster")+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=38),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
gex_surv_trunc_g <- gex_surv_trunc$plot+
ggplot2::labs(title='Gene Expression')+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.title=ggplot2::element_text(size=24))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
gex_surv_full_curve <- gex_surv_full$plot+
#ggplot2::labs(title='Gene Expression')+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.title=ggplot2::element_text(size=24))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
gex_surv_full_table <- gex_surv_full$table+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,19,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.title=ggplot2::element_text(size=24))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
ssnpa_result <- read.table('./brca_rnaseq_analysis/ssnpa_results/ssnpa_cluster_output_pd_10_.txt', quote="", sep='\t', header = T)
ssnpa_class <- ggplot2::ggplot(ssnpa_result,ggplot2::aes(tSNE_1,tSNE_2))+
ggplot2::geom_point(ggplot2::aes(colour=clinical_brca$subtype), size=4.5)+
ggplot2::ggtitle("ssNPA")+
ggplot2::theme_bw()+
ggplot2::labs(color="Molecular Subtype")+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
legend.text = ggplot2::element_text(size = 36),
legend.title = ggplot2::element_text(size = 38),
legend.key.size = ggplot2::unit(3.5, "lines"),
plot.title = ggplot2::element_text(hjust = 0.5, size = 40))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size =10)))
ssnpa_cluster <- ggplot2::ggplot(ssnpa_result,ggplot2::aes(tSNE_1,tSNE_2))+
ggplot2::geom_point(ggplot2::aes(colour=as.factor(Cluster)), size=4.5)+
ggplot2::ggtitle('ssNPA')+
ggplot2::theme_bw()+
ggplot2::labs(color="Cluster")+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=38),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
ssnpa_surv_trunc_g <- ssnpa_surv_trunc$plot+
ggplot2::labs(title='ssNPA')+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.title=ggplot2::element_text(size=24))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
ssnpa_surv_full_curve <- ssnpa_surv_full$plot+
ggplot2::labs(title='ssNPA')+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.title=ggplot2::element_text(size=24))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
ssnpa_surv_full_table <- ssnpa_surv_full$table+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,19,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.title=ggplot2::element_text(size=24))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
ssgsea_result <- read.table('./brca_rnaseq_analysis/test_ssgsea/ssgsea_cluster_output.txt', quote="", sep='\t', header = T)
ssgsea_class <- ggplot2::ggplot(ssgsea_result,ggplot2::aes(tSNE_1,tSNE_2))+
ggplot2::geom_point(ggplot2::aes(colour=clinical_brca$subtype), size=4.5)+
ggplot2::ggtitle("ssGSEA")+
ggplot2::theme_bw()+
ggplot2::labs(color="Molecular Subtype")+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
legend.text = ggplot2::element_text(size = 36),
legend.title = ggplot2::element_text(size = 38),
legend.key.size = ggplot2::unit(3.5, "lines"),
plot.title = ggplot2::element_text(hjust = 0.5, size = 40))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size =10)))
ssgsea_cluster <- ggplot2::ggplot(ssgsea_result,ggplot2::aes(tSNE_1,tSNE_2))+
ggplot2::geom_point(ggplot2::aes(colour=as.factor(Cluster)), size=4.5)+
ggplot2::ggtitle('ssGSEA')+
ggplot2::theme_bw()+
ggplot2::labs(color="Cluster")+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=38),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
ssgsea_surv_trunc_g <- ssgsea_surv_trunc$plot+
ggplot2::labs(title='ssGSEA')+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.title=ggplot2::element_text(size=24))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
ssgsea_surv_full_curve <- ssgsea_surv_full$plot+
ggplot2::labs(title='ssGSEA')+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.title=ggplot2::element_text(size=24))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
ssgsea_surv_full_table <- ssgsea_surv_full$table+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,19,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.title=ggplot2::element_text(size=24))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
pathifier_result <- read.table('./brca_rnaseq_analysis/test_pathifier/pathifier_cluster_output.txt', quote="", sep='\t', header = T)
pathifier_class <- ggplot2::ggplot(pathifier_result,ggplot2::aes(tSNE_1,tSNE_2))+
ggplot2::geom_point(ggplot2::aes(colour=clinical_brca$subtype), size=4.5)+
ggplot2::ggtitle("Pathifier")+
ggplot2::theme_bw()+
ggplot2::labs(color="Molecular Subtype")+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
legend.text = ggplot2::element_text(size = 36),
legend.title = ggplot2::element_text(size = 38),
legend.key.size = ggplot2::unit(3.5, "lines"),
plot.title = ggplot2::element_text(hjust = 0.5, size = 40))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size =10)))
pathifier_cluster <- ggplot2::ggplot(pathifier_result,ggplot2::aes(tSNE_1,tSNE_2))+
ggplot2::geom_point(ggplot2::aes(colour=as.factor(Cluster)), size=4.5)+
ggplot2::ggtitle('Pathifier')+
ggplot2::theme_bw()+
ggplot2::labs(color="Cluster")+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=38),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
pathifier_surv_trunc_g <- pathifier_surv_trunc$plot+
ggplot2::labs(title='Pathifier')+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.title=ggplot2::element_text(size=24))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
pathifier_surv_full_curve <- pathifier_surv_full$plot+
ggplot2::labs(title='Pathifier')+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.title=ggplot2::element_text(size=24))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
pathifier_surv_full_table <- pathifier_surv_full$table+
ggplot2::theme(plot.margin = ggplot2::unit(c(3,19,3,3), "lines"),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=0),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
axis.title=ggplot2::element_text(size=24))+
ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
color = ggplot2::guide_legend(override.aes = list(size = 10)))
pdf("brca_subtypes_and_truncated_survival.pdf", width=50,height=24)
ggpubr::ggarrange(ssnpa_class,pathifier_class,ssgsea_class, ssnpa_surv_trunc_g,pathifier_surv_trunc_g,ssgsea_surv_trunc_g, labels=c("A","B","C","D","E","F"),font.label = list(size = 52, face = "bold"),nrow=2,ncol=3)
dev.off()
## quartz_off_screen
## 2
pdf("brca_clusters.pdf", width=56,height=18)
ggpubr::ggarrange(ssnpa_cluster,pathifier_cluster,ssgsea_cluster, labels=c("A","B","C"),font.label = list(size = 52, face = "bold"),nrow=1,ncol=3)
dev.off()
## quartz_off_screen
## 2
pdf("brca_survival_full.pdf",width=45,height=18)
ggpubr::ggarrange(ssnpa_surv_full_curve,pathifier_surv_full_curve,ssgsea_surv_full_curve,ssnpa_surv_full_table, pathifier_surv_full_table,ssgsea_surv_full_table, labels=c("A","B","C","","",""),font.label = list(size = 52, face = "bold"),nrow=2,ncol=3,heights=c(2,1))
dev.off()
## quartz_off_screen
## 2
pdf("brca_gex_results.pdf",width=45,height=18)
ggpubr::ggarrange(gex_class, gex_cluster, gex_surv_full_curve,ggplot2::element_blank(),ggplot2::element_blank(),gex_surv_full_table, labels=c("A","B","C","","",""),font.label = list(size = 52, face = "bold"),nrow=2,ncol=3, heights=c(2,1),widths=c(1.15,1,1))
## Warning in as_grob.default(plot): Cannot convert object of class
## element_blankelement into a grob.
## Warning in as_grob.default(plot): Cannot convert object of class
## element_blankelement into a grob.
dev.off()
## quartz_off_screen
## 2
pdf("brca_gene_boxplots.pdf", width=24, height=24)
ggpubr::ggarrange(itm2a, tnn, scn4b, hoxa7, labels="AUTO", font.label = list(size = 52, face = "bold"), nrow=2, ncol=2)
dev.off()
## quartz_off_screen
## 2
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.2 locfit_1.5-9.1 lattice_0.20-38
## [4] tidyr_1.0.0 prettyunits_1.0.2 zoo_1.8-6
## [7] assertthat_0.2.1 zeallot_0.1.0 digest_0.6.22
## [10] R6_2.4.0 survminer_0.4.6 backports_1.1.5
## [13] stats4_3.6.1 RSQLite_2.1.2 evaluate_0.14
## [16] httr_1.4.1 ggplot2_3.2.1 pillar_1.4.2
## [19] progress_1.2.2 rlang_0.4.1 curl_4.2
## [22] lazyeval_0.2.2 data.table_1.12.6 blob_1.2.0
## [25] S4Vectors_0.22.1 Matrix_1.2-17 rmarkdown_1.16
## [28] labeling_0.3 splines_3.6.1 stringr_1.4.0
## [31] RCurl_1.95-4.12 bit_1.1-14 biomaRt_2.40.5
## [34] munsell_0.5.0 broom_0.5.2 compiler_3.6.1
## [37] xfun_0.10 pkgconfig_2.0.3 BiocGenerics_0.30.0
## [40] htmltools_0.4.0 tidyselect_0.2.5 tibble_2.1.3
## [43] gridExtra_2.3 km.ci_0.5-2 edgeR_3.26.8
## [46] IRanges_2.18.3 XML_3.98-1.20 crayon_1.3.4
## [49] dplyr_0.8.3 ggpubr_0.2.3 bitops_1.0-6
## [52] grid_3.6.1 nlme_3.1-141 xtable_1.8-4
## [55] gtable_0.3.0 lifecycle_0.1.0 DBI_1.0.0
## [58] magrittr_1.5 KMsurv_0.1-5 scales_1.0.0
## [61] stringi_1.4.3 ggsignif_0.6.0 limma_3.40.6
## [64] survMisc_0.5.5 generics_0.0.2 vctrs_0.2.0
## [67] cowplot_1.0.0 tools_3.6.1 bit64_0.9-7
## [70] Biobase_2.44.0 glue_1.3.1 purrr_0.3.3
## [73] hms_0.5.2 parallel_3.6.1 survival_2.44-1.1
## [76] yaml_2.2.0 ssnpa_0.0.0.9003 AnnotationDbi_1.46.1
## [79] colorspace_1.4-1 memoise_1.1.0 knitr_1.25