Set up

Install packages as necessary.

dir.create("brca_rnaseq_analysis")
list.of.bioconductor.packages <- c("edgeR","limma","pathifier","KEGGREST","biomaRt","GSVA","GSEABase","qusage")
new.bioc.packages <- list.of.bioconductor.packages[!(list.of.bioconductor.packages %in% installed.packages()[,"Package"])]
if(length(new.bioc.packages)) BiocManager::install(new.bioc.packages)

list.of.packages <- c("aricode","ggplot2","mclust","Rmisc","survminer","survival")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)

Read in and preprocess RNA-seq data

Data downloaded from Broad GDAC Firehose https://gdac.broadinstitute.org/. BRCA RNA-seq data from illuminahiseq_rnaseqv2-RSEM_genes_normalized and clinical data from Merge_Clinical.

header <- read.table('./input_data/BRCA.rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes_normalized__data.data.txt',nrows=1,sep="\t", quote="", header=F, row.names=1,stringsAsFactors=F)
brca_data <- read.table('./input_data/BRCA.rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes_normalized__data.data.txt',skip=2,sep="\t", quote="", header=F, row.names=1,stringsAsFactors=F)
colnames(brca_data) <- unlist(header)
names(brca_data) <- make.names(names(brca_data))
test <- row.names(brca_data)
test <- gsub ('|','.',test,fixed=T)
test <- gsub ('?','X',test, fixed=T)
row.names(brca_data) <- test
brca_data <- t(as.matrix(brca_data))
brca_data <- as.data.frame(brca_data,stringsAsFactors=F)

#Parse sample IDs to get normals
sample <- row.names(brca_data)
key <- data.frame( do.call( rbind, strsplit( sample, '.',fixed=T ) ) )
names(key) <- c("project","tss","participant","sample","portion","plate","center")
code <- character(length = dim(brca_data)[1])
code[1:length(code)] <- "BRCA"

code[key$sample=="11A" | key$sample=="11B"] <- "Normal"
table(code)
## code
##   BRCA Normal 
##   1100    112
#Read in clinical data
df_full <- as.data.frame(code,row.names=row.names(brca_data))
df_full_row_mod <- row.names(df_full)
s <- data.frame(do.call('rbind', strsplit(df_full_row_mod,'.',fixed=TRUE))) 
df_full$lookup_id <- paste(tolower(s$X1),tolower(s$X2),tolower(s$X3),sep="-")
df_full$full_id <- row.names(df_full)

brca_full <- read.table("./input_data/BRCA.clin.merged.txt",header=F,quote="",row.names=1,sep='\t')
brca_full <- as.data.frame(t(brca_full))
row.names(brca_full) <- brca_full$patient.bcr_patient_barcode
brca_full$lookup_id <- brca_full$patient.bcr_patient_barcode

clinical <- merge(df_full, brca_full, by="lookup_id", all.x=T)

clinical$subtype <- NA
clinical$subtype[clinical$patient.breast_carcinoma_estrogen_receptor_status=="negative" & clinical$patient.breast_carcinoma_progesterone_receptor_status=="negative" & clinical$patient.lab_proc_her2_neu_immunohistochemistry_receptor_status=="negative"] <- 'Basal'
clinical$subtype[clinical$patient.breast_carcinoma_estrogen_receptor_status=="negative" & clinical$patient.breast_carcinoma_progesterone_receptor_status=="negative" & clinical$patient.lab_proc_her2_neu_immunohistochemistry_receptor_status=="positive"] <- 'HER2+'
clinical$subtype[(clinical$patient.breast_carcinoma_estrogen_receptor_status=="positive" | clinical$patient.breast_carcinoma_progesterone_receptor_status=="positive") & clinical$patient.lab_proc_her2_neu_immunohistochemistry_receptor_status=="negative"] <- 'Luminal A'
clinical$subtype[(clinical$patient.breast_carcinoma_estrogen_receptor_status=="positive" | clinical$patient.breast_carcinoma_progesterone_receptor_status=="positive") & clinical$patient.lab_proc_her2_neu_immunohistochemistry_receptor_status=="positive"] <- 'Luminal B'
table(clinical$subtype)
## 
##     Basal     HER2+ Luminal A Luminal B 
##       127        41       488       144
clinical_normal <- clinical[clinical$code=="Normal",]
clinical_brca <- clinical[clinical$code=="BRCA",]

Filter out lowly expressed genes and use voom to convert counts to log2(cpm) with associated weights.

# Filter out lowly expressed genes
keep <- edgeR::filterByExpr(t(brca_data))
table(keep)
## keep
## FALSE  TRUE 
##  6529 14002
gene_exp <- brca_data[,keep]

# Apply voom
exp_voom <- limma::voom(t(gene_exp),plot=T)

exp_voom <- as.data.frame(t(exp_voom$E))

write.table(exp_voom,file="./brca_rnaseq_analysis/brca_normalized_counts.exp_filt.voom.txt",quote=F,sep="\t")

Keep only 3,000 most highly variant genes.

exp_voom <- ssnpa::filter_by_variance(exp_voom, 3000)
dim(exp_voom)
## [1] 1212 3000
write.table(exp_voom,file='./brca_rnaseq_analysis/brca_normalized_counts.exp_filt.voom.var_filt.txt',quote=F, sep='\t')

Prepare survival analysis

ind_keep <- grep('days_to_new_tumor_event_after_initial_treatment',colnames(clinical_brca))

# here are numerous follow ups, collapse them together and keep the first value (the higher one) if more than one is available
new_tum <- as.matrix(clinical_brca[,ind_keep])
new_tum_collapsed <- c()
for (i in 1:dim(new_tum)[1]){
  if ( sum ( is.na(new_tum[i,])) < dim(new_tum)[2]){
    m <- min(new_tum[i,],na.rm=T)
    new_tum_collapsed <- c(new_tum_collapsed,m)
  } else {
    new_tum_collapsed <- c(new_tum_collapsed,'NA')
  }
}

# do the same to death
ind_keep <- grep('days_to_death',colnames(clinical_brca))
death <- as.matrix(clinical_brca[,ind_keep])
death_collapsed <- c()
for (i in 1:dim(death)[1]){
  if ( sum ( is.na(death[i,])) < dim(death)[2]){
    m <- max(death[i,],na.rm=T)
    death_collapsed <- c(death_collapsed,m)
  } else {
    death_collapsed <- c(death_collapsed,'NA')
  }
}

# and days last follow up here we take the most recent which is the max number
ind_keep <- grep('days_to_last_followup',colnames(clinical_brca))
fl <- as.matrix(clinical_brca[,ind_keep])
fl_collapsed <- c()
for (i in 1:dim(fl)[1]){
  if ( sum(is.na(fl[i,])) < dim(fl)[2]){
    m <- max(fl[i,],na.rm=T)
    fl_collapsed <- c(fl_collapsed,m)
  } else {
    fl_collapsed <- c(fl_collapsed,'NA')
  }
}

# and put everything together
all_clin <- data.frame(clinical_brca$code,new_tum_collapsed,death_collapsed,fl_collapsed)
colnames(all_clin) <- c("sample_code",'new_tumor_days', 'death_days', 'followUp_days')

# create vector with time to new tumor containing data to censor for new_tumor
all_clin$new_time <- c()
for (i in 1:length(as.numeric(as.character(all_clin$new_tumor_days)))){
  all_clin$new_time[i] <- ifelse ( is.na(as.numeric(as.character(all_clin$new_tumor_days))[i]),
                    as.numeric(as.character(all_clin$followUp_days))[i],as.numeric(as.character(all_clin$new_tumor_days))[i])
}

# create vector time to death containing values to censor for death
all_clin$new_death <- c()
for (i in 1:length(as.numeric(as.character(all_clin$death_days)))){
  all_clin$new_death[i] <- ifelse ( is.na(as.numeric(as.character(all_clin$death_days))[i]),
                                 as.numeric(as.character(all_clin$followUp_days))[i],as.numeric(as.character(all_clin$death_days))[i])
}

# create vector for death censoring
table(clinical_brca$patient.vital_status)
## 
## alive  dead 
##   993   107
all_clin$death_event <- ifelse(clinical_brca$patient.vital_status == 'alive', 0,1)

#finally add row.names to clinical
row.names(all_clin) <- row.names(clinical_brca)

Run ssNPA

  dir.create("./brca_rnaseq_analysis/ssnpa_results")
for (pd in c(12,11,10,9,8,7,6,5,4)){
  ssnpa_cluster <- ssnpa::run_ssnpa(gene_exp = exp_voom,
                 reference_samples = (code=="Normal"), 
                 out_dir = paste0('./brca_rnaseq_analysis/ssnpa_results'),
                 seed_value = 1,
                 fgs_pd = pd,
                 n_pc = 10,
                 cluster_resolution = 0.6,
                 cluster_ref_samples = F)
  write.table(ssnpa_cluster$clustering,file=paste0('./brca_rnaseq_analysis/ssnpa_results/','ssnpa_cluster_output_pd_',toString(pd),'_.txt'),quote = F, sep='\t')
  write.table(ssnpa_cluster$loadings,file=paste0('./brca_rnaseq_analysis/ssnpa_results/','/ssNPA_loading_output_pd_',toString(pd),'_.txt'),quote = F, sep='\t')
}

Check association between ssNPA clusters and molecular subtype

ssnpa_result <- read.table('./brca_rnaseq_analysis/ssnpa_results/ssnpa_cluster_output_pd_10_.txt')
Cluster <- ssnpa_result$Cluster
cluster_x_subtype <- table(ssnpa_result$Cluster, clinical_brca$subtype)
cluster_x_subtype
##    
##     Basal HER2+ Luminal A Luminal B
##   0     0     0       140        31
##   1     2     0       121        33
##   2     5     1       129        16
##   3    99     9        19         2
##   4    10    27        36        44
chisq.test(cluster_x_subtype)
## 
##  Pearson's Chi-squared test
## 
## data:  cluster_x_subtype
## X-squared = 609.52, df = 12, p-value < 2.2e-16

Run survival analysis with ssNPA clsuters

# run survival analysis
ssnpa_surv_full <- survminer::ggsurvplot(
   survival::survfit(survival::Surv(as.numeric(as.character(all_clin$new_death)),all_clin$death_event)~Cluster), # survfit object with calculated statistics.
   data = all_clin,  # data used to fit survival curves. 
   legend = "right",
   risk.table = TRUE,       # show risk table.
   pval = TRUE,             # show p-value of log-rank test.
   pval.coord = c(6500,1),
   pval.size = 12,
   conf.int = F,         # show confidence intervals for 
                            # point estimaes of survival curves.
   xlim = c(0,9000),        # present narrower X axis, but not affect
                            # survival estimates.
   break.time.by = 500,     # break X axis in time intervals by 500.
   ggtheme = ggplot2::theme_minimal(), # customize plot and risk table with a theme.
 risk.table.y.text.col = T, # colour risk table text annotations.
  risk.table.y.text = F # show bars instead of names in text annotations
                            # in legend of risk table
 )

ssnpa_surv_trunc <- survminer::ggsurvplot(
   survival::survfit(survival::Surv(as.numeric(as.character(all_clin$new_death)),all_clin$death_event)~Cluster), # survfit object with calculated statistics.
   data = all_clin,  # data used to fit survival curves.
   legend = "right",
   risk.table = F,       # show risk table.
   pval = TRUE,             # show p-value of log-rank test.
   pval.coord = c(4000,1),
   pval.size = 12,
   conf.int = F,         # show confidence intervals for
                            # point estimaes of survival curves.
   xlim = c(0,5000),        # present narrower X axis, but not affect
                            # survival estimates.
   break.time.by = 500,     # break X axis in time intervals by 500.
   ggtheme = ggplot2::theme_minimal(), # customize plot and risk table with a theme.
)

Run ssGSEA

Use ssGSEA with MSigDB pathways (v7) downloaded from http://software.broadinstitute.org/gsea/msigdb/collections.jsp#C2 to cluster these data.

dir.create(paste0('./brca_rnaseq_analysis/test_ssgsea'))
gene_list <- names(exp_voom)
entrez_ids <- sapply(strsplit(gene_list,".",fixed=T), `[`, 2)
exp_voom_entrez <- exp_voom
names(exp_voom_entrez) <- entrez_ids

hg_c2 <- qusage::read.gmt('./input_data/c2.all.v7.0.entrez.gmt')
ssgsea_features <- GSVA::gsva(as.matrix(t(exp_voom_entrez)), hg_c2, method="ssgsea")
ssgsea_cluster_input <- as.data.frame(t(ssgsea_features))
names(ssgsea_cluster_input) <- make.names(names(ssgsea_cluster_input))
ssgsea_cluster <- ssnpa::cluster_samples(features = as.data.frame(t(ssgsea_features)),
                                      meta_data_df = clinical,
                                      n_pc = 10,
                                      cluster_resolution =0.6,
                                      cluster_ref_samples = F,
                                      ref_samples = (code=="Normal"),
                                      seed_value = 1)$clustering
write.table(ssgsea_cluster,file=paste0('./brca_rnaseq_analysis/test_ssgsea/ssgsea_cluster_output.txt'),quote = F, sep='\t')

Check association between ssGSEA clusters and molecular subtype

ssgsea_result <- read.table('./brca_rnaseq_analysis/test_ssgsea/ssgsea_cluster_output.txt')
Cluster <- ssgsea_result$Cluster
cluster_x_subtype <- table(ssgsea_result$Cluster, clinical_brca$subtype)
cluster_x_subtype
##    
##     Basal HER2+ Luminal A Luminal B
##   0     5     0       198        32
##   1     0     0       132        27
##   2    11    28        93        64
##   3   100     9        22         3
chisq.test(cluster_x_subtype)
## 
##  Pearson's Chi-squared test
## 
## data:  cluster_x_subtype
## X-squared = 535.62, df = 9, p-value < 2.2e-16

Run survival analysis with ssGSEA clsuters

# run survival analysis
ssgsea_surv_full <- survminer::ggsurvplot(
   survival::survfit(survival::Surv(as.numeric(as.character(all_clin$new_death)),all_clin$death_event)~Cluster), # survfit object with calculated statistics.
   data = all_clin,  # data used to fit survival curves. 
   legend = "right",
   risk.table = TRUE,       # show risk table.
   pval = TRUE,             # show p-value of log-rank test.
   pval.coord = c(6500,1),
   pval.size = 12,
   conf.int = F,         # show confidence intervals for 
                            # point estimaes of survival curves.
   xlim = c(0,9000),        # present narrower X axis, but not affect
                            # survival estimates.
   break.time.by = 500,     # break X axis in time intervals by 500.
   ggtheme = ggplot2::theme_minimal(), # customize plot and risk table with a theme.
 risk.table.y.text.col = T, # colour risk table text annotations.
  risk.table.y.text = F # show bars instead of names in text annotations
                            # in legend of risk table
 )

ssgsea_surv_trunc <- survminer::ggsurvplot(
   survival::survfit(survival::Surv(as.numeric(as.character(all_clin$new_death)),all_clin$death_event)~Cluster), # survfit object with calculated statistics.
   data = all_clin,  # data used to fit survival curves.
   legend = "right",
   risk.table = F,       # show risk table.
   pval = TRUE,             # show p-value of log-rank test.
   pval.coord = c(4000,1),
   pval.size = 12,
   conf.int = F,         # show confidence intervals for
                            # point estimaes of survival curves.
   xlim = c(0,5000),        # present narrower X axis, but not affect
                            # survival estimates.
   break.time.by = 500,     # break X axis in time intervals by 500.
   ggtheme = ggplot2::theme_minimal(), # customize plot and risk table with a theme.
)

Cluster with just gene expression

dir.create(paste0('./brca_rnaseq_analysis/test_gex'))
gex_cluster <- ssnpa::cluster_samples(features = exp_voom,
                                      meta_data_df = clinical,
                                      n_pc = 10,
                                      cluster_resolution =0.6,
                                      cluster_ref_samples = F,
                                      ref_samples = (code=="Normal"),
                                      seed_value = 1)$clustering
write.table(gex_cluster,file=paste0('./brca_rnaseq_analysis/test_gex/gex_cluster_output.txt'),quote = F, sep='\t')

Check association between gene expression clusters and molecular subtype

gex_result <- read.table('./brca_rnaseq_analysis/test_gex/gex_cluster_output.txt')
Cluster <- gex_result$Cluster
cluster_x_subtype <- table(gex_result$Cluster, clinical_brca$subtype)
cluster_x_subtype
##    
##     Basal HER2+ Luminal A Luminal B
##   0    12    29       119        58
##   1     0     0       125        35
##   2     2     0       149        19
##   3   101     8        21         2
##   4     1     0        31        12
chisq.test(cluster_x_subtype)
## Warning in chisq.test(cluster_x_subtype): Chi-squared approximation may be
## incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  cluster_x_subtype
## X-squared = 529.58, df = 12, p-value < 2.2e-16

Run survival analysis with gene expression clsuters

# run survival analysis
gex_surv_full <- survminer::ggsurvplot(
   survival::survfit(survival::Surv(as.numeric(as.character(all_clin$new_death)),all_clin$death_event)~Cluster), # survfit object with calculated statistics.
   data = all_clin,  # data used to fit survival curves. 
   legend = "right",
   risk.table = TRUE,       # show risk table.
   pval = TRUE,             # show p-value of log-rank test.
   pval.coord = c(6500,1),
   pval.size = 12,
   conf.int = F,         # show confidence intervals for 
                            # point estimaes of survival curves.
   xlim = c(0,9000),        # present narrower X axis, but not affect
                            # survival estimates.
   break.time.by = 500,     # break X axis in time intervals by 500.
   ggtheme = ggplot2::theme_minimal(), # customize plot and risk table with a theme.
 risk.table.y.text.col = T, # colour risk table text annotations.
  risk.table.y.text = F # show bars instead of names in text annotations
                            # in legend of risk table
 )

gex_surv_trunc <- survminer::ggsurvplot(
   survival::survfit(survival::Surv(as.numeric(as.character(all_clin$new_death)),all_clin$death_event)~Cluster), # survfit object with calculated statistics.
   data = all_clin,  # data used to fit survival curves.
   legend = "right",
   risk.table = F,       # show risk table.
   pval = TRUE,             # show p-value of log-rank test.
   pval.coord = c(4000,1),
   pval.size = 12,
   conf.int = F,         # show confidence intervals for
                            # point estimaes of survival curves.
   xlim = c(0,5000),        # present narrower X axis, but not affect
                            # survival estimates.
   break.time.by = 500,     # break X axis in time intervals by 500.
   ggtheme = ggplot2::theme_minimal(), # customize plot and risk table with a theme.
)

Run Pathifier

Use Pathifier with KEGG pathways to cluster these data. First, read in KEGG pathways.

dir.create(paste0('./output_data'))
kegg_full <- list()
pathways <- KEGGREST::keggList("pathway", "hsa") #hsa for human
count2 <- 1
for (i in 1:length(pathways)){
    x <- attr(pathways,"names")[i]
    x <- gsub("path:","",x, fixed=T)
    path_full <- KEGGREST::keggGet(x)
    p <- path_full[[1]]$GENE
    if (length(p)>0){
    even_indexes<-seq(2,length(p),2)
    q <- p[even_indexes]
    r <- strsplit(q,";",fixed=T)
    gene_names <- NULL
    count <- 1
    for (j in 1:length(r)){
        if (length(r[[j]])>1){
        gene_names[count] <- r[[j]][1]
        count <- count+1
        }
    }
    # only keep pathways with at least 10 genes
        if (length(gene_names)>10){
    kegg_full$gs[count2] <- list(gene_names)
    kegg_full$pathwaynames[count2] <- path_full[[1]]$NAME
    count2 <- count2 + 1
    }
    }
}
save(kegg_full,file="./output_data/kegg_full.hsa.RData")
summary(kegg_full)

Convert to gene names to match KEGG.

gene_list <- names(exp_voom)
gene_names <- sapply(strsplit(gene_list,".",fixed=T), `[`, 1)
exp_voom_names <- exp_voom
names(exp_voom_names) <- gene_names

Run Pathifier.

data <- t(exp_voom_names)
dir.create(paste0('./brca_rnaseq_analysis/test_pathifier/'))
kegg_full[["gs"]][[40]] <- NULL
kegg_full[["gs"]][[50]] <- NULL
kegg_full[["gs"]][[54]] <- NULL
kegg_full$pathwaynames <- kegg_full$pathwaynames[c(-40)]
kegg_full$pathwaynames <- kegg_full$pathwaynames[c(-50)]
kegg_full$pathwaynames <- kegg_full$pathwaynames[c(-54)]
pathway_dereg <- pathifier::quantify_pathways_deregulation(data,allgenes = row.names(data),syms = kegg_full$gs,pathwaynames = kegg_full$pathwaynames,normals = (code=="Normal"), attempts=100)
scores <- do.call(rbind,pathway_dereg$scores)
scores <- as.data.frame(t(scores))
names(scores) <- names(pathway_dereg$scores)
names(scores) <- make.names(names(scores))
row.names(scores) <- row.names(exp_voom_names)
write.table(scores,file=paste0('./brca_rnaseq_analysis/test_pathifier/pathifier_scores.txt'),quote = F, sep='\t')
pathifier_cluster <- ssnpa::cluster_samples(features = scores,
                                      meta_data_df = clinical,
                                      n_pc = 10,
                                      cluster_resolution =0.6,
                                      cluster_ref_samples = F,
                                      ref_samples = (code=="Normal"),
                                      seed_value = 1)$clustering
write.table(pathifier_cluster,file=paste0('./brca_rnaseq_analysis/test_pathifier/pathifier_cluster_output.txt'),quote = F, sep='\t')

Check association between Pathifier clusters and molecular subtype

pathifier_result <- read.table('./brca_rnaseq_analysis/test_pathifier/pathifier_cluster_output.txt')
Cluster <- pathifier_result$Cluster
cluster_x_subtype <- table(pathifier_result$Cluster, clinical_brca$subtype)
cluster_x_subtype
##    
##     Basal HER2+ Luminal A Luminal B
##   0     1     0       135        29
##   1   100     9        16         3
##   2     0     0        83        22
##   3     6    18        32        19
##   4     0     0        58        10
##   5     4     9        26        26
##   6     0     0        36         9
##   7     1     1        38         7
##   8     4     0        21         1
chisq.test(cluster_x_subtype)
## Warning in chisq.test(cluster_x_subtype): Chi-squared approximation may be
## incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  cluster_x_subtype
## X-squared = 596.23, df = 24, p-value < 2.2e-16

Run survival analysis with Pathifier clsuters

# run survival analysis
pathifier_surv_full <- survminer::ggsurvplot(
   survival::survfit(survival::Surv(as.numeric(as.character(all_clin$new_death)),all_clin$death_event)~Cluster), # survfit object with calculated statistics.
   data = all_clin,  # data used to fit survival curves. 
   legend = "right",
   risk.table = TRUE,       # show risk table.
   pval = TRUE,             # show p-value of log-rank test.
   pval.coord = c(6500,1),
   pval.size = 12,
   conf.int = F,         # show confidence intervals for 
                            # point estimaes of survival curves.
   xlim = c(0,9000),        # present narrower X axis, but not affect
                            # survival estimates.
   break.time.by = 500,     # break X axis in time intervals by 500.
   ggtheme = ggplot2::theme_minimal(), # customize plot and risk table with a theme.
 risk.table.y.text.col = T, # colour risk table text annotations.
  risk.table.y.text = F # show bars instead of names in text annotations
                            # in legend of risk table
 )

pathifier_surv_trunc <- survminer::ggsurvplot(
   survival::survfit(survival::Surv(as.numeric(as.character(all_clin$new_death)),all_clin$death_event)~Cluster), # survfit object with calculated statistics.
   data = all_clin,  # data used to fit survival curves.
   legend = "right",
   risk.table = F,       # show risk table.
   pval = TRUE,             # show p-value of log-rank test.
   pval.coord = c(4000,1),
   pval.size = 12,
   conf.int = F,         # show confidence intervals for
                            # point estimaes of survival curves.
   xlim = c(0,5000),        # present narrower X axis, but not affect
                            # survival estimates.
   break.time.by = 500,     # break X axis in time intervals by 500.
   ggtheme = ggplot2::theme_minimal(), # customize plot and risk table with a theme.
)

Check ssNPA PCA Loadings

ssnpa_loadings <- read.table('./brca_rnaseq_analysis/ssnpa_results/ssNPA_loading_output_pd_10_.txt', quote="", sep='\t', header=T)
ssnpa_loadings_abs <- abs(ssnpa_loadings)
top_pc_genes <- list()
top_pc_loadings <- list()
pc <- list()
top_n <- 10
for (i in 1:dim(ssnpa_loadings_abs)[2]){
  loadings <- ssnpa_loadings[order(-ssnpa_loadings_abs[,i]),i]
  genes <- row.names(ssnpa_loadings)[order(-ssnpa_loadings_abs[,i])]
  
  top_pc_genes <- c(top_pc_genes, genes[1:top_n])
  top_pc_loadings <- c(top_pc_loadings, loadings[1:top_n])
  pc <- c(pc, rep(paste0("PC", toString(i)),top_n))
}
gene_id <- unlist(top_pc_genes)
loading <- unlist(top_pc_loadings)
PC <- unlist(pc)
ssnpa_top_loadings <- data.frame(gene_id, loading, PC)
ssnpa_top_loadings$gene_entrez <- sapply(strsplit(as.character(ssnpa_top_loadings$gene_id),".",fixed=T), `[`, 2)
ssnpa_top_loadings$gene_name <- sapply(strsplit(as.character(ssnpa_top_loadings$gene_id),".",fixed=T), `[`, 1)

ensembl <- biomaRt::useEnsembl(biomart="ensembl", dataset="hsapiens_gene_ensembl")
lookup <- biomaRt::getBM(attributes=c('entrezgene_id','name_1006'), filters ='entrezgene_id', values = ssnpa_top_loadings$gene_entrez, mart = ensembl)

t <- match(ssnpa_top_loadings$gene_entrez, lookup$entrezgene_id)
ssnpa_top_loadings$description <- lookup$name_1006[t]
length(unique(ssnpa_top_loadings$gene_name))
## [1] 95
ssnpa_top_loadings[,1:3]
##              gene_id     loading   PC
## 1         ITM2A.9452 -0.04241358  PC1
## 2          TNN.63923 -0.04042540  PC1
## 3        CLDN11.5010 -0.04019018  PC1
## 4         SCN4B.6330 -0.04004075  PC1
## 5         HOXA7.3204 -0.03883170  PC1
## 6   LOC728264.728264 -0.03848648  PC1
## 7        CCDC8.83987 -0.03752029  PC1
## 8       ADAM33.80332 -0.03597111  PC1
## 9       CORO2B.10391 -0.03579380  PC1
## 10       RSPO3.84870 -0.03578246  PC1
## 11        RGMA.56963 -0.06297852  PC2
## 12        CHST3.9469 -0.06122275  PC2
## 13  FAM47E.100129583  0.05954893  PC2
## 14        GSTP1.2950 -0.05899879  PC2
## 15        ETV7.51513  0.05596822  PC2
## 16      TTC39A.22996  0.05515450  PC2
## 17        GGTA1.2681  0.05464110  PC2
## 18          DMD.1756 -0.05461720  PC2
## 19     AMOTL1.154810 -0.05388841  PC2
## 20         EGFR.1956 -0.05344762  PC2
## 21     LRRC15.131578 -0.10101857  PC3
## 22       HTRA3.94031 -0.09392926  PC3
## 23       PI16.221476 -0.09188154  PC3
## 24     TMEM90B.79953 -0.09099014  PC3
## 25         TNXB.7148 -0.08774209  PC3
## 26      COL11A1.1301 -0.08496308  PC3
## 27       COL8A1.1295 -0.08465184  PC3
## 28         AEBP1.165 -0.08424014  PC3
## 29         IGF1.3479 -0.08388008  PC3
## 30        PRG4.10216 -0.08367251  PC3
## 31     FAM176B.55194 -0.06456423  PC4
## 32       KRT23.25984 -0.06141949  PC4
## 33    HIST4H4.121504 -0.06086621  PC4
## 34    PDZK1IP1.10158 -0.05601234  PC4
## 35       ETNK2.55224 -0.05529035  PC4
## 36      CYBRD1.79901  0.05504212  PC4
## 37    C6orf155.79940 -0.05442520  PC4
## 38      CLEC5A.23601  0.05420223  PC4
## 39       S100A1.6271 -0.05355781  PC4
## 40        EPN3.55040 -0.05293997  PC4
## 41       SPRY2.10253 -0.07184161  PC5
## 42     PRUNE2.158471 -0.06814914  PC5
## 43        TFEC.22797 -0.06675750  PC5
## 44        PRKCQ.5588  0.06652540  PC5
## 45        CACNB2.783  0.06565950  PC5
## 46      DYNC1I1.1780 -0.06462310  PC5
## 47       CDC14A.8556 -0.06069796  PC5
## 48         SYN1.6853  0.06058417  PC5
## 49        MFGE8.4240 -0.05760957  PC5
## 50        SCN4B.6330 -0.05755966  PC5
## 51       CLSPN.63967 -0.07546052  PC6
## 52       GBP4.115361  0.06414826  PC6
## 53         IL6R.3570 -0.06342410  PC6
## 54         PRF1.5551 -0.06290388  PC6
## 55      TAGAP.117289 -0.06225653  PC6
## 56       UBE2C.11065 -0.06098122  PC6
## 57    TMEM130.222865 -0.05526640  PC6
## 58          AOAH.313 -0.05491570  PC6
## 59        CACNB2.783  0.05424362  PC6
## 60        PTTG1.9232 -0.05313329  PC6
## 61         GRB7.2886 -0.07001603  PC7
## 62     CREB3L1.90993 -0.06578191  PC7
## 63        ERBB2.2064 -0.06501048  PC7
## 64        TRPS1.7227  0.06200590  PC7
## 65           DST.667  0.06060793  PC7
## 66      GPRC5C.55890  0.05988186  PC7
## 67       PDZD2.23037 -0.05944198  PC7
## 68        FGF13.2258  0.05917485  PC7
## 69         MFI2.4241 -0.05908595  PC7
## 70         CYBB.1536 -0.05803922  PC7
## 71      TPRG1.285386  0.07140014  PC8
## 72      B3GNT7.93010  0.06244497  PC8
## 73         PIGR.5284  0.06099476  PC8
## 74        SFRP4.6424 -0.05869513  PC8
## 75        MFAP5.8076 -0.05830432  PC8
## 76       GBP4.115361 -0.05717884  PC8
## 77           C1S.716 -0.05601388  PC8
## 78          LPL.4023 -0.05485978  PC8
## 79         CTGF.1490 -0.05388728  PC8
## 80       MPZL2.10205  0.05354232  PC8
## 81        P2RX5.5026  0.07910823  PC9
## 82          SYP.6855  0.07810780  PC9
## 83      IQGAP2.10788  0.06778438  PC9
## 84         SYN1.6853  0.06481237  PC9
## 85       COL8A2.1296 -0.06334487  PC9
## 86       PTPRN2.5799  0.06196022  PC9
## 87        ASPN.54829 -0.06187037  PC9
## 88       MAML2.84441  0.06040511  PC9
## 89      FIBIN.387758 -0.06011679  PC9
## 90        ODZ3.55714 -0.05978657  PC9
## 91      TNFRSF4.7293  0.08729933 PC10
## 92       CLSPN.63967  0.08312917 PC10
## 93     NBPF16.728936  0.07828141 PC10
## 94     RASSF3.283349  0.07716121 PC10
## 95     FAM21A.387680  0.07485755 PC10
## 96     MAGED4.728239  0.07165218 PC10
## 97      RASEF.158158  0.07109732 PC10
## 98         WNT3.7473  0.06791188 PC10
## 99       AKAP2.11217  0.06533658 PC10
## 100       MYO9A.4649  0.06363982 PC10
write.table(ssnpa_top_loadings,'./brca_rnaseq_analysis/ssnpa_top_loadings.txt',quote=F, sep='\t', row.names=F)

Check expression of top PC loading genes

ssnpa_result <- read.table('./brca_rnaseq_analysis/ssnpa_results/ssnpa_cluster_output_pd_10_.txt', quote="", sep='\t', header = T)
Cluster <- as.factor(ssnpa_result$Cluster)

itm2a <- ggplot2::ggplot(exp_voom[code=="BRCA",],ggplot2::aes(x=as.factor(Cluster),y=ITM2A.9452))+
  ggplot2::geom_boxplot()+
  ggplot2::theme_bw()+
  ggplot2::labs(x="ssNPA Cluster",y="ITM2A Expression, log2(cpm)")+
  ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
        legend.text=ggplot2::element_text(size=36),
        legend.title=ggplot2::element_text(size=0),
        legend.key.size = ggplot2::unit(3.5, 'lines'),
        plot.title = ggplot2::element_text(hjust = 0.5, size=40),
        axis.text.x = ggplot2::element_text(size=14),
        axis.text.y = ggplot2::element_text(size=14),
        axis.title=ggplot2::element_text(size=24))

tnn <- ggplot2::ggplot(exp_voom[code=="BRCA",],ggplot2::aes(x=as.factor(Cluster),y=TNN.63923))+
  ggplot2::geom_boxplot()+
  ggplot2::theme_bw()+
  ggplot2::labs(x="ssNPA Cluster",y="TNN Expression, log2(cpm)")+
  ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
        legend.text=ggplot2::element_text(size=36),
        legend.title=ggplot2::element_text(size=0),
        legend.key.size = ggplot2::unit(3.5, 'lines'),
        plot.title = ggplot2::element_text(hjust = 0.5, size=40),
        axis.text.x = ggplot2::element_text(size=14),
        axis.text.y = ggplot2::element_text(size=14),
        axis.title=ggplot2::element_text(size=24))

scn4b <- ggplot2::ggplot(exp_voom[code=="BRCA",],ggplot2::aes(x=as.factor(Cluster),y=SCN4B.6330))+
  ggplot2::geom_boxplot()+
  ggplot2::theme_bw()+
  ggplot2::labs(x="ssNPA Cluster",y="SCN4B Expression, log2(cpm)")+
  ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
        legend.text=ggplot2::element_text(size=36),
        legend.title=ggplot2::element_text(size=0),
        legend.key.size = ggplot2::unit(3.5, 'lines'),
        plot.title = ggplot2::element_text(hjust = 0.5, size=40),
        axis.text.x = ggplot2::element_text(size=14),
        axis.text.y = ggplot2::element_text(size=14),
        axis.title=ggplot2::element_text(size=24))

hoxa7 <- ggplot2::ggplot(exp_voom[code=="BRCA",],ggplot2::aes(x=as.factor(Cluster),y=HOXA7.3204))+
  ggplot2::geom_boxplot()+
  ggplot2::theme_bw()+
  ggplot2::labs(x="ssNPA Cluster",y="HOXA7 Expression, log2(cpm)")+
  ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
        legend.text=ggplot2::element_text(size=36),
        legend.title=ggplot2::element_text(size=0),
        legend.key.size = ggplot2::unit(3.5, 'lines'),
        plot.title = ggplot2::element_text(hjust = 0.5, size=40),
        axis.text.x = ggplot2::element_text(size=14),
        axis.text.y = ggplot2::element_text(size=14),
        axis.title=ggplot2::element_text(size=24))

Figures for Manuscript

gex_result <- read.table('./brca_rnaseq_analysis/test_gex/gex_cluster_output.txt', quote="", sep='\t', header = T)

gex_class <- ggplot2::ggplot(gex_result,ggplot2::aes(tSNE_1,tSNE_2))+
  ggplot2::geom_point(ggplot2::aes(colour=clinical_brca$subtype), size=4.5)+
  #ggplot2::ggtitle("Gene Expression")+
  ggplot2::theme_bw()+
  ggplot2::labs(color="Molecular Subtype")+
  ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
                 plot.background = ggplot2::element_blank(),
                 panel.grid.major = ggplot2::element_blank(),
                 panel.grid.minor = ggplot2::element_blank(),
                 legend.text = ggplot2::element_text(size = 36),
                 legend.title = ggplot2::element_text(size = 38),
                 legend.key.size = ggplot2::unit(3.5, "lines"),
                 plot.title = ggplot2::element_text(hjust = 0.5, size = 40))+
  ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)), 
                  color = ggplot2::guide_legend(override.aes = list(size =10)))

gex_cluster <- ggplot2::ggplot(gex_result,ggplot2::aes(tSNE_1,tSNE_2))+
  ggplot2::geom_point(ggplot2::aes(colour=as.factor(Cluster)), size=4.5)+
  #ggplot2::ggtitle('Gene Expression')+
  ggplot2::theme_bw()+
  ggplot2::labs(color="Cluster")+
  ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
                 plot.background = ggplot2::element_blank(),
                 panel.grid.major = ggplot2::element_blank(),
                 panel.grid.minor = ggplot2::element_blank(),
                 legend.text=ggplot2::element_text(size=36),
                 legend.title=ggplot2::element_text(size=38),
                 legend.key.size = ggplot2::unit(3.5, 'lines'),
                 plot.title = ggplot2::element_text(hjust = 0.5, size=40))+
  ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
                  color = ggplot2::guide_legend(override.aes = list(size = 10)))

gex_surv_trunc_g <- gex_surv_trunc$plot+
  ggplot2::labs(title='Gene Expression')+
  ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
                 legend.text=ggplot2::element_text(size=36),
                 legend.title=ggplot2::element_text(size=0),
                 legend.key.size = ggplot2::unit(3.5, 'lines'),
                 plot.title = ggplot2::element_text(hjust = 0.5, size=40),
                 axis.title=ggplot2::element_text(size=24))+ 
  ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
                  color = ggplot2::guide_legend(override.aes = list(size = 10)))

gex_surv_full_curve <- gex_surv_full$plot+
  #ggplot2::labs(title='Gene Expression')+
  ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
        legend.text=ggplot2::element_text(size=36),
        legend.title=ggplot2::element_text(size=0),
        legend.key.size = ggplot2::unit(3.5, 'lines'),
        plot.title = ggplot2::element_text(hjust = 0.5, size=40),
        axis.title=ggplot2::element_text(size=24))+
  ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
         color = ggplot2::guide_legend(override.aes = list(size = 10)))

gex_surv_full_table <- gex_surv_full$table+
  ggplot2::theme(plot.margin = ggplot2::unit(c(3,19,3,3), "lines"),
        legend.text=ggplot2::element_text(size=36),
        legend.title=ggplot2::element_text(size=0),
        legend.key.size = ggplot2::unit(3.5, 'lines'),
        plot.title = ggplot2::element_text(hjust = 0.5, size=40),
        axis.title=ggplot2::element_text(size=24))+
  ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
         color = ggplot2::guide_legend(override.aes = list(size = 10)))

ssnpa_result <- read.table('./brca_rnaseq_analysis/ssnpa_results/ssnpa_cluster_output_pd_10_.txt', quote="", sep='\t', header = T)

ssnpa_class <- ggplot2::ggplot(ssnpa_result,ggplot2::aes(tSNE_1,tSNE_2))+
  ggplot2::geom_point(ggplot2::aes(colour=clinical_brca$subtype), size=4.5)+
  ggplot2::ggtitle("ssNPA")+
  ggplot2::theme_bw()+
  ggplot2::labs(color="Molecular Subtype")+
  ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
                 plot.background = ggplot2::element_blank(),
                 panel.grid.major = ggplot2::element_blank(),
                 panel.grid.minor = ggplot2::element_blank(),
                 legend.text = ggplot2::element_text(size = 36),
                 legend.title = ggplot2::element_text(size = 38),
                 legend.key.size = ggplot2::unit(3.5, "lines"),
                 plot.title = ggplot2::element_text(hjust = 0.5, size = 40))+
  ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)), 
                  color = ggplot2::guide_legend(override.aes = list(size =10)))

ssnpa_cluster <- ggplot2::ggplot(ssnpa_result,ggplot2::aes(tSNE_1,tSNE_2))+
  ggplot2::geom_point(ggplot2::aes(colour=as.factor(Cluster)), size=4.5)+
  ggplot2::ggtitle('ssNPA')+
  ggplot2::theme_bw()+
  ggplot2::labs(color="Cluster")+
  ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
                 plot.background = ggplot2::element_blank(),
                 panel.grid.major = ggplot2::element_blank(),
                 panel.grid.minor = ggplot2::element_blank(),
                 legend.text=ggplot2::element_text(size=36),
                 legend.title=ggplot2::element_text(size=38),
                 legend.key.size = ggplot2::unit(3.5, 'lines'),
                 plot.title = ggplot2::element_text(hjust = 0.5, size=40))+
  ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
                  color = ggplot2::guide_legend(override.aes = list(size = 10)))

ssnpa_surv_trunc_g <- ssnpa_surv_trunc$plot+
  ggplot2::labs(title='ssNPA')+
  ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
                 legend.text=ggplot2::element_text(size=36),
                 legend.title=ggplot2::element_text(size=0),
                 legend.key.size = ggplot2::unit(3.5, 'lines'),
                 plot.title = ggplot2::element_text(hjust = 0.5, size=40),
                 axis.title=ggplot2::element_text(size=24))+ 
  ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
                  color = ggplot2::guide_legend(override.aes = list(size = 10)))

ssnpa_surv_full_curve <- ssnpa_surv_full$plot+
  ggplot2::labs(title='ssNPA')+
  ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
        legend.text=ggplot2::element_text(size=36),
        legend.title=ggplot2::element_text(size=0),
        legend.key.size = ggplot2::unit(3.5, 'lines'),
        plot.title = ggplot2::element_text(hjust = 0.5, size=40),
        axis.title=ggplot2::element_text(size=24))+
  ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
         color = ggplot2::guide_legend(override.aes = list(size = 10)))

ssnpa_surv_full_table <- ssnpa_surv_full$table+
  ggplot2::theme(plot.margin = ggplot2::unit(c(3,19,3,3), "lines"),
        legend.text=ggplot2::element_text(size=36),
        legend.title=ggplot2::element_text(size=0),
        legend.key.size = ggplot2::unit(3.5, 'lines'),
        plot.title = ggplot2::element_text(hjust = 0.5, size=40),
        axis.title=ggplot2::element_text(size=24))+
  ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
         color = ggplot2::guide_legend(override.aes = list(size = 10)))

ssgsea_result <- read.table('./brca_rnaseq_analysis/test_ssgsea/ssgsea_cluster_output.txt', quote="", sep='\t', header = T)

ssgsea_class <- ggplot2::ggplot(ssgsea_result,ggplot2::aes(tSNE_1,tSNE_2))+
  ggplot2::geom_point(ggplot2::aes(colour=clinical_brca$subtype), size=4.5)+
  ggplot2::ggtitle("ssGSEA")+
  ggplot2::theme_bw()+
  ggplot2::labs(color="Molecular Subtype")+
  ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
                 plot.background = ggplot2::element_blank(),
                 panel.grid.major = ggplot2::element_blank(),
                 panel.grid.minor = ggplot2::element_blank(),
                 legend.text = ggplot2::element_text(size = 36),
                 legend.title = ggplot2::element_text(size = 38),
                 legend.key.size = ggplot2::unit(3.5, "lines"),
                 plot.title = ggplot2::element_text(hjust = 0.5, size = 40))+
  ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)), 
                  color = ggplot2::guide_legend(override.aes = list(size =10)))

ssgsea_cluster <- ggplot2::ggplot(ssgsea_result,ggplot2::aes(tSNE_1,tSNE_2))+
  ggplot2::geom_point(ggplot2::aes(colour=as.factor(Cluster)), size=4.5)+
  ggplot2::ggtitle('ssGSEA')+
  ggplot2::theme_bw()+
  ggplot2::labs(color="Cluster")+
  ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
                 plot.background = ggplot2::element_blank(),
                 panel.grid.major = ggplot2::element_blank(),
                 panel.grid.minor = ggplot2::element_blank(),
                 legend.text=ggplot2::element_text(size=36),
                 legend.title=ggplot2::element_text(size=38),
                 legend.key.size = ggplot2::unit(3.5, 'lines'),
                 plot.title = ggplot2::element_text(hjust = 0.5, size=40))+
  ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
                  color = ggplot2::guide_legend(override.aes = list(size = 10)))

ssgsea_surv_trunc_g <- ssgsea_surv_trunc$plot+
  ggplot2::labs(title='ssGSEA')+
  ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
                 legend.text=ggplot2::element_text(size=36),
                 legend.title=ggplot2::element_text(size=0),
                 legend.key.size = ggplot2::unit(3.5, 'lines'),
                 plot.title = ggplot2::element_text(hjust = 0.5, size=40),
                 axis.title=ggplot2::element_text(size=24))+ 
  ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
                  color = ggplot2::guide_legend(override.aes = list(size = 10)))

ssgsea_surv_full_curve <- ssgsea_surv_full$plot+
  ggplot2::labs(title='ssGSEA')+
  ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
        legend.text=ggplot2::element_text(size=36),
        legend.title=ggplot2::element_text(size=0),
        legend.key.size = ggplot2::unit(3.5, 'lines'),
        plot.title = ggplot2::element_text(hjust = 0.5, size=40),
        axis.title=ggplot2::element_text(size=24))+
  ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
         color = ggplot2::guide_legend(override.aes = list(size = 10)))

ssgsea_surv_full_table <- ssgsea_surv_full$table+
  ggplot2::theme(plot.margin = ggplot2::unit(c(3,19,3,3), "lines"),
        legend.text=ggplot2::element_text(size=36),
        legend.title=ggplot2::element_text(size=0),
        legend.key.size = ggplot2::unit(3.5, 'lines'),
        plot.title = ggplot2::element_text(hjust = 0.5, size=40),
        axis.title=ggplot2::element_text(size=24))+
  ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
         color = ggplot2::guide_legend(override.aes = list(size = 10)))

pathifier_result <- read.table('./brca_rnaseq_analysis/test_pathifier/pathifier_cluster_output.txt', quote="", sep='\t', header = T)

pathifier_class <- ggplot2::ggplot(pathifier_result,ggplot2::aes(tSNE_1,tSNE_2))+
  ggplot2::geom_point(ggplot2::aes(colour=clinical_brca$subtype), size=4.5)+
  ggplot2::ggtitle("Pathifier")+
  ggplot2::theme_bw()+
  ggplot2::labs(color="Molecular Subtype")+
  ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
                 plot.background = ggplot2::element_blank(),
                 panel.grid.major = ggplot2::element_blank(),
                 panel.grid.minor = ggplot2::element_blank(),
                 legend.text = ggplot2::element_text(size = 36),
                 legend.title = ggplot2::element_text(size = 38),
                 legend.key.size = ggplot2::unit(3.5, "lines"),
                 plot.title = ggplot2::element_text(hjust = 0.5, size = 40))+
  ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)), 
                  color = ggplot2::guide_legend(override.aes = list(size =10)))

pathifier_cluster <- ggplot2::ggplot(pathifier_result,ggplot2::aes(tSNE_1,tSNE_2))+
  ggplot2::geom_point(ggplot2::aes(colour=as.factor(Cluster)), size=4.5)+
  ggplot2::ggtitle('Pathifier')+
  ggplot2::theme_bw()+
  ggplot2::labs(color="Cluster")+
  ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
                 plot.background = ggplot2::element_blank(),
                 panel.grid.major = ggplot2::element_blank(),
                 panel.grid.minor = ggplot2::element_blank(),
                 legend.text=ggplot2::element_text(size=36),
                 legend.title=ggplot2::element_text(size=38),
                 legend.key.size = ggplot2::unit(3.5, 'lines'),
                 plot.title = ggplot2::element_text(hjust = 0.5, size=40))+
  ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
                  color = ggplot2::guide_legend(override.aes = list(size = 10)))
  
pathifier_surv_trunc_g <- pathifier_surv_trunc$plot+
  ggplot2::labs(title='Pathifier')+
  ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
                 legend.text=ggplot2::element_text(size=36),
                 legend.title=ggplot2::element_text(size=0),
                 legend.key.size = ggplot2::unit(3.5, 'lines'),
                 plot.title = ggplot2::element_text(hjust = 0.5, size=40),
                 axis.title=ggplot2::element_text(size=24))+ 
  ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
                  color = ggplot2::guide_legend(override.aes = list(size = 10)))

pathifier_surv_full_curve <- pathifier_surv_full$plot+
  ggplot2::labs(title='Pathifier')+
  ggplot2::theme(plot.margin = ggplot2::unit(c(3,3,3,3), "lines"),
        legend.text=ggplot2::element_text(size=36),
        legend.title=ggplot2::element_text(size=0),
        legend.key.size = ggplot2::unit(3.5, 'lines'),
        plot.title = ggplot2::element_text(hjust = 0.5, size=40),
        axis.title=ggplot2::element_text(size=24))+
  ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
         color = ggplot2::guide_legend(override.aes = list(size = 10)))

pathifier_surv_full_table <- pathifier_surv_full$table+
  ggplot2::theme(plot.margin = ggplot2::unit(c(3,19,3,3), "lines"),
        legend.text=ggplot2::element_text(size=36),
        legend.title=ggplot2::element_text(size=0),
        legend.key.size = ggplot2::unit(3.5, 'lines'),
        plot.title = ggplot2::element_text(hjust = 0.5, size=40),
        axis.title=ggplot2::element_text(size=24))+
  ggplot2::guides(shape = ggplot2::guide_legend(override.aes = list(size = 10)),
         color = ggplot2::guide_legend(override.aes = list(size = 10)))

pdf("brca_subtypes_and_truncated_survival.pdf", width=50,height=24)
ggpubr::ggarrange(ssnpa_class,pathifier_class,ssgsea_class, ssnpa_surv_trunc_g,pathifier_surv_trunc_g,ssgsea_surv_trunc_g, labels=c("A","B","C","D","E","F"),font.label = list(size = 52, face = "bold"),nrow=2,ncol=3) 
dev.off()
## quartz_off_screen 
##                 2
pdf("brca_clusters.pdf", width=56,height=18)
ggpubr::ggarrange(ssnpa_cluster,pathifier_cluster,ssgsea_cluster, labels=c("A","B","C"),font.label = list(size = 52, face = "bold"),nrow=1,ncol=3) 
dev.off()
## quartz_off_screen 
##                 2
pdf("brca_survival_full.pdf",width=45,height=18)
ggpubr::ggarrange(ssnpa_surv_full_curve,pathifier_surv_full_curve,ssgsea_surv_full_curve,ssnpa_surv_full_table, pathifier_surv_full_table,ssgsea_surv_full_table, labels=c("A","B","C","","",""),font.label = list(size = 52, face = "bold"),nrow=2,ncol=3,heights=c(2,1)) 
dev.off()
## quartz_off_screen 
##                 2
pdf("brca_gex_results.pdf",width=45,height=18)
ggpubr::ggarrange(gex_class, gex_cluster, gex_surv_full_curve,ggplot2::element_blank(),ggplot2::element_blank(),gex_surv_full_table, labels=c("A","B","C","","",""),font.label = list(size = 52, face = "bold"),nrow=2,ncol=3, heights=c(2,1),widths=c(1.15,1,1)) 
## Warning in as_grob.default(plot): Cannot convert object of class
## element_blankelement into a grob.

## Warning in as_grob.default(plot): Cannot convert object of class
## element_blankelement into a grob.
dev.off()
## quartz_off_screen 
##                 2
pdf("brca_gene_boxplots.pdf", width=24, height=24)
ggpubr::ggarrange(itm2a, tnn, scn4b, hoxa7, labels="AUTO", font.label = list(size = 52, face = "bold"), nrow=2, ncol=2)
dev.off()
## quartz_off_screen 
##                 2
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.2           locfit_1.5-9.1       lattice_0.20-38     
##  [4] tidyr_1.0.0          prettyunits_1.0.2    zoo_1.8-6           
##  [7] assertthat_0.2.1     zeallot_0.1.0        digest_0.6.22       
## [10] R6_2.4.0             survminer_0.4.6      backports_1.1.5     
## [13] stats4_3.6.1         RSQLite_2.1.2        evaluate_0.14       
## [16] httr_1.4.1           ggplot2_3.2.1        pillar_1.4.2        
## [19] progress_1.2.2       rlang_0.4.1          curl_4.2            
## [22] lazyeval_0.2.2       data.table_1.12.6    blob_1.2.0          
## [25] S4Vectors_0.22.1     Matrix_1.2-17        rmarkdown_1.16      
## [28] labeling_0.3         splines_3.6.1        stringr_1.4.0       
## [31] RCurl_1.95-4.12      bit_1.1-14           biomaRt_2.40.5      
## [34] munsell_0.5.0        broom_0.5.2          compiler_3.6.1      
## [37] xfun_0.10            pkgconfig_2.0.3      BiocGenerics_0.30.0 
## [40] htmltools_0.4.0      tidyselect_0.2.5     tibble_2.1.3        
## [43] gridExtra_2.3        km.ci_0.5-2          edgeR_3.26.8        
## [46] IRanges_2.18.3       XML_3.98-1.20        crayon_1.3.4        
## [49] dplyr_0.8.3          ggpubr_0.2.3         bitops_1.0-6        
## [52] grid_3.6.1           nlme_3.1-141         xtable_1.8-4        
## [55] gtable_0.3.0         lifecycle_0.1.0      DBI_1.0.0           
## [58] magrittr_1.5         KMsurv_0.1-5         scales_1.0.0        
## [61] stringi_1.4.3        ggsignif_0.6.0       limma_3.40.6        
## [64] survMisc_0.5.5       generics_0.0.2       vctrs_0.2.0         
## [67] cowplot_1.0.0        tools_3.6.1          bit64_0.9-7         
## [70] Biobase_2.44.0       glue_1.3.1           purrr_0.3.3         
## [73] hms_0.5.2            parallel_3.6.1       survival_2.44-1.1   
## [76] yaml_2.2.0           ssnpa_0.0.0.9003     AnnotationDbi_1.46.1
## [79] colorspace_1.4-1     memoise_1.1.0        knitr_1.25