Set up

Install packages as necessary.

dir.create('liver_scrnaseq_analysis/')
## Warning in dir.create("liver_scrnaseq_analysis/"):
## 'liver_scrnaseq_analysis' already exists
list.of.bioconductor.packages <- c("edgeR","limma","pathifier","KEGGREST","biomaRt","GSVA","GSEABase")
new.bioc.packages <- list.of.bioconductor.packages[!(list.of.bioconductor.packages %in% installed.packages()[,"Package"])]
if(length(new.bioc.packages)) BiocManager::install(new.bioc.packages)

list.of.packages <- c("aricode","ggplot2","mclust","Rmisc","ggpubr")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)

Read in and preprocess scRNA-seq data

Count data downloaded from GEO GSE90047.

# Read in data downloaded from GEO
gene_exp <- read.table("./input_data/GSE90047_Single-cell_RNA-seq_Read_Count.txt",header=TRUE,quote="",sep="\t",row.names=1)
gene_symbols <- gene_exp$Symbol
gene_lengths <- gene_exp$GeneLength
gene_exp$Symbol <- NULL
gene_exp$GeneLength <- NULL
gene_exp <- as.data.frame(t(gene_exp))
dim(gene_exp)
## [1]   529 40916
# Get rid of spike-in controls
i <- grep("ERCC", names(gene_exp))
gene_exp[,i] <- NULL
dim(gene_exp)
## [1]   529 40824
# Keep only cells from main time course experiment
gene_exp <- as.matrix(gene_exp)
gene_exp <- gene_exp[1:447,]

Filter out lowly expressed genes and use voom to convert counts to log2(cpm) with associated weights.

# Filter out lowly expressed genes
keep <- edgeR::filterByExpr(t(gene_exp))
table(keep)
## keep
## FALSE  TRUE 
## 35753  5071
gene_exp <- gene_exp[,keep]

# Apply voom
exp_voom <- limma::voom(t(gene_exp),plot=T)

exp_voom <- as.data.frame(t(exp_voom$E))

write.table(exp_voom,file="./liver_scrnaseq_analysis/liver_counts.exp_filt.voom.txt",quote=F,sep="\t")

Keep only 3,000 most highly variant genes.

# Keep top 3,000 most highly variant genes
exp_voom <- ssnpa::filter_by_variance(exp_voom, 3000)
dim(exp_voom)
## [1]  447 3000
write.table(exp_voom,file='./liver_scrnaseq_analysis/liver_counts.exp_filt.voom.var_filt.txt',quote=F, sep='\t')

Parse cell names for timepoint and read in cell type information.

m <- row.names(exp_voom)
n <- read.table(text=m, sep="_", colClasses="character")
time <- n$V1
time <- substring(time,1,5)
class <- as.data.frame(time, row.names=m)

cell <- read.table('./input_data/liver_cell_types.txt',header=T,sep="\t",row.names=1,quote="")
class <- merge(class,cell, by="row.names")
row.names(class) <- class$Row.names
class$Row.names <- NULL
class$timexcell <- paste(class$time, class$cell_type,sep = '_')

table(class$time)
## 
## E10.5 E11.5 E12.5 E13.5 E14.5 E15.5 E17.5 
##    54    70    41    65    70    77    70
table(class$cell_type)
## 
##          cholangiocyte hepatoblast/hepatocyte 
##                    102                    345
table(class$timexcell)
## 
## E10.5_hepatoblast/hepatocyte          E11.5_cholangiocyte 
##                           54                            2 
## E11.5_hepatoblast/hepatocyte          E12.5_cholangiocyte 
##                           68                            3 
## E12.5_hepatoblast/hepatocyte          E13.5_cholangiocyte 
##                           38                           10 
## E13.5_hepatoblast/hepatocyte          E14.5_cholangiocyte 
##                           55                           17 
## E14.5_hepatoblast/hepatocyte          E15.5_cholangiocyte 
##                           53                           36 
## E15.5_hepatoblast/hepatocyte          E17.5_cholangiocyte 
##                           41                           34 
## E17.5_hepatoblast/hepatocyte 
##                           36

Test ssNPA with different reference groups and penalty discounts

Scan a range of penalty discount values and reference groups and evaluate performance of ssNPA by NMI and ARI

dir.create('./liver_scrnaseq_analysis/test_refxpd/')
for (ref in c("E10.5","E11.5","E12.5","E13.5","E14.5","E15.5","E17.5")){
  dir.create(paste0('./liver_scrnaseq_analysis/test_refxpd/',ref))
  for (pd in 5:12){
    ssnpa_cluster <- ssnpa::run_ssnpa(gene_exp = exp_voom,
                 reference_samples = (class$time==ref), 
                 out_dir = paste0('./liver_scrnaseq_analysis/test_refxpd/',ref),
                 seed_value = 1,
                 fgs_pd = pd,
                 n_pc = 10,
                 cluster_resolution = 0.6,
                 cluster_ref_samples = T)
  write.table(ssnpa_cluster$clustering,file=paste0('./liver_scrnaseq_analysis/test_refxpd/',ref,'/ssNPA_cluster_output_',ref,'_pd_',toString(pd),'_.txt'),quote = F, sep='\t')
  write.table(ssnpa_cluster$loadings,file=paste0('./liver_scrnaseq_analysis/test_refxpd/',ref,'/ssNPA_loading_output_',ref,'_pd_',toString(pd),'_.txt'),quote = F, sep='\t')
  }
}

for (ref in c("E10.5","E11.5","E13.5","E14.5","E15.5","E17.5")){
  for (pd in 4){
    ssnpa_cluster <- ssnpa::run_ssnpa(gene_exp = exp_voom,
                 reference_samples = (class$time==ref), 
                 out_dir = paste0('./liver_scrnaseq_analysis/test_refxpd/',ref),
                 seed_value = 1,
                 fgs_pd = pd,
                 n_pc = 10,
                 cluster_resolution = 0.6,
                 cluster_ref_samples = T)
  write.table(ssnpa_cluster$clustering,file=paste0('./liver_scrnaseq_analysis/test_refxpd/',ref,'/ssNPA_cluster_output_',ref,'_pd_',toString(pd),'_.txt'),quote = F, sep='\t')
  write.table(ssnpa_cluster$loadings,file=paste0('./liver_scrnaseq_analysis/test_refxpd/',ref,'/ssNPA_loading_output_',ref,'_pd_',toString(pd),'_.txt'),quote = F, sep='\t')
  }
}

# add in E17.5 Hepatocytes
dir.create('./liver_scrnaseq_analysis/test_refxpd/E17.5hepatocytes/')
for (pd in 5:12){
  ssnpa_cluster <- ssnpa::run_ssnpa(gene_exp = exp_voom,
                                    reference_samples = (class$timexcell=='E17.5_hepatoblast/hepatocyte'), 
                                    out_dir = paste0('./liver_scrnaseq_analysis/test_refxpd/','E17.5hepatocytes'),
                                    seed_value = 1,
                                    fgs_pd = pd,
                                    n_pc = 10,
                                    cluster_resolution = 0.6,
                                    cluster_ref_samples = T)
  write.table(ssnpa_cluster$clustering,file=paste0('./liver_scrnaseq_analysis/test_refxpd/','E17.5hepatocytes','/ssNPA_cluster_output_','E17.5hepatocytes','_pd_',toString(pd),'_.txt'),quote = F, sep='\t')
  write.table(ssnpa_cluster$loadings,file=paste0('./liver_scrnaseq_analysis/test_refxpd/','E17.5hepatocytes','/ssNPA_loading_output_','E17.5hepatocytes','_pd_',toString(pd),'_.txt'),quote = F, sep='\t')
}

Generate random reference groups.

dir.create('./liver_scrnaseq_analysis/random_reference_cells/')
set.seed(1)
for (i in 1:5){
  s <- sample(row.names(exp_voom), median(table(class$time)), replace = FALSE)
  write.table(s,paste0('./liver_scrnaseq_analysis/random_reference_cells/reference_cells_',toString(i),'_.txt'),quote=F,sep='\t',row.names = F, col.names=F)  
}

Test ssNPA with random reference groups.

ref="Random"
for (i in 1:5){
  dir.create(paste0('./liver_scrnaseq_analysis/test_refxpd/random',toString(i)))
  ref_list <- read.table(paste0('./liver_scrnaseq_analysis/random_reference_cells/reference_cells_',toString(i),'_.txt'), quote="", sep='\t', header =F)
  ref_list <- ref_list$V1
  ref_sam <- (row.names(exp_voom) %in% ref_list)
  for (pd in 4:12){
    ssnpa_cluster <- ssnpa::run_ssnpa(gene_exp = exp_voom,
                                    reference_samples = ref_sam, 
                                    out_dir = paste0('./liver_scrnaseq_analysis/test_refxpd/','random',toString(i)),
                                    seed_value = 1,
                                    fgs_pd = pd,
                                    n_pc = 10,
                                    cluster_resolution = 0.6,
                                    cluster_ref_samples = T)
  write.table(ssnpa_cluster$clustering,file=paste0('./liver_scrnaseq_analysis/test_refxpd/','random',toString(i),'/ssNPA_cluster_output_','random','_pd_',toString(pd),'_.txt'),quote = F, sep='\t')
  write.table(ssnpa_cluster$loadings,file=paste0('./liver_scrnaseq_analysis/test_refxpd/','random',toString(i),'/ssNPA_loading_output_','random','_pd_',toString(pd),'_.txt'),quote = F, sep='\t')
  }
}

Read in all ssNPA results files and calculate NMI and ARI

result_files <- list.files(path = "./liver_scrnaseq_analysis/test_refxpd", pattern = 'ssNPA_cluster_output',full.names = T, recursive = T,ignore.case = FALSE, include.dirs = T)

method <- character()
reference_set <- character()
pds <- integer()
num_clusters <- integer()
nmi <- double()
ari <- double()
for (file in result_files){
  a <- strsplit(file,"_",fixed=T)
  method <- c(method, 'ssNPA')
  reference_set <- c(reference_set, a[[1]][7])
  pds <- c(pds,a[[1]][9])
  df <- read.table(file,header=T,sep='\t', quote="",row.names=1,stringsAsFactors=T)
  num_clusters <- c(num_clusters,length(unique(df$Cluster)))
  nmi <- c(nmi,aricode::NMI(class$timexcell,df$Cluster))
  ari <- c(ari,mclust::adjustedRandIndex(class$timexcell, df$Cluster))
}
ssnpa_results <- data.frame(method,reference_set,pds,num_clusters,nmi,ari)
ssnpa_results$pds <- factor(ssnpa_results$pds, levels = c(4,5,6,7,8,9,10,11,12))
levels(ssnpa_results$reference_set)[levels(ssnpa_results$reference_set)=="E17.5hepatocytes"] <- "E17.5 hepatocytes"
levels(ssnpa_results$reference_set)[levels(ssnpa_results$reference_set)=="random"] <- "Random"

ssnpa_results
##     method     reference_set pds num_clusters       nmi       ari
## 1    ssNPA             E10.5  10            6 0.4989061 0.3629863
## 2    ssNPA             E10.5  11            6 0.4996008 0.3632776
## 3    ssNPA             E10.5  12            6 0.4985045 0.3628323
## 4    ssNPA             E10.5   4            6 0.5190910 0.3941733
## 5    ssNPA             E10.5   5            6 0.5115482 0.3732445
## 6    ssNPA             E10.5   6            6 0.5126917 0.3783058
## 7    ssNPA             E10.5   7            6 0.5076537 0.3721858
## 8    ssNPA             E10.5   8            6 0.4984591 0.3607016
## 9    ssNPA             E10.5   9            6 0.4949195 0.3575609
## 10   ssNPA             E11.5  10            7 0.5061272 0.3711821
## 11   ssNPA             E11.5  11            7 0.5055431 0.3688952
## 12   ssNPA             E11.5  12            7 0.5021115 0.3647993
## 13   ssNPA             E11.5   4            7 0.5806941 0.5046954
## 14   ssNPA             E11.5   5            7 0.5536136 0.4661542
## 15   ssNPA             E11.5   6            7 0.5235163 0.4111588
## 16   ssNPA             E11.5   7            7 0.5139211 0.3944585
## 17   ssNPA             E11.5   8            7 0.5257517 0.3955257
## 18   ssNPA             E11.5   9            7 0.5188797 0.3866566
## 19   ssNPA             E12.5  10            6 0.4627493 0.3496546
## 20   ssNPA             E12.5  11            6 0.4881712 0.3675188
## 21   ssNPA             E12.5  12            6 0.4805727 0.3593541
## 22   ssNPA             E12.5   5            7 0.5388598 0.4157324
## 23   ssNPA             E12.5   6            6 0.5170250 0.3849483
## 24   ssNPA             E12.5   7            6 0.4708107 0.3427727
## 25   ssNPA             E12.5   8            6 0.4923602 0.3669023
## 26   ssNPA             E12.5   9            6 0.4823793 0.3323744
## 27   ssNPA             E13.5  10            6 0.4674087 0.3593620
## 28   ssNPA             E13.5  11            6 0.4649576 0.3608429
## 29   ssNPA             E13.5  12            6 0.4664099 0.3562749
## 30   ssNPA             E13.5   4            7 0.5512526 0.4297797
## 31   ssNPA             E13.5   5            6 0.4785982 0.3424778
## 32   ssNPA             E13.5   6            7 0.5343838 0.4101959
## 33   ssNPA             E13.5   7            6 0.4720886 0.3514341
## 34   ssNPA             E13.5   8            6 0.4687013 0.3434875
## 35   ssNPA             E13.5   9            6 0.4696692 0.3576294
## 36   ssNPA             E14.5  10            7 0.5110207 0.3762143
## 37   ssNPA             E14.5  11            7 0.5218555 0.3817916
## 38   ssNPA             E14.5  12            7 0.5058115 0.3705352
## 39   ssNPA             E14.5   4            6 0.5740522 0.4723229
## 40   ssNPA             E14.5   5            7 0.5714402 0.4315123
## 41   ssNPA             E14.5   6            7 0.5401851 0.3833062
## 42   ssNPA             E14.5   7            7 0.4937949 0.3686072
## 43   ssNPA             E14.5   8            7 0.5066050 0.3846853
## 44   ssNPA             E14.5   9            7 0.4931975 0.3606796
## 45   ssNPA             E15.5  10            6 0.5014041 0.3901273
## 46   ssNPA             E15.5  11            6 0.5002681 0.3930109
## 47   ssNPA             E15.5  12            6 0.5032696 0.3938568
## 48   ssNPA             E15.5   4            8 0.6150530 0.4873983
## 49   ssNPA             E15.5   5            8 0.5976367 0.4478579
## 50   ssNPA             E15.5   6            7 0.5148128 0.3515344
## 51   ssNPA             E15.5   7            6 0.4934968 0.3591223
## 52   ssNPA             E15.5   8            6 0.5037886 0.3964979
## 53   ssNPA             E15.5   9            7 0.5137178 0.3802511
## 54   ssNPA             E17.5  10            6 0.4595449 0.3449952
## 55   ssNPA             E17.5  11            6 0.4502055 0.3345621
## 56   ssNPA             E17.5  12            6 0.4325164 0.3125855
## 57   ssNPA             E17.5   4            7 0.5188686 0.3782065
## 58   ssNPA             E17.5   5            7 0.5380958 0.4139808
## 59   ssNPA             E17.5   6            6 0.4937677 0.3811921
## 60   ssNPA             E17.5   7            6 0.4927237 0.3814886
## 61   ssNPA             E17.5   8            6 0.4863517 0.3675997
## 62   ssNPA             E17.5   9            6 0.4596659 0.3496078
## 63   ssNPA E17.5 hepatocytes  10            6 0.4423047 0.3399355
## 64   ssNPA E17.5 hepatocytes  11            6 0.4483608 0.3443706
## 65   ssNPA E17.5 hepatocytes  12            6 0.4417953 0.3414855
## 66   ssNPA E17.5 hepatocytes   5            7 0.5244172 0.4103018
## 67   ssNPA E17.5 hepatocytes   6            6 0.4588314 0.3620383
## 68   ssNPA E17.5 hepatocytes   7            6 0.4579592 0.3556687
## 69   ssNPA E17.5 hepatocytes   8            6 0.4390137 0.3349598
## 70   ssNPA E17.5 hepatocytes   9            6 0.4486123 0.3457244
## 71   ssNPA            Random  10            5 0.3994849 0.2940187
## 72   ssNPA            Random  11            6 0.4615890 0.3325610
## 73   ssNPA            Random  12            5 0.3952019 0.2968861
## 74   ssNPA            Random   4            5 0.2971578 0.1786121
## 75   ssNPA            Random   5            5 0.2951630 0.2001413
## 76   ssNPA            Random   6            5 0.3449468 0.2433838
## 77   ssNPA            Random   7            5 0.3774365 0.2693926
## 78   ssNPA            Random   8            5 0.4055045 0.2950569
## 79   ssNPA            Random   9            5 0.3927834 0.2829422
## 80   ssNPA            Random  10            5 0.4036376 0.3017254
## 81   ssNPA            Random  11            5 0.4107778 0.3040372
## 82   ssNPA            Random  12            5 0.4107587 0.3029480
## 83   ssNPA            Random   4            5 0.2900579 0.1844669
## 84   ssNPA            Random   5            6 0.3414785 0.2011797
## 85   ssNPA            Random   6            6 0.4170360 0.2886322
## 86   ssNPA            Random   7            5 0.3752135 0.2631230
## 87   ssNPA            Random   8            6 0.4419078 0.3080828
## 88   ssNPA            Random   9            6 0.4319798 0.3082374
## 89   ssNPA            Random  10            5 0.3908403 0.2875533
## 90   ssNPA            Random  11            5 0.3899106 0.2877471
## 91   ssNPA            Random  12            5 0.3981384 0.2979477
## 92   ssNPA            Random   4            5 0.2940171 0.1872200
## 93   ssNPA            Random   5            6 0.3385853 0.2241929
## 94   ssNPA            Random   6            5 0.3349233 0.2278658
## 95   ssNPA            Random   7            5 0.3513398 0.2317307
## 96   ssNPA            Random   8            5 0.4063303 0.2823776
## 97   ssNPA            Random   9            5 0.3975408 0.2912891
## 98   ssNPA            Random  10            7 0.4605585 0.3216140
## 99   ssNPA            Random  11            6 0.4532103 0.3282535
## 100  ssNPA            Random  12            6 0.4543818 0.3314441
## 101  ssNPA            Random   4            5 0.3012069 0.1922797
## 102  ssNPA            Random   5            6 0.3374999 0.2291097
## 103  ssNPA            Random   6            5 0.3404424 0.2119682
## 104  ssNPA            Random   7            6 0.4252663 0.2786030
## 105  ssNPA            Random   8            6 0.4219663 0.2748916
## 106  ssNPA            Random   9            6 0.4284912 0.2825565
## 107  ssNPA            Random  10            5 0.4294952 0.3208720
## 108  ssNPA            Random  11            5 0.4201252 0.3109494
## 109  ssNPA            Random  12            5 0.4192327 0.3142967
## 110  ssNPA            Random   4            5 0.2863549 0.1888866
## 111  ssNPA            Random   5            5 0.3301963 0.2209320
## 112  ssNPA            Random   6            6 0.3465440 0.2381293
## 113  ssNPA            Random   7            5 0.3663433 0.2449496
## 114  ssNPA            Random   8            5 0.3817106 0.2816567
## 115  ssNPA            Random   9            5 0.3910132 0.2999313
mean(ssnpa_results$nmi[ssnpa_results$reference_set=="Random"])
## [1] 0.3819507
s <- Rmisc::summarySE(ssnpa_results, measurevar="nmi", groupvars=c("reference_set","pds"))
## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced
g <- ggplot2::ggplot(s,ggplot2::aes(x=reference_set,y=nmi, fill=as.factor(pds),width=0.75))
g <- g + ggplot2::geom_bar(stat="identity", position=ggplot2::position_dodge(width=0.9))
g <- g + ggplot2::geom_errorbar(ggplot2::aes(ymin=nmi-se, ymax=nmi+se), width=0.5,position=ggplot2::position_dodge(.9))
g <- g + ggplot2::theme_bw()
g <- g + ggplot2::xlab('Reference Cells')
g <- g + ggplot2::ylab('NMI')
g <- g + ggplot2::guides(fill=ggplot2::guide_legend(title="Penalty\nDiscount"))
g <- g + ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
                        axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
                        text = ggplot2::element_text(size=20),
                        legend.text=ggplot2::element_text(size=18),
                        axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)))
pdf('ssnpa_liver_referencecells_by_pd.nmi.pdf', width=12, height=6)
g
## Warning: Removed 70 rows containing missing values (geom_errorbar).
dev.off()
## quartz_off_screen 
##                 2
g
## Warning: Removed 70 rows containing missing values (geom_errorbar).

s <- Rmisc::summarySE(ssnpa_results, measurevar="ari", groupvars=c("reference_set","pds"))
## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced
g <- ggplot2::ggplot(s,ggplot2::aes(x=reference_set,y=ari, fill=as.factor(pds),width=0.75))
g <- g + ggplot2::geom_bar(stat="identity", position=ggplot2::position_dodge(width=0.9))
g <- g + ggplot2::geom_errorbar(ggplot2::aes(ymin=ari-se, ymax=ari+se), width=0.5,position=ggplot2::position_dodge(.9))
g <- g + ggplot2::theme_bw()
g <- g + ggplot2::xlab('Reference Cells')
g <- g + ggplot2::ylab('ARI')
g <- g + ggplot2::guides(fill=ggplot2::guide_legend(title="Penalty\nDiscount"))
g <- g + ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
                        axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
                        text = ggplot2::element_text(size=20),
                        legend.text=ggplot2::element_text(size=18),
                        axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)))
pdf('ssnpa_liver_referencecells_by_pd.ari.pdf', width=12, height=6)
g
## Warning: Removed 70 rows containing missing values (geom_errorbar).
dev.off()
## quartz_off_screen 
##                 2
g
## Warning: Removed 70 rows containing missing values (geom_errorbar).

ssnpa_features_selected <- read.table('./liver_scrnaseq_analysis/test_refxpd/E15.5/selectedfeatures.fges_pd4.txt', header=T, sep='\t',quote = "")
mean(ssnpa_features_selected$num_var)
## [1] 3.584

Test ssNPA-LR with different reference groups

dir.create('./liver_scrnaseq_analysis/test_lr_ref/')
for (ref in c("E10.5","E11.5","E12.5","E13.5","E14.5","E15.5","E17.5")){
    dir.create(paste0('./liver_scrnaseq_analysis/test_lr_ref/',ref))
    ssnpa_cluster <- ssnpa::run_ssnpa_lr(gene_exp = exp_voom,
                                      reference_samples = (class$time==ref), 
                                      out_dir = paste0('./liver_scrnaseq_analysis/test_lr_ref/',ref),
                                      seed_value = 1,
                                      n_pc = 10,
                                      cluster_resolution = 0.6,
                                      cluster_ref_samples = T)
    write.table(ssnpa_cluster$clustering,file=paste0('./liver_scrnaseq_analysis/test_lr_ref/',ref,'/ssNPA-LR_cluster_output_',ref,'_.txt'),quote = F, sep='\t')
    write.table(ssnpa_cluster$loadings,file=paste0('./liver_scrnaseq_analysis/test_lr_ref/',ref,'/ssNPA-LR_loading_output_',ref,'_.txt'),quote = F, sep='\t')
}

# add in E17.5 Hepatocytes
dir.create(paste0('./liver_scrnaseq_analysis/test_lr_ref/E17.5hepatocytes/'))
ssnpa_cluster <- ssnpa::run_ssnpa_lr(gene_exp = exp_voom,
                                    reference_samples = (class$timexcell=='E17.5_hepatoblast/hepatocyte'), 
                                    out_dir = paste0('./liver_scrnaseq_analysis/test_lr_ref/','E17.5hepatocytes'),
                                    seed_value = 1,
                                    n_pc = 10,
                                    cluster_resolution = 0.6,
                                    cluster_ref_samples = T)
write.table(ssnpa_cluster$clustering,file=paste0('./liver_scrnaseq_analysis/test_lr_ref/','E17.5hepatocytes','/ssNPA-LR_cluster_output_','E17.5hepatocytes','_.txt'),quote = F, sep='\t')
write.table(ssnpa_cluster$loadings,file=paste0('./liver_scrnaseq_analysis/test_lr_ref/','E17.5hepatocytes','/ssNPA-LR_loading_output_','E17.5hepatocytes','_.txt'),quote = F, sep='\t')


for (i in 1:5){
  dir.create(paste0("./liver_scrnaseq_analysis/test_lr_ref/random",toString(i)))
  ref_list <- read.table(paste0('./liver_scrnaseq_analysis/random_reference_cells/reference_cells_',toString(i),'_.txt'), quote="", sep='\t', header =F)
  ref_list <- ref_list$V1
  ref_sam <- (row.names(exp_voom) %in% ref_list)
  ssnpa_cluster <- ssnpa::run_ssnpa_lr(gene_exp = exp_voom,
                                    reference_samples = ref_sam, 
                                    out_dir = paste0('./liver_scrnaseq_analysis/test_lr_ref/','random',toString(i)),
                                    seed_value = 1,
                                    n_pc = 10,
                                    cluster_resolution = 0.6,
                                    cluster_ref_samples = T)
  write.table(ssnpa_cluster$clustering,file=paste0('./liver_scrnaseq_analysis/test_lr_ref/','random',toString(i),'/ssNPA-LR_cluster_output_','random','_.txt'),quote = F, sep='\t')
  write.table(ssnpa_cluster$loadings,file=paste0('./liver_scrnaseq_analysis/test_lr_ref/','random',toString(i),'/ssNPA-LR_loading_output_','random','_.txt'),quote = F, sep='\t')
}

Read in all ssNPA-LR results files and calculate NMI and ARI

result_files <- list.files(path = "./liver_scrnaseq_analysis/test_lr_ref", pattern = 'ssNPA-LR_cluster_output',full.names = T, recursive = T,ignore.case = F, include.dirs = T)

method <- character()
reference_set <- character()
pds <- integer()
num_clusters <- integer()
nmi <- double()
ari <- double()
for (file in result_files){
  a <- strsplit(file,"_",fixed=T)
  method <- c(method, 'ssNPA-LR')
  reference_set <- c(reference_set, a[[1]][8])
  pds <- c(pds,NA)
  df <- read.table(file,header=T,sep='\t', quote="",row.names=1,stringsAsFactors=F)
  num_clusters <- c(num_clusters,length(unique(df$Cluster)))
  nmi <- c(nmi,aricode::NMI(class$timexcell,df$Cluster))
  ari <- c(ari,mclust::adjustedRandIndex(class$timexcell, df$Cluster))
}
ssnpa_lr_results <- data.frame(method,reference_set,pds,num_clusters,nmi,ari)
levels(ssnpa_lr_results$reference_set)[levels(ssnpa_lr_results$reference_set)=="E17.5hepatocytes"] <- "E17.5 hepatocytes"
levels(ssnpa_lr_results$reference_set)[levels(ssnpa_lr_results$reference_set)=="random"] <- "Random"

ssnpa_lr_results
##      method     reference_set pds num_clusters       nmi       ari
## 1  ssNPA-LR             E10.5  NA            6 0.5343157 0.4103438
## 2  ssNPA-LR             E11.5  NA            7 0.5619260 0.4851154
## 3  ssNPA-LR             E12.5  NA            7 0.5401574 0.4101360
## 4  ssNPA-LR             E13.5  NA            7 0.5956836 0.4771295
## 5  ssNPA-LR             E14.5  NA            6 0.5944397 0.4973185
## 6  ssNPA-LR             E15.5  NA            7 0.5919097 0.4755229
## 7  ssNPA-LR             E17.5  NA            6 0.5100085 0.3861713
## 8  ssNPA-LR E17.5 hepatocytes  NA            7 0.5199538 0.4069017
## 9  ssNPA-LR            Random  NA            6 0.3327361 0.2236332
## 10 ssNPA-LR            Random  NA            6 0.3348679 0.2181527
## 11 ssNPA-LR            Random  NA            6 0.3153343 0.2041835
## 12 ssNPA-LR            Random  NA            6 0.3155976 0.1994630
## 13 ssNPA-LR            Random  NA            6 0.3513737 0.2508438
mean(ssnpa_lr_results$nmi[ssnpa_lr_results$reference_set=="Random"])
## [1] 0.3299819
s <- Rmisc::summarySE(ssnpa_lr_results, measurevar="nmi", groupvars=c("reference_set"))
## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced
g <- ggplot2::ggplot(s,ggplot2::aes(x=reference_set,y=nmi, width=0.75))
g <- g + ggplot2::geom_bar(stat="identity", position=ggplot2::position_dodge(width=0.9),fill="dodgerblue3")
g <- g + ggplot2::geom_errorbar(ggplot2::aes(ymin=nmi-se, ymax=nmi+se), width=0.5,position=ggplot2::position_dodge(.9))
g <- g + ggplot2::theme_bw()
g <- g + ggplot2::xlab('Reference Cells')
g <- g + ggplot2::ylab('NMI')
g <- g + ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
                        axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
                        text = ggplot2::element_text(size=20),
                        legend.text=ggplot2::element_text(size=18),
                        axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)))
pdf('liver_ssnpa-lr_by_referencecells.nmi.pdf', width=8, height=6)
g
## Warning: Removed 8 rows containing missing values (geom_errorbar).
dev.off()
## quartz_off_screen 
##                 2
g
## Warning: Removed 8 rows containing missing values (geom_errorbar).

s <- Rmisc::summarySE(ssnpa_lr_results, measurevar="ari", groupvars=c("reference_set"))
## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced
g <- ggplot2::ggplot(s,ggplot2::aes(x=reference_set,y=ari, width=0.75))
g <- g + ggplot2::geom_bar(stat="identity", position=ggplot2::position_dodge(width=0.9),fill="dodgerblue3")
g <- g + ggplot2::geom_errorbar(ggplot2::aes(ymin=ari-se, ymax=ari+se), width=0.5,position=ggplot2::position_dodge(.9))
g <- g + ggplot2::theme_bw()
g <- g + ggplot2::xlab('Reference Cells')
g <- g + ggplot2::ylab('ARI')
g <- g + ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
                        axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
                        text = ggplot2::element_text(size=20),
                        legend.text=ggplot2::element_text(size=18),
                        axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)))
pdf('liver_ssnpa-lr_by_referencecells.ari.pdf', width=8, height=6)
g
## Warning: Removed 8 rows containing missing values (geom_errorbar).
dev.off()
## quartz_off_screen 
##                 2
g
## Warning: Removed 8 rows containing missing values (geom_errorbar).

ssnpalr_features_selected <- read.table('./liver_scrnaseq_analysis/test_lr_ref/E13.5/ssnpa_lr_selectedfeatures.txt', header=T, sep='\t',quote = "")
mean(ssnpalr_features_selected$num_var)
## [1] 13.51167

Cluster cells just based on gene expression

dir.create(paste0('./liver_scrnaseq_analysis/test_gex'))
gex_cluster <- ssnpa::cluster_samples(features = exp_voom,
                                      meta_data_df = class,
                                      n_pc = 10,
                                      cluster_resolution =0.6,
                                      cluster_ref_samples = T,
                                      ref_samples = rep(T,dim(exp_voom)[1]),
                                      seed_value = 1)$clustering
method <- 'Gene Expression'
reference_set <- NA
pds <- NA
num_clusters <- length(unique(gex_cluster$Cluster))
nmi <- aricode::NMI(class$timexcell,gex_cluster$Cluster)
ari <- mclust::adjustedRandIndex(class$timexcell, gex_cluster$Cluster)
write.table(gex_cluster,file=paste0('./liver_scrnaseq_analysis/test_gex/gex_cluster_output.txt'),quote = F, sep='\t')

Read in gene expression result file and calcule NMI and ARI

result_files <- list.files(path = "./liver_scrnaseq_analysis/test_gex", pattern = 'gex_cluster_output',full.names = T, recursive = T,ignore.case = F, include.dirs = T)

method <- character()
reference_set <- character()
pds <- integer()
num_clusters <- integer()
nmi <- double()
ari <- double()
for (file in result_files){
  method <- c(method, 'Gene Expression')
  reference_set <- c(reference_set, NA)
  pds <- c(pds,NA)
  df <- read.table(file,header=T,sep='\t', quote="",row.names=1,stringsAsFactors=F)
  num_clusters <- c(num_clusters,length(unique(df$Cluster)))
  nmi <- c(nmi,aricode::NMI(class$timexcell,df$Cluster))
  ari <- c(ari,mclust::adjustedRandIndex(class$timexcell, df$Cluster))
}
gex_results <- data.frame(method,reference_set,pds,num_clusters,nmi,ari)
gex_results
##            method reference_set pds num_clusters       nmi       ari
## 1 Gene Expression          <NA>  NA            5 0.4967235 0.3748872

Compare performance to Pathifier

Use Pathifier with KEGG pathways to cluster these data. First, read in KEGG pathways.

dir.create(paste0('./output_data'))
kegg_full <- list()
pathways <- KEGGREST::keggList("pathway", "mmu") #mmu for mouse
count2 <- 1
for (i in 1:length(pathways)){
    x <- attr(pathways,"names")[i]
    x <- gsub("path:","",x, fixed=T)
    path_full <- KEGGREST::keggGet(x)
    p <- path_full[[1]]$GENE
    if (length(p)>0){
    even_indexes<-seq(2,length(p),2)
    q <- p[even_indexes]
    r <- strsplit(q,";",fixed=T)
    gene_names <- NULL
    count <- 1
    for (j in 1:length(r)){
        if (length(r[[j]])>1){
        gene_names[count] <- r[[j]][1]
        count <- count+1
        }
    }
    # only keep pathways with at least 10 genes
        if (length(gene_names)>10){
    kegg_full$gs[count2] <- list(gene_names)
    kegg_full$pathwaynames[count2] <- path_full[[1]]$NAME
    count2 <- count2 + 1
    }
    }
}
save(kegg_full,file="./output_data/kegg_full.mmu.RData")
summary(kegg_full)

Convert ENSEMBL IDs to gene names to match KEGG.

ensembl <- biomaRt::useEnsembl(biomart="ensembl", dataset="mmusculus_gene_ensembl")
lookup <- biomaRt::getBM(attributes=c('ensembl_gene_id','external_gene_name'), filters ='ensembl_gene_id', values =names(exp_voom), mart = ensembl)

t <- match(names(exp_voom), lookup$ensembl_gene_id)
gene_names <- lookup$external_gene_name[t]
exp_voom_genename <- exp_voom
names(exp_voom_genename)[!is.na(gene_names)] <- gene_names[!is.na(gene_names)]

Run Pathifier with all groups of cells as the reference set.

data <- t(exp_voom_genename)
dir.create(paste0('./liver_scrnaseq_analysis/test_pathifier_ref/'))
for (ref in c("E10.5","E11.5","E12.5","E13.5","E14.5","E15.5","E17.5")){
    dir.create(paste0('./liver_scrnaseq_analysis/test_pathifier_ref/',ref))
    pathway_dereg <- pathifier::quantify_pathways_deregulation(data,allgenes = row.names(data),syms = kegg_full$gs,pathwaynames = kegg_full$pathwaynames,normals = (class$time==ref), attempts=100)
    scores <- do.call(rbind,pathway_dereg$scores)
    scores <- as.data.frame(t(scores))
    names(scores) <- names(pathway_dereg$scores)
    names(scores) <- make.names(names(scores))
    row.names(scores) <- row.names(exp_voom_genename)
    pathifier_cluster <- ssnpa::cluster_samples(features = scores,
                                      meta_data_df = class,
                                      n_pc = 10,
                                      cluster_resolution =0.6,
                                      cluster_ref_samples = T,
                                      ref_samples = (class$time==ref),
                                      seed_value = 1)$clustering
    write.table(pathifier_cluster,file=paste0('./liver_scrnaseq_analysis/test_pathifier_ref/',ref,'/pathifier_cluster_output_',ref,'_.txt'),quote = F, sep='\t')
}


for (i in 1:5){
  dir.create(paste0("./liver_scrnaseq_analysis/test_pathifier_ref/random",toString(i)))
  ref_list <- read.table(paste0('./liver_scrnaseq_analysis/random_reference_cells/reference_cells_',toString(i),'_.txt'), quote="", sep='\t', header =F)
  ref_list <- ref_list$V1
  ref_sam <- (row.names(exp_voom) %in% ref_list)
  
  pathway_dereg <- pathifier::quantify_pathways_deregulation(data,allgenes = row.names(data),syms = kegg_full$gs,pathwaynames = kegg_full$pathwaynames,normals = ref_sam, attempts=100)
  scores <- do.call(rbind,pathway_dereg$scores)
  scores <- as.data.frame(t(scores))
  names(scores) <- names(pathway_dereg$scores)
  names(scores) <- make.names(names(scores))
  row.names(scores) <- row.names(exp_voom_genename)
  write.table(scores, file=paste0('./liver_scrnaseq_analysis/test_pathifier_ref/random', toString(i),'/pathifier_scores_random_.txt'), quote=F, sep='\t')
  pathifier_cluster <- ssnpa::cluster_samples(features = scores,
                                      meta_data_df = class,
                                      n_pc = 10,
                                      cluster_resolution =0.6,
                                      cluster_ref_samples = T,
                                      ref_samples = ref_sam,
                                      seed_value = 1)$clustering
  write.table(pathifier_cluster,file=paste0('./liver_scrnaseq_analysis/test_pathifier_ref/','random',toString(i),'/pathifier_cluster_output_','random','_.txt'),quote = F, sep='\t')
}

Read in Pathifier result files and calcule NMI and ARI

result_files <- list.files(path = "./liver_scrnaseq_analysis/test_pathifier_ref", pattern = 'pathifier_cluster_output',full.names = T, recursive = T,ignore.case = F, include.dirs = T)

method <- character()
reference_set <- character()
pds <- integer()
num_clusters <- integer()
nmi <- double()
ari <- double()
for (file in result_files){
  a <- strsplit(file,"_",fixed=T)
  method <- c(method, 'Pathifier')
  reference_set <- c(reference_set, a[[1]][8])
  pds <- c(pds,NA)
  df <- read.table(file,header=T,sep='\t', quote="",row.names=1,stringsAsFactors=F)
  num_clusters <- c(num_clusters,length(unique(df$Cluster)))
  nmi <- c(nmi,aricode::NMI(class$timexcell,df$Cluster))
  ari <- c(ari,mclust::adjustedRandIndex(class$timexcell, df$Cluster))
}
pathifier_results <- data.frame(method,reference_set,pds,num_clusters,nmi,ari)
levels(pathifier_results$reference_set)[levels(pathifier_results$reference_set)=="random"] <- "Random"


pathifier_results
##       method reference_set pds num_clusters       nmi       ari
## 1  Pathifier         E10.5  NA            6 0.5039087 0.3571078
## 2  Pathifier         E11.5  NA            6 0.4930281 0.3824889
## 3  Pathifier         E12.5  NA            6 0.5103271 0.3741414
## 4  Pathifier         E13.5  NA            6 0.5289707 0.3993093
## 5  Pathifier         E14.5  NA            6 0.5072126 0.3641602
## 6  Pathifier         E15.5  NA            5 0.4475906 0.3258259
## 7  Pathifier         E17.5  NA            5 0.4361635 0.3074956
## 8  Pathifier        Random  NA            5 0.4424782 0.3142321
## 9  Pathifier        Random  NA            6 0.4601782 0.3006597
## 10 Pathifier        Random  NA            6 0.5026206 0.3678690
## 11 Pathifier        Random  NA            5 0.4747649 0.3528858
## 12 Pathifier        Random  NA            5 0.4448886 0.3192647
mean(pathifier_results$nmi[pathifier_results$reference_set=="Random"])
## [1] 0.4649861
s <- Rmisc::summarySE(pathifier_results, measurevar="nmi", groupvars=c("reference_set"))
## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced
g <- ggplot2::ggplot(s,ggplot2::aes(x=reference_set,y=nmi, width=0.75))
g <- g + ggplot2::geom_bar(stat="identity", position=ggplot2::position_dodge(width=0.9),fill="dodgerblue3")
g <- g + ggplot2::geom_errorbar(ggplot2::aes(ymin=nmi-se, ymax=nmi+se), width=0.5,position=ggplot2::position_dodge(.9))
g <- g + ggplot2::theme_bw()
g <- g + ggplot2::xlab('Reference Cells')
g <- g + ggplot2::ylab('NMI')
g <- g + ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
                        axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
                        text = ggplot2::element_text(size=20),
                        legend.text=ggplot2::element_text(size=18),
                        axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)))
pdf('liver_pathifier_by_referencecells.nmi.pdf', width=8, height=6)
g
## Warning: Removed 7 rows containing missing values (geom_errorbar).
dev.off()
## quartz_off_screen 
##                 2
g
## Warning: Removed 7 rows containing missing values (geom_errorbar).

s <- Rmisc::summarySE(pathifier_results, measurevar="ari", groupvars=c("reference_set"))
## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced
g <- ggplot2::ggplot(s,ggplot2::aes(x=reference_set,y=ari, width=0.75))
g <- g + ggplot2::geom_bar(stat="identity", position=ggplot2::position_dodge(width=0.9),fill="dodgerblue3")
g <- g + ggplot2::geom_errorbar(ggplot2::aes(ymin=ari-se, ymax=ari+se), width=0.5,position=ggplot2::position_dodge(.9))
g <- g + ggplot2::theme_bw()
g <- g + ggplot2::xlab('Reference Cells')
g <- g + ggplot2::ylab('ARI')
g <- g + ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
                        axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
                        text = ggplot2::element_text(size=20),
                        legend.text=ggplot2::element_text(size=18),
                        axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)))
pdf('liver_pathifier_by_referencecells.ari.pdf', width=8, height=6)
g
## Warning: Removed 7 rows containing missing values (geom_errorbar).
dev.off()
## quartz_off_screen 
##                 2
g
## Warning: Removed 7 rows containing missing values (geom_errorbar).

Compare performance to ssGSEA

Use ssGSEA with MSigDB pathways to cluster these data. First, convert ENSEMBL IDs to Entrez gene IDs.

ensembl <- biomaRt::useEnsembl(biomart="ensembl", dataset="mmusculus_gene_ensembl")
lookup <- biomaRt::getBM(attributes=c('ensembl_gene_id','entrezgene_id'), filters ='ensembl_gene_id', values =names(exp_voom), mart = ensembl)

t <- match(names(exp_voom), lookup$ensembl_gene_id)
gene_names <- lookup$entrezgene[t]
exp_voom_entrez <- exp_voom
names(exp_voom_entrez)[!is.na(gene_names)] <- gene_names[!is.na(gene_names)]

Load in the C2 collection from MSigDB (version 5.2 with mouse identifiers) downloaded from http://bioinf.wehi.edu.au/software/MSigDB/index.html

load('./input_data/mouse_c2_v5p2.rdata')

Run ssGSEA and cluster cells.

dir.create(paste0('./liver_scrnaseq_analysis/test_ssgsea'))
ssgsea_features <- GSVA::gsva(as.matrix(t(exp_voom_entrez)), Mm.c2, method="ssgsea")
ssgsea_cluster_input <- as.data.frame(t(ssgsea_features))
names(ssgsea_cluster_input) <- make.names(names(ssgsea_cluster_input))
ssgsea_cluster <- ssnpa::cluster_samples(features = as.data.frame(t(ssgsea_features)),
                                      meta_data_df = class,
                                      n_pc = 10,
                                      cluster_resolution =0.6,
                                      cluster_ref_samples = T,
                                      ref_samples = rep(T,dim(class)[1]),
                                      seed_value = 1)$clustering
write.table(ssgsea_cluster,file=paste0('./liver_scrnaseq_analysis/test_ssgsea/ssgsea_cluster_output.txt'),quote = F, sep='\t')

Read in ssGSEA result file and calculate NMI and ARI

result_files <- list.files(path = "./liver_scrnaseq_analysis/test_ssgsea", pattern = 'ssgsea_cluster_output',full.names = T, recursive = T,ignore.case = F, include.dirs = T)

method <- character()
reference_set <- character()
pds <- integer()
num_clusters <- integer()
nmi <- double()
ari <- double()
for (file in result_files){
  method <- c(method, 'ssGSEA')
  reference_set <- c(reference_set, NA)
  pds <- c(pds,NA)
  df <- read.table(file,header=T,sep='\t', quote="",row.names=1,stringsAsFactors=F)
  num_clusters <- c(num_clusters,length(unique(df$Cluster)))
  nmi <- c(nmi,aricode::NMI(class$timexcell,df$Cluster))
  ari <- c(ari,mclust::adjustedRandIndex(class$timexcell, df$Cluster))
}
ssgsea_results <- data.frame(method,reference_set,pds,num_clusters,nmi,ari)
ssgsea_results
##   method reference_set pds num_clusters       nmi       ari
## 1 ssGSEA          <NA>  NA            5 0.4776922 0.3531489

Check performance with fewer genes

Generate random datasets.

dir.create(paste0('./liver_scrnaseq_analysis/test_fewgenes/datasets'))
set.seed(1)
for (p in c(0.95, 0.9, 0.85, 0.8, 0.75, 0.5, 0.25)){
for (i in 1:5){
  s <- sample(names(exp_voom), floor(dim(exp_voom)[2]*p), replace = FALSE)
  exp_s <- exp_voom[,s]
  write.table(exp_s,paste0('./liver_scrnaseq_analysis/test_fewgenes/datasets/expvoom_p_',toString(p),'_',toString(i),'_.txt'),quote=F,sep='\t',row.names = T, col.names=T)
}
}

Run ssNPA on each of these random datasets.

ref <- 'E15.5'
pd <- 4

result_files <- list.files(path = "./liver_scrnaseq_analysis/test_fewgenes/datasets", pattern = 'expvoom',full.names = T, recursive = T,ignore.case = F, include.dirs = T)

for (file in result_files){
  df <- read.table(file,header=T,sep='\t',quote="",row.names=1)
  a <- strsplit(file,'_',fixed = T)
  p <- a[[1]][6]
  i <- a[[1]][7]
  dir.create(paste0('./liver_scrnaseq_analysis/test_fewgenes/p',toString(p),'/',toString(i)))
  ssnpa_cluster <- ssnpa::run_ssnpa(gene_exp = df,
                 reference_samples = (class$time==ref), 
                 out_dir = paste0('./liver_scrnaseq_analysis/test_fewgenes/p',toString(p),'/',toString(i)),
                 seed_value = 1,
                 fgs_pd = pd,
                 n_pc = 10,
                 cluster_resolution = 0.6,
                 cluster_ref_samples = T)$clustering
  write.table(ssnpa_cluster,file=paste0('./liver_scrnaseq_analysis/test_fewgenes/ssNPA_cluster_output_',ref,'_pd_',toString(pd),'_',toString(p),'_',toString(i),'_.txt'),quote = F, sep='\t')
}
  #Apply ssNPA to full dataset (all 3,000 genes)
  p <- 1
  i <- 1
  dir.create(paste0('./liver_scrnaseq_analysis/test_fewgenes/p',toString(p),'/',toString(i)))
  ssnpa_cluster <- ssnpa::run_ssnpa(gene_exp = exp_voom,
                 reference_samples = (class$time==ref), 
                 out_dir = paste0('./liver_scrnaseq_analysis/test_fewgenes/p',toString(p),'/',toString(i)),
                 seed_value = 1,
                 fgs_pd = pd,
                 n_pc = 10,
                 cluster_resolution = 0.6,
                 cluster_ref_samples = T)$clustering
  write.table(ssnpa_cluster,file=paste0('./liver_scrnaseq_analysis/test_fewgenes/ssNPA_cluster_output_',ref,'_pd_',toString(pd),'_',toString(p),'_',toString(i),'_.txt'),quote = F, sep='\t')

Read in reduced gene result files and calculate NMI and ARI

result_files <- list.files(path = "./liver_scrnaseq_analysis/test_fewgenes", pattern = 'ssNPA_cluster_output',full.names = T, recursive = T,ignore.case = F, include.dirs = T)

f <- double()
num_clusters <- integer()
nmi <- double()
ari <- double()
for (file in result_files){
  a <- strsplit(file,'_',fixed = T)
  f <- c(f, as.numeric(a[[1]][10])*100)
  df <- read.table(file,header=T,sep='\t', quote="",row.names=1,stringsAsFactors=F)
  num_clusters <- c(num_clusters,length(unique(df$Cluster)))
  nmi <- c(nmi,aricode::NMI(class$timexcell,df$Cluster))
  ari <- c(ari,mclust::adjustedRandIndex(class$timexcell, df$Cluster))
}
ssnpa_fewgene_results <- data.frame(f,num_clusters,nmi,ari)

s <- Rmisc::summarySE(ssnpa_fewgene_results, measurevar="nmi", groupvars=c("f"))

s
##     f N       nmi         sd         se         ci
## 1  25 5 0.3855473 0.03936100 0.01760277 0.04887314
## 2  50 5 0.5216856 0.03135548 0.01402260 0.03893297
## 3  75 5 0.5421200 0.04445185 0.01987947 0.05519426
## 4  80 5 0.5592278 0.04314140 0.01929342 0.05356712
## 5  85 5 0.5833929 0.02407098 0.01076487 0.02988807
## 6  90 5 0.5823763 0.02749929 0.01229806 0.03414488
## 7  95 5 0.5948758 0.03501905 0.01566099 0.04348189
## 8 100 1 0.6150530         NA         NA        NaN
g_genes <- ggplot2::ggplot(s, ggplot2::aes(x=as.numeric(f), y=nmi)) + 
    ggplot2::geom_errorbar(ggplot2::aes(ymin=nmi-se, ymax=nmi+se), width=1) + ggplot2::geom_line() + ggplot2::geom_point() + ggplot2::theme_bw() + ggplot2::xlab("Percentage of 3,000 Genes Selected") + ggplot2::ylab("NMI")
g_genes

pdf('ssnpa_liver_percentgenes.pdf',width=7, heigh=5)
g_genes
dev.off()
## quartz_off_screen 
##                 2

Check performance with fewer cells

Generate random datasets.

dir.create(paste0('./liver_scrnaseq_analysis/test_fewcells/datasets'))
set.seed(1)
for (p in c(0.95, 0.9, 0.85, 0.8, 0.75)){
for (i in 1:5){
  cells_keep <- list()
  # maintain proportion of true classes
  for (j in unique(class$timexcell)){
    s <- sample(row.names(exp_voom)[class$timexcell==j], round(dim(exp_voom[class$timexcell==j,])[1]*p,digits = 0), replace = FALSE)
    cells_keep <- c(cells_keep, s)
  }
  exp_s <- exp_voom[unlist(cells_keep),]
  write.table(exp_s,paste0('./liver_scrnaseq_analysis/test_fewcells/datasets/expvoom_p_',toString(p),'_',toString(i),'_.txt'),quote=F,sep='\t',row.names = T, col.names=T)
}
}

Run ssNPA on each of these random datasets.

ref <- 'E15.5'
pd <- 4

result_files <- list.files(path = "./liver_scrnaseq_analysis/test_fewcells/datasets", pattern = 'expvoom',full.names = T, recursive = T,ignore.case = F, include.dirs = T)

for (file in result_files){
  df <- read.table(file,header=T,sep='\t',quote="",row.names=1)
  a <- strsplit(file,'_',fixed = T)
  p <- a[[1]][6]
  i <- a[[1]][7]
  t <- match(row.names(df),row.names(class))
  class_s <- class[t,]
  
  dir.create(paste0('./liver_scrnaseq_analysis/test_fewcells/p',toString(p),'/',toString(i)))
  ssnpa_cluster <- ssnpa::run_ssnpa(gene_exp = df,
                 reference_samples = (class_s$time==ref), 
                 out_dir = paste0('./liver_scrnaseq_analysis/test_fewcells/p',toString(p),'/',toString(i)),
                 seed_value = 1,
                 fgs_pd = pd,
                 n_pc = 10,
                 cluster_resolution = 0.6,
                 cluster_ref_samples = T)$clustering
  write.table(ssnpa_cluster,file=paste0('./liver_scrnaseq_analysis/test_fewcells/ssNPA_cluster_output_',ref,'_pd_',toString(pd),'_',toString(p),'_',toString(i),'_.txt'),quote = F, sep='\t')
}
  #Apply ssNPA to full dataset (all 447 cells)
  p <- 1
  i <- 1
  dir.create(paste0('./liver_scrnaseq_analysis/test_fewcells/p',toString(p),'/',toString(i)))
  ssnpa_cluster <- ssnpa::run_ssnpa(gene_exp = exp_voom,
                 reference_samples = (class$time==ref), 
                 out_dir = paste0('./liver_scrnaseq_analysis/test_fewcells/p',toString(p),'/',toString(i)),
                 seed_value = 1,
                 fgs_pd = pd,
                 n_pc = 10,
                 cluster_resolution = 0.6,
                 cluster_ref_samples = T)$clustering
  write.table(ssnpa_cluster,file=paste0('./liver_scrnaseq_analysis/test_fewcells/ssNPA_cluster_output_',ref,'_pd_',toString(pd),'_',toString(p),'_',toString(i),'_.txt'),quote = F, sep='\t')

Read in reduced cell result files and calculate NMI and ARI

result_files <- list.files(path = "./liver_scrnaseq_analysis/test_fewcells", pattern = 'ssNPA_cluster_output',full.names = T, recursive = T,ignore.case = F, include.dirs = T)

f <- double()
num_clusters <- integer()
nmi <- double()
ari <- double()
for (file in result_files){
  a <- strsplit(file,'_',fixed = T)
  f <- c(f, as.numeric(a[[1]][10])*100)
  df <- read.table(file,header=T,sep='\t', quote="",row.names=1,stringsAsFactors=F)
  num_clusters <- c(num_clusters,length(unique(df$Cluster)))
  t <- match(row.names(df),row.names(class))
  nmi <- c(nmi,aricode::NMI(class$timexcell[t],df$Cluster))
  ari <- c(ari,mclust::adjustedRandIndex(class$timexcell[t], df$Cluster))
}
ssnpa_fewcell_results <- data.frame(f,num_clusters,nmi,ari)

s <- Rmisc::summarySE(ssnpa_fewcell_results, measurevar="nmi", groupvars=c("f"))

s
##     f N       nmi         sd         se         ci
## 1  75 5 0.5140561 0.05266120 0.02355081 0.06538752
## 2  80 5 0.5034949 0.03532563 0.01579810 0.04386257
## 3  85 5 0.5433212 0.02829108 0.01265216 0.03512802
## 4  90 5 0.5608482 0.03289078 0.01470920 0.04083930
## 5  95 5 0.5756385 0.04746641 0.02122763 0.05893734
## 6 100 1 0.6150530         NA         NA         NA
g_cells <- ggplot2::ggplot(s, ggplot2::aes(x=as.numeric(f), y=nmi)) + 
    ggplot2::geom_errorbar(ggplot2::aes(ymin=nmi-se, ymax=nmi+se), width=1) + ggplot2::geom_line() + ggplot2::geom_point() + ggplot2::theme_bw() + ggplot2::xlab("Percentage of 447 Cells Selected") + ggplot2::ylab("NMI")
g_cells

pdf('ssnpa_liver_percentcells.pdf',width=7, heigh=5)
g_cells
dev.off()
## quartz_off_screen 
##                 2

Check ssNPA PCA Loadings

ssnpa_loadings <- read.table('./liver_scrnaseq_analysis/test_refxpd/E15.5/ssNPA_loading_output_E15.5_pd_4_.txt', quote="", sep='\t', header=T)
ssnpa_loadings_abs <- abs(ssnpa_loadings)
top_pc_genes <- list()
top_pc_loadings <- list()
pc <- list()
top_n <- 10
for (i in 1:dim(ssnpa_loadings_abs)[2]){
  loadings <- ssnpa_loadings[order(-ssnpa_loadings_abs[,i]),i]
  genes <- row.names(ssnpa_loadings)[order(-ssnpa_loadings_abs[,i])]
  
  top_pc_genes <- c(top_pc_genes, genes[1:top_n])
  top_pc_loadings <- c(top_pc_loadings, loadings[1:top_n])
  pc <- c(pc, rep(paste0("PC", toString(i)),top_n))
}
gene_ensembl <- unlist(top_pc_genes)
loading <- unlist(top_pc_loadings)
PC <- unlist(pc)
ssnpa_top_loadings <- data.frame(gene_ensembl, loading, PC)

ensembl <- biomaRt::useEnsembl(biomart="ensembl", dataset="mmusculus_gene_ensembl")
lookup <- biomaRt::getBM(attributes=c('ensembl_gene_id','external_gene_name','name_1006'), filters ='ensembl_gene_id', values = ssnpa_top_loadings$gene_ensembl, mart = ensembl)

t <- match(ssnpa_top_loadings$gene_ensembl, lookup$ensembl_gene_id)
ssnpa_top_loadings$gene <- lookup$external_gene_name[t]
ssnpa_top_loadings$description <- lookup$name_1006[t]
length(unique(ssnpa_top_loadings$gene))
## [1] 94
ssnpa_top_loadings[,1:4]
##           gene_ensembl     loading   PC          gene
## 1   ENSMUSG00000037470 -0.03513024  PC1         Uggt1
## 2   ENSMUSG00000035478 -0.03488756  PC1          Mbd3
## 3   ENSMUSG00000016018 -0.03458942  PC1         Mtrex
## 4   ENSMUSG00000024683 -0.03444034  PC1        Mrpl16
## 5   ENSMUSG00000024082 -0.03428993  PC1       Ndufaf7
## 6   ENSMUSG00000026843 -0.03419275  PC1         Fubp3
## 7   ENSMUSG00000001627 -0.03377094  PC1         Ifrd1
## 8   ENSMUSG00000054115 -0.03369422  PC1          Skp2
## 9   ENSMUSG00000037461 -0.03357244  PC1         Ints7
## 10  ENSMUSG00000028394 -0.03356370  PC1         Pole3
## 11  ENSMUSG00000033831  0.08639173  PC2           Fgb
## 12  ENSMUSG00000028307  0.07976233  PC2         Aldob
## 13  ENSMUSG00000045092  0.07904960  PC2         S1pr1
## 14  ENSMUSG00000015243  0.07885478  PC2         Abca1
## 15  ENSMUSG00000066406  0.07555713  PC2        Akap13
## 16  ENSMUSG00000022766  0.07493626  PC2      Serpind1
## 17  ENSMUSG00000002769  0.07428510  PC2          Gnmt
## 18  ENSMUSG00000029260  0.07360931  PC2       Ugt2b34
## 19  ENSMUSG00000068566  0.07352332  PC2         Myadm
## 20  ENSMUSG00000031402  0.07327199  PC2          Mpp1
## 21  ENSMUSG00000055912 -0.07676164  PC3      Tmem150a
## 22  ENSMUSG00000064225 -0.06875963  PC3         Paqr9
## 23  ENSMUSG00000023262 -0.06219225  PC3          Acy1
## 24  ENSMUSG00000018796 -0.06193448  PC3         Acsl1
## 25  ENSMUSG00000047945  0.06092703  PC3      Marcksl1
## 26  ENSMUSG00000021892 -0.05854530  PC3        Sh3bp5
## 27  ENSMUSG00000062300  0.05811363  PC3       Nectin2
## 28  ENSMUSG00000006517 -0.05713638  PC3           Mvd
## 29  ENSMUSG00000053646  0.05700964  PC3        Plxnb1
## 30  ENSMUSG00000028832  0.05559043  PC3         Stmn1
## 31  ENSMUSG00000060807 -0.11085462  PC4      Serpina6
## 32  ENSMUSG00000031383 -0.09393560  PC4         Dusp9
## 33  ENSMUSG00000010307 -0.09294081  PC4       Tmem86a
## 34  ENSMUSG00000051159 -0.08805587  PC4        Cited1
## 35  ENSMUSG00000029581 -0.07606941  PC4         Fscn1
## 36  ENSMUSG00000070348 -0.06926351  PC4         Ccnd1
## 37  ENSMUSG00000040699 -0.06851796  PC4         Limd2
## 38  ENSMUSG00000075266 -0.06646365  PC4         Cenpw
## 39  ENSMUSG00000022505 -0.06600525  PC4          Emp2
## 40  ENSMUSG00000019461 -0.06441820  PC4        Plscr3
## 41  ENSMUSG00000045294 -0.09008979  PC5        Insig1
## 42  ENSMUSG00000001285 -0.07178131  PC5          Myg1
## 43  ENSMUSG00000038383 -0.06790060  PC5          Pigu
## 44  ENSMUSG00000022914 -0.06664825  PC5         Brwd1
## 45  ENSMUSG00000048000 -0.06640357  PC5        Gigyf2
## 46  ENSMUSG00000046814 -0.06587141  PC5         Gchfr
## 47  ENSMUSG00000026272 -0.06440306  PC5          Agxt
## 48  ENSMUSG00000036606 -0.06406238  PC5        Plxnb2
## 49  ENSMUSG00000028180  0.06388521  PC5        Zranb2
## 50  ENSMUSG00000066979 -0.06286190  PC5          Bub3
## 51  ENSMUSG00000040616  0.07585918  PC6        Tmem51
## 52  ENSMUSG00000021149 -0.07583079  PC6        Gtpbp4
## 53  ENSMUSG00000021395  0.07485412  PC6         Spin1
## 54  ENSMUSG00000002804 -0.07218189  PC6        Nudt14
## 55  ENSMUSG00000025337  0.07195760  PC6          Sbds
## 56  ENSMUSG00000028334 -0.07013933  PC6          Nans
## 57  ENSMUSG00000026603 -0.06842760  PC6         Smyd2
## 58  ENSMUSG00000045767  0.06743263  PC6 B230219D22Rik
## 59  ENSMUSG00000018189 -0.06718611  PC6         Uchl5
## 60  ENSMUSG00000052423  0.06704137  PC6       B4galt3
## 61  ENSMUSG00000008333 -0.10055473  PC7        Snrpb2
## 62  ENSMUSG00000017950  0.08026718  PC7         Hnf4a
## 63  ENSMUSG00000008690  0.07898537  PC7        Ncaph2
## 64  ENSMUSG00000043131  0.07633742  PC7         Mob1a
## 65  ENSMUSG00000056121 -0.07355063  PC7          Fez2
## 66  ENSMUSG00000064037  0.06854111  PC7          Gpn1
## 67  ENSMUSG00000010911  0.06727907  PC7          Apip
## 68  ENSMUSG00000020609  0.06722823  PC7          Apob
## 69  ENSMUSG00000024908  0.06704612  PC7        Ppp6r3
## 70  ENSMUSG00000022023  0.06411352  PC7          Wbp4
## 71  ENSMUSG00000020116  0.07241917  PC8          Pno1
## 72  ENSMUSG00000022698 -0.06991674  PC8         Naa50
## 73  ENSMUSG00000010911 -0.06984377  PC8          Apip
## 74  ENSMUSG00000001525  0.06797213  PC8         Tubb5
## 75  ENSMUSG00000016756  0.06708920  PC8          Cmah
## 76  ENSMUSG00000040451 -0.06459179  PC8         Sgms1
## 77  ENSMUSG00000022024  0.06378986  PC8         Sugt1
## 78  ENSMUSG00000066979  0.06364009  PC8          Bub3
## 79  ENSMUSG00000007050  0.06139804  PC8          Lsm2
## 80  ENSMUSG00000028233  0.06119468  PC8          Tgs1
## 81  ENSMUSG00000034269  0.08225236  PC9         Setd5
## 82  ENSMUSG00000026547  0.07865298  PC9        Tagln2
## 83  ENSMUSG00000024603  0.07725204  PC9         Dctn4
## 84  ENSMUSG00000018189  0.07723070  PC9         Uchl5
## 85  ENSMUSG00000001525  0.07326395  PC9         Tubb5
## 86  ENSMUSG00000020687  0.07030661  PC9         Cdc27
## 87  ENSMUSG00000017950  0.06980342  PC9         Hnf4a
## 88  ENSMUSG00000028229  0.05801654  PC9         Rmdn1
## 89  ENSMUSG00000032485  0.05767303  PC9          Scap
## 90  ENSMUSG00000031819  0.05685796  PC9          Emc8
## 91  ENSMUSG00000031201  0.09666380 PC10         Brcc3
## 92  ENSMUSG00000034621  0.08961701 PC10       Gpatch8
## 93  ENSMUSG00000029250  0.08421378 PC10        Polr2b
## 94  ENSMUSG00000026349  0.08276864 PC10         Ccnt2
## 95  ENSMUSG00000020929  0.08200153 PC10        Eftud2
## 96  ENSMUSG00000012535  0.07000487 PC10         Tnpo3
## 97  ENSMUSG00000028898  0.06570566 PC10      Trnau1ap
## 98  ENSMUSG00000024231  0.06504754 PC10          Cul2
## 99  ENSMUSG00000028832  0.06497135 PC10         Stmn1
## 100 ENSMUSG00000027367 -0.06221578 PC10        Stard7
write.table(ssnpa_top_loadings,'./liver_scrnaseq_analysis/ssnpa_top_loadings.txt',quote=F, sep='\t', row.names=F)

Figures for manuscript

class$cell_type <- factor(class$cell_type, levels = c("hepatoblast/hepatocyte","cholangiocyte"))
gex_result <- read.table('./liver_scrnaseq_analysis/test_gex/gex_cluster_output.txt', quote="", sep='\t', header=T)

gex_class <- ggplot2::ggplot(gex_result,ggplot2::aes(tSNE_1,tSNE_2))+ggplot2::geom_point(ggplot2::aes(colour=class$time,shape=as.factor(class$cell_type)), size=4.5)+ggplot2::theme_bw()+ggplot2::labs(color="Time",shape="Cell Type")+ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 10)),shape = ggplot2::guide_legend(override.aes = list(size = 10))) + 
ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
text = ggplot2::element_text(size=20),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=48),
axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
plot.margin = ggplot2::unit(c(3,3,3,3),"lines")) +
ggplot2::scale_colour_brewer(palette="Set2") + ggplot2::ggtitle('Gene Expression')

gex_cluster <- ggplot2::ggplot(gex_result,ggplot2::aes(tSNE_1,tSNE_2))+ggplot2::geom_point(ggplot2::aes(colour=as.factor(Cluster)), size=4.5)+ggplot2::theme_bw()+ggplot2::labs(color="Cluster")+ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 10))) + 
ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
text = ggplot2::element_text(size=20),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=48),
axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
plot.margin = ggplot2::unit(c(3,3,3,3),"lines")) + ggplot2::ggtitle('Gene Expression')


ssnpa_result <- read.table('./liver_scrnaseq_analysis/test_refxpd/E15.5/ssNPA_cluster_output_E15.5_pd_4_.txt', quote="", sep='\t', header=T)


ssnpa_class <- ggplot2::ggplot(ssnpa_result,ggplot2::aes(tSNE_1,tSNE_2))+ggplot2::geom_point(ggplot2::aes(colour=class$time,shape=as.factor(class$cell_type)), size=4.5)+ggplot2::theme_bw()+ggplot2::labs(color="Time",shape="Cell Type")+ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 10)),shape = ggplot2::guide_legend(override.aes = list(size = 10))) + 
ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
text = ggplot2::element_text(size=20),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=48),
axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
plot.margin = ggplot2::unit(c(3,3,3,3),"lines")) +
ggplot2::scale_colour_brewer(palette="Set2") + ggplot2::ggtitle('ssNPA')

ssnpa_cluster <- ggplot2::ggplot(ssnpa_result,ggplot2::aes(tSNE_1,tSNE_2))+ggplot2::geom_point(ggplot2::aes(colour=as.factor(Cluster)), size=4.5)+ggplot2::theme_bw()+ggplot2::labs(color="Cluster")+ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 10))) + 
ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
text = ggplot2::element_text(size=20),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=48),
axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
plot.margin = ggplot2::unit(c(3,3,3,3),"lines")) + ggplot2::ggtitle('ssNPA')

pathifier_result <- read.table('./liver_scrnaseq_analysis/test_pathifier_ref/E13.5/pathifier_cluster_output_E13.5_.txt', quote="", sep='\t', header=T)

pathifier_class <- ggplot2::ggplot(pathifier_result,ggplot2::aes(tSNE_1,tSNE_2))+ggplot2::geom_point(ggplot2::aes(colour=class$time,shape=as.factor(class$cell_type)), size=4.5)+ggplot2::theme_bw()+ggplot2::labs(color="Time",shape="Cell Type")+ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 10)),shape = ggplot2::guide_legend(override.aes = list(size = 10))) + 
ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
text = ggplot2::element_text(size=20),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=48),
axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
plot.margin = ggplot2::unit(c(3,3,3,3),"lines")) +
ggplot2::scale_colour_brewer(palette="Set2") + ggplot2::ggtitle('Pathifier')

pathifier_cluster <- ggplot2::ggplot(pathifier_result,ggplot2::aes(tSNE_1,tSNE_2))+ggplot2::geom_point(ggplot2::aes(colour=as.factor(Cluster)), size=4.5)+ggplot2::theme_bw()+ggplot2::labs(color="Cluster")+ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 10))) + 
ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
text = ggplot2::element_text(size=20),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=48),
axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
plot.margin = ggplot2::unit(c(3,3,3,3),"lines")) + ggplot2::ggtitle('Pathifier')

ssgsea_result <- read.table('./liver_scrnaseq_analysis/test_ssgsea/ssgsea_cluster_output.txt', quote="", sep='\t', header=T)

ssgsea_class <- ggplot2::ggplot(ssgsea_result,ggplot2::aes(tSNE_1,tSNE_2))+ggplot2::geom_point(ggplot2::aes(colour=class$time,shape=as.factor(class$cell_type)), size=4.5)+ggplot2::theme_bw()+ggplot2::labs(color="Time",shape="Cell Type")+ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 10)),shape = ggplot2::guide_legend(override.aes = list(size = 10))) + 
ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
text = ggplot2::element_text(size=20),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=48),
axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
plot.margin = ggplot2::unit(c(3,3,3,3),"lines")) +
ggplot2::scale_colour_brewer(palette="Set2") + ggplot2::ggtitle('ssGSEA')

ssgsea_cluster <- ggplot2::ggplot(ssgsea_result,ggplot2::aes(tSNE_1,tSNE_2))+ggplot2::geom_point(ggplot2::aes(colour=as.factor(Cluster)), size=4.5)+ggplot2::theme_bw()+ggplot2::labs(color="Cluster")+ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 10))) + 
ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
text = ggplot2::element_text(size=20),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=48),
axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
plot.margin = ggplot2::unit(c(3,3,3,3),"lines"))+ ggplot2::ggtitle('ssGSEA')

ssnpalr_result <- read.table('./liver_scrnaseq_analysis/test_lr_ref/E13.5/ssNPA-LR_cluster_output_E13.5_.txt', quote="", sep='\t', header=T)

ssnpalr_class <- ggplot2::ggplot(ssnpalr_result,ggplot2::aes(tSNE_1,tSNE_2))+ggplot2::geom_point(ggplot2::aes(colour=class$time,shape=as.factor(class$cell_type)), size=4.5)+ggplot2::theme_bw()+ggplot2::labs(color="Time",shape="Cell Type")+ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 10)),shape = ggplot2::guide_legend(override.aes = list(size = 10))) + 
ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
text = ggplot2::element_text(size=20),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=48),
axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
plot.margin = ggplot2::unit(c(3,3,3,3),"lines")) +
ggplot2::scale_colour_brewer(palette="Set2")

ssnpalr_cluster <- ggplot2::ggplot(ssnpalr_result,ggplot2::aes(tSNE_1,tSNE_2))+ggplot2::geom_point(ggplot2::aes(colour=as.factor(Cluster)), size=4.5)+ggplot2::theme_bw()+ggplot2::labs(color="Cluster")+ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 10))) + 
ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
text = ggplot2::element_text(size=20),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=48),
axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
plot.margin = ggplot2::unit(c(3,22,3,3),"lines"))

# Make panel figure comparing the methods, colored by time and cell type
method_comp_class <- ggpubr::ggarrange(gex_class,ssnpa_class,pathifier_class,ssgsea_class, common.legend = T, legend="right", labels="AUTO",font.label=list(size=52, face="bold"), nrow=2, ncol=2)
pdf("liver_method_comparison_class.pdf", width=30, height=24)
method_comp_class
dev.off()
## quartz_off_screen 
##                 2
# Make panel figure comparing the methods, colored by cluster assignment
method_comp_cluster <- ggpubr::ggarrange(gex_cluster,ssnpa_cluster,pathifier_cluster,ssgsea_cluster, common.legend = F, legend="right", labels="AUTO",font.label=list(size=52, face="bold"), nrow=2, ncol=2)
pdf("liver_method_comparison_cluster.pdf", width=29, height=24)
method_comp_cluster
dev.off()
## quartz_off_screen 
##                 2
# Make panel figure with ssNPA-LR results
ssnpalr_figure <- ggpubr::ggarrange(ssnpalr_class,ssnpalr_cluster, common.legend = F,legend="right",labels='AUTO',font.label = list(size = 52, face = "bold"),nrow=1,ncol=2) 
pdf("liver_ssnpa_lr_result_figure.pdf", width=36, height=12)
ssnpalr_figure
dev.off()
## quartz_off_screen 
##                 2
# Make panel figure fewer genes/cells results
g_genes <- g_genes + ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
text = ggplot2::element_text(size=20),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=48),
axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
plot.margin = ggplot2::unit(c(3,3,3,3),"lines"))

g_cells <- g_cells + ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
text = ggplot2::element_text(size=20),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=48),
axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
plot.margin = ggplot2::unit(c(3,3,3,3),"lines"))

few_figure <- ggpubr::ggarrange(g_genes,g_cells, common.legend = F,labels='AUTO',font.label = list(size = 52, face = "bold"),nrow=1,ncol=2) 
## Warning: Removed 1 rows containing missing values (geom_errorbar).

## Warning: Removed 1 rows containing missing values (geom_errorbar).
pdf("fewer_genes_cells_result_figure.pdf", width=24, height=12)
few_figure
dev.off()
## quartz_off_screen 
##                 2
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] mclust_5.4.5         Rcpp_1.0.2           locfit_1.5-9.1      
##  [4] lattice_0.20-38      prettyunits_1.0.2    assertthat_0.2.1    
##  [7] zeallot_0.1.0        digest_0.6.22        R6_2.4.0            
## [10] plyr_1.8.4           backports_1.1.5      stats4_3.6.1        
## [13] RSQLite_2.1.2        evaluate_0.14        httr_1.4.1          
## [16] ggplot2_3.2.1        pillar_1.4.2         rlang_0.4.1         
## [19] Rmisc_1.5            progress_1.2.2       lazyeval_0.2.2      
## [22] curl_4.2             blob_1.2.0           S4Vectors_0.22.1    
## [25] Matrix_1.2-17        rmarkdown_1.16       labeling_0.3        
## [28] stringr_1.4.0        RCurl_1.95-4.12      bit_1.1-14          
## [31] biomaRt_2.40.5       munsell_0.5.0        compiler_3.6.1      
## [34] xfun_0.10            pkgconfig_2.0.3      BiocGenerics_0.30.0 
## [37] htmltools_0.4.0      tidyselect_0.2.5     gridExtra_2.3       
## [40] tibble_2.1.3         edgeR_3.26.8         IRanges_2.18.3      
## [43] XML_3.98-1.20        aricode_0.1.2        crayon_1.3.4        
## [46] dplyr_0.8.3          ggpubr_0.2.3         bitops_1.0-6        
## [49] grid_3.6.1           gtable_0.3.0         DBI_1.0.0           
## [52] magrittr_1.5         scales_1.0.0         stringi_1.4.3       
## [55] ggsignif_0.6.0       limma_3.40.6         vctrs_0.2.0         
## [58] cowplot_1.0.0        RColorBrewer_1.1-2   tools_3.6.1         
## [61] bit64_0.9-7          Biobase_2.44.0       glue_1.3.1          
## [64] purrr_0.3.3          hms_0.5.2            parallel_3.6.1      
## [67] yaml_2.2.0           ssnpa_0.0.0.9003     AnnotationDbi_1.46.1
## [70] colorspace_1.4-1     memoise_1.1.0        knitr_1.25