Install packages as necessary.
dir.create('liver_scrnaseq_analysis/')
## Warning in dir.create("liver_scrnaseq_analysis/"):
## 'liver_scrnaseq_analysis' already exists
list.of.bioconductor.packages <- c("edgeR","limma","pathifier","KEGGREST","biomaRt","GSVA","GSEABase")
new.bioc.packages <- list.of.bioconductor.packages[!(list.of.bioconductor.packages %in% installed.packages()[,"Package"])]
if(length(new.bioc.packages)) BiocManager::install(new.bioc.packages)
list.of.packages <- c("aricode","ggplot2","mclust","Rmisc","ggpubr")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
Count data downloaded from GEO GSE90047.
# Read in data downloaded from GEO
gene_exp <- read.table("./input_data/GSE90047_Single-cell_RNA-seq_Read_Count.txt",header=TRUE,quote="",sep="\t",row.names=1)
gene_symbols <- gene_exp$Symbol
gene_lengths <- gene_exp$GeneLength
gene_exp$Symbol <- NULL
gene_exp$GeneLength <- NULL
gene_exp <- as.data.frame(t(gene_exp))
dim(gene_exp)
## [1] 529 40916
# Get rid of spike-in controls
i <- grep("ERCC", names(gene_exp))
gene_exp[,i] <- NULL
dim(gene_exp)
## [1] 529 40824
# Keep only cells from main time course experiment
gene_exp <- as.matrix(gene_exp)
gene_exp <- gene_exp[1:447,]
Filter out lowly expressed genes and use voom to convert counts to log2(cpm) with associated weights.
# Filter out lowly expressed genes
keep <- edgeR::filterByExpr(t(gene_exp))
table(keep)
## keep
## FALSE TRUE
## 35753 5071
gene_exp <- gene_exp[,keep]
# Apply voom
exp_voom <- limma::voom(t(gene_exp),plot=T)
exp_voom <- as.data.frame(t(exp_voom$E))
write.table(exp_voom,file="./liver_scrnaseq_analysis/liver_counts.exp_filt.voom.txt",quote=F,sep="\t")
Keep only 3,000 most highly variant genes.
# Keep top 3,000 most highly variant genes
exp_voom <- ssnpa::filter_by_variance(exp_voom, 3000)
dim(exp_voom)
## [1] 447 3000
write.table(exp_voom,file='./liver_scrnaseq_analysis/liver_counts.exp_filt.voom.var_filt.txt',quote=F, sep='\t')
Parse cell names for timepoint and read in cell type information.
m <- row.names(exp_voom)
n <- read.table(text=m, sep="_", colClasses="character")
time <- n$V1
time <- substring(time,1,5)
class <- as.data.frame(time, row.names=m)
cell <- read.table('./input_data/liver_cell_types.txt',header=T,sep="\t",row.names=1,quote="")
class <- merge(class,cell, by="row.names")
row.names(class) <- class$Row.names
class$Row.names <- NULL
class$timexcell <- paste(class$time, class$cell_type,sep = '_')
table(class$time)
##
## E10.5 E11.5 E12.5 E13.5 E14.5 E15.5 E17.5
## 54 70 41 65 70 77 70
table(class$cell_type)
##
## cholangiocyte hepatoblast/hepatocyte
## 102 345
table(class$timexcell)
##
## E10.5_hepatoblast/hepatocyte E11.5_cholangiocyte
## 54 2
## E11.5_hepatoblast/hepatocyte E12.5_cholangiocyte
## 68 3
## E12.5_hepatoblast/hepatocyte E13.5_cholangiocyte
## 38 10
## E13.5_hepatoblast/hepatocyte E14.5_cholangiocyte
## 55 17
## E14.5_hepatoblast/hepatocyte E15.5_cholangiocyte
## 53 36
## E15.5_hepatoblast/hepatocyte E17.5_cholangiocyte
## 41 34
## E17.5_hepatoblast/hepatocyte
## 36
Scan a range of penalty discount values and reference groups and evaluate performance of ssNPA by NMI and ARI
dir.create('./liver_scrnaseq_analysis/test_refxpd/')
for (ref in c("E10.5","E11.5","E12.5","E13.5","E14.5","E15.5","E17.5")){
dir.create(paste0('./liver_scrnaseq_analysis/test_refxpd/',ref))
for (pd in 5:12){
ssnpa_cluster <- ssnpa::run_ssnpa(gene_exp = exp_voom,
reference_samples = (class$time==ref),
out_dir = paste0('./liver_scrnaseq_analysis/test_refxpd/',ref),
seed_value = 1,
fgs_pd = pd,
n_pc = 10,
cluster_resolution = 0.6,
cluster_ref_samples = T)
write.table(ssnpa_cluster$clustering,file=paste0('./liver_scrnaseq_analysis/test_refxpd/',ref,'/ssNPA_cluster_output_',ref,'_pd_',toString(pd),'_.txt'),quote = F, sep='\t')
write.table(ssnpa_cluster$loadings,file=paste0('./liver_scrnaseq_analysis/test_refxpd/',ref,'/ssNPA_loading_output_',ref,'_pd_',toString(pd),'_.txt'),quote = F, sep='\t')
}
}
for (ref in c("E10.5","E11.5","E13.5","E14.5","E15.5","E17.5")){
for (pd in 4){
ssnpa_cluster <- ssnpa::run_ssnpa(gene_exp = exp_voom,
reference_samples = (class$time==ref),
out_dir = paste0('./liver_scrnaseq_analysis/test_refxpd/',ref),
seed_value = 1,
fgs_pd = pd,
n_pc = 10,
cluster_resolution = 0.6,
cluster_ref_samples = T)
write.table(ssnpa_cluster$clustering,file=paste0('./liver_scrnaseq_analysis/test_refxpd/',ref,'/ssNPA_cluster_output_',ref,'_pd_',toString(pd),'_.txt'),quote = F, sep='\t')
write.table(ssnpa_cluster$loadings,file=paste0('./liver_scrnaseq_analysis/test_refxpd/',ref,'/ssNPA_loading_output_',ref,'_pd_',toString(pd),'_.txt'),quote = F, sep='\t')
}
}
# add in E17.5 Hepatocytes
dir.create('./liver_scrnaseq_analysis/test_refxpd/E17.5hepatocytes/')
for (pd in 5:12){
ssnpa_cluster <- ssnpa::run_ssnpa(gene_exp = exp_voom,
reference_samples = (class$timexcell=='E17.5_hepatoblast/hepatocyte'),
out_dir = paste0('./liver_scrnaseq_analysis/test_refxpd/','E17.5hepatocytes'),
seed_value = 1,
fgs_pd = pd,
n_pc = 10,
cluster_resolution = 0.6,
cluster_ref_samples = T)
write.table(ssnpa_cluster$clustering,file=paste0('./liver_scrnaseq_analysis/test_refxpd/','E17.5hepatocytes','/ssNPA_cluster_output_','E17.5hepatocytes','_pd_',toString(pd),'_.txt'),quote = F, sep='\t')
write.table(ssnpa_cluster$loadings,file=paste0('./liver_scrnaseq_analysis/test_refxpd/','E17.5hepatocytes','/ssNPA_loading_output_','E17.5hepatocytes','_pd_',toString(pd),'_.txt'),quote = F, sep='\t')
}
Generate random reference groups.
dir.create('./liver_scrnaseq_analysis/random_reference_cells/')
set.seed(1)
for (i in 1:5){
s <- sample(row.names(exp_voom), median(table(class$time)), replace = FALSE)
write.table(s,paste0('./liver_scrnaseq_analysis/random_reference_cells/reference_cells_',toString(i),'_.txt'),quote=F,sep='\t',row.names = F, col.names=F)
}
Test ssNPA with random reference groups.
ref="Random"
for (i in 1:5){
dir.create(paste0('./liver_scrnaseq_analysis/test_refxpd/random',toString(i)))
ref_list <- read.table(paste0('./liver_scrnaseq_analysis/random_reference_cells/reference_cells_',toString(i),'_.txt'), quote="", sep='\t', header =F)
ref_list <- ref_list$V1
ref_sam <- (row.names(exp_voom) %in% ref_list)
for (pd in 4:12){
ssnpa_cluster <- ssnpa::run_ssnpa(gene_exp = exp_voom,
reference_samples = ref_sam,
out_dir = paste0('./liver_scrnaseq_analysis/test_refxpd/','random',toString(i)),
seed_value = 1,
fgs_pd = pd,
n_pc = 10,
cluster_resolution = 0.6,
cluster_ref_samples = T)
write.table(ssnpa_cluster$clustering,file=paste0('./liver_scrnaseq_analysis/test_refxpd/','random',toString(i),'/ssNPA_cluster_output_','random','_pd_',toString(pd),'_.txt'),quote = F, sep='\t')
write.table(ssnpa_cluster$loadings,file=paste0('./liver_scrnaseq_analysis/test_refxpd/','random',toString(i),'/ssNPA_loading_output_','random','_pd_',toString(pd),'_.txt'),quote = F, sep='\t')
}
}
Read in all ssNPA results files and calculate NMI and ARI
result_files <- list.files(path = "./liver_scrnaseq_analysis/test_refxpd", pattern = 'ssNPA_cluster_output',full.names = T, recursive = T,ignore.case = FALSE, include.dirs = T)
method <- character()
reference_set <- character()
pds <- integer()
num_clusters <- integer()
nmi <- double()
ari <- double()
for (file in result_files){
a <- strsplit(file,"_",fixed=T)
method <- c(method, 'ssNPA')
reference_set <- c(reference_set, a[[1]][7])
pds <- c(pds,a[[1]][9])
df <- read.table(file,header=T,sep='\t', quote="",row.names=1,stringsAsFactors=T)
num_clusters <- c(num_clusters,length(unique(df$Cluster)))
nmi <- c(nmi,aricode::NMI(class$timexcell,df$Cluster))
ari <- c(ari,mclust::adjustedRandIndex(class$timexcell, df$Cluster))
}
ssnpa_results <- data.frame(method,reference_set,pds,num_clusters,nmi,ari)
ssnpa_results$pds <- factor(ssnpa_results$pds, levels = c(4,5,6,7,8,9,10,11,12))
levels(ssnpa_results$reference_set)[levels(ssnpa_results$reference_set)=="E17.5hepatocytes"] <- "E17.5 hepatocytes"
levels(ssnpa_results$reference_set)[levels(ssnpa_results$reference_set)=="random"] <- "Random"
ssnpa_results
## method reference_set pds num_clusters nmi ari
## 1 ssNPA E10.5 10 6 0.4989061 0.3629863
## 2 ssNPA E10.5 11 6 0.4996008 0.3632776
## 3 ssNPA E10.5 12 6 0.4985045 0.3628323
## 4 ssNPA E10.5 4 6 0.5190910 0.3941733
## 5 ssNPA E10.5 5 6 0.5115482 0.3732445
## 6 ssNPA E10.5 6 6 0.5126917 0.3783058
## 7 ssNPA E10.5 7 6 0.5076537 0.3721858
## 8 ssNPA E10.5 8 6 0.4984591 0.3607016
## 9 ssNPA E10.5 9 6 0.4949195 0.3575609
## 10 ssNPA E11.5 10 7 0.5061272 0.3711821
## 11 ssNPA E11.5 11 7 0.5055431 0.3688952
## 12 ssNPA E11.5 12 7 0.5021115 0.3647993
## 13 ssNPA E11.5 4 7 0.5806941 0.5046954
## 14 ssNPA E11.5 5 7 0.5536136 0.4661542
## 15 ssNPA E11.5 6 7 0.5235163 0.4111588
## 16 ssNPA E11.5 7 7 0.5139211 0.3944585
## 17 ssNPA E11.5 8 7 0.5257517 0.3955257
## 18 ssNPA E11.5 9 7 0.5188797 0.3866566
## 19 ssNPA E12.5 10 6 0.4627493 0.3496546
## 20 ssNPA E12.5 11 6 0.4881712 0.3675188
## 21 ssNPA E12.5 12 6 0.4805727 0.3593541
## 22 ssNPA E12.5 5 7 0.5388598 0.4157324
## 23 ssNPA E12.5 6 6 0.5170250 0.3849483
## 24 ssNPA E12.5 7 6 0.4708107 0.3427727
## 25 ssNPA E12.5 8 6 0.4923602 0.3669023
## 26 ssNPA E12.5 9 6 0.4823793 0.3323744
## 27 ssNPA E13.5 10 6 0.4674087 0.3593620
## 28 ssNPA E13.5 11 6 0.4649576 0.3608429
## 29 ssNPA E13.5 12 6 0.4664099 0.3562749
## 30 ssNPA E13.5 4 7 0.5512526 0.4297797
## 31 ssNPA E13.5 5 6 0.4785982 0.3424778
## 32 ssNPA E13.5 6 7 0.5343838 0.4101959
## 33 ssNPA E13.5 7 6 0.4720886 0.3514341
## 34 ssNPA E13.5 8 6 0.4687013 0.3434875
## 35 ssNPA E13.5 9 6 0.4696692 0.3576294
## 36 ssNPA E14.5 10 7 0.5110207 0.3762143
## 37 ssNPA E14.5 11 7 0.5218555 0.3817916
## 38 ssNPA E14.5 12 7 0.5058115 0.3705352
## 39 ssNPA E14.5 4 6 0.5740522 0.4723229
## 40 ssNPA E14.5 5 7 0.5714402 0.4315123
## 41 ssNPA E14.5 6 7 0.5401851 0.3833062
## 42 ssNPA E14.5 7 7 0.4937949 0.3686072
## 43 ssNPA E14.5 8 7 0.5066050 0.3846853
## 44 ssNPA E14.5 9 7 0.4931975 0.3606796
## 45 ssNPA E15.5 10 6 0.5014041 0.3901273
## 46 ssNPA E15.5 11 6 0.5002681 0.3930109
## 47 ssNPA E15.5 12 6 0.5032696 0.3938568
## 48 ssNPA E15.5 4 8 0.6150530 0.4873983
## 49 ssNPA E15.5 5 8 0.5976367 0.4478579
## 50 ssNPA E15.5 6 7 0.5148128 0.3515344
## 51 ssNPA E15.5 7 6 0.4934968 0.3591223
## 52 ssNPA E15.5 8 6 0.5037886 0.3964979
## 53 ssNPA E15.5 9 7 0.5137178 0.3802511
## 54 ssNPA E17.5 10 6 0.4595449 0.3449952
## 55 ssNPA E17.5 11 6 0.4502055 0.3345621
## 56 ssNPA E17.5 12 6 0.4325164 0.3125855
## 57 ssNPA E17.5 4 7 0.5188686 0.3782065
## 58 ssNPA E17.5 5 7 0.5380958 0.4139808
## 59 ssNPA E17.5 6 6 0.4937677 0.3811921
## 60 ssNPA E17.5 7 6 0.4927237 0.3814886
## 61 ssNPA E17.5 8 6 0.4863517 0.3675997
## 62 ssNPA E17.5 9 6 0.4596659 0.3496078
## 63 ssNPA E17.5 hepatocytes 10 6 0.4423047 0.3399355
## 64 ssNPA E17.5 hepatocytes 11 6 0.4483608 0.3443706
## 65 ssNPA E17.5 hepatocytes 12 6 0.4417953 0.3414855
## 66 ssNPA E17.5 hepatocytes 5 7 0.5244172 0.4103018
## 67 ssNPA E17.5 hepatocytes 6 6 0.4588314 0.3620383
## 68 ssNPA E17.5 hepatocytes 7 6 0.4579592 0.3556687
## 69 ssNPA E17.5 hepatocytes 8 6 0.4390137 0.3349598
## 70 ssNPA E17.5 hepatocytes 9 6 0.4486123 0.3457244
## 71 ssNPA Random 10 5 0.3994849 0.2940187
## 72 ssNPA Random 11 6 0.4615890 0.3325610
## 73 ssNPA Random 12 5 0.3952019 0.2968861
## 74 ssNPA Random 4 5 0.2971578 0.1786121
## 75 ssNPA Random 5 5 0.2951630 0.2001413
## 76 ssNPA Random 6 5 0.3449468 0.2433838
## 77 ssNPA Random 7 5 0.3774365 0.2693926
## 78 ssNPA Random 8 5 0.4055045 0.2950569
## 79 ssNPA Random 9 5 0.3927834 0.2829422
## 80 ssNPA Random 10 5 0.4036376 0.3017254
## 81 ssNPA Random 11 5 0.4107778 0.3040372
## 82 ssNPA Random 12 5 0.4107587 0.3029480
## 83 ssNPA Random 4 5 0.2900579 0.1844669
## 84 ssNPA Random 5 6 0.3414785 0.2011797
## 85 ssNPA Random 6 6 0.4170360 0.2886322
## 86 ssNPA Random 7 5 0.3752135 0.2631230
## 87 ssNPA Random 8 6 0.4419078 0.3080828
## 88 ssNPA Random 9 6 0.4319798 0.3082374
## 89 ssNPA Random 10 5 0.3908403 0.2875533
## 90 ssNPA Random 11 5 0.3899106 0.2877471
## 91 ssNPA Random 12 5 0.3981384 0.2979477
## 92 ssNPA Random 4 5 0.2940171 0.1872200
## 93 ssNPA Random 5 6 0.3385853 0.2241929
## 94 ssNPA Random 6 5 0.3349233 0.2278658
## 95 ssNPA Random 7 5 0.3513398 0.2317307
## 96 ssNPA Random 8 5 0.4063303 0.2823776
## 97 ssNPA Random 9 5 0.3975408 0.2912891
## 98 ssNPA Random 10 7 0.4605585 0.3216140
## 99 ssNPA Random 11 6 0.4532103 0.3282535
## 100 ssNPA Random 12 6 0.4543818 0.3314441
## 101 ssNPA Random 4 5 0.3012069 0.1922797
## 102 ssNPA Random 5 6 0.3374999 0.2291097
## 103 ssNPA Random 6 5 0.3404424 0.2119682
## 104 ssNPA Random 7 6 0.4252663 0.2786030
## 105 ssNPA Random 8 6 0.4219663 0.2748916
## 106 ssNPA Random 9 6 0.4284912 0.2825565
## 107 ssNPA Random 10 5 0.4294952 0.3208720
## 108 ssNPA Random 11 5 0.4201252 0.3109494
## 109 ssNPA Random 12 5 0.4192327 0.3142967
## 110 ssNPA Random 4 5 0.2863549 0.1888866
## 111 ssNPA Random 5 5 0.3301963 0.2209320
## 112 ssNPA Random 6 6 0.3465440 0.2381293
## 113 ssNPA Random 7 5 0.3663433 0.2449496
## 114 ssNPA Random 8 5 0.3817106 0.2816567
## 115 ssNPA Random 9 5 0.3910132 0.2999313
mean(ssnpa_results$nmi[ssnpa_results$reference_set=="Random"])
## [1] 0.3819507
s <- Rmisc::summarySE(ssnpa_results, measurevar="nmi", groupvars=c("reference_set","pds"))
## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced
g <- ggplot2::ggplot(s,ggplot2::aes(x=reference_set,y=nmi, fill=as.factor(pds),width=0.75))
g <- g + ggplot2::geom_bar(stat="identity", position=ggplot2::position_dodge(width=0.9))
g <- g + ggplot2::geom_errorbar(ggplot2::aes(ymin=nmi-se, ymax=nmi+se), width=0.5,position=ggplot2::position_dodge(.9))
g <- g + ggplot2::theme_bw()
g <- g + ggplot2::xlab('Reference Cells')
g <- g + ggplot2::ylab('NMI')
g <- g + ggplot2::guides(fill=ggplot2::guide_legend(title="Penalty\nDiscount"))
g <- g + ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
text = ggplot2::element_text(size=20),
legend.text=ggplot2::element_text(size=18),
axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)))
pdf('ssnpa_liver_referencecells_by_pd.nmi.pdf', width=12, height=6)
g
## Warning: Removed 70 rows containing missing values (geom_errorbar).
dev.off()
## quartz_off_screen
## 2
g
## Warning: Removed 70 rows containing missing values (geom_errorbar).
s <- Rmisc::summarySE(ssnpa_results, measurevar="ari", groupvars=c("reference_set","pds"))
## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced
g <- ggplot2::ggplot(s,ggplot2::aes(x=reference_set,y=ari, fill=as.factor(pds),width=0.75))
g <- g + ggplot2::geom_bar(stat="identity", position=ggplot2::position_dodge(width=0.9))
g <- g + ggplot2::geom_errorbar(ggplot2::aes(ymin=ari-se, ymax=ari+se), width=0.5,position=ggplot2::position_dodge(.9))
g <- g + ggplot2::theme_bw()
g <- g + ggplot2::xlab('Reference Cells')
g <- g + ggplot2::ylab('ARI')
g <- g + ggplot2::guides(fill=ggplot2::guide_legend(title="Penalty\nDiscount"))
g <- g + ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
text = ggplot2::element_text(size=20),
legend.text=ggplot2::element_text(size=18),
axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)))
pdf('ssnpa_liver_referencecells_by_pd.ari.pdf', width=12, height=6)
g
## Warning: Removed 70 rows containing missing values (geom_errorbar).
dev.off()
## quartz_off_screen
## 2
g
## Warning: Removed 70 rows containing missing values (geom_errorbar).
ssnpa_features_selected <- read.table('./liver_scrnaseq_analysis/test_refxpd/E15.5/selectedfeatures.fges_pd4.txt', header=T, sep='\t',quote = "")
mean(ssnpa_features_selected$num_var)
## [1] 3.584
dir.create('./liver_scrnaseq_analysis/test_lr_ref/')
for (ref in c("E10.5","E11.5","E12.5","E13.5","E14.5","E15.5","E17.5")){
dir.create(paste0('./liver_scrnaseq_analysis/test_lr_ref/',ref))
ssnpa_cluster <- ssnpa::run_ssnpa_lr(gene_exp = exp_voom,
reference_samples = (class$time==ref),
out_dir = paste0('./liver_scrnaseq_analysis/test_lr_ref/',ref),
seed_value = 1,
n_pc = 10,
cluster_resolution = 0.6,
cluster_ref_samples = T)
write.table(ssnpa_cluster$clustering,file=paste0('./liver_scrnaseq_analysis/test_lr_ref/',ref,'/ssNPA-LR_cluster_output_',ref,'_.txt'),quote = F, sep='\t')
write.table(ssnpa_cluster$loadings,file=paste0('./liver_scrnaseq_analysis/test_lr_ref/',ref,'/ssNPA-LR_loading_output_',ref,'_.txt'),quote = F, sep='\t')
}
# add in E17.5 Hepatocytes
dir.create(paste0('./liver_scrnaseq_analysis/test_lr_ref/E17.5hepatocytes/'))
ssnpa_cluster <- ssnpa::run_ssnpa_lr(gene_exp = exp_voom,
reference_samples = (class$timexcell=='E17.5_hepatoblast/hepatocyte'),
out_dir = paste0('./liver_scrnaseq_analysis/test_lr_ref/','E17.5hepatocytes'),
seed_value = 1,
n_pc = 10,
cluster_resolution = 0.6,
cluster_ref_samples = T)
write.table(ssnpa_cluster$clustering,file=paste0('./liver_scrnaseq_analysis/test_lr_ref/','E17.5hepatocytes','/ssNPA-LR_cluster_output_','E17.5hepatocytes','_.txt'),quote = F, sep='\t')
write.table(ssnpa_cluster$loadings,file=paste0('./liver_scrnaseq_analysis/test_lr_ref/','E17.5hepatocytes','/ssNPA-LR_loading_output_','E17.5hepatocytes','_.txt'),quote = F, sep='\t')
for (i in 1:5){
dir.create(paste0("./liver_scrnaseq_analysis/test_lr_ref/random",toString(i)))
ref_list <- read.table(paste0('./liver_scrnaseq_analysis/random_reference_cells/reference_cells_',toString(i),'_.txt'), quote="", sep='\t', header =F)
ref_list <- ref_list$V1
ref_sam <- (row.names(exp_voom) %in% ref_list)
ssnpa_cluster <- ssnpa::run_ssnpa_lr(gene_exp = exp_voom,
reference_samples = ref_sam,
out_dir = paste0('./liver_scrnaseq_analysis/test_lr_ref/','random',toString(i)),
seed_value = 1,
n_pc = 10,
cluster_resolution = 0.6,
cluster_ref_samples = T)
write.table(ssnpa_cluster$clustering,file=paste0('./liver_scrnaseq_analysis/test_lr_ref/','random',toString(i),'/ssNPA-LR_cluster_output_','random','_.txt'),quote = F, sep='\t')
write.table(ssnpa_cluster$loadings,file=paste0('./liver_scrnaseq_analysis/test_lr_ref/','random',toString(i),'/ssNPA-LR_loading_output_','random','_.txt'),quote = F, sep='\t')
}
Read in all ssNPA-LR results files and calculate NMI and ARI
result_files <- list.files(path = "./liver_scrnaseq_analysis/test_lr_ref", pattern = 'ssNPA-LR_cluster_output',full.names = T, recursive = T,ignore.case = F, include.dirs = T)
method <- character()
reference_set <- character()
pds <- integer()
num_clusters <- integer()
nmi <- double()
ari <- double()
for (file in result_files){
a <- strsplit(file,"_",fixed=T)
method <- c(method, 'ssNPA-LR')
reference_set <- c(reference_set, a[[1]][8])
pds <- c(pds,NA)
df <- read.table(file,header=T,sep='\t', quote="",row.names=1,stringsAsFactors=F)
num_clusters <- c(num_clusters,length(unique(df$Cluster)))
nmi <- c(nmi,aricode::NMI(class$timexcell,df$Cluster))
ari <- c(ari,mclust::adjustedRandIndex(class$timexcell, df$Cluster))
}
ssnpa_lr_results <- data.frame(method,reference_set,pds,num_clusters,nmi,ari)
levels(ssnpa_lr_results$reference_set)[levels(ssnpa_lr_results$reference_set)=="E17.5hepatocytes"] <- "E17.5 hepatocytes"
levels(ssnpa_lr_results$reference_set)[levels(ssnpa_lr_results$reference_set)=="random"] <- "Random"
ssnpa_lr_results
## method reference_set pds num_clusters nmi ari
## 1 ssNPA-LR E10.5 NA 6 0.5343157 0.4103438
## 2 ssNPA-LR E11.5 NA 7 0.5619260 0.4851154
## 3 ssNPA-LR E12.5 NA 7 0.5401574 0.4101360
## 4 ssNPA-LR E13.5 NA 7 0.5956836 0.4771295
## 5 ssNPA-LR E14.5 NA 6 0.5944397 0.4973185
## 6 ssNPA-LR E15.5 NA 7 0.5919097 0.4755229
## 7 ssNPA-LR E17.5 NA 6 0.5100085 0.3861713
## 8 ssNPA-LR E17.5 hepatocytes NA 7 0.5199538 0.4069017
## 9 ssNPA-LR Random NA 6 0.3327361 0.2236332
## 10 ssNPA-LR Random NA 6 0.3348679 0.2181527
## 11 ssNPA-LR Random NA 6 0.3153343 0.2041835
## 12 ssNPA-LR Random NA 6 0.3155976 0.1994630
## 13 ssNPA-LR Random NA 6 0.3513737 0.2508438
mean(ssnpa_lr_results$nmi[ssnpa_lr_results$reference_set=="Random"])
## [1] 0.3299819
s <- Rmisc::summarySE(ssnpa_lr_results, measurevar="nmi", groupvars=c("reference_set"))
## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced
g <- ggplot2::ggplot(s,ggplot2::aes(x=reference_set,y=nmi, width=0.75))
g <- g + ggplot2::geom_bar(stat="identity", position=ggplot2::position_dodge(width=0.9),fill="dodgerblue3")
g <- g + ggplot2::geom_errorbar(ggplot2::aes(ymin=nmi-se, ymax=nmi+se), width=0.5,position=ggplot2::position_dodge(.9))
g <- g + ggplot2::theme_bw()
g <- g + ggplot2::xlab('Reference Cells')
g <- g + ggplot2::ylab('NMI')
g <- g + ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
text = ggplot2::element_text(size=20),
legend.text=ggplot2::element_text(size=18),
axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)))
pdf('liver_ssnpa-lr_by_referencecells.nmi.pdf', width=8, height=6)
g
## Warning: Removed 8 rows containing missing values (geom_errorbar).
dev.off()
## quartz_off_screen
## 2
g
## Warning: Removed 8 rows containing missing values (geom_errorbar).
s <- Rmisc::summarySE(ssnpa_lr_results, measurevar="ari", groupvars=c("reference_set"))
## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced
g <- ggplot2::ggplot(s,ggplot2::aes(x=reference_set,y=ari, width=0.75))
g <- g + ggplot2::geom_bar(stat="identity", position=ggplot2::position_dodge(width=0.9),fill="dodgerblue3")
g <- g + ggplot2::geom_errorbar(ggplot2::aes(ymin=ari-se, ymax=ari+se), width=0.5,position=ggplot2::position_dodge(.9))
g <- g + ggplot2::theme_bw()
g <- g + ggplot2::xlab('Reference Cells')
g <- g + ggplot2::ylab('ARI')
g <- g + ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
text = ggplot2::element_text(size=20),
legend.text=ggplot2::element_text(size=18),
axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)))
pdf('liver_ssnpa-lr_by_referencecells.ari.pdf', width=8, height=6)
g
## Warning: Removed 8 rows containing missing values (geom_errorbar).
dev.off()
## quartz_off_screen
## 2
g
## Warning: Removed 8 rows containing missing values (geom_errorbar).
ssnpalr_features_selected <- read.table('./liver_scrnaseq_analysis/test_lr_ref/E13.5/ssnpa_lr_selectedfeatures.txt', header=T, sep='\t',quote = "")
mean(ssnpalr_features_selected$num_var)
## [1] 13.51167
dir.create(paste0('./liver_scrnaseq_analysis/test_gex'))
gex_cluster <- ssnpa::cluster_samples(features = exp_voom,
meta_data_df = class,
n_pc = 10,
cluster_resolution =0.6,
cluster_ref_samples = T,
ref_samples = rep(T,dim(exp_voom)[1]),
seed_value = 1)$clustering
method <- 'Gene Expression'
reference_set <- NA
pds <- NA
num_clusters <- length(unique(gex_cluster$Cluster))
nmi <- aricode::NMI(class$timexcell,gex_cluster$Cluster)
ari <- mclust::adjustedRandIndex(class$timexcell, gex_cluster$Cluster)
write.table(gex_cluster,file=paste0('./liver_scrnaseq_analysis/test_gex/gex_cluster_output.txt'),quote = F, sep='\t')
Read in gene expression result file and calcule NMI and ARI
result_files <- list.files(path = "./liver_scrnaseq_analysis/test_gex", pattern = 'gex_cluster_output',full.names = T, recursive = T,ignore.case = F, include.dirs = T)
method <- character()
reference_set <- character()
pds <- integer()
num_clusters <- integer()
nmi <- double()
ari <- double()
for (file in result_files){
method <- c(method, 'Gene Expression')
reference_set <- c(reference_set, NA)
pds <- c(pds,NA)
df <- read.table(file,header=T,sep='\t', quote="",row.names=1,stringsAsFactors=F)
num_clusters <- c(num_clusters,length(unique(df$Cluster)))
nmi <- c(nmi,aricode::NMI(class$timexcell,df$Cluster))
ari <- c(ari,mclust::adjustedRandIndex(class$timexcell, df$Cluster))
}
gex_results <- data.frame(method,reference_set,pds,num_clusters,nmi,ari)
gex_results
## method reference_set pds num_clusters nmi ari
## 1 Gene Expression <NA> NA 5 0.4967235 0.3748872
Use Pathifier with KEGG pathways to cluster these data. First, read in KEGG pathways.
dir.create(paste0('./output_data'))
kegg_full <- list()
pathways <- KEGGREST::keggList("pathway", "mmu") #mmu for mouse
count2 <- 1
for (i in 1:length(pathways)){
x <- attr(pathways,"names")[i]
x <- gsub("path:","",x, fixed=T)
path_full <- KEGGREST::keggGet(x)
p <- path_full[[1]]$GENE
if (length(p)>0){
even_indexes<-seq(2,length(p),2)
q <- p[even_indexes]
r <- strsplit(q,";",fixed=T)
gene_names <- NULL
count <- 1
for (j in 1:length(r)){
if (length(r[[j]])>1){
gene_names[count] <- r[[j]][1]
count <- count+1
}
}
# only keep pathways with at least 10 genes
if (length(gene_names)>10){
kegg_full$gs[count2] <- list(gene_names)
kegg_full$pathwaynames[count2] <- path_full[[1]]$NAME
count2 <- count2 + 1
}
}
}
save(kegg_full,file="./output_data/kegg_full.mmu.RData")
summary(kegg_full)
Convert ENSEMBL IDs to gene names to match KEGG.
ensembl <- biomaRt::useEnsembl(biomart="ensembl", dataset="mmusculus_gene_ensembl")
lookup <- biomaRt::getBM(attributes=c('ensembl_gene_id','external_gene_name'), filters ='ensembl_gene_id', values =names(exp_voom), mart = ensembl)
t <- match(names(exp_voom), lookup$ensembl_gene_id)
gene_names <- lookup$external_gene_name[t]
exp_voom_genename <- exp_voom
names(exp_voom_genename)[!is.na(gene_names)] <- gene_names[!is.na(gene_names)]
Run Pathifier with all groups of cells as the reference set.
data <- t(exp_voom_genename)
dir.create(paste0('./liver_scrnaseq_analysis/test_pathifier_ref/'))
for (ref in c("E10.5","E11.5","E12.5","E13.5","E14.5","E15.5","E17.5")){
dir.create(paste0('./liver_scrnaseq_analysis/test_pathifier_ref/',ref))
pathway_dereg <- pathifier::quantify_pathways_deregulation(data,allgenes = row.names(data),syms = kegg_full$gs,pathwaynames = kegg_full$pathwaynames,normals = (class$time==ref), attempts=100)
scores <- do.call(rbind,pathway_dereg$scores)
scores <- as.data.frame(t(scores))
names(scores) <- names(pathway_dereg$scores)
names(scores) <- make.names(names(scores))
row.names(scores) <- row.names(exp_voom_genename)
pathifier_cluster <- ssnpa::cluster_samples(features = scores,
meta_data_df = class,
n_pc = 10,
cluster_resolution =0.6,
cluster_ref_samples = T,
ref_samples = (class$time==ref),
seed_value = 1)$clustering
write.table(pathifier_cluster,file=paste0('./liver_scrnaseq_analysis/test_pathifier_ref/',ref,'/pathifier_cluster_output_',ref,'_.txt'),quote = F, sep='\t')
}
for (i in 1:5){
dir.create(paste0("./liver_scrnaseq_analysis/test_pathifier_ref/random",toString(i)))
ref_list <- read.table(paste0('./liver_scrnaseq_analysis/random_reference_cells/reference_cells_',toString(i),'_.txt'), quote="", sep='\t', header =F)
ref_list <- ref_list$V1
ref_sam <- (row.names(exp_voom) %in% ref_list)
pathway_dereg <- pathifier::quantify_pathways_deregulation(data,allgenes = row.names(data),syms = kegg_full$gs,pathwaynames = kegg_full$pathwaynames,normals = ref_sam, attempts=100)
scores <- do.call(rbind,pathway_dereg$scores)
scores <- as.data.frame(t(scores))
names(scores) <- names(pathway_dereg$scores)
names(scores) <- make.names(names(scores))
row.names(scores) <- row.names(exp_voom_genename)
write.table(scores, file=paste0('./liver_scrnaseq_analysis/test_pathifier_ref/random', toString(i),'/pathifier_scores_random_.txt'), quote=F, sep='\t')
pathifier_cluster <- ssnpa::cluster_samples(features = scores,
meta_data_df = class,
n_pc = 10,
cluster_resolution =0.6,
cluster_ref_samples = T,
ref_samples = ref_sam,
seed_value = 1)$clustering
write.table(pathifier_cluster,file=paste0('./liver_scrnaseq_analysis/test_pathifier_ref/','random',toString(i),'/pathifier_cluster_output_','random','_.txt'),quote = F, sep='\t')
}
Read in Pathifier result files and calcule NMI and ARI
result_files <- list.files(path = "./liver_scrnaseq_analysis/test_pathifier_ref", pattern = 'pathifier_cluster_output',full.names = T, recursive = T,ignore.case = F, include.dirs = T)
method <- character()
reference_set <- character()
pds <- integer()
num_clusters <- integer()
nmi <- double()
ari <- double()
for (file in result_files){
a <- strsplit(file,"_",fixed=T)
method <- c(method, 'Pathifier')
reference_set <- c(reference_set, a[[1]][8])
pds <- c(pds,NA)
df <- read.table(file,header=T,sep='\t', quote="",row.names=1,stringsAsFactors=F)
num_clusters <- c(num_clusters,length(unique(df$Cluster)))
nmi <- c(nmi,aricode::NMI(class$timexcell,df$Cluster))
ari <- c(ari,mclust::adjustedRandIndex(class$timexcell, df$Cluster))
}
pathifier_results <- data.frame(method,reference_set,pds,num_clusters,nmi,ari)
levels(pathifier_results$reference_set)[levels(pathifier_results$reference_set)=="random"] <- "Random"
pathifier_results
## method reference_set pds num_clusters nmi ari
## 1 Pathifier E10.5 NA 6 0.5039087 0.3571078
## 2 Pathifier E11.5 NA 6 0.4930281 0.3824889
## 3 Pathifier E12.5 NA 6 0.5103271 0.3741414
## 4 Pathifier E13.5 NA 6 0.5289707 0.3993093
## 5 Pathifier E14.5 NA 6 0.5072126 0.3641602
## 6 Pathifier E15.5 NA 5 0.4475906 0.3258259
## 7 Pathifier E17.5 NA 5 0.4361635 0.3074956
## 8 Pathifier Random NA 5 0.4424782 0.3142321
## 9 Pathifier Random NA 6 0.4601782 0.3006597
## 10 Pathifier Random NA 6 0.5026206 0.3678690
## 11 Pathifier Random NA 5 0.4747649 0.3528858
## 12 Pathifier Random NA 5 0.4448886 0.3192647
mean(pathifier_results$nmi[pathifier_results$reference_set=="Random"])
## [1] 0.4649861
s <- Rmisc::summarySE(pathifier_results, measurevar="nmi", groupvars=c("reference_set"))
## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced
g <- ggplot2::ggplot(s,ggplot2::aes(x=reference_set,y=nmi, width=0.75))
g <- g + ggplot2::geom_bar(stat="identity", position=ggplot2::position_dodge(width=0.9),fill="dodgerblue3")
g <- g + ggplot2::geom_errorbar(ggplot2::aes(ymin=nmi-se, ymax=nmi+se), width=0.5,position=ggplot2::position_dodge(.9))
g <- g + ggplot2::theme_bw()
g <- g + ggplot2::xlab('Reference Cells')
g <- g + ggplot2::ylab('NMI')
g <- g + ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
text = ggplot2::element_text(size=20),
legend.text=ggplot2::element_text(size=18),
axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)))
pdf('liver_pathifier_by_referencecells.nmi.pdf', width=8, height=6)
g
## Warning: Removed 7 rows containing missing values (geom_errorbar).
dev.off()
## quartz_off_screen
## 2
g
## Warning: Removed 7 rows containing missing values (geom_errorbar).
s <- Rmisc::summarySE(pathifier_results, measurevar="ari", groupvars=c("reference_set"))
## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced
g <- ggplot2::ggplot(s,ggplot2::aes(x=reference_set,y=ari, width=0.75))
g <- g + ggplot2::geom_bar(stat="identity", position=ggplot2::position_dodge(width=0.9),fill="dodgerblue3")
g <- g + ggplot2::geom_errorbar(ggplot2::aes(ymin=ari-se, ymax=ari+se), width=0.5,position=ggplot2::position_dodge(.9))
g <- g + ggplot2::theme_bw()
g <- g + ggplot2::xlab('Reference Cells')
g <- g + ggplot2::ylab('ARI')
g <- g + ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
text = ggplot2::element_text(size=20),
legend.text=ggplot2::element_text(size=18),
axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)))
pdf('liver_pathifier_by_referencecells.ari.pdf', width=8, height=6)
g
## Warning: Removed 7 rows containing missing values (geom_errorbar).
dev.off()
## quartz_off_screen
## 2
g
## Warning: Removed 7 rows containing missing values (geom_errorbar).
Use ssGSEA with MSigDB pathways to cluster these data. First, convert ENSEMBL IDs to Entrez gene IDs.
ensembl <- biomaRt::useEnsembl(biomart="ensembl", dataset="mmusculus_gene_ensembl")
lookup <- biomaRt::getBM(attributes=c('ensembl_gene_id','entrezgene_id'), filters ='ensembl_gene_id', values =names(exp_voom), mart = ensembl)
t <- match(names(exp_voom), lookup$ensembl_gene_id)
gene_names <- lookup$entrezgene[t]
exp_voom_entrez <- exp_voom
names(exp_voom_entrez)[!is.na(gene_names)] <- gene_names[!is.na(gene_names)]
Load in the C2 collection from MSigDB (version 5.2 with mouse identifiers) downloaded from http://bioinf.wehi.edu.au/software/MSigDB/index.html
load('./input_data/mouse_c2_v5p2.rdata')
Run ssGSEA and cluster cells.
dir.create(paste0('./liver_scrnaseq_analysis/test_ssgsea'))
ssgsea_features <- GSVA::gsva(as.matrix(t(exp_voom_entrez)), Mm.c2, method="ssgsea")
ssgsea_cluster_input <- as.data.frame(t(ssgsea_features))
names(ssgsea_cluster_input) <- make.names(names(ssgsea_cluster_input))
ssgsea_cluster <- ssnpa::cluster_samples(features = as.data.frame(t(ssgsea_features)),
meta_data_df = class,
n_pc = 10,
cluster_resolution =0.6,
cluster_ref_samples = T,
ref_samples = rep(T,dim(class)[1]),
seed_value = 1)$clustering
write.table(ssgsea_cluster,file=paste0('./liver_scrnaseq_analysis/test_ssgsea/ssgsea_cluster_output.txt'),quote = F, sep='\t')
Read in ssGSEA result file and calculate NMI and ARI
result_files <- list.files(path = "./liver_scrnaseq_analysis/test_ssgsea", pattern = 'ssgsea_cluster_output',full.names = T, recursive = T,ignore.case = F, include.dirs = T)
method <- character()
reference_set <- character()
pds <- integer()
num_clusters <- integer()
nmi <- double()
ari <- double()
for (file in result_files){
method <- c(method, 'ssGSEA')
reference_set <- c(reference_set, NA)
pds <- c(pds,NA)
df <- read.table(file,header=T,sep='\t', quote="",row.names=1,stringsAsFactors=F)
num_clusters <- c(num_clusters,length(unique(df$Cluster)))
nmi <- c(nmi,aricode::NMI(class$timexcell,df$Cluster))
ari <- c(ari,mclust::adjustedRandIndex(class$timexcell, df$Cluster))
}
ssgsea_results <- data.frame(method,reference_set,pds,num_clusters,nmi,ari)
ssgsea_results
## method reference_set pds num_clusters nmi ari
## 1 ssGSEA <NA> NA 5 0.4776922 0.3531489
Generate random datasets.
dir.create(paste0('./liver_scrnaseq_analysis/test_fewgenes/datasets'))
set.seed(1)
for (p in c(0.95, 0.9, 0.85, 0.8, 0.75, 0.5, 0.25)){
for (i in 1:5){
s <- sample(names(exp_voom), floor(dim(exp_voom)[2]*p), replace = FALSE)
exp_s <- exp_voom[,s]
write.table(exp_s,paste0('./liver_scrnaseq_analysis/test_fewgenes/datasets/expvoom_p_',toString(p),'_',toString(i),'_.txt'),quote=F,sep='\t',row.names = T, col.names=T)
}
}
Run ssNPA on each of these random datasets.
ref <- 'E15.5'
pd <- 4
result_files <- list.files(path = "./liver_scrnaseq_analysis/test_fewgenes/datasets", pattern = 'expvoom',full.names = T, recursive = T,ignore.case = F, include.dirs = T)
for (file in result_files){
df <- read.table(file,header=T,sep='\t',quote="",row.names=1)
a <- strsplit(file,'_',fixed = T)
p <- a[[1]][6]
i <- a[[1]][7]
dir.create(paste0('./liver_scrnaseq_analysis/test_fewgenes/p',toString(p),'/',toString(i)))
ssnpa_cluster <- ssnpa::run_ssnpa(gene_exp = df,
reference_samples = (class$time==ref),
out_dir = paste0('./liver_scrnaseq_analysis/test_fewgenes/p',toString(p),'/',toString(i)),
seed_value = 1,
fgs_pd = pd,
n_pc = 10,
cluster_resolution = 0.6,
cluster_ref_samples = T)$clustering
write.table(ssnpa_cluster,file=paste0('./liver_scrnaseq_analysis/test_fewgenes/ssNPA_cluster_output_',ref,'_pd_',toString(pd),'_',toString(p),'_',toString(i),'_.txt'),quote = F, sep='\t')
}
#Apply ssNPA to full dataset (all 3,000 genes)
p <- 1
i <- 1
dir.create(paste0('./liver_scrnaseq_analysis/test_fewgenes/p',toString(p),'/',toString(i)))
ssnpa_cluster <- ssnpa::run_ssnpa(gene_exp = exp_voom,
reference_samples = (class$time==ref),
out_dir = paste0('./liver_scrnaseq_analysis/test_fewgenes/p',toString(p),'/',toString(i)),
seed_value = 1,
fgs_pd = pd,
n_pc = 10,
cluster_resolution = 0.6,
cluster_ref_samples = T)$clustering
write.table(ssnpa_cluster,file=paste0('./liver_scrnaseq_analysis/test_fewgenes/ssNPA_cluster_output_',ref,'_pd_',toString(pd),'_',toString(p),'_',toString(i),'_.txt'),quote = F, sep='\t')
Read in reduced gene result files and calculate NMI and ARI
result_files <- list.files(path = "./liver_scrnaseq_analysis/test_fewgenes", pattern = 'ssNPA_cluster_output',full.names = T, recursive = T,ignore.case = F, include.dirs = T)
f <- double()
num_clusters <- integer()
nmi <- double()
ari <- double()
for (file in result_files){
a <- strsplit(file,'_',fixed = T)
f <- c(f, as.numeric(a[[1]][10])*100)
df <- read.table(file,header=T,sep='\t', quote="",row.names=1,stringsAsFactors=F)
num_clusters <- c(num_clusters,length(unique(df$Cluster)))
nmi <- c(nmi,aricode::NMI(class$timexcell,df$Cluster))
ari <- c(ari,mclust::adjustedRandIndex(class$timexcell, df$Cluster))
}
ssnpa_fewgene_results <- data.frame(f,num_clusters,nmi,ari)
s <- Rmisc::summarySE(ssnpa_fewgene_results, measurevar="nmi", groupvars=c("f"))
s
## f N nmi sd se ci
## 1 25 5 0.3855473 0.03936100 0.01760277 0.04887314
## 2 50 5 0.5216856 0.03135548 0.01402260 0.03893297
## 3 75 5 0.5421200 0.04445185 0.01987947 0.05519426
## 4 80 5 0.5592278 0.04314140 0.01929342 0.05356712
## 5 85 5 0.5833929 0.02407098 0.01076487 0.02988807
## 6 90 5 0.5823763 0.02749929 0.01229806 0.03414488
## 7 95 5 0.5948758 0.03501905 0.01566099 0.04348189
## 8 100 1 0.6150530 NA NA NaN
g_genes <- ggplot2::ggplot(s, ggplot2::aes(x=as.numeric(f), y=nmi)) +
ggplot2::geom_errorbar(ggplot2::aes(ymin=nmi-se, ymax=nmi+se), width=1) + ggplot2::geom_line() + ggplot2::geom_point() + ggplot2::theme_bw() + ggplot2::xlab("Percentage of 3,000 Genes Selected") + ggplot2::ylab("NMI")
g_genes
pdf('ssnpa_liver_percentgenes.pdf',width=7, heigh=5)
g_genes
dev.off()
## quartz_off_screen
## 2
Generate random datasets.
dir.create(paste0('./liver_scrnaseq_analysis/test_fewcells/datasets'))
set.seed(1)
for (p in c(0.95, 0.9, 0.85, 0.8, 0.75)){
for (i in 1:5){
cells_keep <- list()
# maintain proportion of true classes
for (j in unique(class$timexcell)){
s <- sample(row.names(exp_voom)[class$timexcell==j], round(dim(exp_voom[class$timexcell==j,])[1]*p,digits = 0), replace = FALSE)
cells_keep <- c(cells_keep, s)
}
exp_s <- exp_voom[unlist(cells_keep),]
write.table(exp_s,paste0('./liver_scrnaseq_analysis/test_fewcells/datasets/expvoom_p_',toString(p),'_',toString(i),'_.txt'),quote=F,sep='\t',row.names = T, col.names=T)
}
}
Run ssNPA on each of these random datasets.
ref <- 'E15.5'
pd <- 4
result_files <- list.files(path = "./liver_scrnaseq_analysis/test_fewcells/datasets", pattern = 'expvoom',full.names = T, recursive = T,ignore.case = F, include.dirs = T)
for (file in result_files){
df <- read.table(file,header=T,sep='\t',quote="",row.names=1)
a <- strsplit(file,'_',fixed = T)
p <- a[[1]][6]
i <- a[[1]][7]
t <- match(row.names(df),row.names(class))
class_s <- class[t,]
dir.create(paste0('./liver_scrnaseq_analysis/test_fewcells/p',toString(p),'/',toString(i)))
ssnpa_cluster <- ssnpa::run_ssnpa(gene_exp = df,
reference_samples = (class_s$time==ref),
out_dir = paste0('./liver_scrnaseq_analysis/test_fewcells/p',toString(p),'/',toString(i)),
seed_value = 1,
fgs_pd = pd,
n_pc = 10,
cluster_resolution = 0.6,
cluster_ref_samples = T)$clustering
write.table(ssnpa_cluster,file=paste0('./liver_scrnaseq_analysis/test_fewcells/ssNPA_cluster_output_',ref,'_pd_',toString(pd),'_',toString(p),'_',toString(i),'_.txt'),quote = F, sep='\t')
}
#Apply ssNPA to full dataset (all 447 cells)
p <- 1
i <- 1
dir.create(paste0('./liver_scrnaseq_analysis/test_fewcells/p',toString(p),'/',toString(i)))
ssnpa_cluster <- ssnpa::run_ssnpa(gene_exp = exp_voom,
reference_samples = (class$time==ref),
out_dir = paste0('./liver_scrnaseq_analysis/test_fewcells/p',toString(p),'/',toString(i)),
seed_value = 1,
fgs_pd = pd,
n_pc = 10,
cluster_resolution = 0.6,
cluster_ref_samples = T)$clustering
write.table(ssnpa_cluster,file=paste0('./liver_scrnaseq_analysis/test_fewcells/ssNPA_cluster_output_',ref,'_pd_',toString(pd),'_',toString(p),'_',toString(i),'_.txt'),quote = F, sep='\t')
Read in reduced cell result files and calculate NMI and ARI
result_files <- list.files(path = "./liver_scrnaseq_analysis/test_fewcells", pattern = 'ssNPA_cluster_output',full.names = T, recursive = T,ignore.case = F, include.dirs = T)
f <- double()
num_clusters <- integer()
nmi <- double()
ari <- double()
for (file in result_files){
a <- strsplit(file,'_',fixed = T)
f <- c(f, as.numeric(a[[1]][10])*100)
df <- read.table(file,header=T,sep='\t', quote="",row.names=1,stringsAsFactors=F)
num_clusters <- c(num_clusters,length(unique(df$Cluster)))
t <- match(row.names(df),row.names(class))
nmi <- c(nmi,aricode::NMI(class$timexcell[t],df$Cluster))
ari <- c(ari,mclust::adjustedRandIndex(class$timexcell[t], df$Cluster))
}
ssnpa_fewcell_results <- data.frame(f,num_clusters,nmi,ari)
s <- Rmisc::summarySE(ssnpa_fewcell_results, measurevar="nmi", groupvars=c("f"))
s
## f N nmi sd se ci
## 1 75 5 0.5140561 0.05266120 0.02355081 0.06538752
## 2 80 5 0.5034949 0.03532563 0.01579810 0.04386257
## 3 85 5 0.5433212 0.02829108 0.01265216 0.03512802
## 4 90 5 0.5608482 0.03289078 0.01470920 0.04083930
## 5 95 5 0.5756385 0.04746641 0.02122763 0.05893734
## 6 100 1 0.6150530 NA NA NA
g_cells <- ggplot2::ggplot(s, ggplot2::aes(x=as.numeric(f), y=nmi)) +
ggplot2::geom_errorbar(ggplot2::aes(ymin=nmi-se, ymax=nmi+se), width=1) + ggplot2::geom_line() + ggplot2::geom_point() + ggplot2::theme_bw() + ggplot2::xlab("Percentage of 447 Cells Selected") + ggplot2::ylab("NMI")
g_cells
pdf('ssnpa_liver_percentcells.pdf',width=7, heigh=5)
g_cells
dev.off()
## quartz_off_screen
## 2
ssnpa_loadings <- read.table('./liver_scrnaseq_analysis/test_refxpd/E15.5/ssNPA_loading_output_E15.5_pd_4_.txt', quote="", sep='\t', header=T)
ssnpa_loadings_abs <- abs(ssnpa_loadings)
top_pc_genes <- list()
top_pc_loadings <- list()
pc <- list()
top_n <- 10
for (i in 1:dim(ssnpa_loadings_abs)[2]){
loadings <- ssnpa_loadings[order(-ssnpa_loadings_abs[,i]),i]
genes <- row.names(ssnpa_loadings)[order(-ssnpa_loadings_abs[,i])]
top_pc_genes <- c(top_pc_genes, genes[1:top_n])
top_pc_loadings <- c(top_pc_loadings, loadings[1:top_n])
pc <- c(pc, rep(paste0("PC", toString(i)),top_n))
}
gene_ensembl <- unlist(top_pc_genes)
loading <- unlist(top_pc_loadings)
PC <- unlist(pc)
ssnpa_top_loadings <- data.frame(gene_ensembl, loading, PC)
ensembl <- biomaRt::useEnsembl(biomart="ensembl", dataset="mmusculus_gene_ensembl")
lookup <- biomaRt::getBM(attributes=c('ensembl_gene_id','external_gene_name','name_1006'), filters ='ensembl_gene_id', values = ssnpa_top_loadings$gene_ensembl, mart = ensembl)
t <- match(ssnpa_top_loadings$gene_ensembl, lookup$ensembl_gene_id)
ssnpa_top_loadings$gene <- lookup$external_gene_name[t]
ssnpa_top_loadings$description <- lookup$name_1006[t]
length(unique(ssnpa_top_loadings$gene))
## [1] 94
ssnpa_top_loadings[,1:4]
## gene_ensembl loading PC gene
## 1 ENSMUSG00000037470 -0.03513024 PC1 Uggt1
## 2 ENSMUSG00000035478 -0.03488756 PC1 Mbd3
## 3 ENSMUSG00000016018 -0.03458942 PC1 Mtrex
## 4 ENSMUSG00000024683 -0.03444034 PC1 Mrpl16
## 5 ENSMUSG00000024082 -0.03428993 PC1 Ndufaf7
## 6 ENSMUSG00000026843 -0.03419275 PC1 Fubp3
## 7 ENSMUSG00000001627 -0.03377094 PC1 Ifrd1
## 8 ENSMUSG00000054115 -0.03369422 PC1 Skp2
## 9 ENSMUSG00000037461 -0.03357244 PC1 Ints7
## 10 ENSMUSG00000028394 -0.03356370 PC1 Pole3
## 11 ENSMUSG00000033831 0.08639173 PC2 Fgb
## 12 ENSMUSG00000028307 0.07976233 PC2 Aldob
## 13 ENSMUSG00000045092 0.07904960 PC2 S1pr1
## 14 ENSMUSG00000015243 0.07885478 PC2 Abca1
## 15 ENSMUSG00000066406 0.07555713 PC2 Akap13
## 16 ENSMUSG00000022766 0.07493626 PC2 Serpind1
## 17 ENSMUSG00000002769 0.07428510 PC2 Gnmt
## 18 ENSMUSG00000029260 0.07360931 PC2 Ugt2b34
## 19 ENSMUSG00000068566 0.07352332 PC2 Myadm
## 20 ENSMUSG00000031402 0.07327199 PC2 Mpp1
## 21 ENSMUSG00000055912 -0.07676164 PC3 Tmem150a
## 22 ENSMUSG00000064225 -0.06875963 PC3 Paqr9
## 23 ENSMUSG00000023262 -0.06219225 PC3 Acy1
## 24 ENSMUSG00000018796 -0.06193448 PC3 Acsl1
## 25 ENSMUSG00000047945 0.06092703 PC3 Marcksl1
## 26 ENSMUSG00000021892 -0.05854530 PC3 Sh3bp5
## 27 ENSMUSG00000062300 0.05811363 PC3 Nectin2
## 28 ENSMUSG00000006517 -0.05713638 PC3 Mvd
## 29 ENSMUSG00000053646 0.05700964 PC3 Plxnb1
## 30 ENSMUSG00000028832 0.05559043 PC3 Stmn1
## 31 ENSMUSG00000060807 -0.11085462 PC4 Serpina6
## 32 ENSMUSG00000031383 -0.09393560 PC4 Dusp9
## 33 ENSMUSG00000010307 -0.09294081 PC4 Tmem86a
## 34 ENSMUSG00000051159 -0.08805587 PC4 Cited1
## 35 ENSMUSG00000029581 -0.07606941 PC4 Fscn1
## 36 ENSMUSG00000070348 -0.06926351 PC4 Ccnd1
## 37 ENSMUSG00000040699 -0.06851796 PC4 Limd2
## 38 ENSMUSG00000075266 -0.06646365 PC4 Cenpw
## 39 ENSMUSG00000022505 -0.06600525 PC4 Emp2
## 40 ENSMUSG00000019461 -0.06441820 PC4 Plscr3
## 41 ENSMUSG00000045294 -0.09008979 PC5 Insig1
## 42 ENSMUSG00000001285 -0.07178131 PC5 Myg1
## 43 ENSMUSG00000038383 -0.06790060 PC5 Pigu
## 44 ENSMUSG00000022914 -0.06664825 PC5 Brwd1
## 45 ENSMUSG00000048000 -0.06640357 PC5 Gigyf2
## 46 ENSMUSG00000046814 -0.06587141 PC5 Gchfr
## 47 ENSMUSG00000026272 -0.06440306 PC5 Agxt
## 48 ENSMUSG00000036606 -0.06406238 PC5 Plxnb2
## 49 ENSMUSG00000028180 0.06388521 PC5 Zranb2
## 50 ENSMUSG00000066979 -0.06286190 PC5 Bub3
## 51 ENSMUSG00000040616 0.07585918 PC6 Tmem51
## 52 ENSMUSG00000021149 -0.07583079 PC6 Gtpbp4
## 53 ENSMUSG00000021395 0.07485412 PC6 Spin1
## 54 ENSMUSG00000002804 -0.07218189 PC6 Nudt14
## 55 ENSMUSG00000025337 0.07195760 PC6 Sbds
## 56 ENSMUSG00000028334 -0.07013933 PC6 Nans
## 57 ENSMUSG00000026603 -0.06842760 PC6 Smyd2
## 58 ENSMUSG00000045767 0.06743263 PC6 B230219D22Rik
## 59 ENSMUSG00000018189 -0.06718611 PC6 Uchl5
## 60 ENSMUSG00000052423 0.06704137 PC6 B4galt3
## 61 ENSMUSG00000008333 -0.10055473 PC7 Snrpb2
## 62 ENSMUSG00000017950 0.08026718 PC7 Hnf4a
## 63 ENSMUSG00000008690 0.07898537 PC7 Ncaph2
## 64 ENSMUSG00000043131 0.07633742 PC7 Mob1a
## 65 ENSMUSG00000056121 -0.07355063 PC7 Fez2
## 66 ENSMUSG00000064037 0.06854111 PC7 Gpn1
## 67 ENSMUSG00000010911 0.06727907 PC7 Apip
## 68 ENSMUSG00000020609 0.06722823 PC7 Apob
## 69 ENSMUSG00000024908 0.06704612 PC7 Ppp6r3
## 70 ENSMUSG00000022023 0.06411352 PC7 Wbp4
## 71 ENSMUSG00000020116 0.07241917 PC8 Pno1
## 72 ENSMUSG00000022698 -0.06991674 PC8 Naa50
## 73 ENSMUSG00000010911 -0.06984377 PC8 Apip
## 74 ENSMUSG00000001525 0.06797213 PC8 Tubb5
## 75 ENSMUSG00000016756 0.06708920 PC8 Cmah
## 76 ENSMUSG00000040451 -0.06459179 PC8 Sgms1
## 77 ENSMUSG00000022024 0.06378986 PC8 Sugt1
## 78 ENSMUSG00000066979 0.06364009 PC8 Bub3
## 79 ENSMUSG00000007050 0.06139804 PC8 Lsm2
## 80 ENSMUSG00000028233 0.06119468 PC8 Tgs1
## 81 ENSMUSG00000034269 0.08225236 PC9 Setd5
## 82 ENSMUSG00000026547 0.07865298 PC9 Tagln2
## 83 ENSMUSG00000024603 0.07725204 PC9 Dctn4
## 84 ENSMUSG00000018189 0.07723070 PC9 Uchl5
## 85 ENSMUSG00000001525 0.07326395 PC9 Tubb5
## 86 ENSMUSG00000020687 0.07030661 PC9 Cdc27
## 87 ENSMUSG00000017950 0.06980342 PC9 Hnf4a
## 88 ENSMUSG00000028229 0.05801654 PC9 Rmdn1
## 89 ENSMUSG00000032485 0.05767303 PC9 Scap
## 90 ENSMUSG00000031819 0.05685796 PC9 Emc8
## 91 ENSMUSG00000031201 0.09666380 PC10 Brcc3
## 92 ENSMUSG00000034621 0.08961701 PC10 Gpatch8
## 93 ENSMUSG00000029250 0.08421378 PC10 Polr2b
## 94 ENSMUSG00000026349 0.08276864 PC10 Ccnt2
## 95 ENSMUSG00000020929 0.08200153 PC10 Eftud2
## 96 ENSMUSG00000012535 0.07000487 PC10 Tnpo3
## 97 ENSMUSG00000028898 0.06570566 PC10 Trnau1ap
## 98 ENSMUSG00000024231 0.06504754 PC10 Cul2
## 99 ENSMUSG00000028832 0.06497135 PC10 Stmn1
## 100 ENSMUSG00000027367 -0.06221578 PC10 Stard7
write.table(ssnpa_top_loadings,'./liver_scrnaseq_analysis/ssnpa_top_loadings.txt',quote=F, sep='\t', row.names=F)
class$cell_type <- factor(class$cell_type, levels = c("hepatoblast/hepatocyte","cholangiocyte"))
gex_result <- read.table('./liver_scrnaseq_analysis/test_gex/gex_cluster_output.txt', quote="", sep='\t', header=T)
gex_class <- ggplot2::ggplot(gex_result,ggplot2::aes(tSNE_1,tSNE_2))+ggplot2::geom_point(ggplot2::aes(colour=class$time,shape=as.factor(class$cell_type)), size=4.5)+ggplot2::theme_bw()+ggplot2::labs(color="Time",shape="Cell Type")+ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 10)),shape = ggplot2::guide_legend(override.aes = list(size = 10))) +
ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
text = ggplot2::element_text(size=20),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=48),
axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
plot.margin = ggplot2::unit(c(3,3,3,3),"lines")) +
ggplot2::scale_colour_brewer(palette="Set2") + ggplot2::ggtitle('Gene Expression')
gex_cluster <- ggplot2::ggplot(gex_result,ggplot2::aes(tSNE_1,tSNE_2))+ggplot2::geom_point(ggplot2::aes(colour=as.factor(Cluster)), size=4.5)+ggplot2::theme_bw()+ggplot2::labs(color="Cluster")+ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 10))) +
ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
text = ggplot2::element_text(size=20),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=48),
axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
plot.margin = ggplot2::unit(c(3,3,3,3),"lines")) + ggplot2::ggtitle('Gene Expression')
ssnpa_result <- read.table('./liver_scrnaseq_analysis/test_refxpd/E15.5/ssNPA_cluster_output_E15.5_pd_4_.txt', quote="", sep='\t', header=T)
ssnpa_class <- ggplot2::ggplot(ssnpa_result,ggplot2::aes(tSNE_1,tSNE_2))+ggplot2::geom_point(ggplot2::aes(colour=class$time,shape=as.factor(class$cell_type)), size=4.5)+ggplot2::theme_bw()+ggplot2::labs(color="Time",shape="Cell Type")+ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 10)),shape = ggplot2::guide_legend(override.aes = list(size = 10))) +
ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
text = ggplot2::element_text(size=20),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=48),
axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
plot.margin = ggplot2::unit(c(3,3,3,3),"lines")) +
ggplot2::scale_colour_brewer(palette="Set2") + ggplot2::ggtitle('ssNPA')
ssnpa_cluster <- ggplot2::ggplot(ssnpa_result,ggplot2::aes(tSNE_1,tSNE_2))+ggplot2::geom_point(ggplot2::aes(colour=as.factor(Cluster)), size=4.5)+ggplot2::theme_bw()+ggplot2::labs(color="Cluster")+ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 10))) +
ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
text = ggplot2::element_text(size=20),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=48),
axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
plot.margin = ggplot2::unit(c(3,3,3,3),"lines")) + ggplot2::ggtitle('ssNPA')
pathifier_result <- read.table('./liver_scrnaseq_analysis/test_pathifier_ref/E13.5/pathifier_cluster_output_E13.5_.txt', quote="", sep='\t', header=T)
pathifier_class <- ggplot2::ggplot(pathifier_result,ggplot2::aes(tSNE_1,tSNE_2))+ggplot2::geom_point(ggplot2::aes(colour=class$time,shape=as.factor(class$cell_type)), size=4.5)+ggplot2::theme_bw()+ggplot2::labs(color="Time",shape="Cell Type")+ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 10)),shape = ggplot2::guide_legend(override.aes = list(size = 10))) +
ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
text = ggplot2::element_text(size=20),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=48),
axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
plot.margin = ggplot2::unit(c(3,3,3,3),"lines")) +
ggplot2::scale_colour_brewer(palette="Set2") + ggplot2::ggtitle('Pathifier')
pathifier_cluster <- ggplot2::ggplot(pathifier_result,ggplot2::aes(tSNE_1,tSNE_2))+ggplot2::geom_point(ggplot2::aes(colour=as.factor(Cluster)), size=4.5)+ggplot2::theme_bw()+ggplot2::labs(color="Cluster")+ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 10))) +
ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
text = ggplot2::element_text(size=20),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=48),
axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
plot.margin = ggplot2::unit(c(3,3,3,3),"lines")) + ggplot2::ggtitle('Pathifier')
ssgsea_result <- read.table('./liver_scrnaseq_analysis/test_ssgsea/ssgsea_cluster_output.txt', quote="", sep='\t', header=T)
ssgsea_class <- ggplot2::ggplot(ssgsea_result,ggplot2::aes(tSNE_1,tSNE_2))+ggplot2::geom_point(ggplot2::aes(colour=class$time,shape=as.factor(class$cell_type)), size=4.5)+ggplot2::theme_bw()+ggplot2::labs(color="Time",shape="Cell Type")+ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 10)),shape = ggplot2::guide_legend(override.aes = list(size = 10))) +
ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
text = ggplot2::element_text(size=20),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=48),
axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
plot.margin = ggplot2::unit(c(3,3,3,3),"lines")) +
ggplot2::scale_colour_brewer(palette="Set2") + ggplot2::ggtitle('ssGSEA')
ssgsea_cluster <- ggplot2::ggplot(ssgsea_result,ggplot2::aes(tSNE_1,tSNE_2))+ggplot2::geom_point(ggplot2::aes(colour=as.factor(Cluster)), size=4.5)+ggplot2::theme_bw()+ggplot2::labs(color="Cluster")+ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 10))) +
ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
text = ggplot2::element_text(size=20),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=48),
axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
plot.margin = ggplot2::unit(c(3,3,3,3),"lines"))+ ggplot2::ggtitle('ssGSEA')
ssnpalr_result <- read.table('./liver_scrnaseq_analysis/test_lr_ref/E13.5/ssNPA-LR_cluster_output_E13.5_.txt', quote="", sep='\t', header=T)
ssnpalr_class <- ggplot2::ggplot(ssnpalr_result,ggplot2::aes(tSNE_1,tSNE_2))+ggplot2::geom_point(ggplot2::aes(colour=class$time,shape=as.factor(class$cell_type)), size=4.5)+ggplot2::theme_bw()+ggplot2::labs(color="Time",shape="Cell Type")+ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 10)),shape = ggplot2::guide_legend(override.aes = list(size = 10))) +
ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
text = ggplot2::element_text(size=20),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=48),
axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
plot.margin = ggplot2::unit(c(3,3,3,3),"lines")) +
ggplot2::scale_colour_brewer(palette="Set2")
ssnpalr_cluster <- ggplot2::ggplot(ssnpalr_result,ggplot2::aes(tSNE_1,tSNE_2))+ggplot2::geom_point(ggplot2::aes(colour=as.factor(Cluster)), size=4.5)+ggplot2::theme_bw()+ggplot2::labs(color="Cluster")+ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 10))) +
ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
text = ggplot2::element_text(size=20),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=48),
axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
plot.margin = ggplot2::unit(c(3,22,3,3),"lines"))
# Make panel figure comparing the methods, colored by time and cell type
method_comp_class <- ggpubr::ggarrange(gex_class,ssnpa_class,pathifier_class,ssgsea_class, common.legend = T, legend="right", labels="AUTO",font.label=list(size=52, face="bold"), nrow=2, ncol=2)
pdf("liver_method_comparison_class.pdf", width=30, height=24)
method_comp_class
dev.off()
## quartz_off_screen
## 2
# Make panel figure comparing the methods, colored by cluster assignment
method_comp_cluster <- ggpubr::ggarrange(gex_cluster,ssnpa_cluster,pathifier_cluster,ssgsea_cluster, common.legend = F, legend="right", labels="AUTO",font.label=list(size=52, face="bold"), nrow=2, ncol=2)
pdf("liver_method_comparison_cluster.pdf", width=29, height=24)
method_comp_cluster
dev.off()
## quartz_off_screen
## 2
# Make panel figure with ssNPA-LR results
ssnpalr_figure <- ggpubr::ggarrange(ssnpalr_class,ssnpalr_cluster, common.legend = F,legend="right",labels='AUTO',font.label = list(size = 52, face = "bold"),nrow=1,ncol=2)
pdf("liver_ssnpa_lr_result_figure.pdf", width=36, height=12)
ssnpalr_figure
dev.off()
## quartz_off_screen
## 2
# Make panel figure fewer genes/cells results
g_genes <- g_genes + ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
text = ggplot2::element_text(size=20),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=48),
axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
plot.margin = ggplot2::unit(c(3,3,3,3),"lines"))
g_cells <- g_cells + ggplot2::theme(axis.title.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 15, b = 0, l = 0)),
text = ggplot2::element_text(size=20),
legend.text=ggplot2::element_text(size=36),
legend.title=ggplot2::element_text(size=48),
axis.title.x = ggplot2::element_text(margin = ggplot2::margin(t = 15, r = 0, b = 0, l = 0)),
legend.key.size = ggplot2::unit(3.5, 'lines'),
plot.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
plot.title = ggplot2::element_text(hjust = 0.5, size=40),
plot.margin = ggplot2::unit(c(3,3,3,3),"lines"))
few_figure <- ggpubr::ggarrange(g_genes,g_cells, common.legend = F,labels='AUTO',font.label = list(size = 52, face = "bold"),nrow=1,ncol=2)
## Warning: Removed 1 rows containing missing values (geom_errorbar).
## Warning: Removed 1 rows containing missing values (geom_errorbar).
pdf("fewer_genes_cells_result_figure.pdf", width=24, height=12)
few_figure
dev.off()
## quartz_off_screen
## 2
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] mclust_5.4.5 Rcpp_1.0.2 locfit_1.5-9.1
## [4] lattice_0.20-38 prettyunits_1.0.2 assertthat_0.2.1
## [7] zeallot_0.1.0 digest_0.6.22 R6_2.4.0
## [10] plyr_1.8.4 backports_1.1.5 stats4_3.6.1
## [13] RSQLite_2.1.2 evaluate_0.14 httr_1.4.1
## [16] ggplot2_3.2.1 pillar_1.4.2 rlang_0.4.1
## [19] Rmisc_1.5 progress_1.2.2 lazyeval_0.2.2
## [22] curl_4.2 blob_1.2.0 S4Vectors_0.22.1
## [25] Matrix_1.2-17 rmarkdown_1.16 labeling_0.3
## [28] stringr_1.4.0 RCurl_1.95-4.12 bit_1.1-14
## [31] biomaRt_2.40.5 munsell_0.5.0 compiler_3.6.1
## [34] xfun_0.10 pkgconfig_2.0.3 BiocGenerics_0.30.0
## [37] htmltools_0.4.0 tidyselect_0.2.5 gridExtra_2.3
## [40] tibble_2.1.3 edgeR_3.26.8 IRanges_2.18.3
## [43] XML_3.98-1.20 aricode_0.1.2 crayon_1.3.4
## [46] dplyr_0.8.3 ggpubr_0.2.3 bitops_1.0-6
## [49] grid_3.6.1 gtable_0.3.0 DBI_1.0.0
## [52] magrittr_1.5 scales_1.0.0 stringi_1.4.3
## [55] ggsignif_0.6.0 limma_3.40.6 vctrs_0.2.0
## [58] cowplot_1.0.0 RColorBrewer_1.1-2 tools_3.6.1
## [61] bit64_0.9-7 Biobase_2.44.0 glue_1.3.1
## [64] purrr_0.3.3 hms_0.5.2 parallel_3.6.1
## [67] yaml_2.2.0 ssnpa_0.0.0.9003 AnnotationDbi_1.46.1
## [70] colorspace_1.4-1 memoise_1.1.0 knitr_1.25