# Load the mutation table into `dat`. The separator is a tab, as in every
# later load of this file. The stray `int` token (object not found), the
# `sep=="\t"` typo (a comparison passed positionally instead of the named
# `sep` argument), and the superseded default-separator reads / console dumps
# are dropped so the span runs cleanly and ends in the same state.
dat <- read.csv("/petra/data/tbu/yongtao/cosmic.out.csv",stringsAsFactors=FALSE,sep="\t")
# Exploratory plots of the loaded table. The repeated hist() calls are
# interactive history (flipping back and forth between the `dat` and `boot`
# histograms); they are kept verbatim.
hist(dat$AA.Mutation.Start)
hist(dat$meanIDR)
# Second table, read with the same options as `dat`.
# NOTE(review): presumably a bootstrap/control set, judging by the file name - confirm.
boot <- read.csv("/petra/data/tbu/yongtao/cosmic.boot.csv",stringsAsFactors=FALSE,sep="\t")
hist(dat$meanIDR)
hist(boot$meanIDR)
hist(dat$meanIDR)
hist(boot$meanIDR)
hist(dat$meanIDR)
hist(boot$meanIDR)
hist(dat$meanIDR)
hist(dat$meanIDR)
hist(boot$meanIDR)
hist(dat$meanIDR)
hist(boot$meanIDR)
dat <- read.csv("/petra/data/tbu/yongtao/cosmic.out.csv",stringsAsFactors=FALSE,sep="\t")
hist(dat$meanIDR)
# NOTE(review): `dat` is still the whole data frame here, not a single profile.
plot(dat,type="l")
# NOTE(review): this overwrites `dat` with the transposed contents of temp.csv;
# `dat` is reloaded from cosmic.out.csv two lines below.
dat<-as.matrix(t(read.csv("/home/tbudev3/temp.csv",header=FALSE)))  # oneprofile.csv
plot(dat,type="l")
dat <- read.csv("/petra/data/tbu/yongtao/cosmic.out.csv",stringsAsFactors=FALSE,sep="\t")
hist(dat$meanIDR)
hist(boot$meanIDR)
hist(dat$meanIDR)
hist(boot$meanIDR)
hist(dat$meanIDR)
plot(dat,type="l")
# NOTE(review): `oneprof` is referenced here before its assignment on the next line.
oneprof
# Same temp.csv read as above, now kept in its own variable so `dat` is not clobbered.
oneprof<-as.matrix(t(read.csv("/home/tbudev3/temp.csv",header=FALSE)))  # oneprofile.csv
plot(oneprof,type="l")
# NOTE(review): `ma` is not defined in this file; presumably a moving-average
# helper (window 30) from elsewhere - confirm.
plot(ma(oneprof,30),type="l")
plot(oneprof,type="l")
dat <- read.csv("/petra/data/tbu/yongtao/cosmic.out.csv",stringsAsFactors=FALSE,sep="\t")
hist(dat$meanIDR)
dat <- read.csv("/petra/data/tbu/yongtao/cosmic.out.csv",stringsAsFactors=FALSE,sep="\t")
dat <- read.csv("/petra/data/tbu/yongtao/cosmic.out.csv",stringsAsFactors=FALSE,sep="\t")
# Histogram of protPos at increasing resolution.
hist(dat$protPos)
# NOTE(review): hist() has no `bins` parameter; the following calls use `breaks`.
hist(dat$protPos,bins=50)
hist(dat$protPos,breaks=50)
hist(dat$protPos,breaks=100)
hist(dat$protPos,breaks=200)
# Number of rows in the mutation table. An unterminated `nrow(dat=` call that
# stood here was removed: it swallowed the following lines and made the file
# unparseable.
nrow(dat)
nrow(dat)
# Zoom in on the protPos histogram and narrow down which rows (and which gene)
# fall in the 0.77-0.79 window. Interactive history, kept verbatim.
hist(dat$protPos,breaks=200)
hist(dat$protPos,breaks=300)
hist(dat$meanIDR)
# NOTE(review): `prosPos` looks like a typo for `protPos`; the corrected
# expressions follow a few lines down.
dat$protPos>0.7 & dat$prosPos<0.9
# `&&` is the scalar operator; the elementwise `&` is what a row mask needs.
dat$protPos>0.7 && dat$prosPos<0.9
dat$protPos>0.7
dat$protPos>0.7 && dat$protPos<0.9
dat$protPos>0.7 & dat$protPos<0.9
# NOTE(review): hist() is handed whole data-frame rows here; the later calls
# pass the `$protPos` column instead.
hist(dat[which(dat$protPos>0.7 & dat$protPos<0.9),])
hist(dat[which(dat$protPos>0.7 & dat$protPos<0.9),])
dat[which(dat$protPos>0.7 & dat$protPos<0.9),]
# Progressively tighter windows around the region of interest.
hist(dat[which(dat$protPos>0.7 & dat$protPos<0.9),]$protPos)
hist(dat[which(dat$protPos>0.75 & dat$protPos<0.8),]$protPos)
hist(dat[which(dat$protPos>0.77 & dat$protPos<0.8),]$protPos)
hist(dat[which(dat$protPos>0.77 & dat$protPos<0.79),]$protPos)
dat[which(dat$protPos>0.77 & dat$protPos<0.79),]
dat[which(dat$protPos>0.77 & dat$protPos<0.79),]
colnames(dat)
# Which genes do the rows in that window belong to?
dat[which(dat$protPos>0.77 & dat$protPos<0.79),]$Gene.ID
colnames(dat)
dat[which(dat$protPos>0.77 & dat$protPos<0.79),]$Gene.Name
dat$Gene.Name
unique(dat$Gene.Name)
# NOTE(review): `size` is not defined in this file and is not a base R function - confirm.
size(dat$Gene.Name=="BRAF")
# NOTE(review): length() of a logical vector is its total length, not the
# number of TRUE entries, so this is the row count rather than the BRAF count.
length(dat$Gene.Name=="BRAF")
unique(dat$Gene.Name)
# The hash package is loaded here; later versions of pickOneMutFromEachGene use it.
library(hash)
dat <- read.csv("/petra/data/tbu/yongtao/cosmic.out.csv",stringsAsFactors=FALSE,sep="\t")
# Print, for every distinct gene in `dat$Gene.Name`, the index of the first
# row carrying that gene (one printed line per gene, in order of first
# appearance).
#
# dat: data frame with a `Gene.Name` column.
# Returns NULL invisibly; this draft is used only for its printed output.
pickOneMutFromEachGene <- function(dat) {
  genes <- dat$Gene.Name
  # Iterate over the values themselves rather than 1:length(...): on an empty
  # table the index form would loop over 1 and 0 and print Inf with warnings.
  for (gene in unique(genes)) {
    # match() returns the position of the first occurrence.
    print(match(gene, genes))
  }
}
# Print, for every distinct gene in `dat$Gene.Name`, the index of the first
# row carrying that gene (one printed line per gene, in order of first
# appearance).
#
# dat: data frame with a `Gene.Name` column.
# Returns NULL invisibly; this draft is used only for its printed output.
pickOneMutFromEachGene <- function(dat) {
  genes <- dat$Gene.Name
  # Iterate over the values themselves rather than 1:length(...): on an empty
  # table the index form would loop over 1 and 0 and print Inf with warnings.
  for (gene in unique(genes)) {
    # match() returns the position of the first occurrence.
    print(match(gene, genes))
  }
}
# Try the helper on the loaded table; at this point it only prints row indices.
pickOneMutFromEachGene(dat)
# Print, for every distinct gene in `dat$Gene.Name`, the gene name followed by
# the index of the first row carrying that gene (two printed lines per gene,
# genes in order of first appearance).
#
# dat: data frame with a `Gene.Name` column.
# Returns NULL invisibly; this draft is used only for its printed output.
pickOneMutFromEachGene <- function(dat) {
  genes <- dat$Gene.Name
  # Iterate over the values themselves rather than 1:length(...): on an empty
  # table the index form would loop over 1 and 0 and print spurious output.
  for (gene in unique(genes)) {
    print(gene)
    # match() returns the position of the first occurrence.
    print(match(gene, genes))
  }
}
# Re-run the helper; this version prints each gene name followed by its first row index.
pickOneMutFromEachGene(dat)
# Keep one row per gene: the first row in which each `Gene.Name` value occurs.
#
# dat: data frame with a `Gene.Name` column.
# Returns a data frame with the selected rows, in their original order and
# with their original row names. An empty input yields an empty data frame
# (the earlier index loop over 1:length(...) produced NA rows in that case).
pickOneMutFromEachGene <- function(dat) {
  # duplicated() flags every occurrence after the first, so its negation is
  # exactly "first row of each gene" - no per-gene scan or vector growth.
  # drop = FALSE keeps the result a data frame even for a one-column input.
  dat[!duplicated(dat$Gene.Name), , drop = FALSE]
}
# Reduce both tables to one row per gene and compare their meanIDR
# distributions. Interactive history, kept verbatim.
pickOneMutFromEachGene(dat)
dat1<-pickOneMutFromEachGene(dat)
hist(dat$meanIDR)
hist(dat1$meanIDR)
# NOTE(review): boot1 is built from `dat` here, so it duplicates dat1 until it
# is rebuilt from `boot` a few lines below.
boot1<-pickOneMutFromEachGene(dat)
hist(boot1$meanIDR)
hist(dat1$meanIDR)
hist(boot1$meanIDR)
# Corrected: boot1 now comes from `boot`.
boot1<-pickOneMutFromEachGene(boot)
hist(boot1$meanIDR)
hist(dat1$meanIDR)
hist(boot1$meanIDR)
hist(dat1$meanIDR)
hist(boot1$meanIDR)
hist(dat1$meanIDR)
hist(boot1$meanIDR)
hist(boot1$meanIDR)
hist(dat1$meanIDR)
hist(boot1$meanIDR)
hist(dat1$meanIDR)
hist(boot1$meanIDR)
mean(dat1$meanIDR)
mean(boot1$meanIDR)
boot1$meanIDR
# Reload both tables from disk.
dat <- read.csv("/petra/data/tbu/yongtao/cosmic.out.csv",stringsAsFactors=FALSE,sep="\t")
boot <- read.csv("/petra/data/tbu/yongtao/cosmic.boot.csv",stringsAsFactors=FALSE,sep="\t")
# Keep one row per gene: the first row in which each `Gene.Name` value occurs.
#
# dat: data frame with a `Gene.Name` column.
# Returns a data frame with the selected rows, in their original order and
# with their original row names. An empty input yields an empty data frame
# (the earlier index loop over 1:length(...) produced NA rows in that case).
pickOneMutFromEachGene <- function(dat) {
  # A single vectorised duplicated() pass replaces the per-gene which() scan
  # and the repeated c() growth, which was the slow part of the loop version.
  # drop = FALSE keeps the result a data frame even for a one-column input.
  dat[!duplicated(dat$Gene.Name), , drop = FALSE]
}
# One-row-per-gene subsets of both tables, then the same meanIDR / protPos
# comparisons as before. Interactive history, kept verbatim.
dat1<-pickOneMutFromEachGene(dat)
boot1<-pickOneMutFromEachGene(boot)
hist(dat$meanIDR)
hist(boot$meanIDR)
hist(dat1$meanIDR)
hist(boot1$meanIDR)
mean(dat1$meanIDR)
mean(boot1$meanIDR)
hist(dat$protPos)
hist(dat1$protPos)
hist(boot1$protPos)
hist(dat1$protPos)
hist(boot1$protPos)
hist(dat1$protPos)
hist(boot1$protPos)
# Reload the tables and rebuild the per-gene subsets.
dat <- read.csv("/petra/data/tbu/yongtao/cosmic.out.csv",stringsAsFactors=FALSE,sep="\t")
boot <- read.csv("/petra/data/tbu/yongtao/cosmic.boot.csv",stringsAsFactors=FALSE,sep="\t")
dat1<-pickOneMutFromEachGene(dat)
boot1<-pickOneMutFromEachGene(boot)
dat1
nrow(dat1)
dat1
hist(dat$meanIDR)
hist(boot$meanIDR)
boot1<-pickOneMutFromEachGene(boot)
dat1<-pickOneMutFromEachGene(dat)
boot1<-pickOneMutFromEachGene(boot)
hist(dat1$meanIDR)
hist(boot1$meanIDR)
mean(dat1$meanIDR)
mean(boot1$meanIDR)
hist(dat1$meanIDR)
hist(boot1$meanIDR)
hist(dat1$meanIDR)
hist(boot1$meanIDR)
hist(dat1$meanIDR)
hist(boot1$meanIDR)
hist(dat1$meanIDR)
hist(boot1$meanIDR)
hist(boot1$regPos)
# First look at the hash package: construct an empty hash and open its help pages.
hash()
h <- hash()
?hashmap()
?hash
dat <- read.csv("/petra/data/tbu/yongtao/cosmic.out.csv",stringsAsFactors=FALSE,sep="\t")
boot <- read.csv("/petra/data/tbu/yongtao/cosmic.boot.csv",stringsAsFactors=FALSE,sep="\t")
# Keep one row per gene: the first row in which each `Gene.Name` value occurs.
#
# dat: data frame with a `Gene.Name` column.
# Returns a data frame with the selected rows, in their original order and
# with their original row names. An empty input yields an empty data frame
# (the earlier index loop over 1:length(...) produced NA rows in that case).
pickOneMutFromEachGene <- function(dat) {
  # A single vectorised duplicated() pass replaces the per-gene which() scan
  # and the repeated c() growth, which was the slow part of the loop version.
  # drop = FALSE keeps the result a data frame even for a one-column input.
  dat[!duplicated(dat$Gene.Name), , drop = FALSE]
}
# Per-gene subsets again, followed by meanIDR / protPos / regPos comparisons
# and some experiments with the hash API. Interactive history, kept verbatim.
dat1<-pickOneMutFromEachGene(dat)
boot1<-pickOneMutFromEachGene(boot)
hist(dat$meanIDR)
hist(boot$meanIDR)
hist(dat1$meanIDR)
hist(boot1$meanIDR)
mean(dat1$meanIDR)
mean(boot1$meanIDR)
# Author's notes on why the data are reduced to one mutation per gene:
#hist(dat$protPos)
#yikes! a whole ton with the same length!!! what is going on? need to normalize
#maybe only pick one mutation per gene?
#dat[which(dat$protPos>0.77 & dat$protPos<0.79),]
#this is all because of BRAF!!!!
hist(dat1$protPos)
hist(boot1$protPos)
hist(dat1$protPos)
hist(boot1$protPos)
hist(dat1$regPos)
hist(boot1$regPos)
hist(dat1$regPos)
hist(boot1$regPos)
hist(dat1$regPos)
hist(boot1$regPos)
hist(dat1$regPos)
hist(boot1$regPos)
hist(dat1$regPos)
hist(boot1$regPos)
hist(dat1$regPos)
hist(boot1$regPos)
hist(dat1$regPos)
hist(boot1$regPos)
?hash
# NOTE(review): this indexes the `hash` constructor itself, not a hash
# instance; the instance `h` is created on the next line.
hash["gene"]
# Experiments with single-bracket vs double-bracket access on a hash,
# including lookups of keys that were never set ("gene1", "gene2").
h<-hash()
h["gene"]
h["gene"]<-1
h["gene"]
h["gene2"]
h[["gene2"]]
h[["gene1"]]
h[["gene"]]
h[["gene1"]]
h
values(h)
dat <- read.csv("/petra/data/tbu/yongtao/cosmic.out.csv",stringsAsFactors=FALSE,sep="\t")
boot <- read.csv("/petra/data/tbu/yongtao/cosmic.boot.csv",stringsAsFactors=FALSE,sep="\t")
# Keep one row per gene: the first row in which each `Gene.Name` value occurs.
#
# dat: data frame with a `Gene.Name` column.
# Returns a data frame with one row per distinct gene, ordered by gene name,
# keeping the original row names. An empty input yields an empty data frame
# (the previous `for (i in 1:nrow(dat))` loop iterated over 1 and 0 there).
#
# This is a vectorised base-R replacement for the hash-based loop: it needs no
# `hash` package and no R-level iteration over rows.
pickOneMutFromEachGene <- function(dat) {
  genes <- dat$Gene.Name
  # Position of the first occurrence of every distinct gene.
  first_rows <- which(!duplicated(genes))
  # NOTE(review): rows are sorted by gene name to mirror the key-sorted order
  # that values() of the hash package is understood to return - confirm if
  # callers depend on row order.
  first_rows <- first_rows[order(genes[first_rows])]
  # drop = FALSE keeps the result a data frame even for a one-column input.
  dat[first_rows, , drop = FALSE]
}
# Rebuild the per-gene subsets with the redefined helper, inspect dat1, then
# reload both tables from disk.
dat1<-pickOneMutFromEachGene(dat)
boot1<-pickOneMutFromEachGene(boot)
dat1
dat <- read.csv("/petra/data/tbu/yongtao/cosmic.out.csv",stringsAsFactors=FALSE,sep="\t")
boot <- read.csv("/petra/data/tbu/yongtao/cosmic.boot.csv",stringsAsFactors=FALSE,sep="\t")
# Keep one row per gene: the first row in which each `Gene.Name` value occurs.
#
# dat: data frame with a `Gene.Name` column.
# Returns a data frame with one row per distinct gene, ordered by gene name,
# keeping the original row names. An empty input yields an empty data frame
# (the previous `for (i in 1:nrow(dat))` loop iterated over 1 and 0 there).
#
# This is a vectorised base-R replacement for the hash-based loop: it needs no
# `hash` package and no R-level iteration over rows.
pickOneMutFromEachGene <- function(dat) {
  genes <- dat$Gene.Name
  # Position of the first occurrence of every distinct gene.
  first_rows <- which(!duplicated(genes))
  # NOTE(review): rows are sorted by gene name to mirror the key-sorted order
  # that values() of the hash package is understood to return - confirm if
  # callers depend on row order.
  first_rows <- first_rows[order(genes[first_rows])]
  # drop = FALSE keeps the result a data frame even for a one-column input.
  dat[first_rows, , drop = FALSE]
}
# Rebuild and inspect the per-gene subset of `dat`, then reload both tables.
dat1<-pickOneMutFromEachGene(dat)
dat1
dat <- read.csv("/petra/data/tbu/yongtao/cosmic.out.csv",stringsAsFactors=FALSE,sep="\t")
boot <- read.csv("/petra/data/tbu/yongtao/cosmic.boot.csv",stringsAsFactors=FALSE,sep="\t")
# Keep one row per gene: the first row in which each `Gene.Name` value occurs.
#
# dat: data frame with a `Gene.Name` column.
# Returns a data frame with one row per distinct gene, ordered by gene name,
# keeping the original row names. An empty input yields an empty data frame
# (the previous `for (i in 1:nrow(dat))` loop iterated over 1 and 0 there).
#
# This is a vectorised base-R replacement for the hash-based loop: it needs no
# `hash` package and no R-level iteration over rows.
pickOneMutFromEachGene <- function(dat) {
  genes <- dat$Gene.Name
  # Position of the first occurrence of every distinct gene.
  first_rows <- which(!duplicated(genes))
  # NOTE(review): rows are sorted by gene name to mirror the key-sorted order
  # that values() of the hash package is understood to return - confirm if
  # callers depend on row order.
  first_rows <- first_rows[order(genes[first_rows])]
  # drop = FALSE keeps the result a data frame even for a one-column input.
  dat[first_rows, , drop = FALSE]
}
# Per-gene subsets, sanity checks on their size, distribution comparisons, and
# a t-test on meanIDR between the two subsets. Interactive history, kept verbatim.
dat1<-pickOneMutFromEachGene(dat)
boot1<-pickOneMutFromEachGene(boot)
dat
dat1
mean(dat1$meanIDR)
hist(dat$meanIDR)
hist(boot$meanIDR)
hist(dat1$meanIDR)
hist(boot1$meanIDR)
mean(dat1$meanIDR)
mean(boot1$meanIDR)
# Author's notes on why the data are reduced to one mutation per gene:
#hist(dat$protPos)
#yikes! a whole ton with the same length!!! what is going on? need to normalize
#maybe only pick one mutation per gene?
#dat[which(dat$protPos>0.77 & dat$protPos<0.79),]
#this is all because of BRAF!!!!
hist(dat1$protPos)
hist(boot1$protPos)
hist(dat1$regPos)
hist(boot1$regPos)
hist(dat1$regPos)
hist(boot1$regPos)
# Check that the subset has exactly one row per distinct gene: the number of
# unique genes in `dat` should equal the number of rows in dat1.
length(unique(dat$Gene.Name))
length(dat$Gene.Name)
length(unique(dat$Gene.Name))
length(unique(dat1$Gene.Name))
length(dat1$Gene.Name)
mean(dat1$meanIDR)
mean(boot1$meanIDR)
# Reload from disk and repeat the comparison.
dat <- read.csv("/petra/data/tbu/yongtao/cosmic.out.csv",stringsAsFactors=FALSE,sep="\t")
boot <- read.csv("/petra/data/tbu/yongtao/cosmic.boot.csv",stringsAsFactors=FALSE,sep="\t")
dat1<-pickOneMutFromEachGene(dat)
boot1<-pickOneMutFromEachGene(boot)
hist(dat$meanIDR)
hist(boot$meanIDR)
hist(dat1$meanIDR)
hist(boot1$meanIDR)
mean(dat1$meanIDR)
mean(boot1$meanIDR)
hist(dat1$regPos)
hist(boot1$regPos)
hist(dat1$regPos)
hist(boot1$regPos)
hist(dat1$regPos)
hist(boot1$regPos)
hist(dat1$regPos)
hist(boot1$regPos)
hist(boot1$protPos)
hist(dat1$protPos)
hist(boot1$protPos)
mean(dat1$meanIDR)
mean(boot1$meanIDR)
mean(dat1$meanIDR)
mean(boot1$meanIDR)
# Two-sample t-test of meanIDR between the per-gene subsets.
t.test(dat1$meanIDR, boot1$meanIDR)
t.test(dat1$meanIDR, boot1$meanIDR)
# NOTE(review): boot1 is shifted by +1 here, presumably as a sanity check that
# t.test reports a difference for clearly separated samples - confirm.
t.test(dat1$meanIDR, boot1$meanIDR+1)
t.test(dat1$meanIDR, boot1$meanIDR)
# Number of rows whose gene is BRAF, and their share of all rows.
# length() of the bare comparison would return the total row count (a logical
# vector is as long as its input), so the TRUE positions are extracted with
# which() first. An earlier attempt with an unbalanced parenthesis, which made
# the file unparseable, was removed.
length(which(dat$Gene.Name=="BRAF"))
length(which(dat$Gene.Name=="BRAF"))/length(dat$Gene.Name)
# How many rows of the per-gene subset have Sample.Source equal to "cell-line",
# compared with the subset's total row count.
dat1$Sample.Source
which(dat1$Sample.Source=="cell-line")
nrow(dat1)
length(which(dat1$Sample.Source=="cell-line"))
nrow(dat1)
# Help lookup for choose().
?choose
# Keep one randomly chosen row per gene.
#
# The rows are shuffled once and the first row of each `Gene.Name` value in
# the shuffled order is kept, so every row of a gene is equally likely to be
# the one selected. Uses the global RNG (one draw); call set.seed() beforehand
# for reproducible output.
#
# dat: data frame with a `Gene.Name` column.
# Returns a data frame with one row per distinct gene, ordered by gene name,
# keeping the original row names. An empty input yields an empty data frame
# (the previous `for (i in 1:nrow(dat))` loop iterated over 1 and 0 there).
#
# This is a vectorised base-R replacement for the hash-based loop: it needs no
# `hash` package and no R-level iteration over rows.
pickOneMutFromEachGene <- function(dat) {
  # sample.int(n) is the same permutation draw as sample(n) for n >= 1 and is
  # well defined (empty) for n == 0.
  shuffled <- dat[sample.int(nrow(dat)), , drop = FALSE]
  genes <- shuffled$Gene.Name
  # Position of the first occurrence of every distinct gene after shuffling.
  first_rows <- which(!duplicated(genes))
  # NOTE(review): rows are sorted by gene name to mirror the key-sorted order
  # that values() of the hash package is understood to return - confirm if
  # callers depend on row order.
  first_rows <- first_rows[order(genes[first_rows])]
  shuffled[first_rows, , drop = FALSE]
}
# Per-gene subsets built with the shuffling variant of the helper defined just
# above, followed by the usual comparisons. Interactive history, kept verbatim.
dat1<-pickOneMutFromEachGene(dat)
boot1<-pickOneMutFromEachGene(boot)
hist(dat$meanIDR)
hist(boot$meanIDR)
hist(dat1$meanIDR)
hist(boot1$meanIDR)
t.test(dat1$meanIDR, boot1$meanIDR)
mean(dat1$meanIDR)
mean(boot1$meanIDR)
# Author's notes on why the data are reduced to one mutation per gene:
#hist(dat$protPos)
#yikes! a whole ton with the same length!!! what is going on? need to normalize
#maybe only pick one mutation per gene?
#dat[which(dat$protPos>0.77 & dat$protPos<0.79),]
#this is all because of BRAF!!!!
hist(dat1$protPos)
hist(boot1$protPos)
hist(dat1$regPos)    #this WILL not be even, because we take the first mutation for each gene!
hist(boot1$regPos)
#### remove cancer cell lines? 220 out of 5210 samples (dat1)
length(which(dat1$Sample.Source=="cell-line"))
nrow(dat1)
hist(dat1$protPos)
hist(boot1$protPos)
hist(dat1$regPos)    #this WILL not be even, because we take the first mutation for each gene!
hist(boot1$regPos)
hist(dat1$regPos)    #this WILL not be even, because we take the first mutation for each gene!
hist(boot1$regPos)
# Keep one row per gene: the first row, in table order, in which each
# `Gene.Name` value occurs. Row shuffling is deliberately disabled in this
# variant, so the selection is deterministic.
#
# dat: data frame with a `Gene.Name` column.
# Returns a data frame with one row per distinct gene, ordered by gene name,
# keeping the original row names. An empty input yields an empty data frame
# (the previous `for (i in 1:nrow(dat))` loop iterated over 1 and 0 there).
#
# This is a vectorised base-R replacement for the hash-based loop: it needs no
# `hash` package and no R-level iteration over rows.
pickOneMutFromEachGene <- function(dat) {
  genes <- dat$Gene.Name
  # Position of the first occurrence of every distinct gene.
  first_rows <- which(!duplicated(genes))
  # NOTE(review): rows are sorted by gene name to mirror the key-sorted order
  # that values() of the hash package is understood to return - confirm if
  # callers depend on row order.
  first_rows <- first_rows[order(genes[first_rows])]
  # drop = FALSE keeps the result a data frame even for a one-column input.
  dat[first_rows, , drop = FALSE]
}
# dat2 is built with the non-shuffling variant defined just above and compared
# with dat1 on protPos. Interactive history, kept verbatim.
dat2<-pickOneMutFromEachGene(dat)
hist(dat1$protPos)
hist(dat2$protPos)
hist(dat1$protPos)
hist(dat2$protPos)
hist(dat1$protPos)
hist(dat2$protPos)
hist(dat1$protPos)
hist(dat2$protPos)
hist(dat1$protPos)
hist(dat2$protPos)
hist(dat1$protPos)
hist(dat2$protPos)
nrow(dat2)
# Share of "cell-line" rows, now in the full table rather than the per-gene subset.
length(which(dat$Sample.Source=="cell-line"))
nrow(dat)
# The same two numbers quoted in the earlier "remove cancer cell lines?" note.
220/5210
length(which(dat$Sample.Source=="cell-line"))/nrow(dat)
dat$Tumour.Source
unique(dat$Tumour.Source)
# Keep one randomly chosen row per gene.
#
# The rows are shuffled once and the first row of each `Gene.Name` value in
# the shuffled order is kept, so every row of a gene is equally likely to be
# the one selected. Uses the global RNG (one draw); call set.seed() beforehand
# for reproducible output.
#
# dat: data frame with a `Gene.Name` column.
# Returns a data frame with one row per distinct gene, ordered by gene name,
# keeping the original row names. An empty input yields an empty data frame
# (the previous `for (i in 1:nrow(dat))` loop iterated over 1 and 0 there).
#
# This is a vectorised base-R replacement for the hash-based loop: it needs no
# `hash` package and no R-level iteration over rows.
pickOneMutFromEachGene <- function(dat) {
  # sample.int(n) is the same permutation draw as sample(n) for n >= 1 and is
  # well defined (empty) for n == 0.
  shuffled <- dat[sample.int(nrow(dat)), , drop = FALSE]
  genes <- shuffled$Gene.Name
  # Position of the first occurrence of every distinct gene after shuffling.
  first_rows <- which(!duplicated(genes))
  # NOTE(review): rows are sorted by gene name to mirror the key-sorted order
  # that values() of the hash package is understood to return - confirm if
  # callers depend on row order.
  first_rows <- first_rows[order(genes[first_rows])]
  shuffled[first_rows, , drop = FALSE]
}
# Open dat2 in the data viewer.
View(dat2)
# NOTE(review): `dat21` is not assigned anywhere in this file, and `dat2` is a
# data frame; `clear` is presumably the hash-package function for emptying a
# hash, so these calls likely did not do what was intended - confirm.
clear(dat21)
clear(dat2)
?clear
dat$Sample.Name
# Load bnlearn and the `learning.test` data set used below. A line pasted with
# its console prompt (`> data(learning.test)`) was removed because it made the
# file unparseable; the duplicate library() call went with it.
library(bnlearn)
data(learning.test)
# Experiments with bnlearn: run iamb() on the two example data sets and plot
# the result. Interactive history, kept verbatim.
res <- iamb(learning.test)
res
learning.test
colnames(learning.test)
# Switch to the second example data set; `res` is overwritten.
data(gaussian.test)
res=iamb(gaussian.test)
undirected.arcs(res)
plot(res)
# NOTE(review): called without arguments; coefficients() needs a fitted object.
coefficients()
?iamb
plot(res)
# Help lookups for the Graphviz-based plotting route.
?graphviz.plot
?Rgraphviz
??Rgraphviz
?graphviz.plot
# Compare the default plot with the Graphviz rendering.
plot(res)
graphviz.plot(res)
plot(res)
graphviz.plot(res)
