Prof. Moysés Nascimento – 5.Article
5.Article: Bayesian forecasting of temporal gene expression by using an autoregressive panel data approach. Genetics and Molecular Reserach (accept).
1. R code
#1 - Through model selection criteria (AIC, BIC, etc. ..), the groups of genes sharing similar
temporal pattern have been identified.
#2 - In each group from the previous step, the Bayesian model-based clustering was applied following
the methodology proposed by Nascimento et al. (2012).
#2.1 - Paste the gibbspanel function in R
###################################### Gibbspanel function #########################################
gibbspanel<-function(dados1,iter,nlag,burnin)
{
m<-nrow(dados1)
p<-2
fi_est<-matrix(0,nrow=iter,(nrow(dados1)*p)+nrow(dados1))
sigma_e<-matrix(0, nrow=iter,1)
fi_est[1,]<- t(t(fi_est[1,]))
sigma_e[1]<-0.05
dados_agrup<-matrix(0,nrow=nrow(dados1),p+2)
m=nrow(dados1)
n=ncol(dados1)-2
P=diag(1,(m*p)+m,(m*p)+m)
mu=matrix(0,(m*p)+m,1)
alpha1=50
beta1=49
y<-dados1[,3:ncol(dados1)]
c<-matrix(t(y[,1:(n-p)]),m*(n-p),1)
X_aux<-matrix(0,m*(n-p),m*(p+1))
M<-matrix(1,n-p,3) #3= número de parâmetros
X_auxi<-kronecker(diag(1, m), M)
Xi<-matrix(cbind(1,b,c),m*(n-p),m*(p+1))
X<-Xi*X_auxi
Y1<-matrix(t(y[,(p+1):n]),m*(n-p),1)
sigma_M<-t(X)%*%X+solve(P) #matriz covariância da condicional de fi
fiB<-solve(sigma_M)%*%(t(X)%*%Y1+P%*%mu) #média da condicional de fi
D<-beta1 +0.5*((t(Y1)%*%Y1+t(mu)%*%solve(P)%*%mu)
-t((t(X)%*%Y1+solve(P)%*%mu))%*%
solve(sigma_M)%*%(t(X)%*%Y1+solve(P)%*%mu))
shape1<-0.5*((m*(n+1))+(2*alpha1))
for(iter in 2:iter)
{
fi_est[iter,]<-mvrnorm(n=1, fiB, sigma_e[iter-1]*sigma_M)
sigma_e[iter,]<-rinvgamma(1,shape1,D+0.5*(t(t(t(fi_est[iter,]))-fiB)%*%solve(sigma_M)%*%
(t(t(fi_est[iter,]))-fiB)))
}
a<<-fi_est
b<<-sigma_e
h<-as.data.frame(fi_est)
media1<-mean(h[burnin:iter,])
media<-matrix(media1,1,(m*p)+m)
precisao<-mean(sigma_e[burnin:iter,])
sigma_e1_grupo<-rep(precisao,nrow(dados1))
dados_agrup[,4]<-t(t(sigma_e1_grupo))
dados_agrup[,1]<-t(media[1,c(seq(1,3*m,3))])
dados_agrup[,2]<-t(media[1,c(seq(2,(3*m)-1,3))])
dados_agrup[,3]<-t(media[1,c(seq(3,(3*m),3))])
dados_agrup1<-cbind(dados1[,2],dados1[,1],dados_agrup)
#return(list(fi_est=fi_est,sigma_e=sigma_e,media=media,precisao=precisao,dados_agrup=dados_agrup,
dados_agrup1=dados_agrup1))
# return(list(dados_agrup1=dados_agrup1,fi_est=fi_est,sigma_e=sigma_e))
return(list(dados_agrup1=dados_agrup1))
}
####################################################################################################
#2.2 - Paste the "grouping" function in R
####"grouping" function ###############
grouping<-function(dados_agr,dados1)
{
mdist<-dist(dados[,3:6], method="euclidean")
agrup<-hclust(mdist^2,method ="ward")
mojema=mean(agrup$height)+1.25*sd(agrup$height)
k=length(agrup$height[agrup$height>mojema]) + 1
grupos<-cutree(agrup, k=k)
grupos1<<-as.data.frame(grupos)
dados1<-cbind(grupos,dados1[,-1] )
return(list(dados1=dados1,grupos=grupos))
}
####################################################################################################
#3- Install and load the MCMCpack and mnormt packages
####################################################################################################
install.packages("MCMCpack")
install.packages("mnormt")
library(MCMCpack)
library(mnormt)
####################################################################################################
#4. Read the data set containing only genes whose expression series were modeled by an AR(2)
panel data model (simplest multi-parametric model).
###################################################################################################
#The first column refers to the indentification of genes and the others are values of the
fold-change in different evaluated time points
dados<-read.table(" "data path"\\data.set.txt",header=T)
grupos<-rep(1,nrow(dados))
dados2<-cbind(grupos,dados)
dados1<-dados2
####################################################################################################
#5. Bayesian model-based clustering of temporal gene expression using autoregressive panel
data approach
####################################################################################################
grupo_teste<-matrix(0,100,ncol=nrow(dados1))
i<-2
repeat
{
x<-gibbspanel(dados1,20000,2,2000)
agr<-grouping(x$dados_agrup1,dados1)
grupo_teste[i,]<-t(as.matrix(agr$grupos))
x1<-by(agr$dados1,agr$dados1[,1], function(x) gibbspanel(x,10000,2,2000))
x2<-do.call("rbind",x1)
x2<-x2[order(x2[,1]),]
grupos1<-t(t(agr$grupo))
dados1<-cbind(grupos1,dados1[,-1])
if(sum(grupo_teste[i,]-grupo_teste[i-1,])==0)
break
i<-i+1
}
x2
####################################################################################################
#6. In each group from the previous step, the Bayesian forecasting of temporal gene expression
it was applied
####################################################################################################
kmean<-matrix((X%*%t(a[,1:(m*p)])),iter,m)
pred_norm<-matrix((mvrnorm(iter,Kmean,diag(iter))),iter,m)*fc
mean_prednorm<-matrix(0,1,m)
for (v in 1:(ncol(pred_norm)))
{
mean_prednorm[v]<-mean(pred_norm[,v])
}
####################################################################################################
|






