Endereço

Departamento de Estatística – UFV
Av. Peter Henry Rolfs, s/n
Campus Universitário
36570-900, Viçosa, MG
Tel.: +55 (31) 3612-6150
E-mail: det@ufv.br

Prof. Moysés Nascimento – 5.Article

5.Article: Bayesian forecasting of temporal gene expression by using an autoregressive panel data approach. Genetics and Molecular Reserach (accept).

1. R code

#1 - Through model selection criteria (AIC, BIC, etc. ..), the groups of genes sharing similar
     temporal pattern have been identified.
#2 - In each group from the previous step, the Bayesian model-based clustering was applied following
     the methodology proposed by Nascimento et al. (2012).
#2.1 - Paste the gibbspanel function in R

###################################### Gibbspanel function #########################################

gibbspanel<-function(dados1,iter,nlag,burnin)
{
  m<-nrow(dados1)
  p<-2
  fi_est<-matrix(0,nrow=iter,(nrow(dados1)*p)+nrow(dados1))
  sigma_e<-matrix(0, nrow=iter,1)
  fi_est[1,]<- t(t(fi_est[1,]))
  sigma_e[1]<-0.05
  dados_agrup<-matrix(0,nrow=nrow(dados1),p+2)
  m=nrow(dados1)
  n=ncol(dados1)-2
  P=diag(1,(m*p)+m,(m*p)+m) 
  mu=matrix(0,(m*p)+m,1)
  alpha1=50
  beta1=49
  y<-dados1[,3:ncol(dados1)]
  c<-matrix(t(y[,1:(n-p)]),m*(n-p),1)
  X_aux<-matrix(0,m*(n-p),m*(p+1))
  M<-matrix(1,n-p,3) #3= número de parâmetros
  X_auxi<-kronecker(diag(1, m), M)
  Xi<-matrix(cbind(1,b,c),m*(n-p),m*(p+1))
  X<-Xi*X_auxi
  Y1<-matrix(t(y[,(p+1):n]),m*(n-p),1)
  sigma_M<-t(X)%*%X+solve(P) #matriz covariância da condicional de fi
  fiB<-solve(sigma_M)%*%(t(X)%*%Y1+P%*%mu) #média da condicional de fi 
  D<-beta1 +0.5*((t(Y1)%*%Y1+t(mu)%*%solve(P)%*%mu)
                -t((t(X)%*%Y1+solve(P)%*%mu))%*%
                 solve(sigma_M)%*%(t(X)%*%Y1+solve(P)%*%mu))  
  shape1<-0.5*((m*(n+1))+(2*alpha1))
  for(iter in 2:iter)
   {
   fi_est[iter,]<-mvrnorm(n=1, fiB, sigma_e[iter-1]*sigma_M)
   sigma_e[iter,]<-rinvgamma(1,shape1,D+0.5*(t(t(t(fi_est[iter,]))-fiB)%*%solve(sigma_M)%*%
(t(t(fi_est[iter,]))-fiB)))
   }
  a<<-fi_est
  b<<-sigma_e
  h<-as.data.frame(fi_est)
  media1<-mean(h[burnin:iter,])
  media<-matrix(media1,1,(m*p)+m)
  precisao<-mean(sigma_e[burnin:iter,])
  sigma_e1_grupo<-rep(precisao,nrow(dados1))
  dados_agrup[,4]<-t(t(sigma_e1_grupo))
  dados_agrup[,1]<-t(media[1,c(seq(1,3*m,3))])
  dados_agrup[,2]<-t(media[1,c(seq(2,(3*m)-1,3))])
  dados_agrup[,3]<-t(media[1,c(seq(3,(3*m),3))])
  dados_agrup1<-cbind(dados1[,2],dados1[,1],dados_agrup)
  #return(list(fi_est=fi_est,sigma_e=sigma_e,media=media,precisao=precisao,dados_agrup=dados_agrup,
dados_agrup1=dados_agrup1))
 # return(list(dados_agrup1=dados_agrup1,fi_est=fi_est,sigma_e=sigma_e))
  return(list(dados_agrup1=dados_agrup1))
}
####################################################################################################

#2.2 - Paste the "grouping" function in R

####"grouping" function ###############
 
grouping<-function(dados_agr,dados1)
{
  mdist<-dist(dados[,3:6], method="euclidean")
  agrup<-hclust(mdist^2,method ="ward")
  mojema=mean(agrup$height)+1.25*sd(agrup$height)
  k=length(agrup$height[agrup$height>mojema]) + 1
  grupos<-cutree(agrup, k=k)
  grupos1<<-as.data.frame(grupos)
  dados1<-cbind(grupos,dados1[,-1] )
  return(list(dados1=dados1,grupos=grupos))
} 

####################################################################################################

#3- Install and load the MCMCpack and  mnormt packages

####################################################################################################

install.packages("MCMCpack")
install.packages("mnormt")
library(MCMCpack)
library(mnormt)

####################################################################################################

#4. Read the data set containing only genes whose expression series were modeled by an AR(2) 
    panel data model (simplest multi-parametric model).

###################################################################################################

#The first column refers to the indentification of genes and the others are  values of the 
 fold-change in different evaluated time points 
dados<-read.table(" "data path"\\data.set.txt",header=T) 
grupos<-rep(1,nrow(dados))
dados2<-cbind(grupos,dados)
dados1<-dados2


####################################################################################################

#5. Bayesian model-based clustering of temporal gene expression using autoregressive panel 
    data approach

####################################################################################################

grupo_teste<-matrix(0,100,ncol=nrow(dados1))
i<-2
repeat
{
x<-gibbspanel(dados1,20000,2,2000)
agr<-grouping(x$dados_agrup1,dados1)
grupo_teste[i,]<-t(as.matrix(agr$grupos))
x1<-by(agr$dados1,agr$dados1[,1], function(x) gibbspanel(x,10000,2,2000))
x2<-do.call("rbind",x1)
x2<-x2[order(x2[,1]),]
grupos1<-t(t(agr$grupo))
dados1<-cbind(grupos1,dados1[,-1]) 
if(sum(grupo_teste[i,]-grupo_teste[i-1,])==0)
break
i<-i+1
}
x2

####################################################################################################

#6. In each group from the previous step, the Bayesian forecasting of temporal gene expression 
    it was applied

####################################################################################################

kmean<-matrix((X%*%t(a[,1:(m*p)])),iter,m)
pred_norm<-matrix((mvrnorm(iter,Kmean,diag(iter))),iter,m)*fc
mean_prednorm<-matrix(0,1,m)
 for (v in 1:(ncol(pred_norm)))
 {
  mean_prednorm[v]<-mean(pred_norm[,v])
 }

####################################################################################################

 


© 2020 Universidade Federal de Viçosa - Todos os Direitos Reservados