Prof. Moysés Nascimento – 5.Article
5.Article: Bayesian forecasting of temporal gene expression by using an autoregressive panel data approach. Genetics and Molecular Reserach (accept).
1. R code
#1 - Through model selection criteria (AIC, BIC, etc. ..), the groups of genes sharing similar temporal pattern have been identified. #2 - In each group from the previous step, the Bayesian model-based clustering was applied following the methodology proposed by Nascimento et al. (2012). #2.1 - Paste the gibbspanel function in R ###################################### Gibbspanel function ######################################### gibbspanel<-function(dados1,iter,nlag,burnin) { m<-nrow(dados1) p<-2 fi_est<-matrix(0,nrow=iter,(nrow(dados1)*p)+nrow(dados1)) sigma_e<-matrix(0, nrow=iter,1) fi_est[1,]<- t(t(fi_est[1,])) sigma_e[1]<-0.05 dados_agrup<-matrix(0,nrow=nrow(dados1),p+2) m=nrow(dados1) n=ncol(dados1)-2 P=diag(1,(m*p)+m,(m*p)+m) mu=matrix(0,(m*p)+m,1) alpha1=50 beta1=49 y<-dados1[,3:ncol(dados1)] c<-matrix(t(y[,1:(n-p)]),m*(n-p),1) X_aux<-matrix(0,m*(n-p),m*(p+1)) M<-matrix(1,n-p,3) #3= número de parâmetros X_auxi<-kronecker(diag(1, m), M) Xi<-matrix(cbind(1,b,c),m*(n-p),m*(p+1)) X<-Xi*X_auxi Y1<-matrix(t(y[,(p+1):n]),m*(n-p),1) sigma_M<-t(X)%*%X+solve(P) #matriz covariância da condicional de fi fiB<-solve(sigma_M)%*%(t(X)%*%Y1+P%*%mu) #média da condicional de fi D<-beta1 +0.5*((t(Y1)%*%Y1+t(mu)%*%solve(P)%*%mu) -t((t(X)%*%Y1+solve(P)%*%mu))%*% solve(sigma_M)%*%(t(X)%*%Y1+solve(P)%*%mu)) shape1<-0.5*((m*(n+1))+(2*alpha1)) for(iter in 2:iter) { fi_est[iter,]<-mvrnorm(n=1, fiB, sigma_e[iter-1]*sigma_M) sigma_e[iter,]<-rinvgamma(1,shape1,D+0.5*(t(t(t(fi_est[iter,]))-fiB)%*%solve(sigma_M)%*% (t(t(fi_est[iter,]))-fiB))) } a<<-fi_est b<<-sigma_e h<-as.data.frame(fi_est) media1<-mean(h[burnin:iter,]) media<-matrix(media1,1,(m*p)+m) precisao<-mean(sigma_e[burnin:iter,]) sigma_e1_grupo<-rep(precisao,nrow(dados1)) dados_agrup[,4]<-t(t(sigma_e1_grupo)) dados_agrup[,1]<-t(media[1,c(seq(1,3*m,3))]) dados_agrup[,2]<-t(media[1,c(seq(2,(3*m)-1,3))]) dados_agrup[,3]<-t(media[1,c(seq(3,(3*m),3))]) dados_agrup1<-cbind(dados1[,2],dados1[,1],dados_agrup) #return(list(fi_est=fi_est,sigma_e=sigma_e,media=media,precisao=precisao,dados_agrup=dados_agrup, dados_agrup1=dados_agrup1)) # return(list(dados_agrup1=dados_agrup1,fi_est=fi_est,sigma_e=sigma_e)) return(list(dados_agrup1=dados_agrup1)) } #################################################################################################### #2.2 - Paste the "grouping" function in R ####"grouping" function ############### grouping<-function(dados_agr,dados1) { mdist<-dist(dados[,3:6], method="euclidean") agrup<-hclust(mdist^2,method ="ward") mojema=mean(agrup$height)+1.25*sd(agrup$height) k=length(agrup$height[agrup$height>mojema]) + 1 grupos<-cutree(agrup, k=k) grupos1<<-as.data.frame(grupos) dados1<-cbind(grupos,dados1[,-1] ) return(list(dados1=dados1,grupos=grupos)) } #################################################################################################### #3- Install and load the MCMCpack and mnormt packages #################################################################################################### install.packages("MCMCpack") install.packages("mnormt") library(MCMCpack) library(mnormt) #################################################################################################### #4. Read the data set containing only genes whose expression series were modeled by an AR(2) panel data model (simplest multi-parametric model). ################################################################################################### #The first column refers to the indentification of genes and the others are values of the fold-change in different evaluated time points dados<-read.table(" "data path"\\data.set.txt",header=T) grupos<-rep(1,nrow(dados)) dados2<-cbind(grupos,dados) dados1<-dados2 #################################################################################################### #5. Bayesian model-based clustering of temporal gene expression using autoregressive panel data approach #################################################################################################### grupo_teste<-matrix(0,100,ncol=nrow(dados1)) i<-2 repeat { x<-gibbspanel(dados1,20000,2,2000) agr<-grouping(x$dados_agrup1,dados1) grupo_teste[i,]<-t(as.matrix(agr$grupos)) x1<-by(agr$dados1,agr$dados1[,1], function(x) gibbspanel(x,10000,2,2000)) x2<-do.call("rbind",x1) x2<-x2[order(x2[,1]),] grupos1<-t(t(agr$grupo)) dados1<-cbind(grupos1,dados1[,-1]) if(sum(grupo_teste[i,]-grupo_teste[i-1,])==0) break i<-i+1 } x2 #################################################################################################### #6. In each group from the previous step, the Bayesian forecasting of temporal gene expression it was applied #################################################################################################### kmean<-matrix((X%*%t(a[,1:(m*p)])),iter,m) pred_norm<-matrix((mvrnorm(iter,Kmean,diag(iter))),iter,m)*fc mean_prednorm<-matrix(0,1,m) for (v in 1:(ncol(pred_norm))) { mean_prednorm[v]<-mean(pred_norm[,v]) } ####################################################################################################
|