Prof. Moysés Nascimento – 9.Article
9. Article: Factor analysis applied to genome prediction for high-dimensional phenotypes in pigs. Genetics and Molecular Reserach
1. R Code
##### READING DATABASE ##### dados<-read.table("C:/Users/Filipe/Desktop/Dissertação/Bancos de dados/bdartigo2.txt",h=T) mapa<-read.table("C:/Users/Filipe/Desktop/Dissertação/Bancos de dados/mapa2.txt",h=T) attach(dados) head(dados) ##### LOADING PACKAGES ##### library("psych") library("GPArotation") library("BGLR") library("gap") ##### SEPARATING THE VARIABLES AND REMOVING THE FIXED EFFECTS ##### v<-matrix(NA,345,41) for (i in 5:45){ v[,i-4]<-dados[,i] } head(v) v # CORRECTION OF v MATRIX sexo<-dados[,2] lote<-dados[,3] hal<-dados[,4] vcor<-matrix(NA,345,41) for (i in 1:41){ vcor[,i]<-lm(v[,i]~sexo+lote+hal)$residuals } ##### VARIABLES ASSOCIATED WITH FACTOR 2 (FAT) ##### ETSH<-vcor[,9] # SBT ETUC<-vcor[,10] # LR ETUL<-vcor[,11] # LL ETL<-vcor[,12] # L ETO<-vcor[,13] # BFT EBACON<-vcor[,14] # BCD PBR<-vcor[,27] # AF ##### OTHER VARIABLES ##### PCARC<-vcor[,1] PCD<-vcor[,2] TLD<-vcor[,3] TLN<-vcor[,4] IDA<-vcor[,5] RCARC<-vcor[,6] MBCC<-vcor[,7] MLC<-vcor[,8] PROLOM<-vcor[,15] AOL<-vcor[,16] CORAC<-vcor[,17] PP<-vcor[,18] PPL<-vcor[,19] PCOPA<-vcor[,20] PPA<-vcor[,21] PC<-vcor[,22] PL<-vcor[,23] PB<-vcor[,24] PCOST<-vcor[,25] PF<-vcor[,26] CR<-vcor[,28] GPD<-vcor[,29] CA<-vcor[,30] NT<-vcor[,31] PA<-vcor[,32] PN<-vcor[,33] ph45<-vcor[,34] ph24<-vcor[,35] L<-vcor[,36] GOINTR<-vcor[,37] PGOTEJ<-vcor[,38] PCOZ<-vcor[,39] MACIEZ<-vcor[,40] C<-vcor[,41] ##### KMO, SCREE-PLOT AND FACTOR ANALYSIS ##### C<-cor(vcor) KMO(C) autos<-eigen(C) plot(seq(1:ncol(vcor)),autos$values,type="l",ylab="Autovalores",xlab="Fatores") AF<-principal(vcor,nfactors=10,rotate="varimax") AF escores<-AF$scores # FA SCORES head(escores) e.peso<-escores[,1] # WEIGHT (FACTOR 1) e.gordura<-escores[,2] # FAT (FACTOR 2) e.lombo<-escores[,3] # LOIN (FACTOR 3) e.desempenho<-escores[,4] # PERFORMANCE (FACTOR 4) ##### APPLYING GWS METHODOLOGIES FOR FACTOR 1 ##### ##### PHENOTYPIC AND GENOTYPIC DATA ##### geno<-dados[,46:282] head(geno) dim(geno) feno<-e.peso feno<-as.matrix(feno) rownames(feno)<-c(ID) feno #### CROSS VALIDATION (K-FOLDS WITH K=3) ##### ##### PHENOTYPIC DATA ##### # Populações individuais: feno1=feno[1:115] feno1=as.matrix(feno1) dim(feno1) feno2=feno[116:230] feno2=as.matrix(feno2) feno3=feno[231:345] feno3=as.matrix(feno3) # Populações agrupadas: feno12=feno[1:230] feno12=as.matrix(feno12) feno13=rbind(feno1,feno3) feno23=feno[116:345] feno23=as.matrix(feno23) ##### GENOTYPIC DATA ##### ### Populações individuais ### geno1=geno[1:115,] geno1=as.matrix(geno1) geno2=geno[116:230,] geno2=as.matrix(geno2) geno3=geno[231:345,] geno3=as.matrix(geno3) ### Populações agrupadas ### geno12=geno[1:230,] geno12=as.matrix(geno12) geno13=rbind(geno1,geno3) geno23=geno[116:345,] geno23=as.matrix(geno23) ##### ALLELIC FREQUENCIES ##### p2=matrix(0,ncol(geno),1) q2=matrix(0,ncol(geno),1) for(i in 1:ncol(geno)) { q2[i,]=(2*length(which(geno[,i]==0))+length(which(geno[,i]==1)))/(2*length(which(geno[,i]==0))+ 2*length(which(geno[,i]==1))+2*length(which(geno[,i]==2))) p2[i,]=(length(which(geno[,i]==1))+2*length(which(geno[,i]==2)))/(2*length(which(geno[,i]==0))+ 2*length(which(geno[,i]==1))+2*length(which(geno[,i]==2))) } ### Títulos das linhas (para seleção) ### nlinhas=dados[,1] np1=nlinhas[1:115] # população 1 np2=nlinhas[116:230] # população 2 np3=nlinhas[231:345] # população 3 ############### APLICANDO BLASSO ############### ##### BLASSO FOR FACTOR 2 (FAT) ##### ######### PHENOTYPIC AND GENOTIPIC DATA ######### geno<-dados[,46:282] head(geno) dim(geno) feno<-e.gordura # FAT feno<-as.matrix(feno) rownames(feno)<-c(ID) feno ##### CROSS-VALIDATION (K-FOLDS WITH K=3) ##### ##### PHENOTYPIC DATA ##### # INDIVIDUAL POPULATIONS: feno1=feno[1:115] feno1=as.matrix(feno1) dim(feno1) feno2=feno[116:230] feno2=as.matrix(feno2) feno3=feno[231:345] feno3=as.matrix(feno3) # GROUP POPULATIONS: feno12=feno[1:230] feno12=as.matrix(feno12) feno13=rbind(feno1,feno3) feno23=feno[116:345] feno23=as.matrix(feno23) ##### GENOTIPIC DATA ##### ### INDIVIDUAL POPULATIONS ### geno1=geno[1:115,] geno1=as.matrix(geno1) geno2=geno[116:230,] geno2=as.matrix(geno2) geno3=geno[231:345,] geno3=as.matrix(geno3) ### GROUPED POPULATIONS ### geno12=geno[1:230,] geno12=as.matrix(geno12) geno13=rbind(geno1,geno3) geno23=geno[116:345,] geno23=as.matrix(geno23) ### ALLELIC FREQUENCY ### p2=matrix(0,ncol(geno),1) q2=matrix(0,ncol(geno),1) for(i in 1:ncol(geno)) { q2[i,]=(2*length(which(geno[,i]==0))+length(which(geno[,i]==1)))/(2*length(which(geno[,i]==0))+ 2*length(which(geno[,i]==1))+2*length(which(geno[,i]==2))) p2[i,]=(length(which(geno[,i]==1))+2*length(which(geno[,i]==2)))/(2*length(which(geno[,i]==0))+ 2*length(which(geno[,i]==1))+2*length(which(geno[,i]==2))) } ### LINE TITLES (FOR SELECTION) ### nlinhas=dados[,1] np1=nlinhas[1:115] # população 1 np2=nlinhas[116:230] # população 2 np3=nlinhas[231:345] # população 3 ### TRAINING: 1 e 2 | VALIDATION: 3 ### BL1=BGLR(y=feno12,ETA=list(list(X=geno12,model='BL')),nIter=100000,burnIn=20000,thin=10) ahat_BL1=BL1$ETA[[1]]$b #vetor de efeitos estimados SNPs BLASSO gebv_val_BL3=geno3%*%BL1$ETA[[1]]$b cor_BL1=cor(feno3,gebv_val_BL3) cor_BL1 ## Herdabilidade ## v=BL1$ETA[[1]]$tau2 Ve=BL1$varE t=matrix(v)*BL1$varE Va=sum(2*p2*q2*t) Vfen=Va+Ve h2aBL1=Va/Vfen h2aBL1 acurBL1<-cor_BL1/sqrt(h2aBL1) acurBL1 ### SELECTING BEST 10% INDIVIDUALS ### rownames(gebv_val_BL3)=np3 top10_BL=quantile(gebv_val_BL3[,1], probs = c(0.9)) #top10% top10_BL # IDs selecionados e respectivos GEBVs idsel_BL.f1=data.frame(gebv_val_BL3[,1][gebv_val_BL3[,1]>=top10_BL]) colnames(idsel_BL.f1)=c("GEBV") idsel_BL.f1 # indivíduos que serão selecionados (fator 1) id.f23<-rownames(idsel_BL.f1) id.f23 ### TRAINING: 1 e 3 | VALIDATION: 2 ### BL2=BGLR(y=feno13,ETA=list(list(X=geno13,model='BL')),nIter=100000,burnIn=20000,thin=10) ahat_BL2=BL2$ETA[[1]]$b #vetor de efeitos estimados SNPs BRR gebv_val_BL2=geno2%*%BL2$ETA[[1]]$b cor_BL2=cor(feno2,gebv_val_BL2) cor_BL2 ## Herdabilidade ## v=BL2$ETA[[1]]$tau2 Ve=BL2$varE t=matrix(v)*BL2$varE Va=sum(2*p2*q2*t) Vfen=Va+Ve h2aBL2=Va/Vfen h2aBL2 acurBL2<-cor_BL2/sqrt(h2aBL2) acurBL2 ### SELECTING BEST 10% INDIVIDUALS #### rownames(gebv_val_BL2)=np2 top10_BL=quantile(gebv_val_BL2[,1], probs = c(0.9)) #top10% top10_BL # IDs selecionados e respectivos GEBVs idsel_BL.f2=data.frame(gebv_val_BL2[,1][gebv_val_BL2[,1]>=top10_BL]) colnames(idsel_BL.f2)=c("GEBV") idsel_BL.f2 # indivíduos que serão selecionados (fator 2) id.f22<-rownames(idsel_BL.f2) id.f22 ### Treinamento: 2 e 3 | Validação: 1 ### BL3=BGLR(y=feno23,ETA=list(list(X=geno23,model='BL')),nIter=100000,burnIn=20000,thin=10) ahat_BL3=BL3$ETA[[1]]$b #vetor de efeitos estimados SNPs BRR gebv_val_BL1=geno1%*%BL3$ETA[[1]]$b cor_BL3=cor(feno1,gebv_val_BL1) cor_BL3 ## Herdabilidade ## v=BL3$ETA[[1]]$tau2 Ve=BL3$varE t=matrix(v)*BL3$varE Va=sum(2*p2*q2*t) Vfen=Va+Ve h2aBL3=Va/Vfen h2aBL3 acurBL3<-cor_BL3/sqrt(h2aBL3) acurBL3 maBL.f2<-(acurBL1+acurBL2+acurBL3)/3 maBL.f2 # média das acurácias mc.f2<-(cor_BL1+cor_BL2+cor_BL3)/3 mc.f2 # média das capacidades preditivas mhBL.f2<-(h2aBL1+h2aBL2+h2aBL3)/3 mhBL.f2 # média das herdabilidades BLASSO ### SELECTING BEST 10% INDIVIDUALS ### rownames(gebv_val_BL1)=np1 top10_BL=quantile(gebv_val_BL1[,1], probs = c(0.9)) #top10% top10_BL # IDs selecionados e respectivos GEBVs idsel_BL.f2=data.frame(gebv_val_BL1[,1][gebv_val_BL1[,1]>=top10_BL]) colnames(idsel_BL.f2)=c("GEBV") idsel_BL.f2 # indivíduos que serão selecionados (fator 2) id.f21<-rownames(idsel_BL.f2) id.f21 ##### MANHATTAN PLOT FOR FACTOR 2 ##### ahat_BL.f2<-((ahat_BL1+ahat_BL2+ahat_BL3)/3)*3 ahat_BL.f2 graf_BL.f2=cbind(mapa,ahat_BL.f2) par(las=2, xpd=TRUE, cex.axis=0.9, cex=0.8,family="serif") color = rep(c("blue","red"),5) #funcao mhplot - grafico Manhattan mhtpBL.f2<-mhtplot(abs(graf_BL.f2[,-1]),mht.control(logscale=FALSE, colors=color, labels=c("1","4","7","8","17","x"),srt=0), xlab="Chromosome", ylab="SNP effect",ylim=c(0,0.2),pch=12) title("Fator 2") axis(2, at = seq(0, 0.2, by = 0.05)) ##### FOR APLICATION OF BAYES A, BAYES B AND BAYESIAN RR, REPLACE model='BLASSO' FOR model='BayesA', ##### model='BayesB' AND model='BRR', RESPECTIVELY ON BB1, BB2 AND BB3 FUNCTIONS ##### FOR GWS APLICATION ON OTHER VARIABLES, REPLACE THE NAME OF THE VARIABLE ON PHENOTYPIC DATA ##### COHEN'S KAPPA BETWEEN FACTOR 2 AND A VARIABLE (var) USING POPULATION 3 TO VALIDATION ##### id.f23 # GROUP1 id.var3 # GROUP2 ab<-cbind(id.f23,id.pbr3) cont<-0 for(i in 1:nrow(ab)) { for(j in 1:nrow(ab)) { if(ab[i,1]==ab[j,2]) cont<-cont+1 cont<-cont } } c<-cont/nrow(ab) kappa1<- (c-(12/115))/(1-(12/115)) kappa1 ##### COHEN'S KAPPA BETWEEN FACTOR 2 AND A VARIABLE (var) USING POPULATION 2 TO VALIDATION ##### id.f22 id.var2 ab<-cbind(id.f22,id.pbr2) cont<-0 for(i in 1:nrow(ab)) { for(j in 1:nrow(ab)) { if(ab[i,1]==ab[j,2]) cont<-cont+1 cont<-cont } } c<-cont/nrow(ab) kappa2<- (c-(12/115))/(1-(12/115)) kappa2 ##### COHEN'S KAPPA BETWEEN FACTOR 2 AND A VARIABLE (var) USING POPULATION 1 TO VALIDATION ##### id.f21 id.var1 ab<-cbind(id.f21,id.pbr1) cont<-0 for(i in 1:nrow(ab)) { for(j in 1:nrow(ab)) { if(ab[i,1]==ab[j,2]) cont<-cont+1 cont<-cont } } c<-cont/nrow(ab) kappa1<- (c-(12/115))/(1-(12/115)) kappa1 ##### KAPPA BETWEEN FACTOR 2 AND A VARIABLE ASSOCIATED ##### kappav=(kappa1+kappa2+kappa3)/3 |
|
2. Figures
Figure 1. Results of accuracy for factor 1 (weight) and variables associated. CW – Hot carcass weight; RHCW – Right half carcass weight; MBCC – Carcass length by the Brazilian carcass classification method; MLC – Carcass length by the Brazilian carcass classification method; HEART – Heart weight; THW – Total ham weight; HW – Skinless and fatless ham weight; TBSW – Total boston shoulder weight; TPSW – Total picnic shoulder weight; TLW – Total (bone-in) loin weight; BCW – Bacon weight; RW – Rib weight; SLW – Sirloin weight; SW – Slaughter weight.
Figure 2. Results of accuracy for factor 3 (loin) and variables associated. LEA – Loin eye area; LD – Loin depht; LW – Loin weight.
Figure 3. Results of accuracy for factor 4 (performance) and variables associated. SA – Slaughter age; FI – Feed intake; ADG – Average daily gain; BW – Birth weight.