Prof. Moysés Nascimento – 9.Article
9. Article: Factor analysis applied to genome prediction for high-dimensional phenotypes in pigs. Genetics and Molecular Reserach
1. R Code
##### READING DATABASE #####
dados<-read.table("C:/Users/Filipe/Desktop/Dissertação/Bancos de dados/bdartigo2.txt",h=T)
mapa<-read.table("C:/Users/Filipe/Desktop/Dissertação/Bancos de dados/mapa2.txt",h=T)
attach(dados)
head(dados)
##### LOADING PACKAGES #####
library("psych")
library("GPArotation")
library("BGLR")
library("gap")
##### SEPARATING THE VARIABLES AND REMOVING THE FIXED EFFECTS #####
v<-matrix(NA,345,41)
for (i in 5:45){
v[,i-4]<-dados[,i]
}
head(v)
v
# CORRECTION OF v MATRIX
sexo<-dados[,2]
lote<-dados[,3]
hal<-dados[,4]
vcor<-matrix(NA,345,41)
for (i in 1:41){
vcor[,i]<-lm(v[,i]~sexo+lote+hal)$residuals
}
##### VARIABLES ASSOCIATED WITH FACTOR 2 (FAT) #####
ETSH<-vcor[,9] # SBT
ETUC<-vcor[,10] # LR
ETUL<-vcor[,11] # LL
ETL<-vcor[,12] # L
ETO<-vcor[,13] # BFT
EBACON<-vcor[,14] # BCD
PBR<-vcor[,27] # AF
##### OTHER VARIABLES #####
PCARC<-vcor[,1]
PCD<-vcor[,2]
TLD<-vcor[,3]
TLN<-vcor[,4]
IDA<-vcor[,5]
RCARC<-vcor[,6]
MBCC<-vcor[,7]
MLC<-vcor[,8]
PROLOM<-vcor[,15]
AOL<-vcor[,16]
CORAC<-vcor[,17]
PP<-vcor[,18]
PPL<-vcor[,19]
PCOPA<-vcor[,20]
PPA<-vcor[,21]
PC<-vcor[,22]
PL<-vcor[,23]
PB<-vcor[,24]
PCOST<-vcor[,25]
PF<-vcor[,26]
CR<-vcor[,28]
GPD<-vcor[,29]
CA<-vcor[,30]
NT<-vcor[,31]
PA<-vcor[,32]
PN<-vcor[,33]
ph45<-vcor[,34]
ph24<-vcor[,35]
L<-vcor[,36]
GOINTR<-vcor[,37]
PGOTEJ<-vcor[,38]
PCOZ<-vcor[,39]
MACIEZ<-vcor[,40]
C<-vcor[,41]
##### KMO, SCREE-PLOT AND FACTOR ANALYSIS #####
C<-cor(vcor)
KMO(C)
autos<-eigen(C)
plot(seq(1:ncol(vcor)),autos$values,type="l",ylab="Autovalores",xlab="Fatores")
AF<-principal(vcor,nfactors=10,rotate="varimax")
AF
escores<-AF$scores # FA SCORES
head(escores)
e.peso<-escores[,1] # WEIGHT (FACTOR 1)
e.gordura<-escores[,2] # FAT (FACTOR 2)
e.lombo<-escores[,3] # LOIN (FACTOR 3)
e.desempenho<-escores[,4] # PERFORMANCE (FACTOR 4)
##### APPLYING GWS METHODOLOGIES FOR FACTOR 1 #####
##### PHENOTYPIC AND GENOTYPIC DATA #####
geno<-dados[,46:282]
head(geno)
dim(geno)
feno<-e.peso
feno<-as.matrix(feno)
rownames(feno)<-c(ID)
feno
#### CROSS VALIDATION (K-FOLDS WITH K=3) #####
##### PHENOTYPIC DATA #####
# Populações individuais:
feno1=feno[1:115]
feno1=as.matrix(feno1)
dim(feno1)
feno2=feno[116:230]
feno2=as.matrix(feno2)
feno3=feno[231:345]
feno3=as.matrix(feno3)
# Populações agrupadas:
feno12=feno[1:230]
feno12=as.matrix(feno12)
feno13=rbind(feno1,feno3)
feno23=feno[116:345]
feno23=as.matrix(feno23)
##### GENOTYPIC DATA #####
### Populações individuais ###
geno1=geno[1:115,]
geno1=as.matrix(geno1)
geno2=geno[116:230,]
geno2=as.matrix(geno2)
geno3=geno[231:345,]
geno3=as.matrix(geno3)
### Populações agrupadas ###
geno12=geno[1:230,]
geno12=as.matrix(geno12)
geno13=rbind(geno1,geno3)
geno23=geno[116:345,]
geno23=as.matrix(geno23)
##### ALLELIC FREQUENCIES #####
p2=matrix(0,ncol(geno),1)
q2=matrix(0,ncol(geno),1)
for(i in 1:ncol(geno))
{
q2[i,]=(2*length(which(geno[,i]==0))+length(which(geno[,i]==1)))/(2*length(which(geno[,i]==0))+
2*length(which(geno[,i]==1))+2*length(which(geno[,i]==2)))
p2[i,]=(length(which(geno[,i]==1))+2*length(which(geno[,i]==2)))/(2*length(which(geno[,i]==0))+
2*length(which(geno[,i]==1))+2*length(which(geno[,i]==2)))
}
### Títulos das linhas (para seleção) ###
nlinhas=dados[,1]
np1=nlinhas[1:115] # população 1
np2=nlinhas[116:230] # população 2
np3=nlinhas[231:345] # população 3
############### APLICANDO BLASSO ###############
##### BLASSO FOR FACTOR 2 (FAT) #####
######### PHENOTYPIC AND GENOTIPIC DATA #########
geno<-dados[,46:282]
head(geno)
dim(geno)
feno<-e.gordura # FAT
feno<-as.matrix(feno)
rownames(feno)<-c(ID)
feno
##### CROSS-VALIDATION (K-FOLDS WITH K=3) #####
##### PHENOTYPIC DATA #####
# INDIVIDUAL POPULATIONS:
feno1=feno[1:115]
feno1=as.matrix(feno1)
dim(feno1)
feno2=feno[116:230]
feno2=as.matrix(feno2)
feno3=feno[231:345]
feno3=as.matrix(feno3)
# GROUP POPULATIONS:
feno12=feno[1:230]
feno12=as.matrix(feno12)
feno13=rbind(feno1,feno3)
feno23=feno[116:345]
feno23=as.matrix(feno23)
##### GENOTIPIC DATA #####
### INDIVIDUAL POPULATIONS ###
geno1=geno[1:115,]
geno1=as.matrix(geno1)
geno2=geno[116:230,]
geno2=as.matrix(geno2)
geno3=geno[231:345,]
geno3=as.matrix(geno3)
### GROUPED POPULATIONS ###
geno12=geno[1:230,]
geno12=as.matrix(geno12)
geno13=rbind(geno1,geno3)
geno23=geno[116:345,]
geno23=as.matrix(geno23)
### ALLELIC FREQUENCY ###
p2=matrix(0,ncol(geno),1)
q2=matrix(0,ncol(geno),1)
for(i in 1:ncol(geno))
{
q2[i,]=(2*length(which(geno[,i]==0))+length(which(geno[,i]==1)))/(2*length(which(geno[,i]==0))+
2*length(which(geno[,i]==1))+2*length(which(geno[,i]==2)))
p2[i,]=(length(which(geno[,i]==1))+2*length(which(geno[,i]==2)))/(2*length(which(geno[,i]==0))+
2*length(which(geno[,i]==1))+2*length(which(geno[,i]==2)))
}
### LINE TITLES (FOR SELECTION) ###
nlinhas=dados[,1]
np1=nlinhas[1:115] # população 1
np2=nlinhas[116:230] # população 2
np3=nlinhas[231:345] # população 3
### TRAINING: 1 e 2 | VALIDATION: 3 ###
BL1=BGLR(y=feno12,ETA=list(list(X=geno12,model='BL')),nIter=100000,burnIn=20000,thin=10)
ahat_BL1=BL1$ETA[[1]]$b #vetor de efeitos estimados SNPs BLASSO
gebv_val_BL3=geno3%*%BL1$ETA[[1]]$b
cor_BL1=cor(feno3,gebv_val_BL3)
cor_BL1
## Herdabilidade ##
v=BL1$ETA[[1]]$tau2
Ve=BL1$varE
t=matrix(v)*BL1$varE
Va=sum(2*p2*q2*t)
Vfen=Va+Ve
h2aBL1=Va/Vfen
h2aBL1
acurBL1<-cor_BL1/sqrt(h2aBL1)
acurBL1
### SELECTING BEST 10% INDIVIDUALS ###
rownames(gebv_val_BL3)=np3
top10_BL=quantile(gebv_val_BL3[,1], probs = c(0.9)) #top10%
top10_BL # IDs selecionados e respectivos GEBVs
idsel_BL.f1=data.frame(gebv_val_BL3[,1][gebv_val_BL3[,1]>=top10_BL])
colnames(idsel_BL.f1)=c("GEBV")
idsel_BL.f1 # indivíduos que serão selecionados (fator 1)
id.f23<-rownames(idsel_BL.f1)
id.f23
### TRAINING: 1 e 3 | VALIDATION: 2 ###
BL2=BGLR(y=feno13,ETA=list(list(X=geno13,model='BL')),nIter=100000,burnIn=20000,thin=10)
ahat_BL2=BL2$ETA[[1]]$b #vetor de efeitos estimados SNPs BRR
gebv_val_BL2=geno2%*%BL2$ETA[[1]]$b
cor_BL2=cor(feno2,gebv_val_BL2)
cor_BL2
## Herdabilidade ##
v=BL2$ETA[[1]]$tau2
Ve=BL2$varE
t=matrix(v)*BL2$varE
Va=sum(2*p2*q2*t)
Vfen=Va+Ve
h2aBL2=Va/Vfen
h2aBL2
acurBL2<-cor_BL2/sqrt(h2aBL2)
acurBL2
### SELECTING BEST 10% INDIVIDUALS ####
rownames(gebv_val_BL2)=np2
top10_BL=quantile(gebv_val_BL2[,1], probs = c(0.9)) #top10%
top10_BL # IDs selecionados e respectivos GEBVs
idsel_BL.f2=data.frame(gebv_val_BL2[,1][gebv_val_BL2[,1]>=top10_BL])
colnames(idsel_BL.f2)=c("GEBV")
idsel_BL.f2 # indivíduos que serão selecionados (fator 2)
id.f22<-rownames(idsel_BL.f2)
id.f22
### Treinamento: 2 e 3 | Validação: 1 ###
BL3=BGLR(y=feno23,ETA=list(list(X=geno23,model='BL')),nIter=100000,burnIn=20000,thin=10)
ahat_BL3=BL3$ETA[[1]]$b #vetor de efeitos estimados SNPs BRR
gebv_val_BL1=geno1%*%BL3$ETA[[1]]$b
cor_BL3=cor(feno1,gebv_val_BL1)
cor_BL3
## Herdabilidade ##
v=BL3$ETA[[1]]$tau2
Ve=BL3$varE
t=matrix(v)*BL3$varE
Va=sum(2*p2*q2*t)
Vfen=Va+Ve
h2aBL3=Va/Vfen
h2aBL3
acurBL3<-cor_BL3/sqrt(h2aBL3)
acurBL3
maBL.f2<-(acurBL1+acurBL2+acurBL3)/3
maBL.f2 # média das acurácias
mc.f2<-(cor_BL1+cor_BL2+cor_BL3)/3
mc.f2 # média das capacidades preditivas
mhBL.f2<-(h2aBL1+h2aBL2+h2aBL3)/3
mhBL.f2 # média das herdabilidades BLASSO
### SELECTING BEST 10% INDIVIDUALS ###
rownames(gebv_val_BL1)=np1
top10_BL=quantile(gebv_val_BL1[,1], probs = c(0.9)) #top10%
top10_BL # IDs selecionados e respectivos GEBVs
idsel_BL.f2=data.frame(gebv_val_BL1[,1][gebv_val_BL1[,1]>=top10_BL])
colnames(idsel_BL.f2)=c("GEBV")
idsel_BL.f2 # indivíduos que serão selecionados (fator 2)
id.f21<-rownames(idsel_BL.f2)
id.f21
##### MANHATTAN PLOT FOR FACTOR 2 #####
ahat_BL.f2<-((ahat_BL1+ahat_BL2+ahat_BL3)/3)*3
ahat_BL.f2
graf_BL.f2=cbind(mapa,ahat_BL.f2)
par(las=2, xpd=TRUE, cex.axis=0.9, cex=0.8,family="serif")
color = rep(c("blue","red"),5)
#funcao mhplot - grafico Manhattan
mhtpBL.f2<-mhtplot(abs(graf_BL.f2[,-1]),mht.control(logscale=FALSE, colors=color,
labels=c("1","4","7","8","17","x"),srt=0),
xlab="Chromosome", ylab="SNP effect",ylim=c(0,0.2),pch=12)
title("Fator 2")
axis(2, at = seq(0, 0.2, by = 0.05))
##### FOR APLICATION OF BAYES A, BAYES B AND BAYESIAN RR, REPLACE model='BLASSO' FOR model='BayesA',
##### model='BayesB' AND model='BRR', RESPECTIVELY ON BB1, BB2 AND BB3 FUNCTIONS
##### FOR GWS APLICATION ON OTHER VARIABLES, REPLACE THE NAME OF THE VARIABLE ON PHENOTYPIC DATA
##### COHEN'S KAPPA BETWEEN FACTOR 2 AND A VARIABLE (var) USING POPULATION 3 TO VALIDATION #####
id.f23 # GROUP1
id.var3 # GROUP2
ab<-cbind(id.f23,id.pbr3)
cont<-0
for(i in 1:nrow(ab))
{
for(j in 1:nrow(ab))
{
if(ab[i,1]==ab[j,2])
cont<-cont+1
cont<-cont
}
}
c<-cont/nrow(ab)
kappa1<- (c-(12/115))/(1-(12/115))
kappa1
##### COHEN'S KAPPA BETWEEN FACTOR 2 AND A VARIABLE (var) USING POPULATION 2 TO VALIDATION #####
id.f22
id.var2
ab<-cbind(id.f22,id.pbr2)
cont<-0
for(i in 1:nrow(ab))
{
for(j in 1:nrow(ab))
{
if(ab[i,1]==ab[j,2])
cont<-cont+1
cont<-cont
}
}
c<-cont/nrow(ab)
kappa2<- (c-(12/115))/(1-(12/115))
kappa2
##### COHEN'S KAPPA BETWEEN FACTOR 2 AND A VARIABLE (var) USING POPULATION 1 TO VALIDATION #####
id.f21
id.var1
ab<-cbind(id.f21,id.pbr1)
cont<-0
for(i in 1:nrow(ab))
{
for(j in 1:nrow(ab))
{
if(ab[i,1]==ab[j,2])
cont<-cont+1
cont<-cont
}
}
c<-cont/nrow(ab)
kappa1<- (c-(12/115))/(1-(12/115))
kappa1
##### KAPPA BETWEEN FACTOR 2 AND A VARIABLE ASSOCIATED #####
kappav=(kappa1+kappa2+kappa3)/3
|
|
|
2. Figures
Figure 1. Results of accuracy for factor 1 (weight) and variables associated. CW – Hot carcass weight; RHCW – Right half carcass weight; MBCC – Carcass length by the Brazilian carcass classification method; MLC – Carcass length by the Brazilian carcass classification method; HEART – Heart weight; THW – Total ham weight; HW – Skinless and fatless ham weight; TBSW – Total boston shoulder weight; TPSW – Total picnic shoulder weight; TLW – Total (bone-in) loin weight; BCW – Bacon weight; RW – Rib weight; SLW – Sirloin weight; SW – Slaughter weight.
Figure 2. Results of accuracy for factor 3 (loin) and variables associated. LEA – Loin eye area; LD – Loin depht; LW – Loin weight.
Figure 3. Results of accuracy for factor 4 (performance) and variables associated. SA – Slaughter age; FI – Feed intake; ADG – Average daily gain; BW – Birth weight.






