Script Use of Lexicometry in Sensometrics

4.5. MFACT Plane

Wine and Word Configuration on the First Plane. Figure 2 shows the configuration of the eight wines on the first MFACT plane.

Global representation of the documents
# Map of the individuals (the wine documents) from the MFACT result;
# all eight labels share the same dark grey colour.
plot.MFA(
  res.mfact.23,
  choix      = "ind",
  col.hab    = rep("grey30", times = 8),
  title      = "Global representation of the wines-documents",
  cex        = 1,
  graph.type = "classic"
)

Another global representation of the documents

# Coordinates of the individuals (wines) as a data frame for ggplot.
dfp <- as.data.frame(res.mfact.23$ind$coord)
ax1 <- 1
ax2 <- 2
# Axis titles built from column 2 of the eigenvalue table, rounded to one
# decimal and displayed as a percentage.
labx <- paste0("Dim 1 (", round(res.mfact.23$eig[ax1, 2], 1), "%)")
laby <- paste0("Dim 2 (", round(res.mfact.23$eig[ax2, 2], 1), "%)")

Figure2 <- ggplot(dfp, aes(x = Dim.1, y = Dim.2, label = rownames(dfp))) +
  theme_light() +
  coord_fixed() +
  labs(
    x = labx,
    y = laby,
    title = "Figure 2. Global representation of the wines documents"
  ) +
  # Dashed grey reference lines through the origin.
  geom_hline(yintercept = 0, linetype = "dashed", color = "grey") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "grey") +
  # Green rectangle with a solid outline, stretched along Dim 1.
  ggplot2::annotate(
    "rect",
    xmin = -6, xmax = 8, ymin = -2.1, ymax = 2.2,
    alpha = .2, fill = "green", color = "black", linewidth = 1
  ) +
  # Brown rectangle with a dashed outline, stretched along Dim 2.
  ggplot2::annotate(
    "rect",
    xmin = -3, xmax = 2, ymin = -6.3, ymax = 5.8,
    alpha = .2, fill = "brown", color = "black", linewidth = 1, lty = "dashed"
  ) +
  # Wine labels are drawn after the large rectangles so they sit on top.
  geom_text(size = 5, fontface = "bold") +
  # Hand-made legend: two text labels, each with a small swatch to its left
  # (brown/dashed next to "Grenache wines", green/solid next to
  # "Carignan wines").
  ggplot2::annotate(
    "text", x = 6.2, y = 5.2,
    label = "Grenache wines", color = "black", size = 5
  ) +
  ggplot2::annotate(
    "text", x = 6.2, y = 4.6,
    label = "Carignan wines", color = "black", size = 5
  ) +
  ggplot2::annotate(
    "rect",
    xmin = 3.5, xmax = 3.9, ymin = 4.9, ymax = 5.5,
    alpha = .2, fill = "brown", color = "black", linewidth = 1, lty = "dashed"
  ) +
  ggplot2::annotate(
    "rect",
    xmin = 3.5, xmax = 3.9, ymin = 4.2, ymax = 4.8,
    alpha = .2, fill = "green", color = "black", linewidth = 1
  ) +
  # All theme tweaks gathered in one call, applied on top of theme_light().
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(colour = "black", fill = NA, linewidth = 1),
    plot.margin = grid::unit(c(t = 0, r = 2, b = 5, l = 2), "mm"),
    axis.title.x = element_text(
      size = 15, face = "bold", vjust = -2, hjust = 0.5
    ),
    axis.title.y = element_text(size = 15, face = "bold"),
    axis.text.x = element_text(size = rel(1.6)),
    axis.text.y = element_text(size = rel(1.6))
  )
Figure2

 

 

Characteristic words of the three poles

There are three clusters or poles in the first MFA plane:

  • Cluster 1: (pole1) “PC06” “EC05” # Right of first factorial plane (first and second quadrants)
  • Cluster 2: (pole2) “PG05” “PG06” “PC05” “EC06” # (Second quadrant)
  • Cluster 3: (pole3) “EG05” “EG06” # Lower part of Dim 2 (third quadrant)

In order to position the characteristic words on the first factorial plane, the LexChar function of the Xplortext package will be used. Given the large number of words, only those with a minimum frequency of 3 for the French judges and 2 for the Catalan judges will be considered. A probability of 0.1 is established for this selection in order to have a reduced number of words and thus facilitate interpretation.

We must remove the FP2 judge (column 24), saving the result in the new.base data frame.

The rows (wines) are: “PG05” “PG06” “EG05” “EG06” “PC05” “PC06” “EC05” “EC06”.

We have to add the cluster number (pole) to each of the wines.

new.base[,27]<-c(“Cl2”,“Cl2”,“Cl3”,“Cl3”,“Cl2”,“Cl1”,“Cl1”,“Cl2”)
# Column names of the original table, for reference.
colnames(base)

# Drop column 24 (judge FP2 according to the accompanying text) and keep the
# remaining columns as new.base.
new.base <- base[, -24]
colnames(new.base)

# Dimensions of the original table, printed as space-separated numbers.
cat(dim(base))

# Cluster (pole) label of each row, given in row order.
new.base[["pole"]] <- c("Cl2", "Cl2", "Cl3", "Cl3", "Cl2", "Cl1", "Cl1", "Cl2")
colnames(new.base)

To take the positions of 14 French judges:

# Column positions in new.base used for the French judges
# (columns 5, 6 and 12 to 23: fourteen columns).
posit.Fr <- c(5, 6, 12:23)
# Show the corresponding column names as a check.
names(new.base)[posit.Fr]

To take the positions of 9 Catalan judges:

# Column positions in new.base used for the Catalan judges
# (columns 1 to 4 and 7 to 11: nine columns).
posit.Cat <- c(1:4, 7:11)
# Show the corresponding column names as a check.
names(new.base)[posit.Cat]

To obtain the French characteristic words for each cluster:

# Build the text object from the French judges' columns, aggregated by the
# "pole" column, with the user stop-word list and Fmin = 3.
res.TD.Fr.Agg <- TextData(
  new.base,
  var.text       = posit.Fr,
  stop.word.user = str.Fr.stopworduser,
  var.agg        = "pole",
  Fmin           = 3
)
# Characteristic words of each pole, selected with proba = 0.1.
LC.Fig3.Fr <- LexChar(res.TD.Fr.Agg, proba = 0.1)
LC.Fig3.Fr$CharWord

To obtain the Catalan characteristic words for each cluster:

# Build the text object from the Catalan judges' columns, aggregated by the
# "pole" column, with the user stop-word list and Fmin = 2.
res.TD.Cat.Agg <- TextData(
  new.base,
  var.text       = posit.Cat,
  stop.word.user = str.Cat.stopworduser,
  var.agg        = "pole",
  Fmin           = 2
)
# Characteristic words of each pole, selected with proba = 0.1.
LC.Fig3.Cat <- LexChar(res.TD.Cat.Agg, proba = 0.1)
LC.Fig3.Cat$CharWord

We only need to construct a vector containing the words that are characteristic of at least one of the three groups.

Words characteristic of French judges in one of the three poles:

# Pool the row names of every element of CharWord, drop duplicates and sort.
Words.Fig3.Fr <- sort(
  unique(unlist(lapply(LC.Fig3.Fr$CharWord, rownames)))
)
Words.Fig3.Fr

Words characteristic of Catalan judges in one of the three poles:

# Pool the row names of every element of CharWord, drop duplicates and sort.
Words.Fig3.Cat <- sort(
  unique(unlist(lapply(LC.Fig3.Cat$CharWord, rownames)))
)
Words.Fig3.Cat

 

The first 135 words are French:

# Names of the French words: rows 1 to 135 of the word coordinates.
# They are read from `freq.sup`, the same component that supplies the Catalan
# word names (rows 136 to 230) and the coordinates extracted further down, so
# that the positions computed from this vector index the right rows.
Fr.Words.MFA <- rownames(res.mfact.23$freq.sup$coord[1:135, ])
Fr.Words.MFA

 

The words that occupy positions 136 to 230 are Catalan words:

# Names of the Catalan words: rows 136 to 230 of the freq.sup coordinates.
Cat.Words.MFA <- rownames(
  res.mfact.23$freq.sup$coord[seq(from = 136, to = 230), ]
)
Cat.Words.MFA

 

To build a vector with the selected characteristic words and plot them:

# Positions, within each language's word list, of the characteristic words.
pos.words.Fig3.Fr <- which(is.element(Fr.Words.MFA, Words.Fig3.Fr))
pos.words.Fig3.Cat <- which(is.element(Cat.Words.MFA, Words.Fig3.Cat))
# Catalan positions are shifted by 135 because the Catalan words follow the
# 135 French words in the coordinate table.
pos.words.Fig3 <- c(pos.words.Fig3.Fr, pos.words.Fig3.Cat + 135)
# Plot the selected words on the first plane (axes 1 and 2), without legend.
plot.MFA(
  res.mfact.23,
  choix      = "freq",
  invisible  = c("row", "col"),
  axes       = c(1, 2),
  select     = pos.words.Fig3,
  unselect   = 1,
  legend     = list(plot = FALSE),
  habillage  = "none",
  autoLab    = "yes",
  cex        = 0.8,
  title      = "",
  graph.type = "classic"
)

Characteristic words in English on the MFACT plane using different colors for each judge group
Separate translations are provided for French and Catalan words.

Characteristic French words

# Coordinates of the characteristic French words, tagged with lang = "Fr".
# drop = FALSE keeps the selection a matrix (one row per word, with its row
# name) even when a single word is selected; `lang` is sized to the selection
# so an empty selection yields an empty data frame instead of an error.
Coord.Fr <- data.frame(
  res.mfact.23$freq.sup$coord[pos.words.Fig3.Fr, , drop = FALSE],
  lang = rep("Fr", length(pos.words.Fig3.Fr))
)
Coord.Fr

Characteristic Catalan words

# Coordinates of the characteristic Catalan words, tagged with lang = "Cat".
# Catalan rows start after the 135 French rows, hence the offset.
# drop = FALSE keeps the selection a matrix (one row per word, with its row
# name) even when a single word is selected; `lang` is sized to the selection
# so an empty selection yields an empty data frame instead of an error.
Coord.Cat <- data.frame(
  res.mfact.23$freq.sup$coord[135 + pos.words.Fig3.Cat, , drop = FALSE],
  lang = rep("Cat", length(pos.words.Fig3.Cat))
)
Coord.Cat

French words:

# Display the French characteristic words that need a translation.
print(Words.Fig3.Fr)

# French -> English translation table for the characteristic words.
# Written as one named vector (name = original word, value = translation) and
# converted to a data frame in a single step, instead of growing the table
# with one rbind() call per word.
transl.Fr <- c(
  "animal"    = "animal",
  "asséchant" = "drying",
  "bois"      = "wood",
  "boisé"     = "woody",
  "charpenté" = "structured",
  "dominé"    = "surpass",
  "épice"     = "spicy",
  "évent"     = "staleness",
  "finale"    = "final",
  "fraîcheur" = "freshness",
  "fruit"     = "fruit",
  "fruité"    = "fruity",
  "léger"     = "light",
  "mûr"       = "ripe",
  "neuf"      = "new",
  "noir"      = "black",
  "nonboisé"  = "unwooded",
  "rond"      = "round",
  "souple"    = "supple",
  "tannique"  = "tannic",
  "toasté"    = "toasted",
  "très"      = "very",
  "vanillé"   = "vanillin"
)
# Columns: `orig` (word as it appears in the analysis) and `transl` (English).
df.Char.Fr <- data.frame(orig = names(transl.Fr), transl = unname(transl.Fr))
df.Char.Fr

 

To join French coordinates and their translation:

# Join the coordinates (matched on their row names) with the translation
# table (matched on its `orig` column).
Coord.Fr.Fig3 <- merge(
  x    = Coord.Fr,
  y    = df.Char.Fr,
  by.x = "row.names",
  by.y = "orig"
)
# Show columns 1 to 5, 9 and 10 of the joined table.
Coord.Fr.Fig3[, c(1:5, 9, 10)]

Catalan words:

# Display the Catalan characteristic words that need a translation.
print(Words.Fig3.Cat)

# Catalan -> English translation table for the characteristic words.
# Written as one named vector (name = original word, value = translation) and
# converted to a data frame in a single step, instead of growing the table
# with one rbind() call per word.
transl.Cat <- c(
  "alt"          = "high",
  "astringent"   = "astringent",
  "bota"         = "barrel",
  "cafè"         = "coffee",
  "cartró"       = "cardboard",
  "cedre"        = "cedar",
  "claudolor"    = "clove",
  "confitura"    = "jelly",
  "especiat"     = "spicy",
  "formatge"     = "cheese",
  "fruita"       = "fruit",
  "fusta"        = "wood",
  "iode"         = "iodine",
  "madur"        = "mature",
  "malaqualitat" = "poorquality",
  "mantegós"     = "buttered",
  "marcat"       = "marked",
  "neopre"       = "neoprene",
  "regalèssia"   = "licorice",
  "secant"       = "drying",
  "sutja"        = "soot",
  "taní"         = "tannin",
  "torrat"       = "toasted"
)
# Columns: `orig` (word as it appears in the analysis) and `transl` (English).
df.Char.Cat <- data.frame(orig = names(transl.Cat), transl = unname(transl.Cat))
df.Char.Cat

 

To join Catalan coordinates and their translation:

# Join the coordinates (matched on their row names) with the translation
# table (matched on its `orig` column).
Coord.Cat.Fig3 <- merge(
  x    = Coord.Cat,
  y    = df.Char.Cat,
  by.x = "row.names",
  by.y = "orig"
)
# Show columns 1 to 5, 9 and 10 of the joined table.
Coord.Cat.Fig3[, c(1:5, 9, 10)]

 

To build Figure 3:

ax1 <- 1
ax2 <- 2
# Axis titles built from column 2 of the eigenvalue table, rounded to one
# decimal and displayed as a percentage.
labx <- paste0("Dim 1 (", round(res.mfact.23$eig[ax1, 2], 1), "%)")
laby <- paste0("Dim 2 (", round(res.mfact.23$eig[ax2, 2], 1), "%)")

# Stack the French and Catalan tables (coordinates + English translation)
# into the single data frame that is plotted.
Coord.Fig3 <- rbind(Coord.Fr.Fig3, Coord.Cat.Fig3)
# Levels are fixed explicitly ("Cat" first, then "Fr") so that the font,
# colour and label vectors below always line up with the same language.
Coord.Fig3$lang <- factor(Coord.Fig3$lang, levels = c("Cat", "Fr"))

# Both vectors are indexed by the level order of `lang`:
# Catalan -> bold / black, French -> bold italic / red.
font.type.FRCat <- c("bold", "bold.italic")
color.type.FRCat <- c("Green", "Blue")
col.margin <- c("black", "red")

set.seed(123)
library(ggtext)
Figure3 <- ggplot(Coord.Fig3) +
  theme_light() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  xlab(labx) + ylab(laby) + coord_fixed() +
  # Dashed grey reference lines through the origin.
  geom_hline(yintercept = 0, linetype = "dashed", color = "grey") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "grey") +
  # Translated words, repelled from each other; font face and colour depend
  # on the original language.
  geom_text_repel(
    size = 5, fontface = font.type.FRCat[Coord.Fig3$lang], max.overlaps = 100,
    box.padding = unit(0.35, "lines"),
    aes(x = Dim.1, y = Dim.2, label = transl, color = lang)
  ) +
  theme(axis.text.x = element_text(size = rel(1.6))) +
  theme(axis.text.y = element_text(size = rel(1.6))) +
  labs(x = labx) + labs(y = laby) +
  theme(axis.title.x = element_text(size = 17, face = "bold")) +
  theme(axis.title.y = element_text(size = 17, face = "bold")) +
  theme(plot.margin = grid::unit(c(t = 5, r = 2, b = 5, l = 2), "mm")) +
  scale_color_manual(
    name = "Language",
    labels = c("Catalan", "French"),
    values = setNames(col.margin, levels(Coord.Fig3$lang))
  ) +
  theme(axis.title.x = element_text(margin = margin(t = 10))) +
  theme(panel.border = element_rect(colour = "black", fill = NA, linewidth = 1)) +
  theme(legend.position = "none") +
  # NOTE(review): the ggtitle() call at the end of this chain replaces this
  # title, so the colour key for the two languages is not displayed while the
  # legend is also hidden; confirm which title is intended.
  labs(title = "Words originally in <b style='color:#FF0000'>**_French_**</b> and **Catalan**") +
  theme(plot.title = ggtext::element_markdown(lineheight = 1.1, hjust = 1, size = 20)) +
  ylim(-1.9, 1.1) +
  ggtitle("Figure 3. Characteristic words of the three poles")

 

To plot Figure 3:

# Reset the random seed right before drawing (presumably so that the repelled
# label positions are reproducible), then render the figure.
set.seed(123)
print(Figure3)