Script Use of Lexicometry in Sensometrics

4.5. MFACT Plane

Wine and Word Configuration on the First Plane

Figure 2 shows the configuration of the eight wines on the first MFACT plane.

Global representation of the documents

# Classic-graphics map of the individuals of the MFACT result, with the
# same dark grey colour repeated for the eight labels.
plot.MFA(
  res.mfact.23,
  choix = "ind",
  col.hab = rep("grey30", 8),
  title = "Global representation of the wines-documents",
  cex = 1,
  graph.type = "classic"
)

Another global representation of the documents

# Coordinates of the individuals on the MFACT axes, as a data frame whose
# row names are used as point labels.
dfp <- as.data.frame(res.mfact.23$ind$coord)
ax1 <- 1
ax2 <- 2
# Axis titles: the value in column 2 of the eigenvalue table is rounded to
# one decimal and printed as a percentage.
labx <- paste0("Dim 1 (", round(res.mfact.23$eig[ax1, 2], 1), "%)")
laby <- paste0("Dim 2 (", round(res.mfact.23$eig[ax2, 2], 1), "%)")

Figure2 <- ggplot(dfp, aes(x = Dim.1, y = Dim.2, label = rownames(dfp))) +
  theme_light() +
  xlab(labx) +
  ylab(laby) +
  coord_fixed() +
  # Dashed grey reference lines through the origin.
  geom_hline(yintercept = 0, linetype = "dashed", color = "grey") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "grey") +
  # Horizontal green box with a solid border.
  ggplot2::annotate(
    "rect",
    xmin = -6, xmax = 8, ymin = -2.1, ymax = 2.2,
    alpha = .2, fill = "green", color = "black", linewidth = 1
  ) +
  # Vertical brown box with a dashed border.
  ggplot2::annotate(
    "rect",
    xmin = -3, xmax = 2, ymin = -6.3, ymax = 5.8,
    alpha = .2, fill = "brown", color = "black", linewidth = 1, lty = "dashed"
  ) +
  # Labels are drawn after the boxes so that they stay on top of them.
  geom_text(size = 5, fontface = "bold") +
  # Hand-made legend: a caption and a small swatch per box style.
  ggplot2::annotate(
    "text", x = 6.2, y = 5.2, label = "Grenache wines", color = "black", size = 5
  ) +
  ggplot2::annotate(
    "text", x = 6.2, y = 4.6, label = "Carignan wines", color = "black", size = 5
  ) +
  ggplot2::annotate(
    "rect",
    xmin = 3.5, xmax = 3.9, ymin = 4.9, ymax = 5.5,
    alpha = .2, fill = "brown", color = "black", linewidth = 1, lty = "dashed"
  ) +
  ggplot2::annotate(
    "rect",
    xmin = 3.5, xmax = 3.9, ymin = 4.2, ymax = 4.8,
    alpha = .2, fill = "green", color = "black", linewidth = 1
  ) +
  # All theme adjustments on top of theme_light(), gathered in one call.
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.title.x = element_text(size = 15, face = "bold", vjust = -2, hjust = 0.5),
    axis.title.y = element_text(size = 15, face = "bold"),
    axis.text.x = element_text(size = rel(1.6)),
    axis.text.y = element_text(size = rel(1.6)),
    plot.margin = grid::unit(c(t = 0, r = 2, b = 5, l = 2), "mm"),
    panel.border = element_rect(colour = "black", fill = NA, linewidth = 1)
  ) +
  ggtitle("Figure 2. Global representation of the wines documents")
Figure2

Characteristic words of the three poles

There are three clusters or poles in the first MFA plane:

Cluster 1: (pole1) “PC06” “EC05” # Right of the first factorial plane (first and fourth quadrants)
Cluster 2: (pole2) “PG05” “PG06” “PC05” “EC06” # (Second quadrant)
Cluster 3: (pole3) “EG05” “EG06” # Lower part of Dim 2 (third quadrant)

In order to position the characteristic words on the first factorial plane, the LexChar function of the Xplortext package will be used. Given the large number of words, only those with a minimum frequency of 3 for the French judges and 2 for the Catalan judges will be considered. A probability of 0.1 is established for this selection in order to have a reduced number of words and thus facilitate interpretation.

We must remove judge FP2 (column 24) and save the result in the new.base data frame.

The rows (wines) are: “PG05” “PG06” “EG05” “EG06” “PC05” “PC06” “EC05” “EC06”.

We have to add the cluster number (pole) to each of the wines.

new.base$pole <- c("Cl2","Cl2","Cl3","Cl3","Cl2","Cl1","Cl1","Cl2")

# Column names of the full data set, before any column is removed.
print(colnames(base))

[1] "CE1"      "CE2"      "CE3"      "CE4"      "FE5"      "FE6"     
 [7] "CE7"      "CE8"      "CE9"      "CE10"     "CE11"     "FE12"    
[13] "FP1"      "FP3"      "FP4"      "FP5"      "FP6"      "FP7"     
[19] "FP8"      "FP9"      "FP10"     "FP11"     "FP12"     "FP2"     
[25] "FrScore"  "CatScore"

# Drop judge FP2 by name rather than by its position (column 24), so the
# step stays correct even if the column order of `base` changes.
# drop = FALSE guarantees that the result is still a data frame.
new.base <- base[, colnames(base) != "FP2", drop = FALSE]
colnames(new.base)

[1] "CE1"      "CE2"      "CE3"      "CE4"      "FE5"      "FE6"     
 [7] "CE7"      "CE8"      "CE9"      "CE10"     "CE11"     "FE12"    
[13] "FP1"      "FP3"      "FP4"      "FP5"      "FP6"      "FP7"     
[19] "FP8"      "FP9"      "FP10"     "FP11"     "FP12"     "FrScore" 
[25] "CatScore"

# Number of rows and columns of the full data set, printed on one line.
cat(nrow(base), ncol(base))

8 26

# Pole (cluster) label of every row of new.base, listed in row order.
new.base[["pole"]] <- c(
  rep("Cl2", 2), rep("Cl3", 2), "Cl2", rep("Cl1", 2), "Cl2"
)
names(new.base)

[1] "CE1"      "CE2"      "CE3"      "CE4"      "FE5"      "FE6"     
 [7] "CE7"      "CE8"      "CE9"      "CE10"     "CE11"     "FE12"    
[13] "FP1"      "FP3"      "FP4"      "FP5"      "FP6"      "FP7"     
[19] "FP8"      "FP9"      "FP10"     "FP11"     "FP12"     "FrScore" 
[25] "CatScore" "pole"

To take the positions of 14 French judges:

# Column positions, in new.base, of the French judges' answers.
posit.Fr <- c(5, 6, seq(12, 23))
# Check which columns those positions select.
colnames(new.base)[posit.Fr]

 [1] "FE5"  "FE6"  "FE12" "FP1"  "FP3"  "FP4"  "FP5"  "FP6"  "FP7"  "FP8" 
[11] "FP9"  "FP10" "FP11" "FP12"

To take the positions of 9 Catalan judges:

# Column positions, in new.base, of the Catalan judges' answers.
posit.Cat <- c(seq_len(4), 7:11)
# Check which columns those positions select.
colnames(new.base)[posit.Cat]

"CE1"  "CE2"  "CE3"  "CE4"  "CE7"  "CE8"  "CE9"  "CE10" "CE11"

To obtain the French characteristic words for each cluster:

# Build the French lexical table aggregated by pole (Fmin = 3), then extract
# the characteristic words of each pole at the 0.1 probability threshold.
res.TD.Fr.Agg <- TextData(
  new.base,
  var.text = posit.Fr,
  stop.word.user = str.Fr.stopworduser,
  var.agg = "pole",
  Fmin = 3
)
LC.Fig3.Fr <- LexChar(res.TD.Fr.Agg, proba = 0.1)
LC.Fig3.Fr$CharWord

$Cl1
            Intern %    glob % Intern freq Glob freq       p.value    v.test
bois       4.4585987 1.2519562           7          8 0.0003207468  3.597941
boisé     10.1910828 4.6948357          16         30 0.0006021027  3.430665
neuf       2.5477707 0.6259781           4          4 0.0035396358  2.916517
dominé     1.9108280 0.4694836           3          3 0.0146182364  2.441701
charpenté  2.5477707 0.7824726           4          5 0.0151396078  2.429022
vanillé    3.8216561 1.7214397           6         11 0.0368077948  2.087890
finale     1.9108280 0.6259781           3          4 0.0513936738  1.948178
nonboisé   0.0000000 1.4084507           0          9 0.0776001980 -1.764783
animal     0.0000000 1.4084507           0          9 0.0776001980 -1.764783
fruit      1.2738854 4.2253521           2         27 0.0253952179 -2.235337
fruité     0.6369427 3.7558685           1         24 0.0101671008 -2.570094

$Cl2
          Intern %    glob % Intern freq Glob freq       p.value    v.test
fruit     6.508876 4.2253521          22         27 2.011013e-03  3.088601
fruité    5.325444 3.7558685          18         24 2.740705e-02  2.205671
souple    2.071006 1.2519562           7          8 5.463815e-02  1.921743
léger     2.071006 1.2519562           7          8 5.463815e-02  1.921743
rond      1.183432 0.6259781           4          4 7.762696e-02  1.764624
noir      1.183432 0.6259781           4          4 7.762696e-02  1.764624
mûr       2.958580 2.0344288          10         13 8.608338e-02  1.716430
fraîcheur 1.775148 1.0954617           6          7 9.388001e-02  1.675276
évent     1.775148 1.0954617           6          7 9.388001e-02  1.675276
tannique  1.183432 2.1909233           4         14 7.342496e-02 -1.790180
toasté    0.000000 0.6259781           0          4 4.871437e-02 -1.971083
neuf      0.000000 0.6259781           0          4 4.871437e-02 -1.971083
finale    0.000000 0.6259781           0          4 4.871437e-02 -1.971083
bois      0.295858 1.2519562           1          8 2.582280e-02 -2.228866
vanillé   0.591716 1.7214397           2         11 2.325646e-02 -2.269195
charpenté 0.000000 0.7824726           0          5 2.278452e-02 -2.277029
boisé     1.183432 4.6948357           4         30 5.537868e-06 -4.543306

$Cl3
          Intern %    glob % Intern freq Glob freq     p.value    v.test
toasté    2.083333 0.6259781           3          4 0.04004621  2.053272
asséchant 2.777778 1.0954617           4          7 0.05678802  1.904939
épice     0.000000 1.4084507           0          9 0.09879800 -1.650709
très      0.000000 2.0344288           0         13 0.03488995 -2.109633

attr(,"class")
[1] "descfreq" "list "

To obtain the Catalan characteristic words for each cluster:

# Build the Catalan lexical table aggregated by pole (Fmin = 2), then extract
# the characteristic words of each pole at the 0.1 probability threshold.
res.TD.Cat.Agg <- TextData(
  new.base,
  var.text = posit.Cat,
  stop.word.user = str.Cat.stopworduser,
  var.agg = "pole",
  Fmin = 2
)
LC.Fig3.Cat <- LexChar(res.TD.Cat.Agg, proba = 0.1)
LC.Fig3.Cat$CharWord

$Cl1
           Intern %    glob % Intern freq Glob freq      p.value    v.test
fusta      8.333333 2.7863777           7          9 0.001595904  3.156654
alt        4.761905 1.2383901           4          4 0.004334538  2.852730
cafè       3.571429 0.9287926           3          3 0.017124102  2.384032
especiat   4.761905 1.5479876           4          5 0.018411597  2.357235
torrat     3.571429 1.2383901           3          4 0.059827331  1.882064
claudolor  3.571429 1.2383901           3          4 0.059827331  1.882064
sutja      2.380952 0.6191950           2          2 0.067034594  1.831442
marcat     2.380952 0.6191950           2          2 0.067034594  1.831442
bota       2.380952 0.6191950           2          2 0.067034594  1.831442
regalèssia 4.761905 2.1671827           4          7 0.092345322  1.683154
madur      0.000000 2.4767802           0          8 0.087113463 -1.710825
fruita     0.000000 3.7151703           0         12 0.025018213 -2.241121
confitura  0.000000 4.0247678           0         13 0.018260882 -2.360285

$Cl2
            Intern %   glob % Intern freq Glob freq     p.value    v.test
fruita     6.0240964 3.715170          10         12 0.02588888  2.227875
fusta      1.2048193 2.786378           2          9 0.08878735 -1.701829
regalèssia 0.6024096 2.167183           1          7 0.05793784 -1.896168
astringent 0.6024096 2.167183           1          7 0.05793784 -1.896168
torrat     0.0000000 1.238390           0          4 0.05472216 -1.921076
taní       0.0000000 1.238390           0          4 0.05472216 -1.921076
alt        0.0000000 1.238390           0          4 0.05472216 -1.921076

$Cl3
             Intern %   glob % Intern freq Glob freq     p.value    v.test
secant       5.479452 1.857585           4          6 0.02772228  2.201194
neopre       2.739726 0.619195           2          2 0.05053555  1.955403
mantegós     2.739726 0.619195           2          2 0.05053555  1.955403
malaqualitat 2.739726 0.619195           2          2 0.05053555  1.955403
iode         2.739726 0.619195           2          2 0.05053555  1.955403
formatge     2.739726 0.619195           2          2 0.05053555  1.955403
cedre        2.739726 0.619195           2          2 0.05053555  1.955403
cartró       2.739726 0.619195           2          2 0.05053555  1.955403
astringent   5.479452 2.167183           4          7 0.05609819  1.910272
fusta        0.000000 2.786378           0          9 0.09643008 -1.662413

attr(,"class")
[1] "descfreq" "list "

We only need to construct a vector containing the words that are characteristic of at least one of the three poles.

Words characteristic of French judges in one of the three poles:

# Pool the row names (words) of every pole's table, keep each word once,
# and sort the result.
Words.Fig3.Fr <- sort(unique(unlist(
  lapply(LC.Fig3.Fr$CharWord, rownames),
  use.names = FALSE
)))
Words.Fig3.Fr

 [1] "animal"    "asséchant" "bois"      "boisé"     "charpenté" "dominé"    "épice"     "évent"     "finale"   
[10] "fraîcheur" "fruit"     "fruité"    "léger"     "mûr"       "neuf"      "noir"      "nonboisé"  "rond"     
[19] "souple"    "tannique"  "toasté"    "très"      "vanillé"

Words characteristic of Catalan judges in one of the three poles:

# Pool the row names (words) of every pole's table, keep each word once,
# and sort the result.
Words.Fig3.Cat <- sort(unique(unlist(
  lapply(LC.Fig3.Cat$CharWord, rownames),
  use.names = FALSE
)))
Words.Fig3.Cat

 [1] "alt"          "astringent"   "bota"         "cafè"         "cartró"       "cedre"        "claudolor"   
 [8] "confitura"    "especiat"     "formatge"     "fruita"       "fusta"        "iode"         "madur"       
[15] "malaqualitat" "mantegós"     "marcat"       "neopre"       "regalèssia"   "secant"       "sutja"       
[22] "taní"         "torrat"

The first 135 words are French:

# The words are supplementary frequency columns of the MFACT, so their
# coordinates are stored in `freq.sup$coord` (the table this script also
# reads for the Catalan words and for the word coordinates), not in
# `quanti.var.sup$coord`. Rows 1 to 135 of that table are the French words.
Fr.Words.MFA <- rownames(res.mfact.23$freq.sup$coord[1:135, ])
Fr.Words.MFA

  [1] "acétate"       "acide"         "acidité"       "acidulé"       "agréable"      "agrume"       
  [7] "alcool"        "amer"          "amertume"      "ample"         "animal"        "aspect"       
 [13] "asséchant"     "astringence"   "astringent"    "attendre"      "beau"          "bois"         
 [19] "boisé"         "bon"           "bouche"        "caramel"       "carignan"      "carton"       
 [25] "cassis"        "cerise"        "chair"         "chaleureux"    "charpenté"     "complexe"     
 [31] "compoté"       "concentration" "concentré"     "confituré"     "crayon"        "cuit"         
 [37] "curieux"       "cyste"         "défaut"        "degarde"       "dominé"        "doux"         
 [43] "dur"           "emy"           "encens"        "épice"         "épicé"         "équilibre"    
 [49] "équilibré"     "étable"        "évent"         "évolué"        "évolution"     "extrême"      
 [55] "farineux"      "fin"           "final"         "finale"        "floral"        "fondu"        
 [61] "fort"          "fraîche"       "fraîcheur"     "frais"         "fruit"         "fruité"       
 [67] "fumée"         "garrigue"      "généreux"      "gouleyant"     "gourmand"      "gras"         
 [73] "grenache"      "grillé"        "humide"        "jeune"         "kirché"        "lacté"        
 [79] "lactique"      "léger"         "limite"        "long"          "longueur"      "manquedefondu"
 [85] "matière"       "minéral"       "moins"         "mou"           "mûr"           "mûre"         
 [91] "neuf"          "neutre"        "nez"           "noir"          "nonboisé"      "olive"        
 [97] "onctueux"      "particulier"   "petitegarde"   "peu"           "peutannique"   "plat"         
[103] "plus"          "prédominance"  "présent"       "prononcé"      "pruneau"       "puissance"    
[109] "puissant"      "rafle"         "râpeux"        "réduction"     "réglisse"      "résine"       
[115] "rond"          "rouge"         "sec"           "sécheresse"    "souple"        "structuré"    
[121] "sucrée"        "sucrosité"     "taille"        "tanin"         "tannique"      "toasté"       
[127] "touche"        "très"          "trop"          "type"          "vanillé"       "végétal"      
[133] "velouté"       "vif"           "vin"

The words that occupy positions 136 to 230 are Catalan words:

# Rows 136 to 230 of the supplementary-frequency coordinate table hold the
# Catalan words.
Cat.Words.MFA <- rownames(
  res.mfact.23$freq.sup$coord[seq(136, 230), ]
)
print(Cat.Words.MFA)

 [1] "acetatdetil"    "acètic"         "acidesabaix"    "acidulat"       "afrutat"        "alcohòliques"  
 [7] "alt"            "arbust"         "aroma"          "astringent"     "balsàmic"       "boca"          
[13] "bota"           "brisa"          "cacau"          "cafè"           "caramel"        "cartró"        
[19] "cassis"         "cedre"          "cirera"         "cítric"         "claudolor"      "complex"       
[25] "compostos"      "confitada"      "confitura"      "cos"            "cosmitjabaix"   "dens"          
[31] "desequilibri"   "dolç"           "especiat"       "espigola"       "eucaliptus"     "farigola"      
[37] "floral"         "florsseques"    "formatge"       "fruita"         "fruitsec"       "fum"           
[43] "fumat"          "fusta"          "gerani"         "glicerol"       "greix"          "herbesseques"  
[49] "iode"           "jove"           "làctic"         "liniment"       "lleuger"        "madur"         
[55] "malaqualitat"   "mantegós"       "marcat"         "mel"            "melós"          "mentolat"      
[61] "mora"           "nas"            "neopre"         "oxidat"         "pebrotverd"     "pegadolça"     
[67] "picant"         "picat"          "pinassa"        "pla"            "pocaaroma"      "pocestructurat"
[73] "pocpersistent"  "pollen"         "potència"       "prunasec"       "químic"         "regalèssia"    
[79] "rodó"           "roure"          "secant"         "sensacions"     "sofre"          "sotabosc"      
[85] "sucrositat"     "sutja"          "taní"           "tànnic"         "toffe"          "torrat"        
[91] "torrefacte"     "vainilla"       "vegetal"        "vi"             "xocolata"

To build a vector with the selected characteristic words and plot them:

# Positions of the characteristic words inside each language's word list.
pos.words.Fig3.Fr <- which(is.element(Fr.Words.MFA, Words.Fig3.Fr))
pos.words.Fig3.Cat <- which(is.element(Cat.Words.MFA, Words.Fig3.Cat))
# Catalan positions are shifted by 135 because the Catalan words follow the
# 135 French words in the coordinate table.
pos.words.Fig3 <- c(pos.words.Fig3.Fr, pos.words.Fig3.Cat + 135)
# Plot only the selected words; rows and columns of the active tables are
# hidden and no legend is drawn.
plot.MFA(
  res.mfact.23,
  choix = "freq",
  invisible = c("row", "col"),
  axes = c(1, 2),
  select = pos.words.Fig3,
  unselect = 1,
  legend = list(plot = FALSE),
  habillage = "none",
  autoLab = "yes",
  cex = 0.8,
  title = "",
  graph.type = "classic"
)

Characteristic words in English on the MFACT plane using different colors for each judge group
Separate translations are provided for French and Catalan words.

Characteristic French words

# Coordinates of the selected French words, tagged with lang = "Fr".
Coord.Fr <- data.frame(
  res.mfact.23$freq.sup$coord[pos.words.Fig3.Fr, ],
  lang = "Fr"
)
print(Coord.Fr)

                Dim.1        Dim.2        Dim.3       Dim.4       Dim.5         Dim.6       Dim.7 lang
animal    -0.59343967 -0.511722034 -0.290823763 -0.14090781 -0.14469656 -5.900284e-01 -0.36933291   Fr
asséchant -0.67338104 -0.803561875 -0.101463172  0.27194795  0.26204007 -9.519306e-02  0.06850627   Fr
bois       1.30114701  0.051069203 -0.483807494  0.08205348 -0.22732622  4.965811e-01 -0.19931170   Fr
boisé      0.66641435 -0.480026128 -0.044177786  0.10843598 -0.03863853  1.937070e-01  0.13171020   Fr
charpenté  1.24802969 -0.336910906 -0.255218575  0.22412317 -0.01192454  2.704951e-01  0.26337581   Fr
dominé     1.48972280 -0.132100595 -0.425383218  0.44185159 -0.56820124  6.596303e-01 -0.27950682   Fr
épice     -0.01134125  0.253361545 -0.831494258  0.18607987 -0.17130206 -2.950341e-01  0.16920614   Fr
évent     -0.92317035  0.072908887 -0.754317378  0.28753767  0.01821749 -3.888060e-02 -0.09357353   Fr
finale     1.23519276 -0.323182362 -0.272182884  0.14037764  0.37991193 -5.370408e-03  0.47145789   Fr
fraîcheur -0.60089901  0.385991015  0.172514233  0.19735824  0.05102441  8.928621e-02  0.34090288   Fr
fruit     -0.47749854  0.355953911 -0.027557246  0.06103203 -0.14054131  3.862215e-02 -0.06106851   Fr
fruité    -0.39982720  0.329897705  0.288711006 -0.08539560  0.19727933 -4.020929e-05 -0.09620369   Fr
léger     -0.84266652  0.315674093 -0.285823379  0.17022403  0.44263986  7.037213e-01  0.32350305   Fr
mûr       -0.32364408  0.302200189 -0.019685111 -0.63726117 -0.17254803  1.266075e-01 -0.08732380   Fr
neuf       1.58489551 -0.002238351 -0.544394158  0.38322473 -0.06266666  3.024667e-01 -0.13478398   Fr
noir      -0.51200080  0.979784095  0.588623369  0.08993399  0.22809222  3.054174e-01 -0.03684067   Fr
nonboisé  -0.68346651  0.164631867  0.048985206  0.30313197  0.20450185  1.163886e-01  0.06586744   Fr
rond      -0.51200080  0.979784095  0.588623369  0.08993399  0.22809222  3.054174e-01 -0.03684067   Fr
souple    -0.03439260  0.902010962  0.328552340 -0.57750861  0.04507460 -1.428955e-01 -0.04713331   Fr
tannique   0.30973816 -0.467442131 -0.005427685  0.10256343 -0.31188564 -2.826680e-01  0.08310495   Fr
toasté     0.29012704 -1.118934073  0.300860299 -0.16743876  0.63777844 -2.796487e-01  0.47369343   Fr
très       0.33371388  0.531209809 -0.227265792 -0.34394285  0.07777862  1.705868e-01 -0.05254504   Fr
vanillé    0.70680948 -0.354176022 -0.232543146 -0.06772137 -0.05969652  4.114211e-02  0.08403530   Fr

Characteristic Catalan words

# Coordinates of the selected Catalan words, tagged with lang = "Cat".
# The offset of 135 skips the French rows of the coordinate table.
Coord.Cat <- data.frame(
  res.mfact.23$freq.sup$coord[pos.words.Fig3.Cat + 135, ],
  lang = "Cat"
)
print(Coord.Cat)

                  Dim.1        Dim.2      Dim.3       Dim.4       Dim.5       Dim.6        Dim.7 lang
alt           1.5947624  0.036211572 -0.4229721  0.35593743 -0.07250461  0.31522990 -0.101814243  Cat
astringent    0.2729478 -0.671920946  0.5839807  0.03244007  0.23168459 -0.08569083 -0.098122551  Cat
bota          1.5947624  0.036211572 -0.4229721  0.35593743 -0.07250461  0.31522990 -0.101814243  Cat
cafè          1.4995897 -0.093650671 -0.3039612  0.41456430 -0.57803919  0.67239344 -0.246537076  Cat
cartró       -0.2953690 -1.555291851  0.7231142 -0.25969536  0.44322841 -0.23332663 -0.097343151  Cat
cedre        -0.2953690 -1.555291851  0.7231142 -0.25969536  0.44322841 -0.23332663 -0.097343151  Cat
claudolor     1.1700245  0.337620046 -0.4803152 -0.33434536  0.35647814  0.16771341 -0.013785433  Cat
confitura    -0.4074681  0.108617159  0.4205172 -0.35982503 -0.01193924 -0.14860409 -0.007032928  Cat
especiat      1.1978684  0.199421005 -0.3974400 -0.16111268 -0.03263916  0.41151483 -0.118224895  Cat
formatge     -0.2953690 -1.555291851  0.7231142 -0.25969536  0.44322841 -0.23332663 -0.097343151  Cat
fruita       -0.4190979  0.443405443  0.4582364 -0.11395668 -0.22491157 -0.36188649  0.048398249  Cat
fusta         1.2179704  0.326736013 -0.1746441  0.07115059  0.12553131  0.18164269 -0.061806163  Cat
iode         -0.2953690 -1.555291851  0.7231142 -0.25969536  0.44322841 -0.23332663 -0.097343151  Cat
madur        -0.2837087  0.006574687  0.6378617 -0.53775737 -0.02690668 -0.13696810  0.190794988  Cat
malaqualitat -0.2953690 -1.555291851  0.7231142 -0.25969536  0.44322841 -0.23332663 -0.097343151  Cat
mantegós     -0.2953690 -1.555291851  0.7231142 -0.25969536  0.44322841 -0.23332663 -0.097343151  Cat
marcat        1.5947624  0.036211572 -0.4229721  0.35593743 -0.07250461  0.31522990 -0.101814243  Cat
neopre       -0.2953690 -1.555291851  0.7231142 -0.25969536  0.44322841 -0.23332663 -0.097343151  Cat
regalèssia    0.6234730 -0.376522195 -0.1704102  0.32518356  0.19355106  0.26633032 -0.035120850  Cat
secant       -0.5367664 -0.947644286  0.1811539  0.05678312  0.16783263 -0.27450917  0.059039122  Cat
sutja         1.5947624  0.036211572 -0.4229721  0.35593743 -0.07250461  0.31522990 -0.101814243  Cat
taní          0.6496967 -0.759540140  0.1500710  0.04812104  0.18536190  0.04095164 -0.099578697  Cat
torrat        1.1421585 -0.243802765 -0.3006566  0.20302783  0.50108518 -0.18695654 -0.488736316  Cat

French words:

# French characteristic words that need an English translation.
print(Words.Fig3.Fr)

[1] "animal"    "asséchant" "bois"      "boisé"     "charpenté" "dominé"   
 [7] "épice"     "évent"     "finale"    "fraîcheur" "fruit"     "fruité"   
[13] "léger"     "mûr"       "neuf"      "noir"      "nonboisé"  "rond"     
[19] "souple"    "tannique"  "toasté"    "très"      "vanillé"

# French characteristic words and their English translations.
# The table is written as (orig, transl) pairs, one pair per line, and built
# in a single step instead of being grown with repeated rbind() calls.
df.Char.Fr <- as.data.frame(
  matrix(
    c(
      "animal",    "animal",
      "asséchant", "drying",
      "bois",      "wood",
      "boisé",     "woody",
      "charpenté", "structured",
      "dominé",    "surpass",
      "épice",     "spicy",
      "évent",     "staleness",
      "finale",    "final",
      "fraîcheur", "freshness",
      "fruit",     "fruit",
      "fruité",    "fruity",
      "léger",     "light",
      "mûr",       "ripe",
      "neuf",      "new",
      "noir",      "black",
      "nonboisé",  "unwooded",
      "rond",      "round",
      "souple",    "supple",
      "tannique",  "tannic",
      "toasté",    "toasted",
      "très",      "very",
      "vanillé",   "vanillin"
    ),
    ncol = 2,
    byrow = TRUE,
    dimnames = list(NULL, c("orig", "transl"))
  ),
  stringsAsFactors = FALSE
)
df.Char.Fr

        orig     transl
1     animal     animal
2  asséchant     drying
3       bois       wood
4      boisé      woody
5  charpenté structured
6     dominé    surpass
7      épice      spicy
8      évent  staleness
9     finale      final
10 fraîcheur  freshness
11     fruit      fruit
12    fruité     fruity
13     léger      light
14       mûr       ripe
15      neuf        new
16      noir      black
17  nonboisé   unwooded
18      rond      round
19    souple     supple
20  tannique     tannic
21    toasté    toasted
22      très       very
23   vanillé   vanillin

To join French coordinates and their translation:

# Match each French word's coordinates (keyed by row name) with its
# translation (keyed by the `orig` column).
Coord.Fr.Fig3 <- merge(
  Coord.Fr, df.Char.Fr,
  by.x = "row.names", by.y = "orig"
)
# Show the word, the first four dimensions, the language and the translation.
Coord.Fr.Fig3[, c("Row.names", paste0("Dim.", 1:4), "lang", "transl")]

   Row.names       Dim.1        Dim.2        Dim.3       Dim.4 lang     transl
1     animal -0.59343967 -0.511722034 -0.290823763 -0.14090781   Fr     animal
2  asséchant -0.67338104 -0.803561875 -0.101463172  0.27194795   Fr     drying
3       bois  1.30114701  0.051069203 -0.483807494  0.08205348   Fr       wood
4      boisé  0.66641435 -0.480026128 -0.044177786  0.10843598   Fr      woody
5  charpenté  1.24802969 -0.336910906 -0.255218575  0.22412317   Fr structured
6     dominé  1.48972280 -0.132100595 -0.425383218  0.44185159   Fr    surpass
7      épice -0.01134125  0.253361545 -0.831494258  0.18607987   Fr      spicy
8      évent -0.92317035  0.072908887 -0.754317378  0.28753767   Fr  staleness
9     finale  1.23519276 -0.323182362 -0.272182884  0.14037764   Fr      final
10 fraîcheur -0.60089901  0.385991015  0.172514233  0.19735824   Fr  freshness
11     fruit -0.47749854  0.355953911 -0.027557246  0.06103203   Fr      fruit
12    fruité -0.39982720  0.329897705  0.288711006 -0.08539560   Fr     fruity
13     léger -0.84266652  0.315674093 -0.285823379  0.17022403   Fr      light
14       mûr -0.32364408  0.302200189 -0.019685111 -0.63726117   Fr       ripe
15      neuf  1.58489551 -0.002238351 -0.544394158  0.38322473   Fr        new
16      noir -0.51200080  0.979784095  0.588623369  0.08993399   Fr      black
17  nonboisé -0.68346651  0.164631867  0.048985206  0.30313197   Fr   unwooded
18      rond -0.51200080  0.979784095  0.588623369  0.08993399   Fr      round
19    souple -0.03439260  0.902010962  0.328552340 -0.57750861   Fr     supple
20  tannique  0.30973816 -0.467442131 -0.005427685  0.10256343   Fr     tannic
21    toasté  0.29012704 -1.118934073  0.300860299 -0.16743876   Fr    toasted
22      très  0.33371388  0.531209809 -0.227265792 -0.34394285   Fr       very
23   vanillé  0.70680948 -0.354176022 -0.232543146 -0.06772137   Fr   vanillin

Catalan words:

# Catalan characteristic words that need an English translation.
print(Words.Fig3.Cat)

 [1] "alt"          "astringent"   "bota"         "cafè"         "cartró"      
 [6] "cedre"        "claudolor"    "confitura"    "especiat"     "formatge"    
[11] "fruita"       "fusta"        "iode"         "madur"        "malaqualitat"
[16] "mantegós"     "marcat"       "neopre"       "regalèssia"   "secant"      
[21] "sutja"        "taní"         "torrat"

# Catalan characteristic words and their English translations.
# The table is written as (orig, transl) pairs, one pair per line, and built
# in a single step instead of being grown with repeated rbind() calls.
df.Char.Cat <- as.data.frame(
  matrix(
    c(
      "alt",          "high",
      "astringent",   "astringent",
      "bota",         "barrel",
      "cafè",         "coffee",
      "cartró",       "cardboard",
      "cedre",        "cedar",
      "claudolor",    "clove",
      "confitura",    "jelly",
      "especiat",     "spicy",
      "formatge",     "cheese",
      "fruita",       "fruit",
      "fusta",        "wood",
      "iode",         "iodine",
      "madur",        "mature",
      "malaqualitat", "poorquality",
      "mantegós",     "buttered",
      "marcat",       "marked",
      "neopre",       "neoprene",
      "regalèssia",   "licorice",
      "secant",       "drying",
      "sutja",        "soot",
      "taní",         "tannin",
      "torrat",       "toasted"
    ),
    ncol = 2,
    byrow = TRUE,
    dimnames = list(NULL, c("orig", "transl"))
  ),
  stringsAsFactors = FALSE
)
df.Char.Cat

           orig      transl
1           alt        high
2    astringent  astringent
3          bota      barrel
4          cafè      coffee
5        cartró   cardboard
6         cedre       cedar
7     claudolor       clove
8     confitura       jelly
9      especiat       spicy
10     formatge      cheese
11       fruita       fruit
12        fusta        wood
13         iode      iodine
14        madur      mature
15 malaqualitat poorquality
16     mantegós    buttered
17       marcat      marked
18       neopre    neoprene
19   regalèssia    licorice
20       secant      drying
21        sutja        soot
22         taní      tannin
23       torrat     toasted

To join Catalan coordinates and their translation:

# Match each Catalan word's coordinates (keyed by row name) with its
# translation (keyed by the `orig` column).
Coord.Cat.Fig3 <- merge(
  Coord.Cat, df.Char.Cat,
  by.x = "row.names", by.y = "orig"
)
# Show the word, the first four dimensions, the language and the translation.
Coord.Cat.Fig3[, c("Row.names", paste0("Dim.", 1:4), "lang", "transl")]

      Row.names      Dim.1        Dim.2      Dim.3       Dim.4 lang      transl
1           alt  1.5947624  0.036211572 -0.4229721  0.35593743  Cat        high
2    astringent  0.2729478 -0.671920946  0.5839807  0.03244007  Cat  astringent
3          bota  1.5947624  0.036211572 -0.4229721  0.35593743  Cat      barrel
4          cafè  1.4995897 -0.093650671 -0.3039612  0.41456430  Cat      coffee
5        cartró -0.2953690 -1.555291851  0.7231142 -0.25969536  Cat   cardboard
6         cedre -0.2953690 -1.555291851  0.7231142 -0.25969536  Cat       cedar
7     claudolor  1.1700245  0.337620046 -0.4803152 -0.33434536  Cat       clove
8     confitura -0.4074681  0.108617159  0.4205172 -0.35982503  Cat       jelly
9      especiat  1.1978684  0.199421005 -0.3974400 -0.16111268  Cat       spicy
10     formatge -0.2953690 -1.555291851  0.7231142 -0.25969536  Cat      cheese
11       fruita -0.4190979  0.443405443  0.4582364 -0.11395668  Cat       fruit
12        fusta  1.2179704  0.326736013 -0.1746441  0.07115059  Cat        wood
13         iode -0.2953690 -1.555291851  0.7231142 -0.25969536  Cat      iodine
14        madur -0.2837087  0.006574687  0.6378617 -0.53775737  Cat      mature
15 malaqualitat -0.2953690 -1.555291851  0.7231142 -0.25969536  Cat poorquality
16     mantegós -0.2953690 -1.555291851  0.7231142 -0.25969536  Cat    buttered
17       marcat  1.5947624  0.036211572 -0.4229721  0.35593743  Cat      marked
18       neopre -0.2953690 -1.555291851  0.7231142 -0.25969536  Cat    neoprene
19   regalèssia  0.6234730 -0.376522195 -0.1704102  0.32518356  Cat    licorice
20       secant -0.5367664 -0.947644286  0.1811539  0.05678312  Cat      drying
21        sutja  1.5947624  0.036211572 -0.4229721  0.35593743  Cat        soot
22         taní  0.6496967 -0.759540140  0.1500710  0.04812104  Cat      tannin
23       torrat  1.1421585 -0.243802765 -0.3006566  0.20302783  Cat     toasted

To build Figure 3:

# Stack the French and Catalan word coordinates (with their translations)
# into the single data frame that Figure 3 plots.
Coord.Fig3 <- rbind(Coord.Fr.Fig3, Coord.Cat.Fig3)

ax1 <- 1
ax2 <- 2
# Axis titles: the value in column 2 of the eigenvalue table is rounded to
# one decimal and printed as a percentage.
labx <- paste0("Dim 1 (", round(res.mfact.23$eig[ax1, 2], 1), "%)")
laby <- paste0("Dim 2 (", round(res.mfact.23$eig[ax2, 2], 1), "%)")
font.type.FRCat <- c("bold", "bold.italic")
color.type.FRCat <- c("Green", "Blue")
col.margin <- c("black", "red")
# as.factor() sorts the levels alphabetically ("Cat", then "Fr"), so the
# first entry of font.type.FRCat and col.margin applies to the Catalan words
# and the second entry to the French words.
Coord.Fig3$lang <- as.factor(Coord.Fig3$lang)
set.seed(123)

library(ggtext)
Figure3 <- ggplot(Coord.Fig3) +
  theme_light() +
  xlab(labx) +
  ylab(laby) +
  coord_fixed() +
  # Dashed grey reference lines through the origin.
  geom_hline(yintercept = 0, linetype = "dashed", color = "grey") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "grey") +
  # English translations as repelled labels; font face and colour both
  # depend on the language of origin.
  geom_text_repel(
    aes(x = Dim.1, y = Dim.2, label = transl, color = lang),
    size = 5,
    fontface = font.type.FRCat[Coord.Fig3$lang],
    max.overlaps = 100,
    box.padding = unit(0.35, "lines")
  ) +
  scale_color_manual(
    name = "Language",
    labels = c("Catalan", "French"),
    values = setNames(col.margin, levels(Coord.Fig3$lang))
  ) +
  ylim(-1.9, 1.1) +
  # The legend is hidden, so the colour key (Catalan black, French red) is
  # not shown on the figure itself.
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.x = element_text(size = rel(1.6)),
    axis.text.y = element_text(size = rel(1.6)),
    axis.title.x = element_text(size = 17, face = "bold", margin = margin(t = 10)),
    axis.title.y = element_text(size = 17, face = "bold"),
    plot.margin = grid::unit(c(t = 5, r = 2, b = 5, l = 2), "mm"),
    panel.border = element_rect(colour = "black", fill = NA, linewidth = 1),
    legend.position = "none",
    plot.title = ggtext::element_markdown(lineheight = 1.1, hjust = 1, size = 20)
  ) +
  # This is the only title set: the former labs(title = ...) call was always
  # overridden by this ggtitle() and has been removed.
  ggtitle("Figure 3. Characteristic words of the three poles")

To plot Figure 3:

4.5. MFACT Plane

Global representation of the documents

# Individuals ("ind") plot of the MFACT result in the classic graph style;
# eight "grey30" colour values are passed as col.hab.
plot.MFA(
  res.mfact.23,
  choix = "ind",
  col.hab = rep("grey30", 8),
  title = "Global representation of the wines-documents",
  cex = 1,
  graph.type = "classic"
)

Characteristic words of the three poles

# The statement originally placed here, new.base[,27] <- c(...), was written
# with typographic quotes (a parse error) and ran before new.base was created.
# The same eight labels are assigned to new.base$pole below -- presumably the
# same column -- so only that assignment is kept.

colnames(base)

# Working copy of `base` without its 24th column.
new.base <- base[, -24]
colnames(new.base)

cat(dim(base))

# One pole label (Cl1/Cl2/Cl3) per row; eight rows are expected.
new.base$pole <- c("Cl2", "Cl2", "Cl3", "Cl3", "Cl2", "Cl1", "Cl1", "Cl2")
colnames(new.base)

# Column positions used as text variables for the French (.Fr) and
# Catalan (.Cat) analyses.
posit.Fr <- c(5, 6, 12:23)
names(new.base[, posit.Fr])

posit.Cat <- c(1:4, 7:11)
names(new.base[, posit.Cat])

# French columns: build the text data aggregated by "pole", then extract and
# print the characteristic words (LexChar with proba = 0.1).
res.TD.Fr.Agg <- TextData(
  new.base,
  var.text = posit.Fr,
  stop.word.user = str.Fr.stopworduser,
  var.agg = "pole",
  Fmin = 3
)
LC.Fig3.Fr <- LexChar(res.TD.Fr.Agg, proba = 0.1)
LC.Fig3.Fr$CharWord

# Catalan columns: same steps, with Fmin = 2.
res.TD.Cat.Agg <- TextData(
  new.base,
  var.text = posit.Cat,
  stop.word.user = str.Cat.stopworduser,
  var.agg = "pole",
  Fmin = 2
)
LC.Fig3.Cat <- LexChar(res.TD.Cat.Agg, proba = 0.1)
LC.Fig3.Cat$CharWord

# Sorted, de-duplicated row names gathered over all CharWord elements.
Words.Fig3.Fr <- sort(unique(unlist(lapply(LC.Fig3.Fr$CharWord, rownames))))
Words.Fig3.Fr

Words.Fig3.Cat <- sort(unique(unlist(lapply(LC.Fig3.Cat$CharWord, rownames))))
Words.Fig3.Cat

# Word names taken from the supplementary-frequency coordinates of the MFACT.
# The French list originally read `quanti.var.sup`, while every other access
# in this section reads `freq.sup`; it is aligned here so that rows 1:135
# (French) and 136:230 (Catalan) index the same table.
Fr.Words.MFA <- rownames(res.mfact.23$freq.sup$coord[1:135, ])
Fr.Words.MFA

Cat.Words.MFA <- rownames(res.mfact.23$freq.sup$coord[136:230, ])
Cat.Words.MFA

# NOTE(review): pos.words.Fig3.Fr / pos.words.Fig3.Cat are not defined in this
# part of the script; presumably they hold the positions of the Figure 3 words
# within each language's word list -- confirm.
# drop = FALSE keeps a one-row selection as a matrix with its row name.
Coord.Fr <- data.frame(
  res.mfact.23$freq.sup$coord[pos.words.Fig3.Fr, , drop = FALSE],
  lang = "Fr"
)
Coord.Fr

# Catalan rows follow the 135 French rows, hence the offset.
Coord.Cat <- data.frame(
  res.mfact.23$freq.sup$coord[135 + pos.words.Fig3.Cat, , drop = FALSE],
  lang = "Cat"
)
Coord.Cat

Words.Fig3.Fr

# Join the French coordinates (keyed by row name) to df.Char.Fr (keyed by its
# `orig` column), then show a subset of the merged columns.
Coord.Fr.Fig3 <- merge(
  x = Coord.Fr,
  y = df.Char.Fr,
  by.x = "row.names",
  by.y = "orig"
)
Coord.Fr.Fig3[, c(1:5, 9:10)]

Words.Fig3.Cat

# Same join for the Catalan words.
Coord.Cat.Fig3 <- merge(
  x = Coord.Cat,
  y = df.Char.Cat,
  by.x = "row.names",
  by.y = "orig"
)
Coord.Cat.Fig3[, c(1:5, 9:10)]

# Seed set immediately before the plot is drawn.
set.seed(123)
Figure3

Pages: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15

Script Use of Lexicometry in Sensometrics

4.5. MFACT Plane

Global representation of the documents

# Individuals ("ind") plot of the MFACT result, classic graph style, with
# eight "grey30" colour values supplied as col.hab.
plot.MFA(
  res.mfact.23,
  choix = "ind",
  col.hab = rep("grey30", 8),
  title = "Global representation of the wines-documents",
  cex = 1,
  graph.type = "classic"
)

Characteristic words of the three poles

# The statement originally placed here, new.base[,27] <- c(...), used
# typographic quotes (a parse error) and referred to new.base before it was
# created; new.base$pole below receives the same eight labels. Statements that
# were fused on a single line are split so the code parses.

colnames(base)

# Working copy of `base` without its 24th column.
new.base <- base[, -24]
colnames(new.base)

cat(dim(base))

# One pole label (Cl1/Cl2/Cl3) per row; eight rows are expected.
new.base$pole <- c("Cl2", "Cl2", "Cl3", "Cl3", "Cl2", "Cl1", "Cl1", "Cl2")
colnames(new.base)

# Column positions of the French (.Fr) and Catalan (.Cat) text variables.
posit.Fr <- c(5, 6, 12:23)
names(new.base[, posit.Fr])

posit.Cat <- c(1:4, 7:11)
names(new.base[, posit.Cat])

# Each original line held several statements with no separator (a parse
# error); they are split here, one statement per line.

# French columns: text data aggregated by "pole", then characteristic words.
res.TD.Fr.Agg <- TextData(new.base, var.text = c(posit.Fr),
                          stop.word.user = str.Fr.stopworduser,
                          var.agg = "pole", Fmin = 3)
LC.Fig3.Fr <- LexChar(res.TD.Fr.Agg, proba = 0.1)
LC.Fig3.Fr$CharWord

# Catalan columns: same steps, with Fmin = 2.
res.TD.Cat.Agg <- TextData(new.base, var.text = c(posit.Cat),
                           stop.word.user = str.Cat.stopworduser,
                           var.agg = "pole", Fmin = 2)
LC.Fig3.Cat <- LexChar(res.TD.Cat.Agg, proba = 0.1)
LC.Fig3.Cat$CharWord

# Unique row names collected over all CharWord elements, sorted.
Words.Fig3.Fr <- unique(unlist(lapply(LC.Fig3.Fr$CharWord, rownames)))
Words.Fig3.Fr <- sort(Words.Fig3.Fr)
Words.Fig3.Fr

Words.Fig3.Cat <- unique(unlist(lapply(LC.Fig3.Cat$CharWord, rownames)))
Words.Fig3.Cat <- sort(Words.Fig3.Cat)
Words.Fig3.Cat

# Statements that were fused on one line are split so the code parses.
# The French list originally read `quanti.var.sup`; every other access here
# reads `freq.sup`, so it is aligned to index the same table (rows 1:135
# French, rows 136:230 Catalan).
Fr.Words.MFA <- rownames(res.mfact.23$freq.sup$coord[1:135, ])
Fr.Words.MFA

Cat.Words.MFA <- rownames(res.mfact.23$freq.sup$coord[136:230, ])
Cat.Words.MFA

# NOTE(review): pos.words.Fig3.Fr / pos.words.Fig3.Cat are not defined in this
# part of the script -- confirm where they come from.
# drop = FALSE keeps a one-row selection as a matrix with its row name.
Coord.Fr <- data.frame(
  res.mfact.23$freq.sup$coord[pos.words.Fig3.Fr, , drop = FALSE],
  lang = "Fr"
)
Coord.Fr

# Catalan rows follow the 135 French rows, hence the offset.
Coord.Cat <- data.frame(
  res.mfact.23$freq.sup$coord[135 + pos.words.Fig3.Cat, , drop = FALSE],
  lang = "Cat"
)
Coord.Cat

# Statements that were fused on one line (a parse error) are split below.
Words.Fig3.Fr

# Join French coordinates (by row name; by.x = 0) to df.Char.Fr on `orig`.
Coord.Fr.Fig3 <- merge(Coord.Fr, df.Char.Fr, by.x = 0, by.y = "orig")
Coord.Fr.Fig3[, c(1:5, 9, 10)]

Words.Fig3.Cat

# Same join for the Catalan words.
Coord.Cat.Fig3 <- merge(Coord.Cat, df.Char.Cat, by.x = 0, by.y = "orig")
Coord.Cat.Fig3[, c(1:5, 9, 10)]

# Seed set immediately before the plot is drawn.
set.seed(123)
Figure3

Pages: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15

# Global plot of the individuals from res.mfact.23 (classic graph type), with
# col.hab given as "grey30" repeated eight times.
plot.MFA(
  res.mfact.23,
  choix = "ind",
  col.hab = rep("grey30", 8),
  title = "Global representation of the wines-documents",
  cex = 1,
  graph.type = "classic"
)

# Working copy of `base` without its 24th column.
new.base <- base[, -24]
colnames(new.base)

# Attach one pole label (Cl1/Cl2/Cl3) per row as a new `pole` column.
new.base$pole <- c(
  "Cl2", "Cl2", "Cl3", "Cl3",
  "Cl2", "Cl1", "Cl1", "Cl2"
)
colnames(new.base)

# Column positions of the French (.Fr) text variables, with their names.
posit.Fr <- c(5, 6, 12:23)
names(new.base[, posit.Fr])

# Column positions of the Catalan (.Cat) text variables, with their names.
posit.Cat <- c(1:4, 7:11)
names(new.base[, posit.Cat])

# French: text data aggregated on "pole" (Fmin = 3), then LexChar at
# proba = 0.1; the CharWord element is printed.
res.TD.Fr.Agg <- TextData(
  new.base,
  var.text = posit.Fr,
  stop.word.user = str.Fr.stopworduser,
  var.agg = "pole",
  Fmin = 3
)
LC.Fig3.Fr <- LexChar(res.TD.Fr.Agg, proba = 0.1)
LC.Fig3.Fr$CharWord

# Catalan: identical pipeline with Fmin = 2.
res.TD.Cat.Agg <- TextData(
  new.base,
  var.text = posit.Cat,
  stop.word.user = str.Cat.stopworduser,
  var.agg = "pole",
  Fmin = 2
)
LC.Fig3.Cat <- LexChar(res.TD.Cat.Agg, proba = 0.1)
LC.Fig3.Cat$CharWord

# Row names across every CharWord element: de-duplicated, then sorted.
Words.Fig3.Fr <- sort(unique(unlist(lapply(LC.Fig3.Fr$CharWord, rownames))))
Words.Fig3.Fr

Words.Fig3.Cat <- sort(unique(unlist(lapply(LC.Fig3.Cat$CharWord, rownames))))
Words.Fig3.Cat

# Word names from the supplementary-frequency coordinates of the MFACT.
# The French list originally read `quanti.var.sup`, unlike every other access
# in this section (`freq.sup`); it is aligned so rows 1:135 (French) and
# 136:230 (Catalan) refer to the same table.
Fr.Words.MFA <- rownames(res.mfact.23$freq.sup$coord[1:135, ])
Fr.Words.MFA

Cat.Words.MFA <- rownames(res.mfact.23$freq.sup$coord[136:230, ])
Cat.Words.MFA

# NOTE(review): pos.words.Fig3.Fr / pos.words.Fig3.Cat are not defined in this
# part of the script -- confirm where they come from.
# drop = FALSE keeps a one-row selection as a matrix with its row name.
Coord.Fr <- data.frame(
  res.mfact.23$freq.sup$coord[pos.words.Fig3.Fr, , drop = FALSE],
  lang = "Fr"
)
Coord.Fr

# Catalan rows follow the 135 French rows, hence the offset.
Coord.Cat <- data.frame(
  res.mfact.23$freq.sup$coord[135 + pos.words.Fig3.Cat, , drop = FALSE],
  lang = "Cat"
)
Coord.Cat

# Join French coordinates (keyed by row name) to df.Char.Fr (keyed by `orig`)
# and show a subset of the merged columns.
Coord.Fr.Fig3 <- merge(
  x = Coord.Fr,
  y = df.Char.Fr,
  by.x = "row.names",
  by.y = "orig"
)
Coord.Fr.Fig3[, c(1:5, 9:10)]

# Same join for the Catalan words.
Coord.Cat.Fig3 <- merge(
  x = Coord.Cat,
  y = df.Char.Cat,
  by.x = "row.names",
  by.y = "orig"
)
Coord.Cat.Fig3[, c(1:5, 9:10)]

# Seed set immediately before the plot is drawn.
set.seed(123)
Figure3