Script Use of Lexicometry in Sensometrics

Catalan panel

25 most frequent Catalan words ordered by frequency

# Build the TextData object from every column of baseCat, passing the
# user-defined stop-word list.
res.TD.Cat.Before <- TextData(
  baseCat,
  var.text = 1:ncol(baseCat),
  stop.word.user = str.Cat.stopworduser,
  Fmin = 1
)
# Print the summary with the index of the 25 most frequent words.
summary(res.TD.Cat.Before, ndoc = 0, nword = 25, info = FALSE)

TextData summary

            Before  After
Documents     8.00   8.00
Occurrences 323.00 319.00
Words        97.00  95.00
Mean-length  40.38  39.88

Index of the  25  most frequent words
          Word Frequency N.Documents
1  confitura          13           6
2  fruita             12           6
3  tànnic             10           7
4  fusta               9           4
5  vainilla            9           7
6  madur               8           5
7  nas                 8           8
8  aroma               7           7
9  astringent          7           5
10 boca                7           5
11 greix               7           6
12 regalèssia          7           5
13 balsàmic            6           5
14 rodó                6           5
15 secant              6           4
16 acetatdetil         5           3
17 cos                 5           4
18 especiat            5           3
19 fum                 5           4
20 xocolata            5           5
21 alt                 4           2
22 cirera              4           4
23 claudolor           4           3
24 complex             4           3
25 floral              4           3

 

Translating the names of the 15 most frequent Catalan words.

Building a copy of the res.TD.Cat.Before object and creating a vector (original.Cat) with the 15 most frequent Catalan words.

# The 15 most frequent words, read from the row names of the word index.
original.Cat <- rownames(res.TD.Cat.Before$indexW[seq_len(15), ])
# Work on a copy so that res.TD.Cat.Before keeps its original labels.
res.Cat.Trans <- res.TD.Cat.Before
cat(original.Cat)
confitura fruita tànnic fusta vainilla madur nas aroma astringent boca greix regalèssia balsàmic rodó secant

Creating a vector translation.Cat with the words in English, in the same order as original.Cat:

# Display labels "catalan (english)", listed in the same order as original.Cat.
translation.Cat <- c(
  "confitura (jelly)",
  "fruita (fruit)",
  "tànnic (tannic)",
  "fusta (wood)",
  "vainilla (vanilla)",
  "madur (mature)",
  "nas (nose)",
  "aroma (bouquet)",
  "astringent (astringent)",
  "boca (mouth)",
  "greix (unctuous/fat)",
  "regalèssia (liquorice)",
  "balsàmic (balsamic)",
  "rodó (round)",
  "secant (drying)"
)

 

Creating a data frame with the original words and translation:

# Lookup table pairing each original Catalan word with its translated label.
df.CatChange <- data.frame(
  original.Cat = original.Cat,
  translation.Cat = translation.Cat
)

 

To change the Catalan DocTerm object (only for the 15 most frequent words):

# Relabel the translated words among the DocTerm term names. match() returns
# the position of each original word in Terms, so every translation replaces
# exactly the entry it belongs to; all other terms are left as they are.
res.Cat.Trans$DocTerm$dimnames$Terms[
  match(df.CatChange$original.Cat, res.Cat.Trans$DocTerm$dimnames$Terms)
] <- df.CatChange$translation.Cat
# Print the updated term labels to check the replacement. This must be a
# separate statement: on the same line as the assignment it is a parse error.
cat(res.Cat.Trans$DocTerm$dimnames$Terms)
acetatdetil acètic acidesabaix acidulat afrutat alcohòliques alt arbust aroma (bouquet) astringent (astringent) balsàmic (balsamic) boca (mouth) bota brisa cacau cafè caramel cartró 
cassis cedre cirera cítric claudolor complex compostos confitada confitura (jelly) cos cosmitjabaix dens desequilibri dolç especiat espigola eucaliptus farigola floral florsseques 
formatge fruita (fruit) fruitsec fum fumat fusta (wood) gerani glicerol greix (unctuous/fat) herbesseques iode jove làctic liniment lleuger madur (mature) malaqualitat mantegós marcat 
mel melós mentolat mora nas (nose) neopre oxidat pebrotverd pegadolça picant picat pinassa pla pocaaroma pocestructurat pocpersistent pollen potència prunasec químic 
regalèssia (liquorice) rodó (round) roure secant (drying) sensacions sofre sotabosc sucrositat sutja taní tànnic (tannic) toffe torrat torrefacte vainilla (vanilla) vegetal vi xocolata

To change indexW with the frequencies (only for the 15 most frequent words)

# Relabel the same words in the row names of the word index. match() locates
# each original word among the current row names.
rownames(res.Cat.Trans$indexW)[
  match(df.CatChange$original.Cat, rownames(res.Cat.Trans$indexW))
] <- df.CatChange$translation.Cat
# Show the first 20 rows: 15 relabelled words followed by 5 untouched ones.
# This must be a separate statement: on the same line as the assignment it is
# a parse error.
res.Cat.Trans$indexW[1:20, ]
                        Frequency N.Documents
confitura (jelly)              13           6
fruita (fruit)                 12           6
tànnic (tannic)                10           7
fusta (wood)                    9           4
vainilla (vanilla)              9           7
madur (mature)                  8           5
nas (nose)                      8           8
aroma (bouquet)                 7           7
astringent (astringent)         7           5
boca (mouth)                    7           5
greix (unctuous/fat)            7           6
regalèssia (liquorice)          7           5
balsàmic (balsamic)             6           5
rodó (round)                    6           5
secant (drying)                 6           4
acetatdetil                     5           3
cos                             5           4
especiat                        5           3
fum                             5           4
xocolata                        5           5

Another way to check the changes:

# Summary of the relabelled object, limited to the 15 most frequent words.
summary(
  res.Cat.Trans,
  ndoc = 0,
  nword = 15,
  info = FALSE
)
TextData summary

            Before  After
Documents     8.00   8.00
Occurrences 323.00 319.00
Words        97.00  95.00
Mean-length  40.38  39.88

Index of the  15  most frequent words
                      Word Frequency N.Documents
1  confitura (jelly)              13           6
2  fruita (fruit)                 12           6
3  tànnic (tannic)                10           7
4  fusta (wood)                    9           4
5  vainilla (vanilla)              9           7
6  madur (mature)                  8           5
7  nas (nose)                      8           8
8  aroma (bouquet)                 7           7
9  astringent (astringent)         7           5
10 boca (mouth)                    7           5
11 greix (unctuous/fat)            7           6
12 regalèssia (liquorice)          7           5
13 balsàmic (balsamic)             6           5
14 rodó (round)                    6           5
15 secant (drying)                 6           4

 

Building a data frame with the frequencies of the Catalan words. Two ways:

# Keep the first 15 rows of the relabelled word index as a data frame.
df.CatW <- data.frame(res.Cat.Trans$indexW[1:15,])
# Prepend the row names as a first column. Its name is derived automatically
# from the expression, which is why it prints as "rownames.df.CatW.".
df.CatW <- data.frame(rownames(df.CatW), df.CatW)
# Print the intermediate result.
df.CatW
                              rownames.df.CatW. Frequency N.Documents
confitura (jelly)             confitura (jelly)        13           6
fruita (fruit)                   fruita (fruit)        12           6
tànnic (tannic)                 tànnic (tannic)        10           7
fusta (wood)                       fusta (wood)         9           4
vainilla (vanilla)           vainilla (vanilla)         9           7
madur (mature)                   madur (mature)         8           5
nas (nose)                           nas (nose)         8           8
aroma (bouquet)                 aroma (bouquet)         7           7
astringent (astringent) astringent (astringent)         7           5
boca (mouth)                       boca (mouth)         7           5
greix (unctuous/fat)       greix (unctuous/fat)         7           6
regalèssia (liquorice)   regalèssia (liquorice)         7           5
balsàmic (balsamic)         balsàmic (balsamic)         6           5
rodó (round)                       rodó (round)         6           5
secant (drying)                 secant (drying)         6           4

Building the table

# Give the three columns their final headings.
names(df.CatW) <- c("Words", "Count", "No.docs")
# Drop the word labels used as row names so the rows are simply numbered.
rownames(df.CatW) <- NULL
df.CatW
                     Words Count No.docs
1        confitura (jelly)    13       6
2           fruita (fruit)    12       6
3          tànnic (tannic)    10       7
4             fusta (wood)     9       4
5       vainilla (vanilla)     9       7
6           madur (mature)     8       5
7               nas (nose)     8       8
8          aroma (bouquet)     7       7
9  astringent (astringent)     7       5
10            boca (mouth)     7       5
11    greix (unctuous/fat)     7       6
12  regalèssia (liquorice)     7       5
13     balsàmic (balsamic)     6       5
14            rodó (round)     6       5
15         secant (drying)     6       4

Table 2.b. Most frequent Catalan words

# Render Table 2.b from df.CatW: bold first column, classic styling, and a
# light-blue background on every second row.
kableExtra::kable(
  df.CatW,
  caption = "<left><strong>Table 2.b. Most frequent Catalan words</strong></left>"
) %>%
  column_spec(1, bold = TRUE) %>%
  kable_classic(full_width = FALSE, html_font = "Cambria") %>%
  # Stripe by the row count of the table being printed (df.CatW); the French
  # table df.FrW has no bearing on this one.
  row_spec(seq(2, nrow(df.CatW), 2), background = "#CCFFFF")

Table 2. Joining Catalan and French words

# Put the Catalan and French frequency tables side by side.
# NOTE(review): df.FrW is defined outside this excerpt; the header spans below
# (1 + 2 + 1 + 2) assume it has three columns like df.CatW. Confirm.
df.join <- cbind(df.CatW, df.FrW)

# Render Table 2: the word columns (1 and 4) in bold, every second row
# shaded, a vertical rule before the French block, and a bold header row.
kableExtra::kable(
  df.join,
  caption = "<left><strong>Table 2. Most frequent words</strong></left>"
) %>%
  column_spec(column = c(1, 4), bold = TRUE) %>%
  kable_classic(full_width = FALSE, html_font = "Cambria") %>%
  # kable_styling(latex_options = "striped", font_size = 16) %>%
  row_spec(seq(2, nrow(df.join), 2), background = "#CCFFFF") %>%
  column_spec(4, border_left = TRUE, border_right = FALSE) %>%
  row_spec(0, bold = TRUE) %>%
  add_header_above(
    c("Most frequent Catalan words", " " = 2, "Most frequent French words", " " = 2)
  )