Script Use of Lexicometry in Sensometrics

Catalan panel

25 most frequent Catalan words ordered by frequency

# Build the Xplortext object from every column of baseCat, passing the
# user-defined stop-word list.
# NOTE(review): Fmin = 1 presumably keeps every word that occurs at least
# once -- confirm against the TextData documentation.
res.TD.Cat.Before <- TextData(
  baseCat,
  var.text = 1:ncol(baseCat),
  stop.word.user = str.Cat.stopworduser,
  Fmin = 1
)
# Print the summary limited to 25 words and no documents.
summary(
  res.TD.Cat.Before,
  ndoc = 0,
  nword = 25,
  info = FALSE
)

 

Translating the names of the 15 most frequent Catalan words.

Build a copy of the res.TD.Cat.Before object and create a vector (original.Cat) holding the 15 most frequent Catalan words.

# Work on a copy so that res.TD.Cat.Before itself is left unchanged.
res.Cat.Trans <- res.TD.Cat.Before
# Row names of the first 15 rows of the word index.
original.Cat <- rownames(res.TD.Cat.Before$indexW[seq_len(15), ])
# Print the selected words on one line.
cat(original.Cat)

Create a vector translation.Cat with the English translations, in the same order as original.Cat:

# Display labels "Catalan (English)", one per word.
translation.Cat <- c(
  "confitura (jelly)",
  "fruita (fruit)",
  "tànnic (tannic)",
  "fusta (wood)",
  "vainilla (vanilla)",
  "madur (mature)",
  "nas (nose)",
  "aroma (bouquet)",
  "astringent (astringent)",
  "boca (mouth)",
  "greix (unctuous/fat)",
  "regalèssia (liquorice)",
  "balsàmic (balsamic)",
  "rodó (round)",
  "secant (drying)"
)

 

Creating a data frame with the original words and translation:

# Lookup table pairing each original word with its display label; the
# column names are the same ones data.frame() derives from the arguments.
df.CatChange <- data.frame(
  original.Cat = original.Cat,
  translation.Cat = translation.Cat
)

 

Updating the term names of the Catalan DocTerm object (only for the 15 most frequent words):

# Rename the matching terms of the document-term matrix. match() gives, for
# each original word, its position among the DocTerm terms.
# as.character() guards against data.frame() having stored the labels as a
# factor (the default before R 4.0), which would be inserted as integer codes.
res.Cat.Trans$DocTerm$dimnames$Terms[
  match(df.CatChange$original.Cat, res.Cat.Trans$DocTerm$dimnames$Terms)
] <- as.character(df.CatChange$translation.Cat)
# Print all terms to check the replacement. This is a separate statement: it
# must not share a line with the assignment, or the script fails to parse.
cat(res.Cat.Trans$DocTerm$dimnames$Terms)

Updating the row names of indexW, which holds the frequencies (only for the 15 most frequent words):

# Rename the matching rows of the word index. as.character() guards against
# the labels being a factor (data.frame() default before R 4.0), which would
# be inserted as integer codes.
rownames(res.Cat.Trans$indexW)[
  match(df.CatChange$original.Cat, rownames(res.Cat.Trans$indexW))
] <- as.character(df.CatChange$translation.Cat)
# Show the first 20 rows to check the renaming. This is a separate statement:
# it must not share a line with the assignment, or the script fails to parse.
res.Cat.Trans$indexW[1:20, ]

Another way to check the changes:

# Print the summary of the renamed object, limited to 15 words and no
# documents.
summary(
  res.Cat.Trans,
  ndoc = 0,
  nword = 15,
  info = FALSE
)

 

Building a data frame with the frequencies of the Catalan words, in two steps:

# Keep the first 15 rows of the word index as a data frame.
df.CatW <- data.frame(res.Cat.Trans$indexW[1:15,])
# Prepend the row names (the word labels) as an explicit first column.
df.CatW <- data.frame(rownames(df.CatW), df.CatW)
# Print the intermediate result.
df.CatW

Building the table

# Reset the row names to the automatic ones; the words are already held in
# the first column.
rownames(df.CatW) <- NULL
# Give the three columns their display headers.
names(df.CatW) <- c("Words", "Count", "No.docs")
# Print the finished table data.
df.CatW

Table 2.b. Most frequent Catalan words

# Render the Catalan frequency table (Table 2.b): bold first column, classic
# theme in Cambria, and a light-cyan background on every second row.
kableExtra::kable(
  df.CatW,
  caption = "<left><strong>Table 2.b. Most frequent Catalan words</strong></left>"
) %>%
  column_spec(1, bold = TRUE) %>%
  kable_classic(full_width = FALSE, html_font = "Cambria") %>%
  # Stripe by the rows of the table being displayed (df.CatW), not df.FrW.
  row_spec(seq(2, nrow(df.CatW), 2), background = "#CCFFFF")

Table 2. Joining Catalan and French words

# Place the Catalan and French frequency tables side by side.
df.join <- cbind(df.CatW, df.FrW)
# Render the joined table (Table 2): bold word columns (1 and 4), classic
# theme, striped even rows, a left border on column 4, a bold header row and
# a grouping header above the columns.
kableExtra::kable(
  df.join,
  caption = "<left><strong>Table 2. Most frequent words</strong></left>"
) %>%
  column_spec(column = c(1, 4), bold = TRUE) %>%
  kable_classic(full_width = FALSE, html_font = "Cambria") %>%
  # kable_styling(latex_options = "striped", font_size = 16) %>%
  row_spec(seq(2, nrow(df.join), 2), background = "#CCFFFF") %>%
  column_spec(4, border_left = TRUE, border_right = FALSE) %>%
  row_spec(0, bold = TRUE) %>%
  add_header_above(c("Most frequent Catalan words", " "= 2, "Most frequent French words", " " = 2))