Spanish Discourses _Pg4

4. Cluster from CA coordinates (three factors ncp=3)

Previous syntax

# Load the saved workspace (expected to provide SpanishDisc) and Xplortext.
load("C:/RData/SpanishDisc.RData")
library(Xplortext)

# User-defined stop words, handed to TextData() through stop.word.user.
swu <- c(
  "consiguiente", "ello", "hacia", "punto", "Señorías", "si", "Sus", "vista",
  "A", "B", "C", "D", "E", "F",
  "a", "b", "c", "d"
)

# Build the text-data object from the first column of SpanishDisc, with
# "year" as the quantitative contextual variable and no graphical output.
TD <- TextData(
  SpanishDisc,
  var.text = c(1),
  context.quanti = "year",
  Fmin = 10,
  Dmin = 2,
  idiom = "es",
  lower = FALSE,
  remov.number = TRUE,
  stop.word.tm = TRUE,
  stop.word.user = swu,
  graph = FALSE
)

To select only the first 3 factors:

# Run LexCA on TD, keeping only the first 3 factors (ncp = 3).
resLexCA <- LexCA(TD, ncp = 3, graph = FALSE)

To cut the tree at the level the user clicks on, use nb.clust=0 (the default); in this case, a suggested level is provided.

If nb.clust=-1, the tree is automatically cut at the suggested level.

If nb.clust is a positive integer, the tree is cut into nb.clust clusters (for example, nb.clust=4 gives, in this case, the same result as the automatic cut nb.clust=-1):

# Hierarchical clustering on the LexCA result, cutting the tree into
# 4 clusters (nb.clust = 4).
res.ccah <- LexCHCca(resLexCA, nb.clust = 4, graph = TRUE)

Cluster description using hierarchical words

The LabelTree function extracts the hierarchical characteristic words associated with the nodes of a chronological hierarchical tree: the characteristic words of each node are extracted, then each word is associated with the node that it best characterizes. The argument “proba” is a threshold on the p-value used to select the characteristic words (by default 0.05):

# Extract the hierarchical characteristic words of the tree nodes, keeping
# words whose p-value is below the 0.0005 threshold, then display them.
res.Label <- LabelTree(res.ccah, proba = 0.0005)
res.Label

$`Su79 CS81 Gz82 Gz86 Gz89`
                Intern %     glob % Intern freq Glob freq       p.value   v.test
política      1.00394407 0.75956385         252        411 1.520663e-09 6.042158
inflación     0.16732401 0.09240436          42         50 8.371088e-08 5.358942
realización   0.08366201 0.03880983          21         21 1.966820e-07 5.202446
situación     0.27090554 0.18296064          68         99 1.211158e-05 4.375570
Europeas      0.07171029 0.03511366          18         19 2.101523e-05 4.253822
problema      0.14740449 0.08870819          37         48 2.824537e-05 4.187170
problemas     0.31871240 0.22731473          80        123 4.670773e-05 4.071518
hecho         0.16334011 0.10349288          41         56 8.392449e-05 3.932908
tema          0.06374248 0.03141748          16         17 8.788456e-05 3.921815
decir         0.14342058 0.08870819          36         48 1.046602e-04 3.879526
balanza       0.04780686 0.02217705          12         12 1.983259e-04 3.721139
conseguir     0.13545277 0.08501201          34         46 2.774408e-04 3.635501
sino          0.20716306 0.14230272          52         77 2.908652e-04 3.623303
propia        0.15138839 0.09794862          38         53 3.341141e-04 3.587304
tiempo        0.23106649 0.16263168          58         88 3.464961e-04 3.577802
pagos         0.04382296 0.02032896          11         11 4.276299e-04 3.522418
desigualdades 0.04382296 0.02032896          11         11 4.276299e-04 3.522418
veces         0.07967810 0.04435409          20         24 4.420569e-04 3.513613

$`Gz93 Az96 Az00 Zp04 Zp08 Rj11`
                   Intern %     glob % Intern freq Glob freq       p.value   v.test
reforma          0.36540384 0.24579560         106        133 6.287601e-10 6.183047
compromiso       0.20338516 0.12197376          59         66 8.626845e-10 6.132942
Unión            0.18270192 0.10903715          53         59 4.514608e-09 5.864142
confianza        0.26198766 0.17187211          76         93 2.533931e-08 5.570923
legislatura      0.29990693 0.20883386          87        113 4.538645e-07 5.044855
modelo           0.15167707 0.09425245          44         51 1.567986e-06 4.802370
Consejo          0.12065221 0.07392349          35         40 1.109591e-05 4.394635
Administraciones 0.16891310 0.11273332          49         61 2.532742e-05 4.211862
debemos          0.16546589 0.11088523          48         60 3.824057e-05 4.117861
competitividad   0.11720501 0.07392349          34         40 5.851322e-05 4.018728
acuerdo          0.22751560 0.16263168          66         88 5.855959e-05 4.018541
innovación       0.07928574 0.04620218          23         25 8.396760e-05 3.932784
reformas         0.20338516 0.14415080          59         78 1.000659e-04 3.890432
diálogo          0.18270192 0.12751802          53         69 1.163157e-04 3.853764
fiscal           0.17580751 0.12197376          51         66 1.208190e-04 3.844458

$`Su79 CS81 Gz82`
             Intern %     glob % Intern freq Glob freq       p.value   v.test
pueblo     0.22535958 0.08316393          34         45 8.579335e-11 6.490076
palabras   0.09942334 0.03326557          15         18 3.137806e-06 4.661587
autonómico 0.10605157 0.03696174          16         20 3.818565e-06 4.621021
crisis     0.21873136 0.10903715          33         59 1.111395e-05 4.394282
Pública    0.11930801 0.04620218          18         25 1.167991e-05 4.383479
programa   0.29827003 0.16817594          45         91 2.095435e-05 4.254471
ciudadana  0.11267979 0.04435409          17         24 3.044251e-05 4.170131
dignidad   0.09279512 0.03511366          14         19 8.864315e-05 3.919744
real       0.11267979 0.04805027          17         26 1.502052e-04 3.790730
hombre     0.05965401 0.01848087           9         10 1.523318e-04 3.787237
pueblos    0.09279512 0.03696174          14         20 2.192793e-04 3.695703
Proyecto   0.08616690 0.03326557          13         18 2.366758e-04 3.676263
histórica  0.07953868 0.02956940          12         16 2.448826e-04 3.667553

$`Gz93 Az96 Az00 Zp04`
            Intern %     glob % Intern freq Glob freq       p.value   v.test
pacto     0.10835354 0.03880983          21         21 8.591716e-10 6.133591
Autónomas 0.24766524 0.15154315          48         82 4.591331e-05 4.075512
impulsará 0.06191631 0.02402513          12         13 7.743940e-05 3.952189
impulso   0.13931170 0.07392349          27         40 9.137347e-05 3.912426
Senado    0.06707600 0.02772131          13         15 1.492434e-04 3.792325

$`Az96 Az00 Zp04`
              Intern %     glob % Intern freq Glob freq       p.value   v.test
españoles   0.44751831 0.23655517          66        128 9.703979e-09 5.735823
Gobierno    1.03742880 0.74477915         153        403 3.477264e-06 4.640405
PP          0.06780580 0.01848087          10         10 4.514625e-06 4.586165
mejor       0.27800380 0.14969507          41         81 1.356978e-05 4.350709
proyección  0.08136697 0.02587322          12         14 1.709485e-05 4.299805
pleno       0.08814755 0.03326557          13         18 1.830652e-04 3.741308
convivencia 0.13561161 0.06468305          20         35 3.769605e-04 3.555709

$`Zp08 Rj11`
          Intern %     glob % Intern freq Glob freq       p.value   v.test
euros    0.1765683 0.03511366          17         19 4.206091e-11 6.596630
España   1.0282509 0.59138791          99        320 1.419999e-08 5.670960
ámbito   0.2908184 0.10903715          28         59 3.288481e-07 5.106116
millones 0.1869547 0.06098688          18         33 4.175774e-06 4.602436
Ley      0.3842958 0.18480872          37        100 8.160580e-06 4.460927
economía 0.4362277 0.23655517          42        128 5.978821e-05 4.013645
sector   0.2804321 0.13121419          27         71 8.936805e-05 3.917780
primera  0.1973411 0.08316393          19         45 2.322412e-04 3.681088

$`Su79 CS81`
         Intern %     glob % Intern freq Glob freq       p.value   v.test
UCD     0.1250240 0.02402513          13         13 9.685824e-10 6.114503
régimen 0.1634930 0.04805027          17         26 6.830515e-07 4.966122
persona 0.1057896 0.02402513          11         13 1.392050e-06 4.826140
acción  0.3943066 0.20328960          41        110 1.510638e-05 4.327131
reales  0.1250240 0.03696174          13         20 1.909671e-05 4.275198
Estado  0.6635892 0.43430050          69        235 2.300775e-04 3.683473

$`Az00 Zp04`
        Intern %    glob % Intern freq Glob freq       p.value   v.test
quiero 0.4110775 0.1866568          38        101 1.354444e-06 4.831594

$`Gz86 Gz89`
              Intern %     glob % Intern freq Glob freq       p.value   v.test
Comunidad   0.33952467 0.10164480          34         55 3.141381e-12 6.971250
Única       0.14979029 0.02956940          15         16 2.683746e-10 6.316026
Acta        0.14979029 0.02956940          15         16 2.683746e-10 6.316026
Comunitaria 0.15977631 0.03326557          16         18 3.915904e-10 6.257343
querría     0.14979029 0.03326557          15         18 9.359239e-09 5.741950
países      0.46934292 0.22177047          47        120 2.013040e-07 5.198129
posibilidad 0.12981825 0.03326557          13         18 1.988072e-06 4.754633
cooperación 0.36948272 0.17002403          37         92 2.064246e-06 4.747031
crecimiento 0.39944078 0.19404916          40        105 3.990170e-06 4.611894
decía       0.08987418 0.01848087           9         10 4.233166e-06 4.599593
juicio      0.14979029 0.04435409          15         24 4.815837e-06 4.572655
interior    0.10984622 0.02956940          11         16 3.006057e-05 4.173007
mantener    0.15977631 0.05913879          16         32 1.080774e-04 3.871703
importantes 0.13980427 0.04805027          14         26 1.107178e-04 3.865817
propio      0.16976233 0.06653114          17         36 1.598318e-04 3.775274
sostenido   0.08987418 0.02402513           9         13 1.757295e-04 3.751572
importante  0.20970641 0.09240436          21         50 2.047600e-04 3.713070
relación    0.20970641 0.09425245          21         51 2.876952e-04 3.626135
instrumento 0.14979029 0.05913879          15         32 4.634950e-04 3.501012

$Su79
               Intern %     glob % Intern freq Glob freq       p.value   v.test
libertad      0.6354113 0.17372020          37         94 1.018233e-12 7.128020
Constitución  0.6182380 0.22177047          36        120 1.393545e-08 5.674180
orden         0.3262923 0.08501201          19         46 1.811338e-07 5.217725
Derecho       0.2404259 0.04989836          14         27 2.812388e-07 5.135604
libertades    0.2404259 0.05544262          14         30 1.488937e-06 4.812715
realidad      0.3091190 0.09055627          18         49 3.099698e-06 4.664101
libre         0.1889061 0.04065792          11         22 1.007277e-05 4.415605
siguientes    0.1889061 0.04065792          11         22 1.007277e-05 4.415605
social        0.6697579 0.34189614          39        185 6.159218e-05 4.006626
deberá        0.1545595 0.03511366           9         19 1.292940e-04 3.827799
ejercicio     0.1889061 0.05174644          11         28 1.658111e-04 3.766109
independencia 0.1545595 0.03696174           9         20 2.126037e-04 3.703549
estatal       0.1030397 0.01848087           6         10 4.426631e-04 3.513249

$CS81
              Intern %     glob % Intern freq Glob freq       p.value   v.test
transición   0.1748634 0.02402513           8         13 4.529647e-06 4.585471
industrial   0.1748634 0.03326557           8         18 1.045805e-04 3.879711
reconversión 0.1311475 0.01848087           6         10 1.133520e-04 3.860075
investidura  0.1967213 0.04805027           9         26 3.611390e-04 3.566966

$Gz82
             Intern %     glob % Intern freq Glob freq       p.value   v.test
paro        0.3838772 0.08131584          18         44 1.661632e-08 5.643980
encuentra   0.1919386 0.02587322           9         14 7.311037e-07 4.952914
progreso    0.3198976 0.08870819          15         48 1.565388e-05 4.319280
áreas       0.1919386 0.03696174           9         20 3.784071e-05 4.120283
humanos     0.1706121 0.02956940           8         16 4.302152e-05 4.090623
hombres     0.2345916 0.05544262          11         30 4.700744e-05 4.070028
Universidad 0.1279591 0.01848087           6         10 1.303861e-04 3.825728
horizonte   0.1919386 0.04250601           9         23 1.447636e-04 3.799884
pueden      0.2345916 0.06837923          11         37 4.163684e-04 3.529486

$Gz86
              Intern %     glob % Intern freq Glob freq       p.value   v.test
integración  0.4825737 0.09979671          27         54 5.422674e-13 7.214263
esfuerzo     0.7506702 0.22177047          42        120 7.962399e-13 7.161801
proceso      0.6434316 0.22916282          36        124 1.275835e-08 5.689270
intentar     0.1787310 0.02217705          10         12 1.503503e-08 5.661163
creo         0.3753351 0.10903715          21         59 4.031343e-07 5.067472
esperamos    0.1429848 0.01848087           8         10 9.658220e-07 4.898477
Económica    0.2144772 0.04989836          12         27 1.152889e-05 4.386312
evolución    0.2144772 0.04989836          12         27 1.152889e-05 4.386312
saneamiento  0.1608579 0.02956940           9         16 1.556418e-05 4.320549
coordinación 0.2323503 0.05913879          13         32 1.579552e-05 4.317292
adaptación   0.1966041 0.04435409          11         24 1.975604e-05 4.267630
recuperación 0.2323503 0.06098688          13         33 2.359350e-05 4.227850
Europea      0.3932082 0.14784698          22         80 2.764008e-05 4.192086
clima        0.1429848 0.02587322           8         14 4.393008e-05 4.085774
elementos    0.1429848 0.02587322           8         14 4.393008e-05 4.085774
mayoría      0.2680965 0.08316393          15         45 5.410116e-05 4.037164
parte        0.4289544 0.17741637          24         96 6.536059e-05 3.992572
cuanto       0.2323503 0.06653114          13         36 7.059633e-05 3.974267
pueda        0.2502234 0.07946775          14         43 1.338704e-04 3.819227
presión      0.1608579 0.03696174           9         20 1.552780e-04 3.782473
negociación  0.1608579 0.03696174           9         20 1.552780e-04 3.782473
técnicas     0.1072386 0.01848087           6         10 3.538326e-04 3.572320
supone       0.1429848 0.03326557           8         18 4.372548e-04 3.516513
avance       0.1251117 0.02587322           7         14 4.469592e-04 3.510682

$Gz89
               Intern %     glob % Intern freq Glob freq       p.value   v.test
ESTE          0.3620729 0.04435409          16         24 2.970736e-12 6.979102
posición      0.3168138 0.06283497          14         34 3.319247e-07 5.104355
Instituciones 0.2036660 0.02772131           9         15 1.017348e-06 4.888253
cambios       0.2262955 0.04435409          10         24 1.754243e-05 4.294074
ritmo         0.1810364 0.03141748           8         17 4.873136e-05 4.061630
aumentar      0.2036660 0.04250601           9         23 9.074591e-05 3.914090
proyecto      0.3620729 0.12012567          16         65 1.027367e-04 3.884036
responsables  0.1810364 0.03511366           8         19 1.305416e-04 3.825435
toma          0.1357773 0.02032896           6         11 1.904265e-04 3.731390
superior      0.1357773 0.02217705           6         12 3.544386e-04 3.571872
cuadro        0.1357773 0.02217705           6         12 3.544386e-04 3.571872

$Gz93
                Intern %     glob % Intern freq Glob freq       p.value   v.test
dificultades   0.3237643 0.05729070          15         31 1.515610e-08 5.659787
Partidos       0.2158429 0.04250601          10         23 1.686342e-05 4.302825
económica      0.6259443 0.26427647          29        143 2.090850e-05 4.254961
interlocutores 0.1510900 0.02402513           7         13 7.242517e-05 3.968175
políticas      0.5180229 0.21437812          24        116 8.281980e-05 3.936090

$Az96
              Intern %     glob % Intern freq Glob freq       p.value   v.test
cumplimiento 0.2725291 0.06098688          15         33 4.384750e-07 5.051447
presida      0.1453488 0.01848087           8         10 8.497958e-07 4.923570
Cámara       0.6722384 0.28460543          37        154 1.094226e-06 4.873889
señor        0.2543605 0.07022731          14         38 2.251898e-05 4.238329
convergencia 0.1271802 0.02217705           7         12 1.118298e-04 3.863378
Comunidades  0.5087209 0.23655517          28        128 1.513734e-04 3.788805
obtengo      0.1998547 0.05544262          11         30 2.068161e-04 3.710542
Presidente   0.2543605 0.08686010          14         47 3.295021e-04 3.590928

$Az00
                Intern %     glob % Intern freq Glob freq       p.value   v.test
necesitamos    0.2293100 0.04065792          11         22 1.468401e-06 4.815489
sociedad       0.8338545 0.40288302          40        218 1.632507e-05 4.310005
queremos       0.3126954 0.10349288          15         56 1.551181e-04 3.782729
liberalización 0.1459245 0.02587322           7         14 1.677227e-04 3.763245
abierto        0.1667709 0.03511366           8         19 2.341048e-04 3.679050
presupuestario 0.1459245 0.02956940           7         16 4.766377e-04 3.493553

$Zp04
              Intern %      glob % Intern freq Glob freq       p.value   v.test
year        45.0640881 40.52670486        2004      21929 1.681337e-10 6.387941
terrorismo   0.3822802  0.11273332          17         61 1.099603e-05 4.396599
atención     0.4272543  0.14045463          19         76 1.812864e-05 4.286774
cultura      0.2698448  0.07761966          12         42 1.990900e-04 3.720168
inmigración  0.1349224  0.02217705           6         12 3.670935e-04 3.562676

$Zp08
              Intern %     glob % Intern freq Glob freq       p.value   v.test
idea         0.5155747 0.06468305          24         35 8.312841e-18 8.595188
país         1.0526316 0.28645352          49        155 6.700470e-16 8.075856
mujeres      0.3222342 0.05544262          15         30 9.069817e-09 5.747266
seguiremos   0.2148228 0.02587322          10         14 3.184348e-08 5.530987
pido         0.1718582 0.02402513           8         13 5.167313e-06 4.557878
pasada       0.2148228 0.04250601          10         23 1.759129e-05 4.293456
promoveremos 0.1288937 0.02032896           6         11 2.550887e-04 3.657096

$Rj11
                      Intern %     glob % Intern freq Glob freq       p.value   v.test
euro                 0.1809773 0.02032896           9         11 4.303052e-08 5.477940
Públicas             0.2815202 0.06098688          14         33 9.092421e-07 4.910328
laboral              0.3217374 0.07946775          16         43 1.190552e-06 4.857204
deuda                0.2010859 0.03326557          10         18 1.863007e-06 4.767744
internacionalización 0.1407601 0.01848087           7         10 1.030401e-05 4.410693
entidades            0.1407601 0.01848087           7         10 1.030401e-05 4.410693
cifra                0.1407601 0.02032896           7         11 2.606973e-05 4.205334
empresas             0.3619546 0.12936611          18         70 9.124018e-05 3.912779
Presupuestos         0.2010859 0.04989836          10         27 1.653283e-04 3.766838
pondremos            0.1206515 0.01848087           6         10 1.820225e-04 3.742743
año                  0.3016288 0.10349288          15         56 2.330939e-04 3.680153
déficit              0.3217374 0.11827758          16         64 3.392304e-04 3.583339

Spanish Discourses _Pg4

4. Cluster from CA coordinates (three factors ncp=3)

Previous syntax

# Run LexCA on TD, keeping only the first 3 factors (ncp = 3).
resLexCA <- LexCA(TD, ncp = 3, graph = FALSE)

# Hierarchical clustering on the LexCA result, cutting the tree into
# 4 clusters (nb.clust = 4).
res.ccah <- LexCHCca(resLexCA, nb.clust = 4, graph = TRUE)

Cluster description using hierarchical words

# Extract the hierarchical characteristic words of the tree nodes, keeping
# words whose p-value is below the 0.0005 threshold, then display them.
res.Label <- LabelTree(res.ccah, proba = 0.0005)
res.Label

Hierarchical Agglomerative Clustering without Contiguity Constraint

# Hierarchical agglomerative clustering on the LexCA result, this time
# without the contiguity constraint.
res.HCca <- LexHCca(resLexCA, nb.clust = -1)

Spanish Discourses _Pg4

4. Cluster from CA coordinates (three factors ncp=3)

Previous syntax

# Run LexCA on TD, keeping only the first 3 factors (ncp = 3).
resLexCA <- LexCA(TD, ncp = 3, graph = FALSE)

# Hierarchical clustering on the LexCA result, cutting the tree into
# 4 clusters (nb.clust = 4).
res.ccah <- LexCHCca(resLexCA, nb.clust = 4, graph = TRUE)

Cluster description using hierarchical words

# Extract the hierarchical characteristic words of the tree nodes, keeping
# words whose p-value is below the 0.0005 threshold.
res.Label <- LabelTree(res.ccah, proba = 0.0005)
# Display the result; this must be its own statement, since R cannot parse
# two expressions on one line without a separator.
res.Label

Hierarchical Agglomerative Clustering without Contiguity Constraint

# Hierarchical agglomerative clustering on the LexCA result, this time
# without the contiguity constraint.
res.HCca <- LexHCca(resLexCA, nb.clust = -1)

# Extract the hierarchical characteristic words of the res.ccah tree nodes
# (p-value threshold 0.0005), then display them.
res.Label <- LabelTree(res.ccah, proba = 0.0005)
res.Label