1. script.lupus
script.lupus.R in UTF-8 format can be downloaded from:
The file lupus-506.csv (UTF-8 format) can be downloaded from the Internet and saved to some directory:
2. Loading Xplortext package
library(Xplortext)
3. Loading the database
# Read the semicolon-separated corpus; the first column supplies the row names.
base <- read.csv2(
  "lupus-506.csv",
  header = TRUE,
  sep = ";",
  dec = ".",
  row.names = 1,
  encoding = "UTF-8"
)
4. PART I
Exploratory analysis
TextData summary
Before After
Documents 506.00 506.00
Occurrences 93123.00 49049.00
Words 7079.00 786.00
Mean-length 184.04 96.93
Index of the 10 most frequent words
Word Frequency N.Documents
1 patients 2273 462
2 sle 2126 466
3 disease 864 310
4 lupus 832 471
5 activity 592 224
6 systemic 588 453
7 erythematosus 542 448
8 treatment 505 209
9 study 501 307
10 group 477 160
Correspondence analysis
# NOTE(review): lupus.TD is not created anywhere in the visible script.
# Presumably it comes from a TextData() call on `base` without aggregation
# (same Fmin/Dmin/lminword as the later years x words table, whose
# "After" word and occurrence counts match) -- confirm and restore that call.

# Correspondence analysis of the lexical table, keeping ten dimensions.
lupus.LexCA <- LexCA(lupus.TD, ncp = 10, lmd = 6, lmw = 6, graph = FALSE)

# Print the eigenvalue table only (no document or word rows requested).
summary(lupus.LexCA, ncp = 10, nb.dec = 2, ndoc = 0, nword = 0)
Correspondence analysis summary
Eigenvalues
Variance % of var. Cumulative % of var.
dim 1 0.29 1.94 1.94
dim 2 0.25 1.73 3.67
dim 3 0.19 1.32 4.99
dim 4 0.18 1.24 6.23
dim 5 0.17 1.14 7.37
dim 6 0.16 1.09 8.46
dim 7 0.16 1.07 9.53
dim 8 0.15 1.02 10.54
dim 9 0.14 0.95 11.49
dim 10 0.13 0.90 12.39
Cramer's V 0.17 Inertia 14.75
Barchart of the first ten eigenvalues
# Bar chart of the first ten eigenvalues, labelled 1..10.
barplot(
  lupus.LexCA$eig[1:10, 1],
  names.arg = paste("", 1:10),
  main = "Eigenvalues"
)
Metakeys and Dockeys
# Summary restricted to four dimensions, with the meta-key / doc-key lists.
summary(
  lupus.LexCA,
  ncp = 4, nb.dec = 3,
  ndoc = 0, nword = 0,
  metaDocs = TRUE, metaWords = TRUE
)
Correspondence analysis summary
Eigenvalues
Variance % of var. Cumulative % of var.
dim 1 0.286 1.939 1.939
dim 2 0.255 1.726 3.666
dim 3 0.195 1.322 4.987
dim 4 0.183 1.240 6.227
Cramer's V 0.171 Inertia 14.753
Documents whose contribution is over 6 times the average document contribution
Dimension 1 +
181 190 5 317 109 246 87 250
Dimension 1 -
Dimension 2 +
350 441 172 209 450 290 130 302
Dimension 2 -
280 310 192 140
Dimension 3 +
113 484 410
Dimension 3 -
162 288 343 126 261 262 181 8 151 401
Dimension 4 +
162 487 253
Dimension 4 -
350 290 130
Words whose contribution is over 6 times the average word contribution
Dimension 1 +
association allele gene susceptibility polymorphism polymorphisms hla associated risk genetic alleles controls sle genotypes genes genotype
Dimension 1 -
placebo treatment group dhea months dose day bmd weeks therapy
Dimension 2 +
damage health disease sdi physical factors social
Dimension 2 -
cells cell expression depletion rituximab lymphocytes anti gene
Dimension 3 +
anti antibodies dsdna syndrome acl
Dimension 3 -
calcium bmd lumbar group premenopausal bone dhea allele polymorphism cholesterol density alone control groups controls susceptibility gene taking polymorphisms
Dimension 4 +
calcium bmd pulmonary vascular mortality risk death events cases alone lumbar
Dimension 4 -
cells health activity physical cell scores social bilag damage self depletion mental
Plot of planes (1,2) and (3,4)
# Meta-keys and doc-keys on the first plane (default axes).
# The original call had a stray empty argument ("cex=0.9, ,graph.type=...");
# R matches an empty argument positionally to the next unmatched formal,
# so it has been removed.
plot.LexCA(lupus.LexCA, selDoc = "meta 7", selWord = "meta 7",
  col.doc = "grey30", col.word = "black",
  title = "Meta-keys and doc-keys on the plane (1,2)",
  cex = 0.9, graph.type = "classic")

# Same selection on the plane spanned by dimensions 3 and 4.
plot.LexCA(lupus.LexCA, selDoc = "meta 7", selWord = "meta 7",
  axes = c(3, 4), col.doc = "grey30", col.word = "black",
  title = "Meta-keys and doc-keys on the plane (3,4)",
  cex = 0.9, graph.type = "classic")
PART II
Analysis years x words
# Build the lexical table from column 5 of `base`, aggregated by column 2.
lupuschrono.TD <- TextData(
  base,
  var.text = c(5),
  var.agg = c(2),
  idiom = "en",
  lminword = 3,
  Fmin = 15,
  Dmin = 10,
  stop.word.tm = TRUE
)
Display of a part of the aggregate table
# Show five selected word columns of the aggregated documents x words table.
as.matrix(lupuschrono.TD$DocTerm)[
  ,
  c("sle", "lupus", "clinical", "trials", "trial")
]
Terms
Docs sle lupus clinical trials trial
1994 63 27 15 1 13
1995 48 17 4 1 4
1996 86 46 20 0 4
1997 39 19 8 0 2
1998 61 37 20 0 4
1999 135 58 35 7 8
2000 87 27 16 0 0
2001 84 27 21 0 3
2002 73 25 15 0 3
2003 70 26 17 0 2
2004 139 53 10 2 6
2005 160 78 21 7 13
2006 177 54 22 3 3
2007 200 65 18 0 6
2008 123 75 25 1 8
2009 158 39 17 2 6
2010 148 64 28 5 15
2011 154 60 28 0 11
2012 121 35 16 5 10
Initial statistics
# Global before/after statistics only; no per-document or per-word listing.
summary(
  lupuschrono.TD,
  ndoc = 0,
  nword = 0
)
TextData summary
Before After
Documents 506.00 19.00
Occurrences 93123.00 49049.00
Words 7079.00 786.00
Mean-length 184.04 2581.53
NonEmpty.Docs 506.00 19.00
NonEmpty.Mean-length 184.04 2581.53
Display of the number of documents of every year
# Columns 1 and 4 of the document summary (shown as DocName and NumberDocs).
lupuschrono.TD$summDoc[, c(1, 4)]
DocName NumberDocs
1 1994 18
2 1995 15
3 1996 22
4 1997 13
5 1998 19
6 1999 33
7 2000 21
8 2001 19
9 2002 19
10 2003 19
11 2004 30
12 2005 38
13 2006 35
14 2007 42
15 2008 34
16 2009 31
17 2010 39
18 2011 34
19 2012 25
Correspondence analysis of the aggregate lexical table (ALT) years x words
# Correspondence analysis of the years x words table (no graph drawn here).
lupuschrono.LexCA <- LexCA(lupuschrono.TD, graph = FALSE)

# NOTE(review): ncp = 10 is requested, but the printed summary lists only
# five dimensions -- presumably because LexCA() above keeps its default
# number of dimensions; confirm, or pass ncp to LexCA() as well.
summary(lupuschrono.LexCA, ncp = 10, nb.dec = 2)
Correspondence analysis summary
Eigenvalues
Variance % of var. Cumulative % of var.
dim 1 0.06 10.69 10.69
dim 2 0.05 8.25 18.94
dim 3 0.04 6.86 25.80
dim 4 0.04 6.71 32.51
dim 5 0.04 6.49 39.00
Cramer's V 0.18 Inertia 0.57
DOCUMENTS
All documents are aggregate documents
Coordinates
Only the first 10 elements are shown
Dim 1 Dim 2 Dim 3 Dim 4 Dim 5
1994 -0.51 -0.01 0.17 -0.16 0.18
1995 -0.44 -0.19 0.06 0.50 -0.20
1996 -0.54 0.02 -0.30 -0.38 0.09
1997 -0.09 1.01 0.13 0.37 0.45
1998 -0.37 -0.19 0.15 0.04 -0.06
1999 -0.29 -0.11 0.41 0.09 -0.14
2000 -0.26 0.32 -0.18 0.05 -0.04
2001 0.01 0.20 -0.07 -0.01 -0.17
2002 0.01 -0.21 -0.38 0.53 0.03
2003 -0.08 0.12 -0.17 -0.18 -0.37
Contributions (by column total=100)
Only the first 10 elements are shown
Dim 1 Dim 2 Dim 3 Dim 4 Dim 5
1994 12.05 0.00 2.09 1.94 2.35
1995 7.99 1.92 0.25 16.47 2.81
1996 20.02 0.04 9.45 15.61 0.98
1997 0.36 55.81 1.10 9.07 13.85
1998 7.66 2.70 2.05 0.16 0.29
1999 8.11 1.59 25.03 1.09 3.06
2000 4.37 8.85 3.17 0.30 0.21
2001 0.00 3.04 0.41 0.01 2.97
2002 0.00 3.69 14.14 29.25 0.12
2003 0.37 1.24 2.87 3.35 13.85
Square cosinus (by row total=1)
Only the first 10 elements are shown
Dim 1 Dim 2 Dim 3 Dim 4 Dim 5
1994 0.25 0.00 0.03 0.03 0.03
1995 0.15 0.03 0.00 0.19 0.03
1996 0.33 0.00 0.10 0.16 0.01
1997 0.01 0.65 0.01 0.09 0.13
1998 0.17 0.05 0.03 0.00 0.00
1999 0.16 0.02 0.32 0.01 0.04
2000 0.08 0.13 0.04 0.00 0.00
2001 0.00 0.05 0.01 0.00 0.04
2002 0.00 0.06 0.18 0.35 0.00
2003 0.01 0.02 0.04 0.05 0.19
Inertia
1994 0.03
1995 0.03
1996 0.04
1997 0.04
1998 0.03
1999 0.03
2000 0.03
2001 0.03
2002 0.03
2003 0.03
WORDS
Coordinates
Only the first 10 elements are shown
Dim 1 Dim 2 Dim 3 Dim 4 Dim 5
abnormal -0.02 0.03 0.21 0.28 0.02
abnormalities -0.62 -0.27 0.23 0.13 -0.15
according -0.16 -0.08 -0.02 0.01 0.09
achieved -0.06 0.29 0.11 -0.12 -0.19
acid -0.91 -0.32 -0.25 -0.59 0.21
acl -0.60 0.28 0.32 -0.19 -0.81
acr 0.35 0.34 0.17 0.23 -0.27
across 0.09 0.11 -0.18 -0.04 0.02
activation 0.31 0.14 0.39 -0.09 0.11
active 0.02 0.08 -0.01 0.03 -0.07
Contributions (by-column total=100)
Only the first 10 elements are shown
Dim 1 Dim 2 Dim 3 Dim 4 Dim 5
abnormal 0.00 0.00 0.07 0.13 0.00
abnormalities 0.52 0.12 0.11 0.03 0.05
according 0.02 0.01 0.00 0.00 0.01
achieved 0.00 0.11 0.02 0.03 0.06
acid 0.65 0.10 0.07 0.43 0.06
acl 0.57 0.16 0.25 0.09 1.72
acr 0.20 0.25 0.08 0.13 0.20
across 0.01 0.01 0.03 0.00 0.00
activation 0.18 0.05 0.46 0.03 0.04
active 0.00 0.05 0.00 0.01 0.05
Square cosinus (by-row total=1)
Only the first 10 elements are shown
Dim 1 Dim 2 Dim 3 Dim 4 Dim 5
abnormal 0.00 0.00 0.05 0.09 0.00
abnormalities 0.34 0.06 0.05 0.01 0.02
according 0.03 0.01 0.00 0.00 0.01
achieved 0.00 0.08 0.01 0.01 0.03
acid 0.30 0.04 0.02 0.13 0.02
acl 0.14 0.03 0.04 0.01 0.26
acr 0.13 0.12 0.03 0.05 0.08
across 0.01 0.01 0.03 0.00 0.00
activation 0.10 0.02 0.16 0.01 0.01
active 0.00 0.03 0.00 0.01 0.03
Inertia
abnormal 0
abnormalities 0
according 0
achieved 0
acid 0
acl 0
acr 0
across 0
activation 0
active 0
Barchart of the eigenvalues
# Bar chart of the first 18 eigenvalues, with blank bar and axis labels.
barplot(
  lupuschrono.LexCA$eig[1:18, 1],
  xlab = "",
  names.arg = " ",
  main = "Eigenvalues"
)
Representation of the years
# Plot the years only (no words) on the first plane with base graphics,
# so that lines() below can draw on the same device.
plot.LexCA(lupuschrono.LexCA, selWord = NULL, title = " ",
  col.doc = "grey30", graph.type = "classic")

# Join the year points in row order using their coordinates on
# dimensions 1 and 2.
lines(lupuschrono.LexCA$row$coord[, 1], lupuschrono.LexCA$row$coord[, 2],
  lwd = 1, col = "black")
Representation of the words
# Plot the words selected by "contrib 1"; documents are hidden.
plot.LexCA(
  lupuschrono.LexCA,
  selDoc = NULL,
  selWord = "contrib 1",
  col.word = "black",
  title = " ",
  graph.type = "classic"
)
PART III
Analysis of the table years x drug names
# Preliminary table with looser thresholds; its column names feed setA.
lupusmed.TD <- TextData(
  base,
  var.text = c(5),
  var.agg = c(2),
  idiom = "en",
  lminword = 2,
  Fmin = 5,
  Dmin = 5,
  stop.word.tm = TRUE
)
setA: all the words, except the stopwords
# Vocabulary retained in the preliminary table.
setA <- colnames(lupusmed.TD$DocTerm)
setB: drugnames cited in the database
# Hand-curated list of drug-related terms to keep.
# NOTE(review): "methotrexate" and "rituximab" appear twice (harmless for
# setdiff()); "predisolone" and "mycophelonate" look like misspellings of
# prednisolone / mycophenolate -- confirm against the corpus before changing.
setB <- c(
  "methotrexate", "cytotoxic", "cyclophosphamide",
  "prednisone", "anticoagulant", "anticoagulants",
  "antibody", "antibodies", "antiinflammatory",
  "hydroxychloroquine", "anticardiolipin",
  "immunosuppressive", "immunosuppression",
  "corticosteroid", "corticosteroids",
  "glucosteroid", "glucosteroids",
  "steroid", "steroids",
  "chloroquine", "azathioprine", "antigens",
  "rituximab", "belimumab", "pharmacokinetics",
  "dhea", "methotrexate", "rituximab", "plasma",
  "prasterone", "predisolone",
  "immunoglobulin", "immunoglobulins",
  "infliximab", "mizoribine", "interleukin",
  "dehydroepiandrosterone", "mycophelonate",
  "abetimus", "bromocriptine",
  "antimalarial", "antimalarials",
  "mm", "hrt", "csa", "ivig", "mtx", "irf5"
)
nomedic: non-drug words that have to be eliminated
# Every retained word that is not in the drug list.
nomedic <- setdiff(x = setA, y = setB)
Building the table years x drugs
# Rebuild the aggregated table, passing the non-drug words as user stopwords.
lupusmedic.TD <- TextData(
  base,
  var.text = c(5),
  var.agg = c(2),
  idiom = "en",
  lminword = 2,
  Fmin = 15,
  Dmin = 6,
  stop.word.tm = TRUE,
  stop.word.user = nomedic
)
Characteristic drugs for each year
# Characteristic words per year; maxCharDoc = 0 requests no characteristic documents.
lupuschar <- LexChar(
  lupusmedic.TD,
  maxCharDoc = 0
)
CHARACTERISTIC WORDS
(DETAILED INFORMATION)
Group1: 1994
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Word Intern % glob % Intern freq Glob freq p.value v.test
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
corticosteroids 17.949 3.354 7 44 4e-04 3.538414
interleukin 7.692 1.601 3 21 0.04461 2.008358
Group2: 1995
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Word Intern % glob % Intern freq Glob freq p.value v.test
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
prednisone 29.545 8.308 13 109 5e-05 4.049304
anticardiolipin 13.636 1.982 6 26 0.00028 3.633209
Group3: 1996
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Word Intern % glob % Intern freq Glob freq p.value v.test
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
cyclophosphamide 20.968 7.698 13 101 0.00106 3.273648
anticoagulant 8.065 1.524 5 20 0.00359 2.912227
hydroxychloroquine 8.065 1.905 5 25 0.01024 2.567731
steroids 6.452 1.753 4 23 0.04073 2.046302
dhea 0.000 6.326 0 83 0.03145 -2.151298
antibody 0.000 9.832 0 129 0.00278 -2.991389
Group4: 1997
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Word Intern % glob % Intern freq Glob freq p.value v.test
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
cytotoxic 23.636 1.601 13 21 0.00000 7.455421
methotrexate 10.909 1.905 6 25 0.00079 3.356919
steroid 14.545 5.107 8 67 0.01078 2.549687
corticosteroids 10.909 3.354 6 44 0.01692 2.388521
azathioprine 7.273 1.905 4 25 0.03628 2.093724
prednisone 0.000 8.308 0 109 0.01525 -2.426272
Group5: 1998
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Word Intern % glob % Intern freq Glob freq p.value v.test
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
dhea 32.143 6.326 18 83 0.00000 5.952324
cyclophosphamide 0.000 7.698 0 101 0.02037 -2.319454
Group6: 1999
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Word Intern % glob % Intern freq Glob freq p.value v.test
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
anticoagulant 6.452 1.524 4 20 0.02479 2.244728
Group7: 2000
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Word Intern % glob % Intern freq Glob freq p.value v.test
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
prednisone 22.951 8.308 14 109 0.00053 3.466421
cyclophosphamide 19.672 7.698 12 101 0.00322 2.945695
dhea 0.000 6.326 0 83 0.03369 -2.123789
antibody 1.639 9.832 1 129 0.02479 -2.244608
Group8: 2001
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Word Intern % glob % Intern freq Glob freq p.value v.test
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
antibodies 37.931 16.235 11 213 0.00734 2.680808
Group9: 2002
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Word Intern % glob % Intern freq Glob freq p.value v.test
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
chloroquine 9.333 1.372 7 18 6e-05 4.021279
prasterone 9.333 2.896 7 38 0.00906 2.609868
plasma 0.000 4.878 0 64 0.04198 -2.033695
rituximab 0.000 5.488 0 72 0.02553 -2.233235
Group10: 2003
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Word Intern % glob % Intern freq Glob freq p.value v.test
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
cyclophosphamide 18.182 7.698 10 101 0.01454 2.443712
antibodies 27.273 16.235 15 213 0.04727 1.983876
Group11: 2004
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Word Intern % glob % Intern freq Glob freq p.value v.test
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
dhea 17.241 6.326 15 83 0.00038 3.550645
prasterone 9.195 2.896 8 38 0.00511 2.800082
rituximab 12.644 5.488 11 72 0.01210 2.509323
belimumab 0.000 4.345 0 57 0.03663 -2.08982
Group12: 2005
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Word Intern % glob % Intern freq Glob freq p.value v.test
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
antibody 19.685 9.832 25 129 0.00056 3.452042
prasterone 6.299 2.896 8 38 0.04978 1.961864
cyclophosphamide 3.150 7.698 4 101 0.04727 -1.98389
rituximab 1.575 5.488 2 72 0.04445 -2.00982
belimumab 0.000 4.345 0 57 0.00528 -2.789644
Group13: 2006
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Word Intern % glob % Intern freq Glob freq p.value v.test
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
steroids 5.983 1.753 7 23 0.00550 2.776268
rituximab 11.966 5.488 14 72 0.00638 2.727773
chloroquine 4.274 1.372 5 18 0.03444 2.114918
plasma 0.855 4.878 1 64 0.03302 -2.131786
belimumab 0.000 4.345 0 57 0.00862 -2.626825
dhea 0.000 6.326 0 83 0.00066 -3.40623
Group14: 2007
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Word Intern % glob % Intern freq Glob freq p.value v.test
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rituximab 24.286 5.488 17 72 0.00000 5.341853
mm 5.714 1.143 4 15 0.01298 2.484302
immunosuppressive 10.000 3.735 7 49 0.02585 2.228443
steroid 0.000 5.107 0 67 0.04601 -1.995285
dhea 0.000 6.326 0 83 0.01812 -2.363078
Group15: 2008
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Word Intern % glob % Intern freq Glob freq p.value v.test
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
prasterone 10.870 2.896 10 38 0.00032 3.595288
methotrexate 8.696 1.905 8 25 0.00035 3.577386
rituximab 11.957 5.488 11 72 0.01855 2.354478
belimumab 9.783 4.345 9 57 0.02985 2.172018
dhea 0.000 6.326 0 83 0.00390 -2.885783
Group16: 2009
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Word Intern % glob % Intern freq Glob freq p.value v.test
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
belimumab 18.033 4.345 11 57 6e-05 4.008059
dhea 0.000 6.326 0 83 0.03369 -2.123789
Group17: 2010
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Word Intern % glob % Intern freq Glob freq p.value v.test
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
dhea 13.187 6.326 12 83 0.01888 2.347879
plasma 10.989 4.878 10 64 0.02070 2.313419
antibodies 6.593 16.235 6 213 0.00833 -2.638348
Group18: 2011
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Word Intern % glob % Intern freq Glob freq p.value v.test
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
belimumab 28.378 4.345 21 57 0.00000 7.293687
antibody 21.622 9.832 16 129 0.00283 2.9860
prednisone 16.216 8.308 12 109 0.03126 2.153687
dhea 0.000 6.326 0 83 0.01374 -2.464023
cyclophosphamide 0.000 7.698 0 101 0.00446 -2.843903
antibodies 4.054 16.235 3 213 0.00188 -3.108972
Group19: 2012
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Word Intern % glob % Intern freq Glob freq p.value v.test
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
belimumab 27.273 4.345 15 57 0.00000 5.89998
plasma 16.364 4.878 9 64 0.00189 3.106941
corticosteroid 10.909 2.744 6 36 0.00606 2.744752
mm 5.455 1.143 3 15 0.04436 2.010703
antibodies 1.818 16.235 1 213 0.00114 -3.252759
# Plot the characteristic words with negative ones disabled (char.negat = FALSE).
plot.LexChar(
  lupuschar,
  numc = 3,
  text.size = 12,
  char.negat = FALSE,
  top = " "
)