This appendix documents how the participation events data were grouped. Methodologically, I use a simple keyword search on all codings of a given year. The original coding data can be retrieved by opening the IAEA.rqda and OPCW.rqda files with the RQDA software package.1

For the keyword classification, I extracted all ACT.Part codings from these files and copied them into one text file per year. The search is thus performed only on those passages of the Annual Reports that I coded qualitatively as relevant statements about participation events.
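
RQDA stores each project as a SQLite database, so this extraction step can in principle be reproduced directly from the .rqda files. The following is only a minimal sketch of that step, not code from the original analysis: the table and column names (coding, freecode, source, seltext) reflect my reading of the RQDA database schema, and I assume that each source file name contains the report year.

library(RSQLite)

# assumed RQDA schema: coded text in coding.seltext, code names in freecode,
# source file names in source
con <- dbConnect(SQLite(), "IAEA.rqda")
codings <- dbGetQuery(con, "
    SELECT source.name AS filename, coding.seltext AS text
    FROM coding
    JOIN freecode ON coding.cid = freecode.id
    JOIN source   ON coding.fid = source.id
    WHERE freecode.name = 'ACT.Part' AND coding.status = 1")
dbDisconnect(con)

# derive the report year from the source file name (assumed to contain it)
codings$year <- sub(".*?([0-9]{4}).*", "\\1", codings$filename)

# write one plain-text file per year into the corpus directory used below
for (y in unique(codings$year)) {
    writeLines(codings$text[codings$year == y],
               file.path("../data/corpora/iaea-part-events", paste0(y, ".txt")))
}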

The following packages were used for the analysis2:

library(tm)
library(slam)
library(dplyr)
library(ggplot2)
library(reshape2)
library(xtable)

IAEA

In the first step, I create a corpus from the annual codings and pre-process the texts to remove stopwords, punctuation, and upper-case letters. Next, I create a document-term matrix, which records the frequency of each term in each document. This matrix is then used to identify the relevant search terms.

# build the corpus from the per-year text files of ACT.Part codings
corpus <- Corpus(DirSource("../data/corpora/iaea-part-events/", encoding="UTF-8"),
            readerControl=list(language="en"))
corpusVars <- data.frame(var1=factor(rep("", length(corpus))),
            row.names=names(corpus))

# pre-processing: lower case, collapse punctuation/whitespace/control
# characters into single blanks, and remove numbers
dtmCorpus <- corpus
dtmCorpus <- tm_map(dtmCorpus, content_transformer(tolower))
dtmCorpus <- tm_map(dtmCorpus, content_transformer(function(x)
            gsub("(['’\n]|[[:punct:]]|[[:space:]]|[[:cntrl:]])+",
                     " ", x)))
dtmCorpus <- tm_map(dtmCorpus, removeNumbers)

# document-term matrix with raw term frequencies (terms of at least two characters)
dtm <- DocumentTermMatrix(dtmCorpus, control=list(tolower=FALSE,
                    wordLengths=c(2, Inf)))
rm(dtmCorpus)

# record term occurrences, flag stopwords, and drop the stopwords from the matrix
dictionary <- data.frame(row.names=colnames(dtm),
                "Occurrences"=col_sums(dtm),
                "Stopword"=ifelse(colnames(dtm) %in% stopwords("en"),
                "Stopword", ""), stringsAsFactors=FALSE)
dtm <- dtm[, !colnames(dtm) %in% stopwords("en")]
attr(dtm, "dictionary") <- dictionary
rm(dictionary)

# store language and processing options as metadata on corpus and matrix
meta(corpus, type="corpus", tag="language") <-
    attr(dtm, "language") <- "en"
meta(corpus, type="corpus", tag="processing") <-
    attr(dtm, "processing") <- c(lowercase=TRUE, punctuation=TRUE,
                    digits=TRUE, stopwords=TRUE, stemming=FALSE,
                    customStemming=FALSE, twitter=FALSE,
                    removeHashtags=NA, removeNames=NA)
corpus
## <<VCorpus>>
## Metadata:  corpus specific: 2, document level (indexed): 0
## Content:  documents: 55
dtm
## <<DocumentTermMatrix (documents: 55, terms: 7986)>>
## Non-/sparse entries: 48860/390370
## Sparsity           : 89%
## Maximal term length: 26
## Weighting          : term frequency (tf)
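
The document-term matrix is also where the candidate search terms and their orthographic variants can be identified. A minimal sketch using functions from the tm package; the frequency threshold and the search pattern are illustrative choices, not taken from the original analysis:

# list all terms that occur at least 20 times across the corpus
findFreqTerms(dtm, lowfreq=20)

# list all orthographic variants of a candidate term, e.g. "workshop"
grep("^workshop", Terms(dtm), value=TRUE)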

In the second step, I first collect all variants of each search term, since the same term may still appear with different orthography in the source texts (e.g. plural forms or variants such as "workshopsí"). Second, I group the terms according to the overarching topics of Science, Training, and Advice.

terms <- as.data.frame(as.matrix(dtm))
terms$Year <- 1957:2011

## combine relevant terms
terms$WORKSHOP <- terms$workshop + terms$workshops +
    terms$workshopsí
terms$SEMINAR <- terms$seminar + terms$seminarí +
    terms$seminars + terms$seminarsã
terms$TRAINING <- terms$training + terms$trainingí
terms$MEETING <- terms$meeting + terms$meetingís +
    terms$meetings
terms$COURSE <- terms$course + terms$courses
terms$PANEL <- terms$panel + terms$panelonthe +
    terms$panels
terms$CONSULTANT <- terms$consultant +
    terms$consultants
terms$SYMPOSIA <- terms$symposia + terms$symposium
terms$NETWORK <- terms$network + terms$networki +
    terms$networkís + terms$networks
terms$ADVISOR <- terms$advisor + terms$advisory

## create term categories
terms$GROUP_SCIENCE <- terms$SEMINAR + terms$PANEL +
    terms$SYMPOSIA
terms$GROUP_TRAINING <- terms$TRAINING + terms$COURSE +
    terms$WORKSHOP
terms$GROUP_ADVICE <- terms$MEETING + terms$CONSULTANT +
    terms$NETWORK + terms$ADVISOR


write.csv(terms, file = "coding_terms.csv")

terms2 <- terms %>% select(Year, WORKSHOP, SEMINAR, TRAINING,
                        MEETING, COURSE, PANEL, CONSULTANT, SYMPOSIA,
                        NETWORK, GROUP_SCIENCE, GROUP_TRAINING,
                        GROUP_ADVICE)
write.csv(terms2, file = "iaea-participation-events.csv")
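
The resulting yearly counts, i.e. the contents of iaea-participation-events.csv, are reproduced in the table below. With the xtable package loaded above, such a table can for instance be rendered for LaTeX along the following lines (a sketch of one possible call, not necessarily how the table was typeset):

# render the yearly term counts as a LaTeX table
print(xtable(terms2, digits=0), include.rownames=FALSE)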
Year WORKSHOP SEMINAR TRAINING MEETING COURSE PANEL CONSULTANT SYMPOSIA NETWORK GROUP_SCIENCE GROUP_TRAINING GROUP_ADVICE
1957 0 3 22 4 0 3 5 2 0 8 22 12
1958 0 8 19 17 14 12 1 12 0 32 33 25
1959 1 5 18 14 25 39 0 12 0 56 44 14
1960 0 2 12 11 12 29 1 15 0 46 24 14
1961 0 7 8 15 10 9 3 10 0 26 18 19
1962 0 4 6 20 3 15 4 10 0 29 9 26
1963 0 1 15 21 10 23 1 10 0 34 25 22
1964 0 4 13 26 7 29 1 16 0 49 20 34
1965 0 8 14 19 9 20 4 13 0 41 23 25
1966 0 1 17 5 12 12 0 14 0 27 29 5
1967 0 3 15 17 12 21 3 15 0 39 27 20
1968 0 3 9 8 9 23 5 16 0 42 18 13
1969 0 2 8 18 8 22 7 12 0 36 16 25
1970 0 7 15 23 13 21 2 14 0 42 28 27
1971 1 6 17 25 12 17 9 10 0 33 30 36
1972 1 4 4 23 3 22 5 17 0 43 8 28
1973 3 5 8 28 7 11 1 15 0 31 18 30
1974 3 7 18 26 10 7 4 15 1 29 31 45
1975 2 12 17 24 8 1 7 20 2 33 27 55
1976 5 5 9 40 9 0 7 5 4 10 23 70
1977 3 5 11 29 13 0 5 13 2 18 27 53
1978 7 7 19 19 13 0 4 13 1 20 39 32
1979 6 7 16 19 10 1 1 12 2 20 32 30
1980 3 8 22 8 17 0 1 7 2 15 42 14
1981 8 17 33 25 26 0 11 13 1 30 67 44
1982 14 10 45 41 44 0 13 10 1 20 103 71
1983 14 18 61 48 41 2 13 6 5 26 116 84
1984 2 15 41 58 38 1 17 14 8 30 81 102
1985 20 11 65 46 47 0 18 12 5 23 132 89
1986 20 14 60 69 45 1 26 12 5 27 125 121
1987 38 18 75 46 53 1 11 15 4 34 166 82
1988 26 17 86 90 57 1 21 11 5 29 169 138
1989 10 5 43 61 21 1 3 14 0 20 74 86
1990 11 13 52 79 20 1 10 16 6 30 83 117
1991 14 9 45 78 24 2 17 14 2 25 83 121
1992 14 5 36 107 12 1 22 13 9 19 62 154
1993 15 8 43 105 22 2 20 9 2 19 80 154
1994 10 5 37 86 10 1 17 6 3 12 57 123
1995 7 12 23 36 7 0 0 12 2 24 37 45
1996 2 6 20 32 6 0 2 5 6 11 28 59
1997 3 3 17 39 6 1 2 13 5 17 26 71
1998 15 10 24 63 9 0 5 14 6 24 48 90
1999 28 8 43 70 24 1 3 15 10 24 95 105
2000 12 8 38 31 16 2 1 9 6 19 66 52
2001 22 8 71 36 29 2 2 5 8 15 122 54
2002 20 4 68 42 22 1 4 10 9 15 110 64
2003 20 4 46 24 28 0 0 2 17 6 94 43
2004 20 7 41 39 14 5 0 2 14 14 75 56
2005 16 5 65 24 25 1 1 6 12 12 106 43
2006 24 6 46 28 19 1 0 1 6 8 89 39
2007 27 6 68 43 30 0 0 4 16 10 125 64
2008 27 4 47 25 32 1 0 3 11 8 106 38
2009 35 5 60 48 33 1 0 10 18 16 128 73
2010 26 5 65 37 36 2 1 6 14 13 127 59
2011 34 10 70 55 36 1 0 1 24 12 140 83
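
The grouped counts can also be inspected visually over time. Since ggplot2 and reshape2 are loaded above but their calls are not shown in this appendix, the following is only a sketch of how the three category series might be plotted:

# reshape the three category columns into long format and plot them by year
plotdata <- melt(terms2[, c("Year", "GROUP_SCIENCE", "GROUP_TRAINING", "GROUP_ADVICE")],
                 id.vars="Year", variable.name="Category", value.name="Count")
ggplot(plotdata, aes(x=Year, y=Count, colour=Category)) +
    geom_line() +
    labs(x="Year", y="Term frequency")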

OPCW

Again, in the first step, I create a corpus from the annual codings and pre-process the texts to remove stopwords, punctuation, and upper-case letters.

corpus <- Corpus(DirSource("../data/corpora/opcw-part-events/", encoding="UTF-8"),
                    readerControl=list(language="en"))
corpusVars <- data.frame(var1=factor(rep("", length(corpus))),
                    row.names=names(corpus))
dtmCorpus <- corpus
dtmCorpus <- tm_map(dtmCorpus, content_transformer(tolower))
dtmCorpus <- tm_map(dtmCorpus,
    content_transformer(function(x)
    gsub("(['’\n]|[[:punct:]]|[[:space:]]|[[:cntrl:]])+", " ", x)))
dtmCorpus <- tm_map(dtmCorpus, removeNumbers)
dtm <- DocumentTermMatrix(dtmCorpus,
                    control=list(tolower=FALSE, wordLengths=c(2, Inf)))
rm(dtmCorpus)
dictionary <- data.frame(row.names=colnames(dtm),
                    "Occurrences"=col_sums(dtm),
                    "Stopword"=ifelse(colnames(dtm) %in% stopwords("en"),
                    "Stopword", ""),   stringsAsFactors=FALSE)
dtm <- dtm[, !colnames(dtm) %in% stopwords("en")]
attr(dtm, "dictionary") <- dictionary
rm(dictionary)
meta(corpus, type="corpus", tag="language") <-
    attr(dtm, "language") <- "en"
meta(corpus, type="corpus", tag="processing") <-
    attr(dtm, "processing") <- c(lowercase=TRUE, punctuation=TRUE,
                    digits=TRUE, stopwords=TRUE, stemming=FALSE,
                    customStemming=FALSE, twitter=FALSE,
                    removeHashtags=NA, removeNames=NA)
corpus
## <<VCorpus>>
## Metadata:  corpus specific: 2, document level (indexed): 0
## Content:  documents: 15
dtm
## <<DocumentTermMatrix (documents: 15, terms: 2238)>>
## Non-/sparse entries: 7251/26319
## Sparsity           : 78%
## Maximal term length: 24
## Weighting          : term frequency (tf)

In the second step, I again first collect all variants of each search term that appear with different orthography. Second, I group the terms according to the overarching topics of Science, Training, and Advice.

terms <- as.data.frame(as.matrix(dtm))
terms$Year <- 1997:2011

## combine relevant terms
terms$WORKSHOP <- terms$workshop + terms$workshops + terms$workshopin
terms$SEMINAR <- terms$seminar + terms$seminars + terms$seminarfrom
terms$TRAINING <- terms$training
terms$MEETING <- terms$meeting + terms$meetings
terms$COURSE <- terms$course + terms$coursebefore +
    terms$coursefor + terms$courses + terms$courseswere
terms$PANEL <- terms$panelists
terms$SYMPOSIA <- terms$symposium
terms$NETWORK <- terms$network
terms$ADVISOR <- terms$advisory + terms$adviser

## create term categories
terms$GROUP_SCIENCE <- terms$SEMINAR + terms$PANEL + terms$SYMPOSIA
terms$GROUP_TRAINING <- terms$TRAINING + terms$COURSE + terms$WORKSHOP
terms$GROUP_ADVICE <- terms$MEETING + terms$NETWORK + terms$ADVISOR


write.csv(terms, file = "coding_terms_opcw.csv")

terms2 <- terms %>% select(Year, WORKSHOP, SEMINAR, TRAINING,
                    MEETING, COURSE, PANEL, SYMPOSIA, NETWORK,
                    GROUP_SCIENCE, GROUP_TRAINING, GROUP_ADVICE)
write.csv(terms2, file = "opcw-participation-events.csv")
Year WORKSHOP SEMINAR TRAINING MEETING COURSE PANEL SYMPOSIA NETWORK GROUP_SCIENCE GROUP_TRAINING GROUP_ADVICE
1997 1 4 6 1 10 0 0 0 4 17 2
1998 3 9 4 4 9 0 5 2 14 16 11
1999 7 8 22 8 25 0 3 3 11 54 15
2000 18 3 22 12 21 0 0 5 3 61 19
2001 13 2 10 12 8 1 1 2 4 31 17
2002 6 3 12 9 17 0 0 1 3 35 12
2003 9 3 6 8 6 0 0 6 3 21 16
2004 12 1 9 7 9 0 0 4 1 30 14
2005 13 1 11 5 10 0 0 0 1 34 8
2006 12 3 7 7 14 0 0 0 3 33 9
2007 11 0 9 4 9 0 0 0 0 29 6
2008 13 2 12 9 11 0 0 0 2 36 11
2009 16 3 15 17 19 0 0 1 3 50 20
2010 12 4 20 12 27 0 0 1 4 59 15
2011 11 9 14 8 14 0 0 1 9 39 11

  1. Ronggui Huang (2014). RQDA: R-based Qualitative Data Analysis. R package version 0.2-7. http://rqda.r-forge.r-project.org/.

  2. David B. Dahl (2014). xtable: Export Tables to LaTeX or HTML. R package version 1.7-4. http://CRAN.R-project.org/package=xtable.
     Ingo Feinerer and Kurt Hornik (2014). tm: Text Mining Package. R package version 0.6. http://CRAN.R-project.org/package=tm.
     Kurt Hornik, David Meyer and Christian Buchta (2014). slam: Sparse Lightweight Arrays and Matrices. R package version 0.1-32. http://CRAN.R-project.org/package=slam.
     Hadley Wickham and Romain Francois (2014). dplyr: A Grammar of Data Manipulation. R package version 0.3.0.2. http://CRAN.R-project.org/package=dplyr.
     Hadley Wickham (2009). ggplot2: Elegant Graphics for Data Analysis. Springer, New York.
     Hadley Wickham (2007). Reshaping Data with the reshape Package. Journal of Statistical Software, 21(12), 1-20.