# Correlation Analysis – Repeated Measures Categorical Data

Tags: categorical-data, chi-squared-test, correlation, r, repeated-measures

I'm confused about the correct way to deal with my data. They are structured as follows:

and here is a sample:

# Sample of the data (this looks like dput() output): a tibble with 80 rows
# (row.names = c(NA, -80L); class "tbl_df", "tbl", "data.frame") and the
# columns id, trial, response, default, correct, iscorrect, min, max, time
# and condition.
# In this sample id takes the values 3, 4 and 6, response is coded -1/0/1,
# iscorrect and condition are coded 0/1, default is always 0 and correct is
# always 1.
# NOTE: the expression is not assigned to a name here; assign it to a
# variable before using it in an analysis.
structure(list(id = c(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6), trial = c(0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12,
13, 14, 15, 17, 18, 19, 20, 21, 24, 25, 26, 27, 29, 31, 32, 33,
34, 35, 0, 1, 2, 3, 5, 6, 7, 8, 11, 12, 13, 14, 15, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, 0, 1,
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 19, 20, 22, 24
), response = c(-1, -1, 1, 0, 0, 0, 0, -1, -1, 0, 0, -1, 0, -1,
1, 0, -1, -1, -1, 1, 1, -1, -1, 0, -1, 0, 0, -1, 0, 0, -1, 0,
-1, -1, 0, -1, 0, 0, 0, 1, 0, 1, 1, 0, 0, -1, -1, 0, 1, 1, 0,
-1, -1, 1, 0, -1, 1, 0, 1, -1, 0, 1, 1, -1, 1, -1, 1, 1, 0, 1,
1, 1, -1, 0, 0, 1, -1, 1, -1, 0), default = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), correct = c(1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), iscorrect = c(0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0,
0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0,
1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0), min = c(150,
150, 100, 100, 150, 150, 100, 50, 150, 50, 100, 50, 150, 100,
100, 50, 100, 50, 50, 150, 50, 150, 100, 150, 150, 50, 50, 100,
100, 50, 50, 150, 50, 100, 150, 100, 100, 150, 50, 100, 100,
150, 50, 150, 150, 150, 50, 50, 150, 100, 150, 50, 150, 50, 100,
100, 50, 50, 100, 100, 50, 100, 150, 50, 100, 100, 100, 150,
150, 150, 50, 100, 100, 100, 50, 150, 50, 150, 100, 50), max = c(180.8,
180.8, 125.4, 125.4, 180.8, 180.8, 125.4, 62.4, 180.8, 62.4,
125.4, 62.4, 180.8, 125.4, 125.4, 62.4, 125.4, 62.4, 62.4, 180.8,
62.4, 180.8, 125.4, 180.8, 180.8, 62.4, 62.4, 125.4, 125.4, 62.4,
62.4, 180.8, 62.4, 125.4, 180.8, 125.4, 125.4, 180.8, 62.4, 125.4,
125.4, 180.8, 62.4, 180.8, 180.8, 180.8, 62.4, 62.4, 180.8, 125.4,
180.8, 62.4, 180.8, 62.4, 125.4, 125.4, 62.4, 62.4, 125.4, 125.4,
62.4, 125.4, 180.8, 62.4, 125.4, 125.4, 125.4, 180.8, 180.8,
180.8, 62.4, 125.4, 125.4, 125.4, 62.4, 180.8, 62.4, 180.8, 125.4,
62.4), time = c(1, 7, 9, 3, 5, 5, 5, 1, 3, 9, 1, 5, 3, 5,
9, 7, 7, 1, 3, 9, 3, 9, 1, 7, 1, 9, 7, 7, 3, 5, 5, 7, 9, 7, 1,
5, 1, 5, 3, 9, 3, 9, 7, 1, 3, 7, 5, 3, 9, 9, 3, 1, 5, 9, 7, 1,
7, 1, 3, 5, 9, 9, 3, 1, 7, 5, 1, 5, 1, 7, 7, 9, 3, 5, 5, 9, 5,
5, 3, 9), condition = c(0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1,
0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1,
0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0,
0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0,
1, 1, 0)), row.names = c(NA, -80L), class = c("tbl_df", "tbl",
"data.frame"))


Each value of id identifies one participant.

I'm interested in correlations between:

• iscorrect and condition (if the condition influences the incorrect/correct ratio of answers but also if inside the same condition the relation between incorrect/correct is significant, like in the figure below)
• response and condition (if the response changes depending on the condition but also the differences between the responses within each condition)

These are all categorical variables, right? I used the Pearson chi-squared test to check whether the variables are associated, but someone suggested I make boxplots to compare the means/medians (similar to what's in the figure below). Is that a good idea?

Your data are not independent: each participant (id) contributes many trials. I don't have a problem with looking at boxplots, but you need to remember that the data are not independent. Since the chi-squared test assumes independent observations, it is not appropriate here. Your situation seems rather simple, so you could probably use the Cochran–Mantel–Haenszel test. It tests whether there is a relationship between condition and the outcome (i.e., iscorrect or response) within (i.e., controlling for) strata, which here are the participants.

# Load the sample data into `d`. The `...` on the second line is an elision
# standing for the rest of the structure(...) expression given in the
# question; paste the full expression here, since the snippet does not parse
# as abbreviated.
d = structure(list(id = c(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
...)
# Mantel-Haenszel test of condition (x) against iscorrect (y), stratified by
# id (z), i.e. with one stratum per participant.
with(d, mantelhaen.test(x=condition, y=iscorrect, z=id))
# Console output of the call above:
#
#   Mantel-Haenszel chi-squared test with continuity correction
#
# data:  condition and iscorrect and id
# Mantel-Haenszel X-squared = 0.26908, df = 1, p-value = 0.604
# alternative hypothesis: true common odds ratio is not equal to 1
# 95 percent confidence interval:
#  0.2305694 1.8691867
# sample estimates:
# common odds ratio
#         0.6564885