lfmAnalysis.Rmd

---
title: "LFM Analysis"
output: html_document
date: "2022-12-01"
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(dplyr)
library(tidyr)
library(readxl)
library(ggplot2)
library(PairedData)

library(lme4)      # Mixed models
library(lmerTest)  # Summary and p-values for mixed models
library(sjPlot)   # APA-style tables for mixed models
library(broom.mixed)
library(kableExtra)
library(ggsci)
```

Load the test data set that includes 4 ppts:
```{r}
# Read the test data set and relabel the numeric condition codes:
# 1 becomes "error" and 2 becomes "study".
testData <- read_excel("data/testData.xlsx") %>%
  mutate(
    condition = replace(condition, condition == 1, "error"),
    condition = replace(condition, condition == 2, "study")
  )

```

Summarize Group Performance:
```{r message=FALSE, warning=FALSE}
# Per participant, count learn-phase trials coded correct == 3
# (presumably the timeout code, going by the variable name).
timeOut <- testData %>%
  filter(phase == "learn", correct == 3) %>%
  group_by(participant) %>%
  summarise(count = n())

# Remove the points in which ppt guessed correctly
# Every row of the study condition.
study_cond <- filter(testData, condition == "study")

# (participant, cue) pairs whose learn-phase trial was not coded correct.
wrong_guess <- testData %>%
  filter(phase == "learn", correct != 1) %>%
  dplyr::select(participant, cue)

# Test-phase rows for those pairs, with the study-condition rows joined back.
clean_test <- wrong_guess %>%
  left_join(testData, by = c("participant", "cue")) %>%
  filter(phase == "test") %>%
  full_join(study_cond)

# Per participant and condition: number of correct test trials divided by 30
# (assumes 30 test items per condition) and the mean RT of those trials.
indivLFM <- testData %>%
  filter(phase == "test", correct == 1) %>%
  group_by(participant, condition) %>%
  summarise(
    performance = n() / 30,
    avgRT = mean(rt)
  )

# Per participant and condition summary on the cleaned trial set.
# Accuracy is the share of each cell's *remaining* test trials that were
# answered correctly: clean_test has had items removed, so dividing by a
# fixed 30 would understate performance. Cells with no correct response are
# kept with performance 0 instead of being dropped.
indivLFM_clean <- clean_test %>%
  filter(phase == "test") %>%
  group_by(participant, condition) %>%
  summarize(
    performance = sum(correct == 1, na.rm = TRUE) / n(),
    # which() skips NA codes; avgRT is NaN when no trial was correct.
    avgRT = mean(rt[which(correct == 1)])
  )

# Condition-level mean and SD of accuracy and RT across participants
# (all test trials).
groupLFM <- indivLFM %>%
  group_by(condition) %>%
  summarise(
    meanPerformance = mean(performance),
    stdPerformance = sd(performance),
    meanRT = mean(avgRT),
    stdRT = sd(avgRT)
  )

# The same summary computed from the cleaned per-participant table.
groupLFM_clean <- indivLFM_clean %>%
  group_by(condition) %>%
  summarise(
    meanPerformance = mean(performance),
    stdPerformance = sd(performance),
    meanRT = mean(avgRT),
    stdRT = sd(avgRT)
  )
```

Plot something:
```{r}
# One point per participant and condition, with a line joining each
# participant's two conditions.
ggplot(indivLFM_clean, aes(x = condition, y = performance)) +
  geom_point(aes(color = participant)) +
  geom_line(aes(group = participant)) +
  labs(
    title = "Performance Compared by Condition",
    x = "Condition",
    y = "Performance"
  )
```

Check out the real data in the same way:
```{r}
# take out ppt 1066 bc timeout > 15
# NOTE(review): no participant is filtered out here; confirm that the
# spreadsheet already omits 1066.
lfmData <- read_excel(path = "data/lfmData.xlsx")

```

```{r}
# Per participant, count learn-phase trials coded correct == 3.
timeOut <- lfmData %>%
  filter(phase == "learn", correct == 3) %>%
  group_by(participant) %>%
  summarise(count = n())
# 1017 & 1061 ??

# need to remove where learn was correct
# can also do analysis where all timeout trials are taken out**

# Every row from condition 2.
studyItems <- filter(lfmData, condition == 2)

# (participant, cue) pairs coded correct during the learn phase.
correct_guess <- lfmData %>%
  filter(phase == "learn", correct == 1) %>%
  dplyr::select(participant, cue)

# (participant, cue) pairs whose learn-phase trial had any other code.
guess <- lfmData %>%
  filter(phase == "learn", correct != 1) %>%
  dplyr::select(participant, cue)

# Test-phase rows for the pairs in `guess`, then the condition-2 rows are
# joined back on the listed columns.
cleanLFM <- guess %>%
  left_join(lfmData, by = c("participant", "cue")) %>%
  filter(phase == "test") %>%
  full_join(
    studyItems,
    by = c(
      "participant", "cue", "phase", "condition", "target",
      "response", "rt", "correct", "Column1", "Column2"
    )
  )

# sanity check: guess + timeout + correct_guess = test LFM data condition 1
# Helper frames meant to be inspected by hand (row counts).
check <- filter(lfmData, phase == "test", condition == 1)
checka <- filter(lfmData, phase == "learn")
check1 <- filter(lfmData, phase == "test", condition == 1)

#cleanLFM should be size of test LFM data minus the size of correct_guess and timeout
check2 <- filter(lfmData, phase == "test")
# pass: 3720 - 66 - 232 = 3422

# Per participant and condition: number of correct test trials divided by 30
# (assumes 30 test items per condition) and the mean RT of those trials.
LFM <- lfmData %>%
  filter(phase == "test", correct == 1) %>%
  group_by(participant, condition) %>%
  summarise(
    performance = n() / 30,
    avgRT = mean(rt)
  )

# Per participant and condition summary on the cleaned trial set.
# Accuracy is the share of each cell's *remaining* test trials that were
# answered correctly: cleanLFM has had items removed, so dividing by a fixed
# 30 would understate performance. Cells with no correct response are kept
# with performance 0 instead of being dropped, so every participant keeps
# one row per condition.
clean_lfm <- cleanLFM %>%
  filter(phase == "test") %>%
  group_by(participant, condition) %>%
  summarize(
    performance = sum(correct == 1, na.rm = TRUE) / n(),
    # which() skips NA codes; avgRT is NaN when no trial was correct.
    avgRT = mean(rt[which(correct == 1)])
  ) %>%
  # Relabel the numeric condition codes: 1 -> "error", 2 -> "study".
  mutate(condition = replace(condition, condition == 1, "error")) %>%
  mutate(condition = replace(condition, condition == 2, "study"))

# Condition-level mean and SD of accuracy and RT across participants
# (all test trials).
group <- LFM %>%
  group_by(condition) %>%
  summarise(
    meanPerformance = mean(performance),
    stdPerformance = sd(performance),
    meanRT = mean(avgRT),
    stdRT = sd(avgRT)
  )

# The same summary for the cleaned data, followed by the 1 -> "error" and
# 2 -> "study" relabelling of condition.
clean_group <- clean_lfm %>%
  group_by(condition) %>%
  summarise(
    meanPerformance = mean(performance),
    stdPerformance = sd(performance),
    meanRT = mean(avgRT),
    stdRT = sd(avgRT)
  ) %>%
  mutate(
    condition = replace(condition, condition == 1, "error"),
    condition = replace(condition, condition == 2, "study")
  )
```

Plot something:
```{r}
# One point per participant and condition, with a line joining each
# participant's two conditions.
ggplot(clean_lfm, aes(x = condition, y = performance)) +
  geom_point(aes(color = participant)) +
  geom_line(aes(group = participant)) +
  labs(
    title = "Performance Compared by Condition",
    x = "Condition",
    y = "Performance"
  )
```

Calculate slope:
```{r}
# Label each number by its sign.
#
# numbers: numeric vector (may be empty or contain NA).
# Returns a character vector of the same length holding "negative", "zero"
# or "positive"; NA inputs give NA, and empty input gives character(0).
calc_sign <- function(numbers) {
  if (length(numbers) == 0) {
    return(character(0))
  }
  labels <- c("negative", "zero", "positive")
  # sign() returns -1, 0 or 1, so shifting by 2 indexes the label vector.
  labels[sign(numbers) + 2]
}


# For each participant: difference in performance between consecutive rows
# (diff() is later row minus earlier row) and the sign of that difference.
# NOTE(review): which condition is the later row depends on the row order of
# clean_lfm, and diff() needs two rows per participant; confirm both hold.
slopeData <- clean_lfm %>%
  group_by(participant) %>%
  mutate(
    difference = diff(performance),
    slope = calc_sign(difference)
  )

# Per-participant lines coloured by the sign of the difference.
ggplot(slopeData, aes(x = condition, y = performance, color = slope)) +
  geom_point() +
  geom_line(aes(group = participant)) +
  labs(
    title = "Performance Compared by Condition",
    x = "Condition",
    y = "Performance"
  )

```

Make kernel density plot
```{r}
# Accuracy values of the study condition.
clean_study <- clean_lfm %>%
  filter(condition == "study") %>%
  dplyr::select(performance)
study <- clean_study[["performance"]]

# Accuracy values of the error condition.
clean_error <- clean_lfm %>%
  filter(condition == "error") %>%
  dplyr::select(performance)
error <- clean_error[["performance"]]

# Kernel density estimates with density()'s default settings.
study_dens <- density(study)
error_dens <- density(error)

# Study curve first, then the error curve overlaid on the same axes.
plot(
  study_dens,
  col = "blue",
  ylim = c(0, 3),
  main = "Density Plot of Performance",
  xlab = "Accuracy"
)
lines(error_dens, col = "red")
legend(
  0.1, 2.5,
  legend = c("Study", "Error"),
  col = c("blue", "red"),
  lty = 1,
  cex = 0.8
)

```

Start stat analyses:
```{r}
# Accuracy vectors per condition.
# NOTE(review): the paired analyses below assume both vectors list the same
# participants in the same order; confirm.
study <- subset(
  clean_lfm,
  subset = condition == "study",
  select = performance,
  drop = TRUE
)
error <- subset(
  clean_lfm,
  subset = condition == "error",
  select = performance,
  drop = TRUE
)

# Paired-data object and its profile plot. The plot is stored in `p` and is
# not printed here.
pd <- paired(error, study)
p <- plot(pd, type = "profile") + theme_bw()

# Box plot of accuracy by condition.
ggplot(clean_lfm, aes(x = condition, y = performance, color = condition)) +
  geom_boxplot() +
  labs(
    title = "Comparing Final Test Performance Between Conditions",
    y = "Accuracy"
  ) +
  theme(plot.title = element_text(hjust = 0.5, size = 15))


# Paired t-test on accuracy, error vs. study.
result <- t.test(error, study, paired = TRUE)
result
```

## Response Times

Look at reaction times:
```{r}
# One row per participant, with the mean RT of each condition in its own
# column (named after the condition labels).
reactionTest <- pivot_wider(
  clean_lfm,
  id_cols = participant,
  names_from = condition,
  values_from = avgRT
)

# Paired t-test on mean RT, error vs. study.
reactionResult <- t.test(
  reactionTest$error,
  reactionTest$study,
  paired = TRUE
)
reactionResult
```

```{r}
# Wide table with accuracy and RT per condition, plus the error-minus-study
# differences. `lfm` is TRUE when accuracy is higher in the error condition,
# FALSE when it is not, and NA when the difference is missing.
reaction <- clean_lfm %>%
  pivot_wider(
    id_cols = participant,
    names_from = condition,
    values_from = c(performance, avgRT)
  ) %>%
  mutate(
    accurDiff = performance_error - performance_study,
    RTDiff = avgRT_error - avgRT_study,
    lfm = accurDiff > 0
  )

# plot both and see relationship
# Error condition in blue, study condition in red, each with a linear fit.
ggplot(reaction) +
  geom_point(
    aes(x = performance_error, y = avgRT_error),
    color = "blue"
  ) +
  geom_smooth(
    aes(x = performance_error, y = avgRT_error),
    method = "lm",
    color = "blue"
  ) +
  geom_point(
    aes(x = performance_study, y = avgRT_study),
    color = "red"
  ) +
  geom_smooth(
    aes(x = performance_study, y = avgRT_study),
    method = "lm",
    color = "red"
  )

# Accuracy difference against RT difference, with a single linear fit.
ggplot(reaction, aes(x = accurDiff, y = RTDiff)) +
  geom_point(aes(color = lfm)) +
  geom_smooth(method = "lm")

# Correlation between accuracy and mean RT in the study condition.
resultsCor <- cor(reaction$performance_study, reaction$avgRT_study)
resultsCor

```


# Mixed Linear Models Analysis

Let's analyze the data using mixed linear models. First, let's look at the distribution of response times to find if there are any obvious outliers:

```{r}
# Histogram of response times in cleanLFM, with a dashed red line and label
# marking the 15000 ms cutoff. The title typo ("Distributuion") is fixed.
ggplot(cleanLFM, aes(x = rt)) +
  geom_histogram(bins = 70, color = "white") +
  xlab("Response Times (ms)") +
  ggtitle("Distribution of Response Times") +
  ylab("Number") +
  geom_vline(xintercept = 15000, color = "red", linetype = "dashed") +
  annotate(
    "text",
    x = 15000, y = 1000,
    label = "Cutoff", hjust = -0.25, color = "red"
  ) +
  theme_minimal()
```

There are clearly some trials, from a few participants, that take a huge amount of time, up to 96 seconds. We do not want our data contaminated by trials in which participants looked at their phones, so we are going to use a cutoff of 15000 ms. We are also going to remove extremely fast trials, i.e. trials whose RT is 200 ms or less.

```{r}
# Keep trials with 200 ms < RT < 15000 ms.
# NOTE(review): cleanLFM is used without a phase filter here; confirm that
# it contains only test-phase rows.
cleandata <- filter(cleanLFM, rt > 200 & rt < 15000)
```

Finally, we are going to rename the conditions to make sure we are using meaningful labels, and make sure that correct is between 0 and 1.

```{r}
# Relabel condition (1 -> "error", 2 -> "study") and recode correct == 2 as 0.
# Any other value of `correct` is left unchanged.
cleandata <- cleandata %>%
  mutate(
    condition = replace(condition, condition == 1, "error"),
    condition = replace(condition, condition == 2, "study"),
    correct = replace(correct, correct == 2, 0)
  )
```


## Mixed Linear Model for Accuracy

Mixed Linear Models are great because they naturally account for variability and individual differences. We are going to first analyze accuracies, which are encoded in the `correct` column of the `cleandata` dataframe.

In our mixed linear models, we are going to model responses as arising from a combination of factors. First, we are going to model a fixed effect of _condition_, i.e., whether the particular response was given to an item in the _Error_ or _Study_ condition. Then, we are going to add a participant-level random _intercept_, which accounts for the fact that different participants have different baseline accuracies. And, finally, we are going to make sure that the model uses a binomial distribution, because accuracy data is binary (correct or not).

```{r}
# Participant is a grouping factor, not a number.
cleandata <- mutate(cleandata, participant = as.factor(participant))

# Binomial mixed model: fixed effect of condition plus a random intercept
# for each participant.
acc_model <- glmer(
  correct ~ condition + (1 | participant),
  data = cleandata,
  family = binomial
)
```


### Comparing the model to other models

We can ask ourselves whether this model is a good model -- for example, does it have the right amount of complexity? To answer this question, we can compare it to a _simpler_ model, which does not have any random effects, and to a _more complex_ model, which also includes a random slope per participant.


```{r}
# Simpler alternative: plain logistic regression, no random effects.
acc_model_simple <- glm(
  correct ~ condition,
  data = cleandata,
  family = binomial
)

# More complex alternative: random intercept for participant plus a separate
# by-participant condition term.
# NOTE(review): condition is categorical, so `0 + condition` likely expands
# to one random effect per level rather than a single slope; confirm this is
# the intended random-slope structure.
acc_model_complex <- glmer(
  correct ~ condition + (1 | participant) + (0 + condition | participant),
  data = cleandata,
  family = binomial
)

```

We can see that our model is better than the simpler model:

```{r}
# Compare the mixed model with the model without random effects and show
# the comparison as a styled table.
tidy(anova(acc_model, acc_model_simple)) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("hover", "striped"))
```

while the complex model does not gain much in terms of fit.


```{r}
# Compare the mixed model with the more complex model and show the
# comparison as a styled table.
tidy(anova(acc_model, acc_model_complex)) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("hover", "striped"))
```

###  Statistical Results

Having picked the model, we can visualize the results here:

```{r}
# Tidy coefficient table of the accuracy model.
tidy(acc_model) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("hover", "striped"))
```

And in APA style

```{r}
# sjPlot summary table of the accuracy model.
sjPlot::tab_model(acc_model)
```

### Visualizing the data

And here is a quick visualization:

```{r, fig.width=5, fig.height=5}
# Mean accuracy for every participant in each condition.
cleandata_agg <- cleandata %>%
  group_by(participant, condition) %>%
  summarise(Accuracy = mean(correct))

# Condition means of the participant means (used for the text labels).
cleandata_summary <- cleandata_agg %>%
  group_by(condition) %>%
  summarise(Accuracy = mean(Accuracy))

# Grey lines and jittered points are individual participants; the coloured
# point and error bar are the condition mean and its standard error.
ggplot(cleandata_agg, aes(x = condition, y = Accuracy, color = condition)) +
  geom_line(aes(group = participant), color = "grey", size = 0.1) +
  geom_point(
    position = position_jitter(width = 0.1),
    alpha = 0.75,
    color = "grey"
  ) +
  scale_color_aaas() +
  stat_summary(geom = "point", fun.data = "mean_se", size = 3) +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1) +
  geom_text(
    data = cleandata_summary,
    aes(
      x = condition,
      y = Accuracy,
      label = paste(round(Accuracy * 100, 2), "%")
    ),
    hjust = c(1.2, -0.2)
  ) +
  labs(title = "Mean Differences in Accuracy", x = "Condition") +
  theme_minimal()
```


### Compare Data to Model Predictions

How does the data compare to the model predictions? To find out, we can generate the model predictions and visualize them side by side with the data:

```{r}
# Attach the fitted values of the accuracy model to each trial.
cleandata <- mutate(cleandata, pred_correct = fitted(acc_model))

# Long format: one row per trial and type (observed vs. fitted).
cleandata_long <- pivot_longer(
  cleandata,
  cols = c("correct", "pred_correct"),
  names_to = "type",
  values_to = "accuracy"
)

# Participant means per condition and type, with readable type labels.
cleandata_agg_pred <- cleandata_long %>%
  mutate(
    type = replace(type, type == "correct", "Observed"),
    type = replace(type, type == "pred_correct", "Predicted")
  ) %>%
  group_by(participant, condition, type) %>%
  summarise(Accuracy = mean(accuracy))

# Condition means per type (used for the text labels).
cleandata_summary_pred <- cleandata_agg_pred %>%
  group_by(condition, type) %>%
  summarise(Accuracy = mean(Accuracy))

# Observed and predicted accuracies in side-by-side panels.
ggplot(
  cleandata_agg_pred,
  aes(x = condition, y = Accuracy, color = condition)
) +
  facet_wrap(~ type) +
  geom_line(aes(group = participant), color = "grey", size = 0.1) +
  geom_point(
    position = position_jitter(width = 0.1),
    alpha = 0.75,
    color = "grey"
  ) +
  scale_color_aaas() +
  geom_text(
    data = cleandata_summary_pred,
    aes(
      x = condition,
      y = Accuracy,
      label = paste(round(Accuracy * 100, 2), "%")
    ),
    hjust = c(-0.2)
  ) +
  stat_summary(geom = "point", fun.data = "mean_se", size = 3) +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1) +
  labs(title = "Mean Differences in Accuracy", x = "Condition") +
  theme_minimal()

```

# Response Times

To analyze response times, we are going to first include only correct trials

```{r}
# Response-time analyses use correct trials only.
cleandata_rt <- filter(cleandata, correct == 1)
```

Then, we are going to run another mixed level model using the same structure as the Accuracy model. Response times have a complicated relationship to accuracy, since they are the sum of retrieval times and perceptual-motor non-retrieval times.

```{r}
# Participant is a grouping factor, not a number.
cleandata_rt <- mutate(cleandata_rt, participant = as.factor(participant))

# Linear mixed model of RT: fixed effect of condition plus a random
# intercept for each participant.
# (A by-participant condition term, `+ (0 + condition|participant)`, is
# left out of this model.)
rt_model <- lmer(
  rt ~ condition + (1 | participant),
  data = cleandata_rt
)
```


### Comparing the model to other models

We can ask ourselves whether this model is a good model -- for example, does it have the right amount of complexity? To answer this question, we can compare it to a _simpler_ model, which does not have any random effects, and to a _more complex_ model, which also includes a random slope per participant.


```{r}
# Simpler alternative: ordinary linear regression, no random effects.
rt_model_simple <- lm(rt ~ condition, data = cleandata_rt)

# More complex alternative: random intercept for participant plus a separate
# by-participant condition term.
# NOTE(review): condition is categorical, so `0 + condition` likely expands
# to one random effect per level rather than a single slope; confirm this is
# the intended random-slope structure.
rt_model_complex <- lmer(
  rt ~ condition + (1 | participant) + (0 + condition | participant),
  data = cleandata_rt
)

```


We can see that our model is better than the simpler model:

```{r}
# Compare the mixed RT model with the model without random effects and show
# the comparison as a styled table.
tidy(anova(rt_model, rt_model_simple)) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("hover", "striped"))
```

while the complex model does not gain much in terms of fit.


```{r}
# Compare the mixed RT model with the more complex model and show the
# comparison as a styled table.
tidy(anova(rt_model, rt_model_complex)) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("hover", "striped"))
```

### Statistical Results

The results are here:

```{r paged.print=TRUE}
# Full model summary of the RT mixed model.
summary(object = rt_model)
```

And here is a nicer table formatting.

```{r}
# Tidy coefficient table of the RT model.
tidy(rt_model) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("hover", "striped"))
```

And here it is in APA style:

```{r}
# sjPlot summary table of the RT model.
sjPlot::tab_model(rt_model)
```


### Visualizing the data

And here is a visualization:

```{r, fig.width=5, fig.height=5}
# Mean RT for every participant in each condition.
cleandata_rt_agg <- cleandata_rt %>%
  group_by(participant, condition) %>%
  summarise(RT = mean(rt))

# Condition means of the participant means (used for the text labels).
cleandata_rt_summary <- cleandata_rt_agg %>%
  group_by(condition) %>%
  summarise(RT = mean(RT))

# Grey lines and points are individual participants; the coloured point and
# error bar are the condition mean and its standard error.
ggplot(cleandata_rt_agg, aes(x = condition, y = RT, color = condition)) +
  geom_line(aes(group = participant), color = "grey", size = 0.25) +
  #geom_point(position =   position_jitter(width=0.1), alpha=0.75, color="grey") +
  geom_point(size = 0.5, alpha = 0.75, color = "grey") +
  scale_color_aaas() +
  stat_summary(geom = "point", fun.data = "mean_se", size = 3) +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1) +
  geom_text(
    data = cleandata_rt_summary,
    aes(x = condition, y = RT, label = paste(round(RT), "ms")),
    hjust = c(1.2, -0.2)
  ) +
  labs(
    title = "Mean Differences in Response Times",
    x = "Condition",
    y = "Response Times (ms)"
  ) +
  theme_minimal()
```


### Compare Data to Model Predictions

How does the data compare to the model predictions? To find out, we can generate the model predictions and visualize them side by side with the data:

```{r}
# Attach the fitted values of the random-intercept RT model to each trial.
cleandata_rt <- mutate(cleandata_rt, pred_rt = fitted(rt_model))

# Long format: one row per trial and type (observed vs. fitted).
cleandata_rt_long <- pivot_longer(
  cleandata_rt,
  cols = c("rt", "pred_rt"),
  names_to = "type",
  values_to = "RT"
)

# Participant means per condition and type, with readable type labels.
cleandata_rt_agg_pred <- cleandata_rt_long %>%
  mutate(
    type = replace(type, type == "rt", "Observed"),
    type = replace(type, type == "pred_rt", "Predicted")
  ) %>%
  group_by(participant, condition, type) %>%
  summarise(RT = mean(RT))

# Condition means per type (used for the text labels).
cleandata_rt_summary_pred <- cleandata_rt_agg_pred %>%
  group_by(condition, type) %>%
  summarise(RT = mean(RT))

# Observed and predicted RTs in side-by-side panels.
ggplot(cleandata_rt_agg_pred, aes(x = condition, y = RT, color = condition)) +
  facet_wrap(~ type) +
  geom_line(aes(group = participant), color = "grey", size = 0.1) +
  #geom_point(position =   position_jitter(width=0.1), alpha=0.75, color="grey") +
  geom_point(size = 0.5, alpha = 0.75, color = "grey") +
  scale_color_aaas() +
  geom_text(
    data = cleandata_rt_summary_pred,
    aes(x = condition, y = RT, label = paste(round(RT), "ms")),
    hjust = c(-0.2)
  ) +
  stat_summary(geom = "point", fun.data = "mean_se", size = 3) +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1) +
  labs(title = "Mean Differences in Response Times", x = "Condition") +
  theme_minimal()
```

## Individual differences

Some participants seem to have slopes in the _opposite_ direction, suggesting the use of the elaborative strategy. Is that the case? To examine this, we can go back to the complex RT model, which includes random slopes as well as random intercepts.

```{r}
# Overwrite pred_rt with the fitted values of the complex RT model.
cleandata_rt <- mutate(cleandata_rt, pred_rt = fitted(rt_model_complex))

# Long format: one row per trial and type (observed vs. fitted).
cleandata_rt_long <- pivot_longer(
  cleandata_rt,
  cols = c("rt", "pred_rt"),
  names_to = "type",
  values_to = "RT"
)

# Participant means per condition and type, with readable type labels.
cleandata_rt_agg_pred <- cleandata_rt_long %>%
  mutate(
    type = replace(type, type == "rt", "Observed"),
    type = replace(type, type == "pred_rt", "Predicted")
  ) %>%
  group_by(participant, condition, type) %>%
  summarise(RT = mean(RT))

# Condition means per type (used for the text labels).
cleandata_rt_summary_pred <- cleandata_rt_agg_pred %>%
  group_by(condition, type) %>%
  summarise(RT = mean(RT))

# Observed and predicted RTs in side-by-side panels (lines only).
ggplot(cleandata_rt_agg_pred, aes(x = condition, y = RT, color = condition)) +
  facet_wrap(~ type) +
  geom_line(aes(group = participant), color = "grey", size = 0.1) +
  #geom_point(position =   position_jitter(width=0.1), alpha=0.75, color="grey") +
  scale_color_aaas() +
  geom_text(
    data = cleandata_rt_summary_pred,
    aes(x = condition, y = RT, label = paste(round(RT), "ms")),
    hjust = c(-0.2)
  ) +
  stat_summary(geom = "point", fun.data = "mean_se", size = 3) +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1) +
  labs(
    title = "Mean Differences in Response Times (with Random Slopes)",
    x = "Condition"
  ) +
  theme_minimal()
```

Note that __all the slopes are negative__: this means that the apparent upward slopes are the effects of outlier data. 

To check, let's examine the distribution of the slopes of each participant:

```{r}
# Per-participant coefficients of the complex RT model.
coeffs <- coef(rt_model_complex)
# NOTE(review): the slope is taken as column 3 minus column 1 of the
# coefficient table; positional indexing depends on how the columns are
# ordered for this formula, so confirm these are the intended terms.
slopes <- coeffs$participant[, 3] - coeffs$participant[, 1]
slopedata <- tibble(Slope = slopes)

# Histogram of the per-participant slopes with a dashed reference line at 0.
ggplot(slopedata, aes(x = Slope)) +
  # `bins` is the argument geom_histogram() understands; the previous
  # `bin = 10` was not a recognised parameter, so the bin count was ignored.
  geom_histogram(bins = 10, col = "white") +
  xlim(-400, 400) +
  ylab("Number of Participants") +
  ggtitle("Distribution of Best Fitting Random Slopes") +
  theme_minimal() +
  geom_vline(
    xintercept = 0, linetype = "dashed",
    color = "black", size = 1
  )
```

## Participants who might be elaborative

```{r}
# Participant IDs flagged as possibly elaborative.
elab <- c("1008", "1015", "1032", "1051", "1053", "1061")

# Accuracy per flagged participant and condition.
elab_acc <- cleandata %>%
  filter(participant %in% elab)
elab_acc_agg <- elab_acc %>%
  group_by(participant, condition) %>%
  summarise(Accuracy = mean(correct))

# Mean RT per flagged participant and condition.
elab_rt <- cleandata_rt %>%
  filter(participant %in% elab)
elab_rt_agg <- elab_rt %>%
  group_by(participant, condition) %>%
  summarise(RT = mean(rt))


# Accuracy of the flagged participants: grey lines and points per
# participant, coloured mean and standard error per condition.
ggplot(elab_acc_agg, aes(x = condition, y = Accuracy, color = condition)) +
  geom_line(aes(group = participant), color = "grey", size = 0.1) +
  geom_point(alpha = 0.5, color = "grey") +
  scale_color_aaas() +
  stat_summary(geom = "point", fun.data = "mean_se", size = 3) +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1) +
  # geom_text(data=cleandata_summary, 
  #           aes(x=condition, 
  #               y=Accuracy, 
  #               label=paste(round(Accuracy*100, 2), "%")), 
  #           hjust= c(1.2, -0.2)) +
  labs(title = "Mean Differences in Accuracy", x = "Condition") +
  theme_minimal()

# Mean RT of the flagged participants: grey lines and points per
# participant, coloured mean and standard error per condition.
# The title now names response times; it previously said "Accuracy",
# carried over from the accuracy plot.
ggplot(elab_rt_agg, aes(x=condition, y=RT, color=condition)) +
  geom_line(color="grey", aes(group=participant), size=0.1) +
  geom_point(alpha=0.75, color="grey") +
  scale_color_aaas() +

  ggtitle("Mean Differences in Response Times") +
  xlab("Condition") +
  stat_summary(geom="point", fun.data = "mean_se", size=3) +
  stat_summary(geom="errorbar", fun.data = "mean_se", width=0.1) +
  # geom_text(data=cleandata_summary, 
  #           aes(x=condition, 
  #               y=Accuracy, 
  #               label=paste(round(Accuracy*100, 2), "%")), 
  #           hjust= c(1.2, -0.2)) +

  theme_minimal()
```
Some numbers

```{r}
# Accuracy SD and mean per condition for the flagged participants.
# SD comes first because `Accuracy` is overwritten by its mean afterwards.
elab_acc_agg %>%
  group_by(condition) %>%
  summarise(
    SD_Accuracy = sd(Accuracy, na.rm = TRUE),
    Accuracy = mean(Accuracy, na.rm = TRUE)
  ) %>%
  kable()

# RT SD and mean per condition for the flagged participants.
elab_rt_agg %>%
  group_by(condition) %>%
  summarise(
    SD_RT = sd(RT, na.rm = TRUE),
    RT = mean(RT, na.rm = TRUE)
  )
```


```{r}
# Label every row "Elaborative" when its participant is listed in `elab`,
# and "Mediator" otherwise, in the trial-level and aggregated tables.
cleandata_rt <- mutate(
  cleandata_rt,
  model = if_else(participant %in% elab, "Elaborative", "Mediator")
)

cleandata_agg <- mutate(
  cleandata_agg,
  model = if_else(participant %in% elab, "Elaborative", "Mediator")
)

cleandata_rt_agg <- mutate(
  cleandata_rt_agg,
  model = if_else(participant %in% elab, "Elaborative", "Mediator")
)

# Trial-level RT densities for the two model-assigned groups.
ggplot(cleandata_rt, aes(x = rt, fill = model)) +
  geom_density(color = "white", alpha = 0.5) +
  scale_fill_manual(values = c("gold3", "purple3")) +
  theme_minimal()

# Mean RT per participant (thin lines) and per model-assigned group
# (points and standard-error bars), by condition.
# The title now names response times; it previously said "Accuracy".
ggplot(cleandata_rt_agg, aes(x=condition, y=RT, color=model)) +
  geom_line(aes(group=participant), size=0.1) +
  #geom_point(alpha=0.75, color="grey") +
  scale_color_manual(values = c("gold3", "purple3")) +
  ggtitle("Mean Differences in Response Times") +
  xlab("Condition") +
  stat_summary(geom="point", fun.data = "mean_se", size=3) +
  stat_summary(geom="errorbar", fun.data = "mean_se", width=0.1) +
  theme_minimal()
```

Summary of behavioral differences between the two model-assigned groups:

```{r}
# Accuracy SD and mean per condition and model-assigned group.
# SD comes first because `Accuracy` is overwritten by its mean afterwards.
cleandata_agg %>%
  group_by(condition, model) %>%
  summarise(
    SD_Accuracy = sd(Accuracy, na.rm = TRUE),
    Accuracy = mean(Accuracy, na.rm = TRUE)
  )

# RT SD and mean per condition and model-assigned group.
cleandata_rt_agg %>%
  group_by(condition, model) %>%
  summarise(
    SD_RT = sd(RT, na.rm = TRUE),
    RT = mean(RT, na.rm = TRUE)
  )
```