ShepHerd - Preliminary Testing August 9

Shep.Herd Evolutionary Model Analysis

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

safe_read_csv <- function(file) {
  tryCatch(
    list(result = read_csv(file, show_col_types = FALSE) %>%
           rename_with(~ str_replace_all(., " ", ".")),
         error = NULL),
    error = function(e) list(result = NULL, error = e)
  )
}

# Read all CSV files and combine them into a single dataframe
allfiles <- list.files(pattern = "*.csv", full.names = TRUE) %>%
  map_df(function(file) {
    result <- safe_read_csv(file)
    if (is.null(result$error)) {
      # Extract characters 6 through 9 from the file name
      layout_value <- substr(basename(file), 4, 7)
      tournament<- substr(basename(file), 1, 3)
      tower <- substr(basename(file), 8,10)
      
      # Combine the data with file and layout columns
      result$result %>%
        mutate(file = file, layout = layout_value,
               tournament = tournament,
               tower = tower)
    } else {
      warning(paste("Error reading file:", file, "\n", result$error))
      return(NULL)
    }
  })

New names:
New names:
New names:
New names:
New names:
• `` -> `...24`

# Check if allfiles is empty
if (nrow(allfiles) == 0) {
  stop("No data was successfully read from the CSV files.")
}

# Process the combined dataframe
allfiles <- allfiles %>%
  mutate(
    Wave.Number = as.numeric(Wave.Number),
    Unique.Slime.ID = paste(Wave.Number, Slime.ID, sep = "."),
    Unique.Parent.One = paste(Wave.Number - 1, Parent.One, sep = "."),
    Unique.Parent.Two = paste(Wave.Number - 1, Parent.Two, sep = ".")
  ) %>%
  pivot_longer(
    cols = c(Unique.Parent.One, Unique.Parent.Two),
    names_to = "parent_type",
    values_to = "parent_id"
  ) %>%
  group_by(parent_id) %>%
  mutate(offspring_count = n()) %>%
  ungroup() %>%
  filter(parent_id != "-1.N/A") %>%
  select(-parent_type) %>%
  distinct()

Visualize Basic Trait Patterns

sumstats <- allfiles %>%
  group_by(file, Wave.Number, tournament, tower)%>%
  summarise(meanSpeed = mean(Speed.Trait),
            meanTower = mean(Tower.Attraction.Trait),
            meanTurn = mean(Turn.Rate.Trait),
            meanPersonal = mean(Slime.Optimal.Distance.Trait),
            Main.Blaster = sum(Main.Type == "Blaster"),
            Main.Ice = sum(Main.Type == "Ice"),
            Sec.Blaster = sum(Secondary.Type == "Blaster"),
            Sec.Ice = sum(Secondary.Type == "Ice"))%>%
  ungroup()

`summarise()` has grouped output by 'file', 'Wave.Number', 'tournament'. You
can override using the `.groups` argument.

#| fig-cap: "Change in the Speed.Trait value over time."
ggplot(allfiles, aes(x= Wave.Number, y = Speed.Trait))+
  geom_jitter(data = allfiles, aes(x=Wave.Number, y = Speed.Trait),
              alpha = 0.1)+
  geom_smooth(data = sumstats, aes(x= Wave.Number, y = meanSpeed))+
  facet_grid(tournament~tower)

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

ggplot(allfiles, aes(x= Wave.Number, y = Turn.Rate.Trait))+
  geom_jitter(data = allfiles, aes(x=Wave.Number, y = Turn.Rate.Trait),
              alpha = 0.1)+
  geom_smooth(data = sumstats, aes(x= Wave.Number, y = meanTurn))+
  facet_grid(tournament~tower)

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Change in the Turn.Rate.Trait value over time.

ggplot(allfiles, aes(x= Wave.Number, y = Slime.Optimal.Distance.Trait))+
  geom_jitter(data = allfiles, aes(x=Wave.Number, y = Slime.Optimal.Distance.Trait),
              alpha = 0.1)+
  geom_smooth(data = sumstats, aes(x= Wave.Number, y = meanPersonal))+
  facet_grid(tournament~tower)

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Change in the Slime.Distance.Trait value over time.

ggplot(allfiles, aes(x= Wave.Number, y = Tower.Attraction.Trait))+
  geom_jitter(data = allfiles, aes(x=Wave.Number, y = Tower.Attraction.Trait),
              alpha = 0.1)+
  geom_smooth(data = sumstats, aes(x= Wave.Number, y = meanTower))+
  facet_grid(tournament~tower)

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Change in the Tower.Distance.Trait value over time.

Visualize Basic Type Patterns

Main Slime Type
Secondary Slime Type

# Assuming your data frame is named 'allfiles'
# Step 1: Summarize the data to get counts of Main.Type for each Wave.Number
summary_types <- allfiles %>%
  group_by(Wave.Number, Main.Type, file, tournament, tower) %>%
  summarise(count = n()) %>%
  ungroup()

`summarise()` has grouped output by 'Wave.Number', 'Main.Type', 'file',
'tournament'. You can override using the `.groups` argument.

# Step 2: Create the stacked bar chart
ggplot(summary_types, aes(x = factor(Wave.Number), y = count, fill = Main.Type)) +
  geom_bar(stat = "identity") +
  labs(title = "Stacked Bar Chart of Main.Type Across Wave.Numbers",
       x = "Wave Number",
       y = "Count",
       fill = "Main Type") +
  theme_minimal()+
  facet_grid(tournament~tower)

Change in the frequency of Main Type over time.

# Assuming your data frame is named 'allfiles'
# Step 1: Summarize the data to get counts of Main.Type for each Wave.Number
summary_types <- allfiles %>%
  group_by(Wave.Number, Secondary.Type, file, tournament, tower) %>%
  summarise(count = n()) %>%
  ungroup()

`summarise()` has grouped output by 'Wave.Number', 'Secondary.Type', 'file',
'tournament'. You can override using the `.groups` argument.

# Step 2: Create the stacked bar chart
ggplot(summary_types, aes(x = factor(Wave.Number), y = count, fill = Secondary.Type)) +
  geom_bar(stat = "identity") +
  labs(title = "Stacked Bar Chart of Secondary.Type Across Wave.Numbers",
       x = "Wave Number",
       y = "Count",
       fill = "Main Type") +
  theme_minimal()+
  facet_grid(tournament~tower)

Change in the frequency of Secondary Type over time.

Fitness Analysis

fit_ranked <- allfiles %>%
  group_by(file, Wave.Number) %>%
  mutate(fitrank = rank(Sheep.Distance.Fitness, ties.method = "min")) %>%
  mutate(fitscale = scale(Sheep.Distance.Fitness))%>%
  ungroup()

Raw Fitness
Log Fitness

ggplot(allfiles, aes(x = as.factor(Wave.Number), y = Sheep.Distance.Fitness, color = offspring_count, alpha = 0.01)) + 
  geom_jitter() +
  theme(legend.position = "none")+
  facet_grid(tournament~tower)

ggplot(allfiles, aes(x = as.factor(Wave.Number), y = log10(Sheep.Distance.Fitness), color = offspring_count, alpha = 0.01)) + 
  geom_jitter() +
  theme(legend.position = "none")+
  facet_grid(tournament~tower)

fitness <- ggplot(fit_ranked%>% filter(Wave.Number < max(Wave.Number)), aes(x = Sheep.Distance.Fitness, y = offspring_count))+ geom_point(aes(color = Speed.Trait))+ geom_smooth(method = “lm”)+ facet_grid(Wave.Number~file)

ggsave(fitness, file = “fitness.png”, height = 12, width =4)

fitnessrank <- ggplot(fit_ranked%>% filter(Wave.Number < max(Wave.Number)), aes(x = fitrank, y = offspring_count))+ geom_point(aes(color = Speed.Trait))+ geom_smooth(method = “lm”)+ facet_grid(Wave.Number~file)

ggsave(fitnessrank, file = “fitnessrank.png”, height = 12, width =4)

speed <- ggplot(fit_ranked%>% filter(Wave.Number < max(Wave.Number)), aes(x = Speed.Trait, y = offspring_count))+ geom_point(aes(color = Main.Type))+ geom_smooth(method = “lm”)+ facet_grid(Wave.Number~file)

ggsave(speed, file = “speed.png”, height = 12, width =4)


::: {.cell}

```{.r .cell-code}
ggplot(fit_ranked, aes(x = Speed.Trait)) +
  geom_histogram(data = subset(fit_ranked, Wave.Number == 8), 
                 aes(y = ..density..), 
                 fill = "lightgray", 
                 color = "black", 
                 alpha = 0.7) +
  geom_density(data = subset(fit_ranked, Wave.Number %in% 0:8), 
               aes(color = factor(Wave.Number))) +
  scale_color_discrete(name = "Wave Number") +
  labs(title = "Speed Trait Distribution",
       x = "Speed Trait",
       y = "Density") +
  theme_minimal()+
  facet_grid(~file)

Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
ℹ Please use `after_stat(density)` instead.

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(fit_ranked, aes(x = Speed.Trait)) +
  geom_histogram( aes(y = ..density..), 
                 fill = "lightgray", 
                 color = "black", 
                 alpha = 0.7) +

  labs(title = "Speed Trait Distribution",
       x = "Speed Trait",
       y = "Density") +
  theme_minimal()+
  facet_grid(Wave.Number~file)

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(fit_ranked, aes(x = Sheep.Distance.Fitness)) +
  geom_histogram(data = subset(fit_ranked, Wave.Number == 4), 
                 aes(y = ..density..), 
                 fill = "lightgray", 
                 color = "black", 
                 alpha = 0.7) +
  geom_density(data = subset(fit_ranked, Wave.Number %in% 0:3), 
               aes(color = factor(Wave.Number))) +
  scale_color_discrete(name = "Wave Number") +
  labs(title = "Offfspring Distribution",
       x = "Speed Trait",
       y = "Density") +
  theme_minimal()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

:::

library(brms)

formula <- "offspring_count ~ speedscale + file + Main.Type + (1|Wave.Number)"

# gaussianmodel <- brm(
#   formula = formula,
#   data = fit_ranked,
#   family = gaussian(),
#   prior = c(
#     prior(normal(0, 1), class = b),
#     prior(cauchy(0, 2), class = sd)
#   ),
#   chains = 4,
#   iter = 2000,
#   warmup = 1000
# )

zeromodel <- brm(
  formula = formula,
  data = fit_ranked,
  family = zero_inflated_negbinomial(),
  prior = c(
    prior(normal(0, 1), class = b),
    prior(cauchy(0, 2), class = sd)
  ),
  chains = 4,
  iter = 2000,
  warmup = 1000
)

# summary(gaussianmodel)
# posterior <- as.data.frame(gaussianmodel)
# hist(posterior$b_fitscale, main="Posterior Distribution of Selection Gradient", xlab="Selection Gradient")


summary(zeromodel)
posterior <- as.data.frame(zeromodel)
hist(posterior$b_speedscale, main="Posterior Distribution of Selection Gradient", xlab="Selection Gradient")