Split and plot NextClade mutations

This example uses the GISAID entries EPI_ISL_3152653, EPI_ISL_3426926, EPI_ISL_3229925, EPI_ISL_3260666 and EPI_ISL_3426910. The sequences were run through the Nextclade web interface, and the results were downloaded as a .csv file (saved here as "nextclade.csv").

library(tidyverse)

# Load the Nextclade results export. The file is read as semicolon-delimited,
# with surrounding whitespace trimmed from every field.
nextclade <- read_delim(
  file = "nextclade.csv",
  delim = ";",
  escape_double = FALSE,
  trim_ws = TRUE
)


# Plot all samples and all amino-acid substitutions ----
# Builds one row per (sample, substitution) pair and draws a tile for every
# substitution that is present in a sample.
nextclade %>%
  select(seqName, aaSubstitutions) %>%
  # Remove samples with no sequence / no substitutions (NA in either column)
  drop_na() %>%
  # aaSubstitutions is a comma-separated list; expand it to one row per entry
  mutate(tmp = str_split(aaSubstitutions, ",")) %>%
  unnest(tmp) %>%
  # Each entry is expected to look like "Gene:Mutation" (e.g. "S:D614G").
  # Split on the colon explicitly: the default `sep` of separate() splits on
  # any run of non-alphanumeric characters, which strips the "*" from
  # stop-codon substitutions (e.g. "ORF8:Q27*") and emits a warning.
  separate(tmp, into = c("Gene", "Mutation"), sep = ":") %>%
  select(-aaSubstitutions) %>%
  # NOTE(review): the x axis uses the mutation label without the gene, so
  # identical labels from different genes share one column -- confirm this is
  # acceptable for the data at hand.
  ggplot(aes(x = Mutation, y = seqName)) +
  geom_tile() +
  theme(axis.text.x = element_text(angle = 90),
        panel.background = element_blank())

# Only Spike protein ----
# Same tile plot as for all genes, restricted to substitutions whose gene
# part is "S".
nextclade %>%
  select(seqName, aaSubstitutions) %>%
  # Remove samples with no sequence / no substitutions (NA in either column)
  drop_na() %>%
  # aaSubstitutions is a comma-separated list; expand it to one row per entry
  mutate(tmp = str_split(aaSubstitutions, ",")) %>%
  unnest(tmp) %>%
  # Each entry is expected to look like "Gene:Mutation" (e.g. "S:D614G").
  # Split on the colon explicitly: the default `sep` of separate() splits on
  # any run of non-alphanumeric characters, which strips the "*" from
  # stop-codon substitutions and emits a warning.
  separate(tmp, into = c("Gene", "Mutation"), sep = ":") %>%
  select(-aaSubstitutions) %>%
  # Keep only substitutions in the S gene
  filter(Gene == "S") %>%
  ggplot(aes(x = Mutation, y = seqName)) +
  geom_tile() +
  theme(axis.text.x = element_text(angle = 90),
        panel.background = element_blank())

Leave a Comment