Intro

This document is meant to analyze the results of GWA analyses for insect herbivores and aspen traits as they relate to insect herbivores.

First, we’ll setup this document by loading options, packages, and functions:

GWA.results.sig <- list()
set.seed(954) # random numbers always the same
# define traits of interest
gwa.traits <- c("Hobs","BA.2012sqrt","Sex.TEMP","SLA","ALA","CT","PG","Npct",
               "CN","BAsqrt","Vol","EFNMean","BBreakDegDay","Flprev")

diversity.traits <- c("richness","shannon.index","evenness",paste0("MDS",1:4))

environ.vars <- c("SerialNo","Genet.SSRrev",
                  "Block","Dist.Edge","Age",
                  "Survey.Year","Survey.Month",
                  "survey.event")

common.insects <- unname(unlist(fread("data/common-insects.txt", 
                               header = FALSE)))
insect.vars <- c(common.insects,"Min.per.Tree")

SNP.N <- 113674 # total number of SNPs
# get a list of insect gwa files
insect.files <- grep(list.files(params$gwa.results.dir),value = TRUE,
                     pattern = "^InsectGWA_")
instrait.files <- grep(list.files(params$gwa.results.dir),value = TRUE,
                       pattern = "^InsTraitGWA_")
trait.files <- grep(list.files(params$gwa.results.dir),value = TRUE,
                    pattern = "^TraitGWA_")
# exclude sex and hobs files
trait.files <- trait.files[!grepl(trait.files,pattern = ".*(Sex.TEMP|Hobs).*")]
# crosstrait.files <- grep(list.files(params$gwa.results.dir),value = TRUE,
#                      pattern = "^crossed-effects_")
covartrait.files <- grep(list.files(params$gwa.results.dir),value = TRUE,
                         pattern = "^CovarTraits_")
covartrait.files <- covartrait.files[!grepl(covartrait.files, pattern = ".*(Sex.TEMP|Hobs|Flprev).*")]

diversity.files <- grep(list.files(params$gwa.results.dir),value = TRUE,
                        pattern = "^Diversity")

Indv.trait.files <- grep(list.files(params$gwa.results.dir),value = TRUE,
                        pattern = "^Indv-randeffects")
Indv.trait.files <- Indv.trait.files[!grepl(Indv.trait.files, pattern = ".*(BA\\.2012sqrt.|Hobs).*")]
 
CompleteCross.trait.files <- grep(list.files(params$gwa.results.dir), value = TRUE, pattern = "CompleteCross")
CompleteCross.trait.files <- CompleteCross.trait.files[!grepl(CompleteCross.trait.files, pattern = ".*(BA\\.2012sqrt.|Hobs).*")]

IndvCrossed.trait.files <- grep(list.files(params$gwa.results.dir), value = TRUE, pattern = "IndvCrossed")
IndvCrossed.trait.files <- IndvCrossed.trait.files[!grepl(IndvCrossed.trait.files, pattern = ".*(BA\\.2012sqrt.|Hobs).*")]

CrossNestIndv.trait.files <- grep(list.files(params$gwa.results.dir), value = TRUE, pattern = "CrossNestIndv")
CrossNestIndv.trait.files <- CrossNestIndv.trait.files[!grepl(CrossNestIndv.trait.files, pattern = ".*(BA\\.2012sqrt.|Hobs).*")]

YearNest.trait.files <- grep(list.files(params$gwa.results.dir), value = TRUE, pattern = "YearNest")
YearNest.trait.files <- YearNest.trait.files[!grepl(YearNest.trait.files, pattern = ".*(BA\\.2012sqrt.|Hobs).*")]


# annotated genes list
anno.genes <- fread("data/gwa-files/annotated-potra-genes.csv")
# results function
sig.GWA <- function(files.list = insect.files, 
                    method = "qval", #or 'bonf'
                    sig.lvl = 0.05, fdr.lvl = 0.05,
                    full.results = TRUE){
  GWA.results.sig <- data.frame()
  GWA.sig.table <- data.frame()
  full.results.df <- data.frame()
  return.list <- list()
  for(f in files.list){
    # attributes
    gwa.type <- unlist(strsplit(f,split = "_"))[1]
    trait <- unlist(strsplit(f,split = "_"))[2] #trait name
    path <- file.path(params$gwa.results.dir,f) #path to file
    stopifnot(file.exists(path)) #check that the file exists
    # data
    tmp <- fread(path,header = TRUE) #read the file
    tmp$trait <- trait
    tmp$gwa.type <- gwa.type
    tmp <- tmp %>% #add scaffold and within-scaffold base pair 
      mutate(scaff.name = gsub(SNP.name,pattern =  "(.*):(.*)",
                               replacement = "\\1"),
             scaff.bp = gsub(SNP.name,pattern =  "(.*):(.*)",
                             replacement = "\\2"))
    
    if(method == "qval"){
      qvalues <- qvalue(p = tmp$pval.chisqr,fdr.level = fdr.lvl,lfdr.out = FALSE) #calculate q
      tmp$qval <- qvalues$qvalues #add q to data
      
      # add in gene annotations
      # scaff <- tmp$scaff.name
      # bp <- tmp$scaff.bp
      # test <- mapply(x = scaff, y = bp, FUN = function(x,y){
      #   anns <- anno.genes %>% filter(Chrom == x,START <= y, END >= y)
      #   # anns <- ifelse(nrow(anns) < 1,NA,no = anns)
      #   if(nrow(anns) > 0){
      #   return(anns %>% select(Potra_Gene_ID,mean_alleleFreq,Functional_Description))
      #   # which( (anno.genes$Chrom == x) & 
      #   #          (anno.genes$START <= y) & 
      #   #          (anno.genes$END >= y))
      #   } else { return(data.frame(Potra_Gene_ID = NA,
      #                              mean_alleleFreq = NA,
      #                              Functional_Description = NA))}
      # }) %>% t
      
      # significant results
      sig.data <- tmp %>% filter(qval <= sig.lvl) %>% #remove non-significant rows
        select(gwa.type,trait,SNP.name,scaff.name,scaff.bp,#id columns
               Estimate,Std.Error,pval.chisqr,qval)#result columns
      GWA.results.sig <- rbind(GWA.results.sig, sig.data)
      GWA.sig.table <- rbind(GWA.sig.table,
                             data.frame(gwa.type,trait,
                                        n = length(which(tmp$qval <= sig.lvl))))
    }
    if(method == "bonf"){
      tmp$bonf.p <- tmp$pval.chisqr*nrow(tmp)
      sig.data <- tmp %>% filter(pval.chisqr <= sig.lvl/nrow(tmp)) %>% 
        select(gwa.type,trait,SNP.name,scaff.name,scaff.bp,
               Estimate,Std.Error,pval.chisqr,bonf.p)
      GWA.results.sig <- rbind(GWA.results.sig, sig.data)
      GWA.sig.table <- rbind(GWA.sig.table,
                             data.frame(gwa.type,trait,
                                        n = length(which(tmp$bonf.p <= sig.lvl))))
    }
    
    if(full.results){
      full.results.df <-  plyr::rbind.fill(full.results.df,tmp)
      # full.results.df <- rbindlist(list(full.results.df,tmp),fill = TRUE,use.names = TRUE)
    }
  }
  # add an overall q value
  if(full.results){
    if(method == "qval"){
      full.results.df$q.all <- qvalue(p = full.results.df$pval.chisqr,
                                      fdr.level = fdr.lvl,
                                      lfdr.out = FALSE)$qvalues
      full.results.df <- full.results.df %>%
        select(gwa.type,trait,SNP.name,scaff.name,scaff.bp,#id columns
               Estimate,Std.Error,pval.chisqr,qval,q.all)
    }
    if(method == "bonf"){
      full.results.df$bonf.all <- full.results.df$pval.chisqr*nrow(full.results.df)
      full.results.df <- full.results.df %>% 
        select(gwa.type,trait,SNP.name,scaff.name,scaff.bp,#id columns
               Estimate,Std.Error,pval.chisqr,bonf.p,bonf.all)
    }
  }
  # build return list
  return.list$sig.df <- GWA.results.sig
  return.list$sig.tab <-  GWA.sig.table
  return.list$full.results.df <- full.results.df
  
  return(return.list)
}

Analyze Results

Here we will compile a list of results from each model to use in the remaining sections.

# insect results
insect.sig.gwa <- sig.GWA(insect.files) #no traits
instrait.sig.gwa <- sig.GWA(instrait.files) #with traits
# trait results
trait.sig05.gwa <- sig.GWA(trait.files,sig.lvl = .05)#.05 cutoff
trait.sig01.gwa <- sig.GWA(trait.files,sig.lvl = .01)#.01 cutoff
trait.sig005.gwa <- sig.GWA(trait.files,sig.lvl = .005)#.005 cutoff

indv.trait.gwa <- sig.GWA(files.list = Indv.trait.files, sig.lvl = .05)

covartrait.sig05.gwa <- sig.GWA(covartrait.files, sig.lvl = .05)
covartrait.sig01.gwa <- sig.GWA(covartrait.files, sig.lvl = .01)
# crosstrait.sig.gwa <- sig.GWA(grep( pattern = ".*(Sex|Flprev).*",x = crosstrait.files,value = T,invert = TRUE))

diversity.sig05.gwa <- sig.GWA(diversity.files, sig.lvl = .05)

CrossNestIndv.GWA <- sig.GWA(CrossNestIndv.trait.files, sig.lvl = .05)
CompleteCross.GWA <- sig.GWA(CompleteCross.trait.files, sig.lvl = .05)
IndvCrossed.GWA <- sig.GWA(IndvCrossed.trait.files, sig.lvl = .05)

YearNest.GWA <- sig.GWA(YearNest.trait.files, sig.lvl = .05)

For model fit diagnostics see this document.

Insect GWA

# insect.sig.gwa %>% group_by(trait) %>% tally()
(p <- ggplot(data = rbind(insect.sig.gwa$sig.tab,instrait.sig.gwa$sig.tab),
             aes(x = trait, y = n, fill = gwa.type)) + 
   geom_bar(stat = "identity",position = "dodge", col = "grey50", width=.75) + 
   geom_text(aes(label = round(n/SNP.N, digits = 2)),
   # geom_text(aes(label = n),
             position = position_dodge(width = 1),
             size = 2.5,
             # angle = 25,
             vjust = -.25,
             hjust = .5,
             col = "black") +
   theme_bw(base_size = 10) +
   theme(axis.text.x = element_text(angle = 25,hjust = 1),
         panel.grid.minor = element_blank(),
         panel.grid.major.x = element_blank()) + 
   labs(y = "Significant Associations")
) %+% labs(title = "Insect GWA Results")

merge(insect.sig.gwa$sig.tab %>% select(-gwa.type),instrait.sig.gwa$sig.tab %>% select(-gwa.type), by = "trait", suffixes = c(".InsectGWA",".InsTraitGWA"))

These are the results that I would expect. There are a reasonable number of associations for the insects and they decrease (mostly) as trait covariates are added to the model.

Shared SNPs

shared.SNP.list <- insect.sig.gwa$sig.df %>% group_by(SNP.name) %>% tally() %>% filter(n>1)
# which insects have each SNP in our list?
shared.ins.list <- data.frame()
for (snp in shared.SNP.list$SNP.name){
  tmp <- paste(insect.sig.gwa$sig.df$trait[which(insect.sig.gwa$sig.df$SNP.name == snp)],
               collapse = ",")
  shared.ins.list[snp,"insects"] <- tmp
}

shared.ins.tab <- shared.ins.list %>% group_by(insects) %>% tally() %>% arrange(desc(n))

There are a total of 975 SNPs that are shared among our common insects and there are 34 unique insect combinations that share SNPs. More on that in the Shared Associations section

Trait GWA

# trait.full.tab <- trait.full.res %>% filter(q.all < .05) %>% group_by(trait) %>% tally()
# trait.full.tab$gwa.type = "trait-full";trait.full.tab$q.cutoff="0.05 (all)"

trait.comp.dat <- rbind(cbind(trait.sig05.gwa$sig.tab,
                              "q.cutoff" = "0.05", "covars" = "no covars"),
                        # trait.full.tab,
                        cbind(trait.sig01.gwa$sig.tab,
                              "q.cutoff" = "0.01", "covars" = "no covars"),
                        # cbind(trait.sig005.gwa$sig.tab,"q.cutoff" = "0.005"),
                        cbind(covartrait.sig05.gwa$sig.tab, 
                              "q.cutoff" = "0.05","covars" = "with covars"),
                        cbind(covartrait.sig01.gwa$sig.tab, 
                              "q.cutoff" = "0.01","covars" = "with covars"))

p %+% (data = trait.comp.dat) %+% aes(fill = q.cutoff) %+% 
  labs(title = "Trait GWA Results", fill = "q value cutoff") + 
  facet_wrap(~covars) + theme(axis.text.x = element_text(angle = 45))

total.trait.assoc.prop01 <-  (trait.sig01.gwa$sig.df$SNP.name %>% unique() %>% length())/SNP.N
total.covtrait.assoc.prop <-  (covartrait.sig01.gwa$sig.df$SNP.name %>% unique() %>% length())/SNP.N
total.trait.assoc.prop001 <-  (covartrait.sig01.gwa$sig.df %>% 
                                 filter(qval <= .001) %>% 
                                 select(SNP.name) %>% unlist() %>% 
                                 unique()  %>% length())/SNP.N

In general, there is a relatively high percentage of associations with our traits when using a cutoff of .05, especially when compared with insects. What could be causing that? Is it possible that the random survey event effect is not appropriate for trait data? Using a cutoff of .01 gives more conservative estimates.

With the .01 cutoff, and no trait covariates, 0.62% of all SNPs are associated with at least one of these traits and at a .001 cutoff, 0.26% of SNPs are trait-associated.

Sex will remain highly associated with our SNPs because it is completely confounded with Genet. An individual component of genet (a SNP) can explain the sex of an individual as well as the complete genet information. Sex consistently had 46% of the SNPs come up as significant (random chance). For these reasons, Sex-associated SNPs will not be considered.

It should be noted that this method of GWA is meant to be sensitive, in order to detect associations with extended phenotypes (insects) and so, it makes sense that associations with directly related traits are more numerous.

It is important to remember that these are corralative associations NOT causative ones. The SNPs that are associated with Sex are NOT those that determine sex, they are those whose genotype differs by sex, on average.

Trait-Trait Correlations

Additionally, the traits are relatively highly correlated:

# numeric trait data
trait.data <- fread("data/phenos-and-covars.csv",
                   select = c("Block","Dist.Edge",
                              # diversity.traits,
                              gwa.traits,
                              "richness","shannon.index","evenness"
                              ),
                   stringsAsFactors = TRUE)
trait.data.num <- trait.data %>% 
  na.omit() %>% mutate_at(c("Sex.TEMP"), as.numeric)

# correlation plot function
corr.tile.plot <- function(data = trait.data.num, corr.meth = "pearson",
                           high.col = "red",low.col = "blue",text.size = 10){
  cor(data,method = corr.meth) %>% 
    melt() %>% 
    ggplot(aes(x = Var1, y = Var2, 
               fill = ifelse(value == 1, NA, value))) + #don't show diag in col.
    geom_tile(col = "black",size = .25) + 
    geom_text(aes(label = round(value,2)),size = text.size*.225) +
    scale_fill_gradient2(low = low.col, high = high.col, mid = "white") +
    labs(fill = "pearson's r",y=NULL,x=NULL,title = "Trait Correlation Matrix") +
    theme_bw(base_size = text.size) +
    theme(axis.text.x = element_text(angle = 35,hjust=1),
          panel.border = element_blank(), 
          panel.grid = element_blank(), axis.text = element_text(face = "bold"))
}

corr.tile.plot(trait.data.num)

I’ll now build models to correct for these correlations (code hidden). I’ll include traits as covariates in a model if there is a correlation with an absolute value > .15. Again, Block, Age, Dist.Edge, and (1|Survey.Year/Survey.Month/Genet.SSRrev) were used in models for each trait (including) diversity indices.

We can see that the traits from SLA to Vol are fairly intercorrelated. This means that, since the trait GWA models don’t include other traits as covariates, a SNP may be detected that is actually associated with a correlated trait, and not the actual trait of interest. This will especially be true for Sex.TEMP because, even though it is not directly correlated with our traits, it is very highly correlated with genet. Therefore, strong genet associations will be correlated with sex associations.

Random Effects Specification

Additionally, I decided to look at different random effects specifications for the trait responses (no covariates):

trait.comp.2 <- rbind(
                      # cbind(trait.sig05.gwa$sig.tab, "label" = ".05"),
                      # cbind(trait.sig01.gwa$sig.tab, "label" = ".01"),
                      cbind(indv.trait.gwa$sig.tab, "label" = ".05,ind-RE"),
                      cbind(CompleteCross.GWA$sig.tab, "label" = ".05,CompCross"),
                      cbind(CrossNestIndv.GWA$sig.tab, "label" = ".05,CrossNest"),
                      cbind(IndvCrossed.GWA$sig.tab, "label" = ".05,IndvCrossed"),
                      cbind(YearNest.GWA$sig.tab, "label" = ".05,YearNest"))   

p %+% (data = trait.comp.2) %+% aes(fill = label) %+% 
  labs(title = "Triat GWA Comparison", fill = "trait model") +
  theme(axis.text.x = element_text(angle = 45))

#sigtabs for different random effects structures
rbind(CrossNestIndv.GWA$sig.tab,
CompleteCross.GWA$sig.tab,
IndvCrossed.GWA$sig.tab,
YearNest.GWA$sig.tab)

It appears that the CrossNest random effect version ((1|year/month)+(1|month/genet)+(1|ID)) yields the the more favorable results, in terms of # of associations. However, this requires a relaxed assumption of correlation (among observations) when compared to the true design (i.e. CompCross: (1|year)+(1|month)+(1|genet)+(1|ID)). This may not be appropriate or justified.

The CompCross model is the most justified for identifying associations for traits only (without considering insects) but we can see that this method does not identify many SNPs for traits. In fact only BBreakDegDay and FlPrev had any associations identified by this method. In fact, the IndvCrossed ((1|year/month)+(1|genet)+(1|ID)) specification yields identical results. These associations are the ones we would use as candidate SNPs for our traits if we were strictly interested in trait GWA.

Shared Associations

Let’s look at how many shared associations each trait and insect has:

shared.SNP.plot <- function(
  # Arguments
  GWA.res.list = trait.sig05.gwa, prop = TRUE,
  sig.lvl = .05, low.col = "white",high.col = "red",
  text.size = 10,GWA.list2 = NULL, 
  sig.lvl2 = sig.lvl, all.2 = FALSE,
  genes = FALSE){
  # Body
  ## define variables
  data = GWA.res.list #data input
  df = data$sig.df #data frame
  tab = data$sig.tab #count table
  trait.list = (unique(df$trait) %>% na.omit()) #trait list
  if(is.null(GWA.list2)){ # case of only 1 data object
    comb.array <- combn(x = trait.list,m = 2,simplify = TRUE) #all unique trait combos
    df2 = df #second verse same as the first
    # tab2 = tab
    trait.list2 = trait.list #ditto
  } else { #2 data objects
    data2 = GWA.list2 #second data object
    df2 = data2$sig.df #second data frame
    trait.list2 = (unique(df2$trait) %>% na.omit()) #second trait list
    # creat combination array
    comb.array <- NULL
    for(i in 1:length(trait.list)){ # pair each trait from 1, with each from 2
      comb.array <- cbind(comb.array,
                          rbind(trait.list[i],
                                trait.list2[1:length(trait.list2)]))
    }
  }
  # check for SNP intersections
  in.common.pairs <- data.frame() #object to fill
  for(i in 1:dim(comb.array)[2]){
    trait.1 <- comb.array[1,i] #x axis
    trait.2 <- comb.array[2,i] #y axis
    # snp intersections of 1st and 2nd traits
    snp.inx <- intersect(df %>% filter(qval < sig.lvl, trait == trait.1) %>% 
                           select(SNP.name),
                         df2 %>% filter(qval < sig.lvl2, trait == trait.2) %>% 
                           select(SNP.name))
    in.common.pairs[trait.1,trait.2] <- nrow(snp.inx) #count them
    if(is.null(GWA.list2)){ #copy to the lower triangle when df1 != df2
      in.common.pairs[trait.2,trait.1] <- nrow(snp.inx)
    }
  }
  # summary case
  if(all.2) {#summary row to check if a SNP in x is present in any y
    all.inx <- data.frame() # object to fill
    for(trait.1 in trait.list){
      all.inx[trait.1,"trait.1"] <- trait.1
      all.inx[trait.1,"trait.2"] <- "all_traits.2"
      if(is.null(GWA.list2)){ #same as above inx but with all df2
        all.inx[trait.1,"value"] <- 
          intersect(df %>% 
                      filter(qval < sig.lvl, trait == trait.1) %>% 
                      select(SNP.name),
                    df2 %>% #if comparing the same data, don't count unity
                      filter(qval < sig.lvl2, trait != trait.1) %>% 
                      select(SNP.name) %>% unique()) %>% 
          nrow() # count them
      }else {
        all.inx[trait.1,"value"] <- 
          intersect(df %>% 
                      filter(qval < sig.lvl, trait == trait.1) %>% 
                      select(SNP.name),
                    df2 %>% # if comparing 2 dfs, use all df2
                      filter(qval < sig.lvl2) %>% 
                      select(SNP.name) %>% unique()) %>% 
          nrow() # count them
      }
    }
  }
  # prepare the data for plotting
  in.common.pairs$trait.1 <- rownames(in.common.pairs)
  comm.melt <- in.common.pairs %>% # melt data for geom_tile()
    melt(id.vars = "trait.1",variable.name = "trait.2")
  # reorder levels
  comm.melt$trait.1 <- factor(comm.melt$trait.1,levels = trait.list)
  comm.melt$trait.2 <- factor(comm.melt$trait.2,levels = trait.list2)
  # add the summary row, if needed
  if(all.2){
    comm.melt <- rbind(comm.melt,all.inx)
  }
  # total number of SNPs for x
  comm.melt$trait.1.max <- tab$n[match(comm.melt$trait.1,tab$trait)]
  # handle presenting shared counts as a proportion of X's total SNPs
  if(prop){
    comm.melt$value = comm.melt$value/comm.melt$trait.1.max
    fill.lab = expression(frac("shared assc.","trait.1 assc."))
  } else {fill.lab = "shared assc."}
  # plot  
  ggplot(comm.melt,aes(x = trait.1, y = trait.2, fill = value)) + 
    geom_tile(col = "black") + 
    geom_text(aes(label = round(value,2)),size = text.size*.25) +
    theme(axis.text.x = element_text(angle = 25, hjust = 1),
          text = element_text(size = text.size)) +
    scale_fill_gradient(low = low.col, high = high.col) +
    labs(fill = fill.lab,
         title = "Shared Association Plot",
         x = paste0("trait.1 (q < ",sig.lvl,")"),
         y = paste0("trait.2 (q < ",sig.lvl2,")"),
         subtitle = paste(unique(c(df$gwa.type,df2$gwa.type)),collapse = ", "))
}

Shared Trait Associations

First we’ll look at the shared associations among traits. Numbers in the panes of each plot represent shared SNP associations, between traits X and Y, as a proportion of total SNP associations with X. We will look at q value significance cutoffs of .05 and .01 respectively:

shared.SNP.plot(GWA.res.list = trait.sig05.gwa,
                prop = TRUE,
                sig.lvl = .05)

For the most part, we see that each row has values similar to the trait’s (Y) population SNP association proportion, which is what you would expect for a completely random sample of SNPs. However, there are some cases where a row’s value is greater than that expected by random chance ([PG,CT],[Npct,CN],[Vol,BAsqrt],[SLA,BAsqrt],[CN,CT]). These Pairs, are those that likely share true associations.

Now let’s look at a q value cutoff of .01 and add an additional row in the plot that corresponsds to all traits of the Y axis. This row shows the proportion of X’s associations that are shared by any other trait of Y (at .05, nearly 100% of each Trait’s associations were shared).

shared.SNP.plot(GWA.res.list = trait.sig05.gwa,
                prop = TRUE,
                sig.lvl = .01, all.2 = TRUE)

at this lower q value cutoff, shared associations shrink to their expected ‘random sample’ levels for all pairs of traits except those that are directly related or are transformations of each other (BA and Vol, Npct and C:N, …). This indicates that, at the .01 cutoff, we have largely removed spurious associations that are caused by trait correlations.

We can also see from this plot that Sex.TEMP shares the largest proportion of it’s associatons with other traits (62%). This makes sense, since it has the highest number of total associations and because sex effects are confounded by genet.

shared.SNP.plot(GWA.res.list = trait.sig05.gwa,
                prop = TRUE,
                sig.lvl = .001, all.2 = TRUE)

Finally, we’ll look at the shared trait plot for our trait gwa models with trait covariates

shared.SNP.plot(GWA.res.list = covartrait.sig01.gwa,
                prop = TRUE,
                sig.lvl = .001, all.2 = TRUE)

Shared Insect Association

Now let’s look at the Associations that Insects share with other insects.

Insect GWA: shared associations

First, the Insect GWA results (no trait covariates):

shared.SNP.plot(insect.sig.gwa,prop = TRUE, sig.lvl = .05,all.2=TRUE)

lomb.n <- insect.sig.gwa$sig.tab %>% filter(trait=="Lombardy.Mine") %>% select(n) %>% unlist()

We can see that, before accounting for traits, Phyllocolpa, Petiole.Gall, Harmandia, and Blackmine share the most assocations relative to their total associations (figure 1). We can also see that Lombardy.Mine had the highest proportion of it’s associations shared (30%). However, this is likely due to the very low total number of associations with Lombardy.Mine (33).

InsTrait GWA: shared associations

shared.SNP.plot(instrait.sig.gwa,prop = TRUE, sig.lvl = .05, all.2=TRUE)

After accounting for traits, Phyllocolpa, and Petiole.Gall still share associations slightly higher than ‘random sample’ chance, however the differences are small. The actual gene function behind shared SNP associations is needed to determine if these shared associations are truly higher than expected.

Shared Insect and Trait Associations

Finally, we will compare the Insect GWA associatons to the Trait GWA associations to visualize which traits insects share the most SNP associtions with, indicating that those insect-gene associations might be mediated by plant traits. There is also a table that shows insect associations in terms of total number and proportion of all SNPs for reference.

shared.SNP.plot(GWA.res.list = insect.sig.gwa,
                prop = TRUE, sig.lvl = .05,
                GWA.list2 = trait.sig05.gwa,
                sig.lvl2 = .01, all.2 = TRUE)

shared.SNP.plot(GWA.res.list = insect.sig.gwa,
                prop = TRUE, sig.lvl = .05,
                GWA.list2 = covartrait.sig05.gwa,
                sig.lvl2 = .01, all.2 = TRUE)

# PGN.assoc <- insect.sig.gwa$sig.tab %>% filter(trait=="Pale.Green.Notodontid") %>% select(n) %>% unlist()

insect.sig.gwa$sig.tab %>% select(trait,n) %>% mutate(prop = round(n/SNP.N,4))

Here we can see that PG,CT, and BBreakDegDay, share a disproportionate number of associations with multiple insects. This suggests that the trait associated SNPs are driving insect incidence (most likely indirectly via the traits).

Gene Annotations

Diversity GWA

Here we will look at the Diversity GWA

p %+% (data = diversity.sig05.gwa$sig.tab) +
  labs(title = "Diversity GWA Results") %+%
  theme(axis.text.x = element_text(angle = 45))

diversity.sig05.gwa$sig.tab %>% select(trait,n)

No Associations were detected for any MDS axis and only 2 were found forshannon index, however species richness and evenness had a few hits.

shared.SNP.plot(GWA.res.list = diversity.sig05.gwa, sig.lvl = .05,
                prop = TRUE,
                GWA.list2 = trait.sig05.gwa, sig.lvl2 = .01)

We can see that the richness and evenness associations are largely correlated with budbreak and EFN associations as well. Additionally, one of the 2 shannon traits index-associated was associated with BA and EFNs and all of them were associated with volume.

shared.SNP.plot(GWA.res.list = diversity.sig05.gwa, sig.lvl = .05,
                prop = TRUE,
                GWA.list2 = insect.sig.gwa, sig.lvl2 = .01)

A few of the diversity-associated SNPs are also common insect-associated SNPs. Unsurprisingly, Green Aphid associations are correlated with evenness associations. This is likely due to the fact that aphid populations are often in the thousands when present. Communities that contain this many aphids, and relatively few individuals from other species, will be more uneven than those with small numbers of aphids.

Do Traits Mediate Insect Hits?

Now we’ll look at the number of Insect associated SNPs are also associated with traits. For now, we’ll use only one insect Phyllocolpa for visualization and the TraitGWA results (random effects specification: (1|year/month/genet)) because the model is directly comparable to the insect models.

phyll <- sig.GWA(files.list = "InsectGWA_Phyllocolpa_master_gwa-results.csv",
                 sig.lvl = .05)$sig.df
phyll.covar <- sig.GWA(files.list = "InsTraitGWA_Phyllocolpa_master_gwa-results.csv",
                 sig.lvl = .05)$sig.df

# BA.001q <- sig.GWA(files.list = "TraitGWA_BAsqrt_master_gwa-results.csv",
#               sig.lvl = .001)$sig.df

phyll.traits <- c("BA.2012sqrt","SLA","CT","Npct","CN","BAsqrt","Vol","BBreakDegDay")
trait.mods <- lapply(phyll.traits, FUN = function(x){
  df <- sig.GWA(files.list = paste("TraitGWA",x,"master_gwa-results.csv", sep = "_"),
                sig.lvl = .01)$sig.df
  return(df)
})
trait.mods.df <- trait.mods %>% bind_rows()
trait.SNPs <- unique(trait.mods.df$SNP.name)

# Alternatively
# new.sig.df <- trait.sig05.gwa$full.results.df %>% filter(q.all <= .05)
# trait.SNPS <- unique(new.sig.df$SNP.name)

# Or
trait.sig05.bonf.gwa <- sig.GWA(trait.files,sig.lvl = .05,method = "bonf")
new.sig.df <- trait.sig05.bonf.gwa$full.results.df %>% filter(bonf.p <= .05)
# trait.SNPS <- new.sig.df %>% filter(bonf.p <= .05) %>% pull(SNP.name)
trait.SNPS <- new.sig.df %>% filter(bonf.all <= .05) %>% pull(SNP.name) #or
# intersection table
A <- phyll$SNP.name
B <- phyll.covar$SNP.name
C <- trait.SNPs
rbind(lenth = c(A = length(A),B = length(B),C = length(C)))
##          A    B     C
## lenth 3062 1891 48823
# X and Y
AB <- intersect(A,B)
AC <- intersect(A,C) #
BC <- intersect(B,C) #
ABC <- Reduce(intersect, list(A,B,C))
# X but not Y
AnB <- A[! A %in% B] #
AnC <- A[! A %in% C]
BnC <- B[! B %in% C]
# calculate the proportion of SNPs mediated by traits
all.SNPs <- unique(c(A,B,AB,BC))
trait.mediated.SNPS <- unique(c(AnB,AC,BC))
prop.mediated <- length(trait.mediated.SNPS)/length(all.SNPs)
library(VennDiagram)
# Draw a venn diagram to show this
grid.newpage()
draw.triple.venn(area1 = length(A), area2 = length(B), area3 = length(C),
                 n12 = length(AB),  n13 = length(AC),  n23 = length(BC),
                 n123 = length(ABC),
                 category = c("Phyllocolpa GWA","Phyllocolpa GWA\n(trait covars)","TraitGWA"),
                 scaled = TRUE, euler.d = TRUE,
                 # fill = c("green","blue","red"),
                 cat.col = "black", #c("green","blue","red")
                 # lty = "blank", main = "GWA Intersection Plot"
                 )

## (polygon[GRID.polygon.939], polygon[GRID.polygon.940], polygon[GRID.polygon.941], polygon[GRID.polygon.942], polygon[GRID.polygon.943], polygon[GRID.polygon.944], text[GRID.text.945], text[GRID.text.946], text[GRID.text.947], text[GRID.text.948], text[GRID.text.949], text[GRID.text.950], text[GRID.text.951], text[GRID.text.952], text[GRID.text.953], text[GRID.text.954])
round(prop.mediated*100, 2)
## [1] 69.12

We can see that 69.12% of the Associations found for Phyllocolpa are either associated with or directly dependent upon a phyllocolpa-associated trait.