# Function to return positions of glycosylation sites in amino acid sequences
#
# A glycosylation motif is N-X-T or N-X-S, where X may be any amino acid
# except P. Blanks are removed before the motif search, so a motif may span
# blank positions. Reported positions refer to the original sequence
# (blanks included) and mark the N that starts each motif.
#
# Arguments:
#   AASeq           list of amino acid sequences; each element is compared
#                   element-wise against single-letter codes (presumably a
#                   character vector with one amino acid per element)
#   CapitalLetters  indicator for whether amino acids are in capital letters
#   BlankSign       sign for blanks
#
# Value:
#   The result of sapply() over AASeq. Per sequence this is the integer vector
#   of motif start positions, or NULL if the sequence has fewer than three
#   non-blank entries. Note that sapply() simplifies the result to a vector or
#   matrix whenever all sequences yield the same number of positions.
FindGlyc <- function(
  AASeq,                 # list of amino acid sequences
  CapitalLetters = TRUE, # Indicator for whether amino acids are in capitals
  BlankSign = "-"        # Sign for blanks
) {

  # Determine amino acids required at position x and x + 2 and not allowed at
  # x + 1
  if (CapitalLetters) {
    GlycFirst     <- "N"
    GlycThird     <- c("T", "S")
    GlycNotMiddle <- "P"
  } else {
    GlycFirst     <- "n"
    GlycThird     <- c("t", "s")
    GlycNotMiddle <- "p"
  }

  sapply(AASeq, function(x) { # Loop through sequences

    # Indicators for all "non-blanks"
    NoBlankIndices <- x != BlankSign

    # A motif needs at least three sequenced amino acids
    if (sum(NoBlankIndices) > 2) {

      # Positions (in the original sequence) without blank sign
      PosIndices <- seq_along(x)[NoBlankIndices]

      # Amino acid sequence with blanks removed
      Seq <- x[NoBlankIndices]

      # Candidate start positions within the blank-free sequence
      Start <- seq_len(length(Seq) - 2L)

      # Positions with NXT or NXS sequence where X can be anything but P.
      # The proline exclusion is applied to BOTH motifs; the combination is
      # written as three '&'-joined terms because '&' binds tighter than '|'
      # in R, which previously let N-P-T slip through.
      GlyInd <- Seq[Start] == GlycFirst &
        Seq[Start + 2L] %in% GlycThird &
        Seq[Start + 1L] != GlycNotMiddle

      # Select position indices of original sequence that fulfill the
      # above requirement of a glycosylation motif
      PosIndices[which(GlyInd)]
    } # End of if (sequences that are too short yield NULL)
  }) # End of sapply
} # End of function

# Function to return positions of blanks in amino acid sequences
#
# Arguments:
#   AASeq      list of amino acid sequences
#   BlankSign  sign for blanks
#
# Value:
#   The result of sapply() over AASeq; per sequence the integer positions at
#   which the entry equals BlankSign. sapply() simplifies the result to a
#   vector or matrix whenever all sequences contain the same number of blanks.
FindBlank <- function(
  AASeq,          # list of amino acid sequences
  BlankSign = "-" # Sign for blanks
) {
  sapply(AASeq, function(x) { # Loop through sequences
    # Positions of all "blanks"
    which(x == BlankSign)
  }) # End of sapply
} # End of function