############################################################################################################################# ################## Sensitivity ########################################################################################## # # Function "Sensitivity" calculates sensitivity, specificity and efficiency for each haplotype # # The function was developed in "S-Plus 6.1" and "R 2.5" and runs under both programs, S-Plus and R # # by Claudia Lamina, Friedhelm Bongardt, Iris M. Heid # # Institute of Epidemiology # Helmholtz Zentrum München - German Research Center for Environmental Health # Neuherberg, Germany # # Input parameter: # # haplos: Vector of observed haplotypes in 1/2-Coding, if code="1/2" (default): # e.g. 2-locus haplotype: haplos=c("11","12","21","22") # or, alternatively: # Matrix of haplotypes in 0/1-Coding, if code="0/1": # column 1: allele of locus 1, column 2: allele of locus 2 etc # row 1: haplotype 1, row 2: haploptype 2 etc. # e.g. 2- locus haplotype: haplos=matrix(c(0,0,1,1,0,1,0,1),ncol=2) # # freqs: vector of haplotype frequencies (for haplotypes with freq > 0) # code: "1/2"(default), if input of haplotypes is a vector in "1/2"-Coding (see haplos) # "0/1", if input of haplotypes is a matrix in "0/1"-Coding (see haplos) # # Output: # # List of length=2 # 1. component: # Dataframe containing haplotypes (col 1), haplotype frequencies (col 2), sensitivity (col 3), # specificity (col 4) and efficiency (col 5) # # 2. component: # Dataframe containing haplotypes (col 1) and probabilities of the misclassificatin matrix: # p11: Prob. that haplotype is truely present and assigned after estimation # p12: Prob. that haplotype is truely present but not assigned after estimation # p21: Prob. that haplotype is truely not present but assigned after estimation # p22: Prob. that haplotype is truely not present and not assigned after estimation # p.1: Prob. that haplotype is assigned after estimation # p.2: Prob. that haplotype is not assigned after estimation # p1.: Prob. that haplotype is truely present # p2.: Prob. that haplotype is truely not present # # Examples: # # Example 1 (code="0/1"): # # haplotypes <- matrix(c(0,0,1,1,0,1,0,1),ncol=2) # frequencies <- c(0.4,0.3,0.2,0.1) # Sensitivity(haplotypes,frequencies,"0/1") # # Example 2 (code="1/2"): # # haplotypes <- c("11","12","21","22") # frequencies <- c(0.4,0.3,0.2,0.1) # Sensitivity(haplotypes,frequencies) # #################################################################################################################################### Sensitivity <- function(haplos,freqs,code="1/2") { freqs <- freqs / sum(freqs) #Standardization of sum of haplotype frequencies to 1 Leng <- length(freqs) if(code=="1/2") { haplo2 <- haplos Loci <- nchar(haplos[1]) haplotypes <- matrix(rep(NA,Leng*Loci),ncol=Loci) for(i in 1:Loci) { haplotypes[,i] <- substring(haplos,i,i) } haplos <- matrix(as.integer(haplotypes),ncol=Loci) - 1 } else haplo2 <- apply(haplos,1,paste,collapse="") Loci <- length(haplos[1,]) er.h <- rep(NA,Leng) haplos <- t(haplos) genos <- matrix(rep(NA,Leng^2),ncol=Leng) mono.freqs <- matrix(rep(NA,Leng^2),ncol=Leng) for(i in 1:Leng) { #genos: matrix with genotypes for each diplotype #e.g. "02" for diplotype 01/01 (0 minor alleles at locus 1, 2 minor alleles at locus 2) genos[i,] <- apply(haplos[,i] + haplos, 2, paste, collapse="") #mono.freqs: frequencies of (sorted) diplotypes assuming HWE mono.freqs[i,] <- freqs[i] * freqs } genotypes <- sort(unique(genos)) #all present genotypes gen.freqs <- rep(NA,length(genotypes)) #gen.freqs: sum of all diplotype-frequencies with the same genotype for(i in 1:length(genotypes)) gen.freqs[i] <- sum((genos==genotypes[i]) * mono.freqs) #genos: matrix, holding genotype frequencies genos2 <- as.factor(genos) genos <- as.factor(genos) Levels <- matrix(as.integer(genos),ncol=Leng) levels(genos) <- gen.freqs genos <- matrix(as.numeric(as.character(genos)),ncol=Leng) #mono.freqs: frequencies of sorted diplotypes (e.g.: P(h1/h2)=p and P(h2/h1)=p) #diplo.freqs: frequencies of diplotypes without consideration of order (e.g.: P(h1 and h2)=2*p) right <- matrix(rep(0,Leng^2),ncol=Leng) for(i in levels(genos2)) right <- right + matrix(((genos2 == i) * mono.freqs) == max((genos2 == i) * mono.freqs),ncol=Leng) diplo.freqs <- mono.freqs + (1 - diag(rep(1,Leng))) * mono.freqs #Calculation of misclassification and marginal probabilities p1. <- apply(diplo.freqs, 1, sum) p11 <- apply(diplo.freqs * (right), 1, sum) p12 <- p1. - p11 p.1 <- rep(0,Leng) for(j in 1:Leng) { for(i in 1:Leng) { p.1[j] <- p.1[j] + (right[i,j]) * sum(((Levels == Levels[i,j]) * mono.freqs)) } } p.2 <- 1 - p.1 p21 <- p.1 - p11 p22 <- p.2 - p12 p2. <- 1 - p1. #Calculation of sensitivity, specificity, efficiency sensitivity <- p11 / p1. specificity <- p22 / p2. efficiency <- p11 + p22 list(data.frame(Haplotype=haplo2,Frequency=freqs,Sensitivity=sensitivity,Specificity=specificity,Efficiency=efficiency), data.frame(Haplotype=haplo2,p11=p11,p12=p12,p21=p21,p22=p22,p.1=p.1,p.2=p.2,p1.=p1.,p2.=p2.)) } # END Sensitivity ################################################################################################################################# ############################################################################################################################################################### ######## Starplot ########################################################################################################################################## # # Function "Starplot" draws sensitivity, specificity or R-square-values for each haplotype # # The function was developed in "S-Plus 6.1" and "R 2.5" and runs under both programs, S-Plus and R # # by Claudia Lamina, Friedhelm Bongardt, Iris M. Heid # # Institute of Epidemiology # Helmholtz Zentrum München - German Research Center for Environmental Health # Neuherberg, Germany # # required arguments: # values: vector of parameter values to be plotted for each haplotype represented by the length of the line in the circle # (e.g. the sensitivity or specificity given by the function "sensitivity", or # the R-square that can be calculated by the program tagSNPs by Dan Stram) # # optional arguments: # labels: vector of labels to be printed for each haplotype # freqs: vector of frequencies of each haplotype; # if frequencies are specified, they will be printed in the plot # if frequencies are not specified, haplotypes are plotted in the given order # orderplot: if orderplot=T (default), haplotypes are plotted in the order of their frequencies # if orderplot=F, haplotypes are plotted in the given order # circles: vector of radii of circles to be added to the plot. By default, one circle with # radius=1 is printed. # circ.labels: if circ.labels=T: the circle radii are specified # if circ.labels=F (default): the circle radii are not specified # label.dist: Distance of the labels to the center of the plot. By default, this is 1.15 # label.size: Size of the labels (default=1) # # Graphical parameters may also be supplied to this function (see S-Plus-help for 'par'), e.g. main="" as main title # # # Examples: # # Example 1: # # starplot(c(0.9,0.8,0.6,0.5), c("AA","AB","BA","BB"), freqs=c(0.5,0.3,0.15,0.05)) # # Example 2 (using the output of the function "Sensitivity"): # # haplotypes <- c("000", "001", "010", "011", "100", "101", "110", "111") # frequencies <- c(0.48000,0.05408,0.15261,0.10000,0.17480,0.01403,0.01448,0.01000) # sens.test<-Sensitivity(haplotypes,frequencies) # starplot(sens.test[[1]]$Sensitivity, haplotypes, frequencies) # # Example 3 (using the output of the function "Sensitivity"): # # haplotypes <- matrix(c(0,0,1,1,0,1,0,1),ncol=2) # frequencies <- c(0.4,0.3,0.2,0.1) # example.plot <- Sensitivity(haplotypes,frequencies,"0/1") # par(mfrow=c(1,2)) # starplot(example.plot[[1]]$Sensitivity, example.plot[[1]]$Haplotype, example.plot[[1]]$Frequency, main="Sensitivity", label.dist=1.17, label.size=0.7) # starplot(example.plot[[1]]$Specificity, example.plot[[1]]$Haplotype, example.plot[[1]]$Frequency, main="Specificity", label.dist=1.17, label.size=0.7) # ############################################################################################################################################################ ############################################################################################################################################################ starplot <- function(values,labels=0,freqs=0,orderplot=T,circles=1,circ.labels=F,label.dist=1.15,label.size=1,...) { plotfreqs <- T if(length(labels)==1) labels <- rep("",length(values)) if(length(freqs)==1) { freqs <- rep("",length(values)) orderplot <- F plotfreqs <- F } if(orderplot==T) { variable <- list(values,as.character(labels),freqs) orderlist <- order(-variable[[3]]) } else { variable <- list(values,as.character(labels),freqs) orderlist <- 1:4 } circles <- rev(sort(circles)) par(pty="s") plot(complex(modulus=1.2,argument=seq(-pi, pi, length=1000)),type="n",axes=F,xlab="",ylab="",...) lines(complex(modulus=circles[1],argument=seq(-pi, pi, length=1000))) if(circ.labels) text(circles[1]+0.03*circles[1],0.04*circles[1],circles[1]) if(length(circles) > 1) { for(i in 2:length(circles)) { lines(complex(modulus=circles[i],argument=seq(-pi, pi, length=1000))) if(circ.labels) text(circles[i]+0.03*circles[i],0.04*circles[i],circles[i]) } } numvariables <- length(values) argum <- (0:(numvariables-1)) / numvariables * 2 * pi sine <- sin(argum) cosine <- cos(argum) sine.lab <- sine * label.dist cosine.lab <- cosine * label.dist sine <- sine*variable[[1]][orderlist] cosine <- cosine*variable[[1]][orderlist] if (plotfreqs==T) { for(i in 1:length(labels)) { lines(c(0,sine[i]),c(0,cosine[i])) text(sine.lab[i],cosine.lab[i],paste(variable[[2]][orderlist][i],"\n ",round(100*variable[[3]][orderlist][i],1),"%",sep=""),cex=label.size) } } else { for(i in 1:length(labels)) { lines(c(0,sine[i]),c(0,cosine[i])) text(sine.lab[i],cosine.lab[i],variable[[2]][orderlist][i],cex=label.size) } } } # END Starplot ##############################################################################################################################