###################################################################### # Factorial analysis code ###################################################################### # # Version 1 by Sohela Shah and Saunak Sen # Date 11 September 2009 # ####################################################################### # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # any later version. # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # A copy of the GNU General Public License is available at # http://www.gnu.org/licenses/. ####################################################################### # This code is meant to accompany the paper entitled Strain Background # Modifies Phenotypes in the Atp8b1-Deficient Mouse # by S. Shah, U.R. Sanford, J. Vargas, H. Xu, A. Groen, # C.C. Paulusma, L. Pawlikowska, S. Sen, R.P.J. Oude Elferink, # L.N. Bull ######################################################################## # Our objective is to analyze the serum bile salt data to discover differences # between strains (C57BL/6, 129S, and F1), genotype (mutant or wild type), # the diet regime (control or cholate), and sex. The initial (base) # analysis focused on these factors and their interaction. Two 129S sub-strains # (129S1 and 129S4) were used. These were combined into one group for both pure # strains and F1 mice. 
# ---- Input data ----
# NOTE(review): absolute, machine-specific path; point it at the local copy of
# the data file before running.
serum.bs <- read.delim("/Users/sohelashah/Documents/Cholestasis/Mouse data/R files/R-serum-bile-salts-ave-corrected.txt")

# Keep only the rows that have a value in the second column (rows without
# data are dropped).
idx <- complete.cases(serum.bs[, 2])
serum.bs <- serum.bs[idx, ]

# Columns of the raw table that are used in the analysis.
idx.c <- c(1:4, 6:8, 10, 12, 16:19)
serumdat <- serum.bs[, idx.c]

# Assign column names.
names(serumdat) <- c(
  "mouse", "conc.1", "date.1", "conc.base", "tp.1", "conc.2", "date.2",
  "conc.sac", "tp.2", "strain", "sex", "genotype", "diet"
)

# ---- Factors ----
# Convert the four design variables to factors (via character, so that any
# pre-existing factor coding is rebuilt from the labels).
serumdat[c("strain", "sex", "genotype", "diet")] <- lapply(
  serumdat[c("strain", "sex", "genotype", "diet")],
  function(column) as.factor(as.character(column))
)

# Combine the 2 S129 strains: every strain label containing "S" becomes
# "S129".
# NOTE(review): this assumes no F1 label contains an "S" -- confirm against
# the data file.
serumdat$strain2 <- as.character(serumdat$strain)
idx <- grep("S", as.character(serumdat$strain))
serumdat$strain2[idx] <- "S129"
serumdat$strain2 <- as.factor(serumdat$strain2)

# Combine the 2 F1 strains: every (already S129-merged) label containing "F"
# becomes "F1".
serumdat$strainC <- as.character(serumdat$strain2)
idx <- grep("F", as.character(serumdat$strain2))
serumdat$strainC[idx] <- "F1"
serumdat$strainC <- as.factor(serumdat$strainC)

# Treatment contrasts for the three-level combined strain factor, and the
# second level of diet and genotype as the reference level.
contrasts(serumdat$strainC) <- contr.treatment(3)
serumdat$diet <- relevel(serumdat$diet, 2)
serumdat$genotype <- relevel(serumdat$genotype, 2)

# ---- Prediction grid ----
# Make fake new data to get estimated effects for each combination of the
# four factors (2 sexes x 2 diets x 3 strains x 2 genotypes = 24 rows).
# x4 is built at full length instead of relying on data.frame() recycling a
# length-12 vector; the resulting data frame is the same.
x1 <- as.factor(rep(c("F", "M"), each = 12))
x2 <- as.factor(rep(rep(c("Control", "Cholate"), each = 6), times = 2))
x3 <- as.factor(rep(rep(c("B6", "S129", "F1"), each = 2), times = 4))
x4 <- as.factor(rep(c("WT", "Mutant"), times = 12))
newdata <- data.frame(sex = x1, diet = x2, strainC = x3, genotype = x4)

# ---- Helper functions ----

# Fit a generalized linear model and reduce it by stepwise selection.
#
# Arguments:
#   formula  model formula, or a string that as.formula() can convert
#   data     data frame holding the model variables
#   ...      further arguments passed on to glm()
#
# The penalty per parameter in step() is log(number of residuals), i.e. the
# BIC penalty rather than the default AIC penalty of 2. The summary of the
# selected model is printed (without significance stars) and the selected
# model is returned.
fitbic <- function(formula, data, ...) {
  full.fit <- glm(as.formula(formula), data = data, ...)
  # Alternative sample-size source that was tried previously:
  # step(full.fit, k = log(nrow(full.fit$qr$qr)), trace = 0)
  selected.fit <- step(
    full.fit,
    k = log(length(full.fit$residuals)),
    trace = 0
  )
  # print(summary(selected.fit)$call)
  # print(summary(selected.fit)$coefficients, digits = 3)
  print(summary(selected.fit), signif.stars = FALSE, digits = 2)
  selected.fit
}

# Scatter plot of y against x on a common scale with a reference line.
#
# Arguments:
#   x, y        numeric vectors to plot
#   z           logical vector; TRUE points are drawn as open circles,
#               FALSE points as solid circles
#   xlab, ylab  axis labels
#   fac         offset: the y axis covers the x-axis limits minus fac, and
#               the reference line drawn is y = x - fac
#   cex         point size
plotpheno <- function(x, y, z, xlab = "", ylab = "", fac = 0, cex = 0.7) {
  xylim <- range(c(x, fac + y), na.rm = TRUE)
  plot(
    x, y,
    xlim = xylim, ylim = xylim - fac,
    xlab = xlab, ylab = ylab, type = "n"
  )
  # open circles
  points(x[z], y[z], pch = 21, cex = cex)
  # solid circles
  points(x[!z], y[!z], pch = 19, cex = cex)
  lines(xylim, xylim - fac)
}

# ---- Model formulas ----
# Full four-way factorial on the right-hand side; log concentration at
# baseline and at sacrifice as the two responses.
form.a <- "strainC*diet*genotype*sex"
form.start <- "log(conc.base) ~"
form.end <- "log(conc.sac) ~"

# ---- Analysis subset ----
# Rows where both concentrations are present ...
idx.se <- complete.cases(serumdat$conc.base, serumdat$conc.sac)
# ... and both are non-zero, so that the log responses are finite.
idx <- which(idx.se & (serumdat$conc.base != 0) & (serumdat$conc.sac != 0))
serumdat.comp <- serumdat[idx, ]

# ---- Baseline serum bile salts ----
# Fit baseline serum bile salt data using the fitbic function.
hit0 <- fitbic(
  as.formula(paste(form.start, form.a)),
  data = serumdat.comp
)

# Print fitted (predicted) medians for each group of mice (by sex, diet,
# genotype, and strain), rounded to one decimal place. predict() on a glm
# fit has no `interval` argument, so only point estimates are available
# here.
cbind(newdata, est = round(exp(predict(hit0, newdata)) * 10) / 10)

# ---- Post-diet serum bile salts ----
# Fit post-diet serum bile salt data using the fitbic function.
hit1 <- fitbic(
  as.formula(paste(form.end, form.a)),
  data = serumdat.comp
)

# Print fitted (predicted) medians for each group of mice (by sex, diet,
# genotype, and strain), rounded to two decimal places.
cbind(newdata, est = round(exp(predict(hit1, newdata)) * 100) / 100)