######################################################################
# Factorial analysis code
######################################################################
#
# Version 1 by Sohela Shah and Saunak Sen
# Date 11 September 2009
#
#######################################################################
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# A copy of the GNU General Public License is available at
# http://www.gnu.org/licenses/.
#######################################################################
# This code is meant to accompany the paper entitled Strain Background
# Modifies Phenotypes in the Atp8b1-Deficient Mouse
# by S. Shah, U.R. Sanford, J. Vargas, H. Xu, A. Groen,
# C.C. Paulusma, L. Pawlikowska, S. Sen, R.P.J. Oude Elferink,
# L.N. Bull
########################################################################
# Our objective is to analyze the strain data to discover differences
# between strains (C57BL/6, 129S, and F1), genotype (mutant or wild
# type), the diet regime (control or cholate), and sex.  The initial
# (base) analysis focused on these factors and their interaction.  Two
# 129S sub-strains (129S1 and 129S4) were used.  These were combined
# into one group for both pure strains and F1 mice.
#
# The main phenotypes assayed are as follows: weight loss, serum
# cholesterol, ALP, bilirubin, liver weight, and bile biochemistry
# (bile cholesterol, bile phospholipids, and bile salts).  The serum
# biochemistry levels were assayed at the start of the experiment and
# on the day of sacrifice.
# The bile biochemistry levels were assayed at sacrifice.  Most
# phenotypes were log transformed using natural logarithms.
#
# Enter data from four sheets exported from MS Excel as tab-delimited
# text files and then combine them into a single table.  The column
# names were consistent across sheets, which was checked.  Two columns
# were added for bile cholesterol and bile phospholipids.  The script
# file was changed accordingly to reflect this change.  Rows with no
# data were deleted.

library(MASS)  # provides polr(), used for the post-diet bilirubin model

# ---------------------------------------------------------------------
# Input data files
# ---------------------------------------------------------------------

# Directory holding the four exported sheets; change this one line to
# run the analysis from another location.
data.dir <- "/Users/sohelashah/Documents/Cholestasis/Mouse data/R files"

wt.control <- read.delim(file.path(data.dir, "WT on Control.txt"))
wt.cholate <- read.delim(file.path(data.dir, "WT on Cholate.txt"))
mut.control <- read.delim(file.path(data.dir, "Mutant on Control.txt"))
mut.cholate <- read.delim(file.path(data.dir, "Mutant on Cholate.txt"))

# Keep only rows with meaningful data: a row is dropped when its second
# column (the one later named "strain") is missing.
wt.control <- wt.control[complete.cases(wt.control[, 2]), ]
wt.cholate <- wt.cholate[complete.cases(wt.cholate[, 2]), ]
mut.control <- mut.control[complete.cases(mut.control[, 2]), ]
mut.cholate <- mut.cholate[complete.cases(mut.cholate[, 2]), ]

# Sheet columns to keep, listed in the order of the names assigned below
# (48 columns in total).
idx.c <- c(2, 5, 6, 11, 16:20, 23:27, 37:38, 45, 12, 15, 21, 28, 22, 29,
           34:36, 46:63, 10, 41, 42, 43)

# Combine the four sheets into one table
mousedat <- rbind(wt.control[, idx.c], wt.cholate[, idx.c],
                  mut.control[, idx.c], mut.cholate[, idx.c])

# Assign column names
names(mousedat) <- c(
  "strain", "sex", "genotype", "diet",
  "ast.start", "alt.start", "alp.start", "bili.start", "chol.start",
  "ast.end", "alt.end", "alp.end", "bili.end", "chol.end",
  "weight.start", "weight.end", "dietdays",
  "dutchorus", "anes", "dil.start", "dil.end", "hemo.start", "hemo.end",
  "bile.chol", "bile.phos", "bile.salts",
  "day0", "day1", "day2", "day3", "day4", "day5", "day6", "day7", "day8",
  "day9", "day10", "day11", "day12", "day13", "day14", "day15", "day16",
  "day17",
  "age", "per.wt.loss", "liverwt", "rel.liverwt"
)

# ---------------------------------------------------------------------
# Factors
# ---------------------------------------------------------------------

# Set strain, genotype, diet, and sex as factors.  Going through
# as.character() drops any unused levels carried over from the sheets.
mousedat$strain <- as.factor(as.character(mousedat$strain))
mousedat$sex <- as.factor(as.character(mousedat$sex))
mousedat$genotype <- as.factor(as.character(mousedat$genotype))
mousedat$diet <- factor(as.character(mousedat$diet))

# Combine the 2 S129 strains: every strain label containing "S" becomes
# "S129".
mousedat$strain2 <- as.character(mousedat$strain)
idx <- grep("S", mousedat$strain2)
mousedat$strain2[idx] <- "S129"
mousedat$strain2 <- as.factor(mousedat$strain2)

# Combine the 2 F1 strains: every remaining label containing "F"
# becomes "F1".
mousedat$strainC <- as.character(mousedat$strain2)
idx <- grep("F", mousedat$strainC)
mousedat$strainC[idx] <- "F1"
mousedat$strainC <- as.factor(mousedat$strainC)

# Treatment contrasts for the combined strain factor (assumes exactly
# three levels), and make the second level of diet and genotype the
# reference level.
contrasts(mousedat$strainC) <- contr.treatment(3)
mousedat$diet <- relevel(mousedat$diet, 2)
mousedat$genotype <- relevel(mousedat$genotype, 2)

# Make fake new data to get estimated effects for each combination of
# the four factors (2 sexes x 2 diets x 3 strains x 2 genotypes = 24
# rows).
x1 <- as.factor(rep(c("F", "M"), c(12, 12)))
x2 <- rep(c("control", "cholate"), c(6, 6))
x2 <- as.factor(c(x2, x2))
x3 <- rep(c("B6", "S129", "F1"), c(2, 2, 2))
x3 <- as.factor(c(x3, x3, x3, x3))
x4 <- as.factor(rep(c("WT", "mutant"), 12))
newdata <- data.frame(sex = x1, diet = x2, strainC = x3, genotype = x4)

# ---------------------------------------------------------------------
# Helper functions
# ---------------------------------------------------------------------

# Convert a column to numeric.  A factor is converted through its
# labels, because as.numeric() on a factor returns the internal level
# codes rather than the recorded values.
#   x : vector (numeric, character, or factor)
# Returns a numeric vector; entries that are not numbers become NA.
tonumeric <- function(x) {
  if (is.factor(x)) {
    x <- as.character(x)
  }
  as.numeric(x)
}

# Fit a generalized linear model and simplify it by stepwise selection
# with a BIC-type penalty, k = log(number of residuals in the full fit).
#   formula : model formula, or anything as.formula() accepts
#   data    : data frame holding the variables
#   ...     : further arguments passed on to glm()
# Prints the summary of the selected model and returns the selected
# model.  The argument names `formula` and `data` appear in the stored
# glm() call that step() re-evaluates, so they must keep these names.
fitbic <- function(formula, data, ...) {
  full <- glm(as.formula(formula), data = data, ...)
  best <- step(full, k = log(length(full$residuals)), trace = 0)
  print(summary(best), signif.stars = FALSE, digits = 2)
  best
}

# Scatter plot of y against x on a common axis range, with a reference
# line y = x - fac.
#   x, y : numeric vectors of the same length
#   z    : logical vector; TRUE points are drawn as open circles,
#          FALSE points as solid circles
#   xlab, ylab : axis labels
#   fac  : offset between the x and y scales
#   cex  : point size
plotpheno <- function(x, y, z, xlab = "", ylab = "", fac = 0, cex = 0.7) {
  xylim <- range(c(x, fac + y), na.rm = TRUE)
  plot(x, y, xlim = xylim, ylim = xylim - fac, xlab = xlab, ylab = ylab,
       type = "n")
  # open circles
  points(x[z], y[z], pch = 21, cex = cex)
  # solid circles
  points(x[!z], y[!z], pch = 19, cex = cex)
  lines(xylim, xylim - fac)
}

# Right-hand side shared by all models: full factorial in four factors
form.a <- "strainC*diet*genotype*sex"

# Mice that were on the diet for 4 to 8 days; this mask is aligned with
# the rows of `mousedat` and must only be used to index `mousedat`.
idx.days <- ((mousedat$dietdays >= 4) & (mousedat$dietdays <= 8))

# NOTE(review): the predict() calls below pass interval = "confidence"
# to glm fits; predict.glm documents no such argument, so it is
# presumably ignored and only point estimates are returned -- confirm.

# ---------------------------------------------------------------------
# Percent weight loss per day
# ---------------------------------------------------------------------

# Calculate percent weight loss per day
mousedat$per.weight.loss <-
  (((mousedat$weight.start - mousedat$weight.end) /
      mousedat$weight.start) * 100) / mousedat$dietdays

# Fit weight loss data using the fitbic function
hit3 <- fitbic(per.weight.loss ~ strainC * diet * genotype * sex,
               data = mousedat)

# Print fitted (predicted) values for each group of mice (by sex, diet,
# genotype, and strain)
cbind(newdata,
      est = round(predict(hit3, newdata, interval = "confidence") * 1000) /
        1000)

# ---------------------------------------------------------------------
# Cholesterol
# ---------------------------------------------------------------------

# Several cholesterol data points were marked as \code{<50}.  We treated
# them as missing.
mousedat$chol.start <- as.character(mousedat$chol.start)
idx <- mousedat$chol.start == "<50"
print(paste0("Suspect values at start = ", sum(idx, na.rm = TRUE), "."))
mousedat$chol.start[which(idx)] <- NA
mousedat$chol.start <- as.numeric(mousedat$chol.start)

mousedat$chol.end <- as.character(mousedat$chol.end)
idx <- mousedat$chol.end == "<50"
print(paste0("Suspect values at end = ", sum(idx, na.rm = TRUE), "."))
mousedat$chol.end[which(idx)] <- NA
mousedat$chol.end <- as.numeric(mousedat$chol.end)

form.start <- "log(chol.start) ~"
form.end <- "log(chol.end) ~"

idx.se <- complete.cases(mousedat$chol.start, mousedat$chol.end)

# Rows whose post-diet cholesterol has a usable (finite) logarithm
idx <- which(!is.na(log(mousedat$chol.end)) & (mousedat$chol.end != 0))
mousedat.compchol <- mousedat[idx, ]

# Fit baseline cholesterol data using the fitbic function
hit0 <- fitbic(as.formula(paste(form.start, form.a)), data = mousedat)

# Print fitted (predicted) medians for each group of mice (by sex, diet,
# genotype, and strain)
cbind(newdata,
      est = round(exp(predict(hit0, newdata, interval = "confidence")) *
                    10) / 10)

# Fit post-diet cholesterol data using the fitbic function.  Both row
# filters (usable value, 4-8 diet days) are expressed as row numbers of
# `mousedat` and intersected, so the two filters stay aligned.
hit1 <- fitbic(as.formula(paste(form.end, form.a)),
               data = mousedat[intersect(idx, which(idx.days)), ])

# Print fitted (predicted) medians for each group of mice (by sex, diet,
# genotype, and strain)
cbind(newdata,
      est = round(exp(predict(hit1, newdata, interval = "confidence")) *
                    10) / 10)

# ---------------------------------------------------------------------
# ALP
# ---------------------------------------------------------------------

mousedat$alp.start <- tonumeric(mousedat$alp.start)
mousedat$alp.end <- tonumeric(mousedat$alp.end)

# Rows whose post-diet ALP has a usable (finite) logarithm
idx <- which(!is.na(log(mousedat$alp.end)) & (mousedat$alp.end != 0))
mousedat.compalp <- mousedat[idx, ]

form.start <- "log(alp.start) ~"
form.end <- "log(alp.end) ~"

idx.se <- complete.cases(mousedat$alp.start, mousedat$alp.end)

# Fit baseline ALP data using the fitbic function
hit0 <- fitbic(as.formula(paste(form.start, form.a)), data = mousedat)

# Print fitted (predicted) medians for each group of mice (by sex, diet,
# genotype, and strain)
cbind(newdata,
      est = round(exp(predict(hit0, newdata, interval = "confidence")) *
                    10) / 10)

# Fit post-diet ALP data using the fitbic function (row filters
# intersected on `mousedat` row numbers, as for cholesterol)
hit1 <- fitbic(as.formula(paste(form.end, form.a)),
               data = mousedat[intersect(idx, which(idx.days)), ])

# Print fitted (predicted) medians for each group of mice (by sex, diet,
# genotype, and strain)
cbind(newdata,
      est = round(exp(predict(hit1, newdata, interval = "confidence")) *
                    10) / 10)

# ---------------------------------------------------------------------
# Bilirubin
# ---------------------------------------------------------------------

# For bilirubin we performed a grouped analysis.  For baseline we had 2
# levels - normal and elevated - fitted with a binomial GLM; for
# post-diet we had 3 levels - normal, moderately elevated, and highly
# elevated - fitted with the proportional odds (polr) function.

# Baseline: entries containing "yellow" are treated as missing, entries
# containing "<" as 0; then values <= 0.1 are coded 0 and values > 0.1
# are coded 1.
mousedat$bili.start <- as.character(mousedat$bili.start)
idx <- grep("yellow", mousedat$bili.start)
mousedat$bili.start[idx] <- NA
idx <- grep("<", mousedat$bili.start)
mousedat$bili.start[idx] <- 0
mousedat$bili.start <- as.numeric(mousedat$bili.start)
mousedat$bili.start[which(mousedat$bili.start <= 0.1)] <- 0
mousedat$bili.start[which(mousedat$bili.start > 0.1)] <- 1

# Post-diet: same cleaning; then > 2.0 is coded 3, (0.1, 2.0] is coded
# 2, and <= 0.1 is coded 1.  The order of the three assignments matters:
# each step must not re-match a code written by an earlier step.
mousedat$bili.end <- as.character(mousedat$bili.end)
idx <- grep("yellow", mousedat$bili.end)
mousedat$bili.end[idx] <- NA
idx <- grep("<", mousedat$bili.end)
mousedat$bili.end[idx] <- 0
mousedat$bili.end <- as.numeric(mousedat$bili.end)
mousedat$bili.end[which(mousedat$bili.end > 2.0)] <- 3
mousedat$bili.end[which(mousedat$bili.end > 0.1 &
                          mousedat$bili.end <= 2.0)] <- 2
mousedat$bili.end[which(mousedat$bili.end <= 0.1)] <- 1
mousedat$bili.end <- as.factor(mousedat$bili.end)

form.start <- "bili.start ~"
form.end <- "bili.end ~"

idx.se <- complete.cases(mousedat$bili.start, mousedat$bili.end)

# Generalized linear model for the binomial (2 level) analysis of
# baseline bilirubin
glm.start <- glm(as.formula(paste(form.start, form.a)), data = mousedat,
                 family = binomial)

# Proportional odds regression for 3 level post-diet bilirubin.  Each
# row of `mousedat` is one mouse, so no case weights are supplied.
pol.end <- polr(as.formula(paste(form.end, form.a)),
                data = mousedat[which(idx.days), ])

summary(glm.start)
summary(pol.end)

# Stepwise selection.
# NOTE(review): the penalty here is log(residual degrees of freedom),
# whereas fitbic() uses log(number of observations) -- confirm which
# one is intended.
step(glm.start, k = log(glm.start$df.residual), trace = 0)
step(pol.end, k = log(pol.end$df.residual), trace = 0)

# ---------------------------------------------------------------------
# Liver weight relative to final body weight
# ---------------------------------------------------------------------

form.liver <- "log(rel.liverwt)~"

# Fit liver weight at sacrifice using the fitbic function
hit0 <- fitbic(as.formula(paste(form.liver, form.a)), data = mousedat)

# Print fitted (predicted) medians for each group of mice (by sex, diet,
# genotype, and strain)
cbind(newdata,
      est = round(exp(predict(hit0, newdata, interval = "confidence")) *
                    10) / 10)

# ---------------------------------------------------------------------
# Bile cholesterol
# ---------------------------------------------------------------------

form.bile <- "log(bile.chol) ~"
hit0 <- fitbic(as.formula(paste(form.bile, form.a)), data = mousedat)
cbind(newdata,
      est = round(exp(predict(hit0, newdata, interval = "confidence")) *
                    10) / 10)

# ---------------------------------------------------------------------
# Bile phospholipids
# ---------------------------------------------------------------------

mousedat$bile.phos <- tonumeric(mousedat$bile.phos)
idx <- which(!is.na(log(mousedat$bile.phos)) & (mousedat$bile.phos != 0))
mousedat.compbp <- mousedat[idx, ]
form.phos <- "log(bile.phos) ~"
hit0 <- fitbic(as.formula(paste(form.phos, form.a)), data = mousedat.compbp)
cbind(newdata,
      est = round(exp(predict(hit0, newdata, interval = "confidence")) *
                    10) / 10)

# ---------------------------------------------------------------------
# Bile salts
# ---------------------------------------------------------------------

mousedat$bile.salts <- tonumeric(mousedat$bile.salts)
idx <- which(!is.na(log(mousedat$bile.salts)) & (mousedat$bile.salts != 0))
mousedat.compbs <- mousedat[idx, ]
form.salt <- "log(bile.salts) ~"
hit0 <- fitbic(as.formula(paste(form.salt, form.a)), data = mousedat.compbs)
cbind(newdata,
      est = round(exp(predict(hit0, newdata, interval = "confidence")) *
                    10) / 10)