# codeCgoodness.r - goodness-of-fit calculations for testing the
# four models, called from either codeAbigeye1.r for
# bigeye tuna data or codeDmussels.r for mussels. Should really
# make into a function for easier use. 12th July 2012.
# Programs originally written in Sweave, taken from my
# bigeyemleGood.Snw and bieyemleGoodbin2.Snw, which themselves
# came from earlier programs.
# breaksgood are bins in [a,b]
# For unbounded models then set top bin to end at Inf within
# the 'if' section below
# Now to work out expected counts (expectedprob) in each bin based
# on the model that was most supported by the AIC calculations.
# dof is degrees of freedom
# Compute expected probabilities (expectedprob) in each bin for the model
# best supported by AIC (smallest evidence ratio), plus the degrees of
# freedom (dof) for the goodness-of-fit tests below.
# Globals read: evidenceratio, breaksgood, a, b, and the MLE estimates
#   mumleinf, lambdamleinf (unbounded models, numparinf parameters) and
#   mumle, lambdamle (bounded models, numpar parameters).
# Globals set: probdistfun, expectedprob, dof; for the unbounded models
#   breaksgood has its top bin extended to Inf (see header comment).
if(which.min(evidenceratio) == 1) # PL is best model
{ # Haven't used and tested the PL bit yet -
  # as it's never been best model.
  # PL is unbounded, so (as for Exp below) the top bin must run to Inf;
  # with a finite top bin the probabilities sum to 1 - (a/b)^(mumleinf-1),
  # not 1, and the sum(expectedprob) sanity check below would fail.
  breaksgood[length(breaksgood)] = Inf
  probdistfun = 1 - a^(mumleinf - 1) * breaksgood^(1 - mumleinf)
                                   # P( X <= x) where x is breaksgood
  expectedprob = diff(probdistfun) # expected probs in each bin
  # Have estimated numparinf parameters by MLE, so deg freedom:
  dof = length(expectedprob) - numparinf - 1
} else
if(which.min(evidenceratio) == 2) # Exp is best model
{
  breaksgood[length(breaksgood)] = Inf # want last one up to Inf not b
  probdistfun = 1 - exp(lambdamleinf * a) * exp( - lambdamleinf * breaksgood)
                                   # P( X <= x) where x is breaksgood
  expectedprob = diff(probdistfun) # expected probs in each bin
  # Have estimated numparinf parameters by MLE, so deg freedom:
  dof = length(expectedprob) - numparinf - 1
} else
if(which.min(evidenceratio) == 3) # PLB is best model
{
  # Guard BEFORE computing: at mumle = 1 the expression below is 0/0,
  # and the PLB cdf takes a different (logarithmic) form.
  if(mumle == 1) stop("mumle = 1, need probdistfun for PLB model")
  probdistfun = 1 - (breaksgood^(1-mumle) - b^(1-mumle)) /
                    (a^(1 - mumle) - b^(1 - mumle))
                                   # P( X <= x) where x is breaksgood
  expectedprob = diff(probdistfun) # expected probs in each bin
  # Have estimated numpar parameters by MLE, so deg freedom:
  dof = length(expectedprob) - numpar - 1
} else
if(which.min(evidenceratio) == 4) # ExpB is best model
{
  probdistfun = 1 - ( exp( - lambdamle * breaksgood) - exp( - lambdamle * b) ) /
                    ( exp(- lambdamle * a) - exp( - lambdamle * b))
  expectedprob = diff(probdistfun) # expected probs in each bin
  # Have estimated numpar parameters by MLE, so deg freedom:
  dof = length(expectedprob) - numpar - 1
}
# Sanity check: the binned probabilities must sum to 1 (within tolerance).
if (abs(sum(expectedprob) - 1) > 1e-6) {
  stop("Sum of expectedprob wrong")
}
expected = n * expectedprob        # expected no. of records per bin
crit95 = qchisq(0.95, dof)         # 5% critical value: pchisq(crit95, dof) = 0.95
# Pearson chi-squared statistic and its p-value:
chisq = sum((observed - expected)^2 / expected)
# print(paste("chisq statistic = ", chisq))
# print(paste("crit95 = ", crit95))
pvalchisq = 1 - pchisq(chisq, dof)
# Chi-square test (not printing here): if chisq < crit95 the data are
# not significantly different from the tested model. Commented out but
# left in for anyone who wants the printed conclusion.
#if(chisq < crit95)
#  { print("So chisq < crit95 and conclude that data are not sig different from the model with evidence ratio 1")
#    print(paste("And P value is", 1-pchisq(chisq, dof)))
#  } else
#  { print("So chisq > crit95 and conclude that data ARE significantly different from the model with evidence ratio 1")
#  }
# Can plot histograms of observed and fitted if desired.
# G test (log-likelihood ratio test for goodness of fit) with Williams's
# correction. From Sokal and Rohlf (Biometry), p692 and p698.
# A bin with observed == 0 should contribute 0 to G (the limit of
# x*log(x) as x -> 0), but in R 0 * log(0) evaluates to 0 * -Inf = NaN,
# which would make G and the p-value NaN — so zero-count bins are
# handled explicitly.
G = 2 * sum(ifelse(observed == 0, 0,
                   observed * log(observed / expected))) # p692 of S+R
qWilliams = 1 + ( length(observed)^2 - 1 ) / (6 * n * dof) # p698
GWilliams = G / qWilliams
pvalGWilliams = 1 - pchisq(GWilliams, dof)
catPaste(GWilliams)
catPaste(crit95)
if(GWilliams < crit95)
  { print("So GWilliams < crit95 and conclude that data are not sig different from model with evidence ratio 1")
    print(paste("And P value is", pvalGWilliams))
  } else
  { print("So GWilliams > crit95 and conclude that data ARE significantly different from model with evidence ratio 1")
    print(paste("And P value is", pvalGWilliams))
  }
print(paste(dof, "Degrees of Freedom, 5% significance level"))
# Old code, can be adapted to look at the fits.
#hist(x, breaks=breaksgood, prob=FALSE, ylim=c(0,200))
#points(hgoodness$mid, expected, col="red", pch=3)