Bias-Reduced Logistic Regression - Free Statistics and Forecasting Software (Calculators) v.1.2.1 (<a href="https://quarto.wessa.net">new version</a>)

All rights reserved. The non-commercial (academic) use of this software is free of charge. The only thing that is asked in return is to cite this software when results are used in publications.

This free online software (calculator) computes the Bias-Reduced Logistic Regression (maximum penalized likelihood) as proposed by David Firth. The penalty function is the Jeffreys invariant prior which removes the O(1/n) term from the asymptotic bias of estimated coefficients (Firth, 1993). It always yields finite estimates and standard errors (unlike the Maximum Likelihood Estimation in situations of (quasi-)complete discrimination).

Note: the first column (=endogenous series) must be a binary variable.

Enter (or paste) a matrix (table) containing all data (time) series. Every column represents a different variable and must be delimited by a space or Tab. Every row represents a period in time (or category) and must be delimited by hard returns. The easiest way to enter data is to copy and paste a block of spreadsheet cells. Please, do not use commas or spaces to separate groups of digits!

Source code of R module

library(brglm)
# Draw a ROC curve from two groups of scores and summarise it.
#
# Arguments:
#   sd       scores of the cases whose true outcome is 1 (drives sensitivity)
#   sdc      scores of the cases whose true outcome is 0 (drives 1-specificity)
#   newplot  TRUE opens a new plot with a diagonal reference line;
#            FALSE adds the curve to the current plot with lines()
#   ...      forwarded to plot() / lines()
#
# Returns a list with
#   area     trapezoidal area under the curve
#   cutoff   the score threshold maximising sensitivity - (1 - specificity)
#   sensopt  sensitivity at that threshold
#   specopt  specificity at that threshold
roc.plot <- function (sd, sdc, newplot = TRUE, ...)
{
  # Candidate thresholds, visited from the largest score down so that the
  # curve starts at (0, 0) and ends at (1, 1).
  cuts <- rev(sort(c(sd, sdc)))
  rate_at <- function(scores) {
    vapply(cuts, function(k) mean(scores >= k, na.rm = TRUE), numeric(1),
           USE.NAMES = FALSE)
  }
  sens <- c(0, rate_at(sd))
  specc <- c(0, rate_at(sdc))
  if (newplot) {
    plot(specc, sens, xlim = c(0, 1), ylim = c(0, 1), type = "l",
         xlab = "1-specificity", ylab = "sensitivity", main = "ROC plot", ...)
    abline(0, 1)
  } else {
    lines(specc, sens, ...)
  }
  npoints <- length(sens)
  area <- sum(0.5 * (sens[-1] + sens[-npoints]) * (specc[-1] -
    specc[-npoints]))
  lift <- (sens - specc)[-1]
  # `lift` is aligned with `cuts` (descending order). The previous code looked
  # the position up in the ascending vector and so reported a cutoff that did
  # not correspond to sensopt/specopt.
  best <- which(lift == max(lift))[1]
  list(area = area, cutoff = cuts[best], sensopt = sens[-1][best],
       specopt = 1 - specc[-1][best])
}
# ROC analysis for a fitted binary-response model.
#
# Arguments:
#   object   fitted model; its `fitted` and `y` components are used when
#            newdata is NULL
#   newdata  optional data with a column `y` holding the 0/1 outcome; when
#            given, scores come from predict(object, newdata, type = "response")
#   newplot  passed on to roc.plot()
#   ...      passed on to roc.plot()
#
# Returns whatever roc.plot() returns.
roc.analysis <- function (object, newdata = NULL, newplot = TRUE, ...)
{
  if (is.null(newdata)) {
    scores <- object$fitted
    outcome <- object$y
  } else {
    scores <- predict(object, newdata, type = "response")
    outcome <- newdata$y
  }
  # Split the scores by observed class before handing them to the plotter.
  event_scores <- scores[outcome == 1]
  nonevent_scores <- scores[outcome == 0]
  roc.plot(event_scores, nonevent_scores, newplot, ...)
}
# Hosmer-Lemeshow goodness-of-fit statistic.
#
# Arguments:
#   y     observed 0/1 outcomes
#   yhat  fitted probabilities, same length as y
#   g     number of quantile groups (default 10)
#
# Returns a named numeric vector: "X^2" (the statistic), "Df" (g - 2) and
# "P(>Chi)" (upper-tail chi-square probability).
hosmerlem <- function (y, yhat, g = 10)
{
  # Group the observations by g-quantiles of the fitted probabilities.
  edges <- quantile(yhat, probs = seq(0, 1, 1/g))
  bin <- cut(yhat, breaks = edges, include.lowest = TRUE)
  # Per-group totals of non-events/events: observed versus expected.
  observed <- xtabs(cbind(1 - y, y) ~ bin)
  expected <- xtabs(cbind(1 - yhat, yhat) ~ bin)
  stat <- sum((observed - expected)^2/expected)
  dof <- g - 2
  c("X^2" = stat, Df = dof, "P(>Chi)" = 1 - pchisq(stat, dof))
}
# Fit the model and produce the ROC plot.
# `y` is not defined in this listing; it is used here as a matrix whose first
# row (y[1,]) holds the observed response.
# NOTE(review): presumably supplied by the hosting environment with one
# variable per row - confirm.
# Transpose so that each row of y becomes a column of the data frame.
x <- as.data.frame(t(y))
# The data frame is passed as brglm's first argument.
# NOTE(review): presumably interpreted as "first column ~ remaining columns"
# with brglm's default family - confirm against the brglm documentation.
r <- brglm(x)
summary(r)
# Coefficient table of the fit, reused below when building the output table.
# NOTE(review): `$coeff` presumably partial-matches `coefficients` - confirm.
rc <- summary(r)$coeff
# Best effort: if hosmerlem() fails, `hm` stays undefined and the error is
# suppressed; the summary table below then falls back to "NA".
try(hm <- hosmerlem(y[1,],r$fitted.values),silent=T)
try(hm,silent=T)
# Draw the ROC curve into test0.png; `ra` keeps the list returned by
# roc.analysis() (its `area` element is reported later).
bitmap(file="test0.png")
ra <- roc.analysis(r)
dev.off()
# Misclassification rates of the fitted model at thresholds 0.01, ..., 0.99.
# A case is classified as 1 when its fitted value is >= threshold; the actual
# class is 1 when y[1, j] > 0.99.
#   te[1, i]  Type I error rate  = false positives / actual 0s (1 - specificity)
#   te[2, i]  Type II error rate = false negatives / actual 1s (1 - sensitivity)
# The rows were previously stored the other way round, which contradicted the
# "Type I" / "1 - Specificity" and "Type II" / "1 - Sensitivity" labels used
# when te is plotted and tabulated further down.
te <- array(0,dim=c(2,99))
fitted_prob <- r$fitted.values
is_event <- y[1, seq_along(fitted_prob)] > 0.99
for (i in 1:99) {
  threshold <- i / 100
  flagged <- fitted_prob >= threshold
  # Same counters as before, computed on whole vectors instead of per case.
  numcorr1 <- sum(flagged & is_event)
  numfaul1 <- sum(!flagged & is_event)
  numcorr0 <- sum(!flagged & !is_event)
  numfaul0 <- sum(flagged & !is_event)
  te[1,i] <- numfaul0 / (numfaul0 + numcorr0)
  te[2,i] <- numfaul1 / (numfaul1 + numcorr1)
}
# Render a 2x2 panel of error-rate diagnostics into test1.png.
bitmap(file="test1.png")
# Switch to a 2x2 layout; par() returns the previous settings so they can be
# restored once the four panels are drawn.
op <- par(mfrow=c(2,2))
# Panels 1 and 2: row 1 of te (labelled Type I error) and row 2 of te
# (labelled Type II error) against the thresholds 0.01, ..., 0.99.
plot((1:99)/100,te[1,],xlab="Threshold",ylab="Type I error", main="1 - Specificity")
plot((1:99)/100,te[2,],xlab="Threshold",ylab="Type II error", main="1 - Sensitivity")
# Panel 3: the two rows of te against each other.
plot(te[1,],te[2,],xlab="Type I error",ylab="Type II error", main="(1-Sens.) vs (1-Spec.)")
# Panel 4: sum of both rows per threshold.
plot((1:99)/100,te[1,]+te[2,],xlab="Threshold",ylab="Sum of Type I & II error", main="(1-Sens.) + (1-Spec.)")
par(op)
dev.off()
# Table 1: model coefficients, written to mytable.tab.
# The table.* helpers are not defined in this listing.
# NOTE(review): presumably restored by the load() call below - confirm.
load(file="createtable")
a<-table.start()
a<-table.row.start(a)
# NOTE(review): the 3rd/4th positional arguments look like a column span and
# a header flag (compare header=TRUE below) - confirm against the helper.
a<-table.element(a,"Coefficients of Bias-Reduced Logistic Regression",5,TRUE)
a<-table.row.end(a)
a<-table.row.start(a)
a<-table.element(a,"Variable",header=TRUE)
a<-table.element(a,"Parameter",header=TRUE)
a<-table.element(a,"S.E.",header=TRUE)
a<-table.element(a,"t-stat",header=TRUE)
a<-table.element(a,"2-sided p-value",header=TRUE)
a<-table.row.end(a)
# One row per row of the coefficient matrix rc: row label, then columns 1-3.
for (i in 1:length(rc[,1])) {
a<-table.row.start(a)
a<-table.element(a,labels(rc)[[1]][i],header=TRUE)
a<-table.element(a,rc[i,1])
a<-table.element(a,rc[i,2])
a<-table.element(a,rc[i,3])
# Two-sided p-value from the t distribution with the model's residual degrees
# of freedom, applied to column 3 of rc (not taken from rc itself).
a<-table.element(a,2*(1-pt(abs(rc[i,3]),r$df.residual)))
a<-table.row.end(a)
}
a<-table.end(a)
table.save(a,file="mytable.tab")
# Table 2: fit summary (deviances, residual df, ROC area) followed by the
# Hosmer-Lemeshow results, written to mytable1.tab.
a<-table.start()
a<-table.row.start(a)
a<-table.element(a,"Summary of Bias-Reduced Logistic Regression",2,TRUE)
a<-table.row.end(a)
a<-table.row.start(a)
a<-table.element(a,"Deviance",1,TRUE)
a<-table.element(a,r$deviance)
a<-table.row.end(a)
a<-table.row.start(a)
a<-table.element(a,"Penalized deviance",1,TRUE)
a<-table.element(a,r$penalized.deviance)
a<-table.row.end(a)
a<-table.row.start(a)
a<-table.element(a,"Residual Degrees of Freedom",1,TRUE)
a<-table.element(a,r$df.residual)
a<-table.row.end(a)
a<-table.row.start(a)
a<-table.element(a,"ROC Area",1,TRUE)
a<-table.element(a,ra$area)
a<-table.row.end(a)
a<-table.row.start(a)
a<-table.element(a,"Hosmer–Lemeshow test",2,TRUE)
a<-table.row.end(a)
a<-table.row.start(a)
a<-table.element(a,"Chi-square",1,TRUE)
# Default every Hosmer-Lemeshow cell to the string "NA"; each element of hm is
# copied over individually, and a failing copy (e.g. hm was never created)
# is suppressed so the "NA" placeholder stays.
phm <- array("NA",dim=3)
for (i in 1:3) { try(phm[i] <- hm[i],silent=T) }
a<-table.element(a,phm[1])
a<-table.row.end(a)
a<-table.row.start(a)
a<-table.element(a,"Degrees of Freedom",1,TRUE)
a<-table.element(a,phm[2])
a<-table.row.end(a)
a<-table.row.start(a)
a<-table.element(a,"P(>Chi)",1,TRUE)
a<-table.element(a,phm[3])
a<-table.row.end(a)
a<-table.end(a)
table.save(a,file="mytable1.tab")
# Table 3: per-observation actual value, fitted value and their difference,
# written to mytable2.tab.
a<-table.start()
a<-table.row.start(a)
a<-table.element(a,"Fit of Logistic Regression",4,TRUE)
a<-table.row.end(a)
a<-table.row.start(a)
a<-table.element(a,"Index",1,TRUE)
a<-table.element(a,"Actual",1,TRUE)
a<-table.element(a,"Fitted",1,TRUE)
a<-table.element(a,"Error",1,TRUE)
a<-table.row.end(a)
# One row per fitted value; the observed response is read from y[1, i].
for (i in 1:length(r$fitted.values)) {
a<-table.row.start(a)
a<-table.element(a,i,1,TRUE)
a<-table.element(a,y[1,i])
a<-table.element(a,r$fitted.values[i])
# Error = actual minus fitted.
a<-table.element(a,y[1,i]-r$fitted.values[i])
a<-table.row.end(a)
}
a<-table.end(a)
table.save(a,file="mytable2.tab")
# Table 4: the two rows of te for each threshold 0.01, ..., 0.99, written to
# mytable3.tab. Row 1 of te fills the "Type I" column, row 2 the "Type II"
# column.
a<-table.start()
a<-table.row.start(a)
a<-table.element(a,"Type I & II errors for various threshold values",3,TRUE)
a<-table.row.end(a)
a<-table.row.start(a)
a<-table.element(a,"Threshold",1,TRUE)
a<-table.element(a,"Type I",1,TRUE)
a<-table.element(a,"Type II",1,TRUE)
a<-table.row.end(a)
for (i in 1:99) {
a<-table.row.start(a)
# The threshold for column i of te is i/100.
a<-table.element(a,i/100,1,TRUE)
a<-table.element(a,te[1,i])
a<-table.element(a,te[2,i])
a<-table.row.end(a)
}
a<-table.end(a)
table.save(a,file="mytable3.tab")

::Free Statistics and Forecasting Software::

:: Bias-Reduced Logistic Regression - Free Statistics Software (Calculator) ::