library(stats)
source("psopts.r")
# Read the training table: a headerless file whose three columns are
# labelled x1, x2 and y.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned (for instance by a file sourced earlier); FALSE is reserved.
train <- read.table(
  "loans.dat",
  header = FALSE,
  col.names = c("x1", "x2", "y")
)

# Pull the columns out as plain vectors for the fits below.
x1 <- train[["x1"]]
x2 <- train[["x2"]]
y  <- train[["y"]]
y0 <- (y == 1)  # logical flag: TRUE where y equals 1

# Least-squares fit of y on x1 and x2 (with intercept); its fitted values
# serve as a linear score.
fit <- lsfit(cbind(x1, x2), y, intercept = TRUE)

# Use the full component name: `fit$coef` only resolved through partial
# matching of `$` on the list returned by lsfit().
b <- as.numeric(fit$coefficients)
# Fitted values recovered as observed response minus residual.
yhat <- y - fit$residuals
# Permutation that sorts observations by increasing linear score.
idx <- order(yhat)
# Threshold the linear score at 0.5.
yhat0 <- (yhat > 0.5)
# Number of observations where the thresholded score differs from y
# (assumes y is coded 0/1 -- TODO confirm against the data file).
err <- abs(y - yhat0)
print(sum(err))

# Logistic regression of y on x1 and x2, fitted on the same data frame.
gistic <- glm(
  y ~ x1 + x2,
  family = binomial,
  data = train
)
# Fitted mean values of the binomial model, one per observation.
lhat <- gistic[["fitted.values"]]
# Threshold the logistic score at 0.5.
lhat0 <- lhat > 0.5
# Number of observations where the thresholded score differs from y
# (assumes y is coded 0/1 -- TODO confirm against the data file).
err <- abs(y - lhat0)
print(sum(err))

# TRUE where the two thresholded scores give different answers.
disagree <- yhat0 != lhat0

# Draw the score comparison figure into score.eps.
postscript(file = "score.eps")

# Empty frame whose x and y limits both come from the linear scores.
plot(
  x = yhat, y = yhat, type = "n",
  xlab = "Linear Regression Score",
  ylab = "Score"
)
# Observed y against the linear score, coloured by y via 3 - y
# (palette indices: red = 2, green = 3).
points(x = yhat, y = y, pch = "o", col = 3 - y)
# Horizontal and vertical reference lines at the 0.5 threshold, each
# spanning the range of the linear scores.
lines(x = range(yhat), y = c(0.5, 0.5))
lines(x = c(0.5, 0.5), y = range(yhat))
# Identity line: the linear score plotted against itself.
lines(x = yhat[idx], y = yhat[idx])
# Logistic score against the linear score, as heavy black dots.
points(x = yhat[idx], y = lhat[idx], pch = ".", col = "black", cex = 3)
# Circle overlay: colour index 4 where the classifiers disagree, 0 elsewhere.
points(x = yhat[idx], y = lhat[idx], pch = "o", col = 4 * disagree[idx])
# Hand-placed annotations.
text(x = 0.80, y = 0.00, labels = "<- errors")
text(x = 0.18, y = 1.00, labels = "errors ->")
text(x = 0.70, y = 0.42, labels = "<- disagree")

dev.off()

# Compare how the two scores order the observations: Pearson correlation
# of their ranks, with tied values given their average rank.
ryhat <- rank(yhat, ties.method = "average")
rlhat <- rank(lhat, ties.method = "average")

print(cor(x = ryhat, y = rlhat, method = "pearson"))


