# Zamiast predict.glm mozna predykcje y_pred policzyc bezposrednio: library(MASS) X=na.omit(biopsy[,-1]) y=ifelse(X[,"class"]=="malignant",1,0) X=X[,1:9] train=sample(1:nrow(X),600) mm=glm(y[train]~as.matrix(X[train,]),family=binomial) y_pred=ifelse(as.matrix(cbind(1,X[-train,])) %*% mm$coef > 0,1,0) N=table(y_pred,y[-train]) P=N/sum(N) pi=apply(P,1,sum); pj=apply(P,2,sum) PP=pi%*%t(pj) sum(ifelse(P==0,0,P*log(P/PP))) # "ifelse" jest uzasadniona, bo czasami mamy 0*log(0), a ponadto # x*log(x)-->0 przy x-->0. ############ X=rbind(Pima.te,Pima.tr) mf=glm(type~.,data=X,family=binomial) mm=glm(type~glu+bmi+npreg,data=X,family=binomial) pchisq(mm$dev-mf$dev,4,lower.tail=FALSE) #[1] 0.0008221947 ############ n0=nrow(X0) t0=sum(diag(var(X0)*(n0-1)/n0)) for( i in 2:50) t0=c(t0,sum(kmeans(X0,i,iter.max=150)$with/n0)) sepK[,k]=1-t0/t0[1] plot(sep K,type="b") kmeans(X0,k0,iter.max=150)$clust # result of clustering h=hclust(dist(X0),method="average")$h[(n0-1):(n0-50)] sepH[,k]=1-h/h[1] ############ apply(qr.Q(qr(X))^2,1,sum)