6.10 Rules 3

Rules 1a right

R figure

Code
# Class-ratio multiplier: the positive axis spans c * 50.
c <- 1
# Extents of the plot: `pos` bounds the y axis, `neg` bounds the x axis.
pos <- c * 50
neg <- 50

# Open an empty plot whose x axis ("Negatives") spans [0, neg] and whose
# y axis ("Positives") spans [0, pos], then overlay a grey grid with
# neg/10 by pos/10 cells. Reads the globals `neg` and `pos`.
rocgrid <- function() {
  x_range <- c(0, neg)
  y_range <- c(0, pos)
  plot(x_range, y_range,
       xlim = x_range, ylim = y_range,
       xaxs = "i", yaxs = "i",   # axes end exactly at the limits
       xaxt = "n", yaxt = "n",   # do not draw the axes
       type = "n",               # set up the region without plotting data
       xlab = "Negatives", ylab = "Positives")
  grid(nx = neg / 10, ny = pos / 10, col = gray(0.5))
}

# Draw the outline of the axis-aligned rectangle with opposite corners
# (x0, y0) and (x1, y1) on the current plot, as a solid line of width 2
# in colour `col`.
box <- function(x0, y0, x1, y1, col) {
  xs <- c(x0, x0, x1, x1, x0)
  ys <- c(y0, y1, y1, y0, y0)
  lines(xs, ys, col = col, lty = 1, lwd = 2)
}

# Evaluate the quality measure named `m` for a rule covering `tp` positives
# and `fp` negatives out of `Pos` positives and `Neg` negatives in total.
# Returns 0 when nothing is covered (tp + fp == 0) and NULL (invisibly)
# when `m` is not one of the names handled below.
metric <- function(tp, fp, Pos, Neg, m) {
  if (tp + fp == 0) {
    return(0)
  }
  total <- Pos + Neg
  fn <- Pos - tp   # positives not covered
  tn <- Neg - fp   # negatives not covered
  switch(m,
    "accuracy" = (tp + tn) / total,
    "wracc" = tp / total - (tp + fp) * (tp + fn) / total^2,
    "confirmation" =
      ((tp + fp) * (fp + tn) / total^2 - fp / total) /
        (sqrt((tp + fp) * (fp + tn) / total^2) -
           (tp + fp) * (fp + tn) / total^2),
    "generality" = (tp + fp) / total,
    "precision" = tp / (tp + fp),
    "laplace-precision" = (tp + 10) / (tp + fp + 20),
    "f-measure" = 2 * tp / (2 * tp + fp + fn),
    "g-measure" = tp / (fp + Pos),
    "precision*recall" = tp^2 / ((tp + fp) * (tp + fn)),
    "avg-precision-recall" = tp / (2 * (tp + fp)) + tp / (2 * (tp + fn)),
    "aucsplit" = (tp * Neg + Pos * tn) / (2 * Pos * Neg),
    "balanced-aucsplit" = tp / Pos - fp / Neg,
    "chi2" = (tp * tn - fp * fn)^2 /
      ((tp + fp) * (tp + fn) * (fp + tn) * (fn + tn)),
    # Impurity of the parent minus the weighted impurity of the two children.
    "info-gain" = entropy(Pos, Neg) -
      (tp + fp) / total * entropy(tp, fp) -
      (fn + tn) / total * entropy(fn, tn),
    "gini" = gini(Pos, Neg) -
      (tp + fp) / total * gini(tp, fp) -
      (fn + tn) / total * gini(fn, tn),
    "dkm" = dkm(Pos, Neg) -
      (tp + fp) / total * dkm(tp, fp) -
      (fn + tn) / total * dkm(fn, tn),
    # Impurity of the covered examples only.
    "entropy" = entropy(tp, fp) / 2,
    "giniimp" = gini(tp, fp),
    "dkmimp" = dkm(tp, fp),
    "minacc" = minacc(tp, fp)
  )
}

# Binary entropy (base 2) of a set with P positives and N negatives.
# Returns 0 when either count is zero.
entropy <- function(P, N) {
  if (P == 0 || N == 0) {
    return(0)
  }
  total <- P + N
  frac_pos <- P / total
  frac_neg <- N / total
  -frac_pos * log2(frac_pos) - frac_neg * log2(frac_neg)
}

# Scaled Gini impurity 4 * p * n, where p and n are the proportions of
# positives and negatives among P + N examples.
gini <- function(P, N) {
  total <- P + N
  4 * (P / total) * (N / total)
}

# Impurity 2 * sqrt(p * n), where p and n are the proportions of positives
# and negatives among P + N examples.
dkm <- function(P, N) {
  total <- P + N
  2 * sqrt((P / total) * (N / total))
}

# Smaller of the two class proportions among P positives and N negatives.
minacc <- function(P, N) {
  total <- P + N
  min(P / total, N / total)
}


# Mark the point (fp, tp) on the current plot and, unless tp or fp is zero,
# add the contour of measure `m` that passes through that point, computed
# on the integer grid [0, fp] x [0, tp].
#
#   m         name of the measure, passed on to metric()
#   col       overwritten below by a colour derived from the score
#   lty       line type of the contour
#   tp, fp    coordinates of the point (y and x respectively)
#   Pos, Neg  class totals used to score the point
#
# Returns NULL visibly when tp or fp is zero; otherwise the value of
# contour().
contour1 <- function(m, col, lty, tp, fp, Pos, Neg) {
  score <- metric(tp, fp, Pos, Neg, m)
  # The colour is computed from the score; the `col` argument is not used.
  col <- rgb(min(2 - 2 * score, 1), score, 0)
  points(fp, tp, col = col, type = "p", lwd = 3)
  if (tp == 0 || fp == 0) {
    return(NULL)
  }

  # From here on the point itself defines the class totals, so the contour
  # is evaluated inside the sub-box whose corner is (fp, tp).
  Pos <- tp
  Neg <- fp
  neg_axis <- seq(0, Neg)
  pos_axis <- seq(0, Pos)
  surface <- matrix(nrow = Neg + 1, ncol = Pos + 1)
  for (pos_count in pos_axis) {
    for (neg_count in neg_axis) {
      surface[neg_count + 1, pos_count + 1] <-
        metric(pos_count, neg_count, Pos, Neg, m)
    }
  }
  level <- metric(tp, fp, Pos, Neg, m)
  contour(neg_axis, pos_axis, surface, levels = level, add = TRUE,
          col = col, lty = lty, labels = "", lwd = 2)
}

# Draw the grid and outline the whole (neg x pos) area in black.
rocgrid()
d <- 1   # offset applied to the arrow end points
method <- "laplace-precision"
colour <- "black"
box(0, 0, neg, pos, colour)
p <- 50
n <- 10
# Arrow from (neg - d, pos - d) to (n + d, p - d).
arrows(neg - d, pos - d, n + d, p - d, col = "violet", length = 0.1, lwd = 3)
contour1(method, "orange", "solid", tp = p, fp = n, Pos = pos, Neg = neg)
contour1(method, "orange", "solid", tp = 20, fp = 0, Pos = pos, Neg = neg)
NULL
Code
# Further points scored against the full class totals, drawn dotted.
contour1(method, colour, "dotted", tp = 10, fp = 30, Pos = pos, Neg = neg)
contour1(method, colour, "dotted", tp = 20, fp = 20, Pos = pos, Neg = neg)
contour1(method, colour, "dotted", tp = 0, fp = 40, Pos = pos, Neg = neg)
NULL
Code
contour1(method, colour, "dotted", tp = 50, fp = 30, Pos = pos, Neg = neg)
contour1(method, colour, "dotted", tp = 0, fp = 20, Pos = pos, Neg = neg)
NULL
Code
contour1(method, colour, "dotted", tp = 30, fp = 40, Pos = pos, Neg = neg)
contour1(method, colour, "dotted", tp = 20, fp = 10, Pos = pos, Neg = neg)
# Outline the sub-box with corner (n, p) in blue; the remaining points are
# scored against p and n instead of pos and neg.
colour <- "blue"
box(0, 0, n, p, colour)
p1 <- 30
n1 <- 0
# Arrow from (n - d, p - d) to (n1 + d, p1 + d).
arrows(n - d, p - d, n1 + d, p1 + d, col = "violet", length = 0.1, lwd = 3)

contour1(method, "green", "solid", tp = p1, fp = n1, Pos = p, Neg = n)
NULL
Code
contour1(method, colour, "dotted", tp = 20, fp = 0, Pos = p, Neg = n)
NULL
Code
contour1(method, colour, "dotted", tp = 10, fp = 10, Pos = p, Neg = n)

contour1(method, colour, "dotted", tp = 20, fp = 10, Pos = p, Neg = n)

Python figure

Code
import numpy as np
import matplotlib.pyplot as plt

# Class-ratio multiplier: the positive axis spans c * 50.
c = 1
# Extents of the plot: `pos` bounds the y axis, `neg` bounds the x axis.
pos = 50 * c
neg = 50

def rocgrid():
    """Set up the empty plot: axes [0, neg] x [0, pos] with a grey grid.

    Reads the module-level ``neg`` and ``pos``. The x axis is labelled
    "Negatives" and the y axis "Positives"; no tick marks or tick labels
    are shown.
    """
    plt.plot([0, neg], [0, pos], alpha=0)
    plt.xlim(0, neg)
    plt.ylim(0, pos)
    # Grid lines are drawn at tick positions, so removing the ticks entirely
    # would leave no grid. Keep a tick every 10 units and hide only the tick
    # marks and their labels.
    plt.xticks(np.arange(0, neg + 1, 10))
    plt.yticks(np.arange(0, pos + 1, 10))
    plt.tick_params(axis='both', which='both', length=0,
                    labelbottom=False, labelleft=False)
    plt.xlabel("Negatives")
    plt.ylabel("Positives")
    plt.grid(True, which='both', linewidth=0.5, color='gray', linestyle='-')
    plt.gca().set_aspect('auto')

def box(x0, y0, x1, y1, col):
    """Draw the outline of the rectangle with corners (x0, y0) and (x1, y1).

    The outline is a solid line of width 2 in colour ``col``.
    """
    corners = [(x0, y0), (x0, y1), (x1, y1), (x1, y0), (x0, y0)]
    xs = [corner[0] for corner in corners]
    ys = [corner[1] for corner in corners]
    plt.plot(xs, ys, color=col, linestyle='-', linewidth=2)

def entropy(P, N):
    """Binary entropy (base 2) of a set with P positives and N negatives.

    Returns 0 when either count is zero.
    """
    if P != 0 and N != 0:
        total = P + N
        frac_pos = P / total
        frac_neg = N / total
        return -frac_pos * np.log2(frac_pos) - frac_neg * np.log2(frac_neg)
    return 0

def gini(P, N):
    """Scaled Gini impurity 4 * p * n for P positives and N negatives."""
    total = P + N
    return 4 * (P / total) * (N / total)

def dkm(P, N):
    """Impurity 2 * sqrt(p * n) for P positives and N negatives."""
    total = P + N
    return 2 * np.sqrt((P / total) * (N / total))

def minacc(P, N):
    """Smaller of the two class proportions among P positives and N negatives."""
    total = P + N
    return min(P / total, N / total)

def metric(tp, fp, Pos, Neg, m):
    """Evaluate the quality measure named ``m`` for one rule.

    ``tp`` and ``fp`` are the positives and negatives covered by the rule;
    ``Pos`` and ``Neg`` are the class totals. Returns 0 when nothing is
    covered (``tp + fp == 0``) and ``None`` when ``m`` is not a known name.
    """
    covered = tp + fp
    if covered == 0:
        return 0

    total = Pos + Neg
    fn = Pos - tp        # positives not covered
    tn = Neg - fp        # negatives not covered
    uncovered = fn + tn

    if m == 'accuracy':
        return (tp + tn) / total
    if m == 'wracc':
        return tp / total - covered * (tp + fn) / total**2
    if m == 'confirmation':
        a = (covered * (fp + tn)) / total**2
        b = fp / total
        den = np.sqrt(covered * (fp + tn)) / total
        # Guard against a zero denominator.
        return (a - b) / (den - a) if den != a else 0
    if m == 'generality':
        return covered / total
    if m == 'precision':
        return tp / covered
    if m == 'laplace-precision':
        return (tp + 10) / (tp + fp + 20)
    if m == 'f-measure':
        return 2 * tp / (2 * tp + fp + fn)
    if m == 'g-measure':
        return tp / (fp + Pos)
    if m == 'precision*recall':
        return tp**2 / (covered * (tp + fn))
    if m == 'avg-precision-recall':
        return tp / (2 * covered) + tp / (2 * (tp + fn))
    if m == 'aucsplit':
        return (tp * Neg + Pos * tn) / (2 * Pos * Neg)
    if m == 'balanced-aucsplit':
        return tp / Pos - fp / Neg
    if m == 'chi2':
        denom = covered * (tp + fn) * (fp + tn) * uncovered
        return (tp * tn - fp * fn)**2 / denom if denom != 0 else 0
    # Impurity of the parent minus the weighted impurity of the two children.
    if m == 'info-gain':
        return (entropy(Pos, Neg)
                - covered / total * entropy(tp, fp)
                - uncovered / total * entropy(fn, tn))
    if m == 'gini':
        return (gini(Pos, Neg)
                - covered / total * gini(tp, fp)
                - uncovered / total * gini(fn, tn))
    if m == 'dkm':
        return (dkm(Pos, Neg)
                - covered / total * dkm(tp, fp)
                - uncovered / total * dkm(fn, tn))
    # Impurity of the covered examples only.
    if m == 'entropy':
        return entropy(tp, fp) / 2
    if m == 'giniimp':
        return gini(tp, fp)
    if m == 'dkmimp':
        return dkm(tp, fp)
    if m == 'minacc':
        return minacc(tp, fp)
    return None

def contour1(m, col, lty, tp, fp, Pos, Neg):
    """Mark the point (fp, tp) and draw the contour of ``m`` through it.

    The point is coloured by its score under ``metric(tp, fp, Pos, Neg, m)``.
    Unless ``tp`` or ``fp`` is zero, the measure is then re-evaluated on the
    integer grid [0, fp] x [0, tp], using ``tp`` and ``fp`` as the class
    totals, and the contour at the level of the point itself is drawn with
    colour ``col`` and line style ``lty``.
    """
    v = metric(tp, fp, Pos, Neg, m)
    point_colour = (min(2 - 2 * v, 1), v, 0)
    plt.plot(fp, tp, 'o', color=point_colour, linewidth=3)
    if tp == 0 or fp == 0:
        return
    # From here on the point itself defines the class totals.
    Pos = tp
    Neg = fp
    x = np.arange(0, Neg + 1)
    y = np.arange(0, Pos + 1)
    # Rows index y (positives) and columns index x (negatives).
    z = np.zeros((len(y), len(x)))
    for i, xi in enumerate(x):
        for j, yj in enumerate(y):
            z[j, i] = metric(yj, xi, Pos, Neg, m)
    v = metric(tp, fp, Pos, Neg, m)
    # The contour set is deliberately not post-processed: iterating over
    # `.collections` raises AttributeError on current Matplotlib, and it was
    # only used to assign empty legend labels.
    plt.contour(x, y, z, levels=[v], colors=[col], linestyles=[lty], linewidths=2)

# Draw the grid and outline the whole (neg x pos) area in black.
rocgrid()
d = 1  # offset applied to the arrow end points
method = 'laplace-precision'
colour = 'black'
box(0, 0, neg, pos, colour)
p = 50
n = 10
# Arrow from (neg - d, pos - d) to (n + d, p - d), the same end points as the
# R figure; plt.arrow takes the displacement rather than the end point.
plt.arrow(neg - d, pos - d, (n + d) - (neg - d), (p - d) - (pos - d),
          color='violet', head_width=1, linewidth=3, length_includes_head=True)
contour1(method, 'orange', 'solid', p, n, pos, neg)
AttributeError: 'QuadContourSet' object has no attribute 'collections'
Code
contour1(method, 'orange', 'solid', tp=20, fp=0, Pos=pos, Neg=neg)
contour1(method, colour, 'dotted', tp=10, fp=30, Pos=pos, Neg=neg)
AttributeError: 'QuadContourSet' object has no attribute 'collections'
Code
contour1(method, colour, 'dotted', tp=20, fp=20, Pos=pos, Neg=neg)
AttributeError: 'QuadContourSet' object has no attribute 'collections'
Code
contour1(method, colour, 'dotted', tp=0, fp=40, Pos=pos, Neg=neg)
contour1(method, colour, 'dotted', tp=50, fp=30, Pos=pos, Neg=neg)
AttributeError: 'QuadContourSet' object has no attribute 'collections'
Code
contour1(method, colour, 'dotted', tp=0, fp=20, Pos=pos, Neg=neg)
contour1(method, colour, 'dotted', tp=30, fp=40, Pos=pos, Neg=neg)
AttributeError: 'QuadContourSet' object has no attribute 'collections'
Code
contour1(method, colour, 'dotted', tp=20, fp=10, Pos=pos, Neg=neg)
AttributeError: 'QuadContourSet' object has no attribute 'collections'
Code
# Outline the sub-box with corner (n, p) in blue; the remaining points are
# scored against p and n instead of pos and neg.
colour = 'blue'
box(0, 0, n, p, colour)
p1 = 30
n1 = 0
# Arrow from (n - d, p - d) to (n1 + d, p1 + d), the same end points as the
# R figure. The tip must stay d inside the axes: ending at n1 - d would put
# it at x = -1, outside the plotted range.
plt.arrow(n - d, p - d, (n1 + d) - (n - d), (p1 + d) - (p - d),
          color='violet', head_width=1, linewidth=3, length_includes_head=True)
contour1(method, 'green', 'solid', p1, n1, p, n)
contour1(method, colour, 'dotted', 20, 0, p, n)
contour1(method, colour, 'dotted', 10, 10, p, n)
AttributeError: 'QuadContourSet' object has no attribute 'collections'
Code
contour1(method, colour, 'dotted', tp=20, fp=10, Pos=p, Neg=n)
AttributeError: 'QuadContourSet' object has no attribute 'collections'
Code
# Display the finished figure.
plt.show()

Rules 1a left

Code
graph TD;
    %% Node labels carry the rule body and a pair of counts, presumably positives and negatives covered.
    34["true\n[5+, 5-]"];
    38["Length=3\n[2+, 0-]"];
    76["Length=4\n[1+, 3-]"];
    95["Length=5\n[2+, 2-]"];
    35["Gills=yes\n[0+, 4-]"];
    62["Gills=no\n[5+, 1-]"];
    33["Beak=yes\n[5+, 3-]"];
    52["Beak=no\n[0+, 2-]"];
    32["Teeth=many\n[3+, 4-]"];
    45["Teeth=few\n[2+, 1-]"];
    63["Gills=no & Length=3\n[2+, 0-]"];
    87["Gills=no & Length=4\n[1+, 1-]"];
    106["Gills=no & Length=5\n[2+, 0-]"];
    61["Gills=no & Beak=yes\n[5+, 1-]"];
    60["Gills=no & Teeth=many\n[3+, 0-]"];
    66["Gills=no & Teeth=few\n[2+, 1-]"];
    
    34 --> 32;
    34 --> 33;
    34 --> 35;
    34 --> 38;
    34 --> 45;
    34 --> 52;
    34 --> 62;
    34 --> 76;
    34 --> 95;
    62 --> 60;
    62 --> 61;
    62 --> 63;
    62 --> 66;
    62 --> 87;
    62 --> 106;

    %% Highlighted nodes. Mermaid sets a node fill with a style statement, not with attributes inside the node definition.
    style 38 fill:orange
    style 62 fill:orange
    style 60 fill:green
graph TD;
    %% Node labels carry the rule body and a pair of counts, presumably positives and negatives covered.
    34["true\n[5+, 5-]"];
    38["Length=3\n[2+, 0-]"];
    76["Length=4\n[1+, 3-]"];
    95["Length=5\n[2+, 2-]"];
    35["Gills=yes\n[0+, 4-]"];
    62["Gills=no\n[5+, 1-]"];
    33["Beak=yes\n[5+, 3-]"];
    52["Beak=no\n[0+, 2-]"];
    32["Teeth=many\n[3+, 4-]"];
    45["Teeth=few\n[2+, 1-]"];
    63["Gills=no & Length=3\n[2+, 0-]"];
    87["Gills=no & Length=4\n[1+, 1-]"];
    106["Gills=no & Length=5\n[2+, 0-]"];
    61["Gills=no & Beak=yes\n[5+, 1-]"];
    60["Gills=no & Teeth=many\n[3+, 0-]"];
    66["Gills=no & Teeth=few\n[2+, 1-]"];
    
    34 --> 32;
    34 --> 33;
    34 --> 35;
    34 --> 38;
    34 --> 45;
    34 --> 52;
    34 --> 62;
    34 --> 76;
    34 --> 95;
    62 --> 60;
    62 --> 61;
    62 --> 63;
    62 --> 66;
    62 --> 87;
    62 --> 106;

    %% Highlighted nodes. Mermaid sets a node fill with a style statement, not with attributes inside the node definition.
    style 38 fill:orange
    style 62 fill:orange
    style 60 fill:green