import numpy as np
import matplotlib.pyplot as plt
def entropy(P, N):
    """Return the binary entropy (in bits) of a set with P and N members.

    Args:
        P: Count of examples in the first class.
        N: Count of examples in the second class.

    Returns:
        ``-p*log2(p) - n*log2(n)`` for the class proportions ``p`` and ``n``,
        or 0 when either count is zero (a pure or empty set).
    """
    if P != 0 and N != 0:
        total = P + N
        frac_p = P / total
        frac_n = N / total
        return -frac_p * np.log2(frac_p) - frac_n * np.log2(frac_n)
    # A zero count would feed log2(0); such a set is treated as entropy 0.
    return 0
def gini(P, N):
    """Return the scaled Gini impurity ``4*p*n`` of a set with P and N members.

    The factor 4 makes the value 1 for an evenly split set (p = n = 0.5).

    Args:
        P: Count of examples in the first class.
        N: Count of examples in the second class.

    Returns:
        ``4 * p * n`` for the class proportions ``p`` and ``n``, or 0 for an
        empty set (P + N == 0).
    """
    total = P + N
    if total == 0:
        # Empty set: proportions are undefined; report zero impurity,
        # the same value entropy() uses for this case.
        return 0
    p = P / total
    n = N / total
    return 4 * p * n
def dkm(P, N):
    """Return the impurity ``2*sqrt(p*n)`` of a set with P and N members.

    The factor 2 makes the value 1 for an evenly split set (p = n = 0.5).

    Args:
        P: Count of examples in the first class.
        N: Count of examples in the second class.

    Returns:
        ``2 * sqrt(p * n)`` for the class proportions ``p`` and ``n``, or 0
        for an empty set (P + N == 0).
    """
    total = P + N
    if total == 0:
        # Empty set: proportions are undefined; report zero impurity,
        # the same value entropy() uses for this case.
        return 0
    p = P / total
    n = N / total
    return 2 * np.sqrt(p * n)
def minacc(P, N):
    """Return the smaller of the two class proportions of a set.

    Args:
        P: Count of examples in the first class.
        N: Count of examples in the second class.

    Returns:
        ``min(p, n)`` for the class proportions ``p`` and ``n``, or 0 for an
        empty set (P + N == 0).
    """
    total = P + N
    if total == 0:
        # Empty set: proportions are undefined; report 0 instead of dividing
        # by zero, the same value entropy() uses for this case.
        return 0
    p = P / total
    n = N / total
    return min(p, n)
def _weighted_impurity(impurity, a, b, total):
    """Return ``impurity(a, b)`` weighted by ``(a + b) / total``; 0 for an empty part."""
    size = a + b
    if size == 0:
        # An empty part has weight zero; skip the impurity call so it never
        # has to divide by an empty count.
        return 0
    return size / total * impurity(a, b)


def metric(tp, fp, Pos, Neg, m):
    """Evaluate the metric named ``m`` for a rule covering tp positives and fp negatives.

    The remaining confusion-matrix cells are derived as ``FN = Pos - tp`` and
    ``TN = Neg - fp``.

    Args:
        tp: Number of positives covered.
        fp: Number of negatives covered.
        Pos: Total number of positives.
        Neg: Total number of negatives.
        m: Metric name. One of 'accuracy', 'wracc', 'confirmation',
            'generality', 'precision', 'laplace-precision', 'f-measure',
            'g-measure', 'precision*recall', 'avg-precision-recall',
            'aucsplit', 'balanced-aucsplit', 'chi2', 'info-gain', 'gini',
            'dkm', 'entropy', 'giniimp', 'dkmimp', 'minacc'.

    Returns:
        The metric value. 0 is returned when nothing is covered
        (tp + fp == 0), when ``m`` is not a recognised name, and for
        'confirmation' / 'chi2' when their denominator is zero.

    Note:
        Metrics that divide by ``Pos`` or ``Neg`` ('precision*recall',
        'avg-precision-recall', 'aucsplit', 'balanced-aucsplit') are not
        guarded against those totals being zero.
    """
    if tp + fp == 0:
        return 0
    N = Pos + Neg
    TP, FP = tp, fp
    FN = Pos - TP
    TN = Neg - FP
    if m == 'accuracy':
        return (TP + TN) / N
    if m == 'wracc':
        return TP / N - (TP + FP) * (TP + FN) / N**2
    if m == 'confirmation':
        num = (TP + FP) * (FP + TN) / N**2 - FP / N
        den = np.sqrt((TP + FP) * (FP + TN) / N**2) - (TP + FP) * (FP + TN) / N**2
        return num / den if den != 0 else 0
    if m == 'generality':
        return (TP + FP) / N
    if m == 'precision':
        return TP / (TP + FP)
    if m == 'laplace-precision':
        # Smoothed precision: 10 pseudo-counts added to each class.
        return (TP + 10) / (TP + FP + 20)
    if m == 'f-measure':
        return 2 * TP / (2 * TP + FP + FN)
    if m == 'g-measure':
        return TP / (FP + Pos)
    if m == 'precision*recall':
        return TP**2 / ((TP + FP) * (TP + FN))
    if m == 'avg-precision-recall':
        return TP / (2 * (TP + FP)) + TP / (2 * (TP + FN))
    if m == 'aucsplit':
        return (TP * Neg + Pos * TN) / (2 * Pos * Neg)
    if m == 'balanced-aucsplit':
        return TP / Pos - FP / Neg
    if m == 'chi2':
        den = (TP + FP) * (TP + FN) * (FP + TN) * (FN + TN)
        # A zero marginal (e.g. the rule covers every example) makes the
        # statistic undefined; report 0 instead of dividing by zero.
        return ((TP * TN - FP * FN)**2) / den if den != 0 else 0
    if m == 'info-gain':
        return (entropy(Pos, Neg)
                - _weighted_impurity(entropy, TP, FP, N)
                - _weighted_impurity(entropy, FN, TN, N))
    if m == 'gini':
        return (gini(Pos, Neg)
                - _weighted_impurity(gini, TP, FP, N)
                - _weighted_impurity(gini, FN, TN, N))
    if m == 'dkm':
        return (dkm(Pos, Neg)
                - _weighted_impurity(dkm, TP, FP, N)
                - _weighted_impurity(dkm, FN, TN, N))
    if m == 'entropy':
        return entropy(TP, FP) / 2
    if m == 'giniimp':
        return gini(TP, FP)
    if m == 'dkmimp':
        return dkm(TP, FP)
    if m == 'minacc':
        return minacc(TP, FP)
    # Unrecognised metric names evaluate to 0.
    return 0
def rocgrid(pos, neg):
    """Set up the current axes as a coverage plot spanning neg by pos.

    Args:
        pos: Upper limit of the y axis ("Positives").
        neg: Upper limit of the x axis ("Negatives").
    """
    ax = plt.gca()
    # White diagonal from the origin to (neg, pos).
    ax.plot([0, neg], [0, pos], color='white')
    ax.set_xlim(0, neg)
    ax.set_ylim(0, pos)
    ax.set_aspect('auto', adjustable='box')
    ax.set_xlabel("Negatives")
    ax.set_ylabel("Positives")
    ax.grid(True, which='both', linestyle='--', linewidth=0.5)
def box(x0, y0, x1, y1, col):
    """Draw the outline of the rectangle with corners (x0, y0) and (x1, y1).

    Args:
        x0, y0: One corner of the rectangle.
        x1, y1: The opposite corner.
        col: Line colour passed to matplotlib.
    """
    # Walk the four corners and return to the start to close the outline.
    xs = [x0, x0, x1, x1, x0]
    ys = [y0, y1, y1, y0, y0]
    plt.plot(xs, ys, color=col, linewidth=2)
def contour1(m, col, lty, tp, fp, Pos, Neg):
    """Mark the point (fp, tp) and draw the contour of metric ``m`` at that point's value.

    Args:
        m: Metric name understood by ``metric``.
        col: Accepted for interface compatibility but not used; the drawing
            colour is derived from the metric value instead.
        lty: Matplotlib line style for the contour / line.
        tp: Positives covered (y coordinate of the point).
        fp: Negatives covered (x coordinate of the point).
        Pos: Total positives, used to evaluate the metric at the point.
        Neg: Total negatives, used to evaluate the metric at the point.
    """
    v = metric(tp, fp, Pos, Neg, m)
    # Colour runs from red (low v) to green (high v). Each component is
    # clamped to [0, 1] because matplotlib rejects RGB values outside that
    # range, and some metrics are negative or exceed 1.
    red = min(max(2 - 2 * v, 0), 1)
    green = min(max(v, 0), 1)
    col = (red, green, 0)
    plt.plot(fp, tp, 'o', color=col, linewidth=3)
    if tp == 0 or fp == 0:
        # Point lies on an axis: draw the segment from the origin instead of
        # computing a contour.
        plt.plot([0, fp], [0, tp], color=col, linestyle=lty, linewidth=4)
        return
    # The grid below is evaluated with the point's own counts as the totals,
    # i.e. inside the box spanned by (0, 0) and (fp, tp).
    Pos = tp
    Neg = fp
    x = np.arange(0, Neg + 1)
    y = np.arange(0, Pos + 1)
    z = np.zeros((len(y), len(x)))
    for i, fp_cell in enumerate(x):
        for j, tp_cell in enumerate(y):
            z[j, i] = metric(tp_cell, fp_cell, Pos, Neg, m)
    CS = plt.contour(x, y, z, levels=[v], colors=[col], linestyles=lty, linewidths=2)
    plt.clabel(CS, inline=1, fontsize=8, fmt='')
# --- Driver: draw the coverage plot and the isometrics of `method` ---------

# Outer coverage space: 10 positives by 50 negatives.
pos, neg = 10, 50
rocgrid(pos, neg)
d = 0.5  # offset of each arrow's start from the corner of its box
method = 'precision'
colour = 'black'
box(0, 0, neg, pos, colour)

# First step: arrow from near the outer box's corner to the point (n, p).
p, n = 10, 10
plt.arrow(
    neg - d,
    pos - d,
    n - (neg - d),
    p - (pos - d),
    color='violet',
    head_width=1.0,
    linewidth=3,
)
contour1(method, 'orange', 'solid', p, n, pos, neg)
# Other points drawn dotted, as (tp, fp) pairs within the outer box.
for _tp, _fp in ((10, 30), (0, 40), (0, 20), (10, 40), (0, 10)):
    contour1(method, colour, 'dotted', _tp, _fp, pos, neg)

# Second step: repeat inside the box spanned by the chosen point (n, p).
colour = 'blue'
box(0, 0, n, p, colour)
p1, n1 = 10, 0
plt.arrow(
    n - d,
    p - d,
    n1 - (n - d),
    p1 - (p - d),
    color='violet',
    head_width=1.0,
    linewidth=3,
)
contour1(method, 'green', 'solid', p1, n1, p, n)
contour1(method, colour, 'dotted', 0, 10, p, n)

plt.show()