# Source code for wiggum.trend_components.classification

import pandas as pd
import numpy as np
import itertools
import scipy.stats as stats
import warnings
import itertools as itert



# Confusion-matrix statistics.
#
# Every function below takes ``cm``, a container of confusion-matrix counts
# that is indexed with the labels 'TP', 'TN', 'FP' and 'FN'.
# NOTE(review): accuracy and error rate normalize by ``sum(cm)``, which
# iterates ``cm`` directly -- presumably ``cm`` is a pandas Series holding
# only those four counts; confirm against the callers.
# None of the functions guards against a zero denominator.


def _accuracy(cm):
    """(TP + TN) / sum(cm)"""
    correct = cm['TP'] + cm['TN']
    return correct/sum(cm)


def _error_rate(cm):
    """(FP + FN) / sum(cm)"""
    wrong = cm['FP'] + cm['FN']
    return wrong/sum(cm)


def _positive_predictive_value(cm):
    """TP / (TP + FP)"""
    predicted_pos = cm['TP'] + cm['FP']
    return cm['TP']/predicted_pos


def _true_positive_rate(cm):
    """TP / (TP + FN)"""
    actual_pos = cm['TP'] + cm['FN']
    return cm['TP']/actual_pos


def _true_negative_rate(cm):
    """TN / (TN + FP)"""
    actual_neg = cm['TN'] + cm['FP']
    return cm['TN']/actual_neg


def _false_discovery_rate(cm):
    """FP / (TP + FP)"""
    predicted_pos = cm['TP'] + cm['FP']
    return cm['FP']/predicted_pos


def _false_positive_rate(cm):
    """FP / (TN + FP)"""
    actual_neg = cm['TN'] + cm['FP']
    return cm['FP']/actual_neg


def _false_negative_rate(cm):
    """FN / (TP + FN)"""
    actual_pos = cm['TP'] + cm['FN']
    return cm['FN']/actual_pos


def _f1_score(cm):
    """2*TP / (2*TP + FP + FN)"""
    return 2*cm['TP']/(2*cm['TP'] + cm['FP'] + cm['FN'])


# lookup table: short statistic name -> function computing it
stat_comp = {
    'acc': _accuracy,
    'err': _error_rate,
    'ppv': _positive_predictive_value,
    'tpr': _true_positive_rate,
    'tnr': _true_negative_rate,
    'fdr': _false_discovery_rate,
    'fpr': _false_positive_rate,
    'fnr': _false_negative_rate,
    'f1': _f1_score,
}

class BinClassStats():
    '''
    class of trend for computing classification statistics from confusion
    matrix components based on the comparison of values from two columns of
    the data
    '''
    overview_legend = 'continuous'

    def is_computable(self, labeled_df=None):
        """
        check if this trend can be computed based on data and metadata
        available

        Parameters
        ----------
        self : Trend
            a trend object with a set_vars Parameters
        labeled_df : LabeledDataFrame {None} (optional)
            data to use if trend is not already configured; it is passed to
            get_trend_vars when set_vars is falsy

        Returns
        -------
        computable : bool
            True if requirements of get_trends are filled: groundtruth and
            prediction are both truthy and my_stat is a key of stat_comp

        See also:
        get_trends() for description of how this trend computes
        """
        # configure the trend variables first if that has not happened yet
        if not self.set_vars:
            self.get_trend_vars(labeled_df)

        requirements = [bool(self.groundtruth),
                        bool(self.prediction),
                        self.my_stat in stat_comp]

        # builtin all() replaces np.product, which NumPy 2.0 removed, and
        # returns a real bool as documented
        return all(requirements)

    def get_distance(self, row, col_a='subgroup_trend', col_b='agg_trend'):
        """
        distance for confusion matrix stats is the absolute difference
        between the two trend values

        Parameters
        ----------
        row : mapping or row-like object
            must support lookup by col_a and col_b
            (presumably a row of a result table -- confirm against callers)
        col_a : string
            name of the first value to compare
        col_b : string
            name of the second value to compare

        Returns
        -------
        distance : numeric
            np.abs(row[col_a] - row[col_b])
        """
        # a plain difference is used instead of a ratio, so a zero value in
        # col_b needs no special handling
        return np.abs(row[col_a] - row[col_b])