Source code for wiggum.trend_components.regression

import pandas as pd
import numpy as np
import itertools
import scipy.stats as stats
from .base_getvars import  w_avg
import warnings

[docs]class LinearRegression(): ''' ''' overview_legend = 'continuous'
def is_computable(self, labeled_df=None):
    """
    Check whether this trend can be computed from the configured variables.

    The trend is reported as computable when ``regression_vars`` either
    starts with a tuple or holds at least two entries, and
    ``var_weight_list`` has exactly one entry per regression variable.

    Parameters
    ----------
    self : Trend
        a trend object providing ``set_vars``, ``regression_vars``,
        ``var_weight_list`` and ``get_trend_vars``
    labeled_df : LabeledDataFrame {None} (optional)
        forwarded to ``get_trend_vars`` when ``set_vars`` is falsy

    Returns
    -------
    computable : bool
        True if both requirements above hold, False otherwise (including
        when ``regression_vars`` is empty)

    See also: get_trends()
    """
    if not self.set_vars:
        self.get_trend_vars(labeled_df)

    reg_vars = self.regression_vars

    # guard the [0] lookup so an empty variable list is reported as "not
    # computable" instead of raising IndexError
    first_is_tuple = len(reg_vars) > 0 and isinstance(reg_vars[0], tuple)
    enough_vars = len(reg_vars) >= 2
    weights_match = len(self.var_weight_list) == len(reg_vars)

    # np.product was removed in NumPy 2.0; a plain boolean conjunction is
    # equivalent for these flags and yields the documented bool
    return bool((first_is_tuple or enough_vars) and weights_match)
def get_distance(self, row, col_a='subgroup_trend', col_b='agg_trend'):
    """
    Compute a normalized angular distance between two slopes stored in a
    row, scaled so that parallel slopes give 0 and a right angle gives 1.

    Parameters
    ----------
    row : pd.Series
        row of a result_df DataFrame
    col_a : string
        name of the entry holding the first slope
    col_b : string
        name of the entry holding the second slope

    Returns
    -------
    distance : float
        the angle from ``get_distance_unnormalized`` expressed as a
        fraction of a right angle (unitless, not degrees), compatible with
        assignment to a cell of a result_df
    """
    abs_angle = self.get_distance_unnormalized(row, col_a, col_b)
    right_angle = np.pi / 2

    # A bare modulo maps a right angle to 0, so perfectly reversed slopes
    # (e.g. 1 and -1) would look identical. Pin that case to the maximum
    # distance; the tolerance absorbs rounding in the arctan differences.
    if np.isclose(abs_angle, right_angle, rtol=0.0, atol=1e-12):
        return 1.0

    # NOTE: angles larger than a right angle still wrap around through the
    # modulo (e.g. 3*pi/4 is reported as 0.5)
    return (abs_angle % right_angle) / right_angle
def get_distance_unnormalized(self, row, col_a='subgroup_trend', col_b='agg_trend'):
    """
    Compute the absolute difference between the inclination angles of two
    slopes stored in a row.

    Parameters
    ----------
    row : pd.Series
        row of a result_df DataFrame
    col_a : string
        name of the entry holding the first slope
    col_b : string
        name of the entry holding the second slope

    Returns
    -------
    angle : float
        non-negative difference, in radians, between the arctangents of
        the two slopes; compatible with assignment to a cell of a
        result_df
    """
    slope_a, slope_b = row[col_a], row[col_b]

    # arctan turns each slope into the angle its line makes with the
    # x axis; the sign of the difference is dropped so the result can be
    # used as a distance
    return np.absolute(np.arctan(slope_b) - np.arctan(slope_a))