Skip to content

Commit

Permalink
Merge pull request #90 from dlaprins/suppressor
Browse files Browse the repository at this point in the history
The Skorecard fit method now shows a warning if any of the coefficients are negative.
  • Loading branch information
anilkumarpanda committed Feb 23, 2023
2 parents e2311a1 + 654d409 commit dc880bd
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 3 deletions.
10 changes: 7 additions & 3 deletions skorecard/skorecard.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from skorecard.linear_model import LogisticRegression
from skorecard.utils import BucketerTypeError
from skorecard.utils.validation import ensure_dataframe, is_fitted
from skorecard.utils.validation import ensure_dataframe, is_fitted, check_suppressor_effect
from skorecard.pipeline import BucketingProcess, to_skorecard_pipeline
from skorecard.pipeline.pipeline import _get_all_steps
from skorecard.bucketers import (
Expand Down Expand Up @@ -205,8 +205,10 @@ def _build_default_bucketing_process(self, X):
bucketing_pipeline = to_skorecard_pipeline(make_pipeline(*bucketing_pipe))

return BucketingProcess(
specials=self.specials, prebucketing_pipeline=prebucketing_pipeline, bucketing_pipeline=bucketing_pipeline,
random_state=self.random_state
specials=self.specials,
prebucketing_pipeline=prebucketing_pipeline,
bucketing_pipeline=bucketing_pipeline,
random_state=self.random_state,
)

def _build_pipeline(self, X):
Expand Down Expand Up @@ -276,6 +278,8 @@ def fit(self, X, y=None):
self.coef_ = self.pipeline_[-1].coef_
self.n_features_in_ = len(X.columns)

check_suppressor_effect(self.coef_[0], X.columns)

return self

def fit_interactive(self, X, y=None, mode="external", **server_kwargs):
Expand Down
12 changes: 12 additions & 0 deletions skorecard/utils/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,15 @@ def check_args(args: Dict, obj):
if arg not in valid_args:
msg = f"Argument '{arg}' is not a valid argument for object '{obj}'"
warnings.warn(msg)


def check_suppressor_effect(coefs: list, feat_names: list):
    """Warn when any coefficient has a negative sign.

    A negative coefficient is reported as contrary to what is expected based
    on weight-of-evidence, which the warning attributes to multi-collinearity.

    Args:
        coefs: Coefficients, one per feature.
        feat_names: Feature names, positionally aligned with ``coefs``.

    Returns:
        None. A single warning listing all affected features is issued via
        ``warnings.warn`` when at least one coefficient is negative.
    """
    # Pair each coefficient with its name positionally; entries beyond the
    # shorter of the two sequences are ignored.
    suspect_feats = [name for coef, name in zip(coefs, feat_names) if coef < 0]
    if suspect_feats:
        # Adjacent literals are used instead of a backslash continuation inside
        # the string, so source indentation can never leak into the message.
        msg = (
            "Features found with coefficient-sign that is contrary to what is expected based on "
            "weight-of-evidence. This is likely caused by multi-collinearity. "
            f"The features are: {suspect_feats}"
        )
        warnings.warn(msg)
45 changes: 45 additions & 0 deletions tests/test_suppressor_warning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import warnings

from skorecard.datasets import load_uci_credit_card
from skorecard import Skorecard


def _suppressor_warning_issued(recorded):
    """Return True if any recorded warning carries the suppressor-effect message."""
    msg = "Features found with coefficient-sign that is contrary to what is expected based on weight-of-evidence."
    # Scan every recorded warning rather than only the last one, so an
    # unrelated warning emitted afterwards cannot mask the one under test.
    return any(msg in str(record.message) for record in recorded)


def test_suppressor_warning():
    """Checks suppressor effect warning on Skorecard fit."""
    # Load the data. Construct datasets with and without suppressor effect occurring
    data = load_uci_credit_card()
    y = data["target"]

    X_no_suppression = data["data"]

    X_suppression = X_no_suppression.copy()
    X_suppression["suppressor"] = X_suppression[X_suppression.columns[0]] - X_suppression[X_suppression.columns[1]]

    # Check that the suppressor warning is not issued and that no coefficient has an unexpected sign
    with warnings.catch_warnings(record=True) as w:
        # Record every warning regardless of ambient filters or the
        # once-per-location registry.
        warnings.simplefilter("always")
        model = Skorecard().fit(X_no_suppression, y)
    assert not _suppressor_warning_issued(w)
    assert not any(c < 0 for c in model.coef_[0])

    # Check that the suppressor warning is issued and that there is a coefficient with an unexpected sign.
    # A fresh recording context keeps this check independent of the first fit.
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        model_suppression = Skorecard().fit(X_suppression, y)
    assert _suppressor_warning_issued(w)
    assert any(c < 0 for c in model_suppression.coef_[0])

0 comments on commit dc880bd

Please sign in to comment.