make normalization optional
Eh2406 committed Apr 4, 2018
1 parent b36288b commit b9a826e
Showing 2 changed files with 41 additions and 22 deletions.
urbansim/models/dcm.py: 36 changes (25 additions & 11 deletions)
@@ -234,13 +234,15 @@ class MNLDiscreteChoiceModel(DiscreteChoiceModel):
Whether (and how much) to sample alternatives during prediction.
Note that this can lead to multiple choosers picking the same
alternative.
choice_column : optional
choice_column : str, optional
Name of the column in the `alternatives` table that choosers
should choose. e.g. the 'building_id' column. If not provided
the alternatives index is used.
name : optional
name : str, optional
Optional descriptive name for this model that may be used
in output.
normalize : bool, optional, default False
Subtract the mean and divide by the standard deviation of each column before fitting the coefficients.
"""
def __init__(
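The new flag can be passed straight to the constructor. A minimal usage sketch, not part of the diff: the model expression, sample size, and column names below are placeholders.

    # Hypothetical usage sketch; the expression, sample size, and column
    # names are illustrative, not taken from this commit.
    from urbansim.models.dcm import MNLDiscreteChoiceModel

    model = MNLDiscreteChoiceModel(
        model_expression='sqft_per_unit + income',  # placeholder patsy right-hand side
        sample_size=10,
        choice_column='building_id',
        name='example_lcm',
        normalize=True)  # standardize design-matrix columns before fitting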
@@ -251,7 +253,8 @@ def __init__(
interaction_predict_filters=None,
estimation_sample_size=None,
prediction_sample_size=None,
choice_column=None, name=None):
choice_column=None, name=None,
normalize=False):
self._check_prob_choice_mode_compat(probability_mode, choice_mode)
self._check_prob_mode_interaction_compat(
probability_mode, interaction_predict_filters)
@@ -270,6 +273,7 @@ def __init__(
self.choice_column = choice_column
self.name = name if name is not None else 'MNLDiscreteChoiceModel'
self.sim_pdf = None
self.normalize = normalize

self.log_likelihoods = None
self.fit_parameters = None
@@ -308,7 +312,8 @@ def from_yaml(cls, yaml_str=None, str_or_buffer=None):
estimation_sample_size=cfg.get('estimation_sample_size', None),
prediction_sample_size=cfg.get('prediction_sample_size', None),
choice_column=cfg.get('choice_column', None),
name=cfg.get('name', None)
name=cfg.get('name', None),
normalize=cfg.get('normalize', False),
)

if cfg.get('log_likelihoods', None):
@@ -420,7 +425,7 @@ def fit(self, choosers, alternatives, current_choice):
'the input columns.')

self.log_likelihoods, self.fit_parameters = mnl.mnl_estimate(
model_design.as_matrix(), chosen, self.sample_size)
model_design.as_matrix(), chosen, self.sample_size, normalize=self.normalize)
self.fit_parameters.index = model_design.columns

logger.debug('finish: fit LCM model {}'.format(self.name))
@@ -534,10 +539,18 @@ def probabilities(self, choosers, alternatives, filter_tables=True):
coeffs = [self.fit_parameters['Coefficient'][x]
for x in model_design.columns]

normalization_mean = [self.fit_parameters['Normalization Mean'][x]
for x in model_design.columns]
normalization_std = [self.fit_parameters['Normalization Std'][x]
for x in model_design.columns]
if 'Normalization Mean' in self.fit_parameters:
    normalization_mean = [self.fit_parameters['Normalization Mean'][x]
                          for x in model_design.columns]
else:
    normalization_mean = 0.0
if 'Normalization Std' in self.fit_parameters:
    normalization_std = [self.fit_parameters['Normalization Std'][x]
                         for x in model_design.columns]
else:
    normalization_std = 1.0

# probabilities are returned from mnl_simulate as a 2d array
# with choosers along rows and alternatives along columns
@@ -549,9 +562,9 @@ def probabilities(self, choosers, alternatives, filter_tables=True):
probabilities = mnl.mnl_simulate(
model_design.as_matrix(),
coeffs,
numalts,
normalization_mean,
normalization_std,
numalts=numalts,
returnprobs=True)

# want to turn probabilities into a Series with a MultiIndex
Expand Down Expand Up @@ -688,7 +701,8 @@ def to_dict(self):
'fitted': self.fitted,
'log_likelihoods': self.log_likelihoods,
'fit_parameters': (yamlio.frame_to_yaml_safe(self.fit_parameters)
if self.fitted else None)
if self.fitted else None),
'normalize': self.normalize,
}

def to_yaml(self, str_or_buffer=None):
urbansim/urbanchoice/mnl.py: 27 changes (16 additions & 11 deletions)
@@ -118,7 +118,7 @@ def mnl_loglik(beta, data, chosen, numalts, weights=None, lcgrad=False,
return -1 * loglik, -1 * gradarr


def mnl_simulate(data, coeff, normalization_mean, normalization_std, numalts, GPU=False, returnprobs=True):
def mnl_simulate(data, coeff, numalts, normalization_mean=0.0, normalization_std=1.0, GPU=False, returnprobs=True):
"""
Get the probabilities for each chooser choosing between `numalts`
alternatives.
@@ -131,12 +131,12 @@ def mnl_simulate(data, coeff, normalization_mean, normalization_std, numalts, GP
choosers. Alternatives must be in the same order for each chooser.
coeff : 1D array
The model coefficients corresponding to each column in `data`.
normalization_mean : 1D array
The model normalization constant corresponding to each column in `data`.
normalization_std : 1D array
The model normalization factor corresponding to each column in `data`.
numalts : int
The number of alternatives available to each chooser.
normalization_mean : 1D array or float, optional, default 0.0
The normalization constant (column mean) subtracted from each column in `data`.
normalization_std : 1D array or float, optional, default 1.0
The normalization factor (column standard deviation) by which each column in `data` is divided.
GPU : bool, optional
returnprobs : bool, optional
If True, return the probabilities for each chooser/alternative instead
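A minimal sketch of how these arguments would be used, assuming the same (data - mean) / std convention applied in mnl_estimate; this is illustrative code, not part of the commit.

    # Illustrative only: apply the stored normalization, then compute MNL
    # probabilities with a row-wise softmax over each chooser's alternatives.
    import numpy as np

    def normalized_probabilities(data, coeff, numalts,
                                 normalization_mean=0.0, normalization_std=1.0):
        scaled = (data - normalization_mean) / normalization_std   # same transform as at fit time
        utilities = np.dot(scaled, coeff).reshape(-1, numalts)     # one row per chooser
        exp_u = np.exp(utilities - utilities.max(axis=1, keepdims=True))  # numerically stable
        return exp_u / exp_u.sum(axis=1, keepdims=True)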
@@ -178,7 +178,7 @@ def mnl_simulate(data, coeff, normalization_mean, normalization_std, numalts, GP


def mnl_estimate(data, chosen, numalts, GPU=False, coeffrange=(-3, 3),
weights=None, lcgrad=False, beta=None):
weights=None, lcgrad=False, beta=None, normalize=False):
"""
Calculate coefficients of the MNL model.
@@ -202,6 +202,8 @@ def mnl_estimate(data, chosen, numalts, GPU=False, coeffrange=(-3, 3),
lcgrad : bool, optional
beta : 1D array, optional
Any initial guess for the coefficients.
normalize : bool, optional, default False
Subtract the mean and divide by the standard deviation of each column before fitting the coefficients.
Returns
-------
@@ -226,10 +228,11 @@ def mnl_estimate(data, chosen, numalts, GPU=False, coeffrange=(-3, 3),
numvars = data.shape[1]
numobs = data.shape[0] // numalts

normalization_mean = data.mean(0)
normalization_std = data.std(0, ddof=1)
if normalize:
    normalization_mean = data.mean(0)
    normalization_std = data.std(0, ddof=1)

data = (data.copy() - normalization_mean) / normalization_std
    data = (data.copy() - normalization_mean) / normalization_std

if chosen is None:
chosen = np.ones((numobs, numalts)) # used for latent classes
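For reference, the standardization above is a plain z-score using the sample standard deviation (ddof=1); a small self-contained check, not part of the diff:

    # Each column of z ends up with mean 0 and sample standard deviation 1.
    import numpy as np

    data = np.array([[1.0, 10.0],
                     [2.0, 20.0],
                     [3.0, 30.0]])
    mean = data.mean(0)        # array([ 2., 20.])
    std = data.std(0, ddof=1)  # array([ 1., 10.])
    z = (data - mean) / std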
@@ -270,11 +273,13 @@ def mnl_estimate(data, chosen, numalts, GPU=False, coeffrange=(-3, 3),
}

fit_parameters = pd.DataFrame({
'Normalization Mean': normalization_mean,
'Normalization Std': normalization_std,
'Coefficient': beta,
'Std. Error': stderr,
'T-Score': beta / stderr})

if normalize:
    fit_parameters['Normalization Mean'] = normalization_mean
    fit_parameters['Normalization Std'] = normalization_std

logger.debug('finish: MNL fit')
return log_likelihood, fit_parameters
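A hedged usage sketch, not from the commit: with normalize=True the returned fit_parameters frame carries the two normalization columns, while the default normalize=False leaves only the coefficient, standard error, and t-score columns. The synthetic shapes follow the docstring in this file.

    # Synthetic data shaped as mnl_estimate expects:
    # rows = choosers * alternatives, one chosen alternative per chooser.
    import numpy as np
    from urbansim.urbanchoice import mnl

    numalts = 3
    rng = np.random.RandomState(0)
    design = rng.rand(30 * numalts, 2)               # 30 choosers, 2 variables
    chosen = np.zeros((30, numalts))
    chosen[np.arange(30), rng.randint(numalts, size=30)] = 1

    log_lik, fit_parameters = mnl.mnl_estimate(design, chosen, numalts,
                                               normalize=True)
    # 'Normalization Mean' and 'Normalization Std' appear only when
    # normalize=True was passed.
    print(fit_parameters.columns.tolist())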
