Skip to content

Commit

Permalink
Merge branch 'main' into issue_581_get_column_pair_plot
Browse files Browse the repository at this point in the history
  • Loading branch information
lajohn4747 committed Jun 25, 2024
2 parents 673c1d9 + 7d2f508 commit dc2eed0
Showing 1 changed file with 33 additions and 9 deletions.
42 changes: 33 additions & 9 deletions tests/unit/column_pairs/statistical/test_referential_integrity.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,32 +68,56 @@ def test_compute(self, compute_breakdown_mock):
assert result == 0.6

def test_compute_with_nan_foreign_keys_real_data(self):
"""Test the ``compute`` method with NaN foreign keys inside the real data."""
"""Test the ``compute`` method with NaN foreign keys inside the real data.
Here, the score should be 1.0, whether or not the synthetic data have NaN
values, as the real data have null foreign keys.
"""
# Setup
parent_keys = pd.Series(['a', 'b', 'c'])
foreign_keys = pd.Series(['a', 'a', 'b', 'c', np.nan])
real_fk = pd.Series(['a', 'a', 'b', 'c', np.nan])
synthetic_fk = pd.Series(['a', 'a', 'b', 'c', 'a'])
synthetic_fk_with_nan = pd.Series(['a', 'a', 'b', 'c', np.nan])
metric = ReferentialIntegrity()

# Run
result = metric.compute(
real_data=(parent_keys, foreign_keys), synthetic_data=(parent_keys, foreign_keys)
real_data=(parent_keys, real_fk), synthetic_data=(parent_keys, synthetic_fk)
)
result_with_nan = metric.compute(
real_data=(parent_keys, real_fk), synthetic_data=(parent_keys, synthetic_fk_with_nan)
)

# Assert
assert result == 1.0
assert result_with_nan == 1.0

def test_compute_with_nan_foreign_keys_only_synthetic_data(self):
"""Test the ``compute`` method with NaN foreign keys inside the synthetic data."""
"""Test the ``compute`` method with NaN foreign keys inside the synthetic data.
Here, the real data have no null foreign keys, so the score should decrease as
the number of NaN values in the synthetic data increases.
"""
# Setup
parent_keys = pd.Series(['a', 'b', 'c'])
foreign_keys = pd.Series(['a', 'a', 'b', 'c', 'a'])
synth_foreign_keys = pd.Series(['a', 'a', 'b', 'c', np.nan])
real_fk = pd.Series(['a', 'a', 'b', 'c', 'a'])
synth_fk_0_nan = pd.Series(['a', 'a', 'b', 'c'])
synth_fk_1_nan = pd.Series(['a', 'a', 'b', 'c', np.nan])
synth_fk_2_nan = pd.Series(['a', 'a', 'b', 'c', np.nan, np.nan])
metric = ReferentialIntegrity()

# Run
result = metric.compute(
real_data=(parent_keys, foreign_keys), synthetic_data=(parent_keys, synth_foreign_keys)
result_0 = metric.compute(
real_data=(parent_keys, real_fk), synthetic_data=(parent_keys, synth_fk_0_nan)
)
result_1 = metric.compute(
real_data=(parent_keys, real_fk), synthetic_data=(parent_keys, synth_fk_1_nan)
)
result_2 = metric.compute(
real_data=(parent_keys, real_fk), synthetic_data=(parent_keys, synth_fk_2_nan)
)

# Assert
assert result == 0.8
assert result_0 == 1.0
assert result_1 == 0.8
assert result_2 == 2 / 3

0 comments on commit dc2eed0

Please sign in to comment.