Skip to content

Commit

Permalink
Bug in calculating all_by_all_pairwise_similarity (#800)
Browse files Browse the repository at this point in the history
* Updated semsimian version

* poetry lock --no-update

* corrected score assignment

* formatted
  • Loading branch information
hrshdhgd committed Aug 20, 2024
1 parent ed29c27 commit 7602658
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 44 deletions.
74 changes: 37 additions & 37 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ kgcl-schema = "^0.6.9"

funowl = ">=0.2.0"
gilda = {version = ">=1.0.0", optional = true}
-semsimian = {version = ">=0.2.16", optional = true}
+semsimian = {version = ">=0.2.18", optional = true}
kgcl-rdflib = "0.5.0"
llm = "^0.14"
html2text = {version = "*", optional = true}
Expand Down
11 changes: 6 additions & 5 deletions src/oaklib/implementations/semsimian/semsimian_implementation.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ def all_by_all_pairwise_similarity(
minimum_resnik_threshold=min_ancestor_information_content,
# predicates=set(predicates) if predicates else None,
)
+
logging.info("Post-processing results from semsimian")
for term1_key, values in all_results.items():
for term2_key, result in values.items():
Expand All @@ -228,15 +229,15 @@ def all_by_all_pairwise_similarity(
iter(ancestor_set)
), # TODO: Change this: gets first element of the set
)
-sim.jaccard_similarity = jaccard
-sim.ancestor_information_content = resnik
-sim.phenodigm_score = phenodigm_score
+
else:
sim = TermPairwiseSimilarity(
subject_id=term1_key, object_id=term2_key, ancestor_id=OWL_THING
)
-sim.jaccard_similarity = 0
-sim.ancestor_information_content = 0
+sim.jaccard_similarity = jaccard if jaccard is not None else 0.0
+sim.ancestor_information_content = resnik if resnik is not None else 0.0
+sim.phenodigm_score = phenodigm_score if phenodigm_score is not None else 0.0
+
yield sim

def termset_pairwise_similarity(
Expand Down
4 changes: 3 additions & 1 deletion tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -1522,7 +1522,9 @@ def test_annotate_file(self):
],
)
print("STDERR", result.stdout)
-err = "\n".join([line for line in result.stderr.split("\n") if not line.startswith("WARNING")])
+err = "\n".join(
+[line for line in result.stderr.split("\n") if not line.startswith("WARNING")]
+)
self.assertEqual("", err)
self.assertEqual(0, result.exit_code)
with open(outfile) as stream:
Expand Down

0 comments on commit 7602658

Please sign in to comment.