From 511c5bfb9a945fa0345918a89690de3df574bbca Mon Sep 17 00:00:00 2001 From: Nezar Abdennur Date: Wed, 10 Apr 2024 10:05:09 -0400 Subject: [PATCH] Add bigbed fixture --- pybigtools/tests/test_api.py | 128 +++++++++++++++++++++++++++++------ 1 file changed, 107 insertions(+), 21 deletions(-) diff --git a/pybigtools/tests/test_api.py b/pybigtools/tests/test_api.py index 8928db8..0718a87 100644 --- a/pybigtools/tests/test_api.py +++ b/pybigtools/tests/test_api.py @@ -33,67 +33,127 @@ def bw(): return pybigtools.open(path, "r") -def test_check_filetype(bw): +@pytest.fixture +def bb(): + path = str(TEST_DIR / "data/bigBedExample.bb") + return pybigtools.open(path, "r") + + +def test_check_filetype(bw, bb): assert not bw.is_bigbed assert bw.is_bigwig + assert bb.is_bigbed + assert not bb.is_bigwig + -def test_chroms(bw): +def test_chroms(bw, bb): # No args => dict - assert bw.chroms() == {"chr17": 83257441} + assert bw.chroms() == {"chr17": 83_257_441} + assert bb.chroms() == {"chr21": 48_129_895} + # Arg with chrom name => length assert bw.chroms("chr17") == 83257441 + assert bb.chroms("chr21") == 48129895 + # Missing chrom => None assert bw.chroms("chr11") is None + assert bb.chroms("chr11") is None -def test_zooms(bw): +def test_zooms(bw, bb): # Get a list of zooms assert bw.zooms() == [10, 40, 160, 640, 2560, 10240, 40960, 163840, 655360, 2621440] + assert bb.zooms() == [3911, 39110, 391100, 3911000, 39110000] -def test_autosql(bw): +def test_autosql(bw, bb): # Even bigwigs have sql (a sql representing bedGraph) assert "bedGraph" in bw.sql() # We can parse the sql # assert bw.sql(True)['name'] == 'bedGraph' + # bb.sql() + # BBIReadError: The file was invalid: Invalid autosql: not UTF-8 + -def test_records(bw): +def test_records(bw, bb): # (chrom, None, None) => all records on chrom - assert len(list(bw.records("chr17"))) == 100000 + assert len(list(bw.records("chr17"))) == 100_000 + assert len(list(bb.records("chr21"))) == 14_810 # (chrom, start, None) => all records from (start, ) - assert len(list(bw.records("chr17", 100000))) == 91360 + assert len(list(bw.records("chr17", 100_000))) == 91_360 + assert len(list(bb.records("chr21", 10_000_000))) == 14_799 # (chrom, start, end) => all records from (start, end) - assert len(list(bw.records("chr17", 100000, 110000))) == 1515 + assert len(list(bw.records("chr17", 100_000, 110_000))) == 1515 + assert len(list(bb.records("chr21", 10_000_000, 20_000_000))) == 233 # Out of bounds start/end are truncated - assert len(list(bw.records("chr17", -1000, 100000))) == 8641 - assert len(list(bw.records("chr17", -1000, -500))) == 0 - assert len(list(bw.records("chr17", 0, 84000000))) == 100000 + x = list(bw.records("chr17", -1000, 100_000)) + assert len(x) == 8641 + assert list(bw.records("chr17", -1000, 100_000)) == x + x = list(bb.records("chr21", -1000, 10_000_000)) + assert len(x) == 11 + assert list(bb.records("chr21", -1000, 10_000_000)) == x + + y = list(bw.records("chr17", 0, bw.chroms("chr17"))) + assert len(y) == 100_000 + assert list(bw.records("chr17", 0, bw.chroms("chr17") * 2)) == y assert next(bw.records("chr17")) == (59898, 59900, 0.06791999936103821) + y = list(bb.records("chr21", 0, bb.chroms("chr21"))) + assert len(y) == 14810 + assert list(bb.records("chr21", 0, bb.chroms("chr21") * 2)) == y + assert next(bb.records("chr21")) == (9434178, 9434609) + + # Fully out of bounds ranges return no records + assert len(list(bw.records("chr17", -1000, -500))) == 0 + assert len(list(bw.records("chr17", 83_257_441, 84_000_000))) == 0 + + assert len(list(bb.records("chr21", -1000, -500))) == 0 + assert len(list(bb.records("chr21", 48_129_895, 49_000_000))) == 0 # Unknown chrom => exception assert pytest.raises(KeyError, bw.records, "chr11") + assert pytest.raises(KeyError, bb.records, "chr11") -def test_zoom_records(bw): +def test_zoom_records(bw, bb): # (chrom, None, None) => all records on chrom assert len(list(bw.zoom_records(10, "chr17"))) == 13811 + assert len(list(bb.zoom_records(3911, "chr21"))) == 1676 # (chrom, start, None) => all records from (start, ) - assert len(list(bw.zoom_records(10, "chr17", 100000))) == 10872 + assert len(list(bw.zoom_records(10, "chr17", 100_000))) == 10872 + assert len(list(bb.zoom_records(3911, "chr21", 10_000_000))) == 1670 # (chrom, start, end) => all records from (start, end) - assert len(list(bw.zoom_records(10, "chr17", 100000, 110000))) == 766 + assert len(list(bw.zoom_records(10, "chr17", 100_000, 110_000))) == 766 + assert len(list(bb.zoom_records(3911, "chr21", 10_000_000, 20_000_000))) == 154 # Out of bounds start/end are truncated - assert len(list(bw.zoom_records(10, "chr17", -1000, 100000))) == 2940 + x = list(bw.zoom_records(10, "chr17", 0, 100_000)) + assert len(x) == 2940 + assert list(bw.zoom_records(10, "chr17", -1000, 100_000)) == x + x = list(bb.zoom_records(3911, "chr21", 0, 10_000_000)) + assert len(x) == 6 + assert list(bb.zoom_records(3911, "chr21", -1000, 10_000_000)) == x + + y = list(bw.zoom_records(10, "chr17", 0, bw.chroms("chr17"))) + assert len(y) == 13811 + assert list(bw.zoom_records(10, "chr17", 0, bw.chroms("chr17") * 2)) == y + y = list(bb.zoom_records(3911, "chr21", 0, bb.chroms("chr21"))) + assert len(y) == 1676 + assert list(bb.zoom_records(3911, "chr21", 0, bb.chroms("chr21") * 2)) == y + + # Fully out of bounds ranges return no records assert len(list(bw.zoom_records(10, "chr17", -1000, -500))) == 0 - assert len(list(bw.zoom_records(10, "chr17", 0, 84000000))) == 13811 - assert next(bw.zoom_records(10, "chr17", 0, 100000)) == ( + assert len(list(bw.zoom_records(10, "chr17", 83_257_441, 84_000_000))) == 0 + assert len(list(bb.zoom_records(3911, "chr21", -1000, -500))) == 0 + assert len(list(bb.zoom_records(3911, "chr21", 48_129_895, 49_000_000))) == 0 + + assert next(bw.zoom_records(10, "chr17", 0, 100_000)) == ( 59898, 59908, { @@ -120,26 +180,37 @@ def test_zoom_records(bw): # Unknown zoom => exception assert pytest.raises(KeyError, bw.zoom_records, 0, "chr17") + assert pytest.raises(KeyError, bb.zoom_records, 0, "chr21") # Unknown chrom => exception assert pytest.raises(KeyError, bw.zoom_records, 10, "chr11") + assert pytest.raises(KeyError, bb.zoom_records, 3911, "chr11") -def test_values(bw): - assert len(list(bw.values("chr17", 100000, 110000))) == 10000 +def test_values(bw, bb): + assert len(bw.values("chr17", 100_000, 110_000)) == 10_000 + assert len(bb.values("chr21", 10_148_000, 10_158_000)) == 10_000 - assert len(list(bw.values("chr17", 100000, 110000, 10))) == 10 + assert len(bw.values("chr17", 100000, 110000, 10)) == 10 + assert len(bb.values("chr21", 10_148_000, 10_158_000, 10)) == 10 assert bw.values("chr17", 100000, 110000, 10)[0] == 0.37435242314338685 + assert bb.values("chr21", 10_148_000, 10_158_000, 10)[0] == 0.175 assert bw.values("chr17", 100000, 110000, 10, "max")[0] == 1.1978399753570557 + assert bb.values("chr21", 10_148_000, 10_158_000, 10, "max")[0] == 1.0 assert bw.values("chr17", 100000, 110000, 10, "min")[0] == 0.05403999984264374 + assert bb.values("chr21", 10_148_000, 10_158_000, 10, "min")[0] == 0.0 assert ( bw.values("chr17", 100000, 110000, 10, "mean", exact=True)[0] == 0.37885534041374924 ) + assert ( + bb.values("chr21", 10_148_000, 10_158_000, 10, "mean", exact=True)[0] + == 0.175 + ) assert list(bw.values("chr17", 59890, 59900, 10, "mean", exact=True)) == [ 0.0, @@ -172,9 +243,14 @@ def test_values(bw): x = bw.values("chr17", -10, 10, 20, "mean", exact=True, missing=0.0) assert math.isnan(x[0]) assert not math.isnan(x[19]) + x = bb.values("chr21", -10, 10, 20, "mean", exact=True, missing=0.0) + assert math.isnan(x[0]) + assert not math.isnan(x[19]) x = bw.values("chr17", -10, 10, 20, "mean", exact=True, missing=0.0, oob=0.0) assert x[0] == 0.0 + x = bb.values("chr21", -10, 10, 20, "mean", exact=True, missing=0.0, oob=0.0) + assert x[0] == 0.0 # The returned array is the same as the one passed, so both show the same values arr = np.zeros(20) @@ -185,3 +261,13 @@ def test_values(bw): assert arr[19] == 0.0 assert math.isnan(ret_arr[0]) assert ret_arr[19] == 0.0 + assert np.array_equal(arr, ret_arr, equal_nan=True) + + ret_arr = bb.values( + "chr21", -10, 10, 20, "mean", exact=True, missing=0.0, oob=np.nan, arr=arr + ) + assert math.isnan(arr[0]) + assert arr[19] == 0.0 + assert math.isnan(ret_arr[0]) + assert ret_arr[19] == 0.0 + assert np.array_equal(arr, ret_arr, equal_nan=True)