Skip to content

Commit

Permalink
Merge pull request #106 from ncsa/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
joshfactorial committed May 7, 2024
2 parents d17945e + c91d19f commit 2c80a44
Show file tree
Hide file tree
Showing 9 changed files with 14,522 additions and 2,029 deletions.
1 change: 0 additions & 1 deletion dev-environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,6 @@ dependencies:
- platformdirs==2.5.2
- prompt-toolkit==3.0.29
- pure-eval==0.2.2
- pybedtools==0.9.0
- pygments==2.12.0
- pyqt5-sip==12.9.0
- pysam==0.19.1
Expand Down
3 changes: 1 addition & 2 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,11 @@ dependencies:
- pkginfo
- matplotlib
- numpy
- seaborn
- pyyaml
- pip
- scipy
- pytest
- bedtools
- libgcc=5.2.0
- htslib
- pip:
- pysam
Expand Down
8 changes: 0 additions & 8 deletions neat/gen_mut_model/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,9 @@
"""

import json
import os.path
import pathlib
import pickle
import math
import sys

import numpy as np
from numpy import genfromtxt
import pybedtools
from Bio import SeqIO


from pathlib import Path
import logging
Expand Down
2 changes: 0 additions & 2 deletions neat/model_sequencing_error/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,8 @@
import logging
import numpy as np
# TODO implement plotting
# import seaborn as sns
import matplotlib.pyplot as plt

import pandas as pd
from scipy.stats import mode
from ..common import open_input
from ..models import take_closest
Expand Down
12,500 changes: 12,500 additions & 0 deletions neat/models/original_error_model.py

Large diffs are not rendered by default.

2 changes: 0 additions & 2 deletions neat/utilities/compute_gc.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,9 @@
import gzip
import pickle
import time
import pdb

import numpy as np
from Bio import SeqIO
import pybedtools


def process_fasta(file: str) -> dict:
Expand Down
3,980 changes: 1,967 additions & 2,013 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ pkginfo = "^1.8.3"
numpy = "^1.23"
PyYAML = "^6.0"
pysam = "^0.19.1"
pybedtools = "^0.9.0"

[tool.poetry.dev-dependencies]
poetry = "^1.1.13"
Expand Down
54 changes: 54 additions & 0 deletions tests/test_read_simulator/test_cover_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,3 +99,57 @@ def test_fragment_mean_st_dev_combinations():
read1, _ = cover_dataset(read_pool, span_length, target_vector, options, fragment_model)
except Exception as e:
pytest.fail(f"Test failed for mean={mean}, st_dev={st_dev} with exception: {e}")

def test_coverage_ploidy_combinations():
    """
    Exercise cover_dataset across a grid of coverage and ploidy settings.

    For each (coverage, ploidy) pair the paired-ended reads returned by
    cover_dataset are counted at every position of the span, and the mean
    depth over the span must be strictly greater than the requested coverage.
    """
    read_pool = [10] * 2000
    span_length = 100
    target_vector = np.full(100, fill_value=10, dtype=int)

    options = Options(rng_seed=0)
    options.paired_ended = True
    options.read_len = 101
    options.fragment_mean = 250
    options.fragment_st_dev = 100
    options.output.overwrite_output = True
    fragment_model = FragmentLengthModel(rng=options.rng)

    coverage_values = [1, 2, 5, 10, 25, 50, 100]
    ploidy_values = [1, 1.5, 2]
    # Flattened grid; ploidy varies fastest, exactly as a ploidy loop nested
    # inside a coverage loop would visit the pairs.
    settings = [(cov, pl) for cov in coverage_values for pl in ploidy_values]

    for coverage, ploidy in settings:
        options.coverage = coverage
        # NOTE(review): this assumes cover_dataset honours a 'ploidy' option;
        # that is not visible from this test -- confirm against the implementation.
        options.ploidy = ploidy
        read1, read2 = cover_dataset(read_pool, span_length, target_vector, options, fragment_model)

        # Paired-ended run: both mates contribute to the depth at a position.
        all_reads = read1 + read2
        depth_per_position = [
            len([read for read in all_reads if position in range(read[0], read[1])])
            for position in range(span_length)
        ]
        mean_depth = sum(depth_per_position) / len(depth_per_position)
        assert mean_depth > coverage, f"Coverage check failed for coverage {coverage} and ploidy {ploidy}"

def test_single_ended_mode():
    """
    Check cover_dataset in single-ended mode.

    The call itself must not raise, and the mean per-position depth of the
    returned read1 intervals over the span must be strictly greater than
    options.coverage.
    """
    read_pool = [10] * 2000
    span_length = 100
    target_vector = np.full(100, fill_value=10, dtype=int)
    options = Options(rng_seed=0)
    options.read_len = 101
    options.paired_ended = False
    options.fragment_mean = 250
    options.fragment_st_dev = 100
    options.coverage = 10
    options.output.overwrite_output = True
    fragment_model = FragmentLengthModel(rng=options.rng)

    # Only the call under test is guarded. Keeping the assertion out of the
    # try block stops a coverage shortfall (AssertionError) from being caught
    # and re-reported as an unexpected exception, which would hide pytest's
    # assertion details.
    try:
        read1, _ = cover_dataset(read_pool, span_length, target_vector, options, fragment_model)
    except Exception as e:
        pytest.fail(f"Test failed in single-ended mode with exception: {e}")

    # Single-ended run: only read1 contributes to the depth at a position.
    coverage_check = [
        len([x for x in read1 if i in range(x[0], x[1])])
        for i in range(span_length)
    ]
    mean_depth = sum(coverage_check) / len(coverage_check)
    assert mean_depth > options.coverage, "Coverage check failed in single-ended mode"

0 comments on commit 2c80a44

Please sign in to comment.