Skip to content

Commit

Permalink
Update benchmarks to use /dev/null in a few places
Browse files Browse the repository at this point in the history
  • Loading branch information
jackh726 committed Dec 3, 2023
1 parent c24d845 commit 3fc919d
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 242 deletions.
58 changes: 29 additions & 29 deletions bench/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,12 +93,12 @@ def bigwigaverageoverbed(comp):
if not os.path.exists('./workdir/ENCFF646AZP_cut.bed'):
process = subprocess.check_call('cat ./workdir/ENCFF646AZP.bed | cut -f1-3 | awk -v OFS=\'\\t\' \'{print $1,$2,$3, NR}\' > ./workdir/ENCFF646AZP_cut.bed', shell=True)
benchmarks = {
'ucsc': [['{}/bigWigAverageOverBed'.format(ucsctoolspath), './workdir/ENCFF937MNZ.bigWig', './workdir/ENCFF646AZP_cut.bed', './workdir/test_out_ucsc.bed']],
'bigtools_1thread': [['{}/bigwigaverageoverbed'.format(bigtoolspath), './workdir/ENCFF937MNZ.bigWig', './workdir/ENCFF646AZP_cut.bed', './workdir/test_out_bigtools.bed', '-t 1']],
'bigtools_2thread': [['{}/bigwigaverageoverbed'.format(bigtoolspath), './workdir/ENCFF937MNZ.bigWig', './workdir/ENCFF646AZP_cut.bed', './workdir/test_out_bigtools.bed', '-t 2']],
'bigtools_4thread': [['{}/bigwigaverageoverbed'.format(bigtoolspath), './workdir/ENCFF937MNZ.bigWig', './workdir/ENCFF646AZP_cut.bed', './workdir/test_out_bigtools.bed', '-t 4']],
'bigtools_6thread': [['{}/bigwigaverageoverbed'.format(bigtoolspath), './workdir/ENCFF937MNZ.bigWig', './workdir/ENCFF646AZP_cut.bed', './workdir/test_out_bigtools.bed', '-t 6']],
'bigtools_8thread': [['{}/bigwigaverageoverbed'.format(bigtoolspath), './workdir/ENCFF937MNZ.bigWig', './workdir/ENCFF646AZP_cut.bed', './workdir/test_out_bigtools.bed', '-t 8']],
'ucsc': [['{}/bigWigAverageOverBed'.format(ucsctoolspath), './workdir/ENCFF937MNZ.bigWig', './workdir/ENCFF646AZP_cut.bed', '/dev/null']],
'bigtools_1thread': [['{}/bigwigaverageoverbed'.format(bigtoolspath), './workdir/ENCFF937MNZ.bigWig', './workdir/ENCFF646AZP_cut.bed', '/dev/null', '-t 1']],
'bigtools_2thread': [['{}/bigwigaverageoverbed'.format(bigtoolspath), './workdir/ENCFF937MNZ.bigWig', './workdir/ENCFF646AZP_cut.bed', '/dev/null', '-t 2']],
'bigtools_4thread': [['{}/bigwigaverageoverbed'.format(bigtoolspath), './workdir/ENCFF937MNZ.bigWig', './workdir/ENCFF646AZP_cut.bed', '/dev/null', '-t 4']],
'bigtools_6thread': [['{}/bigwigaverageoverbed'.format(bigtoolspath), './workdir/ENCFF937MNZ.bigWig', './workdir/ENCFF646AZP_cut.bed', '/dev/null', '-t 6']],
'bigtools_8thread': [['{}/bigwigaverageoverbed'.format(bigtoolspath), './workdir/ENCFF937MNZ.bigWig', './workdir/ENCFF646AZP_cut.bed', '/dev/null', '-t 8']],
}
compare(comp, 'bigwigaverageoverbed', benchmarks)

Expand All @@ -109,12 +109,12 @@ def bigwigaverageoverbed_long(comp):
if not os.path.exists('./workdir/ENCFF076CIO_cut_sample.bed'):
process = subprocess.check_call(f'{bigtoolspath}/bigtools chromintersect -a ./workdir/ENCFF076CIO.bed -b ./workdir/ENCFF937MNZ.bigWig -o -' + ' | cut -f1-3 | awk -v OFS=\'\\t\' \'{print $1,$2,$3, NR}\' | shuf --random-source=./workdir/ENCFF076CIO.bed | head -1000000 | sort -k1,1 -k2,2n > ./workdir/ENCFF076CIO_cut_sample.bed', shell=True)
benchmarks = {
'ucsc': [['{}/bigWigAverageOverBed'.format(ucsctoolspath), './workdir/ENCFF937MNZ.bigWig', './workdir/ENCFF076CIO_cut_sample.bed', './workdir/test_out_ucsc.bed']],
'bigtools_1thread': [['{}/bigwigaverageoverbed'.format(bigtoolspath), './workdir/ENCFF937MNZ.bigWig', './workdir/ENCFF076CIO_cut_sample.bed', './workdir/test_out_bigtools.bed', '-t 1']],
'bigtools_2thread': [['{}/bigwigaverageoverbed'.format(bigtoolspath), './workdir/ENCFF937MNZ.bigWig', './workdir/ENCFF076CIO_cut_sample.bed', './workdir/test_out_bigtools.bed', '-t 2']],
'bigtools_4thread': [['{}/bigwigaverageoverbed'.format(bigtoolspath), './workdir/ENCFF937MNZ.bigWig', './workdir/ENCFF076CIO_cut_sample.bed', './workdir/test_out_bigtools.bed', '-t 4']],
'bigtools_6thread': [['{}/bigwigaverageoverbed'.format(bigtoolspath), './workdir/ENCFF937MNZ.bigWig', './workdir/ENCFF076CIO_cut_sample.bed', './workdir/test_out_bigtools.bed', '-t 6']],
'bigtools_8thread': [['{}/bigwigaverageoverbed'.format(bigtoolspath), './workdir/ENCFF937MNZ.bigWig', './workdir/ENCFF076CIO_cut_sample.bed', './workdir/test_out_bigtools.bed', '-t 8']],
'ucsc': [['{}/bigWigAverageOverBed'.format(ucsctoolspath), './workdir/ENCFF937MNZ.bigWig', './workdir/ENCFF076CIO_cut_sample.bed', '/dev/null']],
'bigtools_1thread': [['{}/bigwigaverageoverbed'.format(bigtoolspath), './workdir/ENCFF937MNZ.bigWig', './workdir/ENCFF076CIO_cut_sample.bed', '/dev/null', '-t 1']],
'bigtools_2thread': [['{}/bigwigaverageoverbed'.format(bigtoolspath), './workdir/ENCFF937MNZ.bigWig', './workdir/ENCFF076CIO_cut_sample.bed', '/dev/null', '-t 2']],
'bigtools_4thread': [['{}/bigwigaverageoverbed'.format(bigtoolspath), './workdir/ENCFF937MNZ.bigWig', './workdir/ENCFF076CIO_cut_sample.bed', '/dev/null', '-t 4']],
'bigtools_6thread': [['{}/bigwigaverageoverbed'.format(bigtoolspath), './workdir/ENCFF937MNZ.bigWig', './workdir/ENCFF076CIO_cut_sample.bed', '/dev/null', '-t 6']],
'bigtools_8thread': [['{}/bigwigaverageoverbed'.format(bigtoolspath), './workdir/ENCFF937MNZ.bigWig', './workdir/ENCFF076CIO_cut_sample.bed', '/dev/null', '-t 8']],
}
compare(comp, 'bigwigaverageoverbed_long', benchmarks)

Expand All @@ -126,24 +126,24 @@ def bigwigmerge_bigwig(comp):
['{}/bigWigMerge'.format(ucsctoolspath), './workdir/ENCFF937MNZ.bigWig', './workdir/ENCFF447DHW.bigWig', './workdir/test_out_ucsc.bedGraph'],
['{}/bedGraphToBigWig'.format(ucsctoolspath), './workdir/test_out_ucsc.bedGraph', './workdir/hg38.chrom.sizes', './workdir/test_out_ucsc.bigWig'],
],
'bigtools_1thread': [['{}/bigwigmerge'.format(bigtoolspath), './workdir/test_out_bigtools.bigWig', '-b ./workdir/ENCFF937MNZ.bigWig', '-b ./workdir/ENCFF447DHW.bigWig', '-t 1']],
'bigtools_2thread': [['{}/bigwigmerge'.format(bigtoolspath), './workdir/test_out_bigtools.bigWig', '-b ./workdir/ENCFF937MNZ.bigWig', '-b ./workdir/ENCFF447DHW.bigWig', '-t 2']],
'bigtools_4thread': [['{}/bigwigmerge'.format(bigtoolspath), './workdir/test_out_bigtools.bigWig', '-b ./workdir/ENCFF937MNZ.bigWig', '-b ./workdir/ENCFF447DHW.bigWig', '-t 4']],
'bigtools_6thread': [['{}/bigwigmerge'.format(bigtoolspath), './workdir/test_out_bigtools.bigWig', '-b ./workdir/ENCFF937MNZ.bigWig', '-b ./workdir/ENCFF447DHW.bigWig', '-t 6']],
'bigtools_8thread': [['{}/bigwigmerge'.format(bigtoolspath), './workdir/test_out_bigtools.bigWig', '-b ./workdir/ENCFF937MNZ.bigWig', '-b ./workdir/ENCFF447DHW.bigWig', '-t 8']],
'bigtools_1thread': [['{}/bigwigmerge'.format(bigtoolspath), './workdir/test_out_bigtools.bigWig', '-b ./workdir/ENCFF937MNZ.bigWig', '-b ./workdir/ENCFF447DHW.bigWig', '--output-type', 'bigwig', '-t 1']],
'bigtools_2thread': [['{}/bigwigmerge'.format(bigtoolspath), './workdir/test_out_bigtools.bigWig', '-b ./workdir/ENCFF937MNZ.bigWig', '-b ./workdir/ENCFF447DHW.bigWig', '--output-type', 'bigwig', '-t 2']],
'bigtools_4thread': [['{}/bigwigmerge'.format(bigtoolspath), './workdir/test_out_bigtools.bigWig', '-b ./workdir/ENCFF937MNZ.bigWig', '-b ./workdir/ENCFF447DHW.bigWig', '--output-type', 'bigwig', '-t 4']],
'bigtools_6thread': [['{}/bigwigmerge'.format(bigtoolspath), './workdir/test_out_bigtools.bigWig', '-b ./workdir/ENCFF937MNZ.bigWig', '-b ./workdir/ENCFF447DHW.bigWig', '--output-type', 'bigwig', '-t 6']],
'bigtools_8thread': [['{}/bigwigmerge'.format(bigtoolspath), './workdir/test_out_bigtools.bigWig', '-b ./workdir/ENCFF937MNZ.bigWig', '-b ./workdir/ENCFF447DHW.bigWig', '--output-type', 'bigwig', '-t 8']],
}
compare(comp, 'bigwigmerge_bigwig', benchmarks)

# Benchmark definition for merging two bigWig files into bedGraph output.
# Builds a `benchmarks` dict mapping a label ('ucsc', 'bigtools_<N>thread') to a
# list of command argv lists, then hands it to `compare` under the name
# 'bigwigmerge_bedgraph'.
# NOTE(review): in this diff rendering every key appears twice. The first six
# entries (writing to ./workdir/test_out_*.bedGraph) are presumably the removed
# lines and the following six (writing to /dev/null, with an explicit
# `--output-type bedgraph` for bigtools) their replacements -- confirm against
# the actual file before relying on either set.
def bigwigmerge_bedgraph(comp):
global ucsctoolspath
global bigtoolspath
benchmarks = {
'ucsc': [['{}/bigWigMerge'.format(ucsctoolspath), './workdir/ENCFF937MNZ.bigWig', './workdir/ENCFF447DHW.bigWig', './workdir/test_out_ucsc.bedGraph']],
'bigtools_1thread': [['{}/bigwigmerge'.format(bigtoolspath), './workdir/test_out_bigtools.bedGraph', '-b ./workdir/ENCFF937MNZ.bigWig', '-b ./workdir/ENCFF447DHW.bigWig', '-t 1']],
'bigtools_2thread': [['{}/bigwigmerge'.format(bigtoolspath), './workdir/test_out_bigtools.bedGraph', '-b ./workdir/ENCFF937MNZ.bigWig', '-b ./workdir/ENCFF447DHW.bigWig', '-t 2']],
'bigtools_4thread': [['{}/bigwigmerge'.format(bigtoolspath), './workdir/test_out_bigtools.bedGraph', '-b ./workdir/ENCFF937MNZ.bigWig', '-b ./workdir/ENCFF447DHW.bigWig', '-t 4']],
'bigtools_6thread': [['{}/bigwigmerge'.format(bigtoolspath), './workdir/test_out_bigtools.bedGraph', '-b ./workdir/ENCFF937MNZ.bigWig', '-b ./workdir/ENCFF447DHW.bigWig', '-t 6']],
'bigtools_8thread': [['{}/bigwigmerge'.format(bigtoolspath), './workdir/test_out_bigtools.bedGraph', '-b ./workdir/ENCFF937MNZ.bigWig', '-b ./workdir/ENCFF447DHW.bigWig', '-t 8']],
# Entries below target /dev/null; since that path has no file extension,
# the bigtools commands pass `--output-type bedgraph` explicitly.
'ucsc': [['{}/bigWigMerge'.format(ucsctoolspath), './workdir/ENCFF937MNZ.bigWig', './workdir/ENCFF447DHW.bigWig', '/dev/null']],
'bigtools_1thread': [['{}/bigwigmerge'.format(bigtoolspath), '/dev/null', '-b ./workdir/ENCFF937MNZ.bigWig', '-b ./workdir/ENCFF447DHW.bigWig', '--output-type', 'bedgraph', '-t 1']],
'bigtools_2thread': [['{}/bigwigmerge'.format(bigtoolspath), '/dev/null', '-b ./workdir/ENCFF937MNZ.bigWig', '-b ./workdir/ENCFF447DHW.bigWig', '--output-type', 'bedgraph', '-t 2']],
'bigtools_4thread': [['{}/bigwigmerge'.format(bigtoolspath), '/dev/null', '-b ./workdir/ENCFF937MNZ.bigWig', '-b ./workdir/ENCFF447DHW.bigWig', '--output-type', 'bedgraph', '-t 4']],
'bigtools_6thread': [['{}/bigwigmerge'.format(bigtoolspath), '/dev/null', '-b ./workdir/ENCFF937MNZ.bigWig', '-b ./workdir/ENCFF447DHW.bigWig', '--output-type', 'bedgraph', '-t 6']],
'bigtools_8thread': [['{}/bigwigmerge'.format(bigtoolspath), '/dev/null', '-b ./workdir/ENCFF937MNZ.bigWig', '-b ./workdir/ENCFF447DHW.bigWig', '--output-type', 'bedgraph', '-t 8']],
}
compare(comp, 'bigwigmerge_bedgraph', benchmarks)

Expand Down Expand Up @@ -225,12 +225,12 @@ def bigwigtobedgraph(comp):
global ucsctoolspath
global bigtoolspath
benchmarks = {
'ucsc': [['{}/bigWigToBedGraph'.format(ucsctoolspath), './workdir/ENCFF841DHZ.bigWig', './workdir/test_out_ucsc.bedGraph']],
'bigtools_1thread': [['{}/bigwigtobedgraph'.format(bigtoolspath), './workdir/ENCFF841DHZ.bigWig', './workdir/test_out_bigtools.bedGraph', '-t 1']],
'bigtools_2thread': [['{}/bigwigtobedgraph'.format(bigtoolspath), './workdir/ENCFF841DHZ.bigWig', './workdir/test_out_bigtools.bedGraph', '-t 2']],
'bigtools_4thread': [['{}/bigwigtobedgraph'.format(bigtoolspath), './workdir/ENCFF841DHZ.bigWig', './workdir/test_out_bigtools.bedGraph', '-t 4']],
'bigtools_6thread': [['{}/bigwigtobedgraph'.format(bigtoolspath), './workdir/ENCFF841DHZ.bigWig', './workdir/test_out_bigtools.bedGraph', '-t 6']],
'bigtools_8thread': [['{}/bigwigtobedgraph'.format(bigtoolspath), './workdir/ENCFF841DHZ.bigWig', './workdir/test_out_bigtools.bedGraph', '-t 8']],
'ucsc': [['{}/bigWigToBedGraph'.format(ucsctoolspath), './workdir/ENCFF841DHZ.bigWig', '/dev/null']],
'bigtools_1thread': [['{}/bigwigtobedgraph'.format(bigtoolspath), './workdir/ENCFF841DHZ.bigWig', '/dev/null', '-t 1']],
'bigtools_2thread': [['{}/bigwigtobedgraph'.format(bigtoolspath), './workdir/ENCFF841DHZ.bigWig', '/dev/null', '-t 2']],
'bigtools_4thread': [['{}/bigwigtobedgraph'.format(bigtoolspath), './workdir/ENCFF841DHZ.bigWig', '/dev/null', '-t 4']],
'bigtools_6thread': [['{}/bigwigtobedgraph'.format(bigtoolspath), './workdir/ENCFF841DHZ.bigWig', '/dev/null', '-t 6']],
'bigtools_8thread': [['{}/bigwigtobedgraph'.format(bigtoolspath), './workdir/ENCFF841DHZ.bigWig', '/dev/null', '-t 8']],
}
compare(comp, 'bigwigtobedgraph', benchmarks)

Expand Down
213 changes: 8 additions & 205 deletions bench/bench_plots.ipynb

Large diffs are not rendered by default.

37 changes: 30 additions & 7 deletions src/bin/bigwigmerge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,11 @@ struct Cli {
#[arg(short = 't', long)]
#[arg(default_value_t = 6)]
nthreads: usize,

/// Can be `bigwig` or `bedgraph` (case-insensitive). If not specified,
/// will be inferred from the output file ending.
#[arg(long)]
output_type: Option<String>,
}

fn main() -> Result<(), Box<dyn Error>> {
Expand Down Expand Up @@ -389,8 +394,30 @@ fn main() -> Result<(), Box<dyn Error>> {
let (iter, chrom_map) =
get_merged_vals(bigwigs, 10, matches.threshold, matches.adjust, matches.clip)?;

match output {
output if output.ends_with(".bw") || output.ends_with(".bigWig") => {
/// Format of the merged output written by this tool.
enum OutputType {
    /// Binary bigWig file (`.bw` / `.bigWig`).
    BigWig,
    /// Plain-text bedGraph file (`.bedGraph`).
    BedGraph,
}

// Decide the output format. An explicit `--output-type` always takes
// precedence (and an unrecognized value is an error even if the file
// extension would have been valid); otherwise fall back to the output
// file's extension. Both comparisons are case-insensitive, so everything is
// lowercased once up front and compared against *lowercase* literals.
// Comparing a lowercased string against `.bigWig` / `.bedGraph` can never
// match, which previously made those extensions unusable without the flag.
let requested_type = matches.output_type.as_deref().map(str::to_lowercase);
let output_lower = output.to_lowercase();
let output_type = match requested_type.as_deref() {
    Some("bigwig") => OutputType::BigWig,
    Some("bedgraph") => OutputType::BedGraph,
    None if output_lower.ends_with(".bw") || output_lower.ends_with(".bigwig") => {
        OutputType::BigWig
    }
    None if output_lower.ends_with(".bedgraph") => OutputType::BedGraph,
    _ => {
        eprintln!("Unable to determine output file format. \
            The output file must either end with `.bw` or `.bigWig` for bigwigs or `.bedGraph` for bedGraphs; or \
            `--output-type` must be set to either `bigwig` or `bedgraph`.");
        return Ok(());
    }
};
match output_type {
OutputType::BigWig => {
let outb = BigWigWrite::create_file(output);
let runtime = if nthreads == 1 {
runtime::Builder::new_current_thread().build().unwrap()
Expand All @@ -405,7 +432,7 @@ fn main() -> Result<(), Box<dyn Error>> {
};
outb.write(chrom_map, all_values, runtime)?;
}
output if output.ends_with(".bedGraph") => {
OutputType::BedGraph => {
// TODO: convert to multi-threaded
use std::io::Write;

Expand All @@ -423,10 +450,6 @@ fn main() -> Result<(), Box<dyn Error>> {
}
}
}
_ => {
eprintln!("Invalid output file. Must end with .bw or .bigWig for bigwig or .bedGraph for bedGraph");
return Ok(());
}
}

//TODO: fails with too many open files
Expand Down
2 changes: 1 addition & 1 deletion src/bin/bigwigtobedgraph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ pub fn write_bg_singlethreaded<R: SeekableRead + Send + 'static>(

let mut chroms: Vec<ChromInfo> = bigwig.chroms().to_vec();
chroms.sort_by(|a, b| a.name.cmp(&b.name));
let mut writer = io::BufWriter::new(out_file);
let mut writer = io::BufWriter::with_capacity(32 * 1000, out_file);
for chrom in chroms {
let start = start.unwrap_or(0);
let end = end.unwrap_or(chrom.length);
Expand Down

0 comments on commit 3fc919d

Please sign in to comment.