From af7738c27fe7f67ed1abb17e0569917e5091b1a2 Mon Sep 17 00:00:00 2001 From: Nezar Abdennur Date: Wed, 10 Apr 2024 13:09:52 -0400 Subject: [PATCH] docs: numpy-style docstrings for methods --- pybigtools/src/lib.rs | 209 +++++++++++++++++++++++++++++++++--------- 1 file changed, 168 insertions(+), 41 deletions(-) diff --git a/pybigtools/src/lib.rs b/pybigtools/src/lib.rs index 9c0fc40..16933bd 100644 --- a/pybigtools/src/lib.rs +++ b/pybigtools/src/lib.rs @@ -981,6 +981,7 @@ impl BBIRead { } } + /// Return a dict of information about the BBI file. fn info(&mut self, py: Python<'_>) -> PyResult { let (info, summary) = match &mut self.bbi { BBIReadRaw::Closed => return Err(BBIFileClosed::new_err("File is closed.")), @@ -1043,6 +1044,8 @@ impl BBIRead { Ok(info) } + /// Return a list of sizes in bases of the summary intervals used in each + /// of the zoom levels (i.e. reduction levels) of the BBI file. fn zooms(&self) -> PyResult> { let zooms = match &self.bbi { BBIReadRaw::Closed => return Err(BBIFileClosed::new_err("File is closed.")), @@ -1058,20 +1061,38 @@ impl BBIRead { Ok(zooms.iter().map(|z| z.reduction_level).collect()) } - /// Returns the autosql of this bbi file. + /// Return the autoSql schema definition of this BBI file. /// - /// For bigBeds, this comes directly from the autosql stored in the file. - /// For bigWigs, the autosql returned matches that of a bedGraph file. + /// For BigBeds, this schema comes directly from the autoSql string stored + /// in the file. For BigWigs, the schema generated describes a bedGraph + /// file. /// - /// By default, the autosql is returned as a string. Passing `parse = true` - /// returns instead a dictionary of the format: - /// ``` - /// { - /// "name": , - /// "comment": , - /// "fields": [(, , ), ...], - /// } - /// ``` + /// Parameters + /// ---------- + /// parse : bool, optional [default: False] + /// If True, return the schema as a dictionary. If False, return the + /// schema as a string. Default is False. 
+ /// + /// Returns + /// ------- + /// schema : str or dict + /// The autoSql schema of the BBI file. If `parse` is True, the schema + /// is returned as a dictionary of the format: + /// + /// ``` + /// { + /// "name": , + /// "comment": , + /// "fields": [(, , ), ...], + /// } + /// ``` + /// + /// See Also + /// -------- + /// is_bigwig : Check if the BBI file is a bigWig. + /// is_bigbed : Check if the BBI file is a bigBed. + /// info : Get information about the BBI file. + /// zooms : Get the zoom levels of the BBI file. #[pyo3(signature = (parse = false))] fn sql(&mut self, py: Python, parse: bool) -> PyResult { pub const BEDGRAPH: &str = r#"table bedGraph @@ -1126,19 +1147,36 @@ impl BBIRead { Ok(obj) } - /// Returns the records of a given range on a chromosome. + /// Return the records of a given range on a chromosome. /// - /// The result is an iterator of tuples. For bigWigs, these tuples are in - /// the format (start: int, end: int, value: float). For bigBeds, these + /// The result is an iterator of tuples. For BigWigs, these tuples are in + /// the format (start: int, end: int, value: float). For BigBeds, these /// tuples are in the format (start: int, end: int, ...), where the "rest" /// fields are split by whitespace. /// - /// Missing values in bigWigs will results in non-contiguous records. + /// Parameters + /// ---------- + /// chrom : str + /// Name of the chromosome. + /// start, end : int, optional + /// The range to get values for. If end is not provided, it defaults to + /// the length of the chromosome. If start is not provided, it defaults + /// to the beginning of the chromosome. + /// + /// Returns + /// ------- + /// Iterator[tuple[int, int, float] or tuple[int, int, ...]] + /// An iterator of tuples in the format (start: int, end: int, value: + /// float) for BigWigs, or (start: int, end: int, *rest) for BigBeds. /// - /// The chrom argument is the name of the chromosome. 
- /// The start and end arguments denote the range to get values for. - /// If end is not provided, it defaults to the length of the chromosome. - /// If start is not provided, it defaults to the beginning of the chromosome. + /// Notes + /// ----- + /// Missing values in BigWigs will result in non-contiguous records. + /// + /// See Also + /// -------- + /// zoom_records : Get the zoom records of a given range on a chromosome. + /// values : Get the values of a given range on a chromosome. fn records( &mut self, py: Python<'_>, @@ -1196,15 +1234,52 @@ impl BBIRead { } } - /// Returns the zoom records of a given range on a chromosome for a given zoom level. + /// Return the zoom records of a given range on a chromosome for a given + /// zoom level. /// /// The result is an iterator of tuples. These tuples are in the format /// (start: int, end: int, summary: dict). /// - /// The chrom argument is the name of the chromosome. - /// The start and end arguments denote the range to get values for. - /// If end is not provided, it defaults to the length of the chromosome. - /// If start is not provided, it defaults to the beginning of the chromosome. + /// Parameters + /// ---------- + /// reduction_level : int + /// The zoom level to use, as a resolution in bases. Use the ``zooms`` + /// method to get a list of available zoom levels. + /// chrom : str + /// Name of the chromosome. + /// start, end : int, optional + /// The range to get values for. If end is not provided, it defaults + /// to the length of the chromosome. If start is not provided, it + /// defaults to the beginning of the chromosome. + /// + /// Returns + /// ------- + /// Iterator[tuple[int, int, dict]] + /// An iterator of tuples in the format (start: int, end: int, + /// summary: dict). + /// + /// Notes + /// ----- + /// The summary dictionary contains the following keys: + /// + /// - ``total_items``: The number of items in the interval. 
+ /// - ``bases_covered``: The number of bases covered by the interval. + /// - ``min_val``: The minimum value in the interval. + /// - ``max_val``: The maximum value in the interval. + /// - ``sum``: The sum of all values in the interval. + /// - ``sum_squares``: The sum of the squares of all values in the interval. + /// + /// For BigWigs, the summary statistics are derived from the unique + /// **signal values** associated with each base in the interval. + /// + /// For BigBeds, the summary statistics instead are derived from the + /// **number of BED intervals** overlapping each base in the interval. + /// + /// See Also + /// -------- + /// zooms : Get a list of available zoom levels. + /// records : Get the records of a given range on a chromosome. + /// values : Get the values of a given range on a chromosome. fn zoom_records( &mut self, reduction_level: u32, @@ -1274,21 +1349,66 @@ impl BBIRead { } } - /// Returns the values of a given range on a chromosome. + /// Return the values of a given range on a chromosome as a numpy array. + /// + /// For BigWigs, the returned values or summary statistics are derived + /// from the unique **signal values** associated with each base. /// - /// For bigWigs, the result is an array of length (end - start). - /// If a value does not exist in the bigwig for a specific base, it will be nan. + /// For BigBeds, the returned values or summary statistics instead are + /// derived from the **number of BED intervals** overlapping each base. /// - /// For bigBeds, the returned array instead represents a pileup of the count - /// of intervals overlapping each base. + /// Parameters + /// ---------- + /// chrom : str + /// Name of the chromosome. + /// start, end : int, optional + /// The range to get values for. If end is not provided, it defaults + /// to the length of the chromosome. If start is not provided, it + /// defaults to the beginning of the chromosome. 
+ /// bins : int, optional + /// If provided, the query interval will be divided into equally spaced + /// bins and the values in each bin will be interpolated or summarized. + /// If not provided, the values will be returned for each base. + /// summary : Literal["mean", "min", "max"], optional [default: "mean"] + /// The summary statistic to use. Currently supported statistics are + /// ``mean``, ``min``, and ``max``. + /// exact : bool, optional [default: False] + /// If True and ``bins`` is specified, return exact summary statistic + /// values instead of interpolating from the optimal zoom level. + /// Default is False. + /// missing : float, optional [default: 0.0] + /// Fill-in value for unreported data in valid regions. Default is 0. + /// oob : float, optional [default: NaN] + /// Fill-in value for out-of-bounds regions. Default is NaN. + /// arr : numpy.ndarray, optional + /// If provided, the values will be written to this array or array + /// view. The array must be of the correct size and type. /// - /// The chrom argument is the name of the chromosome. - /// The start and end arguments denote the range to get values for. - /// If end is not provided, it defaults to the length of the chromosome. - /// If start is not provided, it defaults to the beginning of the chromosome. - /// The default oob value is `numpy.nan`. + /// Returns + /// ------- + /// numpy.ndarray + /// The signal values of the bigwig or bigbed in the specified range. /// - /// This returns a numpy array. + /// Notes + /// ----- + /// A BigWig file encodes a step function, and the value at + /// a base is given by the signal value of the unique interval that + /// contains that base. + /// + /// A BigBed file encodes a collection of (possibly overlapping) intervals + /// which may or may not be associated with quantitative scores. The + /// "value" at a given base used here summarizes the number of intervals + /// overlapping that base, not any particular score. 
+ /// + /// If a number of bins is requested and ``exact`` is False, the summarized + /// data is interpolated from the closest available zoom level. If you + /// need accurate summary data and are okay with a small trade-off in speed, + /// set ``exact`` to True. + /// + /// See Also + /// -------- + /// records : Get the records of a given range on a chromosome. + /// zoom_records : Get the zoom records of a given range on a chromosome. #[pyo3( signature = (chrom, start, end, bins=None, summary="mean".to_string(), exact=false, missing=0.0, oob=f64::NAN, arr=None), text_signature = r#"(chrom, start, end, bins=None, summary="mean", exact=False, missing=0.0, oob=..., arr=None)"#, @@ -1341,12 +1461,19 @@ impl BBIRead { } } - /// Returns the chromosomes in a bigwig, and their lengths. + /// Return the names of chromosomes in a BBI file and their lengths. + /// + /// Parameters + /// ---------- + /// chrom : str or None + /// The name of the chromosome to get the length of. If None, then a + /// dictionary of all chromosome sizes will be returned. If the + /// chromosome doesn't exist, returns None. /// - /// The chroms argument can be either String or None. - /// If it is None, then all chroms will be returned. - /// If it is a String, then the length of that chromosome will be returned. - /// If the chromosome doesn't exist, nothing will be returned. + /// Returns + /// ------- + /// int or Dict[str, int] or None + /// Chromosome length or a dictionary of chromosome lengths. fn chroms(&mut self, py: Python, chrom: Option) -> PyResult> { fn get_chrom_obj( b: &B,