From af7738c27fe7f67ed1abb17e0569917e5091b1a2 Mon Sep 17 00:00:00 2001
From: Nezar Abdennur <nabdennur@gmail.com>
Date: Wed, 10 Apr 2024 13:09:52 -0400
Subject: [PATCH] docs: numpy-style docstrings for methods

---
 pybigtools/src/lib.rs | 209 +++++++++++++++++++++++++++++++++---------
 1 file changed, 168 insertions(+), 41 deletions(-)
diff --git a/pybigtools/src/lib.rs b/pybigtools/src/lib.rs
index 9c0fc40..16933bd 100644
--- a/pybigtools/src/lib.rs
+++ b/pybigtools/src/lib.rs
@@ -981,6 +981,7 @@ impl BBIRead {
         }
     }
 
+    /// Return a dict of information about the BBI file.
     fn info(&mut self, py: Python<'_>) -> PyResult<PyObject> {
         let (info, summary) = match &mut self.bbi {
             BBIReadRaw::Closed => return Err(BBIFileClosed::new_err("File is closed.")),
@@ -1043,6 +1044,8 @@ impl BBIRead {
         Ok(info)
     }
 
+    /// Return a list of sizes in bases of the summary intervals used in each
+    /// of the zoom levels (i.e. reduction levels) of the BBI file.
     fn zooms(&self) -> PyResult<Vec<u32>> {
         let zooms = match &self.bbi {
             BBIReadRaw::Closed => return Err(BBIFileClosed::new_err("File is closed.")),
@@ -1058,20 +1061,38 @@ impl BBIRead {
         Ok(zooms.iter().map(|z| z.reduction_level).collect())
     }
 
-    /// Returns the autosql of this bbi file.
+    /// Return the autoSql schema definition of this BBI file.
     ///
-    /// For bigBeds, this comes directly from the autosql stored in the file.
-    /// For bigWigs, the autosql returned matches that of a bedGraph file.
+    /// For BigBeds, this schema comes directly from the autoSql string stored
+    /// in the file. For BigWigs, the schema generated describes a bedGraph
+    /// file.
     ///
-    /// By default, the autosql is returned as a string. Passing `parse = true`
-    /// returns instead a dictionary of the format:
-    /// ```
-    /// {
-    ///   "name": <declared name>,
-    ///   "comment": <declaration coment>,
-    ///   "fields": [(<field name>, <field type>, <field comment>), ...],
-    /// }
-    /// ```
+    /// Parameters
+    /// ----------
+    /// parse : bool, optional [default: False]
+    ///     If True, return the schema as a dictionary. If False, return the
+    ///     schema as a string. Default is False.
+    ///
+    /// Returns
+    /// -------
+    /// schema : str or dict
+    ///     The autoSql schema of the BBI file. If `parse` is True, the schema
+    ///     is returned as a dictionary of the format:
+    ///
+    ///     ```
+    ///     {
+    ///         "name": <declared name>,
+    ///         "comment": <declaration comment>,
+    ///         "fields": [(<field name>, <field type>, <field comment>), ...],
+    ///     }
+    ///     ```
+    ///
+    /// See Also
+    /// --------
+    /// is_bigwig : Check if the BBI file is a bigWig.
+    /// is_bigbed : Check if the BBI file is a bigBed.
+    /// info : Get information about the BBI file.
+    /// zooms : Get the zoom levels of the BBI file.
     #[pyo3(signature = (parse = false))]
     fn sql(&mut self, py: Python, parse: bool) -> PyResult<PyObject> {
         pub const BEDGRAPH: &str = r#"table bedGraph
@@ -1126,19 +1147,36 @@ impl BBIRead {
         Ok(obj)
     }
 
-    /// Returns the records of a given range on a chromosome.
+    /// Return the records of a given range on a chromosome.
     ///
-    /// The result is an iterator of tuples. For bigWigs, these tuples are in
-    /// the format (start: int, end: int, value: float). For bigBeds, these
+    /// The result is an iterator of tuples. For BigWigs, these tuples are in
+    /// the format (start: int, end: int, value: float). For BigBeds, these
     /// tuples are in the format (start: int, end: int, ...), where the "rest"
     /// fields are split by whitespace.
     ///
-    /// Missing values in bigWigs will results in non-contiguous records.
+    /// Parameters
+    /// ----------
+    /// chrom : str
+    ///     Name of the chromosome.
+    /// start, end : int, optional
+    ///     The range to get values for. If end is not provided, it defaults to
+    ///     the length of the chromosome. If start is not provided, it defaults
+    ///     to the beginning of the chromosome.
+    ///
+    /// Returns
+    /// -------
+    /// Iterator[tuple[int, int, float] or tuple[int, int, ...]]
+    ///     An iterator of tuples in the format (start: int, end: int, value:
+    ///     float) for BigWigs, or (start: int, end: int, *rest) for BigBeds.
     ///
-    /// The chrom argument is the name of the chromosome.  
-    /// The start and end arguments denote the range to get values for.
-    ///  If end is not provided, it defaults to the length of the chromosome.
-    ///  If start is not provided, it defaults to the beginning of the chromosome.
+    /// Notes
+    /// -----
+    /// Missing values in BigWigs will result in non-contiguous records.
+    ///
+    /// See Also
+    /// --------
+    /// zoom_records : Get the zoom records of a given range on a chromosome.
+    /// values : Get the values of a given range on a chromosome.
     fn records(
         &mut self,
         py: Python<'_>,
@@ -1196,15 +1234,52 @@ impl BBIRead {
         }
     }
 
-    /// Returns the zoom records of a given range on a chromosome for a given zoom level.
+    /// Return the zoom records of a given range on a chromosome for a given
+    /// zoom level.
     ///
     /// The result is an iterator of tuples. These tuples are in the format
     /// (start: int, end: int, summary: dict).
     ///
-    /// The chrom argument is the name of the chromosome.  
-    /// The start and end arguments denote the range to get values for.
-    ///  If end is not provided, it defaults to the length of the chromosome.
-    ///  If start is not provided, it defaults to the beginning of the chromosome.
+    /// Parameters
+    /// ----------
+    /// reduction_level : int
+    ///     The zoom level to use, as a resolution in bases. Use the ``zooms``
+    ///     method to get a list of available zoom levels.
+    /// chrom : str
+    ///     Name of the chromosome.
+    /// start, end : int, optional
+    ///     The range to get values for. If end is not provided, it defaults
+    ///     to the length of the chromosome. If start is not provided, it
+    ///     defaults to the beginning of the chromosome.
+    ///
+    /// Returns
+    /// -------
+    /// Iterator[tuple[int, int, dict]]
+    ///     An iterator of tuples in the format (start: int, end: int,
+    ///     summary: dict).
+    ///
+    /// Notes
+    /// -----
+    /// The summary dictionary contains the following keys:
+    ///
+    /// - ``total_items``: The number of items in the interval.
+    /// - ``bases_covered``: The number of bases covered by the interval.
+    /// - ``min_val``: The minimum value in the interval.
+    /// - ``max_val``: The maximum value in the interval.
+    /// - ``sum``: The sum of all values in the interval.
+    /// - ``sum_squares``: The sum of the squares of all values in the interval.
+    ///
+    /// For BigWigs, the summary statistics are derived from the unique
+    /// **signal values** associated with each base in the interval.
+    ///
+    /// For BigBeds, the summary statistics instead are derived from the
+    /// **number of BED intervals** overlapping each base in the interval.
+    ///
+    /// See Also
+    /// --------
+    /// zooms : Get a list of available zoom levels.
+    /// records : Get the records of a given range on a chromosome.
+    /// values : Get the values of a given range on a chromosome.
     fn zoom_records(
         &mut self,
         reduction_level: u32,
@@ -1274,21 +1349,66 @@ impl BBIRead {
         }
     }
 
-    /// Returns the values of a given range on a chromosome.
+    /// Return the values of a given range on a chromosome as a numpy array.
+    ///
+    /// For BigWigs, the returned values or summary statistics are derived
+    /// from the unique **signal values** associated with each base.
     ///
-    /// For bigWigs, the result is an array of length (end - start).
-    /// If a value does not exist in the bigwig for a specific base, it will be nan.
+    /// For BigBeds, the returned values or summary statistics instead are
+    /// derived from the **number of BED intervals** overlapping each base.
     ///
-    /// For bigBeds, the returned array instead represents a pileup of the count
-    /// of intervals overlapping each base.
+    /// Parameters
+    /// ----------
+    /// chrom : str
+    ///     Name of the chromosome.
+    /// start, end : int, optional
+    ///     The range to get values for. If end is not provided, it defaults
+    ///     to the length of the chromosome. If start is not provided, it
+    ///     defaults to the beginning of the chromosome.
+    /// bins : int, optional
+    ///     If provided, the query interval will be divided into equally spaced
+    ///     bins and the values in each bin will be interpolated or summarized.
+    ///     If not provided, the values will be returned for each base.
+    /// summary : Literal["mean", "min", "max"], optional [default: "mean"]
+    ///     The summary statistic to use. Currently supported statistics are
+    ///     ``mean``, ``min``, and ``max``.
+    /// exact : bool, optional [default: False]
+    ///     If True and ``bins`` is specified, return exact summary statistic
+    ///     values instead of interpolating from the optimal zoom level.
+    ///     Default is False.
+    /// missing : float, optional [default: 0.0]
+    ///     Fill-in value for unreported data in valid regions. Default is 0.
+    /// oob : float, optional [default: NaN]
+    ///     Fill-in value for out-of-bounds regions. Default is NaN.
+    /// arr : numpy.ndarray, optional
+    ///     If provided, the values will be written to this array or array
+    ///     view. The array must be of the correct size and type.
     ///
-    /// The chrom argument is the name of the chromosome.  
-    /// The start and end arguments denote the range to get values for.  
-    ///  If end is not provided, it defaults to the length of the chromosome.  
-    ///  If start is not provided, it defaults to the beginning of the chromosome.
-    /// The default oob value is `numpy.nan`.
+    /// Returns
+    /// -------
+    /// numpy.ndarray
+    ///     The signal values of the BigWig or BigBed in the specified range.
     ///
-    /// This returns a numpy array.
+    /// Notes
+    /// -----
+    /// A BigWig file encodes a step function, and the value at
+    /// a base is given by the signal value of the unique interval that
+    /// contains that base.
+    ///
+    /// A BigBed file encodes a collection of (possibly overlapping) intervals
+    /// which may or may not be associated with quantitative scores. The
+    /// "value" at a given base used here summarizes the number of intervals
+    /// overlapping that base, not any particular score.
+    ///
+    /// If a number of bins is requested and ``exact`` is False, the summarized
+    /// data is interpolated from the closest available zoom level. If you
+    /// need accurate summary data and are okay with a small trade-off in speed,
+    /// set ``exact`` to True.
+    ///
+    /// See Also
+    /// --------
+    /// records : Get the records of a given range on a chromosome.
+    /// zoom_records : Get the zoom records of a given range on a chromosome.
     #[pyo3(
         signature = (chrom, start, end, bins=None, summary="mean".to_string(), exact=false, missing=0.0, oob=f64::NAN, arr=None),
         text_signature = r#"(chrom, start, end, bins=None, summary="mean", exact=False, missing=0.0, oob=..., arr=None)"#,
@@ -1341,12 +1461,19 @@ impl BBIRead {
         }
     }
 
-    /// Returns the chromosomes in a bigwig, and their lengths.  
+    /// Return the names of chromosomes in a BBI file and their lengths.
+    ///
+    /// Parameters
+    /// ----------
+    /// chrom : str or None
+    ///     The name of the chromosome to get the length of. If None, then a
+    ///     dictionary of all chromosome sizes will be returned. If the
+    ///     chromosome doesn't exist, returns None.
     ///
-    /// The chroms argument can be either String or None.  
-    ///  If it is None, then all chroms will be returned.  
-    ///  If it is a String, then the length of that chromosome will be returned.  
-    ///  If the chromosome doesn't exist, nothing will be returned.  
+    /// Returns
+    /// -------
+    /// int or Dict[str, int] or None
+    ///     Chromosome length, a dictionary of all chromosome lengths, or None.
     fn chroms(&mut self, py: Python, chrom: Option<String>) -> PyResult<Option<PyObject>> {
         fn get_chrom_obj<B: bigtools::BBIRead>(
             b: &B,