From 7d7e05ca2374e60a2bfef2953276121c1aa2c3ee Mon Sep 17 00:00:00 2001
From: ntjohnson1 <24689722+ntjohnson1@users.noreply.github.com>
Date: Wed, 18 Mar 2026 14:15:02 -0400
Subject: [PATCH 1/8] Verify all non-alias functions have doc string

---
 python/datafusion/functions.py | 37 +++++++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py
index e85d710e7..bcbcb4de9 100644
--- a/python/datafusion/functions.py
+++ b/python/datafusion/functions.py
@@ -1859,6 +1859,18 @@ def to_char(arg: Expr, formatter: Expr) -> Expr:
     For usage of ``formatter`` see the rust chrono package ``strftime`` package.
 
     [Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
+
+    Examples:
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["2021-01-01T00:00:00"]})
+        >>> result = df.select(
+        ...     dfn.functions.to_char(
+        ...         dfn.functions.to_timestamp(dfn.col("a")),
+        ...         dfn.lit("%Y/%m/%d"),
+        ...     ).alias("formatted")
+        ... )
+        >>> result.collect_column("formatted")[0].as_py()
+        '2021/01/01'
     """
     return Expr(f.to_char(arg.expr, formatter.expr))
 
@@ -1878,6 +1890,14 @@ def to_date(arg: Expr, *formatters: Expr) -> Expr:
     For usage of ``formatters`` see the rust chrono package ``strftime`` package.
 
     [Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
+
+    Examples:
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["2021-07-20"]})
+        >>> result = df.select(
+        ...     dfn.functions.to_date(dfn.col("a")).alias("dt"))
+        >>> str(result.collect_column("dt")[0].as_py())
+        '2021-07-20'
     """
     return Expr(f.to_date(arg.expr, *_unwrap_exprs(formatters)))
 
@@ -1899,6 +1919,14 @@ def to_time(arg: Expr, *formatters: Expr) -> Expr:
     For usage of ``formatters`` see the rust chrono package ``strftime`` package.
 
     [Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
+
+    Examples:
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["14:30:00"]})
+        >>> result = df.select(
+        ...     dfn.functions.to_time(dfn.col("a")).alias("t"))
+        >>> str(result.collect_column("t")[0].as_py())
+        '14:30:00'
     """
     return Expr(f.to_time(arg.expr, *_unwrap_exprs(formatters)))
 
@@ -3730,15 +3758,6 @@ def var_sample(expression: Expr, filter: Expr | None = None) -> Expr:
     """Computes the sample variance of the argument.
 
     This is an alias for :py:func:`var_samp`.
-
-    Examples:
-        >>> ctx = dfn.SessionContext()
-        >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0]})
-        >>> result = df.aggregate(
-        ...     [], [dfn.functions.var_sample(dfn.col("a")).alias("v")]
-        ... )
-        >>> result.collect_column("v")[0].as_py()
-        1.0
     """
     return var_samp(expression, filter)
 

From 2b9757826ee6195d8d00779e387c47fb49dd1d60 Mon Sep 17 00:00:00 2001
From: ntjohnson1 <24689722+ntjohnson1@users.noreply.github.com>
Date: Wed, 18 Mar 2026 14:24:57 -0400
Subject: [PATCH 2/8] Move all alias for statements to see also blocks and
 confirm no examples

---
 python/datafusion/functions.py | 172 +++++++++++++++++++--------------
 1 file changed, 100 insertions(+), 72 deletions(-)

diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py
index bcbcb4de9..3e3b713b2 100644
--- a/python/datafusion/functions.py
+++ b/python/datafusion/functions.py
@@ -386,8 +386,6 @@ def list_to_string(expr: Expr, delimiter: Expr) -> Expr:
 def list_join(expr: Expr, delimiter: Expr) -> Expr:
     """Converts each element to its text representation.
 
-    This is an alias for :py:func:`array_to_string`.
-
     See Also:
         This is an alias for :py:func:`array_to_string`.
     """
@@ -1021,7 +1019,8 @@ def initcap(string: Expr) -> Expr:
 def instr(string: Expr, substring: Expr) -> Expr:
     """Finds the position from where the ``substring`` matches the ``string``.
 
-    This is an alias for :py:func:`strpos`.
+    See Also:
+        This is an alias for :py:func:`strpos`.
     """
     return strpos(string, substring)
 
@@ -1290,7 +1289,8 @@ def pi() -> Expr:
 def position(string: Expr, substring: Expr) -> Expr:
     """Finds the position from where the ``substring`` matches the ``string``.
 
-    This is an alias for :py:func:`strpos`.
+    See Also:
+        This is an alias for :py:func:`strpos`.
     """
     return strpos(string, substring)
 
@@ -1313,7 +1313,8 @@ def power(base: Expr, exponent: Expr) -> Expr:
 def pow(base: Expr, exponent: Expr) -> Expr:
     """Returns ``base`` raised to the power of ``exponent``.
 
-    This is an alias of :py:func:`power`.
+    See Also:
+        This is an alias of :py:func:`power`.
     """
     return power(base, exponent)
 
@@ -2081,7 +2082,8 @@ def current_time() -> Expr:
 def datepart(part: Expr, date: Expr) -> Expr:
     """Return a specified part of a date.
 
-    This is an alias for :py:func:`date_part`.
+    See Also:
+        This is an alias for :py:func:`date_part`.
     """
     return date_part(part, date)
 
@@ -2104,7 +2106,8 @@ def date_part(part: Expr, date: Expr) -> Expr:
 def extract(part: Expr, date: Expr) -> Expr:
     """Extracts a subfield from the date.
 
-    This is an alias for :py:func:`date_part`.
+    See Also:
+        This is an alias for :py:func:`date_part`.
     """
     return date_part(part, date)
 
@@ -2130,7 +2133,8 @@ def date_trunc(part: Expr, date: Expr) -> Expr:
 def datetrunc(part: Expr, date: Expr) -> Expr:
     """Truncates the date to a specified level of precision.
 
-    This is an alias for :py:func:`date_trunc`.
+    See Also:
+        This is an alias for :py:func:`date_trunc`.
     """
     return date_trunc(part, date)
 
@@ -2250,7 +2254,8 @@ def make_array(*args: Expr) -> Expr:
 def make_list(*args: Expr) -> Expr:
     """Returns an array using the specified input expressions.
 
-    This is an alias for :py:func:`make_array`.
+    See Also:
+        This is an alias for :py:func:`make_array`.
     """
     return make_array(*args)
 
@@ -2258,7 +2263,8 @@ def make_list(*args: Expr) -> Expr:
 def array(*args: Expr) -> Expr:
     """Returns an array using the specified input expressions.
 
-    This is an alias for :py:func:`make_array`.
+    See Also:
+        This is an alias for :py:func:`make_array`.
     """
     return make_array(*args)
 
@@ -2419,7 +2425,8 @@ def array_append(array: Expr, element: Expr) -> Expr:
 def array_push_back(array: Expr, element: Expr) -> Expr:
     """Appends an element to the end of an array.
 
-    This is an alias for :py:func:`array_append`.
+    See Also:
+        This is an alias for :py:func:`array_append`.
     """
     return array_append(array, element)
 
@@ -2427,7 +2434,8 @@ def array_push_back(array: Expr, element: Expr) -> Expr:
 def list_append(array: Expr, element: Expr) -> Expr:
     """Appends an element to the end of an array.
 
-    This is an alias for :py:func:`array_append`.
+    See Also:
+        This is an alias for :py:func:`array_append`.
     """
     return array_append(array, element)
 
@@ -2435,7 +2443,8 @@ def list_append(array: Expr, element: Expr) -> Expr:
 def list_push_back(array: Expr, element: Expr) -> Expr:
     """Appends an element to the end of an array.
 
-    This is an alias for :py:func:`array_append`.
+    See Also:
+        This is an alias for :py:func:`array_append`.
     """
     return array_append(array, element)
 
@@ -2459,7 +2468,8 @@ def array_concat(*args: Expr) -> Expr:
 def array_cat(*args: Expr) -> Expr:
     """Concatenates the input arrays.
 
-    This is an alias for :py:func:`array_concat`.
+    See Also:
+        This is an alias for :py:func:`array_concat`.
     """
     return array_concat(*args)
 
@@ -2501,7 +2511,8 @@ def array_distinct(array: Expr) -> Expr:
 def list_cat(*args: Expr) -> Expr:
     """Concatenates the input arrays.
 
-    This is an alias for :py:func:`array_concat`, :py:func:`array_cat`.
+    See Also:
+        This is an alias for :py:func:`array_concat`, :py:func:`array_cat`.
     """
     return array_concat(*args)
 
@@ -2509,7 +2520,8 @@ def list_cat(*args: Expr) -> Expr:
 def list_concat(*args: Expr) -> Expr:
     """Concatenates the input arrays.
 
-    This is an alias for :py:func:`array_concat`, :py:func:`array_cat`.
+    See Also:
+        This is an alias for :py:func:`array_concat`, :py:func:`array_cat`.
     """
     return array_concat(*args)
 
@@ -2517,7 +2529,8 @@ def list_concat(*args: Expr) -> Expr:
 def list_distinct(array: Expr) -> Expr:
     """Returns distinct values from the array after removing duplicates.
 
-    This is an alias for :py:func:`array_distinct`.
+    See Also:
+        This is an alias for :py:func:`array_distinct`.
     """
     return array_distinct(array)
 
@@ -2525,7 +2538,8 @@ def list_distinct(array: Expr) -> Expr:
 def list_dims(array: Expr) -> Expr:
     """Returns an array of the array's dimensions.
 
-    This is an alias for :py:func:`array_dims`.
+    See Also:
+        This is an alias for :py:func:`array_dims`.
     """
     return array_dims(array)
 
@@ -2562,7 +2576,8 @@ def array_empty(array: Expr) -> Expr:
 def array_extract(array: Expr, n: Expr) -> Expr:
     """Extracts the element with the index n from the array.
 
-    This is an alias for :py:func:`array_element`.
+    See Also:
+        This is an alias for :py:func:`array_element`.
     """
     return array_element(array, n)
 
@@ -2570,7 +2585,8 @@ def array_extract(array: Expr, n: Expr) -> Expr:
 def list_element(array: Expr, n: Expr) -> Expr:
     """Extracts the element with the index n from the array.
 
-    This is an alias for :py:func:`array_element`.
+    See Also:
+        This is an alias for :py:func:`array_element`.
     """
     return array_element(array, n)
 
@@ -2578,7 +2594,8 @@ def list_element(array: Expr, n: Expr) -> Expr:
 def list_extract(array: Expr, n: Expr) -> Expr:
     """Extracts the element with the index n from the array.
 
-    This is an alias for :py:func:`array_element`.
+    See Also:
+        This is an alias for :py:func:`array_element`.
     """
     return array_element(array, n)
 
@@ -2600,7 +2617,8 @@ def array_length(array: Expr) -> Expr:
 def list_length(array: Expr) -> Expr:
     """Returns the length of the array.
 
-    This is an alias for :py:func:`array_length`.
+    See Also:
+        This is an alias for :py:func:`array_length`.
     """
     return array_length(array)
 
@@ -2674,7 +2692,8 @@ def array_position(array: Expr, element: Expr, index: int | None = 1) -> Expr:
 def array_indexof(array: Expr, element: Expr, index: int | None = 1) -> Expr:
     """Return the position of the first occurrence of ``element`` in ``array``.
 
-    This is an alias for :py:func:`array_position`.
+    See Also:
+        This is an alias for :py:func:`array_position`.
     """
     return array_position(array, element, index)
 
@@ -2682,7 +2701,8 @@ def array_indexof(array: Expr, element: Expr, index: int | None = 1) -> Expr:
 def list_position(array: Expr, element: Expr, index: int | None = 1) -> Expr:
     """Return the position of the first occurrence of ``element`` in ``array``.
 
-    This is an alias for :py:func:`array_position`.
+    See Also:
+        This is an alias for :py:func:`array_position`.
     """
     return array_position(array, element, index)
 
@@ -2690,7 +2710,8 @@ def list_position(array: Expr, element: Expr, index: int | None = 1) -> Expr:
 def list_indexof(array: Expr, element: Expr, index: int | None = 1) -> Expr:
     """Return the position of the first occurrence of ``element`` in ``array``.
 
-    This is an alias for :py:func:`array_position`.
+    See Also:
+        This is an alias for :py:func:`array_position`.
     """
     return array_position(array, element, index)
 
@@ -2713,7 +2734,8 @@ def array_positions(array: Expr, element: Expr) -> Expr:
 def list_positions(array: Expr, element: Expr) -> Expr:
     """Searches for an element in the array and returns all occurrences.
 
-    This is an alias for :py:func:`array_positions`.
+    See Also:
+        This is an alias for :py:func:`array_positions`.
     """
     return array_positions(array, element)
 
@@ -2735,7 +2757,8 @@ def array_ndims(array: Expr) -> Expr:
 def list_ndims(array: Expr) -> Expr:
     """Returns the number of dimensions of the array.
 
-    This is an alias for :py:func:`array_ndims`.
+    See Also:
+        This is an alias for :py:func:`array_ndims`.
     """
     return array_ndims(array)
 
@@ -2758,7 +2781,8 @@ def array_prepend(element: Expr, array: Expr) -> Expr:
 def array_push_front(element: Expr, array: Expr) -> Expr:
     """Prepends an element to the beginning of an array.
 
-    This is an alias for :py:func:`array_prepend`.
+    See Also:
+        This is an alias for :py:func:`array_prepend`.
     """
     return array_prepend(element, array)
 
@@ -2766,7 +2790,8 @@ def array_push_front(element: Expr, array: Expr) -> Expr:
 def list_prepend(element: Expr, array: Expr) -> Expr:
     """Prepends an element to the beginning of an array.
 
-    This is an alias for :py:func:`array_prepend`.
+    See Also:
+        This is an alias for :py:func:`array_prepend`.
     """
     return array_prepend(element, array)
 
@@ -2774,7 +2799,8 @@ def list_prepend(element: Expr, array: Expr) -> Expr:
 def list_push_front(element: Expr, array: Expr) -> Expr:
     """Prepends an element to the beginning of an array.
 
-    This is an alias for :py:func:`array_prepend`.
+    See Also:
+        This is an alias for :py:func:`array_prepend`.
     """
     return array_prepend(element, array)
 
@@ -2825,7 +2851,8 @@ def array_remove(array: Expr, element: Expr) -> Expr:
 def list_remove(array: Expr, element: Expr) -> Expr:
     """Removes the first element from the array equal to the given value.
 
-    This is an alias for :py:func:`array_remove`.
+    See Also:
+        This is an alias for :py:func:`array_remove`.
     """
     return array_remove(array, element)
 
@@ -2849,7 +2876,8 @@ def array_remove_n(array: Expr, element: Expr, max: Expr) -> Expr:
 def list_remove_n(array: Expr, element: Expr, max: Expr) -> Expr:
     """Removes the first ``max`` elements from the array equal to the given value.
 
-    This is an alias for :py:func:`array_remove_n`.
+    See Also:
+        This is an alias for :py:func:`array_remove_n`.
     """
     return array_remove_n(array, element, max)
 
@@ -2872,7 +2900,8 @@ def array_remove_all(array: Expr, element: Expr) -> Expr:
 def list_remove_all(array: Expr, element: Expr) -> Expr:
     """Removes all elements from the array equal to the given value.
 
-    This is an alias for :py:func:`array_remove_all`.
+    See Also:
+        This is an alias for :py:func:`array_remove_all`.
     """
     return array_remove_all(array, element)
 
@@ -2895,7 +2924,8 @@ def array_repeat(element: Expr, count: Expr) -> Expr:
 def list_repeat(element: Expr, count: Expr) -> Expr:
     """Returns an array containing ``element`` ``count`` times.
 
-    This is an alias for :py:func:`array_repeat`.
+    See Also:
+        This is an alias for :py:func:`array_repeat`.
     """
     return array_repeat(element, count)
 
@@ -2919,7 +2949,8 @@ def array_replace(array: Expr, from_val: Expr, to_val: Expr) -> Expr:
 def list_replace(array: Expr, from_val: Expr, to_val: Expr) -> Expr:
     """Replaces the first occurrence of ``from_val`` with ``to_val``.
 
-    This is an alias for :py:func:`array_replace`.
+    See Also:
+        This is an alias for :py:func:`array_replace`.
     """
     return array_replace(array, from_val, to_val)
 
@@ -2949,7 +2980,8 @@ def list_replace_n(array: Expr, from_val: Expr, to_val: Expr, max: Expr) -> Expr
     Replaces the first ``max`` occurrences of the specified element with another
     specified element.
 
-    This is an alias for :py:func:`array_replace_n`.
+    See Also:
+        This is an alias for :py:func:`array_replace_n`.
     """
     return array_replace_n(array, from_val, to_val, max)
 
@@ -2973,7 +3005,8 @@ def array_replace_all(array: Expr, from_val: Expr, to_val: Expr) -> Expr:
 def list_replace_all(array: Expr, from_val: Expr, to_val: Expr) -> Expr:
     """Replaces all occurrences of ``from_val`` with ``to_val``.
 
-    This is an alias for :py:func:`array_replace_all`.
+    See Also:
+        This is an alias for :py:func:`array_replace_all`.
     """
     return array_replace_all(array, from_val, to_val)
 
@@ -3006,7 +3039,11 @@ def array_sort(array: Expr, descending: bool = False, null_first: bool = False)
 
 
 def list_sort(array: Expr, descending: bool = False, null_first: bool = False) -> Expr:
-    """This is an alias for :py:func:`array_sort`."""
+    """Sorts the array.
+
+    See Also:
+        This is an alias for :py:func:`array_sort`.
+    """
     return array_sort(array, descending=descending, null_first=null_first)
 
 
@@ -3033,7 +3070,8 @@ def array_slice(
 def list_slice(array: Expr, begin: Expr, end: Expr, stride: Expr | None = None) -> Expr:
     """Returns a slice of the array.
 
-    This is an alias for :py:func:`array_slice`.
+    See Also:
+        This is an alias for :py:func:`array_slice`.
     """
     return array_slice(array, begin, end, stride)
 
@@ -3061,7 +3099,8 @@ def array_intersect(array1: Expr, array2: Expr) -> Expr:
 def list_intersect(array1: Expr, array2: Expr) -> Expr:
     """Returns an the intersection of ``array1`` and ``array2``.
 
-    This is an alias for :py:func:`array_intersect`.
+    See Also:
+        This is an alias for :py:func:`array_intersect`.
     """
     return array_intersect(array1, array2)
 
@@ -3093,7 +3132,8 @@ def list_union(array1: Expr, array2: Expr) -> Expr:
 
     Duplicate rows will not be returned.
 
-    This is an alias for :py:func:`array_union`.
+    See Also:
+        This is an alias for :py:func:`array_union`.
     """
     return array_union(array1, array2)
 
@@ -3116,7 +3156,8 @@ def array_except(array1: Expr, array2: Expr) -> Expr:
 def list_except(array1: Expr, array2: Expr) -> Expr:
     """Returns the elements that appear in ``array1`` but not in the ``array2``.
 
-    This is an alias for :py:func:`array_except`.
+    See Also:
+        This is an alias for :py:func:`array_except`.
     """
     return array_except(array1, array2)
 
@@ -3144,7 +3185,10 @@ def list_resize(array: Expr, size: Expr, value: Expr) -> Expr:
     """Returns an array with the specified size filled.
 
     If ``size`` is greater than the ``array`` length, the additional entries will be
-    filled with the given ``value``. This is an alias for :py:func:`array_resize`.
+    filled with the given ``value``.
+
+    See Also:
+        This is an alias for :py:func:`array_resize`.
     """
     return array_resize(array, size, value)
 
@@ -3178,7 +3222,11 @@ def cardinality(array: Expr) -> Expr:
 
 
 def empty(array: Expr) -> Expr:
-    """This is an alias for :py:func:`array_empty`."""
+    """Returns true if the array is empty.
+
+    See Also:
+        This is an alias for :py:func:`array_empty`.
+    """
     return array_empty(array)
 
 
@@ -3546,15 +3594,8 @@ def max(expression: Expr, filter: Expr | None = None) -> Expr:
 def mean(expression: Expr, filter: Expr | None = None) -> Expr:
     """Returns the average (mean) value of the argument.
 
-    This is an alias for :py:func:`avg`.
-
-    Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0]})
-    >>> result = df.aggregate([], [dfn.functions.mean(dfn.col("a")).alias("v")])
-    >>> result.collect_column("v")[0].as_py()
-    2.0
+    See Also:
+        This is an alias for :py:func:`avg`.
     """
     return avg(expression, filter)
 
@@ -3683,16 +3724,8 @@ def stddev_pop(expression: Expr, filter: Expr | None = None) -> Expr:
 def stddev_samp(arg: Expr, filter: Expr | None = None) -> Expr:
     """Computes the sample standard deviation of the argument.
 
-    This is an alias for :py:func:`stddev`.
-
-    Examples:
-        >>> ctx = dfn.SessionContext()
-        >>> df = ctx.from_pydict({"a": [2.0, 4.0, 6.0]})
-        >>> result = df.aggregate(
-        ...     [], [dfn.functions.stddev_samp(dfn.col("a")).alias("v")]
-        ... )
-        >>> result.collect_column("v")[0].as_py()
-        2.0
+    See Also:
+        This is an alias for :py:func:`stddev`.
     """
     return stddev(arg, filter=filter)
 
@@ -3700,14 +3733,8 @@ def stddev_samp(arg: Expr, filter: Expr | None = None) -> Expr:
 def var(expression: Expr, filter: Expr | None = None) -> Expr:
     """Computes the sample variance of the argument.
 
-    This is an alias for :py:func:`var_samp`.
-
-    Examples:
-        >>> ctx = dfn.SessionContext()
-        >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0]})
-        >>> result = df.aggregate([], [dfn.functions.var(dfn.col("a")).alias("v")])
-        >>> result.collect_column("v")[0].as_py()
-        1.0
+    See Also:
+        This is an alias for :py:func:`var_samp`.
     """
     return var_samp(expression, filter)
 
@@ -3757,7 +3784,8 @@ def var_samp(expression: Expr, filter: Expr | None = None) -> Expr:
 def var_sample(expression: Expr, filter: Expr | None = None) -> Expr:
     """Computes the sample variance of the argument.
 
-    This is an alias for :py:func:`var_samp`.
+    See Also:
+        This is an alias for :py:func:`var_samp`.
     """
     return var_samp(expression, filter)
 

From 40ff61aa7b6bf6dde871db32fff02d2650bec848 Mon Sep 17 00:00:00 2001
From: ntjohnson1 <24689722+ntjohnson1@users.noreply.github.com>
Date: Wed, 18 Mar 2026 14:36:41 -0400
Subject: [PATCH 3/8] Fix Google docstring style for all examples

---
 python/datafusion/functions.py | 1072 +++++++++++++++-----------------
 1 file changed, 504 insertions(+), 568 deletions(-)

diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py
index 3e3b713b2..15cd43b45 100644
--- a/python/datafusion/functions.py
+++ b/python/datafusion/functions.py
@@ -650,12 +650,11 @@ def ascii(arg: Expr) -> Expr:
     """Returns the numeric code of the first character of the argument.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": ["a","b","c"]})
-    >>> ascii_df = df.select(dfn.functions.ascii(dfn.col("a")).alias("ascii"))
-    >>> ascii_df.collect_column("ascii")[0].as_py()
-    97
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["a","b","c"]})
+        >>> ascii_df = df.select(dfn.functions.ascii(dfn.col("a")).alias("ascii"))
+        >>> ascii_df.collect_column("ascii")[0].as_py()
+        97
     """
     return Expr(f.ascii(arg.expr))
 
@@ -730,12 +729,11 @@ def bit_length(arg: Expr) -> Expr:
     """Returns the number of bits in the string argument.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": ["a","b","c"]})
-    >>> bit_df = df.select(dfn.functions.bit_length(dfn.col("a")).alias("bit_len"))
-    >>> bit_df.collect_column("bit_len")[0].as_py()
-    8
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["a","b","c"]})
+        >>> bit_df = df.select(dfn.functions.bit_length(dfn.col("a")).alias("bit_len"))
+        >>> bit_df.collect_column("bit_len")[0].as_py()
+        8
     """
     return Expr(f.bit_length(arg.expr))
 
@@ -744,12 +742,11 @@ def btrim(arg: Expr) -> Expr:
     """Removes all characters, spaces by default, from both sides of a string.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [" a  "]})
-    >>> trim_df = df.select(dfn.functions.btrim(dfn.col("a")).alias("trimmed"))
-    >>> trim_df.collect_column("trimmed")[0].as_py()
-    'a'
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [" a  "]})
+        >>> trim_df = df.select(dfn.functions.btrim(dfn.col("a")).alias("trimmed"))
+        >>> trim_df.collect_column("trimmed")[0].as_py()
+        'a'
     """
     return Expr(f.btrim(arg.expr))
 
@@ -784,13 +781,12 @@ def character_length(arg: Expr) -> Expr:
     """Returns the number of characters in the argument.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": ["abc","b","c"]})
-    >>> char_len_df = df.select(
-    ...     dfn.functions.character_length(dfn.col("a")).alias("char_len"))
-    >>> char_len_df.collect_column("char_len")[0].as_py()
-    3
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["abc","b","c"]})
+        >>> char_len_df = df.select(
+        ...     dfn.functions.character_length(dfn.col("a")).alias("char_len"))
+        >>> char_len_df.collect_column("char_len")[0].as_py()
+        3
     """
     return Expr(f.character_length(arg.expr))
 
@@ -799,12 +795,11 @@ def length(string: Expr) -> Expr:
     """The number of characters in the ``string``.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": ["hello"]})
-    >>> result = df.select(dfn.functions.length(dfn.col("a")).alias("len"))
-    >>> result.collect_column("len")[0].as_py()
-    5
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["hello"]})
+        >>> result = df.select(dfn.functions.length(dfn.col("a")).alias("len"))
+        >>> result.collect_column("len")[0].as_py()
+        5
     """
     return Expr(f.length(string.expr))
 
@@ -813,12 +808,11 @@ def char_length(string: Expr) -> Expr:
     """The number of characters in the ``string``.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": ["hello"]})
-    >>> result = df.select(dfn.functions.char_length(dfn.col("a")).alias("len"))
-    >>> result.collect_column("len")[0].as_py()
-    5
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["hello"]})
+        >>> result = df.select(dfn.functions.char_length(dfn.col("a")).alias("len"))
+        >>> result.collect_column("len")[0].as_py()
+        5
     """
     return Expr(f.char_length(string.expr))
 
@@ -827,12 +821,11 @@ def chr(arg: Expr) -> Expr:
     """Converts the Unicode code point to a UTF8 character.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [65]})
-    >>> result = df.select(dfn.functions.chr(dfn.col("a")).alias("chr"))
-    >>> result.collect_column("chr")[0].as_py()
-    'A'
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [65]})
+        >>> result = df.select(dfn.functions.chr(dfn.col("a")).alias("chr"))
+        >>> result.collect_column("chr")[0].as_py()
+        'A'
     """
     return Expr(f.chr(arg.expr))
 
@@ -912,13 +905,12 @@ def ends_with(arg: Expr, suffix: Expr) -> Expr:
     """Returns true if the ``string`` ends with the ``suffix``, false otherwise.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": ["abc","b","c"]})
-    >>> ends_with_df = df.select(
-    ...     dfn.functions.ends_with(dfn.col("a"), dfn.lit("c")).alias("ends_with"))
-    >>> ends_with_df.collect_column("ends_with")[0].as_py()
-    True
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["abc","b","c"]})
+        >>> ends_with_df = df.select(
+        ...     dfn.functions.ends_with(dfn.col("a"), dfn.lit("c")).alias("ends_with"))
+        >>> ends_with_df.collect_column("ends_with")[0].as_py()
+        True
     """
     return Expr(f.ends_with(arg.expr, suffix.expr))
 
@@ -960,13 +952,12 @@ def find_in_set(string: Expr, string_list: Expr) -> Expr:
     The string list is a string composed of substrings separated by ``,`` characters.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": ["b"]})
-    >>> result = df.select(
-    ...     dfn.functions.find_in_set(dfn.col("a"), dfn.lit("a,b,c")).alias("pos"))
-    >>> result.collect_column("pos")[0].as_py()
-    2
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["b"]})
+        >>> result = df.select(
+        ...     dfn.functions.find_in_set(dfn.col("a"), dfn.lit("a,b,c")).alias("pos"))
+        >>> result.collect_column("pos")[0].as_py()
+        2
     """
     return Expr(f.find_in_set(string.expr, string_list.expr))
 
@@ -1006,12 +997,11 @@ def initcap(string: Expr) -> Expr:
     characters to lowercase.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": ["the cat"]})
-    >>> cap_df = df.select(dfn.functions.initcap(dfn.col("a")).alias("cap"))
-    >>> cap_df.collect_column("cap")[0].as_py()
-    'The Cat'
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["the cat"]})
+        >>> cap_df = df.select(dfn.functions.initcap(dfn.col("a")).alias("cap"))
+        >>> cap_df.collect_column("cap")[0].as_py()
+        'The Cat'
     """
     return Expr(f.initcap(string.expr))
 
@@ -1057,12 +1047,12 @@ def left(string: Expr, n: Expr) -> Expr:
     """Returns the first ``n`` characters in the ``string``.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": ["the cat"]})
-    >>> left_df = df.select(dfn.functions.left(dfn.col("a"), dfn.lit(3)).alias("left"))
-    >>> left_df.collect_column("left")[0].as_py()
-    'the'
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["the cat"]})
+        >>> left_df = df.select(
+        ...     dfn.functions.left(dfn.col("a"), dfn.lit(3)).alias("left"))
+        >>> left_df.collect_column("left")[0].as_py()
+        'the'
     """
     return Expr(f.left(string.expr, n.expr))
 
@@ -1071,13 +1061,12 @@ def levenshtein(string1: Expr, string2: Expr) -> Expr:
     """Returns the Levenshtein distance between the two given strings.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": ["kitten"]})
-    >>> result = df.select(
-    ...     dfn.functions.levenshtein(dfn.col("a"), dfn.lit("sitting")).alias("d"))
-    >>> result.collect_column("d")[0].as_py()
-    3
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["kitten"]})
+        >>> result = df.select(
+        ...     dfn.functions.levenshtein(dfn.col("a"), dfn.lit("sitting")).alias("d"))
+        >>> result.collect_column("d")[0].as_py()
+        3
     """
     return Expr(f.levenshtein(string1.expr, string2.expr))
 
@@ -1140,12 +1129,11 @@ def lower(arg: Expr) -> Expr:
     """Converts a string to lowercase.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": ["THE CaT"]})
-    >>> lower_df = df.select(dfn.functions.lower(dfn.col("a")).alias("lower"))
-    >>> lower_df.collect_column("lower")[0].as_py()
-    'the cat'
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["THE CaT"]})
+        >>> lower_df = df.select(dfn.functions.lower(dfn.col("a")).alias("lower"))
+        >>> lower_df.collect_column("lower")[0].as_py()
+        'the cat'
     """
     return Expr(f.lower(arg.expr))
 
@@ -1158,14 +1146,14 @@ def lpad(string: Expr, count: Expr, characters: Expr | None = None) -> Expr:
     truncated (on the right).
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": ["the cat", "a hat"]})
-    >>> lpad_df = df.select(dfn.functions.lpad(dfn.col("a"), dfn.lit(6)).alias("lpad"))
-    >>> lpad_df.collect_column("lpad")[0].as_py()
-    'the ca'
-    >>> lpad_df.collect_column("lpad")[1].as_py()
-    ' a hat'
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["the cat", "a hat"]})
+        >>> lpad_df = df.select(
+        ...     dfn.functions.lpad(dfn.col("a"), dfn.lit(6)).alias("lpad"))
+        >>> lpad_df.collect_column("lpad")[0].as_py()
+        'the ca'
+        >>> lpad_df.collect_column("lpad")[1].as_py()
+        ' a hat'
     """
     characters = characters if characters is not None else Expr.literal(" ")
     return Expr(f.lpad(string.expr, count.expr, characters.expr))
@@ -1175,12 +1163,11 @@ def ltrim(arg: Expr) -> Expr:
     """Removes all characters, spaces by default, from the beginning of a string.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [" a  "]})
-    >>> trim_df = df.select(dfn.functions.ltrim(dfn.col("a")).alias("trimmed"))
-    >>> trim_df.collect_column("trimmed")[0].as_py()
-    'a  '
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [" a  "]})
+        >>> trim_df = df.select(dfn.functions.ltrim(dfn.col("a")).alias("trimmed"))
+        >>> trim_df.collect_column("trimmed")[0].as_py()
+        'a  '
     """
     return Expr(f.ltrim(arg.expr))
 
@@ -1235,12 +1222,11 @@ def octet_length(arg: Expr) -> Expr:
     """Returns the number of bytes of a string.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": ["hello"]})
-    >>> result = df.select(dfn.functions.octet_length(dfn.col("a")).alias("len"))
-    >>> result.collect_column("len")[0].as_py()
-    5
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["hello"]})
+        >>> result = df.select(dfn.functions.octet_length(dfn.col("a")).alias("len"))
+        >>> result.collect_column("len")[0].as_py()
+        5
     """
     return Expr(f.octet_length(arg.expr))
 
@@ -1254,14 +1240,13 @@ def overlay(
     extends for ``length`` characters with new substring.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": ["abcdef"]})
-    >>> result = df.select(
-    ...     dfn.functions.overlay(dfn.col("a"), dfn.lit("XY"), dfn.lit(3),
-    ...     dfn.lit(2)).alias("o"))
-    >>> result.collect_column("o")[0].as_py()
-    'abXYef'
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["abcdef"]})
+        >>> result = df.select(
+        ...     dfn.functions.overlay(dfn.col("a"), dfn.lit("XY"), dfn.lit(3),
+        ...     dfn.lit(2)).alias("o"))
+        >>> result.collect_column("o")[0].as_py()
+        'abXYef'
     """
     if length is None:
         return Expr(f.overlay(string.expr, substring.expr, start.expr))
@@ -1479,12 +1464,12 @@ def repeat(string: Expr, n: Expr) -> Expr:
     """Repeats the ``string`` to ``n`` times.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": ["ha"]})
-    >>> result = df.select(dfn.functions.repeat(dfn.col("a"), dfn.lit(3)).alias("r"))
-    >>> result.collect_column("r")[0].as_py()
-    'hahaha'
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["ha"]})
+        >>> result = df.select(
+        ...     dfn.functions.repeat(dfn.col("a"), dfn.lit(3)).alias("r"))
+        >>> result.collect_column("r")[0].as_py()
+        'hahaha'
     """
     return Expr(f.repeat(string.expr, n.expr))
 
@@ -1493,14 +1478,13 @@ def replace(string: Expr, from_val: Expr, to_val: Expr) -> Expr:
     """Replaces all occurrences of ``from_val`` with ``to_val`` in the ``string``.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": ["hello world"]})
-    >>> result = df.select(
-    ...     dfn.functions.replace(dfn.col("a"), dfn.lit("world"),
-    ...     dfn.lit("there")).alias("r"))
-    >>> result.collect_column("r")[0].as_py()
-    'hello there'
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["hello world"]})
+        >>> result = df.select(
+        ...     dfn.functions.replace(dfn.col("a"), dfn.lit("world"),
+        ...     dfn.lit("there")).alias("r"))
+        >>> result.collect_column("r")[0].as_py()
+        'hello there'
     """
     return Expr(f.replace(string.expr, from_val.expr, to_val.expr))
 
@@ -1509,12 +1493,11 @@ def reverse(arg: Expr) -> Expr:
     """Reverse the string argument.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": ["hello"]})
-    >>> result = df.select(dfn.functions.reverse(dfn.col("a")).alias("r"))
-    >>> result.collect_column("r")[0].as_py()
-    'olleh'
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["hello"]})
+        >>> result = df.select(dfn.functions.reverse(dfn.col("a")).alias("r"))
+        >>> result.collect_column("r")[0].as_py()
+        'olleh'
     """
     return Expr(f.reverse(arg.expr))
 
@@ -1523,12 +1506,11 @@ def right(string: Expr, n: Expr) -> Expr:
     """Returns the last ``n`` characters in the ``string``.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": ["hello"]})
-    >>> result = df.select(dfn.functions.right(dfn.col("a"), dfn.lit(3)).alias("r"))
-    >>> result.collect_column("r")[0].as_py()
-    'llo'
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["hello"]})
+        >>> result = df.select(dfn.functions.right(dfn.col("a"), dfn.lit(3)).alias("r"))
+        >>> result.collect_column("r")[0].as_py()
+        'llo'
     """
     return Expr(f.right(string.expr, n.expr))
 
@@ -1559,13 +1541,12 @@ def rpad(string: Expr, count: Expr, characters: Expr | None = None) -> Expr:
     by default). If the string is already longer than length then it is truncated.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": ["hi"]})
-    >>> result = df.select(
-    ...     dfn.functions.rpad(dfn.col("a"), dfn.lit(5), dfn.lit("!")).alias("r"))
-    >>> result.collect_column("r")[0].as_py()
-    'hi!!!'
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["hi"]})
+        >>> result = df.select(
+        ...     dfn.functions.rpad(dfn.col("a"), dfn.lit(5), dfn.lit("!")).alias("r"))
+        >>> result.collect_column("r")[0].as_py()
+        'hi!!!'
     """
     characters = characters if characters is not None else Expr.literal(" ")
     return Expr(f.rpad(string.expr, count.expr, characters.expr))
@@ -1575,12 +1556,11 @@ def rtrim(arg: Expr) -> Expr:
     """Removes all characters, spaces by default, from the end of a string.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [" a  "]})
-    >>> trim_df = df.select(dfn.functions.rtrim(dfn.col("a")).alias("trimmed"))
-    >>> trim_df.collect_column("trimmed")[0].as_py()
-    ' a'
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [" a  "]})
+        >>> trim_df = df.select(dfn.functions.rtrim(dfn.col("a")).alias("trimmed"))
+        >>> trim_df.collect_column("trimmed")[0].as_py()
+        ' a'
     """
     return Expr(f.rtrim(arg.expr))
 
@@ -1691,13 +1671,14 @@ def split_part(string: Expr, delimiter: Expr, index: Expr) -> Expr:
     on the index.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": ["a,b,c"]})
-    >>> result = df.select(
-    ...     dfn.functions.split_part(dfn.col("a"), dfn.lit(","), dfn.lit(2)).alias("s"))
-    >>> result.collect_column("s")[0].as_py()
-    'b'
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["a,b,c"]})
+        >>> result = df.select(
+        ...     dfn.functions.split_part(
+        ...         dfn.col("a"), dfn.lit(","), dfn.lit(2)
+        ...     ).alias("s"))
+        >>> result.collect_column("s")[0].as_py()
+        'b'
     """
     return Expr(f.split_part(string.expr, delimiter.expr, index.expr))
 
@@ -1719,13 +1700,12 @@ def starts_with(string: Expr, prefix: Expr) -> Expr:
     """Returns true if string starts with prefix.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": ["hello_from_datafusion"]})
-    >>> result = df.select(
-    ...     dfn.functions.starts_with(dfn.col("a"), dfn.lit("hello")).alias("sw"))
-    >>> result.collect_column("sw")[0].as_py()
-    True
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["hello_from_datafusion"]})
+        >>> result = df.select(
+        ...     dfn.functions.starts_with(dfn.col("a"), dfn.lit("hello")).alias("sw"))
+        >>> result.collect_column("sw")[0].as_py()
+        True
     """
     return Expr(f.starts_with(string.expr, prefix.expr))
 
@@ -1734,13 +1714,12 @@ def strpos(string: Expr, substring: Expr) -> Expr:
     """Finds the position from where the ``substring`` matches the ``string``.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": ["hello"]})
-    >>> result = df.select(
-    ...     dfn.functions.strpos(dfn.col("a"), dfn.lit("llo")).alias("pos"))
-    >>> result.collect_column("pos")[0].as_py()
-    3
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["hello"]})
+        >>> result = df.select(
+        ...     dfn.functions.strpos(dfn.col("a"), dfn.lit("llo")).alias("pos"))
+        >>> result.collect_column("pos")[0].as_py()
+        3
     """
     return Expr(f.strpos(string.expr, substring.expr))
 
@@ -1749,12 +1728,12 @@ def substr(string: Expr, position: Expr) -> Expr:
     """Substring from the ``position`` to the end.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": ["hello"]})
-    >>> result = df.select(dfn.functions.substr(dfn.col("a"), dfn.lit(3)).alias("s"))
-    >>> result.collect_column("s")[0].as_py()
-    'llo'
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["hello"]})
+        >>> result = df.select(
+        ...     dfn.functions.substr(dfn.col("a"), dfn.lit(3)).alias("s"))
+        >>> result.collect_column("s")[0].as_py()
+        'llo'
     """
     return Expr(f.substr(string.expr, position.expr))
 
@@ -1766,14 +1745,13 @@ def substr_index(string: Expr, delimiter: Expr, count: Expr) -> Expr:
     ``delimiter``.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": ["a.b.c"]})
-    >>> result = df.select(
-    ...     dfn.functions.substr_index(dfn.col("a"), dfn.lit("."),
-    ...     dfn.lit(2)).alias("s"))
-    >>> result.collect_column("s")[0].as_py()
-    'a.b'
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["a.b.c"]})
+        >>> result = df.select(
+        ...     dfn.functions.substr_index(dfn.col("a"), dfn.lit("."),
+        ...     dfn.lit(2)).alias("s"))
+        >>> result.collect_column("s")[0].as_py()
+        'a.b'
     """
     return Expr(f.substr_index(string.expr, delimiter.expr, count.expr))
 
@@ -1782,13 +1760,14 @@ def substring(string: Expr, position: Expr, length: Expr) -> Expr:
     """Substring from the ``position`` with ``length`` characters.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": ["hello world"]})
-    >>> result = df.select(
-    ...     dfn.functions.substring(dfn.col("a"), dfn.lit(1), dfn.lit(5)).alias("s"))
-    >>> result.collect_column("s")[0].as_py()
-    'hello'
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["hello world"]})
+        >>> result = df.select(
+        ...     dfn.functions.substring(
+        ...         dfn.col("a"), dfn.lit(1), dfn.lit(5)
+        ...     ).alias("s"))
+        >>> result.collect_column("s")[0].as_py()
+        'hello'
     """
     return Expr(f.substring(string.expr, position.expr, length.expr))
 
@@ -1823,12 +1802,11 @@ def to_hex(arg: Expr) -> Expr:
     """Converts an integer to a hexadecimal string.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [255]})
-    >>> result = df.select(dfn.functions.to_hex(dfn.col("a")).alias("hex"))
-    >>> result.collect_column("hex")[0].as_py()
-    'ff'
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [255]})
+        >>> result = df.select(dfn.functions.to_hex(dfn.col("a")).alias("hex"))
+        >>> result.collect_column("hex")[0].as_py()
+        'ff'
     """
     return Expr(f.to_hex(arg.expr))
 
@@ -2180,14 +2158,13 @@ def translate(string: Expr, from_val: Expr, to_val: Expr) -> Expr:
     """Replaces the characters in ``from_val`` with the counterpart in ``to_val``.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": ["hello"]})
-    >>> result = df.select(
-    ...     dfn.functions.translate(dfn.col("a"), dfn.lit("helo"),
-    ...     dfn.lit("HELO")).alias("t"))
-    >>> result.collect_column("t")[0].as_py()
-    'HELLO'
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["hello"]})
+        >>> result = df.select(
+        ...     dfn.functions.translate(dfn.col("a"), dfn.lit("helo"),
+        ...     dfn.lit("HELO")).alias("t"))
+        >>> result.collect_column("t")[0].as_py()
+        'HELLO'
     """
     return Expr(f.translate(string.expr, from_val.expr, to_val.expr))
 
@@ -2196,12 +2173,11 @@ def trim(arg: Expr) -> Expr:
     """Removes all characters, spaces by default, from both sides of a string.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": ["  hello  "]})
-    >>> result = df.select(dfn.functions.trim(dfn.col("a")).alias("t"))
-    >>> result.collect_column("t")[0].as_py()
-    'hello'
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["  hello  "]})
+        >>> result = df.select(dfn.functions.trim(dfn.col("a")).alias("t"))
+        >>> result.collect_column("t")[0].as_py()
+        'hello'
     """
     return Expr(f.trim(arg.expr))
 
@@ -2225,12 +2201,11 @@ def upper(arg: Expr) -> Expr:
     """Converts a string to uppercase.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": ["hello"]})
-    >>> result = df.select(dfn.functions.upper(dfn.col("a")).alias("u"))
-    >>> result.collect_column("u")[0].as_py()
-    'HELLO'
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["hello"]})
+        >>> result = df.select(dfn.functions.upper(dfn.col("a")).alias("u"))
+        >>> result.collect_column("u")[0].as_py()
+        'HELLO'
     """
     return Expr(f.upper(arg.expr))
 
@@ -2239,13 +2214,14 @@ def make_array(*args: Expr) -> Expr:
     """Returns an array using the specified input expressions.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [1]})
-    >>> result = df.select(
-    ...     dfn.functions.make_array(dfn.lit(1), dfn.lit(2), dfn.lit(3)).alias("arr"))
-    >>> result.collect_column("arr")[0].as_py()
-    [1, 2, 3]
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [1]})
+        >>> result = df.select(
+        ...     dfn.functions.make_array(
+        ...         dfn.lit(1), dfn.lit(2), dfn.lit(3)
+        ...     ).alias("arr"))
+        >>> result.collect_column("arr")[0].as_py()
+        [1, 2, 3]
     """
     args = [arg.expr for arg in args]
     return Expr(f.make_array(args))
@@ -2273,13 +2249,12 @@ def range(start: Expr, stop: Expr, step: Expr) -> Expr:
     """Create a list of values in the range between start and stop.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [1]})
-    >>> result = df.select(
-    ...     dfn.functions.range(dfn.lit(0), dfn.lit(5), dfn.lit(2)).alias("r"))
-    >>> result.collect_column("r")[0].as_py()
-    [0, 2, 4]
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [1]})
+        >>> result = df.select(
+        ...     dfn.functions.range(dfn.lit(0), dfn.lit(5), dfn.lit(2)).alias("r"))
+        >>> result.collect_column("r")[0].as_py()
+        [0, 2, 4]
     """
     return Expr(f.range(start.expr, stop.expr, step.expr))
 
@@ -2411,13 +2386,12 @@ def array_append(array: Expr, element: Expr) -> Expr:
     """Appends an element to the end of an array.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [[1, 2, 3]]})
-    >>> result = df.select(
-    ...     dfn.functions.array_append(dfn.col("a"), dfn.lit(4)).alias("result"))
-    >>> result.collect_column("result")[0].as_py()
-    [1, 2, 3, 4]
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [[1, 2, 3]]})
+        >>> result = df.select(
+        ...     dfn.functions.array_append(dfn.col("a"), dfn.lit(4)).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        [1, 2, 3, 4]
     """
     return Expr(f.array_append(array.expr, element.expr))
 
@@ -2453,13 +2427,12 @@ def array_concat(*args: Expr) -> Expr:
     """Concatenates the input arrays.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [[1, 2]], "b": [[3, 4]]})
-    >>> result = df.select(
-    ...     dfn.functions.array_concat(dfn.col("a"), dfn.col("b")).alias("result"))
-    >>> result.collect_column("result")[0].as_py()
-    [1, 2, 3, 4]
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [[1, 2]], "b": [[3, 4]]})
+        >>> result = df.select(
+        ...     dfn.functions.array_concat(dfn.col("a"), dfn.col("b")).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        [1, 2, 3, 4]
     """
     args = [arg.expr for arg in args]
     return Expr(f.array_concat(args))
@@ -2478,12 +2451,11 @@ def array_dims(array: Expr) -> Expr:
     """Returns an array of the array's dimensions.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [[1, 2, 3]]})
-    >>> result = df.select(dfn.functions.array_dims(dfn.col("a")).alias("result"))
-    >>> result.collect_column("result")[0].as_py()
-    [3]
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [[1, 2, 3]]})
+        >>> result = df.select(dfn.functions.array_dims(dfn.col("a")).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        [3]
     """
     return Expr(f.array_dims(array.expr))
 
@@ -2492,18 +2464,17 @@ def array_distinct(array: Expr) -> Expr:
     """Returns distinct values from the array after removing duplicates.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [[1, 1, 2, 3]]})
-    >>> result = df.select(
-    ...     dfn.functions.array_distinct(
-    ...         dfn.col("a")
-    ...     ).alias("result")
-    ... )
-    >>> sorted(
-    ...     result.collect_column("result")[0].as_py()
-    ... )
-    [1, 2, 3]
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [[1, 1, 2, 3]]})
+        >>> result = df.select(
+        ...     dfn.functions.array_distinct(
+        ...         dfn.col("a")
+        ...     ).alias("result")
+        ... )
+        >>> sorted(
+        ...     result.collect_column("result")[0].as_py()
+        ... )
+        [1, 2, 3]
     """
     return Expr(f.array_distinct(array.expr))
 
@@ -2548,13 +2519,12 @@ def array_element(array: Expr, n: Expr) -> Expr:
     """Extracts the element with the index n from the array.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [[10, 20, 30]]})
-    >>> result = df.select(
-    ...     dfn.functions.array_element(dfn.col("a"), dfn.lit(2)).alias("result"))
-    >>> result.collect_column("result")[0].as_py()
-    20
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [[10, 20, 30]]})
+        >>> result = df.select(
+        ...     dfn.functions.array_element(dfn.col("a"), dfn.lit(2)).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        20
     """
     return Expr(f.array_element(array.expr, n.expr))
 
@@ -2563,12 +2533,11 @@ def array_empty(array: Expr) -> Expr:
     """Returns a boolean indicating whether the array is empty.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [[1, 2]]})
-    >>> result = df.select(dfn.functions.array_empty(dfn.col("a")).alias("result"))
-    >>> result.collect_column("result")[0].as_py()
-    False
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [[1, 2]]})
+        >>> result = df.select(dfn.functions.array_empty(dfn.col("a")).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        False
     """
     return Expr(f.array_empty(array.expr))
 
@@ -2604,12 +2573,11 @@ def array_length(array: Expr) -> Expr:
     """Returns the length of the array.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [[1, 2, 3]]})
-    >>> result = df.select(dfn.functions.array_length(dfn.col("a")).alias("result"))
-    >>> result.collect_column("result")[0].as_py()
-    3
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [[1, 2, 3]]})
+        >>> result = df.select(dfn.functions.array_length(dfn.col("a")).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        3
     """
     return Expr(f.array_length(array.expr))
 
@@ -2627,13 +2595,12 @@ def array_has(first_array: Expr, second_array: Expr) -> Expr:
     """Returns true if the element appears in the first array, otherwise false.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [[1, 2, 3]]})
-    >>> result = df.select(
-    ...     dfn.functions.array_has(dfn.col("a"), dfn.lit(2)).alias("result"))
-    >>> result.collect_column("result")[0].as_py()
-    True
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [[1, 2, 3]]})
+        >>> result = df.select(
+        ...     dfn.functions.array_has(dfn.col("a"), dfn.lit(2)).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        True
     """
     return Expr(f.array_has(first_array.expr, second_array.expr))
 
@@ -2645,13 +2612,12 @@ def array_has_all(first_array: Expr, second_array: Expr) -> Expr:
     Otherwise, it returns false.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [[1, 2, 3]], "b": [[1, 2]]})
-    >>> result = df.select(
-    ...     dfn.functions.array_has_all(dfn.col("a"), dfn.col("b")).alias("result"))
-    >>> result.collect_column("result")[0].as_py()
-    True
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [[1, 2, 3]], "b": [[1, 2]]})
+        >>> result = df.select(
+        ...     dfn.functions.array_has_all(dfn.col("a"), dfn.col("b")).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        True
     """
     return Expr(f.array_has_all(first_array.expr, second_array.expr))
 
@@ -2663,13 +2629,12 @@ def array_has_any(first_array: Expr, second_array: Expr) -> Expr:
     array. Otherwise, it returns false.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [[1, 2, 3]], "b": [[2, 5]]})
-    >>> result = df.select(
-    ...     dfn.functions.array_has_any(dfn.col("a"), dfn.col("b")).alias("result"))
-    >>> result.collect_column("result")[0].as_py()
-    True
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [[1, 2, 3]], "b": [[2, 5]]})
+        >>> result = df.select(
+        ...     dfn.functions.array_has_any(dfn.col("a"), dfn.col("b")).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        True
     """
     return Expr(f.array_has_any(first_array.expr, second_array.expr))
 
@@ -2678,13 +2643,12 @@ def array_position(array: Expr, element: Expr, index: int | None = 1) -> Expr:
     """Return the position of the first occurrence of ``element`` in ``array``.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [[10, 20, 30]]})
-    >>> result = df.select(
-    ...     dfn.functions.array_position(dfn.col("a"), dfn.lit(20)).alias("result"))
-    >>> result.collect_column("result")[0].as_py()
-    2
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [[10, 20, 30]]})
+        >>> result = df.select(
+        ...     dfn.functions.array_position(dfn.col("a"), dfn.lit(20)).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        2
     """
     return Expr(f.array_position(array.expr, element.expr, index))
 
@@ -2720,13 +2684,12 @@ def array_positions(array: Expr, element: Expr) -> Expr:
     """Searches for an element in the array and returns all occurrences.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [[1, 2, 1]]})
-    >>> result = df.select(
-    ...     dfn.functions.array_positions(dfn.col("a"), dfn.lit(1)).alias("result"))
-    >>> result.collect_column("result")[0].as_py()
-    [1, 3]
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [[1, 2, 1]]})
+        >>> result = df.select(
+        ...     dfn.functions.array_positions(dfn.col("a"), dfn.lit(1)).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        [1, 3]
     """
     return Expr(f.array_positions(array.expr, element.expr))
 
@@ -2744,12 +2707,11 @@ def array_ndims(array: Expr) -> Expr:
     """Returns the number of dimensions of the array.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [[1, 2, 3]]})
-    >>> result = df.select(dfn.functions.array_ndims(dfn.col("a")).alias("result"))
-    >>> result.collect_column("result")[0].as_py()
-    1
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [[1, 2, 3]]})
+        >>> result = df.select(dfn.functions.array_ndims(dfn.col("a")).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        1
     """
     return Expr(f.array_ndims(array.expr))
 
@@ -2767,13 +2729,12 @@ def array_prepend(element: Expr, array: Expr) -> Expr:
     """Prepends an element to the beginning of an array.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [[1, 2]]})
-    >>> result = df.select(
-    ...     dfn.functions.array_prepend(dfn.lit(0), dfn.col("a")).alias("result"))
-    >>> result.collect_column("result")[0].as_py()
-    [0, 1, 2]
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [[1, 2]]})
+        >>> result = df.select(
+        ...     dfn.functions.array_prepend(dfn.lit(0), dfn.col("a")).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        [0, 1, 2]
     """
     return Expr(f.array_prepend(element.expr, array.expr))
 
@@ -2809,12 +2770,12 @@ def array_pop_back(array: Expr) -> Expr:
     """Returns the array without the last element.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [[1, 2, 3]]})
-    >>> result = df.select(dfn.functions.array_pop_back(dfn.col("a")).alias("result"))
-    >>> result.collect_column("result")[0].as_py()
-    [1, 2]
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [[1, 2, 3]]})
+        >>> result = df.select(
+        ...     dfn.functions.array_pop_back(dfn.col("a")).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        [1, 2]
     """
     return Expr(f.array_pop_back(array.expr))
 
@@ -2823,12 +2784,12 @@ def array_pop_front(array: Expr) -> Expr:
     """Returns the array without the first element.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [[1, 2, 3]]})
-    >>> result = df.select(dfn.functions.array_pop_front(dfn.col("a")).alias("result"))
-    >>> result.collect_column("result")[0].as_py()
-    [2, 3]
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [[1, 2, 3]]})
+        >>> result = df.select(
+        ...     dfn.functions.array_pop_front(dfn.col("a")).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        [2, 3]
     """
     return Expr(f.array_pop_front(array.expr))
 
@@ -2837,13 +2798,12 @@ def array_remove(array: Expr, element: Expr) -> Expr:
     """Removes the first element from the array equal to the given value.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [[1, 2, 1]]})
-    >>> result = df.select(
-    ...     dfn.functions.array_remove(dfn.col("a"), dfn.lit(1)).alias("result"))
-    >>> result.collect_column("result")[0].as_py()
-    [2, 1]
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [[1, 2, 1]]})
+        >>> result = df.select(
+        ...     dfn.functions.array_remove(dfn.col("a"), dfn.lit(1)).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        [2, 1]
     """
     return Expr(f.array_remove(array.expr, element.expr))
 
@@ -2861,14 +2821,13 @@ def array_remove_n(array: Expr, element: Expr, max: Expr) -> Expr:
     """Removes the first ``max`` elements from the array equal to the given value.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [[1, 2, 1, 1]]})
-    >>> result = df.select(
-    ...     dfn.functions.array_remove_n(dfn.col("a"), dfn.lit(1),
-    ...     dfn.lit(2)).alias("result"))
-    >>> result.collect_column("result")[0].as_py()
-    [2, 1]
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [[1, 2, 1, 1]]})
+        >>> result = df.select(
+        ...     dfn.functions.array_remove_n(dfn.col("a"), dfn.lit(1),
+        ...     dfn.lit(2)).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        [2, 1]
     """
     return Expr(f.array_remove_n(array.expr, element.expr, max.expr))
 
@@ -2886,13 +2845,14 @@ def array_remove_all(array: Expr, element: Expr) -> Expr:
     """Removes all elements from the array equal to the given value.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [[1, 2, 1]]})
-    >>> result = df.select(
-    ...     dfn.functions.array_remove_all(dfn.col("a"), dfn.lit(1)).alias("result"))
-    >>> result.collect_column("result")[0].as_py()
-    [2]
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [[1, 2, 1]]})
+        >>> result = df.select(
+        ...     dfn.functions.array_remove_all(
+        ...         dfn.col("a"), dfn.lit(1)
+        ...     ).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        [2]
     """
     return Expr(f.array_remove_all(array.expr, element.expr))
 
@@ -2910,13 +2870,12 @@ def array_repeat(element: Expr, count: Expr) -> Expr:
     """Returns an array containing ``element`` ``count`` times.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [1]})
-    >>> result = df.select(
-    ...     dfn.functions.array_repeat(dfn.lit(3), dfn.lit(3)).alias("result"))
-    >>> result.collect_column("result")[0].as_py()
-    [3, 3, 3]
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [1]})
+        >>> result = df.select(
+        ...     dfn.functions.array_repeat(dfn.lit(3), dfn.lit(3)).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        [3, 3, 3]
     """
     return Expr(f.array_repeat(element.expr, count.expr))
 
@@ -2934,14 +2893,13 @@ def array_replace(array: Expr, from_val: Expr, to_val: Expr) -> Expr:
     """Replaces the first occurrence of ``from_val`` with ``to_val``.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [[1, 2, 1]]})
-    >>> result = df.select(
-    ...     dfn.functions.array_replace(dfn.col("a"), dfn.lit(1),
-    ...     dfn.lit(9)).alias("result"))
-    >>> result.collect_column("result")[0].as_py()
-    [9, 2, 1]
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [[1, 2, 1]]})
+        >>> result = df.select(
+        ...     dfn.functions.array_replace(dfn.col("a"), dfn.lit(1),
+        ...     dfn.lit(9)).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        [9, 2, 1]
     """
     return Expr(f.array_replace(array.expr, from_val.expr, to_val.expr))
 
@@ -2962,14 +2920,13 @@ def array_replace_n(array: Expr, from_val: Expr, to_val: Expr, max: Expr) -> Exp
     specified element.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [[1, 2, 1, 1]]})
-    >>> result = df.select(
-    ...     dfn.functions.array_replace_n(dfn.col("a"), dfn.lit(1), dfn.lit(9),
-    ...     dfn.lit(2)).alias("result"))
-    >>> result.collect_column("result")[0].as_py()
-    [9, 2, 9, 1]
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [[1, 2, 1, 1]]})
+        >>> result = df.select(
+        ...     dfn.functions.array_replace_n(dfn.col("a"), dfn.lit(1), dfn.lit(9),
+        ...     dfn.lit(2)).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        [9, 2, 9, 1]
     """
     return Expr(f.array_replace_n(array.expr, from_val.expr, to_val.expr, max.expr))
 
@@ -2990,14 +2947,13 @@ def array_replace_all(array: Expr, from_val: Expr, to_val: Expr) -> Expr:
     """Replaces all occurrences of ``from_val`` with ``to_val``.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [[1, 2, 1]]})
-    >>> result = df.select(
-    ...     dfn.functions.array_replace_all(dfn.col("a"), dfn.lit(1),
-    ...     dfn.lit(9)).alias("result"))
-    >>> result.collect_column("result")[0].as_py()
-    [9, 2, 9]
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [[1, 2, 1]]})
+        >>> result = df.select(
+        ...     dfn.functions.array_replace_all(dfn.col("a"), dfn.lit(1),
+        ...     dfn.lit(9)).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        [9, 2, 9]
     """
     return Expr(f.array_replace_all(array.expr, from_val.expr, to_val.expr))
 
@@ -3020,12 +2976,11 @@ def array_sort(array: Expr, descending: bool = False, null_first: bool = False)
         null_first: If True, nulls will be returned at the beginning of the array.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [[3, 1, 2]]})
-    >>> result = df.select(dfn.functions.array_sort(dfn.col("a")).alias("result"))
-    >>> result.collect_column("result")[0].as_py()
-    [1, 2, 3]
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [[3, 1, 2]]})
+        >>> result = df.select(dfn.functions.array_sort(dfn.col("a")).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        [1, 2, 3]
     """
     desc = "DESC" if descending else "ASC"
     nulls_first = "NULLS FIRST" if null_first else "NULLS LAST"
@@ -3053,14 +3008,13 @@ def array_slice(
     """Returns a slice of the array.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [[1, 2, 3, 4]]})
-    >>> result = df.select(
-    ...     dfn.functions.array_slice(dfn.col("a"), dfn.lit(2),
-    ...     dfn.lit(3)).alias("result"))
-    >>> result.collect_column("result")[0].as_py()
-    [2, 3]
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [[1, 2, 3, 4]]})
+        >>> result = df.select(
+        ...     dfn.functions.array_slice(dfn.col("a"), dfn.lit(2),
+        ...     dfn.lit(3)).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        [2, 3]
     """
     if stride is not None:
         stride = stride.expr
@@ -3080,18 +3034,17 @@ def array_intersect(array1: Expr, array2: Expr) -> Expr:
     """Returns the intersection of ``array1`` and ``array2``.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [[1, 2, 3]], "b": [[2, 3, 4]]})
-    >>> result = df.select(
-    ...     dfn.functions.array_intersect(
-    ...         dfn.col("a"), dfn.col("b")
-    ...     ).alias("result")
-    ... )
-    >>> sorted(
-    ...     result.collect_column("result")[0].as_py()
-    ... )
-    [2, 3]
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [[1, 2, 3]], "b": [[2, 3, 4]]})
+        >>> result = df.select(
+        ...     dfn.functions.array_intersect(
+        ...         dfn.col("a"), dfn.col("b")
+        ...     ).alias("result")
+        ... )
+        >>> sorted(
+        ...     result.collect_column("result")[0].as_py()
+        ... )
+        [2, 3]
     """
     return Expr(f.array_intersect(array1.expr, array2.expr))
 
@@ -3111,18 +3064,17 @@ def array_union(array1: Expr, array2: Expr) -> Expr:
     Duplicate rows will not be returned.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [[1, 2, 3]], "b": [[2, 3, 4]]})
-    >>> result = df.select(
-    ...     dfn.functions.array_union(
-    ...         dfn.col("a"), dfn.col("b")
-    ...     ).alias("result")
-    ... )
-    >>> sorted(
-    ...     result.collect_column("result")[0].as_py()
-    ... )
-    [1, 2, 3, 4]
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [[1, 2, 3]], "b": [[2, 3, 4]]})
+        >>> result = df.select(
+        ...     dfn.functions.array_union(
+        ...         dfn.col("a"), dfn.col("b")
+        ...     ).alias("result")
+        ... )
+        >>> sorted(
+        ...     result.collect_column("result")[0].as_py()
+        ... )
+        [1, 2, 3, 4]
     """
     return Expr(f.array_union(array1.expr, array2.expr))
 
@@ -3142,13 +3094,12 @@ def array_except(array1: Expr, array2: Expr) -> Expr:
     """Returns the elements that appear in ``array1`` but not in ``array2``.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [[1, 2, 3]], "b": [[2, 3, 4]]})
-    >>> result = df.select(
-    ...     dfn.functions.array_except(dfn.col("a"), dfn.col("b")).alias("result"))
-    >>> result.collect_column("result")[0].as_py()
-    [1]
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [[1, 2, 3]], "b": [[2, 3, 4]]})
+        >>> result = df.select(
+        ...     dfn.functions.array_except(dfn.col("a"), dfn.col("b")).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        [1]
     """
     return Expr(f.array_except(array1.expr, array2.expr))
 
@@ -3169,14 +3120,13 @@ def array_resize(array: Expr, size: Expr, value: Expr) -> Expr:
     be filled with the given ``value``.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [[1, 2]]})
-    >>> result = df.select(
-    ...     dfn.functions.array_resize(dfn.col("a"), dfn.lit(4),
-    ...     dfn.lit(0)).alias("result"))
-    >>> result.collect_column("result")[0].as_py()
-    [1, 2, 0, 0]
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [[1, 2]]})
+        >>> result = df.select(
+        ...     dfn.functions.array_resize(dfn.col("a"), dfn.lit(4),
+        ...     dfn.lit(0)).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        [1, 2, 0, 0]
     """
     return Expr(f.array_resize(array.expr, size.expr, value.expr))
 
@@ -3197,12 +3147,11 @@ def flatten(array: Expr) -> Expr:
     """Flattens an array of arrays into a single array.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [[[1, 2], [3, 4]]]})
-    >>> result = df.select(dfn.functions.flatten(dfn.col("a")).alias("result"))
-    >>> result.collect_column("result")[0].as_py()
-    [1, 2, 3, 4]
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [[[1, 2], [3, 4]]]})
+        >>> result = df.select(dfn.functions.flatten(dfn.col("a")).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        [1, 2, 3, 4]
     """
     return Expr(f.flatten(array.expr))
 
@@ -3211,12 +3160,11 @@ def cardinality(array: Expr) -> Expr:
     """Returns the total number of elements in the array.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [[1, 2, 3]]})
-    >>> result = df.select(dfn.functions.cardinality(dfn.col("a")).alias("result"))
-    >>> result.collect_column("result")[0].as_py()
-    3
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [[1, 2, 3]]})
+        >>> result = df.select(dfn.functions.cardinality(dfn.col("a")).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        3
     """
     return Expr(f.cardinality(array.expr))
 
@@ -3249,13 +3197,12 @@ def approx_distinct(
         filter: If provided, only compute against rows for which the filter is True
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [1, 1, 2, 3]})
-    >>> result = df.aggregate(
-    ...     [], [dfn.functions.approx_distinct(dfn.col("a")).alias("v")])
-    >>> result.collect_column("v")[0].as_py() == 3
-    True
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [1, 1, 2, 3]})
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.approx_distinct(dfn.col("a")).alias("v")])
+        >>> result.collect_column("v")[0].as_py() == 3
+        True
     """
     filter_raw = filter.expr if filter is not None else None
 
@@ -3276,13 +3223,12 @@ def approx_median(expression: Expr, filter: Expr | None = None) -> Expr:
         filter: If provided, only compute against rows for which the filter is True
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0]})
-    >>> result = df.aggregate(
-    ...     [], [dfn.functions.approx_median(dfn.col("a")).alias("v")])
-    >>> result.collect_column("v")[0].as_py()
-    2.0
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0]})
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.approx_median(dfn.col("a")).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        2.0
     """
     filter_raw = filter.expr if filter is not None else None
     return Expr(f.approx_median(expression.expr, filter=filter_raw))
@@ -3316,13 +3262,14 @@ def approx_percentile_cont(
         filter: If provided, only compute against rows for which the filter is True
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0, 4.0, 5.0]})
-    >>> result = df.aggregate(
-    ...     [], [dfn.functions.approx_percentile_cont(dfn.col("a"), 0.5).alias("v")])
-    >>> result.collect_column("v")[0].as_py()
-    3.0
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0, 4.0, 5.0]})
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.approx_percentile_cont(
+        ...         dfn.col("a"), 0.5
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        3.0
     """
     sort_expr_raw = sort_or_default(sort_expression)
     filter_raw = filter.expr if filter is not None else None
@@ -3356,14 +3303,13 @@ def approx_percentile_cont_with_weight(
         filter: If provided, only compute against rows for which the filter is True
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0], "w": [1.0, 1.0, 1.0]})
-    >>> result = df.aggregate(
-    ...     [], [dfn.functions.approx_percentile_cont_with_weight(dfn.col("a"),
-    ...     dfn.col("w"), 0.5).alias("v")])
-    >>> result.collect_column("v")[0].as_py()
-    2.0
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0], "w": [1.0, 1.0, 1.0]})
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.approx_percentile_cont_with_weight(dfn.col("a"),
+        ...     dfn.col("w"), 0.5).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        2.0
     """
     sort_expr_raw = sort_or_default(sort_expression)
     filter_raw = filter.expr if filter is not None else None
@@ -3400,12 +3346,12 @@ def array_agg(
         order_by: Order the resultant array values. Accepts column names or expressions.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [1, 2, 3]})
-    >>> result = df.aggregate([], [dfn.functions.array_agg(dfn.col("a")).alias("v")])
-    >>> result.collect_column("v")[0].as_py()
-    [1, 2, 3]
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [1, 2, 3]})
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.array_agg(dfn.col("a")).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        [1, 2, 3]
     """
     order_by_raw = sort_list_to_raw_sort_list(order_by)
     filter_raw = filter.expr if filter is not None else None
@@ -3433,12 +3379,11 @@ def avg(
         filter: If provided, only compute against rows for which the filter is True
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0]})
-    >>> result = df.aggregate([], [dfn.functions.avg(dfn.col("a")).alias("v")])
-    >>> result.collect_column("v")[0].as_py()
-    2.0
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0]})
+        >>> result = df.aggregate([], [dfn.functions.avg(dfn.col("a")).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        2.0
     """
     filter_raw = filter.expr if filter is not None else None
     return Expr(f.avg(expression.expr, filter=filter_raw))
@@ -3487,12 +3432,11 @@ def count(
         filter: If provided, only compute against rows for which the filter is True
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [1, 2, 3]})
-    >>> result = df.aggregate([], [dfn.functions.count(dfn.col("a")).alias("v")])
-    >>> result.collect_column("v")[0].as_py()
-    3
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [1, 2, 3]})
+        >>> result = df.aggregate([], [dfn.functions.count(dfn.col("a")).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        3
     """
     filter_raw = filter.expr if filter is not None else None
 
@@ -3580,12 +3524,11 @@ def max(expression: Expr, filter: Expr | None = None) -> Expr:
         filter: If provided, only compute against rows for which the filter is True
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [1, 2, 3]})
-    >>> result = df.aggregate([], [dfn.functions.max(dfn.col("a")).alias("v")])
-    >>> result.collect_column("v")[0].as_py()
-    3
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [1, 2, 3]})
+        >>> result = df.aggregate([], [dfn.functions.max(dfn.col("a")).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        3
     """
     filter_raw = filter.expr if filter is not None else None
     return Expr(f.max(expression.expr, filter=filter_raw))
@@ -3617,12 +3560,11 @@ def median(
         filter: If provided, only compute against rows for which the filter is True
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0]})
-    >>> result = df.aggregate([], [dfn.functions.median(dfn.col("a")).alias("v")])
-    >>> result.collect_column("v")[0].as_py()
-    2.0
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0]})
+        >>> result = df.aggregate([], [dfn.functions.median(dfn.col("a")).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        2.0
     """
     filter_raw = filter.expr if filter is not None else None
     return Expr(f.median(expression.expr, distinct=distinct, filter=filter_raw))
@@ -3639,12 +3581,11 @@ def min(expression: Expr, filter: Expr | None = None) -> Expr:
         filter: If provided, only compute against rows for which the filter is True
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [1, 2, 3]})
-    >>> result = df.aggregate([], [dfn.functions.min(dfn.col("a")).alias("v")])
-    >>> result.collect_column("v")[0].as_py()
-    1
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [1, 2, 3]})
+        >>> result = df.aggregate([], [dfn.functions.min(dfn.col("a")).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        1
     """
     filter_raw = filter.expr if filter is not None else None
     return Expr(f.min(expression.expr, filter=filter_raw))
@@ -3666,12 +3607,11 @@ def sum(
         filter: If provided, only compute against rows for which the filter is True
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [1, 2, 3]})
-    >>> result = df.aggregate([], [dfn.functions.sum(dfn.col("a")).alias("v")])
-    >>> result.collect_column("v")[0].as_py()
-    6
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [1, 2, 3]})
+        >>> result = df.aggregate([], [dfn.functions.sum(dfn.col("a")).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        6
     """
     filter_raw = filter.expr if filter is not None else None
     return Expr(f.sum(expression.expr, filter=filter_raw))
@@ -4212,12 +4152,11 @@ def bit_and(expression: Expr, filter: Expr | None = None) -> Expr:
         filter: If provided, only compute against rows for which the filter is True
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [7, 3]})
-    >>> result = df.aggregate([], [dfn.functions.bit_and(dfn.col("a")).alias("v")])
-    >>> result.collect_column("v")[0].as_py()
-    3
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [7, 3]})
+        >>> result = df.aggregate([], [dfn.functions.bit_and(dfn.col("a")).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        3
     """
     filter_raw = filter.expr if filter is not None else None
     return Expr(f.bit_and(expression.expr, filter=filter_raw))
@@ -4236,12 +4175,11 @@ def bit_or(expression: Expr, filter: Expr | None = None) -> Expr:
         filter: If provided, only compute against rows for which the filter is True
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [1, 2]})
-    >>> result = df.aggregate([], [dfn.functions.bit_or(dfn.col("a")).alias("v")])
-    >>> result.collect_column("v")[0].as_py()
-    3
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [1, 2]})
+        >>> result = df.aggregate([], [dfn.functions.bit_or(dfn.col("a")).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        3
     """
     filter_raw = filter.expr if filter is not None else None
     return Expr(f.bit_or(expression.expr, filter=filter_raw))
@@ -4263,12 +4201,11 @@ def bit_xor(
         filter: If provided, only compute against rows for which the filter is True
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [5, 3]})
-    >>> result = df.aggregate([], [dfn.functions.bit_xor(dfn.col("a")).alias("v")])
-    >>> result.collect_column("v")[0].as_py()
-    6
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [5, 3]})
+        >>> result = df.aggregate([], [dfn.functions.bit_xor(dfn.col("a")).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        6
     """
     filter_raw = filter.expr if filter is not None else None
     return Expr(f.bit_xor(expression.expr, distinct=distinct, filter=filter_raw))
@@ -4288,12 +4225,11 @@ def bool_and(expression: Expr, filter: Expr | None = None) -> Expr:
         filter: If provided, only compute against rows for which the filter is True
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [True, True, False]})
-    >>> result = df.aggregate([], [dfn.functions.bool_and(dfn.col("a")).alias("v")])
-    >>> result.collect_column("v")[0].as_py()
-    False
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [True, True, False]})
+        >>> result = df.aggregate([], [dfn.functions.bool_and(dfn.col("a")).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        False
     """
     filter_raw = filter.expr if filter is not None else None
     return Expr(f.bool_and(expression.expr, filter=filter_raw))
@@ -4313,12 +4249,11 @@ def bool_or(expression: Expr, filter: Expr | None = None) -> Expr:
         filter: If provided, only compute against rows for which the filter is True
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": [False, False, True]})
-    >>> result = df.aggregate([], [dfn.functions.bool_or(dfn.col("a")).alias("v")])
-    >>> result.collect_column("v")[0].as_py()
-    True
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [False, False, True]})
+        >>> result = df.aggregate([], [dfn.functions.bool_or(dfn.col("a")).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        True
     """
     filter_raw = filter.expr if filter is not None else None
     return Expr(f.bool_or(expression.expr, filter=filter_raw))
@@ -4747,13 +4682,14 @@ def string_agg(
             column names or expressions.
 
     Examples:
-    ---------
-    >>> ctx = dfn.SessionContext()
-    >>> df = ctx.from_pydict({"a": ["x", "y", "z"]})
-    >>> result = df.aggregate(
-    ...     [], [dfn.functions.string_agg(dfn.col("a"), ",", order_by="a").alias("s")])
-    >>> result.collect_column("s")[0].as_py()
-    'x,y,z'
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": ["x", "y", "z"]})
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.string_agg(
+        ...         dfn.col("a"), ",", order_by="a"
+        ...     ).alias("s")])
+        >>> result.collect_column("s")[0].as_py()
+        'x,y,z'
     """
     order_by_raw = sort_list_to_raw_sort_list(order_by)
     filter_raw = filter.expr if filter is not None else None

From 75f48d7782a77e9b69edefa88551df180b3490c0 Mon Sep 17 00:00:00 2001
From: ntjohnson1 <24689722+ntjohnson1@users.noreply.github.com>
Date: Wed, 18 Mar 2026 14:38:20 -0400
Subject: [PATCH 4/8] Remove builtins use

---
 python/datafusion/functions.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py
index 15cd43b45..40440d074 100644
--- a/python/datafusion/functions.py
+++ b/python/datafusion/functions.py
@@ -1259,14 +1259,12 @@ def pi() -> Expr:
     Examples:
         >>> ctx = dfn.SessionContext()
         >>> df = ctx.from_pydict({"a": [1]})
-        >>> import builtins
+        >>> from math import pi
         >>> result = df.select(
         ...     dfn.functions.pi().alias("pi")
         ... )
-        >>> builtins.round(
-        ...     result.collect_column("pi")[0].as_py(), 5
-        ... )
-        3.14159
+        >>> result.collect_column("pi")[0].as_py() == pi
+        True
     """
     return Expr(f.pi())
 

From fd396386531e43f1f6f79f63be26f4593a64b55d Mon Sep 17 00:00:00 2001
From: ntjohnson1 <24689722+ntjohnson1@users.noreply.github.com>
Date: Wed, 18 Mar 2026 14:54:15 -0400
Subject: [PATCH 5/8] Add coverage for optional filter

---
 python/datafusion/functions.py | 238 +++++++++++++++++++++++++++++++++
 1 file changed, 238 insertions(+)

diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py
index 40440d074..a8058bb69 100644
--- a/python/datafusion/functions.py
+++ b/python/datafusion/functions.py
@@ -521,6 +521,13 @@ def count_star(filter: Expr | None = None) -> Expr:
         >>> result = df.aggregate([], [dfn.functions.count_star().alias("cnt")])
         >>> result.collect_column("cnt")[0].as_py()
         3
+
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.count_star(
+        ...         filter=dfn.col("a") > dfn.lit(1)
+        ...     ).alias("cnt")])
+        >>> result.collect_column("cnt")[0].as_py()
+        2
     """
     return count(Expr.literal(1), filter=filter)
 
@@ -3201,6 +3208,14 @@ def approx_distinct(
         ...     [], [dfn.functions.approx_distinct(dfn.col("a")).alias("v")])
         >>> result.collect_column("v")[0].as_py() == 3
         True
+
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.approx_distinct(
+        ...         dfn.col("a"),
+        ...         filter=dfn.col("a") > dfn.lit(1)
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py() == 2
+        True
     """
     filter_raw = filter.expr if filter is not None else None
 
@@ -3227,6 +3242,14 @@ def approx_median(expression: Expr, filter: Expr | None = None) -> Expr:
         ...     [], [dfn.functions.approx_median(dfn.col("a")).alias("v")])
         >>> result.collect_column("v")[0].as_py()
         2.0
+
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.approx_median(
+        ...         dfn.col("a"),
+        ...         filter=dfn.col("a") > dfn.lit(1.0)
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        2.5
     """
     filter_raw = filter.expr if filter is not None else None
     return Expr(f.approx_median(expression.expr, filter=filter_raw))
@@ -3382,6 +3405,14 @@ def avg(
         >>> result = df.aggregate([], [dfn.functions.avg(dfn.col("a")).alias("v")])
         >>> result.collect_column("v")[0].as_py()
         2.0
+
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.avg(
+        ...         dfn.col("a"),
+        ...         filter=dfn.col("a") > dfn.lit(1.0)
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        2.5
     """
     filter_raw = filter.expr if filter is not None else None
     return Expr(f.avg(expression.expr, filter=filter_raw))
@@ -3407,6 +3438,14 @@ def corr(value_y: Expr, value_x: Expr, filter: Expr | None = None) -> Expr:
         ...     [], [dfn.functions.corr(dfn.col("a"), dfn.col("b")).alias("v")])
         >>> result.collect_column("v")[0].as_py()
         1.0
+
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.corr(
+        ...         dfn.col("a"), dfn.col("b"),
+        ...         filter=dfn.col("a") > dfn.lit(1.0)
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        1.0
     """
     filter_raw = filter.expr if filter is not None else None
     return Expr(f.corr(value_y.expr, value_x.expr, filter=filter_raw))
@@ -3472,6 +3511,18 @@ def covar_pop(value_y: Expr, value_x: Expr, filter: Expr | None = None) -> Expr:
         ... )
         >>> result.collect_column("v")[0].as_py()
         3.0
+
+        >>> df = ctx.from_pydict(
+        ...     {"a": [0.0, 1.0, 3.0], "b": [0.0, 1.0, 3.0]})
+        >>> result = df.aggregate(
+        ...     [],
+        ...     [dfn.functions.covar_pop(
+        ...         dfn.col("a"), dfn.col("b"),
+        ...         filter=dfn.col("a") > dfn.lit(0.0)
+        ...     ).alias("v")]
+        ... )
+        >>> result.collect_column("v")[0].as_py()
+        1.0
     """
     filter_raw = filter.expr if filter is not None else None
     return Expr(f.covar_pop(value_y.expr, value_x.expr, filter=filter_raw))
@@ -3497,6 +3548,14 @@ def covar_samp(value_y: Expr, value_x: Expr, filter: Expr | None = None) -> Expr
         ...     [], [dfn.functions.covar_samp(dfn.col("a"), dfn.col("b")).alias("v")])
         >>> result.collect_column("v")[0].as_py()
         1.0
+
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.covar_samp(
+        ...         dfn.col("a"), dfn.col("b"),
+        ...         filter=dfn.col("a") > dfn.lit(1.0)
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        0.5
     """
     filter_raw = filter.expr if filter is not None else None
     return Expr(f.covar_samp(value_y.expr, value_x.expr, filter=filter_raw))
@@ -3527,6 +3586,14 @@ def max(expression: Expr, filter: Expr | None = None) -> Expr:
         >>> result = df.aggregate([], [dfn.functions.max(dfn.col("a")).alias("v")])
         >>> result.collect_column("v")[0].as_py()
         3
+
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.max(
+        ...         dfn.col("a"),
+        ...         filter=dfn.col("a") < dfn.lit(3)
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        2
     """
     filter_raw = filter.expr if filter is not None else None
     return Expr(f.max(expression.expr, filter=filter_raw))
@@ -3584,6 +3651,14 @@ def min(expression: Expr, filter: Expr | None = None) -> Expr:
         >>> result = df.aggregate([], [dfn.functions.min(dfn.col("a")).alias("v")])
         >>> result.collect_column("v")[0].as_py()
         1
+
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.min(
+        ...         dfn.col("a"),
+        ...         filter=dfn.col("a") > dfn.lit(1)
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        2
     """
     filter_raw = filter.expr if filter is not None else None
     return Expr(f.min(expression.expr, filter=filter_raw))
@@ -3610,6 +3685,14 @@ def sum(
         >>> result = df.aggregate([], [dfn.functions.sum(dfn.col("a")).alias("v")])
         >>> result.collect_column("v")[0].as_py()
         6
+
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.sum(
+        ...         dfn.col("a"),
+        ...         filter=dfn.col("a") > dfn.lit(1)
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        5
     """
     filter_raw = filter.expr if filter is not None else None
     return Expr(f.sum(expression.expr, filter=filter_raw))
@@ -3631,6 +3714,15 @@ def stddev(expression: Expr, filter: Expr | None = None) -> Expr:
         >>> result = df.aggregate([], [dfn.functions.stddev(dfn.col("a")).alias("v")])
         >>> result.collect_column("v")[0].as_py()
         2.0
+
+        >>> df = ctx.from_pydict({"a": [1.0, 2.0, 4.0, 6.0]})
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.stddev(
+        ...         dfn.col("a"),
+        ...         filter=dfn.col("a") > dfn.lit(1.0)
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        2.0
     """
     filter_raw = filter.expr if filter is not None else None
     return Expr(f.stddev(expression.expr, filter=filter_raw))
@@ -3654,6 +3746,15 @@ def stddev_pop(expression: Expr, filter: Expr | None = None) -> Expr:
         ... )
         >>> result.collect_column("v")[0].as_py()
         1.0
+
+        >>> df = ctx.from_pydict({"a": [0.0, 1.0, 3.0]})
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.stddev_pop(
+        ...         dfn.col("a"),
+        ...         filter=dfn.col("a") > dfn.lit(0.0)
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        1.0
     """
     filter_raw = filter.expr if filter is not None else None
     return Expr(f.stddev_pop(expression.expr, filter=filter_raw))
@@ -3693,6 +3794,15 @@ def var_pop(expression: Expr, filter: Expr | None = None) -> Expr:
         >>> result = df.aggregate([], [dfn.functions.var_pop(dfn.col("a")).alias("v")])
         >>> result.collect_column("v")[0].as_py()
         1.0
+
+        >>> df = ctx.from_pydict({"a": [-1.0, 0.0, 2.0]})
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.var_pop(
+        ...         dfn.col("a"),
+        ...         filter=dfn.col("a") > dfn.lit(-1.0)
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        1.0
     """
     filter_raw = filter.expr if filter is not None else None
     return Expr(f.var_pop(expression.expr, filter=filter_raw))
@@ -3714,6 +3824,14 @@ def var_samp(expression: Expr, filter: Expr | None = None) -> Expr:
         >>> result = df.aggregate([], [dfn.functions.var_samp(dfn.col("a")).alias("v")])
         >>> result.collect_column("v")[0].as_py()
         1.0
+
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.var_samp(
+        ...         dfn.col("a"),
+        ...         filter=dfn.col("a") > dfn.lit(1.0)
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        0.5
     """
     filter_raw = filter.expr if filter is not None else None
     return Expr(f.var_sample(expression.expr, filter=filter_raw))
@@ -3753,6 +3871,14 @@ def regr_avgx(
         ...     [], [dfn.functions.regr_avgx(dfn.col("y"), dfn.col("x")).alias("v")])
         >>> result.collect_column("v")[0].as_py()
         5.0
+
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.regr_avgx(
+        ...         dfn.col("y"), dfn.col("x"),
+        ...         filter=dfn.col("y") > dfn.lit(1.0)
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        5.5
     """
     filter_raw = filter.expr if filter is not None else None
 
@@ -3784,6 +3910,14 @@ def regr_avgy(
         ...     [], [dfn.functions.regr_avgy(dfn.col("y"), dfn.col("x")).alias("v")])
         >>> result.collect_column("v")[0].as_py()
         2.0
+
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.regr_avgy(
+        ...         dfn.col("y"), dfn.col("x"),
+        ...         filter=dfn.col("y") > dfn.lit(1.0)
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        2.5
     """
     filter_raw = filter.expr if filter is not None else None
 
@@ -3815,6 +3949,14 @@ def regr_count(
         ...     [], [dfn.functions.regr_count(dfn.col("y"), dfn.col("x")).alias("v")])
         >>> result.collect_column("v")[0].as_py()
         3
+
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.regr_count(
+        ...         dfn.col("y"), dfn.col("x"),
+        ...         filter=dfn.col("y") > dfn.lit(1.0)
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        2
     """
     filter_raw = filter.expr if filter is not None else None
 
@@ -3847,6 +3989,15 @@ def regr_intercept(
         ...     [dfn.functions.regr_intercept(dfn.col("y"), dfn.col("x")).alias("v")])
         >>> result.collect_column("v")[0].as_py()
         0.0
+
+        >>> result = df.aggregate(
+        ...     [],
+        ...     [dfn.functions.regr_intercept(
+        ...         dfn.col("y"), dfn.col("x"),
+        ...         filter=dfn.col("y") > dfn.lit(2.0)
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        0.0
     """
     filter_raw = filter.expr if filter is not None else None
 
@@ -3878,6 +4029,14 @@ def regr_r2(
         ...     [], [dfn.functions.regr_r2(dfn.col("y"), dfn.col("x")).alias("v")])
         >>> result.collect_column("v")[0].as_py()
         1.0
+
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.regr_r2(
+        ...         dfn.col("y"), dfn.col("x"),
+        ...         filter=dfn.col("y") > dfn.lit(2.0)
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        1.0
     """
     filter_raw = filter.expr if filter is not None else None
 
@@ -3909,6 +4068,14 @@ def regr_slope(
         ...     [], [dfn.functions.regr_slope(dfn.col("y"), dfn.col("x")).alias("v")])
         >>> result.collect_column("v")[0].as_py()
         2.0
+
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.regr_slope(
+        ...         dfn.col("y"), dfn.col("x"),
+        ...         filter=dfn.col("y") > dfn.lit(2.0)
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        2.0
     """
     filter_raw = filter.expr if filter is not None else None
 
@@ -3940,6 +4107,14 @@ def regr_sxx(
         ...     [], [dfn.functions.regr_sxx(dfn.col("y"), dfn.col("x")).alias("v")])
         >>> result.collect_column("v")[0].as_py()
         2.0
+
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.regr_sxx(
+        ...         dfn.col("y"), dfn.col("x"),
+        ...         filter=dfn.col("y") > dfn.lit(1.0)
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        0.5
     """
     filter_raw = filter.expr if filter is not None else None
 
@@ -3971,6 +4146,14 @@ def regr_sxy(
         ...     [], [dfn.functions.regr_sxy(dfn.col("y"), dfn.col("x")).alias("v")])
         >>> result.collect_column("v")[0].as_py()
         2.0
+
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.regr_sxy(
+        ...         dfn.col("y"), dfn.col("x"),
+        ...         filter=dfn.col("y") > dfn.lit(1.0)
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        0.5
     """
     filter_raw = filter.expr if filter is not None else None
 
@@ -4002,6 +4185,14 @@ def regr_syy(
         ...     [], [dfn.functions.regr_syy(dfn.col("y"), dfn.col("x")).alias("v")])
         >>> result.collect_column("v")[0].as_py()
         2.0
+
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.regr_syy(
+        ...         dfn.col("y"), dfn.col("x"),
+        ...         filter=dfn.col("y") > dfn.lit(1.0)
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        0.5
     """
     filter_raw = filter.expr if filter is not None else None
 
@@ -4155,6 +4346,15 @@ def bit_and(expression: Expr, filter: Expr | None = None) -> Expr:
         >>> result = df.aggregate([], [dfn.functions.bit_and(dfn.col("a")).alias("v")])
         >>> result.collect_column("v")[0].as_py()
         3
+
+        >>> df = ctx.from_pydict({"a": [7, 5, 3]})
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.bit_and(
+        ...         dfn.col("a"),
+        ...         filter=dfn.col("a") > dfn.lit(3)
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        5
     """
     filter_raw = filter.expr if filter is not None else None
     return Expr(f.bit_and(expression.expr, filter=filter_raw))
@@ -4178,6 +4378,15 @@ def bit_or(expression: Expr, filter: Expr | None = None) -> Expr:
         >>> result = df.aggregate([], [dfn.functions.bit_or(dfn.col("a")).alias("v")])
         >>> result.collect_column("v")[0].as_py()
         3
+
+        >>> df = ctx.from_pydict({"a": [1, 2, 4]})
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.bit_or(
+        ...         dfn.col("a"),
+        ...         filter=dfn.col("a") > dfn.lit(1)
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        6
     """
     filter_raw = filter.expr if filter is not None else None
     return Expr(f.bit_or(expression.expr, filter=filter_raw))
@@ -4228,6 +4437,16 @@ def bool_and(expression: Expr, filter: Expr | None = None) -> Expr:
         >>> result = df.aggregate([], [dfn.functions.bool_and(dfn.col("a")).alias("v")])
         >>> result.collect_column("v")[0].as_py()
         False
+
+        >>> df = ctx.from_pydict(
+        ...     {"a": [True, True, False], "b": [1, 2, 3]})
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.bool_and(
+        ...         dfn.col("a"),
+        ...         filter=dfn.col("b") < dfn.lit(3)
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        True
     """
     filter_raw = filter.expr if filter is not None else None
     return Expr(f.bool_and(expression.expr, filter=filter_raw))
@@ -4252,6 +4471,16 @@ def bool_or(expression: Expr, filter: Expr | None = None) -> Expr:
         >>> result = df.aggregate([], [dfn.functions.bool_or(dfn.col("a")).alias("v")])
         >>> result.collect_column("v")[0].as_py()
         True
+
+        >>> df = ctx.from_pydict(
+        ...     {"a": [False, False, True], "b": [1, 2, 3]})
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.bool_or(
+        ...         dfn.col("a"),
+        ...         filter=dfn.col("b") < dfn.lit(3)
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        False
     """
     filter_raw = filter.expr if filter is not None else None
     return Expr(f.bool_or(expression.expr, filter=filter_raw))
@@ -4688,6 +4917,15 @@ def string_agg(
         ...     ).alias("s")])
         >>> result.collect_column("s")[0].as_py()
         'x,y,z'
+
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.string_agg(
+        ...         dfn.col("a"), ",",
+        ...         filter=dfn.col("a") > dfn.lit("x"),
+        ...         order_by="a",
+        ...     ).alias("s")])
+        >>> result.collect_column("s")[0].as_py()
+        'y,z'
     """
     order_by_raw = sort_list_to_raw_sort_list(order_by)
     filter_raw = filter.expr if filter is not None else None

From 6190410f96af079e92fc4a4daf47787bbc561ad7 Mon Sep 17 00:00:00 2001
From: ntjohnson1 <24689722+ntjohnson1@users.noreply.github.com>
Date: Wed, 18 Mar 2026 15:03:57 -0400
Subject: [PATCH 6/8] Cover optional argument examples for window and value
 functions

---
 python/datafusion/functions.py | 113 +++++++++++++++++++++++++++++++++
 1 file changed, 113 insertions(+)

diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py
index a8058bb69..8db07180c 100644
--- a/python/datafusion/functions.py
+++ b/python/datafusion/functions.py
@@ -4227,6 +4227,17 @@ def first_value(
         ... )
         >>> result.collect_column("v")[0].as_py()
         10
+
+        >>> df = ctx.from_pydict({"a": [None, 20, 10]})
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.first_value(
+        ...         dfn.col("a"),
+        ...         filter=dfn.col("a") > dfn.lit(10),
+        ...         order_by="a",
+        ...         null_treatment=dfn.common.NullTreatment.IGNORE_NULLS,
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        20
     """
     order_by_raw = sort_list_to_raw_sort_list(order_by)
     filter_raw = filter.expr if filter is not None else None
@@ -4269,6 +4280,17 @@ def last_value(
         ... )
         >>> result.collect_column("v")[0].as_py()
         30
+
+        >>> df = ctx.from_pydict({"a": [None, 20, 10]})
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.last_value(
+        ...         dfn.col("a"),
+        ...         filter=dfn.col("a") > dfn.lit(10),
+        ...         order_by="a",
+        ...         null_treatment=dfn.common.NullTreatment.IGNORE_NULLS,
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        20
     """
     order_by_raw = sort_list_to_raw_sort_list(order_by)
     filter_raw = filter.expr if filter is not None else None
@@ -4313,6 +4335,17 @@ def nth_value(
         ... )
         >>> result.collect_column("v")[0].as_py()
         20
+
+        >>> df = ctx.from_pydict({"a": [None, 20, 10]})
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.nth_value(
+        ...         dfn.col("a"), 1,
+        ...         filter=dfn.col("a") > dfn.lit(10),
+        ...         order_by="a",
+        ...         null_treatment=dfn.common.NullTreatment.IGNORE_NULLS,
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        20
     """
     order_by_raw = sort_list_to_raw_sort_list(order_by)
     filter_raw = filter.expr if filter is not None else None
@@ -4531,6 +4564,16 @@ def lead(
         ...     default_value=0, order_by="a").alias("lead"))
         >>> result.sort(dfn.col("a")).collect_column("lead").to_pylist()
         [2, 3, 0]
+
+        >>> df = ctx.from_pydict({"g": ["a", "a", "b"], "v": [1, 2, 3]})
+        >>> result = df.select(
+        ...     dfn.col("g"), dfn.col("v"),
+        ...     dfn.functions.lead(
+        ...         dfn.col("v"), shift_offset=1, default_value=0,
+        ...         partition_by=dfn.col("g"), order_by="v",
+        ...     ).alias("lead"))
+        >>> result.sort(dfn.col("g"), dfn.col("v")).collect_column("lead").to_pylist()
+        [2, 0, 0]
     """
     if not isinstance(default_value, pa.Scalar) and default_value is not None:
         default_value = pa.scalar(default_value)
@@ -4591,6 +4634,16 @@ def lag(
         ...     default_value=0, order_by="a").alias("lag"))
         >>> result.sort(dfn.col("a")).collect_column("lag").to_pylist()
         [0, 1, 2]
+
+        >>> df = ctx.from_pydict({"g": ["a", "a", "b"], "v": [1, 2, 3]})
+        >>> result = df.select(
+        ...     dfn.col("g"), dfn.col("v"),
+        ...     dfn.functions.lag(
+        ...         dfn.col("v"), shift_offset=1, default_value=0,
+        ...         partition_by=dfn.col("g"), order_by="v",
+        ...     ).alias("lag"))
+        >>> result.sort(dfn.col("g"), dfn.col("v")).collect_column("lag").to_pylist()
+        [0, 1, 0]
     """
     if not isinstance(default_value, pa.Scalar):
         default_value = pa.scalar(default_value)
@@ -4640,6 +4693,16 @@ def row_number(
         ...     dfn.col("a"), dfn.functions.row_number(order_by="a").alias("rn"))
         >>> result.sort(dfn.col("a")).collect_column("rn").to_pylist()
         [1, 2, 3]
+
+        >>> df = ctx.from_pydict(
+        ...     {"g": ["a", "a", "b", "b"], "v": [1, 2, 3, 4]})
+        >>> result = df.select(
+        ...     dfn.col("g"), dfn.col("v"),
+        ...     dfn.functions.row_number(
+        ...         partition_by=dfn.col("g"), order_by="v",
+        ...     ).alias("rn"))
+        >>> result.sort(dfn.col("g"), dfn.col("v")).collect_column("rn").to_pylist()
+        [1, 2, 1, 2]
     """
     partition_by_raw = expr_list_to_raw_expr_list(partition_by)
     order_by_raw = sort_list_to_raw_sort_list(order_by)
@@ -4689,6 +4752,16 @@ def rank(
         ... )
         >>> result.sort(dfn.col("a")).collect_column("rnk").to_pylist()
         [1, 1, 3]
+
+        >>> df = ctx.from_pydict(
+        ...     {"g": ["a", "a", "b", "b"], "v": [1, 1, 2, 3]})
+        >>> result = df.select(
+        ...     dfn.col("g"), dfn.col("v"),
+        ...     dfn.functions.rank(
+        ...         partition_by=dfn.col("g"), order_by="v",
+        ...     ).alias("rnk"))
+        >>> result.sort(dfn.col("g"), dfn.col("v")).collect_column("rnk").to_pylist()
+        [1, 1, 1, 2]
     """
     partition_by_raw = expr_list_to_raw_expr_list(partition_by)
     order_by_raw = sort_list_to_raw_sort_list(order_by)
@@ -4732,6 +4805,16 @@ def dense_rank(
         ...     dfn.col("a"), dfn.functions.dense_rank(order_by="a").alias("dr"))
         >>> result.sort(dfn.col("a")).collect_column("dr").to_pylist()
         [1, 1, 2]
+
+        >>> df = ctx.from_pydict(
+        ...     {"g": ["a", "a", "b", "b"], "v": [1, 1, 2, 3]})
+        >>> result = df.select(
+        ...     dfn.col("g"), dfn.col("v"),
+        ...     dfn.functions.dense_rank(
+        ...         partition_by=dfn.col("g"), order_by="v",
+        ...     ).alias("dr"))
+        >>> result.sort(dfn.col("g"), dfn.col("v")).collect_column("dr").to_pylist()
+        [1, 1, 1, 2]
     """
     partition_by_raw = expr_list_to_raw_expr_list(partition_by)
     order_by_raw = sort_list_to_raw_sort_list(order_by)
@@ -4777,6 +4860,16 @@ def percent_rank(
         ...     dfn.col("a"), dfn.functions.percent_rank(order_by="a").alias("pr"))
         >>> result.sort(dfn.col("a")).collect_column("pr").to_pylist()
         [0.0, 0.5, 1.0]
+
+        >>> df = ctx.from_pydict(
+        ...     {"g": ["a", "a", "a", "b", "b"], "v": [1, 2, 3, 4, 5]})
+        >>> result = df.select(
+        ...     dfn.col("g"), dfn.col("v"),
+        ...     dfn.functions.percent_rank(
+        ...         partition_by=dfn.col("g"), order_by="v",
+        ...     ).alias("pr"))
+        >>> result.sort(dfn.col("g"), dfn.col("v")).collect_column("pr").to_pylist()
+        [0.0, 0.5, 1.0, 0.0, 1.0]
     """
     partition_by_raw = expr_list_to_raw_expr_list(partition_by)
     order_by_raw = sort_list_to_raw_sort_list(order_by)
@@ -4825,6 +4918,16 @@ def cume_dist(
         ... )
         >>> result.collect_column("cd").to_pylist()
         [0.25..., 0.75..., 0.75..., 1.0...]
+
+        >>> df = ctx.from_pydict(
+        ...     {"g": ["a", "a", "b", "b"], "v": [1, 2, 3, 4]})
+        >>> result = df.select(
+        ...     dfn.col("g"), dfn.col("v"),
+        ...     dfn.functions.cume_dist(
+        ...         partition_by=dfn.col("g"), order_by="v",
+        ...     ).alias("cd"))
+        >>> result.sort(dfn.col("g"), dfn.col("v")).collect_column("cd").to_pylist()
+        [0.5, 1.0, 0.5, 1.0]
     """
     partition_by_raw = expr_list_to_raw_expr_list(partition_by)
     order_by_raw = sort_list_to_raw_sort_list(order_by)
@@ -4873,6 +4976,16 @@ def ntile(
         ...     dfn.col("a"), dfn.functions.ntile(2, order_by="a").alias("nt"))
         >>> result.sort(dfn.col("a")).collect_column("nt").to_pylist()
         [1, 1, 2, 2]
+
+        >>> df = ctx.from_pydict(
+        ...     {"g": ["a", "a", "b", "b"], "v": [1, 2, 3, 4]})
+        >>> result = df.select(
+        ...     dfn.col("g"), dfn.col("v"),
+        ...     dfn.functions.ntile(
+        ...         2, partition_by=dfn.col("g"), order_by="v",
+        ...     ).alias("nt"))
+        >>> result.sort(dfn.col("g"), dfn.col("v")).collect_column("nt").to_pylist()
+        [1, 2, 1, 2]
     """
     partition_by_raw = expr_list_to_raw_expr_list(partition_by)
     order_by_raw = sort_list_to_raw_sort_list(order_by)

From c0ef9a6f8608b93950faeefca203801fdd12228b Mon Sep 17 00:00:00 2001
From: ntjohnson1 <24689722+ntjohnson1@users.noreply.github.com>
Date: Wed, 18 Mar 2026 15:27:31 -0400
Subject: [PATCH 7/8] Cover optional arguments for scalar functions

---
 python/datafusion/functions.py | 105 +++++++++++++++++++++++++++++++++
 1 file changed, 105 insertions(+)

diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py
index 8db07180c..a7324e525 100644
--- a/python/datafusion/functions.py
+++ b/python/datafusion/functions.py
@@ -405,6 +405,15 @@ def in_list(arg: Expr, values: list[Expr], negated: bool = False) -> Expr:
         ... )
         >>> result.collect_column("in").to_pylist()
         [True, False, True]
+
+        >>> result = df.select(
+        ...     dfn.functions.in_list(
+        ...         dfn.col("a"), [dfn.lit(1), dfn.lit(3)],
+        ...         negated=True,
+        ...     ).alias("not_in")
+        ... )
+        >>> result.collect_column("not_in").to_pylist()
+        [False, True, False]
     """
     values = [v.expr for v in values]
     return Expr(f.in_list(arg.expr, values, negated))
@@ -469,6 +478,11 @@ def order_by(expr: Expr, ascending: bool = True, nulls_first: bool = True) -> So
         >>> sort_expr = dfn.functions.order_by(dfn.col("a"), ascending=False)
         >>> sort_expr.ascending()
         False
+
+        >>> sort_expr = dfn.functions.order_by(
+        ...     dfn.col("a"), ascending=True, nulls_first=False)
+        >>> sort_expr.nulls_first()
+        False
     """
     return SortExpr(expr, ascending=ascending, nulls_first=nulls_first)
 
@@ -488,6 +502,13 @@ def alias(expr: Expr, name: str, metadata: dict[str, str] | None = None) -> Expr
         ...     dfn.functions.alias(dfn.col("a"), "b")
         ... ).collect_column("b")[0].as_py()
         1
+
+        >>> df.select(
+        ...     dfn.functions.alias(
+        ...         dfn.col("a"), "b", metadata={"info": "test"}
+        ...     )
+        ... ).collect_column("b")[0].as_py()
+        1
     """
     return Expr(f.alias(expr.expr, name, metadata))
 
@@ -1161,6 +1182,13 @@ def lpad(string: Expr, count: Expr, characters: Expr | None = None) -> Expr:
         'the ca'
         >>> lpad_df.collect_column("lpad")[1].as_py()
         ' a hat'
+
+        >>> result = df.select(
+        ...     dfn.functions.lpad(
+        ...         dfn.col("a"), dfn.lit(10), dfn.lit(".")
+        ...     ).alias("lpad"))
+        >>> result.collect_column("lpad")[0].as_py()
+        '...the cat'
     """
     characters = characters if characters is not None else Expr.literal(" ")
     return Expr(f.lpad(string.expr, count.expr, characters.expr))
@@ -1341,6 +1369,17 @@ def regexp_like(string: Expr, regex: Expr, flags: Expr | None = None) -> Expr:
         ... )
         >>> result.collect_column("m")[0].as_py()
         True
+
+        Use ``flags`` for case-insensitive matching:
+
+        >>> result = df.select(
+        ...     dfn.functions.regexp_like(
+        ...         dfn.col("a"), dfn.lit("HELLO"),
+        ...         flags=dfn.lit("i"),
+        ...     ).alias("m")
+        ... )
+        >>> result.collect_column("m")[0].as_py()
+        True
     """
     if flags is not None:
         flags = flags.expr
@@ -1363,6 +1402,17 @@ def regexp_match(string: Expr, regex: Expr, flags: Expr | None = None) -> Expr:
         ... )
         >>> result.collect_column("m")[0].as_py()
         ['42']
+
+        Use ``flags`` for case-insensitive matching:
+
+        >>> result = df.select(
+        ...     dfn.functions.regexp_match(
+        ...         dfn.col("a"), dfn.lit("(HELLO)"),
+        ...         flags=dfn.lit("i"),
+        ...     ).alias("m")
+        ... )
+        >>> result.collect_column("m")[0].as_py()
+        ['hello']
     """
     if flags is not None:
         flags = flags.expr
@@ -1391,6 +1441,18 @@ def regexp_replace(
         ... )
         >>> result.collect_column("r")[0].as_py()
         'hello XX'
+
+        Use the ``g`` flag to replace all occurrences:
+
+        >>> df = ctx.from_pydict({"a": ["a1 b2 c3"]})
+        >>> result = df.select(
+        ...     dfn.functions.regexp_replace(
+        ...         dfn.col("a"), dfn.lit("\\d+"),
+        ...         dfn.lit("X"), flags=dfn.lit("g"),
+        ...     ).alias("r")
+        ... )
+        >>> result.collect_column("r")[0].as_py()
+        'aX bX cX'
     """
     if flags is not None:
         flags = flags.expr
@@ -1412,6 +1474,17 @@ def regexp_count(
         ...     dfn.functions.regexp_count(dfn.col("a"), dfn.lit("abc")).alias("c"))
         >>> result.collect_column("c")[0].as_py()
         2
+
+        Use ``start`` to begin searching from a position, and
+        ``flags`` for case-insensitive matching:
+
+        >>> result = df.select(
+        ...     dfn.functions.regexp_count(
+        ...         dfn.col("a"), dfn.lit("ABC"),
+        ...         start=dfn.lit(4), flags=dfn.lit("i"),
+        ...     ).alias("c"))
+        >>> result.collect_column("c")[0].as_py()
+        1
     """
     if flags is not None:
         flags = flags.expr
@@ -1447,6 +1520,31 @@ def regexp_instr(
         ... )
         >>> result.collect_column("pos")[0].as_py()
         7
+
+        Use ``start`` to search from a position, ``n`` for the
+        nth occurrence, and ``flags`` for case-insensitive mode:
+
+        >>> df = ctx.from_pydict({"a": ["abc ABC abc"]})
+        >>> result = df.select(
+        ...     dfn.functions.regexp_instr(
+        ...         dfn.col("a"), dfn.lit("abc"),
+        ...         start=dfn.lit(2), n=dfn.lit(1),
+        ...         flags=dfn.lit("i"),
+        ...     ).alias("pos")
+        ... )
+        >>> result.collect_column("pos")[0].as_py()
+        5
+
+        Use ``sub_expr`` to get the position of a capture group:
+
+        >>> result = df.select(
+        ...     dfn.functions.regexp_instr(
+        ...         dfn.col("a"), dfn.lit("(abc)"),
+        ...         sub_expr=dfn.lit(1),
+        ...     ).alias("pos")
+        ... )
+        >>> result.collect_column("pos")[0].as_py()
+        1
     """
     start = start.expr if start is not None else None
     n = n.expr if n is not None else None
@@ -2196,6 +2294,13 @@ def trunc(num: Expr, precision: Expr | None = None) -> Expr:
         >>> result = df.select(dfn.functions.trunc(dfn.col("a")).alias("t"))
         >>> result.collect_column("t")[0].as_py()
         1.0
+
+        >>> result = df.select(
+        ...     dfn.functions.trunc(
+        ...         dfn.col("a"), precision=dfn.lit(2)
+        ...     ).alias("t"))
+        >>> result.collect_column("t")[0].as_py()
+        1.56
     """
     if precision is not None:
         return Expr(f.trunc(num.expr, precision.expr))

From 6a5991cb1d2b2db90688d23f1c27e117dcdfd16f Mon Sep 17 00:00:00 2001
From: ntjohnson1 <24689722+ntjohnson1@users.noreply.github.com>
Date: Wed, 18 Mar 2026 15:34:07 -0400
Subject: [PATCH 8/8] Cover array and aggregation functions

---
 python/datafusion/functions.py | 90 ++++++++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)

diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py
index a7324e525..3fedf9ce2 100644
--- a/python/datafusion/functions.py
+++ b/python/datafusion/functions.py
@@ -2759,6 +2759,16 @@ def array_position(array: Expr, element: Expr, index: int | None = 1) -> Expr:
         ...     dfn.functions.array_position(dfn.col("a"), dfn.lit(20)).alias("result"))
         >>> result.collect_column("result")[0].as_py()
         2
+
+        Use ``index`` to start searching from a given position:
+
+        >>> df = ctx.from_pydict({"a": [[10, 20, 10, 20]]})
+        >>> result = df.select(
+        ...     dfn.functions.array_position(
+        ...         dfn.col("a"), dfn.lit(20), index=3,
+        ...     ).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        4
     """
     return Expr(f.array_position(array.expr, element.expr, index))
 
@@ -3091,6 +3101,14 @@ def array_sort(array: Expr, descending: bool = False, null_first: bool = False)
         >>> result = df.select(dfn.functions.array_sort(dfn.col("a")).alias("result"))
         >>> result.collect_column("result")[0].as_py()
         [1, 2, 3]
+
+        >>> df = ctx.from_pydict({"a": [[3, None, 1]]})
+        >>> result = df.select(
+        ...     dfn.functions.array_sort(
+        ...         dfn.col("a"), descending=True, null_first=True,
+        ...     ).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        [None, 3, 1]
     """
     desc = "DESC" if descending else "ASC"
     nulls_first = "NULLS FIRST" if null_first else "NULLS LAST"
@@ -3125,6 +3143,16 @@ def array_slice(
         ...     dfn.lit(3)).alias("result"))
         >>> result.collect_column("result")[0].as_py()
         [2, 3]
+
+        Use ``stride`` to skip elements:
+
+        >>> result = df.select(
+        ...     dfn.functions.array_slice(
+        ...         dfn.col("a"), dfn.lit(1), dfn.lit(4),
+        ...         stride=dfn.lit(2),
+        ...     ).alias("result"))
+        >>> result.collect_column("result")[0].as_py()
+        [1, 3]
     """
     if stride is not None:
         stride = stride.expr
@@ -3396,6 +3424,15 @@ def approx_percentile_cont(
         ...     ).alias("v")])
         >>> result.collect_column("v")[0].as_py()
         3.0
+
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.approx_percentile_cont(
+        ...         dfn.col("a"), 0.5,
+        ...         num_centroids=10,
+        ...         filter=dfn.col("a") > dfn.lit(1.0),
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        3.5
     """
     sort_expr_raw = sort_or_default(sort_expression)
     filter_raw = filter.expr if filter is not None else None
@@ -3436,6 +3473,15 @@ def approx_percentile_cont_with_weight(
         ...     dfn.col("w"), 0.5).alias("v")])
         >>> result.collect_column("v")[0].as_py()
         2.0
+
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.approx_percentile_cont_with_weight(
+        ...         dfn.col("a"), dfn.col("w"), 0.5,
+        ...         num_centroids=10,
+        ...         filter=dfn.col("a") > dfn.lit(1.0),
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        2.5
     """
     sort_expr_raw = sort_or_default(sort_expression)
     filter_raw = filter.expr if filter is not None else None
@@ -3478,6 +3524,23 @@ def array_agg(
         ...     [], [dfn.functions.array_agg(dfn.col("a")).alias("v")])
         >>> result.collect_column("v")[0].as_py()
         [1, 2, 3]
+
+        >>> df = ctx.from_pydict({"a": [3, 1, 2, 1]})
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.array_agg(
+        ...         dfn.col("a"), distinct=True,
+        ...     ).alias("v")])
+        >>> sorted(result.collect_column("v")[0].as_py())
+        [1, 2, 3]
+
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.array_agg(
+        ...         dfn.col("a"),
+        ...         filter=dfn.col("a") > dfn.lit(1),
+        ...         order_by="a",
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        [2, 3]
     """
     order_by_raw = sort_list_to_raw_sort_list(order_by)
     filter_raw = filter.expr if filter is not None else None
@@ -3579,6 +3642,15 @@ def count(
         >>> result = df.aggregate([], [dfn.functions.count(dfn.col("a")).alias("v")])
         >>> result.collect_column("v")[0].as_py()
         3
+
+        >>> df = ctx.from_pydict({"a": [1, 1, 2, 3]})
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.count(
+        ...         dfn.col("a"), distinct=True,
+        ...         filter=dfn.col("a") > dfn.lit(1),
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        2
     """
     filter_raw = filter.expr if filter is not None else None
 
@@ -3735,6 +3807,15 @@ def median(
         >>> result = df.aggregate([], [dfn.functions.median(dfn.col("a")).alias("v")])
         >>> result.collect_column("v")[0].as_py()
         2.0
+
+        >>> df = ctx.from_pydict({"a": [1.0, 1.0, 2.0, 3.0]})
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.median(
+        ...         dfn.col("a"), distinct=True,
+        ...         filter=dfn.col("a") > dfn.lit(0.0),
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        2.0
     """
     filter_raw = filter.expr if filter is not None else None
     return Expr(f.median(expression.expr, distinct=distinct, filter=filter_raw))
@@ -4551,6 +4632,15 @@ def bit_xor(
         >>> result = df.aggregate([], [dfn.functions.bit_xor(dfn.col("a")).alias("v")])
         >>> result.collect_column("v")[0].as_py()
         6
+
+        >>> df = ctx.from_pydict({"a": [5, 5, 3]})
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.bit_xor(
+        ...         dfn.col("a"), distinct=True,
+        ...         filter=dfn.col("a") > dfn.lit(0),
+        ...     ).alias("v")])
+        >>> result.collect_column("v")[0].as_py()
+        6
     """
     filter_raw = filter.expr if filter is not None else None
     return Expr(f.bit_xor(expression.expr, distinct=distinct, filter=filter_raw))