Skip to content

Add a correction keyword to the std methods #183

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jul 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 30 additions & 3 deletions spec/API_specification/dataframe_api/column_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,20 +469,47 @@ def mean(self, *, skip_nulls: bool = True) -> Scalar:
dtypes.
"""

def std(self, *, skip_nulls: bool = True) -> Scalar:
def std(self, *, correction: int | float = 1, skip_nulls: bool = True) -> Scalar:
"""
Reduction returns a scalar. Must be supported for numerical and
datetime data types. Returns a float for numerical data types, and
datetime (with the appropriate timedelta format string) for datetime
dtypes.
"""

def var(self, *, skip_nulls: bool = True) -> Scalar:
Parameters
----------
correction
Degrees of freedom adjustment. Setting this parameter to a value other
than ``0`` has the effect of adjusting the divisor during the
calculation of the standard deviation according to ``N-correction``,
where ``N`` corresponds to the total number of elements over which
the standard deviation is computed. When computing the standard
deviation of a population, setting this parameter to ``0`` is the
standard choice (i.e., the provided column contains data
constituting an entire population). When computing the corrected
sample standard deviation, setting this parameter to ``1`` is the
standard choice (i.e., the provided column contains data sampled
from a larger population; this is commonly referred to as Bessel's
correction). Fractional (float) values are allowed. Default: ``1``.
skip_nulls
Whether to skip null values.
"""

def var(self, *, correction: int | float = 1, skip_nulls: bool = True) -> Scalar:
"""
Reduction returns a scalar. Must be supported for numerical and
datetime data types. Returns a float for numerical data types, and
datetime (with the appropriate timedelta format string) for datetime
dtypes.

Parameters
----------
correction
Correction to apply to the result. For example, ``0`` for population
variance and ``1`` for sample variance (Bessel's correction).
See `Column.std` for a more detailed description.
skip_nulls
Whether to skip null values.
"""

def is_null(self) -> Column:
Expand Down
22 changes: 20 additions & 2 deletions spec/API_specification/dataframe_api/dataframe_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -684,15 +684,33 @@ def mean(self, *, skip_nulls: bool = True) -> DataFrame:
"""
...

def std(self, *, skip_nulls: bool = True) -> DataFrame:
def std(self, *, correction: int | float = 1, skip_nulls: bool = True) -> DataFrame:
"""
Reduction returns a 1-row DataFrame.

Parameters
----------
correction
Correction to apply to the result. For example, ``0`` for population
standard deviation and ``1`` for sample standard deviation.
See `Column.std` for a more detailed description.
skip_nulls
Whether to skip null values.
"""
...

def var(self, *, skip_nulls: bool = True) -> DataFrame:
def var(self, *, correction: int | float = 1, skip_nulls: bool = True) -> DataFrame:
"""
Reduction returns a 1-row DataFrame.

Parameters
----------
correction
Correction to apply to the result. For example, ``0`` for population
variance and ``1`` for sample variance (Bessel's correction).
See `Column.std` for a more detailed description.
skip_nulls
Whether to skip null values.
"""
...

Expand Down
4 changes: 2 additions & 2 deletions spec/API_specification/dataframe_api/groupby_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ def median(self, *, skip_nulls: bool = True) -> "DataFrame":
def mean(self, *, skip_nulls: bool = True) -> "DataFrame":
...

def std(self, *, skip_nulls: bool = True) -> "DataFrame":
def std(self, *, correction: int | float = 1, skip_nulls: bool = True) -> "DataFrame":
...

def var(self, *, skip_nulls: bool = True) -> "DataFrame":
def var(self, *, correction: int | float = 1, skip_nulls: bool = True) -> "DataFrame":
...

def size(self) -> "DataFrame":
Expand Down