# Source code for paramnormal.paramnormal

from collections import namedtuple

import numpy
from scipy import stats

from . import utils


class BaseDist_Mixin:
    """ Shared construction and fitting logic for the distribution
    classes.

    The methods below rely on three attributes being present on the
    class they are called on:

    ``dist``
        Called with keyword arguments to build a distribution; its
        ``fit`` method performs the parameter estimation.
    ``param_template``
        Called with the fitted values to package the result of
        :meth:`~fit`.
    ``_process_args``
        Called with ``fit=<bool>`` plus the user's keyword arguments;
        returns the keyword arguments that are passed on to ``dist``
        (``fit=False``) or to ``dist.fit`` (``fit=True``).

    """

    def __new__(cls, **params):
        # Calling the class does not create an instance of it: the
        # translated keyword arguments go to ``cls.dist`` and whatever
        # that call produces is returned.
        return cls.dist(**cls._process_args(fit=False, **params))

    @classmethod
    def _fit(cls, data, **guesses):
        """ Run ``cls.dist.fit`` on `data` and return its raw result.

        The guesses are translated with ``_process_args(fit=True, ...)``
        and filtered through ``utils._pop_none`` (presumably dropping
        the entries that are ``None`` -- confirm in ``utils``) before
        being forwarded as keyword arguments.

        """

        translated = cls._process_args(fit=True, **guesses)
        fit_kwargs = utils._pop_none(**translated)
        return cls.dist.fit(data, **fit_kwargs)

    @classmethod
    def fit(cls, data, **guesses):
        """ Fit a distribution to a sample using scipy's maximum
        likelihood estimation methods.

        Parameters
        ----------
        data : array-like
            A sample whose distribution parameters will be estimated.
        guesses : named arguments of floats
            Initial guess values for certain parameters of the
            distribution. See the class docstring for more information
            on the parameters.

        Returns
        -------
        params : namedtuple
            A namedtuple containing all of the parameters of the
            distribution.

        """

        estimates = cls._fit(data, **guesses)
        return cls.param_template(*estimates)

    @classmethod
    def from_params(cls, params):
        """ Create a distribution from the namedtuple result of the
        :meth:`~fit` method.

        Parameters
        ----------
        params : namedtuple
            Its field names are used as the keyword names and its
            values as the keyword values when calling the class.

        Examples
        --------
        >>> import numpy
        >>> import paramnormal
        >>> # silly fake data
        >>> x = numpy.random.normal(size=37)
        >>> params = paramnormal.normal.fit(x)
        >>> dist = paramnormal.normal.from_params(params)

        """

        named_values = {
            field: value for field, value in zip(params._fields, params)
        }
        return cls(**named_values)


class normal(BaseDist_Mixin):
    """
    Create and fit data to a normal distribution.

    Methods
    -------
    fit(data, **guesses)
        Estimate the parameters of the data's distribution with
        scipy's maximum likelihood estimation methods.
    from_params(params)
        Create a new distribution instance from the namedtuple result
        of the :meth:`~fit` method.

    Parameters
    ----------
    mu : float
        The expected value (mean) of the underlying normal
        distribution. Acts as the location parameter of the
        distribution.
    sigma : float
        The standard deviation of the underlying normal distribution.
        Also acts as the scale parameter of the distribution.

    Examples
    --------
    >>> import numpy
    >>> import paramnormal as pn
    >>> numpy.random.seed(0)
    >>> pn.normal(mu=5, sigma=2).rvs(size=3)
    array([ 8.52810469, 5.80031442, 6.95747597])

    >>> # english names and greek symbols are interchangeable
    >>> numpy.random.seed(0)
    >>> pn.normal(μ=5, σ=2).rvs(size=3)
    array([ 8.52810469, 5.80031442, 6.95747597])

    >>> # silly fake data
    >>> numpy.random.seed(0)
    >>> data = numpy.random.normal(5, 2, size=37)
    >>> # pretend `data` is unknown and we want to fit a dist. to it
    >>> pn.normal.fit(data)
    params(mu=5.6480512782619359, sigma=2.1722505742582769)

    >>> # estimate sigma when mu is fixed a known value:
    >>> pn.normal.fit(data, mu=4.75)
    params(mu=4.75, sigma=2.3505677305181645)

    References
    ----------
    http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html
    https://en.wikipedia.org/wiki/normal_distribution

    See Also
    --------
    scipy.stats.norm
    numpy.random.normal

    """

    dist = stats.norm
    param_template = namedtuple('params', ['mu', 'sigma'])
    name = 'normal'

    @staticmethod
    @utils.greco_deco
    def _process_args(mu=None, sigma=None, fit=False):
        # The keyword names for location and scale depend on whether
        # the result feeds ``dist(...)`` or ``dist.fit(...)``.
        location, spread = utils._get_loc_scale_keys(fit=fit)
        processed = {}
        processed[location] = mu
        processed[spread] = sigma
        return processed
class lognormal(BaseDist_Mixin):
    """
    Create and fit data to a lognormal distribution.

    Methods
    -------
    fit(data, **guesses)
        Use scipy's maximum likelihood estimation methods to estimate
        the parameters of the data's distribution. By default,
        `offset` is fixed at 0. Thus, only `mu` and `sigma` are
        estimated unless the `offset` is explicitly set to `None`.
    from_params(params)
        Create a new distribution instance from the namedtuple result
        of the :meth:`~fit` method.

    Parameters
    ----------
    mu : float
        The expected value (mean) of the underlying normal
        distribution. Its exponential, ``exp(mu)``, is used as the
        scale parameter of the distribution.
    sigma : float
        The standard deviation of the underlying normal distribution.
        Also acts as the shape parameter of the distribution.
    offset : float, optional
        The location parameter of the distribution. It's effectively
        the lower bound of the distribution. In other words, if you're
        investigating some quantity that cannot go below zero (e.g.,
        pollutant concentrations), leave this as the default (zero).

        .. note ::
           When fitting a lognormal distribution to a dataset, this
           will be fixed at its default value unless you explicitly
           set it to another value. Set it to `None` if you wish that
           it be estimated entirely from scratch.

    Examples
    --------
    >>> import numpy
    >>> import paramnormal as pn
    >>> numpy.random.seed(0)
    >>> pn.lognormal(mu=5, sigma=2).rvs(size=3)
    array([ 5054.85624027, 330.40342795, 1050.97750604])

    >>> # you can also use greek letters
    >>> numpy.random.seed(0)
    >>> pn.lognormal(μ=5, σ=2).rvs(size=3)
    array([ 5054.85624027, 330.40342795, 1050.97750604])

    >>> # silly fake data
    >>> numpy.random.seed(0)
    >>> data = numpy.random.lognormal(5, 2, size=37)
    >>> # pretend `data` is unknown and we want to fit a dist. to it
    >>> pn.lognormal.fit(data)
    params(mu=5.6480512782619359, sigma=2.1722505742582769, offset=0)

    >>> # estimate sigma when mu is fixed a known value:
    >>> pn.lognormal.fit(data, mu=4.75)
    params(mu=4.75, sigma=2.3505677305181645, offset=0)

    >>> # include `offset` in the estimate
    >>> pn.lognormal.fit(data, offset=None)
    params(mu=5.6538159643, sigma=2.1596452081, offset=-0.12039282462)

    References
    ----------
    http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.lognorm.html
    https://en.wikipedia.org/wiki/lognormal_distribution

    See Also
    --------
    scipy.stats.lognorm
    numpy.random.lognormal

    """

    dist = stats.lognorm
    param_template = namedtuple('params', ['mu', 'sigma', 'offset'])
    name = 'lognormal'

    @staticmethod
    @utils.greco_deco
    def _process_args(mu=None, sigma=None, offset=0, fit=False):
        loc_key, scale_key = utils._get_loc_scale_keys(fit=fit)
        shape_key = 'f0' if fit else 's'

        # A missing offset is only acceptable while fitting.
        if not fit and offset is None:
            raise ValueError("`offset` parameter is required. Recommended value is 0.")

        # The scale handed to scipy is exp(mu); a missing mu stays None.
        scale = None if mu is None else numpy.exp(mu)
        return {shape_key: sigma, scale_key: scale, loc_key: offset}

    @classmethod
    def fit(cls, data, **guesses):
        """ Fit a lognormal distribution to `data`.

        Same contract as the generic ``fit``, except that the raw
        result of ``_fit`` is read as (sigma, offset, scale) and `mu`
        is reported as the natural log of the fitted scale.

        """

        fitted = cls._fit(data, **guesses)
        sigma, offset, scale = fitted[0], fitted[1], fitted[2]
        return cls.param_template(mu=numpy.log(scale), sigma=sigma, offset=offset)
class weibull(BaseDist_Mixin):
    """
    Create and fit data to a weibull distribution.

    Methods
    -------
    fit(data, **guesses)
        Use scipy's maximum likelihood estimation methods to estimate
        the parameters of the data's distribution. By default, `loc`
        and `scale` are fixed at 0 and 1, respectively. Thus, only `k`
        is estimated unless `loc` or `scale` are explicitly set to
        `None`.
    from_params(params)
        Create a new distribution instance from the namedtuple result
        of the :meth:`~fit` method.

    Parameters
    ----------
    k : float
        The shape parameter of the distribution.

        .. note ::
           Strictly speaking, the weibull distribution has a second
           shape parameter, lambda. However, it seems to be always
           set to 1. So much so that scipy doesn't give you any other
           option.

    loc, scale : floats, optional
        Location and scale parameters of the distribution. These
        default to, and should probably be left at, 0 and 1,
        respectively.

        .. note ::
           When fitting a weibull distribution to a dataset, these
           will be fixed at their default values unless you
           explicitly set them to other values. Set them to `None`
           if you wish that they be estimated entirely from scratch.

    Examples
    --------
    >>> import numpy
    >>> import paramnormal as pn
    >>> numpy.random.seed(0)
    >>> pn.weibull(k=5).rvs(size=3)
    array([ 0.9553641 , 1.04662991, 0.98415009])

    >>> # silly fake data
    >>> numpy.random.seed(0)
    >>> data = numpy.random.weibull(5, size=37)
    >>> # pretend `data` is unknown and we want to fit a dist. to it
    >>> pn.weibull.fit(data)
    params(k=5.4158203125000091, loc=0, scale=1)

    >>> # include `loc` and `scale` in the estimate
    >>> pn.weibull.fit(data, loc=None, scale=None)
    params(k=14.12010770249, loc=-1.38985653558, scale=2.432032433984)

    References
    ----------
    http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.weibull_min.html
    https://en.wikipedia.org/wiki/weibull_distribution

    See Also
    --------
    scipy.stats.weibull_min
    scipy.stats.frechet_min
    numpy.random.weibull

    """

    dist = stats.weibull_min
    param_template = namedtuple('params', ['k', 'loc', 'scale'])
    name = 'weibull'

    @staticmethod
    @utils.greco_deco
    def _process_args(k=None, loc=0, scale=1, fit=False):
        loc_key, scale_key = utils._get_loc_scale_keys(fit=fit)
        # 'f0' is the keyword used when fitting, 'c' when building.
        shape_key = 'f0' if fit else 'c'
        return {shape_key: k, loc_key: loc, scale_key: scale}
class alpha(BaseDist_Mixin):
    """
    Create and fit data to an alpha distribution.

    Methods
    -------
    fit(data, **guesses)
        Use scipy's maximum likelihood estimation methods to estimate
        the parameters of the data's distribution. By default, `loc`
        and `scale` are fixed at 0 and 1, respectively. Thus, only
        `alpha` is estimated unless `loc` or `scale` are explicitly
        set to `None`.
    from_params(params)
        Create a new distribution instance from the namedtuple result
        of the :meth:`~fit` method.

    Parameters
    ----------
    alpha : float
        The shape parameter of the distribution.
    loc, scale : floats, optional
        Location and scale parameters of the distribution. These
        default to, and should probably be left at, 0 and 1,
        respectively.

        .. note ::
           When fitting an alpha distribution to a dataset, these
           will be fixed at their default values unless you
           explicitly set them to other values. Set them to `None`
           if you wish that they be estimated entirely from scratch.

    Examples
    --------
    >>> import numpy
    >>> from scipy import stats
    >>> import paramnormal as pn
    >>> numpy.random.seed(0)
    >>> pn.alpha(alpha=5).rvs(size=3)
    array([ 0.9553641 , 1.04662991, 0.98415009])

    >>> # you can also use greek letters
    >>> numpy.random.seed(0)
    >>> pn.alpha(α=5).rvs(size=3)
    array([ 0.9553641 , 1.04662991, 0.98415009])

    >>> # silly fake data
    >>> numpy.random.seed(0)
    >>> data = stats.alpha.rvs(5, size=37)
    >>> # pretend `data` is unknown and we want to fit a dist. to it
    >>> pn.alpha.fit(data)
    params(alpha=4.8356445312500096, loc=0, scale=1)

    >>> # include `loc` and `scale` in the estimate
    >>> pn.alpha.fit(data, loc=None, scale=None)
    params(alpha=8.6781299501, loc=-0.15002784430, scale=3.1262971852)

    References
    ----------
    http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.alpha.html

    See Also
    --------
    scipy.stats.alpha

    """

    # NOTE(review): the `rvs` sample values shown in the docstring are
    # identical to the ones in the weibull examples and look copied --
    # TODO regenerate them from an actual run.

    dist = stats.alpha
    param_template = namedtuple('params', ['alpha', 'loc', 'scale'])
    # Added so this class exposes `name` like normal/lognormal/weibull.
    name = 'alpha'

    @staticmethod
    @utils.greco_deco
    def _process_args(alpha=None, loc=0, scale=1, fit=False):
        loc_key, scale_key = utils._get_loc_scale_keys(fit=fit)
        # 'f0' is the keyword used when fitting, 'a' when building.
        alpha_key = 'f0' if fit else 'a'
        return {alpha_key: alpha, loc_key: loc, scale_key: scale}
class beta(BaseDist_Mixin):
    """
    Create and fit data to a beta distribution.

    Methods
    -------
    fit(data, **guesses)
        Use scipy's maximum likelihood estimation methods to estimate
        the parameters of the data's distribution. By default, `loc`
        and `scale` are fixed at 0 and 1, respectively. Thus, only
        `alpha` and `beta` are estimated unless `loc` or `scale` are
        explicitly set to `None`.
    from_params(params)
        Create a new distribution instance from the namedtuple result
        of the :meth:`~fit` method.

    Parameters
    ----------
    alpha, beta : float
        The (positive) shape parameters of the distribution.
    loc, scale : floats, optional
        Location and scale parameters of the distribution. These
        default to, and should probably be left at, 0 and 1,
        respectively.

        .. note ::
           When fitting a beta distribution to a dataset, these will
           be fixed at their default values unless you explicitly set
           them to other values. Set them to `None` if you wish that
           they be estimated entirely from scratch.

    Examples
    --------
    >>> import numpy
    >>> import paramnormal as pn
    >>> numpy.random.seed(0)
    >>> pn.beta(alpha=2, beta=5).rvs(size=3)
    array([ 0.9553641 , 1.04662991, 0.98415009])

    >>> # you can also use greek letters
    >>> numpy.random.seed(0)
    >>> pn.beta(α=2, β=5).rvs(size=3)
    array([ 0.9553641 , 1.04662991, 0.98415009])

    >>> # silly fake data
    >>> numpy.random.seed(0)
    >>> data = pn.beta(alpha=2, beta=5).rvs(size=37)
    >>> # pretend `data` is unknown and we want to fit a dist. to it
    >>> pn.beta.fit(data)
    params(alpha=1.6784891179355, beta=4.2459121691279, loc=0, scale=1)

    >>> # just estimate beta with a known alpha
    >>> pn.beta.fit(data, alpha=2)
    params(alpha=2, beta=4.96992644, loc=0, scale=1)

    >>> # include `loc` and `scale` in the estimate
    >>> pn.beta.fit(data, loc=None, scale=None)
    params(alpha=1.81112, beta=4.69728, loc=-0.00540140, scale=1.03884)

    References
    ----------
    http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.beta.html
    https://en.wikipedia.org/wiki/beta_distribution

    See Also
    --------
    scipy.stats.beta
    numpy.random.beta

    """

    # NOTE(review): the `rvs` sample values shown in the docstring
    # include a value above 1, which a beta distribution with loc=0
    # and scale=1 cannot produce; they match the weibull examples and
    # look copied -- TODO regenerate them from an actual run.

    dist = stats.beta
    param_template = namedtuple('params', ['alpha', 'beta', 'loc', 'scale'])
    # Added so this class exposes `name` like normal/lognormal/weibull.
    name = 'beta'

    @staticmethod
    @utils.greco_deco
    def _process_args(alpha=None, beta=None, loc=0, scale=1, fit=False):
        loc_key, scale_key = utils._get_loc_scale_keys(fit=fit)
        # 'f0'/'f1' are the keywords used when fitting, 'a'/'b' when
        # building the distribution.
        alpha_key, beta_key = ('f0', 'f1') if fit else ('a', 'b')
        return {alpha_key: alpha, beta_key: beta, loc_key: loc, scale_key: scale}
class gamma(BaseDist_Mixin):
    """
    Create and fit data to a gamma distribution.

    Methods
    -------
    fit(data, **guesses)
        Use scipy's maximum likelihood estimation methods to estimate
        the parameters of the data's distribution. By default, `loc`
        is fixed at 0. Thus, only `k` and `theta` are estimated unless
        `loc` is explicitly set to `None`.
    from_params(params)
        Create a new distribution instance from the namedtuple result
        of the :meth:`~fit` method.

    Parameters
    ----------
    k, theta : float
        The shape and scale parameters of the distribution,
        respectively.
    loc : float, optional
        Location parameter of the distribution. This defaults to, and
        should probably be left at, 0.

        .. note ::
           When fitting a gamma distribution to a dataset, this will
           be fixed at its default value unless you explicitly set it
           to another value. Set to `None` if you wish that it be
           estimated entirely from scratch.

    Examples
    --------
    >>> import numpy
    >>> import paramnormal as pn
    >>> numpy.random.seed(0)
    >>> pn.gamma(k=2, theta=5).rvs(size=3)
    array([ 25.69414788, 11.19240456, 27.13566137])

    >>> # you can also use greek letters
    >>> numpy.random.seed(0)
    >>> pn.gamma(k=2, θ=5).rvs(size=3)
    array([ 25.69414788, 11.19240456, 27.13566137])

    >>> # silly fake data
    >>> numpy.random.seed(0)
    >>> data = pn.gamma(k=2, θ=5).rvs(size=37)
    >>> # pretend `data` is unknown and we want to fit a dist. to it
    >>> pn.gamma.fit(data)
    params(k=1.3379069223213478, loc=0, theta=7.5830062081633587)

    >>> # just estimate k with a known theta
    >>> pn.gamma.fit(data, theta=5)
    params(k=1.8060453251225814, loc=0, theta=5)

    >>> # include `loc` in the estimate
    >>> pn.gamma.fit(data, loc=None)
    params(k=1.099611776886, loc=0.2991473526658, theta=8.954245031559)

    References
    ----------
    http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.gamma.html
    https://en.wikipedia.org/wiki/gamma_distribution

    See Also
    --------
    scipy.stats.gamma
    numpy.random.gamma

    """

    dist = stats.gamma
    # Field order is (k, loc, theta); the generic `fit` fills the
    # template positionally from the raw result of ``dist.fit``.
    param_template = namedtuple('params', ['k', 'loc', 'theta'])
    # Added so this class exposes `name` like normal/lognormal/weibull.
    name = 'gamma'

    @staticmethod
    @utils.greco_deco
    def _process_args(k=None, theta=None, loc=0, fit=False):
        loc_key, scale_key = utils._get_loc_scale_keys(fit=fit)
        # 'f0' is the keyword used when fitting, 'a' when building.
        shape_key = 'f0' if fit else 'a'
        # `theta` is passed to scipy under the scale keyword.
        return {shape_key: k, loc_key: loc, scale_key: theta}
class chi_squared(BaseDist_Mixin):
    """
    Create and fit data to a chi-squared distribution.

    Methods
    -------
    fit(data, **guesses)
        Use scipy's maximum likelihood estimation methods to estimate
        the parameters of the data's distribution. By default, `loc`
        and `scale` are fixed at 0 and 1, respectively. Thus, only `k`
        is estimated unless `loc` or `scale` are explicitly set to
        `None`.
    from_params(params)
        Create a new distribution instance from the namedtuple result
        of the :meth:`~fit` method.

    Parameters
    ----------
    k : float
        The degrees of freedom of the distribution.
    loc, scale : floats, optional
        Location and scale parameters of the distribution. These
        default to, and should probably be left at, 0 and 1,
        respectively.

        .. note ::
           When fitting a chi-squared distribution to a dataset,
           these will be fixed at their default values unless you
           explicitly set them to other values. Set them to `None`
           if you wish that they be estimated entirely from scratch.

    Examples
    --------
    >>> import numpy
    >>> import paramnormal as pn
    >>> numpy.random.seed(0)
    >>> pn.chi_squared(k=2).rvs(size=3)
    array([ 1.59174902, 2.51186153, 1.84644629])

    >>> # silly fake data
    >>> numpy.random.seed(0)
    >>> data = pn.chi_squared(k=2).rvs(size=37)
    >>> # pretend `data` is unknown and we want to fit a dist. to it
    >>> pn.chi_squared.fit(data)
    params(k=2.2668945312500028, loc=0, scale=1)

    >>> # include `loc` in the estimate
    >>> pn.chi_squared.fit(data, loc=None)
    params(k=1.9361813889429524, loc=0.037937143324767775, scale=1)

    References
    ----------
    http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.chi2.html
    https://en.wikipedia.org/wiki/Chi-squared_distribution

    See Also
    --------
    scipy.stats.chi2
    numpy.random.chisquare

    """

    dist = stats.chi2
    param_template = namedtuple('params', ['k', 'loc', 'scale'])
    # Added so this class exposes `name` like normal/lognormal/weibull.
    name = 'chi_squared'

    @staticmethod
    @utils.greco_deco
    def _process_args(k=None, loc=0, scale=1, fit=False):
        loc_key, scale_key = utils._get_loc_scale_keys(fit=fit)
        # 'f0' is the keyword used when fitting, 'df' when building.
        dof_key = 'f0' if fit else 'df'
        # Forward the caller's `scale`. It used to be hard-coded to 1,
        # which silently discarded any value passed in (including the
        # scale of a fitted result given to `from_params`). The
        # default of 1 is unchanged.
        return {dof_key: k, loc_key: loc, scale_key: scale}
class pareto(BaseDist_Mixin):
    """
    Create and fit data to a pareto distribution.

    Methods
    -------
    fit(data, **guesses)
        Use scipy's maximum likelihood estimation methods to estimate
        the parameters of the data's distribution. By default, `loc`
        and `scale` are fixed at 0 and 1, respectively. Thus, only
        `alpha` is estimated unless `loc` or `scale` are explicitly
        set to `None`.
    from_params(params)
        Create a new distribution instance from the namedtuple result
        of the :meth:`~fit` method.

    Parameters
    ----------
    alpha : float
        The shape parameter of the distribution.
    loc, scale : floats, optional
        Location and scale parameters of the distribution. These
        default to, and should probably be left at, 0 and 1,
        respectively.

        .. note ::
           When fitting a pareto distribution to a dataset, these
           will be fixed at their default values unless you
           explicitly set them to other values. Set them to `None`
           if you wish that they be estimated entirely from scratch.

    Examples
    --------
    >>> import numpy
    >>> import paramnormal as pn
    >>> numpy.random.seed(0)
    >>> pn.pareto(alpha=2).rvs(size=3)
    array([ 1.48875061, 1.87379424, 1.58662889])

    >>> # you can also use greek letters
    >>> numpy.random.seed(0)
    >>> pn.pareto(α=2).rvs(size=3)
    array([ 1.48875061, 1.87379424, 1.58662889])

    >>> # silly fake data
    >>> numpy.random.seed(0)
    >>> data = pn.pareto(alpha=2).rvs(size=37)
    >>> # pretend `data` is unknown and we want to fit a dist. to it
    >>> pn.pareto.fit(data)
    params(alpha=1.7850585937500019, loc=0, scale=1)

    >>> # include `loc` in the estimate
    >>> pn.pareto.fit(data, loc=None)
    params(alpha=1.8040853559635659, loc=0.009529403810858695, scale=1)

    References
    ----------
    http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.pareto.html
    https://en.wikipedia.org/wiki/pareto_distribution

    See Also
    --------
    scipy.stats.pareto
    numpy.random.pareto

    """

    dist = stats.pareto
    param_template = namedtuple('params', ['alpha', 'loc', 'scale'])
    # Added so this class exposes `name` like normal/lognormal/weibull.
    name = 'pareto'

    @staticmethod
    @utils.greco_deco
    def _process_args(alpha=None, loc=0, scale=1, fit=False):
        loc_key, scale_key = utils._get_loc_scale_keys(fit=fit)
        # 'f0' is the keyword used when fitting, 'b' when building.
        shape_key = 'f0' if fit else 'b'
        return {shape_key: alpha, loc_key: loc, scale_key: scale}
class exponential(BaseDist_Mixin):
    """
    Create and fit data to an exponential distribution.

    Methods
    -------
    fit(data, **guesses)
        Use scipy's maximum likelihood estimation methods to estimate
        the parameters of the data's distribution. By default, `loc`
        is fixed at 0. Thus, only `lambda_` is estimated unless `loc`
        is explicitly set to `None`.
    from_params(params)
        Create a new distribution instance from the namedtuple result
        of the :meth:`~fit` method.

    Parameters
    ----------
    lambda_ : float
        The rate parameter of the distribution. Its reciprocal,
        ``1 / lambda_``, is used as the scale parameter.
    loc : float, optional
        Location parameter of the distribution. This defaults to, and
        should probably be left at, 0.

        .. note ::
           When fitting an exponential distribution to a dataset,
           this will be fixed at its default value unless you
           explicitly set it to another value. Set to `None` if you
           wish that it be estimated entirely from scratch.

    Examples
    --------
    >>> import numpy
    >>> import paramnormal as pn
    >>> numpy.random.seed(0)
    >>> pn.exponential(lambda_=2).rvs(size=3)
    array([ 0.39793725, 0.62796538, 0.46161157])

    >>> # you can also use greek letters
    >>> numpy.random.seed(0)
    >>> pn.exponential(λ=2).rvs(size=3)
    array([ 0.39793725, 0.62796538, 0.46161157])

    >>> # silly fake data
    >>> numpy.random.seed(0)
    >>> data = pn.exponential(λ=2).rvs(size=37)
    >>> # pretend `data` is unknown and we want to fit a dist. to it
    >>> pn.exponential.fit(data)
    params(lambda_=1.7849050026146085, loc=0)

    >>> # include `loc` in the estimate
    >>> pn.exponential.fit(data, loc=None)
    params(lambda_=1.8154701618164411, loc=0.0094842718426853996)

    References
    ----------
    http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.expon.html
    https://en.wikipedia.org/wiki/exponential_distribution

    See Also
    --------
    scipy.stats.expon
    numpy.random.exponential

    """

    dist = stats.expon
    param_template = namedtuple('params', ['lambda_', 'loc'])
    # Added so this class exposes `name` like normal/lognormal/weibull.
    name = 'exponential'

    @staticmethod
    @utils.greco_deco
    def _process_args(lambda_=None, loc=0, fit=False):
        loc_key, scale_key = utils._get_loc_scale_keys(fit=fit)
        # scipy is given the scale, i.e. the reciprocal of the rate;
        # a missing rate stays None.
        scale = None if lambda_ is None else lambda_**-1
        return {loc_key: loc, scale_key: scale}

    @classmethod
    def fit(cls, data, **guesses):
        """ Fit an exponential distribution to `data`.

        Same contract as the generic ``fit``, except that the raw
        result of ``_fit`` is read as (loc, scale) and `lambda_` is
        reported as the reciprocal of the fitted scale.

        """

        fitted = cls._fit(data, **guesses)
        return cls.param_template(loc=fitted[0], lambda_=fitted[1]**-1)
# Names exported by a star-import of this module: the distribution
# classes defined above.
__all__ = [ 'normal', 'lognormal', 'weibull', 'alpha', 'beta', 'gamma', 'chi_squared', 'pareto', 'exponential', ]