# Source code for autofit.non_linear.samples.samples

from abc import ABC

import json
from copy import copy
import logging
import os
from typing import Dict, List, Optional, Tuple, Union

import numpy as np
from pathlib import Path

from autoconf import conf
from autoconf.class_path import get_class_path
from autofit import exc
from autofit.mapper.model import ModelInstance
from autofit.mapper.prior_model.abstract import AbstractPriorModel
from autofit.non_linear.samples.sample import Sample

from .summary import SamplesSummary
from .interface import SamplesInterface, to_instance
from ...text.formatter import write_table

logger = logging.getLogger(__name__)


class Samples(SamplesInterface, ABC):
    def __init__(
        self,
        model: AbstractPriorModel,
        sample_list: List[Sample],
        samples_info: Optional[Dict] = None,
    ):
        """
        Contains the samples of the non-linear search, including parameter values, log likelihoods,
        weights and other quantities.

        For example, the output class can be used to load an instance of the best-fit model, get an instance of any
        individual sample by the `NonLinearSearch` and return information on the likelihoods, errors, etc.

        This class stores samples of searches which provide maximum likelihood estimates of the model-fit (e.g.
        PySwarms, LBFGS).

        Parameters
        ----------
        model
            Maps input vectors of unit parameter values to physical values and model instances via priors.
        sample_list
            The list of `Sample` objects which contains the parameters, likelihood, weights, etc. of every sample
            taken by the non-linear search.
        samples_info
            Contains information on the samples (e.g. total iterations, time to run the search, etc.). The input
            dictionary is not modified; a copy is stored with a `"class_path"` entry added.
        """

        super().__init__(model=model)

        # Work on a fresh dictionary so the caller's `samples_info` is never mutated. The class path of this
        # object always replaces any "class_path" entry that was passed in.
        info = {**(samples_info or {})}
        info["class_path"] = get_class_path(self.__class__)

        self.sample_list = sample_list
        self.samples_info = info

    def __str__(self):
        return f"{self.__class__.__name__}({len(self.sample_list)})"

    def __repr__(self):
        return str(self)

    @property
    def instances(self):
        """
        One model instance for each sample
        """
        return [
            self.model.instance_from_vector(
                sample.parameter_lists_for_paths(
                    self.paths if sample.is_path_kwargs else self.names
                ),
                ignore_prior_limits=True,
            )
            for sample in self.sample_list
        ]

    @property
    def log_evidence(self):
        """
        The log evidence of the samples, which this class does not provide: the value is always `None`.
        """
        return None

    @classmethod
    def from_list_info_and_model(
        cls,
        sample_list,
        samples_info,
        model: AbstractPriorModel,
    ):
        return cls(
            model=model,
            sample_list=sample_list,
            samples_info=samples_info,
        )

    def summary(self):
        """
        A `SamplesSummary` built from this object's model and its maximum log likelihood sample.
        """
        model = self.model
        best_sample = self.max_log_likelihood_sample
        return SamplesSummary(model=model, max_log_likelihood_sample=best_sample)

[docs]    def __add__(self, other: "Samples") -> "Samples":
        """
        Samples can be added together, which combines their `sample_list` meaning that inferred parameters are
        computed via their joint PDF.

        Parameters
        ----------
        other
            The Samples to be added to this Samples instance.

        Returns
        -------
        A class that combined the samples of the two Samples objects.
        """

        self._check_addition(other=other)

        return self.__class__(
            model=self.model,
            sample_list=self.sample_list + other.sample_list,
        )

    def __radd__(self, other):
        """
        Reflected addition, which returns this object unchanged and ignores `other`.

        Overwriting `__radd__` enables the sum function to be used on a list of samples, e.g.:

        `samples = sum([samples_x5, samples_x5, samples_x5])`

        `sum` starts from the integer `0`, so its first step is `0 + samples`, which Python resolves by calling
        `samples.__radd__(0)`. Returning `self` discards that start value; the remaining additions are between
        two Samples objects and go through `__add__`.

        Parameters
        ----------
        other
            The left-hand operand of the addition (e.g. the `0` start value of `sum`). It is not used.

        Returns
        -------
        This Samples object.
        """
        return self

    def __len__(self) -> int:
        """
        The number of samples in `sample_list`.
        """
        return len(self.sample_list)

    def __setstate__(self, state):
        self.__dict__.update(state)

    def __copy__(self):
        cls = self.__class__
        result = cls.__new__(cls)
        result.__dict__.update(self.__dict__)
        result._names = None
        result._paths = None
        return result

    def _check_addition(self, other: "Samples"):
        """
        When adding samples together, perform the following checks to make sure it is valid to add the two objects
        together:

        - That both objects being added are `Samples` objects.
        - That both models have the same prior count, else the dimensionality does not allow for valid addition.
        - That both `Samples` objects use an identical model, such that we are adding together the same parameters.

        Parameters
        ----------
        other
            The Samples to be added to this Samples instance.
        """

        def raise_exc():
            raise exc.SamplesException(
                "Cannot add together two Samples objects which have different models."
            )

        if not isinstance(self, Samples):
            raise_exc()

        if not isinstance(other, Samples):
            raise_exc()

        if self.model.prior_count != other.model.prior_count:
            raise_exc()

        for path_self, path_other in zip(self.model.paths, other.model.paths):
            if path_self != path_other:
                raise_exc()

[docs]    def values_for_path(self, path: Tuple[str]) -> List[float]:
        """
        Returns the value for a variable with a given path
        for each sample in the model
        """
        return [sample.kwargs[path] for sample in self.sample_list]

    @property
    def total_iterations(self) -> int:
        """
        The value stored under the `"total_iterations"` key of `samples_info`.

        Raises a `KeyError` if `samples_info` has no such entry.
        """
        return self.samples_info["total_iterations"]

    @property
    def time(self) -> Optional[float]:
        """
        The value stored under the `"time"` key of `samples_info`.

        Raises a `KeyError` if `samples_info` has no such entry.
        """
        return self.samples_info["time"]

    @property
    def parameter_lists(self):
        result = list()
        for sample in self.sample_list:
            tuples = self.paths if sample.is_path_kwargs else self.names
            result.append(sample.parameter_lists_for_paths(tuples))

        return result

    @property
    def total_samples(self) -> int:
        """
        The number of samples in `sample_list`.
        """
        return len(self.sample_list)

    @property
    def weight_list(self):
        return [sample.weight for sample in self.sample_list]

    @property
    def log_likelihood_list(self):
        return [sample.log_likelihood for sample in self.sample_list]

    @property
    def log_posterior_list(self):
        return [sample.log_posterior for sample in self.sample_list]

    @property
    def log_prior_list(self):
        return [sample.log_prior for sample in self.sample_list]

    @property
    def parameters_extract(self):
        return np.asarray(self.parameter_lists).T

    @property
    def _headers(self) -> List[str]:
        """
        Headers for the samples table
        """

        return self.model.joined_paths + [
            "log_likelihood",
            "log_prior",
            "log_posterior",
            "weight",
        ]

    @property
    def _rows(self) -> List[List[float]]:
        """
        Rows in the samples table
        """

        log_likelihood_list = self.log_likelihood_list
        log_prior_list = self.log_prior_list
        log_posterior_list = self.log_posterior_list
        weight_list = self.weight_list

        for index, row in enumerate(self.parameter_lists):
            yield row + [
                log_likelihood_list[index],
                log_prior_list[index],
                log_posterior_list[index],
                weight_list[index],
            ]

[docs]    def write_table(self, filename: Union[str, Path]):
        """
        Write a table of parameters, posteriors, priors and likelihoods.

        Parameters
        ----------
        filename
            Where the table is to be written
        """
        write_table(
            filename=filename,
            headers=list(self._headers),
            rows=list(self._rows),
        )

    def info_to_json(self, filename: Union[str, Path]) -> None:
        """
        Write the `samples_info` dictionary to a file as JSON.

        Parameters
        ----------
        filename
            Where the JSON is to be written. An existing file is overwritten.
        """
        with open(filename, "w") as outfile:
            json.dump(self.samples_info, outfile)

    @property
    def max_log_likelihood_sample(self) -> Sample:
        """
        The index of the sample with the highest log likelihood.
        """
        most_likely_sample = None
        for sample in self.sample_list:
            if (
                most_likely_sample is None
                or sample.log_likelihood > most_likely_sample.log_likelihood
            ):
                most_likely_sample = sample
        return most_likely_sample

    @property
    def max_log_likelihood_index(self) -> int:
        """
        The index of the sample with the highest log likelihood.
        """
        return int(np.argmax(self.log_likelihood_list))

[docs]    @to_instance
    def max_log_likelihood(self) -> List[float]:
        """
        The parameters of the maximum log likelihood sample of the `NonLinearSearch` returned as a model instance or
        list of values.
        """

        sample = self.max_log_likelihood_sample

        return sample.parameter_lists_for_paths(
            self.paths if sample.is_path_kwargs else self.names
        )

    @property
    def max_log_posterior_sample(self) -> Sample:
        return self.sample_list[self.max_log_posterior_index]

    @property
    def max_log_posterior_index(self) -> int:
        """
        The index of the sample with the highest log posterior.
        """
        return int(np.argmax(self.log_posterior_list))

[docs]    @to_instance
    def max_log_posterior(self) -> ModelInstance:
        """
        The parameters of the maximum log posterior sample of the `NonLinearSearch` returned as a model instance.
        """
        return self.parameter_lists[self.max_log_posterior_index]

[docs]    @to_instance
    def from_sample_index(self, sample_index: int) -> ModelInstance:
        """
        The parameters of an individual sample of the non-linear search, returned as a model instance.

        Parameters
        ----------
        sample_index
            The sample index of the weighted sample to return.
        """
        return self.parameter_lists[sample_index]

[docs]    def samples_above_weight_threshold_from(
        self, weight_threshold: Optional[float] = None, log_message: bool = False
    ) -> "Samples":
        """
        Returns a new `Samples` object containing only the samples with a weight above the input threshold.

        This function can be used after a non-linear search is complete, to reduce the samples to only the high weight
        values. The benefit of this is that the corresponding `samples.csv` file will be reduced in hard-disk size.

        For large libraries of results can significantly reduce the overall hard-disk space used and speed up the
        time taken to load the samples from a .csv file and perform analysis on them.

        For a sufficiently low threshold, this has a neglible impact on the numerical accuracy of the results, and
        even higher values can be used for aggresive use cases where hard-disk space is at a premium.

        Parameters
        ----------
        weight_threshold
            The threshold of weight at which a sample is included in the new `Samples` object.
        """

        if weight_threshold is None:
            weight_threshold = conf.instance["output"]["samples_weight_threshold"]

        if os.environ.get("PYAUTOFIT_TEST_MODE") == "1":
            weight_threshold = None

        if weight_threshold is None:
            return self

        sample_list = []

        for sample in self.sample_list:
            if sample.weight > weight_threshold:
                sample_list.append(sample)

        if log_message:
            logger.info(
                f"Samples with weight less than {weight_threshold} removed from samples.csv."
            )

        return self.__class__(
            model=self.model,
            sample_list=sample_list,
            samples_info=self.samples_info,
        )

[docs]    def minimise(self) -> "Samples":
        """
        A copy of this object with only important samples retained
        """
        samples = copy(self)
        samples.model = None
        samples.sample_list = list(
            {self.max_log_likelihood_sample, self.max_log_posterior_sample}
        )
        return samples

[docs]    def with_paths(self, paths: Union[List[Tuple[str, ...]], List[str]]) -> "Samples":
        """
        Create a copy of this object with only attributes specified
        by a list of paths.

        Parameters
        ----------
        paths
            A list of paths to attributes. Only kwargs and model components
            specified by these paths are retained.

            All children of a given path are retained.

        Returns
        -------
        A set of samples with a reduced set of attributes
        """
        with_paths = copy(self)
        with_paths.model = self.model.with_paths(paths)
        with_paths.sample_list = [
            sample.with_paths(paths) for sample in self.sample_list
        ]
        return with_paths

[docs]    def without_paths(
        self, paths: Union[List[Tuple[str, ...]], List[str]]
    ) -> "Samples":
        """
        Create a copy of this object with only attributes not specified
        by a list of paths.

        Parameters
        ----------
        paths
            A list of paths to attributes. kwargs and model components
            specified by these paths are removed.

            All children of a given path are removed.

        Returns
        -------
        A set of samples with a reduced set of attributes
        """
        with_paths = copy(self)
        with_paths.model = self.model.without_paths(paths)
        with_paths.sample_list = [
            sample.without_paths(paths) for sample in self.sample_list
        ]
        return with_paths

    def subsamples(self, model):
        if self.model is None:
            return None

        path_map = self.path_map_for_model(model)
        copied = copy(self)
        copied._paths = None
        copied._names = None
        copied.model = model

        copied.sample_list = [sample.subsample(path_map) for sample in self.sample_list]
        return copied