Source code for autofit.non_linear.paths.directory

import shutil

import dill
import json
import os
from os import path
from pathlib import Path
from typing import Optional, Union, cast, Type
import logging

from autoconf import conf
from autoconf.class_path import get_class
from autoconf.dictable import to_dict, from_dict
from autoconf.output import conditional_output, should_output
from autofit.text import formatter
from autofit.tools.util import open_
from autofit.non_linear.samples.samples import Samples

from .abstract import AbstractPaths

from ..samples import load_from_table
from autofit.non_linear.samples.pdf import SamplesPDF
from autofit.non_linear.samples.summary import SamplesSummary
import numpy as np

from ...visualise import VisualiseGraph

logger = logging.getLogger(__name__)


[docs]class DirectoryPaths(AbstractPaths):
    def _path_for_pickle(self, name: str, prefix: str = "") -> Path:
        return self._files_path / prefix / f"{name}.pickle"

    def _path_for_json(self, name, prefix: str = "") -> Path:
        if isinstance(name, Path):
            return name
        return self._files_path / prefix / f"{name}.json"

    def _path_for_csv(self, name) -> Path:
        return self._files_path / f"{name}.csv"

    def _path_for_fits(self, name, prefix: str = "") -> Path:
        os.makedirs(self._files_path / prefix, exist_ok=True)

        return self._files_path / prefix / f"{name}.fits"

[docs]    @conditional_output
    def save_object(self, name: str, obj: object, prefix: str = ""):
        """
        Serialise an object using dill and save it to the pickles
        directory of the search.

        Parameters
        ----------
        name
            The name of the object
        obj
            A serialisable object
        prefix
            A prefix to add to the path which is the name of the folder the file is saved in.
        """
        with open_(self._path_for_pickle(name, prefix), "wb") as f:
            dill.dump(obj, f)

[docs]    @conditional_output
    def save_json(self, name, object_dict: Union[dict, list], prefix: str = ""):
        """
        Save a dictionary as a json file in the jsons directory of the search.

        Parameters
        ----------
        name
            The name of the json file
        object_dict
            The dictionary to save
        prefix
            A prefix to add to the path which is the name of the folder the file is saved in.
        """
        with open_(self._path_for_json(name, prefix), "w+") as f:
            json.dump(object_dict, f, indent=4)

    def load_json(self, name, prefix: str = ""):
        with open_(self._path_for_json(name, prefix)) as f:
            return json.load(f)

[docs]    @conditional_output
    def save_array(self, name: str, array: np.ndarray):
        """
        Save a numpy array as a csv file in the csvs directory of the search.

        Parameters
        ----------
        name
            The name of the csv file
        array
            The numpy array to save
        """
        # noinspection PyTypeChecker
        np.savetxt(self._path_for_csv(name), array, delimiter=",")

    def load_array(self, name: str):
        return np.loadtxt(self._path_for_csv(name), delimiter=",")

[docs]    @conditional_output
    def save_fits(self, name: str, hdu, prefix: str = ""):
        """
        Save an HDU as a fits file in the fits directory of the search.

        Parameters
        ----------
        name
            The name of the fits file
        hdu
            The HDU to save
        prefix
            A prefix to add to the path which is the name of the folder the file is saved in.
        """
        hdu.writeto(self._path_for_fits(name, prefix), overwrite=True)

[docs]    def load_fits(self, name: str, prefix: str = ""):
        """
        Load an HDU from a fits file in the fits directory of the search.

        Parameters
        ----------
        name
            The name of the fits file
        prefix
            A prefix to add to the path which is the name of the folder the file is saved in.

        Returns
        -------
        The loaded HDU.
        """
        from astropy.io import fits

        return fits.open(self._path_for_fits(name, prefix))[0]

[docs]    def load_object(self, name: str, prefix: str = ""):
        """
        Load a serialised object with the given name.

        e.g. if the name is 'model' then pickles/model.pickle is loaded.

        Parameters
        ----------
        name
            The name of a serialised object
        prefix
            A prefix to add to the path which is the name of the folder the file is saved in.

        Returns
        -------
        The deserialised object
        """
        with open_(self._path_for_pickle(name, prefix), "rb") as f:
            return dill.load(f)

[docs]    def remove_object(self, name: str):
        """
        Remove the object with the given name from the pickles folder.

        Parameters
        ----------
        name
            The name of a pickle file excluding .pickle
        """
        try:
            os.remove(self._path_for_pickle(name))
        except FileNotFoundError:
            pass

[docs]    def is_object(self, name: str) -> bool:
        """
        Is there a file pickles/{name}.pickle?
        """
        return os.path.exists(self._path_for_pickle(name))

    @property
    def is_complete(self) -> bool:
        """
        Has the search been completed?
        """
        return path.exists(self._has_completed_path)

[docs]    def save_search_internal(self, obj):
        """
        Save the internal representation of a non-linear search as dill file.

        The results in this representation are required to use a search's in-built tools for visualization,
        analysing samples and other tasks.
        """
        filename = self.search_internal_path / "search_internal.dill"

        with open_(filename, "wb") as f:
            dill.dump(obj, f)

[docs]    def load_search_internal(self):
        """
        Load the internal representation of a non-linear search from a pickle or dill file.

        The results in this representation are required to use a search's in-built tools for visualization,
        analysing samples and other tasks.

        Returns
        -------
        The results of the non-linear search in its internal representation.
        """

        # This is a nasty hack to load emcee backends. It will be removed once the source code is more stable.

        import emcee

        backend_filename = self.search_internal_path / "search_internal.hdf"
        if os.path.isfile(backend_filename):
            return emcee.backends.HDFBackend(filename=str(backend_filename))

        filename = self.search_internal_path / "search_internal.dill"

        with open_(filename, "rb") as f:
            return dill.load(f)

[docs]    def remove_search_internal(self):
        """
        Remove the internal representation of a non-linear search.

        This deletes the entire `search_internal` folder, including a .pickle / .dill file containing the interal
        results and files with the timer values.

        This folder can often have a large filesize, thus deleting it can reduce hard-disk use of the model-fit.
        """
        shutil.rmtree(self.search_internal_path)

[docs]    def completed(self):
        """
        Mark the search as complete by saving a file
        """
        open_(self._has_completed_path, "w+").close()

[docs]    def load_samples(self):
        return load_from_table(filename=self._samples_file)

    @property
    def samples(self):
        """
        Load the samples associated with the search from the output directory.
        """
        sample_list = self.load_samples()
        samples_info = self.load_samples_info()

        cls = cast(Type[Samples], get_class(samples_info["class_path"]))

        return cls.from_list_info_and_model(
            sample_list=sample_list,
            samples_info=samples_info,
            model=self.model,
        )

[docs]    def save_latent_samples(
        self,
        latent_samples,
    ):
        """
        Write out the latent variables of the model to a file.

        Parameters
        ----------
        latent_samples
            Samples describing the latent variables of the model
        """
        self._save_samples(latent_samples, name="latent")

[docs]    def save_samples(self, samples):
        """
        Save the final-result samples associated with the phase as a pickle
        """
        self._save_samples(samples)

    def _save_samples(self, samples, name=None):
        """
        Save the final-result samples associated with the phase as a pickle
        """

        if name is not None:
            directory = self._files_path / name
        else:
            directory = self._files_path
            name = "samples"
        if conf.instance["general"]["output"]["samples_to_csv"] and should_output(name):
            self.save_json(directory / "samples_info.json", samples.samples_info)

            if isinstance(samples, SamplesPDF):
                try:
                    samples.save_covariance_matrix(directory / "covariance.csv")
                except (ValueError, ZeroDivisionError) as e:
                    logger.warning(
                        f"Could not save covariance matrix because of the following error:\n{e}"
                    )

            samples.write_table(filename=directory / "samples.csv")

[docs]    def save_samples_summary(self, samples_summary: SamplesSummary):
        model = samples_summary.model

        filter_args = tuple(
            name
            for name in (
                "errors_at_sigma_1",
                "errors_at_sigma_3",
                "values_at_sigma_1",
                "values_at_sigma_3",
                "max_log_likelihood_sample",
                "median_pdf_sample",
            )
            if not should_output(name)
        )

        samples_summary.model = None
        self.save_json(
            "samples_summary",
            to_dict(
                samples_summary,
                filter_args=filter_args,
            ),
        )
        samples_summary.model = model

[docs]    def load_samples_summary(self) -> SamplesSummary:
        samples_summary = from_dict(self.load_json(name="samples_summary"))
        samples_summary.model = self.model

        return samples_summary

    def load_latent_samples(self):
        return load_from_table(filename=self._files_path / "latent/samples.csv")

    def load_samples_info(self):
        with open_(self._info_file) as infile:
            return json.load(infile)

    def save_all(self, search_config_dict=None, info=None):
        info = info or {}

        self.save_identifier()
        self.save_parent_identifier()
        self._save_model_info(model=self.model)
        VisualiseGraph(
            model=self.model,
        ).save(str(self.output_path / "model_graph.html"))
        if info:
            self.save_json("info", info)
        self.save_json("search", to_dict(self.search))
        self.save_json("model", to_dict(self.model))
        self._save_metadata(search_name=type(self.search).__name__.lower())

    @AbstractPaths.parent.setter
    def parent(self, parent: AbstractPaths):
        """
        The search performed before this search. For example, a search
        that is then compared to searches during a grid search.
        """
        self._parent = parent

    def save_parent_identifier(self):
        if self.parent is not None:
            with open_(self._parent_identifier_path, "w+") as f:
                f.write(self.parent.identifier)
            self.parent.save_unique_tag()

    def save_unique_tag(self, is_grid_search=False):
        if is_grid_search:
            with open_(self._grid_search_path, "w+") as f:
                if self.unique_tag is not None:
                    f.write(self.unique_tag)

    @property
    def _parent_identifier_path(self) -> Path:
        return self.output_path / ".parent_identifier"

    @property
    def _grid_search_path(self) -> Path:
        return self.output_path / ".is_grid_search"

    @property
    def is_grid_search(self) -> bool:
        """
        Is this a grid search which comprises a number of child searches?
        """
        return os.path.exists(self._grid_search_path)

[docs]    def create_child(
        self,
        name: Optional[str] = None,
        path_prefix: Optional[str] = None,
        is_identifier_in_paths: Optional[bool] = None,
        identifier: Optional[str] = None,
    ) -> "AbstractPaths":
        """
        Create a paths object which is the child of some parent
        paths object. This is done during a GridSearch so that
        results can be stored in the correct directory.

        Parameters
        ----------
        name
        path_prefix
        is_identifier_in_paths
            If False then this path's identifier will not be
            added to its output path.
        identifier

        Returns
        -------
        A new paths object
        """
        child = type(self)(
            name=name or self.name,
            path_prefix=path_prefix or self.path_prefix,
            is_identifier_in_paths=(
                is_identifier_in_paths
                if is_identifier_in_paths is not None
                else self.is_identifier_in_paths
            ),
            parent=self,
        )
        child.model = self.model
        child.search = self.search
        child._identifier = identifier
        return child

[docs]    def for_sub_analysis(self, analysis_name: str):
        """
        Paths for an analysis which is a child of another analysis.

        The analysis name forms a new directory on the end of the original
        analysis output path.
        """
        from .sub_directory_paths import SubDirectoryPaths

        return SubDirectoryPaths(parent=self, analysis_name=analysis_name)

    def _save_metadata(self, search_name):
        """
        Save metadata associated with the phase, such as the name of the pipeline, the
        name of the phase and the name of the dataset being fit
        """
        with open_(self.output_path / "metadata", "a") as f:
            f.write(
                f"""name={self.name}\nnon_linear_search={search_name}
            """
            )

    def _save_model_info(self, model):
        """
        Save the model.info file, which summarizes every parameter and prior.
        """
        with open_(self.output_path / "model.info", "w+") as f:
            f.write(model.info)

    def _save_parameter_names_file(self, model):
        """
        Create the param_names file listing every parameter's label and Latex tag, which is used for corner.py
        visualization.

        The parameter labels are determined using the label.ini and label_format.ini config files.
        """

        parameter_names = model.model_component_and_parameter_names
        parameter_labels = model.parameter_labels
        subscripts = model.superscripts_overwrite_via_config
        parameter_labels_with_subscript = [
            f"{label}_{subscript}"
            for label, subscript in zip(parameter_labels, subscripts)
        ]

        parameter_name_and_label = []

        for i in range(model.prior_count):
            line = formatter.add_whitespace(
                str0=parameter_names[i],
                str1=parameter_labels_with_subscript[i],
                whitespace=70,
            )
            parameter_name_and_label += [f"{line}\n"]

        formatter.output_list_of_strings_to_file(
            file=self._files_path / "model.paramnames",
            list_of_strings=parameter_name_and_label,
        )

    @property
    def _info_file(self) -> Path:
        return self._files_path / "samples_info.json"

    @property
    def _has_completed_path(self) -> Path:
        """
        A file indicating that a `NonLinearSearch` has been completed previously
        """
        return self.output_path / ".completed"

    def _make_path(self) -> str:
        """
        Returns the path to the folder at which the metadata should be saved

        The path terminates with the identifier, unless the identifier has already
        been added to the path.
        """
        path_ = Path(path.join(conf.instance.output_path, self.path_prefix, self.name))
        if self.is_identifier_in_paths:
            path_ = path_ / self.identifier
        return path_