Source code for ananke.utils
#!/usr/bin/env python
#
# Author: Adrien CR Thob
# Copyright (C) 2022 Adrien CR Thob
#
# This file is part of the py-ananke project,
# <https://github.com/athob/py-ananke>, which is licensed
# under the GNU Affero General Public License v3.0 (AGPL-3.0).
#
# The full copyright notice, including terms governing use, modification,
# and redistribution, is contained in the files LICENSE and COPYRIGHT,
# which can be found at the root of the source code distribution tree:
# - LICENSE <https://github.com/athob/py-ananke/blob/main/LICENSE>
# - COPYRIGHT <https://github.com/athob/py-ananke/blob/main/COPYRIGHT>
#
"""
Module miscellaneous utilities
"""
from typing import Optional, List, Union
import re
import docstring_parser as DS_parser
import numpy as np
from scipy import interpolate
import pandas as pd
import vaex
from galaxia_ananke import utils as Gutils
__all__ = ['classproperty', 'compare_given_and_required', 'confirm_equal_length_arrays_in_dict', 'PDOrVaexDF', 'RecordingDataFrame', 'extract_parameters_from_docstring', 'extract_notes_from_docstring', 'LinearNDInterpolatorExtrapolator']
classproperty = Gutils.classproperty
compare_given_and_required = Gutils.compare_given_and_required
confirm_equal_length_arrays_in_dict = Gutils.confirm_equal_length_arrays_in_dict
PDOrVaexDF = Union[pd.DataFrame, vaex.DataFrame]
[docs]
class RecordingDataFrame(pd.DataFrame):
"""
Pandas DataFrame that records all its used keys from getitem
"""
[docs]
def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)
self._record_of_all_used_keys = set()
def _add_to_record_of_all_used_keys(self, keys):
if isinstance(keys, str):
keys = [keys]
for key in keys:
self._record_of_all_used_keys.add(key)
def __getitem__(self, key):
self._add_to_record_of_all_used_keys(key)
return super().__getitem__(key)
# def __setitem__(self, key, value):
# self._add_to_record_of_all_used_keys(key)
# super().__setitem__(key, value)
# def __delitem__(self, key):
# self._add_to_record_of_all_used_keys(key)
# super().__delitem__(key)
@property
def record_of_all_used_keys(self):
return self._record_of_all_used_keys
[docs]
def extract_parameters_from_docstring(docstring: str, parameters: Optional[List[str]] = None, ignore: Optional[List[str]] = None) -> str:
input_DS = DS_parser.parse(docstring)
output_DS = DS_parser.Docstring()
output_DS.style = input_DS.style
output_DS.meta = [param
for param in input_DS.params
if (True if parameters is None else param.arg_name in parameters) and (True if ignore is None else param.arg_name not in ignore)]
temp_docstring = re.split("\n-*\n",DS_parser.compose(output_DS),maxsplit=1)[1]
return '\n'.join([line if line[:1] in ['', ' '] else f"\n{line}" for line in temp_docstring.split('\n')])
[docs]
def extract_notes_from_docstring(docstring: str) -> str:
input_DS = DS_parser.parse(docstring)
output_DS = DS_parser.Docstring()
output_DS.style = input_DS.style
output_DS.meta = [meta for meta in input_DS.meta if 'notes' in meta.args]
return re.split("\n-*\n",DS_parser.compose(output_DS),maxsplit=1)[1]
[docs]
class LinearNDInterpolatorExtrapolator:
[docs]
def __init__(self, points: np.ndarray, values: np.ndarray, **kwargs):
"""
Use ND-linear interpolation over the convex hull of points, and nearest neighbor outside (for
extrapolation)
Idea taken from https://stackoverflow.com/questions/20516762/extrapolate-with-linearndinterpolator
Adapted from https://stackoverflow.com/a/75327466
"""
self.linear_interpolator = interpolate.LinearNDInterpolator(points, values, **kwargs)
self.nearest_neighbor_interpolator = interpolate.NearestNDInterpolator(points, values, **kwargs)
self._calibrating_center = np.mean(points,axis=0)
self.linear_interpolator(self._calibrating_center)
from_calibrating_center = points - self._calibrating_center
self._calibrating_outer = self._calibrating_center + 2*from_calibrating_center[
np.argmax(np.linalg.norm(from_calibrating_center)
if points.ndim == 2
else np.abs(from_calibrating_center))
]
self.nearest_neighbor_interpolator(self._calibrating_outer)
def __call__(self, *args) -> Union[float, np.ndarray]:
t = self.linear_interpolator(*args)
t[np.isnan(t)] = self.nearest_neighbor_interpolator(*args)[np.isnan(t)] # TODO reduce unnecessary interpolation use?
if t.size == 1:
return t.item(0)
return t
if __name__ == '__main__':
raise NotImplementedError()