Module pipeline

`mlpype.sklearn.pipeline`

Provides type checkers and data models for numpy and pandas.

`mlpype.sklearn.pipeline.NumpyTypeChecker`

Bases: TypeChecker[ndarray]

Provides a TypeChecker for numpy arrays.

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/pipeline/numpy_type_checker.py

class NumpyTypeChecker(TypeChecker[np.ndarray]):
    """Provides a TypeChecker for numpy arrays.

    Fitting stores the shape of the array without its first axis, together with
    a coarse Python type (int, float, bool or str) derived from the dtype name.
    Transforming verifies that new arrays match both stored properties.
    """

    dims: Tuple[int, ...]
    dtype: Union[type, None]

    def fit(self, data: np.ndarray) -> "NumpyTypeChecker":
        """Fit this Numpy TypeChecker to the given data.

        Args:
            data (np.ndarray): The data to fit.

        Returns:
            NumpyTypeChecker: self.
        """
        # Only the dimensions after the first axis are stored and later compared.
        self.dims = data.shape[1:]
        self.dtype = self._convert_dtype(data.dtype)
        return self

    def transform(self, data: np.ndarray) -> np.ndarray:
        """Checks if the given data fits the specifications this TypeChecker was fitted for.

        Args:
            data (np.ndarray):  The data to check.

        Returns:
            np.ndarray: data, unchanged, if the data fits the specifications.

        Raises:
            AssertionError: If the checker has not been fitted, if `data` is not a
                numpy array, or if its trailing dimensions or coarse dtype differ
                from the fitted ones.
        """
        # `dtype` is only annotated on the class, so it does not exist before `fit`;
        # getattr turns that case into the intended error instead of an AttributeError.
        fitted_dtype = getattr(self, "dtype", None)
        # Explicit raises (rather than `assert`) keep the checks active under `python -O`.
        if fitted_dtype is None:
            raise AssertionError("Please fit pipeline first")
        if not isinstance(data, np.ndarray):
            raise AssertionError("Please provide a numpy array!")
        if data.shape[1:] != self.dims:
            raise AssertionError(f"Dimensions of numpy arrays do not add up: {data.shape[1:]} vs {self.dims}")

        converted_type = self._convert_dtype(data.dtype)
        if converted_type != fitted_dtype:
            raise AssertionError(f"Dtype of data does not add up: {converted_type} vs {self.dtype}")
        return data

    def _convert_dtype(self, dtype: np.dtype) -> type:
        """Map a numpy dtype to a coarse Python type based on its name.

        The dtype name is matched by substring, in this order: "int" -> int,
        "float" -> float, "bool" -> bool. Any other dtype maps to str.

        Args:
            dtype (np.dtype): The numpy dtype to convert.

        Returns:
            type: One of int, float, bool or str.
        """
        dtype_name = dtype.name
        if "int" in dtype_name:
            return int
        elif "float" in dtype_name:
            return float
        elif "bool" in dtype_name:
            return bool
        else:
            return str

    def get_pydantic_type(self) -> Type[NumpyData]:
        """Creates a Pydantic model for this data to handle serialisation/deserialisation.

        The `data` field is a list of the fitted type, wrapped in one additional
        `List[...]` level per stored dimension.

        Returns:
            Type[NumpyData]: A NumpyData model that fits the data this was fitted on.
        """
        base_iter: type = List[self.dtype]  # type: ignore

        # One extra nesting level for every dimension recorded during fit.
        for _ in self.dims:
            base_iter = List[base_iter]  # type: ignore

        model = create_model(f"NumpyData[{self.name}]", data=(base_iter, ...), __base__=NumpyData)

        return model

    @classmethod
    def supports_object(cls, obj: Any) -> bool:
        """Returns True if the object is a numpy array.

        Args:
            obj (Any): The object to check.

        Returns:
            bool: True if the given object is a numpy array, False otherwise.
        """
        return isinstance(obj, np.ndarray)

`fit(data)`

Fit this Numpy TypeChecker to the given data.

Parameters:

Name	Type	Description	Default
`data`	`ndarray`	The data to fit.	required

Returns:

Name	Type	Description
`NumpyTypeChecker`	`NumpyTypeChecker`	self.

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/pipeline/numpy_type_checker.py

def fit(self, data: np.ndarray) -> "NumpyTypeChecker":
    """Record the trailing shape and coarse dtype of the given array.

    Args:
        data (np.ndarray): The array to fit on.

    Returns:
        NumpyTypeChecker: self.
    """
    # Everything after the first axis is remembered for later comparison.
    trailing_shape = data.shape[1:]
    self.dims = trailing_shape

    coarse_type = self._convert_dtype(data.dtype)
    self.dtype = coarse_type
    return self

`get_pydantic_type()`

Creates a Pydantic model for this data to handle serialisation/deserialisation.

Returns:

Type	Description
`Type[NumpyData]`	Type[NumpyData]: A NumpyData model that fits the data this was fitted on.

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/pipeline/numpy_type_checker.py

def get_pydantic_type(self) -> Type[NumpyData]:
    """Build a Pydantic model used to serialise/deserialise this data.

    The `data` field is a list of the fitted type, wrapped in one additional
    `List[...]` level per stored dimension.

    Returns:
        Type[NumpyData]: A NumpyData model that fits the data this was fitted on.
    """
    nested: type = List[self.dtype]  # type: ignore

    # One extra nesting level for every dimension recorded during fit.
    for _ in self.dims:
        nested = List[nested]  # type: ignore

    return create_model(f"NumpyData[{self.name}]", data=(nested, ...), __base__=NumpyData)

`supports_object(obj)` `classmethod`

Returns True if the object is a numpy array.

Parameters:

Name	Type	Description	Default
`obj`	`Any`	The object to check.	required

Returns:

Name	Type	Description
`bool`	`bool`	True if the given object is a numpy array, False otherwise.

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/pipeline/numpy_type_checker.py

@classmethod
def supports_object(cls, obj: Any) -> bool:
    """Tell whether this checker can handle the given object.

    Args:
        obj (Any): The object to inspect.

    Returns:
        bool: True when `obj` is a numpy array, False otherwise.
    """
    is_numpy_array = isinstance(obj, np.ndarray)
    return is_numpy_array

`transform(data)`

Checks if the given data fits the specifications this TypeChecker was fitted for.

Parameters:

Name	Type	Description	Default
`data`	`ndarray`	The data to check.	required

Returns:

Type	Description
`ndarray`	np.ndarray: data, if the data fits the specifications. Otherwise, an assertion error is thrown.

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/pipeline/numpy_type_checker.py

def transform(self, data: np.ndarray) -> np.ndarray:
    """Checks if the given data fits the specifications this TypeChecker was fitted for.

    Args:
        data (np.ndarray):  The data to check.

    Returns:
        np.ndarray: data, unchanged, if the data fits the specifications.

    Raises:
        AssertionError: If the checker has not been fitted, if `data` is not a
            numpy array, or if its trailing dimensions or coarse dtype differ
            from the fitted ones.
    """
    # `dtype` may not exist before `fit`; getattr turns that case into the
    # intended error instead of an AttributeError.
    fitted_dtype = getattr(self, "dtype", None)
    # Explicit raises (rather than `assert`) keep the checks active under `python -O`.
    if fitted_dtype is None:
        raise AssertionError("Please fit pipeline first")
    if not isinstance(data, np.ndarray):
        raise AssertionError("Please provide a numpy array!")
    if data.shape[1:] != self.dims:
        raise AssertionError(f"Dimensions of numpy arrays do not add up: {data.shape[1:]} vs {self.dims}")

    converted_type = self._convert_dtype(data.dtype)
    if converted_type != fitted_dtype:
        raise AssertionError(f"Dtype of data does not add up: {converted_type} vs {self.dtype}")
    return data

`mlpype.sklearn.pipeline.PandasTypeChecker`

Bases: TypeChecker[DataFrame]

A TypeChecker for pandas DataFrames.

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/pipeline/pandas_type_checker.py

class PandasTypeChecker(TypeChecker[pd.DataFrame]):
    """A TypeChecker for pandas DataFrames.

    Fitting stores, per column, a coarse Python type and a pandas dtype predicate.
    Transforming selects the fitted columns and verifies each one with its predicate.
    """

    def fit(self, data: pd.DataFrame) -> "PandasTypeChecker":
        """Fit this PandasTypeChecker to the given data.

        Args:
            data (pd.DataFrame): The data to fit.

        Returns:
            PandasTypeChecker: self
        """
        self.raw_types = self._convert_raw_types(dict(data.dtypes))
        return self

    def transform(self, data: pd.DataFrame) -> pd.DataFrame:
        """Checks if the given data fits the specifications this TypeChecker was fitted for.

        Args:
            data (pd.DataFrame): The data to check.

        Returns:
            pd.DataFrame: data restricted to the fitted columns (in fitted order),
                if the data fits the specifications.

        Raises:
            AssertionError: If `data` is not a DataFrame, the checker has not been
                fitted, a fitted column is missing, or a column fails its dtype check.
        """
        # Explicit raises (rather than `assert`) keep the checks active under `python -O`.
        if not isinstance(data, pd.DataFrame):
            raise AssertionError("Please provide a pandas DataFrame!")

        # `raw_types` only exists after `fit`; report that clearly instead of
        # failing with an AttributeError.
        raw_types = getattr(self, "raw_types", None)
        if raw_types is None:
            raise AssertionError("Please fit pipeline first")

        colnames = list(raw_types.keys())
        if not np.all(np.isin(colnames, data.columns)):
            raise AssertionError("Not all columns are present.")

        # Keep only the fitted columns, in fitted order.
        data = data[colnames]

        for name, (_, checker) in raw_types.items():
            if not checker(data[name]):
                raise AssertionError(f"Dtypes did not match up for col {name}.")
        return data

    def _convert_raw_types(self, types: Dict[str, type]) -> Dict[str, Tuple[type, Callable]]:
        """Convert every column dtype into a (Python type, dtype predicate) pair."""
        return {name: self._convert_raw_type(type_) for name, type_ in types.items()}

    def _convert_raw_type(self, type_: type) -> Tuple[type, Callable]:
        """Map one dtype to a (Python type, dtype predicate) pair.

        The string form of the dtype is matched by substring, in this order:
        "int", "float", "bool", "datetime", "str". Anything else falls back to
        (str, is_object_dtype).
        """
        str_type = str(type_)
        if "int" in str_type:
            return (int, is_integer_dtype)
        elif "float" in str_type:
            return (float, is_float_dtype)
        elif "bool" in str_type:
            return (bool, is_bool_dtype)
        elif "datetime" in str_type:
            return (datetime, is_datetime64_any_dtype)
        elif "str" in str_type:
            return (str, is_string_dtype)
        else:
            return (str, is_object_dtype)

    def get_pydantic_type(self) -> Type[PandasData]:
        """Creates a Pydantic model for this data to handle serialisation/deserialisation.

        Every fitted column becomes a required field that accepts either a list
        of values or a dict of values keyed by str or int.

        Returns:
            Type[PandasData]: A PandasData model that fits the data this was fitted on.
        """
        # NOTE: `str or int` evaluates to plain `str`, so the key type must be
        # spelled as a Union to actually admit integer keys.
        data_type = {
            name: (Union[List[dtype], Dict[Union[str, int], dtype]], ...)  # type: ignore
            for name, (dtype, _) in self.raw_types.items()
        }

        model = create_model(f"PandasData[{self.name}]", **data_type, __base__=PandasData)

        return model

    @classmethod
    def supports_object(cls, obj: Any) -> bool:
        """Returns True if the object is a pandas DataFrame.

        Args:
            obj (Any): The object to check.

        Returns:
            bool: True if the given object is a pandas DataFrame, False otherwise.
        """
        return isinstance(obj, pd.DataFrame)

`fit(data)`

Fit this PandasTypeChecker to the given data.

Parameters:

Name	Type	Description	Default
`data`	`DataFrame`	The data to fit.	required

Returns:

Name	Type	Description
`PandasTypeChecker`	`PandasTypeChecker`	self

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/pipeline/pandas_type_checker.py

def fit(self, data: pd.DataFrame) -> "PandasTypeChecker":
    """Record the converted dtype information of every column in the frame.

    Args:
        data (pd.DataFrame): The frame to fit on.

    Returns:
        PandasTypeChecker: self
    """
    # Column name -> dtype mapping, as reported by the DataFrame.
    column_dtypes = dict(data.dtypes)
    converted = self._convert_raw_types(column_dtypes)
    self.raw_types = converted
    return self

`get_pydantic_type()`

Creates a Pydantic model for this data to handle serialisation/deserialisation.

Returns:

Type	Description
`Type[PandasData]`	Type[PandasData]: A PandasData model that fits the data this was fitted on.

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/pipeline/pandas_type_checker.py

def get_pydantic_type(self) -> Type[PandasData]:
    """Creates a Pydantic model for this data to handle serialisation/deserialisation.

    Every fitted column becomes a required field that accepts either a list
    of values or a dict of values keyed by str or int.

    Returns:
        Type[PandasData]: A PandasData model that fits the data this was fitted on.
    """
    # NOTE: `str or int` evaluates to plain `str`, so the key type must be
    # spelled as a Union to actually admit integer keys.
    data_type = {
        name: (Union[List[dtype], Dict[Union[str, int], dtype]], ...)  # type: ignore
        for name, (dtype, _) in self.raw_types.items()
    }

    model = create_model(f"PandasData[{self.name}]", **data_type, __base__=PandasData)

    return model

`supports_object(obj)` `classmethod`

Returns True if the object is a pandas DataFrame.

Parameters:

Name	Type	Description	Default
`obj`	`Any`	The object to check.	required

Returns:

Name	Type	Description
`bool`	`bool`	True if the given object is a pandas DataFrame, False otherwise.

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/pipeline/pandas_type_checker.py

@classmethod
def supports_object(cls, obj: Any) -> bool:
    """Tell whether this checker can handle the given object.

    Args:
        obj (Any): The object to inspect.

    Returns:
        bool: True when `obj` is a pandas DataFrame, False otherwise.
    """
    is_dataframe = isinstance(obj, pd.DataFrame)
    return is_dataframe

`transform(data)`

Checks if the given data fits the specifications this TypeChecker was fitted for.

Parameters:

Name	Type	Description	Default
`data`	`DataFrame`	The data to check.	required

Returns:

Type	Description
`DataFrame`	pd.DataFrame: data, if the data fits the specifications. Otherwise, an assertion error is thrown.

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/pipeline/pandas_type_checker.py

def transform(self, data: pd.DataFrame) -> pd.DataFrame:
    """Checks if the given data fits the specifications this TypeChecker was fitted for.

    Args:
        data (pd.DataFrame): The data to check.

    Returns:
        pd.DataFrame: data restricted to the fitted columns (in fitted order),
            if the data fits the specifications.

    Raises:
        AssertionError: If `data` is not a DataFrame, the checker has not been
            fitted, a fitted column is missing, or a column fails its dtype check.
    """
    # Explicit raises (rather than `assert`) keep the checks active under `python -O`.
    if not isinstance(data, pd.DataFrame):
        raise AssertionError("Please provide a pandas DataFrame!")

    # `raw_types` only exists after `fit`; report that clearly instead of
    # failing with an AttributeError.
    raw_types = getattr(self, "raw_types", None)
    if raw_types is None:
        raise AssertionError("Please fit pipeline first")

    colnames = list(raw_types.keys())
    if not np.all(np.isin(colnames, data.columns)):
        raise AssertionError("Not all columns are present.")

    # Keep only the fitted columns, in fitted order.
    data = data[colnames]

    for name, (_, checker) in raw_types.items():
        if not checker(data[name]):
            raise AssertionError(f"Dtypes did not match up for col {name}.")
    return data

Module pipeline

mlpype.sklearn.pipeline

mlpype.sklearn.pipeline.NumpyTypeChecker

fit(data)

get_pydantic_type()

supports_object(obj) classmethod

transform(data)

mlpype.sklearn.pipeline.PandasTypeChecker

fit(data)

get_pydantic_type()

supports_object(obj) classmethod

transform(data)

`mlpype.sklearn.pipeline`

`mlpype.sklearn.pipeline.NumpyTypeChecker`

`fit(data)`

`get_pydantic_type()`

`supports_object(obj)` `classmethod`

`transform(data)`

`mlpype.sklearn.pipeline.PandasTypeChecker`

`fit(data)`

`get_pydantic_type()`

`supports_object(obj)` `classmethod`

`transform(data)`