Skip to content

Module pipeline

mlpype.sklearn.pipeline

Provides type checkers and data models for numpy and pandas.

mlpype.sklearn.pipeline.NumpyTypeChecker

Bases: TypeChecker[ndarray]

Provides a TypeChecker for numpy arrays.

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/pipeline/numpy_type_checker.py
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
class NumpyTypeChecker(TypeChecker[np.ndarray]):
    """Provides a TypeChecker for numpy arrays.

    Fitting stores the shape of the data without its first axis together with
    a coarse Python type derived from the numpy dtype. Transforming verifies
    that a new array matches both and hands the array back unchanged.
    """

    dims: Tuple[int, ...]
    dtype: Union[type, None]

    def fit(self, data: np.ndarray) -> "NumpyTypeChecker":
        """Fit this Numpy TypeChecker to the given data.

        Args:
            data (np.ndarray): The data to fit.

        Returns:
            NumpyTypeChecker: self.
        """
        # The first axis is excluded, so only the remaining axes are remembered.
        self.dims = data.shape[1:]
        self.dtype = self._convert_dtype(data.dtype)
        return self

    def transform(self, data: np.ndarray) -> np.ndarray:
        """Checks if the given data fits the specifications this TypeChecker was fitted for.

        Args:
            data (np.ndarray):  The data to check.

        Returns:
            np.ndarray: data, if the data fits the specifications.

        Raises:
            AssertionError: If the checker has not been fitted, if data is not a
                numpy array, or if its trailing shape or converted dtype differs
                from what was stored by `fit`.
        """
        # Within this class `dtype` is only assigned in `fit`; getattr keeps an
        # unfitted checker from failing with an AttributeError before the
        # intended message can be produced.
        if getattr(self, "dtype", None) is None:
            raise AssertionError("Please fit pipeline first")
        # The errors are raised explicitly (instead of via `assert`) so the
        # checks are not removed when Python runs with -O.
        if not isinstance(data, np.ndarray):
            raise AssertionError("Please provide a numpy array!")
        if data.shape[1:] != self.dims:
            raise AssertionError(f"Dimensions of numpy arrays do not add up: {data.shape[1:]} vs {self.dims}")

        converted_type = self._convert_dtype(data.dtype)
        if converted_type != self.dtype:
            raise AssertionError(f"Dtype of data does not add up: {converted_type} vs {self.dtype}")
        return data

    def _convert_dtype(self, dtype: np.dtype) -> type:
        """Map a numpy dtype onto a Python builtin type.

        The mapping is a substring match on `dtype.name`, tried in the order
        int, float, bool; every other dtype is mapped to `str`.

        Args:
            dtype (np.dtype): The numpy dtype to convert.

        Returns:
            type: One of int, float, bool or str.
        """
        dtype_name = dtype.name
        if "int" in dtype_name:
            return int
        elif "float" in dtype_name:
            return float
        elif "bool" in dtype_name:
            return bool
        else:
            return str

    def get_pydantic_type(self) -> Type[NumpyData]:
        """Creates a Pydantic model for this data to handle serialisation/deserialisation.

        Returns:
            Type[NumpyData]: A NumpyData model that fits the data this was fitted on.
        """
        base_iter: type = List[self.dtype]  # type: ignore

        # One additional List layer is added for every stored dimension.
        for _ in self.dims:
            base_iter = List[base_iter]  # type: ignore

        model = create_model(f"NumpyData[{self.name}]", data=(base_iter, ...), __base__=NumpyData)

        return model

    @classmethod
    def supports_object(cls, obj: Any) -> bool:
        """Returns True if the object is a numpy array.

        Args:
            obj (Any): The object to check.

        Returns:
            bool: True if the given object is a numpy array, False otherwise.
        """
        return isinstance(obj, np.ndarray)

fit(data)

Fit this Numpy TypeChecker to the given data.

Parameters:

Name Type Description Default
data ndarray

The data to fit.

required

Returns:

Name Type Description
NumpyTypeChecker NumpyTypeChecker

self.

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/pipeline/numpy_type_checker.py
44
45
46
47
48
49
50
51
52
53
54
55
def fit(self, data: np.ndarray) -> "NumpyTypeChecker":
    """Remember the trailing shape and the converted dtype of the given array.

    Args:
        data (np.ndarray): The array this checker should be fitted on.

    Returns:
        NumpyTypeChecker: self.
    """
    # Everything after the first axis is stored; the first axis is left out.
    trailing_shape = data.shape[1:]
    self.dims = trailing_shape

    source_dtype = data.dtype
    self.dtype = self._convert_dtype(source_dtype)
    return self

get_pydantic_type()

Creates a Pydantic model for this data to handle serialisation/deserialisation.

Returns:

Type Description
Type[NumpyData]

Type[NumpyData]: A NumpyData model that fits the data this was fitted on.

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/pipeline/numpy_type_checker.py
85
86
87
88
89
90
91
92
93
94
95
96
97
98
def get_pydantic_type(self) -> Type[NumpyData]:
    """Build the Pydantic model used to serialise/deserialise the fitted data.

    Returns:
        Type[NumpyData]: A NumpyData subclass whose `data` field is a nested
            list of the fitted dtype.
    """
    nested: type = List[self.dtype]  # type: ignore

    # Wrap the element list once more for each stored dimension.
    for _axis in self.dims:
        nested = List[nested]  # type: ignore

    return create_model(f"NumpyData[{self.name}]", data=(nested, ...), __base__=NumpyData)

supports_object(obj) classmethod

Returns True if the object is a numpy array.

Parameters:

Name Type Description Default
obj Any

The object to check.

required

Returns:

Name Type Description
bool bool

True if the given object is a numpy array, False otherwise.

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/pipeline/numpy_type_checker.py
100
101
102
103
104
105
106
107
108
109
110
@classmethod
def supports_object(cls, obj: Any) -> bool:
    """Tell whether this checker can handle the given object.

    Args:
        obj (Any): The candidate object.

    Returns:
        bool: True when obj is a numpy array, False for anything else.
    """
    is_array = isinstance(obj, np.ndarray)
    return is_array

transform(data)

Checks if the given data fits the specifications this TypeChecker was fitted for.

Parameters:

Name Type Description Default
data ndarray

The data to check.

required

Returns:

Type Description
ndarray

np.ndarray: data, if the data fits the specifications. Otherwise, an assertion error is thrown.

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/pipeline/numpy_type_checker.py
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def transform(self, data: np.ndarray) -> np.ndarray:
    """Checks if the given data fits the specifications this TypeChecker was fitted for.

    Args:
        data (np.ndarray):  The data to check.

    Returns:
        np.ndarray: data, if the data fits the specifications.

    Raises:
        AssertionError: If the checker has not been fitted, if data is not a
            numpy array, or if its trailing shape or converted dtype differs
            from the stored `dims` / `dtype`.
    """
    # getattr keeps a checker without a `dtype` attribute from failing with an
    # AttributeError before the intended message can be produced.
    if getattr(self, "dtype", None) is None:
        raise AssertionError("Please fit pipeline first")
    # The errors are raised explicitly (instead of via `assert`) so the checks
    # are not removed when Python runs with -O.
    if not isinstance(data, np.ndarray):
        raise AssertionError("Please provide a numpy array!")
    if data.shape[1:] != self.dims:
        raise AssertionError(f"Dimensions of numpy arrays do not add up: {data.shape[1:]} vs {self.dims}")

    converted_type = self._convert_dtype(data.dtype)
    if converted_type != self.dtype:
        raise AssertionError(f"Dtype of data does not add up: {converted_type} vs {self.dtype}")
    return data

mlpype.sklearn.pipeline.PandasTypeChecker

Bases: TypeChecker[DataFrame]

A TypeChecker for pandas DataFrames.

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/pipeline/pandas_type_checker.py
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
class PandasTypeChecker(TypeChecker[pd.DataFrame]):
    """A TypeChecker for pandas DataFrames.

    Fitting stores, per column, a Python type together with a dtype predicate.
    Transforming verifies that every fitted column is present and satisfies its
    predicate, and returns the DataFrame restricted to the fitted columns.
    """

    def fit(self, data: pd.DataFrame) -> "PandasTypeChecker":
        """Fit this PandasTypeChecker to the given data.

        Args:
            data (pd.DataFrame): The data to fit.

        Returns:
            PandasTypeChecker: self
        """
        self.raw_types = self._convert_raw_types(dict(data.dtypes))
        return self

    def transform(self, data: pd.DataFrame) -> pd.DataFrame:
        """Checks if the given data fits the specifications this TypeChecker was fitted for.

        Args:
            data (pd.DataFrame): The data to check.

        Returns:
            pd.DataFrame: data restricted to the fitted columns, if the data fits
                the specifications.

        Raises:
            AssertionError: If data is not a DataFrame, if the checker has not
                been fitted, if a fitted column is missing, or if a column does
                not satisfy its dtype check.
        """
        # The errors are raised explicitly (instead of via `assert`) so the
        # checks are not removed when Python runs with -O.
        if not isinstance(data, pd.DataFrame):
            raise AssertionError("Please provide a pandas DataFrame!")

        # Within this class `raw_types` is only assigned in `fit`; getattr turns
        # the AttributeError of an unfitted checker into a readable message.
        raw_types = getattr(self, "raw_types", None)
        if raw_types is None:
            raise AssertionError("Please fit pipeline first")

        colnames = list(raw_types)
        # Plain membership tests avoid converting the labels to a numpy array,
        # which would coerce mixed-type column labels to a common type.
        if not all(name in data.columns for name in colnames):
            raise AssertionError("Not all columns are present.")

        data = data[colnames]

        for name, (_, checker) in raw_types.items():
            if not checker(data[name]):
                raise AssertionError(f"Dtypes did not match up for col {name}.")
        return data

    def _convert_raw_types(self, types: Dict[str, type]) -> Dict[str, Tuple[type, Callable]]:
        """Convert every column dtype into a (python type, dtype predicate) pair.

        Args:
            types (Dict[str, type]): Mapping of column name to its dtype.

        Returns:
            Dict[str, Tuple[type, Callable]]: The converted pair for each column.
        """
        return {name: self._convert_raw_type(type_) for name, type_ in types.items()}

    def _convert_raw_type(self, type_: type) -> Tuple[type, Callable]:
        """Convert a single dtype into a (python type, dtype predicate) pair.

        The dtype is matched by substring on its string form, tried in the order
        int, float, bool, datetime, str; anything else is treated as an object
        column that is represented as `str`.

        Args:
            type_ (type): The dtype to convert.

        Returns:
            Tuple[type, Callable]: The Python type and the predicate used by
                `transform` to check a column.
        """
        # Lower-cased so capitalised dtype names such as "Int64" or "Float64"
        # reach the int/float branches instead of the object fallback.
        str_type = str(type_).lower()
        if "int" in str_type:
            return (int, is_integer_dtype)
        elif "float" in str_type:
            return (float, is_float_dtype)
        elif "bool" in str_type:
            return (bool, is_bool_dtype)
        elif "datetime" in str_type:
            return (datetime, is_datetime64_any_dtype)
        elif "str" in str_type:
            return (str, is_string_dtype)
        else:
            return (str, is_object_dtype)

    def get_pydantic_type(self) -> Type[PandasData]:
        """Creates a Pydantic model for this data to handle serialisation/deserialisation.

        Returns:
            Type[PandasData]: A PandasData model that fits the data this was fitted on.
        """
        # `str or int` evaluates to plain `str`, so the key type is spelled out
        # as a Union to actually allow both string and integer keys.
        data_type = {
            name: (Union[List[dtype], Dict[Union[str, int], dtype]], ...)  # type: ignore
            for name, (dtype, _) in self.raw_types.items()
        }

        model = create_model(f"PandasData[{self.name}]", **data_type, __base__=PandasData)

        return model

    @classmethod
    def supports_object(cls, obj: Any) -> bool:
        """Returns True if the object is a pandas DataFrame.

        Args:
            obj (Any): The object to check.

        Returns:
            bool: True if the given object is a pandas DataFrame, False otherwise.
        """
        return isinstance(obj, pd.DataFrame)

fit(data)

Fit this PandasTypeChecker to the given data.

Parameters:

Name Type Description Default
data DataFrame

The data to fit.

required

Returns:

Name Type Description
PandasTypeChecker PandasTypeChecker

self

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/pipeline/pandas_type_checker.py
47
48
49
50
51
52
53
54
55
56
57
def fit(self, data: pd.DataFrame) -> "PandasTypeChecker":
    """Remember the converted dtype of every column of the given DataFrame.

    Args:
        data (pd.DataFrame): The DataFrame this checker should be fitted on.

    Returns:
        PandasTypeChecker: self
    """
    # Column name -> dtype, as reported by the DataFrame itself.
    column_dtypes = dict(data.dtypes)
    self.raw_types = self._convert_raw_types(column_dtypes)
    return self

get_pydantic_type()

Creates a Pydantic model for this data to handle serialisation/deserialisation.

Returns:

Type Description
Type[PandasData]

Type[PandasData]: A PandasData model that fits the data this was fitted on.

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/pipeline/pandas_type_checker.py
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
def get_pydantic_type(self) -> Type[PandasData]:
    """Creates a Pydantic model for this data to handle serialisation/deserialisation.

    Each fitted column becomes a required field that accepts either a list of
    values or a dictionary of values.

    Returns:
        Type[PandasData]: A PandasData model that fits the data this was fitted on.
    """
    # `str or int` evaluates to plain `str`, so the key type is spelled out as
    # a Union to actually allow both string and integer keys.
    data_type = {
        name: (Union[List[dtype], Dict[Union[str, int], dtype]], ...)  # type: ignore
        for name, (dtype, _) in self.raw_types.items()
    }

    model = create_model(f"PandasData[{self.name}]", **data_type, __base__=PandasData)

    return model

supports_object(obj) classmethod

Returns True if the object is a pandas DataFrame.

Parameters:

Name Type Description Default
obj Any

The object to check.

required

Returns:

Name Type Description
bool bool

True if the given object is a pandas DataFrame, False otherwise.

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/pipeline/pandas_type_checker.py
111
112
113
114
115
116
117
118
119
120
121
@classmethod
def supports_object(cls, obj: Any) -> bool:
    """Tell whether this checker can handle the given object.

    Args:
        obj (Any): The candidate object.

    Returns:
        bool: True when obj is a pandas DataFrame, False for anything else.
    """
    is_frame = isinstance(obj, pd.DataFrame)
    return is_frame

transform(data)

Checks if the given data fits the specifications this TypeChecker was fitted for.

Parameters:

Name Type Description Default
data DataFrame

The data to check.

required

Returns:

Type Description
DataFrame

pd.DataFrame: data, if the data fits the specifications. Otherwise, an assertion error is thrown.

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/pipeline/pandas_type_checker.py
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
def transform(self, data: pd.DataFrame) -> pd.DataFrame:
    """Checks if the given data fits the specifications this TypeChecker was fitted for.

    Args:
        data (pd.DataFrame): The data to check.

    Returns:
        pd.DataFrame: data restricted to the fitted columns, if the data fits
            the specifications.

    Raises:
        AssertionError: If data is not a DataFrame, if the checker has no
            `raw_types` yet, if a fitted column is missing, or if a column does
            not satisfy its dtype check.
    """
    # The errors are raised explicitly (instead of via `assert`) so the checks
    # are not removed when Python runs with -O.
    if not isinstance(data, pd.DataFrame):
        raise AssertionError("Please provide a pandas DataFrame!")

    # getattr turns the AttributeError of a checker without `raw_types` into a
    # readable message.
    raw_types = getattr(self, "raw_types", None)
    if raw_types is None:
        raise AssertionError("Please fit pipeline first")

    colnames = list(raw_types)
    # Plain membership tests avoid converting the labels to a numpy array,
    # which would coerce mixed-type column labels to a common type.
    if not all(name in data.columns for name in colnames):
        raise AssertionError("Not all columns are present.")

    data = data[colnames]

    for name, (_, checker) in raw_types.items():
        if not checker(data[name]):
            raise AssertionError(f"Dtypes did not match up for col {name}.")
    return data