Skip to content

Module model

mlpype.sklearn.model

Provides classes for sklearn models.

For sklearn models not already configured here, you can use the SklearnModel class to quickly incorporate your model. We have already integrated classifier and regressor models from sklearn. You can import them like:

from mlpype.sklearn.model import <sklearn name>Model

SklearnModel

Bases: Model[SklearnData], Generic[T]

A generic class for sklearn-like Models.

You should set a sklearn model as a type hint to this class when defining a new model. This allows us to get the parameters from the documentation of that sklearn model. For an example, see the implementation of LinearModel, especially the SklearnModel[LinearRegression] part.

Below are some examples for how to do this yourself.

# Works
class LinearRegressionModel(SklearnModel[LinearRegression]):
    pass

# An alternative to dynamically generate the model, which is easier to export/import
model_class = SklearnModel.class_from_sklearn_model_class(LinearRegression)

# Unfortunately, using something like the following will not work due to how Generic types are handled.
LinearRegressionModel = SklearnModel[LinearRegression]
Source code in packages/mlpype-sklearn/src/mlpype/sklearn/model/sklearn_model.py
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
class SklearnModel(Model[SklearnData], Generic[T]):
    """A generic class for sklearn-like Models.

    You should set a sklearn model as a type hint to this class when defining a new model.
    This allows us to get the parameters from the documentation of that sklearn model.
    For an example, see the implementation of LinearModel, especially the `SklearnModel[LinearRegression]` part.

    Below are some examples for how to do this yourself.

    ```python
    # Works
    class LinearRegressionModel(SklearnModel[LinearRegression]):
        pass

    # An alternative to dynamically generate the model, which is easier to export/import
    model_class = SklearnModel.class_from_sklearn_model_class(LinearRegression)

    # Unfortunately, using something like the following will not work due to how Generic types are handled.
    LinearRegressionModel = SklearnModel[LinearRegression]
    ```
    """

    # NOTE: the first sentence of the class docstring is matched verbatim by
    # `class_from_sklearn_model_class`; keep the two in sync when editing it.

    # File name, inside the model folder, under which the wrapped model is serialised.
    SKLEARN_MODEL_FILE = "model.pkl"

    def __init__(
        self,
        inputs: List[str],
        outputs: List[str],
        model: Optional[T] = None,
        seed: int = 1,
        **model_args: Any,
    ) -> None:
        """Set up the wrapper around a sklearn-like model.

        Args:
            inputs (List[str]): A list of names of input Data. This determines which Data is
                used to fit the model.
            outputs (List[str]): A list of names of output Data. This determines the names of
                output variables.
            model (Optional[T]): An object that has fit() and predict() methods. If None,
                we will use the model_args to instantiate a new model. Should be of type
                SklearnModelBaseType.
            seed (int, optional): The RNG seed to ensure reproducibility. Defaults to 1.
            **model_args (Any): Optional keyword arguments passed to the model class to instantiate a new
                model if `model` is None. This is where the arguments to the sklearn model go.
        """
        super().__init__(inputs, outputs, seed)
        # Only build a new model when the caller did not provide one.
        if model is None:
            model = self._init_model(model_args)
        self.model = model

    def _init_model(self, args: Dict[str, Any]) -> T:
        """Instantiate the annotated model class with `args` as keyword arguments."""
        return self._get_annotated_class()(**args)

    @classmethod
    def _get_annotated_class(cls) -> Type[T]:
        """Return the first type argument of the first entry in `cls.__orig_bases__`.

        For a class declared as `class X(SklearnModel[Foo])` this is `Foo`.
        """
        return typing.get_args(cls.__orig_bases__[0])[0]

    def set_seed(self) -> None:
        """Sets the RNG seed of numpy's global random state to `self.seed`."""
        np.random.seed(self.seed)

    def _fit(self, *data: SklearnData) -> None:
        """Fit the wrapped model by forwarding all data to its `fit` method."""
        self.model.fit(*data)

    def _transform(self, *data: SklearnData) -> Union[Iterable[SklearnData], SklearnData]:
        """Return the wrapped model's `predict` output for the given data."""
        return self.model.predict(*data)

    def _save(self, folder: Path) -> None:
        """Serialise the wrapped model to `folder / SKLEARN_MODEL_FILE` using a JoblibSerialiser."""
        serialiser = JoblibSerialiser()
        serialiser.serialise(self.model, folder / self.SKLEARN_MODEL_FILE)

    @classmethod
    def _load(cls: Type["SklearnModel"], folder: Path, inputs: List[str], outputs: List[str]) -> "SklearnModel":
        """Deserialise the wrapped model from `folder` and wrap it in a new instance.

        Only the wrapped model is written by `_save`, so the seed is not restored here:
        the returned instance always gets seed=1.
        """
        serialiser = JoblibSerialiser()
        model = serialiser.deserialise(folder / cls.SKLEARN_MODEL_FILE)
        return cls(inputs=inputs, outputs=outputs, model=model, seed=1)

    @classmethod
    def get_parameters(cls: Type["SklearnModel"], parser: ArgumentParser) -> None:
        """Get and add parameters to initialise this class.

        A SklearnModel can be instantiated in 2 ways:
            - through `model`, which is a sklearn model.
            - through parameters, which will instantiate the model internally.

        Args:
            parser (ArgumentParser): The ArgumentParser to add arguments to.
        """
        super().get_parameters(parser)
        BaseModel = cls._get_annotated_class()

        # The excluded names are the ones `__init__` itself declares explicitly.
        add_args_to_parser_for_class(
            parser, BaseModel, "model", [], excluded_args=["seed", "inputs", "outputs", "model"]
        )

    @classmethod
    def class_from_sklearn_model_class(
        cls,
        model_class: Type[SklearnModelBaseType],
    ) -> Type["SklearnModel"]:
        """Create a SklearnModel class from a SklearnModelBaseType.

        This should support all sklearn classification and regression models.

        The new class is named `<model_class name>Model`, derives from
        `SklearnModel[model_class]` and reports `SklearnModel`'s module as its own.
        Its docstring is the docstring of `cls` with the summary sentence extended by
        the name and module of `model_class`.

        Args:
            model_class (Type[SklearnModelBaseType]): The class of the sklearn model. For example,
                LinearRegression or LogisticRegression.

        Returns:
            Type[SklearnModel]: The created SklearnModel class.
        """
        new_name = f"{model_class.__name__}Model"
        summary = "A generic class for sklearn-like Models."

        # `__doc__` is not inherited, so it is None for subclasses without their own
        # docstring (and for every class under `python -OO`). Check explicitly: an
        # `assert` raises AssertionError, or is stripped under -O, so the fallback
        # below could never be reached through `except AttributeError`.
        old_docs = cls.__doc__
        if not isinstance(old_docs, str):
            logger.warning(f"Failed to add docstring to {new_name}.")
            old_docs = summary

        new_doc = old_docs.replace(
            summary,
            f"""
A generic class for sklearn-like Models.

See SklearnModel for the original source and docs for subfunctions.

This model uses a fixed class: {model_class.__name__}.
The source module is: {model_class.__module__}

""",
        )
        # __doc__ and __module__ are injected into the class namespace before the class
        # object is created.
        klass = types.new_class(
            new_name,
            (SklearnModel[model_class],),  # type: ignore
            exec_body=lambda ns: ns.update(
                {
                    "__doc__": new_doc,
                    "__module__": SklearnModel.__module__,
                }
            ),
        )

        return klass

    @classmethod
    def from_sklearn_model_class(
        cls,
        model_class: Type[SklearnModelBaseType],
        inputs: List[str],
        outputs: List[str],
        seed: int = 1,
        **model_args: Any,
    ) -> "SklearnModel":
        """Create a SklearnModel from a SklearnModelBaseType.

        This should support all sklearn classification and regression models.

        Args:
            model_class (Type[SklearnModelBaseType]): The class of the sklearn model. For example,
                LinearRegression or LogisticRegression.
            inputs (List[str]): A list of names of input Data. This determines which Data is
                used to fit the model.
            outputs (List[str]): A list of names of output Data. This determines the names of
                output variables.
            seed (int, optional): The RNG seed to ensure reproducibility. Defaults to 1.
            **model_args (Any): Optional keyword arguments passed to the model class to instantiate a new
                model.

        Returns:
            SklearnModel: The created SklearnModel.
        """
        new_cls = cls.class_from_sklearn_model_class(model_class)
        # model=None makes __init__ build the sklearn model from model_args.
        return new_cls(inputs=inputs, outputs=outputs, model=None, seed=seed, **model_args)

__init__(inputs, outputs, model=None, seed=1, **model_args)

A generic class for sklearn-like Models.

Parameters:

Name Type Description Default
inputs List[str]

A list of names of input Data. This determines which Data is used to fit the model.

required
outputs List[str]

A list of names of output Data. This determines the names of output variables.

required
model Optional[T]

An object that has fit() and predict() methods. If None, we will use the model_args to instantiate a new model. Should be of type SklearnModelBaseType.

None
seed int

The RNG seed to ensure reproducibility. Defaults to 1.

1
**model_args Any

Optional keyword arguments passed to the model class to instantiate a new model if model is None. This is where the arguments to the sklearn model go.

{}
Source code in packages/mlpype-sklearn/src/mlpype/sklearn/model/sklearn_model.py
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
def __init__(
    self,
    inputs: List[str],
    outputs: List[str],
    model: Optional[T] = None,
    seed: int = 1,
    **model_args: Any,
) -> None:
    """Set up the wrapper around a sklearn-like model.

    Args:
        inputs (List[str]): Names of the input Data; these determine which Data is
            used to fit the model.
        outputs (List[str]): Names of the output Data; these determine the names of
            the output variables.
        model (Optional[T]): A ready-made object exposing fit() and predict(). When None,
            a new model is instantiated from `model_args`. Should be of type
            SklearnModelBaseType.
        seed (int, optional): The RNG seed to ensure reproducibility. Defaults to 1.
        **model_args (Any): Keyword arguments handed to the model class when `model` is
            None. This is where the arguments to the sklearn model go.
    """
    super().__init__(inputs, outputs, seed)
    # Only build a new model when the caller did not provide one.
    self.model = self._init_model(model_args) if model is None else model

class_from_sklearn_model_class(model_class) classmethod

Create a SklearnModel class from a SklearnModelBaseType.

This should support all sklearn classification and regression models.

Parameters:

Name Type Description Default
model_class Type[SklearnModelBaseType]

The class of the sklearn model. For example, LinearRegression or LogisticRegression.

required

Returns:

Type Description
Type[SklearnModel]

The created SklearnModel class.

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/model/sklearn_model.py
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
    @classmethod
    def class_from_sklearn_model_class(
        cls,
        model_class: Type[SklearnModelBaseType],
    ) -> Type["SklearnModel"]:
        """Create a SklearnModel class from a SklearnModelBaseType.

        This should support all sklearn classifaction and regression models.

        Args:
            model_class (Type[SklearnModelBaseType]): The class of the sklearn model. For example,
                LinearRegression or LogisticRegression.

        Returns:
            Type[SklearnModel]: The created SklearnModel.
        """
        new_name = f"{model_class.__name__}Model"
        try:
            old_docs = cls.__doc__
            assert isinstance(old_docs, str)
        except AttributeError:
            logger.warning(f"Failed to add docstring to {new_name}.")
            old_docs = "A generic class for sklearn-like Models."

        new_doc = old_docs.replace(
            "A generic class for sklearn-like Models.",
            f"""
A generic class for sklearn-like Models.

See SklearnModel for the original source and docs for subfunctions.

This model uses a fixed class: {model_class.__name__}.
The source module is: {model_class.__module__}

""",
        )
        klass = types.new_class(
            new_name,
            (SklearnModel[model_class],),  # type: ignore
            exec_body=lambda ns: ns.update(
                {
                    "__doc__": new_doc,
                    "__module__": SklearnModel.__module__,
                }
            ),
        )
        # klass.__doc__ = new_doc

        return klass

from_sklearn_model_class(model_class, inputs, outputs, seed=1, **model_args) classmethod

Create a SklearnModel from a SklearnModelBaseType.

This should support all sklearn classification and regression models.

Parameters:

Name Type Description Default
model_class Type[SklearnModelBaseType]

The class of the sklearn model. For example, LinearRegression or LogisticRegression.

required
inputs List[str]

A list of names of input Data. This determines which Data is used to fit the model.

required
outputs List[str]

A list of names of output Data. This determines the names of output variables.

required
seed int

The RNG seed to ensure reproducibility. Defaults to 1.

1
**model_args Any

Optional keyword arguments passed to the model class to instantiate a new model if model is None.

{}

Returns:

Name Type Description
SklearnModel SklearnModel

The created SklearnModel.

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/model/sklearn_model.py
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
@classmethod
def from_sklearn_model_class(
    cls,
    model_class: Type[SklearnModelBaseType],
    inputs: List[str],
    outputs: List[str],
    seed: int = 1,
    **model_args: Any,
) -> "SklearnModel":
    """Build a SklearnModel instance directly from a sklearn model class.

    This should support all sklearn classification and regression models.

    Args:
        model_class (Type[SklearnModelBaseType]): The class of the sklearn model. For example,
            LinearRegression or LogisticRegression.
        inputs (List[str]): Names of the input Data; these determine which Data is
            used to fit the model.
        outputs (List[str]): Names of the output Data; these determine the names of
            the output variables.
        seed (int, optional): The RNG seed to ensure reproducibility. Defaults to 1.
        **model_args (Any): Optional keyword arguments forwarded to the generated class,
            which is always called with `model=None`.

    Returns:
        SklearnModel: The created SklearnModel.
    """
    # First generate the wrapper class, then instantiate it without a ready-made model.
    return cls.class_from_sklearn_model_class(model_class)(
        inputs=inputs,
        outputs=outputs,
        model=None,
        seed=seed,
        **model_args,
    )

get_parameters(parser) classmethod

Get and add parameters to initialise this class.

A SklearnModel can be instantiated in 2 ways: - through model, which is a sklearn model. - through parameters, which will instantiate the model internally.

Parameters:

Name Type Description Default
parser ArgumentParser

The ArgumentParser to add arguments to.

required
Source code in packages/mlpype-sklearn/src/mlpype/sklearn/model/sklearn_model.py
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
@classmethod
def get_parameters(cls: Type["SklearnModel"], parser: ArgumentParser) -> None:
    """Add the arguments needed to initialise this class to `parser`.

    A SklearnModel can be instantiated in 2 ways:
        - through `model`, which is a sklearn model.
        - through parameters, which will instantiate the model internally.

    Args:
        parser (ArgumentParser): The ArgumentParser to add arguments to.
    """
    super().get_parameters(parser)

    # The annotated sklearn class is the source of the model-specific arguments.
    wrapped_class = cls._get_annotated_class()
    reserved_names = ["seed", "inputs", "outputs", "model"]
    add_args_to_parser_for_class(parser, wrapped_class, "model", [], excluded_args=reserved_names)

set_seed()

Sets the RNG seed.

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/model/sklearn_model.py
77
78
79
def set_seed(self) -> None:
    """Seed numpy's global random state with this object's `seed`."""
    seed_value = self.seed
    np.random.seed(seed_value)

SklearnModelBaseType

Bases: Protocol

Base class for sklearn-like models.

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/model/sklearn_base_type.py
 7
 8
 9
10
11
12
13
14
class SklearnModelBaseType(Protocol):
    """Structural interface for sklearn-like models.

    It declares the `fit` and `predict` signatures; the stubs carry no implementation.
    """

    def fit(self, *x: SklearnData, **kwargs: Any) -> Any:
        """Fit a model to the given data. Kwargs are ignored."""
        ...

    def predict(self, *x: SklearnData, **kwargs: Any) -> Union[Iterable[SklearnData], SklearnData]:
        """Predict for given data using a trained model. Kwargs are ignored."""
        ...

fit(*x, **kwargs)

Fit a model to the given data. Kwargs are ignored.

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/model/sklearn_base_type.py
10
11
def fit(self, *x: SklearnData, **kwargs: Any) -> Any:
    """Train the model on the supplied data; keyword arguments are ignored."""
    ...

predict(*x, **kwargs)

Predict for given data using a trained model. Kwargs are ignored.

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/model/sklearn_base_type.py
13
14
def predict(self, *x: SklearnData, **kwargs: Any) -> Union[Iterable[SklearnData], SklearnData]:
    """Produce predictions for the supplied data with a trained model; keyword arguments are ignored."""
    ...

mlpype.sklearn.model.SklearnModel

Bases: Model[SklearnData], Generic[T]

A generic class for sklearn-like Models.

You should set a sklearn model as a type hint to this class when defining a new model. This allows us to get the parameters from the documentation of that sklearn model. For an example, see the implementation of LinearModel, especially the SklearnModel[LinearRegression] part.

Below are some examples for how to do this yourself.

# Works
class LinearRegressionModel(SklearnModel[LinearRegression]):
    pass

# An alternative to dynamically generate the model, which is easier to export/import
model_class = SklearnModel.class_from_sklearn_model_class(LinearRegression)

# Unfortunately, using something like the following will not work due to how Generic types are handled.
LinearRegressionModel = SklearnModel[LinearRegression]
Source code in packages/mlpype-sklearn/src/mlpype/sklearn/model/sklearn_model.py
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
class SklearnModel(Model[SklearnData], Generic[T]):
    """A generic class for sklearn-like Models.

    You should set a sklearn model as a type hint to this class when defining a new model.
    This allows us to get the parameters from the documentation of that sklearn model.
    For an example, see the implementation of LinearModel, especially the `SklearnModel[LinearRegression]` part.

    Below are some examples for how to do this yourself.

    ```python
    # Works
    class LinearRegressionModel(SklearnModel[LinearRegression]):
        pass

    # An alternative to dynamically generate the model, which is easier to export/import
    model_class = SklearnModel.class_from_sklearn_model_class(LinearRegression)

    # Unfortunately, using something like the following will not work due to how Generic types are handled.
    LinearRegressionModel = SklearnModel[LinearRegression]
    ```
    """

    # NOTE: the first sentence of the class docstring is matched verbatim by
    # `class_from_sklearn_model_class`; keep the two in sync when editing it.

    # File name, inside the model folder, under which the wrapped model is serialised.
    SKLEARN_MODEL_FILE = "model.pkl"

    def __init__(
        self,
        inputs: List[str],
        outputs: List[str],
        model: Optional[T] = None,
        seed: int = 1,
        **model_args: Any,
    ) -> None:
        """Set up the wrapper around a sklearn-like model.

        Args:
            inputs (List[str]): A list of names of input Data. This determines which Data is
                used to fit the model.
            outputs (List[str]): A list of names of output Data. This determines the names of
                output variables.
            model (Optional[T]): An object that has fit() and predict() methods. If None,
                we will use the model_args to instantiate a new model. Should be of type
                SklearnModelBaseType.
            seed (int, optional): The RNG seed to ensure reproducibility. Defaults to 1.
            **model_args (Any): Optional keyword arguments passed to the model class to instantiate a new
                model if `model` is None. This is where the arguments to the sklearn model go.
        """
        super().__init__(inputs, outputs, seed)
        # Only build a new model when the caller did not provide one.
        if model is None:
            model = self._init_model(model_args)
        self.model = model

    def _init_model(self, args: Dict[str, Any]) -> T:
        """Instantiate the annotated model class with `args` as keyword arguments."""
        return self._get_annotated_class()(**args)

    @classmethod
    def _get_annotated_class(cls) -> Type[T]:
        """Return the first type argument of the first entry in `cls.__orig_bases__`.

        For a class declared as `class X(SklearnModel[Foo])` this is `Foo`.
        """
        return typing.get_args(cls.__orig_bases__[0])[0]

    def set_seed(self) -> None:
        """Sets the RNG seed of numpy's global random state to `self.seed`."""
        np.random.seed(self.seed)

    def _fit(self, *data: SklearnData) -> None:
        """Fit the wrapped model by forwarding all data to its `fit` method."""
        self.model.fit(*data)

    def _transform(self, *data: SklearnData) -> Union[Iterable[SklearnData], SklearnData]:
        """Return the wrapped model's `predict` output for the given data."""
        return self.model.predict(*data)

    def _save(self, folder: Path) -> None:
        """Serialise the wrapped model to `folder / SKLEARN_MODEL_FILE` using a JoblibSerialiser."""
        serialiser = JoblibSerialiser()
        serialiser.serialise(self.model, folder / self.SKLEARN_MODEL_FILE)

    @classmethod
    def _load(cls: Type["SklearnModel"], folder: Path, inputs: List[str], outputs: List[str]) -> "SklearnModel":
        """Deserialise the wrapped model from `folder` and wrap it in a new instance.

        Only the wrapped model is written by `_save`, so the seed is not restored here:
        the returned instance always gets seed=1.
        """
        serialiser = JoblibSerialiser()
        model = serialiser.deserialise(folder / cls.SKLEARN_MODEL_FILE)
        return cls(inputs=inputs, outputs=outputs, model=model, seed=1)

    @classmethod
    def get_parameters(cls: Type["SklearnModel"], parser: ArgumentParser) -> None:
        """Get and add parameters to initialise this class.

        A SklearnModel can be instantiated in 2 ways:
            - through `model`, which is a sklearn model.
            - through parameters, which will instantiate the model internally.

        Args:
            parser (ArgumentParser): The ArgumentParser to add arguments to.
        """
        super().get_parameters(parser)
        BaseModel = cls._get_annotated_class()

        # The excluded names are the ones `__init__` itself declares explicitly.
        add_args_to_parser_for_class(
            parser, BaseModel, "model", [], excluded_args=["seed", "inputs", "outputs", "model"]
        )

    @classmethod
    def class_from_sklearn_model_class(
        cls,
        model_class: Type[SklearnModelBaseType],
    ) -> Type["SklearnModel"]:
        """Create a SklearnModel class from a SklearnModelBaseType.

        This should support all sklearn classification and regression models.

        The new class is named `<model_class name>Model`, derives from
        `SklearnModel[model_class]` and reports `SklearnModel`'s module as its own.
        Its docstring is the docstring of `cls` with the summary sentence extended by
        the name and module of `model_class`.

        Args:
            model_class (Type[SklearnModelBaseType]): The class of the sklearn model. For example,
                LinearRegression or LogisticRegression.

        Returns:
            Type[SklearnModel]: The created SklearnModel class.
        """
        new_name = f"{model_class.__name__}Model"
        summary = "A generic class for sklearn-like Models."

        # `__doc__` is not inherited, so it is None for subclasses without their own
        # docstring (and for every class under `python -OO`). Check explicitly: an
        # `assert` raises AssertionError, or is stripped under -O, so the fallback
        # below could never be reached through `except AttributeError`.
        old_docs = cls.__doc__
        if not isinstance(old_docs, str):
            logger.warning(f"Failed to add docstring to {new_name}.")
            old_docs = summary

        new_doc = old_docs.replace(
            summary,
            f"""
A generic class for sklearn-like Models.

See SklearnModel for the original source and docs for subfunctions.

This model uses a fixed class: {model_class.__name__}.
The source module is: {model_class.__module__}

""",
        )
        # __doc__ and __module__ are injected into the class namespace before the class
        # object is created.
        klass = types.new_class(
            new_name,
            (SklearnModel[model_class],),  # type: ignore
            exec_body=lambda ns: ns.update(
                {
                    "__doc__": new_doc,
                    "__module__": SklearnModel.__module__,
                }
            ),
        )

        return klass

    @classmethod
    def from_sklearn_model_class(
        cls,
        model_class: Type[SklearnModelBaseType],
        inputs: List[str],
        outputs: List[str],
        seed: int = 1,
        **model_args: Any,
    ) -> "SklearnModel":
        """Create a SklearnModel from a SklearnModelBaseType.

        This should support all sklearn classification and regression models.

        Args:
            model_class (Type[SklearnModelBaseType]): The class of the sklearn model. For example,
                LinearRegression or LogisticRegression.
            inputs (List[str]): A list of names of input Data. This determines which Data is
                used to fit the model.
            outputs (List[str]): A list of names of output Data. This determines the names of
                output variables.
            seed (int, optional): The RNG seed to ensure reproducibility. Defaults to 1.
            **model_args (Any): Optional keyword arguments passed to the model class to instantiate a new
                model.

        Returns:
            SklearnModel: The created SklearnModel.
        """
        new_cls = cls.class_from_sklearn_model_class(model_class)
        # model=None makes __init__ build the sklearn model from model_args.
        return new_cls(inputs=inputs, outputs=outputs, model=None, seed=seed, **model_args)

__init__(inputs, outputs, model=None, seed=1, **model_args)

A generic class for sklearn-like Models.

Parameters:

Name Type Description Default
inputs List[str]

A list of names of input Data. This determines which Data is used to fit the model.

required
outputs List[str]

A list of names of output Data. This determines the names of output variables.

required
model Optional[T]

An object that has fit() and predict() methods. If None, we will use the model_args to instantiate a new model. Should be of type SklearnModelBaseType.

None
seed int

The RNG seed to ensure reproducibility. Defaults to 1.

1
**model_args Any

Optional keyword arguments passed to the model class to instantiate a new model if model is None. This is where the arguments to the sklearn model go.

{}
Source code in packages/mlpype-sklearn/src/mlpype/sklearn/model/sklearn_model.py
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
def __init__(
    self,
    inputs: List[str],
    outputs: List[str],
    model: Optional[T] = None,
    seed: int = 1,
    **model_args: Any,
) -> None:
    """Set up the wrapper around a sklearn-like model.

    Args:
        inputs (List[str]): Names of the input Data; these determine which Data is
            used to fit the model.
        outputs (List[str]): Names of the output Data; these determine the names of
            the output variables.
        model (Optional[T]): A ready-made object exposing fit() and predict(). When None,
            a new model is instantiated from `model_args`. Should be of type
            SklearnModelBaseType.
        seed (int, optional): The RNG seed to ensure reproducibility. Defaults to 1.
        **model_args (Any): Keyword arguments handed to the model class when `model` is
            None. This is where the arguments to the sklearn model go.
    """
    super().__init__(inputs, outputs, seed)
    # Only build a new model when the caller did not provide one.
    self.model = self._init_model(model_args) if model is None else model

class_from_sklearn_model_class(model_class) classmethod

Create a SklearnModel class from a SklearnModelBaseType.

This should support all sklearn classification and regression models.

Parameters:

Name Type Description Default
model_class Type[SklearnModelBaseType]

The class of the sklearn model. For example, LinearRegression or LogisticRegression.

required

Returns:

Type Description
Type[SklearnModel]

The created SklearnModel class.

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/model/sklearn_model.py
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
    @classmethod
    def class_from_sklearn_model_class(
        cls,
        model_class: Type[SklearnModelBaseType],
    ) -> Type["SklearnModel"]:
        """Create a SklearnModel class from a SklearnModelBaseType.

        This should support all sklearn classifaction and regression models.

        Args:
            model_class (Type[SklearnModelBaseType]): The class of the sklearn model. For example,
                LinearRegression or LogisticRegression.

        Returns:
            Type[SklearnModel]: The created SklearnModel.
        """
        new_name = f"{model_class.__name__}Model"
        try:
            old_docs = cls.__doc__
            assert isinstance(old_docs, str)
        except AttributeError:
            logger.warning(f"Failed to add docstring to {new_name}.")
            old_docs = "A generic class for sklearn-like Models."

        new_doc = old_docs.replace(
            "A generic class for sklearn-like Models.",
            f"""
A generic class for sklearn-like Models.

See SklearnModel for the original source and docs for subfunctions.

This model uses a fixed class: {model_class.__name__}.
The source module is: {model_class.__module__}

""",
        )
        klass = types.new_class(
            new_name,
            (SklearnModel[model_class],),  # type: ignore
            exec_body=lambda ns: ns.update(
                {
                    "__doc__": new_doc,
                    "__module__": SklearnModel.__module__,
                }
            ),
        )
        # klass.__doc__ = new_doc

        return klass

from_sklearn_model_class(model_class, inputs, outputs, seed=1, **model_args) classmethod

Create a SklearnModel from a SklearnModelBaseType.

This should support all sklearn classification and regression models.

Parameters:

Name Type Description Default
model_class Type[SklearnModelBaseType]

The class of the sklearn model. For example, LinearRegression or LogisticRegression.

required
inputs List[str]

A list of names of input Data. This determines which Data is used to fit the model.

required
outputs List[str]

A list of names of output Data. This determines the names of output variables.

required
seed int

The RNG seed to ensure reproducibility. Defaults to 1.

1
**model_args Any

Optional keyword arguments passed to the model class to instantiate a new model if model is None.

{}

Returns:

Name Type Description
SklearnModel SklearnModel

The created SklearnModel.

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/model/sklearn_model.py
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
@classmethod
def from_sklearn_model_class(
    cls,
    model_class: Type[SklearnModelBaseType],
    inputs: List[str],
    outputs: List[str],
    seed: int = 1,
    **model_args: Any,
) -> "SklearnModel":
    """Build a SklearnModel instance directly from a sklearn model class.

    This should support all sklearn classification and regression models.

    Args:
        model_class (Type[SklearnModelBaseType]): The class of the sklearn model. For example,
            LinearRegression or LogisticRegression.
        inputs (List[str]): Names of the input Data; these determine which Data is
            used to fit the model.
        outputs (List[str]): Names of the output Data; these determine the names of
            the output variables.
        seed (int, optional): The RNG seed to ensure reproducibility. Defaults to 1.
        **model_args (Any): Optional keyword arguments forwarded to the generated class,
            which is always called with `model=None`.

    Returns:
        SklearnModel: The created SklearnModel.
    """
    # First generate the wrapper class, then instantiate it without a ready-made model.
    return cls.class_from_sklearn_model_class(model_class)(
        inputs=inputs,
        outputs=outputs,
        model=None,
        seed=seed,
        **model_args,
    )

get_parameters(parser) classmethod

Get and add parameters to initialise this class.

A SklearnModel can be instantiated in 2 ways: - through model, which is a sklearn model. - through parameters, which will instantiate the model internally.

Parameters:

Name Type Description Default
parser ArgumentParser

The ArgumentParser to add arguments to.

required
Source code in packages/mlpype-sklearn/src/mlpype/sklearn/model/sklearn_model.py
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
@classmethod
def get_parameters(cls: Type["SklearnModel"], parser: ArgumentParser) -> None:
    """Add the arguments needed to initialise this class to `parser`.

    A SklearnModel can be instantiated in 2 ways:
        - through `model`, which is a sklearn model.
        - through parameters, which will instantiate the model internally.

    Args:
        parser (ArgumentParser): The ArgumentParser to add arguments to.
    """
    super().get_parameters(parser)

    # The annotated sklearn class is the source of the model-specific arguments.
    wrapped_class = cls._get_annotated_class()
    reserved_names = ["seed", "inputs", "outputs", "model"]
    add_args_to_parser_for_class(parser, wrapped_class, "model", [], excluded_args=reserved_names)

set_seed()

Sets the RNG seed.

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/model/sklearn_model.py
77
78
79
def set_seed(self) -> None:
    """Seed numpy's global random state with this object's `seed`."""
    seed_value = self.seed
    np.random.seed(seed_value)

mlpype.sklearn.model.SklearnModelBaseType

Bases: Protocol

Base class for sklearn-like models.

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/model/sklearn_base_type.py
 7
 8
 9
10
11
12
13
14
class SklearnModelBaseType(Protocol):
    """Structural interface for sklearn-like models.

    It declares the `fit` and `predict` signatures; the stubs carry no implementation.
    """

    def fit(self, *x: SklearnData, **kwargs: Any) -> Any:
        """Fit a model to the given data. Kwargs are ignored."""
        ...

    def predict(self, *x: SklearnData, **kwargs: Any) -> Union[Iterable[SklearnData], SklearnData]:
        """Predict for given data using a trained model. Kwargs are ignored."""
        ...

fit(*x, **kwargs)

Fit a model to the given data. Kwargs are ignored.

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/model/sklearn_base_type.py
10
11
def fit(self, *x: SklearnData, **kwargs: Any) -> Any:
    """Train the model on the supplied data; keyword arguments are ignored."""
    ...

predict(*x, **kwargs)

Predict for given data using a trained model. Kwargs are ignored.

Source code in packages/mlpype-sklearn/src/mlpype/sklearn/model/sklearn_base_type.py
13
14
def predict(self, *x: SklearnData, **kwargs: Any) -> Union[Iterable[SklearnData], SklearnData]:
    """Produce predictions for the supplied data with a trained model; keyword arguments are ignored."""
    ...