Source code for lazypredict.config

"""Configuration constants and defaults for LazyPredict."""

import logging
from typing import FrozenSet, Tuple

logger = logging.getLogger("lazypredict")

# ---------------------------------------------------------------------------
# Valid encoder types
# ---------------------------------------------------------------------------
VALID_ENCODERS: Tuple[str, ...] = ("onehot", "ordinal", "target", "binary")

# ---------------------------------------------------------------------------
# Default values
# ---------------------------------------------------------------------------
DEFAULT_RANDOM_STATE: int = 42
DEFAULT_CARDINALITY_THRESHOLD: int = 11
DEFAULT_IMPUTE_STRATEGY: str = "mean"
DEFAULT_CATEGORICAL_FILL: str = "missing"
DEFAULT_UNKNOWN_VALUE: int = -1
DEFAULT_N_JOBS: int = -1

# ---------------------------------------------------------------------------
# Removed estimators — excluded from default model lists
# ---------------------------------------------------------------------------
REMOVED_CLASSIFIERS: FrozenSet[str] = frozenset([
    "ClassifierChain",
    "ComplementNB",
    "GradientBoostingClassifier",
    "GaussianProcessClassifier",
    "HistGradientBoostingClassifier",
    "MLPClassifier",
    "LogisticRegressionCV",
    "MultiOutputClassifier",
    "MultinomialNB",
    "OneVsOneClassifier",
    "OneVsRestClassifier",
    "OutputCodeClassifier",
    "RadiusNeighborsClassifier",
    "VotingClassifier",
])

REMOVED_REGRESSORS: FrozenSet[str] = frozenset([
    "TheilSenRegressor",
    "ARDRegression",
    "CCA",
    "IsotonicRegression",
    "StackingRegressor",
    "MultiOutputRegressor",
    "MultiTaskElasticNet",
    "MultiTaskElasticNetCV",
    "MultiTaskLasso",
    "MultiTaskLassoCV",
    "PLSCanonical",
    "PLSRegression",
    "RadiusNeighborsRegressor",
    "RegressorChain",
    "VotingRegressor",
])

# ---------------------------------------------------------------------------
# Time series forecasting defaults
# ---------------------------------------------------------------------------
DEFAULT_N_LAGS: int = 10
DEFAULT_ROLLING_WINDOWS: Tuple[int, ...] = (3, 7)
DEFAULT_SORT_BY_FORECASTER: str = "RMSE"
REMOVED_FORECASTERS: FrozenSet[str] = frozenset()


# ---------------------------------------------------------------------------
# GPU support utilities
# ---------------------------------------------------------------------------
[docs] def is_gpu_available() -> bool: """Check if a CUDA-capable GPU is available via PyTorch. Returns ------- bool True if CUDA is available, False otherwise. """ try: import torch return torch.cuda.is_available() except ImportError: return False
[docs] def get_gpu_model_params(model_class, use_gpu: bool) -> dict: """Return GPU-related keyword arguments for a model class. Inspects the model class module to determine if it supports GPU acceleration and returns the appropriate kwargs. Supported GPU backends: - **XGBoost**: ``device="cuda"`` - **LightGBM**: ``device="gpu"`` - **CatBoost**: ``task_type="GPU"`` - **cuML (RAPIDS)**: No extra params needed (GPU-native). Parameters ---------- model_class : type The model class to inspect. use_gpu : bool Whether GPU usage has been requested by the user. Returns ------- dict Keyword arguments to pass to the model constructor for GPU support. Empty dict if the model does not support GPU or ``use_gpu`` is False. """ if not use_gpu: return {} module = getattr(model_class, "__module__", "") or "" # XGBoost: use device="cuda" if "xgboost" in module: if is_gpu_available(): return {"device": "cuda"} else: logger.warning( "GPU requested for %s but CUDA is not available. " "Falling back to CPU.", model_class.__name__, ) return {} # LightGBM: use device="gpu" if "lightgbm" in module: if is_gpu_available(): return {"device": "gpu"} else: logger.warning( "GPU requested for %s but CUDA is not available. " "Falling back to CPU.", model_class.__name__, ) return {} # CatBoost: use task_type="GPU" if "catboost" in module: if is_gpu_available(): return {"task_type": "GPU"} else: logger.warning( "GPU requested for %s but CUDA is not available. " "Falling back to CPU.", model_class.__name__, ) return {} # cuML (RAPIDS): models are GPU-native, no extra params needed if "cuml" in module: if is_gpu_available(): return {} else: logger.warning( "GPU requested for %s (cuML) but CUDA is not available. " "cuML requires a CUDA-capable GPU.", model_class.__name__, ) return {} return {}
[docs] def get_cuml_models() -> dict: """Return a mapping of sklearn model names to cuML GPU equivalents. cuML (RAPIDS) provides GPU-accelerated drop-in replacements for many scikit-learn estimators. This function returns the available ones. Returns ------- dict ``{sklearn_name: cuml_class}`` for available cuML models. Empty dict if cuML is not installed. """ models: dict = {} try: import cuml # noqa: F401 except ImportError: return models # Classifiers _cuml_classifiers = [ ("cuML_LogisticRegression", "cuml.linear_model", "LogisticRegression"), ("cuML_RandomForestClassifier", "cuml.ensemble", "RandomForestClassifier"), ("cuML_KNeighborsClassifier", "cuml.neighbors", "KNeighborsClassifier"), ("cuML_SVC", "cuml.svm", "SVC"), ] # Regressors _cuml_regressors = [ ("cuML_LinearRegression", "cuml.linear_model", "LinearRegression"), ("cuML_Ridge", "cuml.linear_model", "Ridge"), ("cuML_Lasso", "cuml.linear_model", "Lasso"), ("cuML_ElasticNet", "cuml.linear_model", "ElasticNet"), ("cuML_RandomForestRegressor", "cuml.ensemble", "RandomForestRegressor"), ("cuML_KNeighborsRegressor", "cuml.neighbors", "KNeighborsRegressor"), ("cuML_SVR", "cuml.svm", "SVR"), ] for name, mod_path, cls_name in _cuml_classifiers + _cuml_regressors: try: import importlib mod = importlib.import_module(mod_path) models[name] = getattr(mod, cls_name) except (ImportError, AttributeError): pass return models