import copy
from typing import Any, List, Optional

import numpy as np

from classifiers import DecisionTreeClassifier, NaiveBayesClassifier, KNNClassifier

# Factories for the base learners that BaggingClassifier can select by name.
# The hyper-parameters are the ones this module has always used.
_BASE_CLASSIFIER_FACTORIES = {
    'decision_tree': lambda: DecisionTreeClassifier(max_depth=8),
    'naive_bayes': lambda: NaiveBayesClassifier(),
    'knn': lambda: KNNClassifier(k=5),
}


def _validate_training_data(X, y):
    """Convert X and y to arrays and check they describe the same samples.

    Returns:
        The pair ``(X, y)`` as NumPy arrays.

    Raises:
        ValueError: If there are no samples, or if X and y disagree on the
            number of samples.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    if X.shape[0] == 0:
        raise ValueError("Cannot fit on an empty training set")
    if X.shape[0] != y.shape[0]:
        raise ValueError(
            f"X and y have inconsistent lengths: {X.shape[0]} != {y.shape[0]}"
        )
    return X, y


def _stack_predictions(estimators, X) -> np.ndarray:
    """Return an (n_samples, n_estimators) array with one column per estimator.

    The array takes its dtype from the predictions themselves, so labels are
    not coerced to float.
    """
    return np.column_stack(
        [np.asarray(estimator.predict(X)).ravel() for estimator in estimators]
    )


def _majority_vote(votes: np.ndarray) -> np.ndarray:
    """Return the most frequent label of every row of *votes*.

    ``np.unique`` returns labels in sorted order and ``np.argmax`` returns the
    first maximum, so a tie is always resolved in favour of the smallest label.
    """
    winners = []
    for row in votes:
        labels, counts = np.unique(row, return_counts=True)
        winners.append(labels[np.argmax(counts)])
    return np.array(winners, dtype=votes.dtype)


class BaggingClassifier:
    """Bagging ensemble: base learners trained on bootstrap samples, combined by majority vote.

    Args:
        base_classifier: Which base learner to use. Either one of the names
            ``'decision_tree'``, ``'naive_bayes'`` or ``'knn'``, an estimator
            class (instantiated with no arguments for every round), or an
            estimator instance exposing ``fit`` and ``predict`` (deep-copied
            for every round).
        n_estimators: Number of base learners to train.
        random_state: Seed passed to ``np.random.seed`` at the start of ``fit``.
    """

    def __init__(self, base_classifier, n_estimators: int = 10, random_state: int = 42):
        self.base_classifier = base_classifier
        self.n_estimators = n_estimators
        self.random_state = random_state
        self.estimators = []

    def _new_estimator(self):
        """Create a fresh, unfitted base learner described by ``base_classifier``.

        Raises:
            ValueError: If ``base_classifier`` is an unknown name.
            TypeError: If ``base_classifier`` is neither a name, a class, nor
                an object with ``fit`` and ``predict``.
        """
        spec = self.base_classifier
        if isinstance(spec, str):
            factory = _BASE_CLASSIFIER_FACTORIES.get(spec)
            if factory is None:
                known = ", ".join(sorted(_BASE_CLASSIFIER_FACTORIES))
                raise ValueError(
                    f"Unknown base_classifier {spec!r}; expected one of: {known}"
                )
            return factory()
        if isinstance(spec, type):
            return spec()
        if hasattr(spec, 'fit') and hasattr(spec, 'predict'):
            # Copy the prototype so every round trains an independent model.
            return copy.deepcopy(spec)
        raise TypeError(
            "base_classifier must be a known name, an estimator class, or an "
            f"object with fit/predict, got {type(spec).__name__}"
        )

    def fit(self, X: np.ndarray, y: np.ndarray):
        """Train ``n_estimators`` base learners, each on its own bootstrap sample.

        Raises:
            ValueError: If the training data is empty, X and y differ in
                length, or ``base_classifier`` is an unknown name.
            TypeError: If ``base_classifier`` is of an unsupported type.
        """
        X, y = _validate_training_data(X, y)
        # NOTE: this seeds NumPy's *global* generator, as the module always has.
        np.random.seed(self.random_state)
        n_samples = X.shape[0]
        self.estimators = []

        for _ in range(self.n_estimators):
            # Bootstrap sample: n_samples draws with replacement.
            bootstrap_indices = np.random.choice(n_samples, size=n_samples, replace=True)

            estimator = self._new_estimator()
            estimator.fit(X[bootstrap_indices], y[bootstrap_indices])
            self.estimators.append(estimator)

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Predict by majority vote over the fitted base learners.

        Returns:
            One label per row of X, in the dtype produced by the base learners.
            Ties go to the smallest label.

        Raises:
            RuntimeError: If no base learner has been fitted yet.
        """
        if not self.estimators:
            raise RuntimeError("BaggingClassifier has no fitted estimators; call fit() first")
        # Sized by the estimators actually present, not by n_estimators, so a
        # changed n_estimators attribute cannot add phantom votes.
        return _majority_vote(_stack_predictions(self.estimators, X))


class AdaBoostClassifier:
    """AdaBoost ensemble of depth-1 decision trees (decision stumps).

    The learner weight uses the two-class formula
    ``0.5 * log((1 - error) / error)`` and boosting stops as soon as a stump's
    weighted error reaches 0.5.

    Args:
        n_estimators: Maximum number of boosting rounds.
        random_state: Seed passed to ``np.random.seed`` at the start of ``fit``.
    """

    def __init__(self, n_estimators: int = 10, random_state: int = 42):
        self.n_estimators = n_estimators
        self.random_state = random_state
        self.estimators = []
        self.estimator_weights = []
        # Sorted distinct training labels; set by fit().
        self.classes_: Optional[np.ndarray] = None

    def fit(self, X: np.ndarray, y: np.ndarray):
        """Train the boosted ensemble.

        Raises:
            ValueError: If the training data is empty or X and y differ in length.
        """
        X, y = _validate_training_data(X, y)
        # NOTE: this seeds NumPy's *global* generator, as the module always has.
        np.random.seed(self.random_state)
        n_samples = X.shape[0]

        # Start from a uniform distribution over the training samples.
        sample_weights = np.full(n_samples, 1.0 / n_samples)

        self.estimators = []
        self.estimator_weights = []
        self.classes_ = np.unique(y)

        for _ in range(self.n_estimators):
            # Resample the training set according to the current weights.
            sample_indices = np.random.choice(
                n_samples, size=n_samples, replace=True, p=sample_weights
            )

            # Weak learner: a decision stump.
            estimator = DecisionTreeClassifier(max_depth=1)
            estimator.fit(X[sample_indices], y[sample_indices])

            # Weighted error measured on the full training set.
            y_pred = np.asarray(estimator.predict(X))
            error_mask = y_pred != y
            error_rate = np.average(error_mask, weights=sample_weights)

            # Stop once a learner reaches an error rate of 0.5.
            if error_rate >= 0.5:
                break

            # The small constant avoids division by zero for a perfect learner.
            alpha = 0.5 * np.log((1 - error_rate) / (error_rate + 1e-10))

            # Up-weight misclassified samples and down-weight the rest. Using
            # the mismatch mask instead of the product y * y_pred gives the
            # same factors for {-1, +1} labels and stays correct for any
            # other label encoding.
            sample_weights *= np.where(error_mask, np.exp(alpha), np.exp(-alpha))
            sample_weights /= np.sum(sample_weights)

            self.estimators.append(estimator)
            self.estimator_weights.append(alpha)

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Predict by weighted vote of the fitted stumps.

        Each stump adds its weight to the class it predicts and the class with
        the largest total wins; an exact tie goes to the smallest label. For
        {-1, +1} labels this is the sign of the weighted sum of predictions.

        Raises:
            RuntimeError: If the ensemble is empty, either because fit() was
                not called or because the first stump already reached an
                error rate of 0.5.
        """
        if not self.estimators:
            raise RuntimeError(
                "AdaBoostClassifier has no fitted estimators; call fit() first "
                "(fit() keeps no learner if the first one has error >= 0.5)"
            )

        X = np.asarray(X)
        classes = getattr(self, 'classes_', None)
        if classes is None:
            # Estimators were installed without fit(), so the label set is
            # unknown: fall back to the sign rule for {-1, +1} labels.
            totals = np.zeros(X.shape[0])
            for estimator, weight in zip(self.estimators, self.estimator_weights):
                totals += weight * estimator.predict(X)
            return np.sign(totals)

        scores = np.zeros((X.shape[0], classes.size))
        for estimator, weight in zip(self.estimators, self.estimator_weights):
            y_pred = np.asarray(estimator.predict(X)).ravel()
            # One-hot mask of the predicted class, scaled by the stump weight.
            scores += weight * (y_pred[:, None] == classes[None, :])
        return classes[np.argmax(scores, axis=1)]


class VotingClassifier:
    """Hard-voting ensemble over a caller-supplied list of estimators.

    Args:
        estimators: Objects exposing ``fit(X, y)`` and ``predict(X)``.
    """

    def __init__(self, estimators: List[Any]):
        self.estimators = estimators

    def fit(self, X: np.ndarray, y: np.ndarray):
        """Fit every estimator on the same (X, y)."""
        for estimator in self.estimators:
            estimator.fit(X, y)

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Predict by majority vote over all estimators.

        Returns:
            One label per row of X, in the dtype produced by the estimators.
            Ties go to the smallest label.

        Raises:
            ValueError: If the ensemble contains no estimators.
        """
        if not self.estimators:
            raise ValueError("VotingClassifier needs at least one estimator to predict")
        return _majority_vote(_stack_predictions(self.estimators, X))