143 lines
5.0 KiB
Python
143 lines
5.0 KiB
Python
import numpy as np
|
|
from typing import List, Any
|
|
from classifiers import DecisionTreeClassifier, NaiveBayesClassifier, KNNClassifier
|
|
|
|
class BaggingClassifier:
    """Bagging (bootstrap aggregating) ensemble classifier.

    Trains ``n_estimators`` independent base classifiers, each on a bootstrap
    resample (drawn with replacement, same size as the training set), and
    predicts by majority vote over the trained classifiers.

    Args:
        base_classifier: Either one of the names ``'decision_tree'``
            (``DecisionTreeClassifier(max_depth=8)``), ``'naive_bayes'``
            (``NaiveBayesClassifier()``) or ``'knn'`` (``KNNClassifier(k=5)``),
            or a zero-argument callable (for example a class) that returns a
            fresh estimator exposing ``fit(X, y)`` and ``predict(X)``.
        n_estimators: Number of base classifiers to train.
        random_state: Seed handed to ``np.random.seed`` at the start of
            ``fit``.
    """

    def __init__(self, base_classifier, n_estimators: int = 10, random_state: int = 42):
        self.base_classifier = base_classifier
        self.n_estimators = n_estimators
        self.random_state = random_state
        self.estimators = []

    def _make_estimator(self):
        """Build one fresh, untrained base classifier.

        Raises:
            ValueError: If ``base_classifier`` is neither a known name nor a
                callable. (Previously this surfaced as ``UnboundLocalError``.)
        """
        kind = self.base_classifier
        if kind == 'decision_tree':
            return DecisionTreeClassifier(max_depth=8)
        if kind == 'naive_bayes':
            return NaiveBayesClassifier()
        if kind == 'knn':
            return KNNClassifier(k=5)
        if callable(kind):
            return kind()
        raise ValueError(
            f"Unknown base_classifier {kind!r}; expected 'decision_tree', "
            "'naive_bayes', 'knn' or a zero-argument callable"
        )

    @staticmethod
    def _majority_vote(votes: np.ndarray) -> np.ndarray:
        """Return the most frequent label of every row of ``votes``.

        ``votes`` has one row per sample and one column per estimator.
        ``np.unique`` returns the labels sorted, and ``np.argmax`` picks the
        first maximum, so ties go to the smallest label.
        """
        winners = []
        for row in votes:
            labels, counts = np.unique(row, return_counts=True)
            winners.append(labels[np.argmax(counts)])
        return np.array(winners, dtype=votes.dtype)

    def fit(self, X: np.ndarray, y: np.ndarray):
        """Train the ensemble on bootstrap resamples of ``(X, y)``.

        Any previously trained estimators are discarded.

        Args:
            X: Training samples, indexed by sample along the first axis.
            y: Training labels, one per sample.

        Raises:
            ValueError: If ``X`` and ``y`` differ in length, or if
                ``base_classifier`` is not recognised.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        n_samples = X.shape[0]
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X and y have inconsistent lengths: {n_samples} != {y.shape[0]}"
            )

        # Seeds NumPy's *global* generator, exactly as the original code did,
        # so the bootstrap draws are reproducible for a given random_state.
        np.random.seed(self.random_state)

        self.estimators = []
        for _ in range(self.n_estimators):
            # Bootstrap sample: n_samples indices drawn with replacement.
            picked = np.random.choice(n_samples, size=n_samples, replace=True)

            estimator = self._make_estimator()
            estimator.fit(X[picked], y[picked])
            self.estimators.append(estimator)

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Predict a label for every sample in ``X`` by majority vote.

        The vote matrix is built from the estimators that were actually
        trained (not from ``n_estimators``), and labels keep the dtype the
        base estimators return instead of being coerced to float.

        Args:
            X: Samples to classify.

        Returns:
            1-D array with one label per sample; ties go to the smallest
            label.

        Raises:
            ValueError: If no estimator has been trained yet.
        """
        if not self.estimators:
            raise ValueError(
                "BaggingClassifier has no trained estimators; call fit() first"
            )

        # One column per estimator, one row per sample.
        votes = np.column_stack(
            [np.ravel(estimator.predict(X)) for estimator in self.estimators]
        )
        return self._majority_vote(votes)
|
|
|
|
class AdaBoostClassifier:
    """AdaBoost ensemble classifier for binary labels.

    Each round draws a weighted resample of the training set, trains a weak
    learner on it, and re-weights the samples so later rounds concentrate on
    the ones that were misclassified. Prediction is the sign of the
    alpha-weighted sum of the weak learners' votes.

    Labels may be any two distinct values. They are encoded internally as
    -1 / +1 (smaller label -> -1, larger label -> +1), so data already
    labelled -1 / +1 is handled exactly as before.

    Args:
        n_estimators: Maximum number of boosting rounds.
        random_state: Seed handed to ``np.random.seed`` at the start of
            ``fit``.
        base_estimator: Optional zero-argument callable returning a fresh
            weak learner with ``fit(X, y)`` and ``predict(X)``. Defaults to
            a decision stump, ``DecisionTreeClassifier(max_depth=1)``.
    """

    def __init__(self, n_estimators: int = 10, random_state: int = 42,
                 base_estimator=None):
        self.n_estimators = n_estimators
        self.random_state = random_state
        self.base_estimator = base_estimator
        self.estimators = []
        self.estimator_weights = []
        # Sorted distinct training labels; set by fit().
        self.classes_ = None

    def _make_estimator(self):
        """Build one fresh, untrained weak learner."""
        if self.base_estimator is not None:
            return self.base_estimator()
        return DecisionTreeClassifier(max_depth=1)

    def fit(self, X: np.ndarray, y: np.ndarray):
        """Train the boosted ensemble.

        Any previously trained estimators are discarded. Boosting stops early
        as soon as a weak learner's weighted error reaches 0.5; that learner
        is not kept.

        Args:
            X: Training samples, indexed by sample along the first axis.
            y: Training labels, one per sample, with at most two distinct
                values.

        Raises:
            ValueError: If ``X`` and ``y`` differ in length or ``y`` holds
                more than two distinct labels.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        n_samples = X.shape[0]
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X and y have inconsistent lengths: {n_samples} != {y.shape[0]}"
            )

        classes = np.unique(y)
        if classes.size > 2:
            raise ValueError(
                "AdaBoostClassifier supports binary labels only, got "
                f"{classes.size} classes"
            )
        self.classes_ = classes

        # Seeds NumPy's *global* generator, exactly as the original code did.
        np.random.seed(self.random_state)

        # Start from uniform sample weights.
        sample_weights = np.ones(n_samples) / n_samples

        self.estimators = []
        self.estimator_weights = []

        for _ in range(self.n_estimators):
            # Resample the training set according to the current weights.
            picked = np.random.choice(
                n_samples, size=n_samples, replace=True, p=sample_weights
            )

            estimator = self._make_estimator()
            estimator.fit(X[picked], y[picked])

            # Weighted error rate on the full (un-resampled) training set.
            y_pred = np.ravel(estimator.predict(X))
            missed = y_pred != y
            error_rate = np.average(missed, weights=sample_weights)

            # A learner no better than chance cannot help; stop boosting.
            if error_rate >= 0.5:
                break

            # The 1e-10 keeps alpha finite when the learner is perfect.
            alpha = 0.5 * np.log((1 - error_rate) / (error_rate + 1e-10))

            # Up-weight misclassified samples, down-weight the rest. Written
            # with the error mask so it is correct for any label encoding;
            # for -1/+1 labels it equals exp(-alpha * y * y_pred).
            sample_weights *= np.exp(np.where(missed, alpha, -alpha))
            sample_weights /= np.sum(sample_weights)

            self.estimators.append(estimator)
            self.estimator_weights.append(alpha)

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Predict a label for every sample in ``X``.

        Args:
            X: Samples to classify.

        Returns:
            1-D array of labels drawn from the two training classes. A zero
            weighted score (a tie) resolves to the larger label, so the
            result is always a valid label.

        Raises:
            ValueError: If the ensemble holds no estimators, either because
                ``fit`` was never called or because no weak learner beat
                chance.
        """
        if not self.estimators:
            raise ValueError(
                "AdaBoostClassifier has no trained estimators; call fit() "
                "first (or no weak learner achieved error < 0.5)"
            )

        # Fall back to the -1/+1 convention when estimators were installed
        # without going through fit().
        classes = self.classes_ if self.classes_ is not None else np.array([-1, 1])
        negative, positive = classes[0], classes[-1]

        score = 0.0
        for estimator, weight in zip(self.estimators, self.estimator_weights):
            votes = np.ravel(estimator.predict(X))
            # Encode each vote as +1 (positive class) or -1 (anything else).
            score = score + weight * np.where(votes == positive, 1.0, -1.0)

        return np.where(score >= 0, positive, negative)
|
|
|
|
class VotingClassifier:
    """Hard-voting ensemble over a caller-supplied list of classifiers.

    Args:
        estimators: Classifier instances exposing ``fit(X, y)`` and
            ``predict(X)``. The list is stored by reference and its members
            are trained in place by ``fit``.
    """

    def __init__(self, estimators: List[Any]):
        self.estimators = estimators

    @staticmethod
    def _majority_vote(votes: np.ndarray) -> np.ndarray:
        """Return the most frequent label of every row of ``votes``.

        ``votes`` has one row per sample and one column per estimator.
        ``np.unique`` returns the labels sorted, and ``np.argmax`` picks the
        first maximum, so ties go to the smallest label.
        """
        winners = []
        for row in votes:
            labels, counts = np.unique(row, return_counts=True)
            winners.append(labels[np.argmax(counts)])
        return np.array(winners, dtype=votes.dtype)

    def fit(self, X: np.ndarray, y: np.ndarray):
        """Train every member classifier on the full ``(X, y)``."""
        for estimator in self.estimators:
            estimator.fit(X, y)

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Predict a label for every sample in ``X`` by majority vote.

        Labels keep the dtype the member classifiers return instead of being
        coerced to float, so non-numeric labels are supported.

        Args:
            X: Samples to classify.

        Returns:
            1-D array with one label per sample; ties go to the smallest
            label.

        Raises:
            ValueError: If the ensemble has no estimators.
        """
        if not self.estimators:
            raise ValueError("VotingClassifier has no estimators to vote with")

        # One column per estimator, one row per sample.
        votes = np.column_stack(
            [np.ravel(estimator.predict(X)) for estimator in self.estimators]
        )
        return self._majority_vote(votes)
|