# ml-homework/improved_bp.py

import numpy as np
from typing import List, Tuple

class ImprovedBPNetwork:
    """Improved BP neural network with dynamic learning-rate adjustment."""

    def __init__(self, hidden_layers: List[int], learning_rate: float = 0.01,
                 max_epochs: int = 1000, tolerance: float = 1e-6):
        self.hidden_layers = hidden_layers
        self.initial_lr = learning_rate
        self.learning_rate = learning_rate
        self.max_epochs = max_epochs
        self.tolerance = tolerance
        self.weights = []
        self.biases = []
        self.loss_history = []

    def _sigmoid(self, x: np.ndarray) -> np.ndarray:
        """Sigmoid activation function."""
        x = np.clip(x, -500, 500)  # Prevent overflow in exp
        return 1 / (1 + np.exp(-x))

    def _sigmoid_derivative(self, x: np.ndarray) -> np.ndarray:
        """Derivative of the sigmoid function."""
        s = self._sigmoid(x)
        return s * (1 - s)
    def _initialize_weights(self, input_size: int, output_size: int):
        """Initialize the weights and biases."""
        self.weights = []
        self.biases = []
        # Full layer layout: input size, hidden layer sizes, output size
        layers = [input_size] + self.hidden_layers + [output_size]
        # Xavier (Glorot) initialization: variance scaled by fan-in + fan-out
        for i in range(len(layers) - 1):
            w = np.random.normal(0, np.sqrt(2.0 / (layers[i] + layers[i + 1])),
                                 (layers[i], layers[i + 1]))
            b = np.zeros((1, layers[i + 1]))
            self.weights.append(w)
            self.biases.append(b)
    def _forward_pass(self, X: np.ndarray) -> List[np.ndarray]:
        """Forward propagation; returns the activation of every layer."""
        activations = [X]
        for i in range(len(self.weights)):
            z = np.dot(activations[-1], self.weights[i]) + self.biases[i]
            a = self._sigmoid(z)
            activations.append(a)
        return activations
    def _backward_pass(self, X: np.ndarray, y: np.ndarray,
                       activations: List[np.ndarray]) -> Tuple[List[np.ndarray], List[np.ndarray]]:
        """Backpropagation; returns the gradients of the weights and biases."""
        m = X.shape[0]
        dw = [np.zeros_like(w) for w in self.weights]
        db = [np.zeros_like(b) for b in self.biases]
        # Output-layer error. The sigmoid derivative at the output is omitted,
        # so this is the cross-entropy-style gradient rather than the exact
        # gradient of the MSE that fit() monitors.
        delta = activations[-1] - y
        # Propagate the error from the output layer back toward the input layer
        for i in range(len(self.weights) - 1, -1, -1):
            dw[i] = np.dot(activations[i].T, delta) / m
            db[i] = np.mean(delta, axis=0, keepdims=True)
            if i > 0:
                # sigma'(z) for layer i, computed from its cached activation as
                # a * (1 - a). (The original recomputed the *next* layer's
                # pre-activation here, whose shape does not match delta and
                # crashes whenever adjacent layer widths differ.)
                delta = np.dot(delta, self.weights[i].T) * activations[i] * (1 - activations[i])
        return dw, db
    def _adaptive_learning_rate(self, epoch: int, current_loss: float, prev_loss: float):
        """Dynamically adjust the learning rate."""
        if epoch > 0:
            if current_loss > prev_loss:
                # Loss went up: shrink the learning rate
                self.learning_rate *= 0.9
            elif (prev_loss - current_loss) / prev_loss < 0.001:
                # Loss is falling slowly: grow the learning rate
                self.learning_rate *= 1.05
        # Keep the learning rate within a fixed band around its initial value
        self.learning_rate = np.clip(self.learning_rate,
                                     self.initial_lr * 0.01,
                                     self.initial_lr * 10)
    def fit(self, X: np.ndarray, y: np.ndarray):
        """Train the neural network."""
        # One-hot encode 1-D integer labels; columns follow np.unique(y) order
        if len(y.shape) == 1:
            y_encoded = np.zeros((len(y), len(np.unique(y))))
            for i, label in enumerate(np.unique(y)):
                y_encoded[y == label, i] = 1
            y = y_encoded
        self._initialize_weights(X.shape[1], y.shape[1])
        # Reset per-run state so repeated fit() calls start fresh
        self.learning_rate = self.initial_lr
        self.loss_history = []
        prev_loss = float('inf')
        for epoch in range(self.max_epochs):
            # Forward pass
            activations = self._forward_pass(X)
            # Mean squared error over all outputs
            loss = np.mean((activations[-1] - y) ** 2)
            self.loss_history.append(loss)
            # Dynamically adjust the learning rate
            self._adaptive_learning_rate(epoch, loss, prev_loss)
            # Backward pass
            dw, db = self._backward_pass(X, y, activations)
            # Update the weights and biases
            for i in range(len(self.weights)):
                self.weights[i] -= self.learning_rate * dw[i]
                self.biases[i] -= self.learning_rate * db[i]
            # Convergence check
            if abs(prev_loss - loss) < self.tolerance:
                print(f"Training converged at epoch {epoch + 1}")
                break
            prev_loss = loss
            if epoch % 100 == 0:
                print(f"Epoch {epoch}, Loss: {loss:.6f}, LR: {self.learning_rate:.6f}")
    def predict(self, X: np.ndarray) -> np.ndarray:
        """Predict class indices (positions within the sorted unique labels)."""
        activations = self._forward_pass(X)
        return np.argmax(activations[-1], axis=1)

    def predict_proba(self, X: np.ndarray) -> np.ndarray:
        """Return per-class sigmoid outputs (not normalized to sum to 1)."""
        activations = self._forward_pass(X)
        return activations[-1]
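
# A minimal usage sketch, added for illustration (not part of the original
# assignment). The XOR data, layer width, learning rate, and epoch budget
# below are all assumptions, not values prescribed by the homework.
def _demo_improved_bp() -> None:
    X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=float)
    y = np.array([0, 1, 1, 0])
    net = ImprovedBPNetwork(hidden_layers=[8], learning_rate=0.5, max_epochs=5000)
    net.fit(X, y)
    print("XOR predictions:", net.predict(X))
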

class StandardBPNetwork:
    """Standard BP neural network with a fixed learning rate."""

    def __init__(self, hidden_layers: List[int], learning_rate: float = 0.01,
                 max_epochs: int = 1000, tolerance: float = 1e-6):
        self.hidden_layers = hidden_layers
        self.learning_rate = learning_rate
        self.max_epochs = max_epochs
        self.tolerance = tolerance
        self.weights = []
        self.biases = []
        self.loss_history = []

    def _sigmoid(self, x: np.ndarray) -> np.ndarray:
        """Sigmoid activation function."""
        x = np.clip(x, -500, 500)  # Prevent overflow in exp
        return 1 / (1 + np.exp(-x))

    def _sigmoid_derivative(self, x: np.ndarray) -> np.ndarray:
        """Derivative of the sigmoid function."""
        s = self._sigmoid(x)
        return s * (1 - s)
    def _initialize_weights(self, input_size: int, output_size: int):
        """Initialize the weights and biases (Xavier initialization)."""
        self.weights = []
        self.biases = []
        layers = [input_size] + self.hidden_layers + [output_size]
        for i in range(len(layers) - 1):
            w = np.random.normal(0, np.sqrt(2.0 / (layers[i] + layers[i + 1])),
                                 (layers[i], layers[i + 1]))
            b = np.zeros((1, layers[i + 1]))
            self.weights.append(w)
            self.biases.append(b)
    def _forward_pass(self, X: np.ndarray) -> List[np.ndarray]:
        """Forward propagation; returns the activation of every layer."""
        activations = [X]
        for i in range(len(self.weights)):
            z = np.dot(activations[-1], self.weights[i]) + self.biases[i]
            a = self._sigmoid(z)
            activations.append(a)
        return activations
    def _backward_pass(self, X: np.ndarray, y: np.ndarray,
                       activations: List[np.ndarray]) -> Tuple[List[np.ndarray], List[np.ndarray]]:
        """Backpropagation; returns the gradients of the weights and biases."""
        m = X.shape[0]
        dw = [np.zeros_like(w) for w in self.weights]
        db = [np.zeros_like(b) for b in self.biases]
        # Output-layer error (cross-entropy-style gradient, as in ImprovedBPNetwork)
        delta = activations[-1] - y
        for i in range(len(self.weights) - 1, -1, -1):
            dw[i] = np.dot(activations[i].T, delta) / m
            db[i] = np.mean(delta, axis=0, keepdims=True)
            if i > 0:
                # Same shape fix as in ImprovedBPNetwork: sigma'(z) of layer i
                # computed from its cached activation as a * (1 - a)
                delta = np.dot(delta, self.weights[i].T) * activations[i] * (1 - activations[i])
        return dw, db
    def fit(self, X: np.ndarray, y: np.ndarray):
        """Train the neural network."""
        # One-hot encode 1-D integer labels; columns follow np.unique(y) order
        if len(y.shape) == 1:
            y_encoded = np.zeros((len(y), len(np.unique(y))))
            for i, label in enumerate(np.unique(y)):
                y_encoded[y == label, i] = 1
            y = y_encoded
        self._initialize_weights(X.shape[1], y.shape[1])
        # Reset per-run state so repeated fit() calls start fresh
        self.loss_history = []
        prev_loss = float('inf')
        for epoch in range(self.max_epochs):
            activations = self._forward_pass(X)
            loss = np.mean((activations[-1] - y) ** 2)
            self.loss_history.append(loss)
            dw, db = self._backward_pass(X, y, activations)
            # Fixed-learning-rate gradient descent update
            for i in range(len(self.weights)):
                self.weights[i] -= self.learning_rate * dw[i]
                self.biases[i] -= self.learning_rate * db[i]
            if abs(prev_loss - loss) < self.tolerance:
                print(f"Standard BP training converged at epoch {epoch + 1}")
                break
            prev_loss = loss
            if epoch % 100 == 0:
                print(f"Standard BP Epoch {epoch}, Loss: {loss:.6f}")
    def predict(self, X: np.ndarray) -> np.ndarray:
        """Predict class indices (positions within the sorted unique labels)."""
        activations = self._forward_pass(X)
        return np.argmax(activations[-1], axis=1)

    def predict_proba(self, X: np.ndarray) -> np.ndarray:
        """Return per-class sigmoid outputs (not normalized to sum to 1)."""
        activations = self._forward_pass(X)
        return activations[-1]
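
# Hedged comparison sketch, added for illustration (not part of the original
# file). The synthetic two-blob dataset and every hyperparameter below are
# assumptions chosen only to exercise both classes end to end.
if __name__ == "__main__":
    rng = np.random.default_rng(42)
    n = 100
    # Two Gaussian blobs centred at (0, 0) and (2, 2)
    X = np.vstack([rng.normal(0.0, 0.6, size=(n, 2)),
                   rng.normal(2.0, 0.6, size=(n, 2))])
    y = np.array([0] * n + [1] * n)

    for cls in (StandardBPNetwork, ImprovedBPNetwork):
        net = cls(hidden_layers=[10], learning_rate=0.1, max_epochs=1000)
        net.fit(X, y)
        acc = np.mean(net.predict(X) == y)
        print(f"{cls.__name__} training accuracy: {acc:.3f}")

    _demo_improved_bp()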