From 047b03a59007b330e15d117f471d9e79e9f63e51 Mon Sep 17 00:00:00 2001 From: grtsinry43 Date: Sat, 24 May 2025 17:02:46 +0800 Subject: [PATCH] feat: Add initial implementation of core modules --- classifiers.py | 220 +++++++++++++++++++++++++++++ ensemble.py | 142 +++++++++++++++++++ experiments.py | 319 ++++++++++++++++++++++++++++++++++++++++++ feature_extraction.py | 96 +++++++++++++ improved_bp.py | 248 ++++++++++++++++++++++++++++++++ main.ipynb | 188 ++++++++++++------------- main.py | 29 ++++ utils.py | 72 ++++++++++ 8 files changed, 1220 insertions(+), 94 deletions(-) create mode 100644 classifiers.py create mode 100644 ensemble.py create mode 100644 experiments.py create mode 100644 feature_extraction.py create mode 100644 improved_bp.py create mode 100644 main.py create mode 100644 utils.py diff --git a/classifiers.py b/classifiers.py new file mode 100644 index 0000000..9eb0d0e --- /dev/null +++ b/classifiers.py @@ -0,0 +1,220 @@ +import numpy as np +from typing import Dict, List, Tuple +from collections import Counter + +class NaiveBayesClassifier: + """朴素贝叶斯分类器""" + + def __init__(self): + self.class_priors = {} + self.feature_likelihoods = {} + self.classes = None + + def fit(self, X: np.ndarray, y: np.ndarray): + """训练朴素贝叶斯分类器""" + self.classes = np.unique(y) + n_samples, n_features = X.shape + + # 计算类先验概率 + for c in self.classes: + self.class_priors[c] = np.sum(y == c) / n_samples + + # 计算特征似然 + self.feature_likelihoods = {} + for c in self.classes: + class_data = X[y == c] + self.feature_likelihoods[c] = { + 'mean': np.mean(class_data, axis=0), + 'var': np.var(class_data, axis=0) + 1e-10 # 避免除零 + } + + def _gaussian_probability(self, x: float, mean: float, var: float) -> float: + """计算高斯概率密度""" + return (1 / np.sqrt(2 * np.pi * var)) * np.exp(-0.5 * ((x - mean) ** 2) / var) + + def predict(self, X: np.ndarray) -> np.ndarray: + """预测""" + predictions = [] + + for sample in X: + class_scores = {} + + for c in self.classes: + # 计算后验概率(对数形式避免下溢) + log_prob = np.log(self.class_priors[c]) + + for i, feature_value in enumerate(sample): + mean = self.feature_likelihoods[c]['mean'][i] + var = self.feature_likelihoods[c]['var'][i] + log_prob += np.log(self._gaussian_probability(feature_value, mean, var)) + + class_scores[c] = log_prob + + # 选择概率最大的类 + predicted_class = max(class_scores, key=class_scores.get) + predictions.append(predicted_class) + + return np.array(predictions) + +class KNNClassifier: + """K最近邻分类器""" + + def __init__(self, k: int = 3): + self.k = k + self.X_train = None + self.y_train = None + + def fit(self, X: np.ndarray, y: np.ndarray): + """训练KNN分类器""" + self.X_train = X + self.y_train = y + + def _euclidean_distance(self, x1: np.ndarray, x2: np.ndarray) -> float: + """计算欧几里得距离""" + return np.sqrt(np.sum((x1 - x2) ** 2)) + + def predict(self, X: np.ndarray) -> np.ndarray: + """预测""" + predictions = [] + + for sample in X: + # 计算与所有训练样本的距离 + distances = [] + for i, train_sample in enumerate(self.X_train): + dist = self._euclidean_distance(sample, train_sample) + distances.append((dist, self.y_train[i])) + + # 选择k个最近邻 + distances.sort(key=lambda x: x[0]) + k_nearest = distances[:self.k] + + # 投票决定类别 + votes = [label for _, label in k_nearest] + predicted_class = max(set(votes), key=votes.count) + predictions.append(predicted_class) + + return np.array(predictions) + +class DecisionTreeNode: + """决策树节点""" + + def __init__(self): + self.feature_idx = None + self.threshold = None + self.left = None + self.right = None + self.prediction = None + self.is_leaf = False + 
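+# Illustrative sketch only (not referenced by the classes in this file): the
+# classifier above computes np.log(_gaussian_probability(...)), which can
+# underflow to log(0) for samples far from a class mean. Evaluating the
+# Gaussian density directly in log space avoids that. The helper name below
+# is hypothetical.
+def gaussian_log_probability(x: float, mean: float, var: float) -> float:
+    """Return log N(x; mean, var), computed in log space for numerical stability."""
+    return -0.5 * np.log(2 * np.pi * var) - 0.5 * ((x - mean) ** 2) / var
+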
+class DecisionTreeClassifier: + """决策树分类器""" + + def __init__(self, max_depth: int = 10, min_samples_split: int = 2): + self.max_depth = max_depth + self.min_samples_split = min_samples_split + self.root = None + + def _gini_impurity(self, y: np.ndarray) -> float: + """计算基尼不纯度""" + if len(y) == 0: + return 0 + + _, counts = np.unique(y, return_counts=True) + probabilities = counts / len(y) + return 1 - np.sum(probabilities ** 2) + + def _information_gain(self, y: np.ndarray, y_left: np.ndarray, y_right: np.ndarray) -> float: + """计算信息增益""" + n = len(y) + n_left, n_right = len(y_left), len(y_right) + + if n_left == 0 or n_right == 0: + return 0 + + gini_parent = self._gini_impurity(y) + gini_children = (n_left / n) * self._gini_impurity(y_left) + (n_right / n) * self._gini_impurity(y_right) + + return gini_parent - gini_children + + def _best_split(self, X: np.ndarray, y: np.ndarray) -> Tuple[int, float, float]: + """找到最佳分割""" + best_gain = 0 + best_feature_idx = None + best_threshold = None + + n_features = X.shape[1] + + for feature_idx in range(n_features): + feature_values = X[:, feature_idx] + thresholds = np.unique(feature_values) + + for threshold in thresholds: + left_mask = feature_values <= threshold + right_mask = ~left_mask + + if np.sum(left_mask) == 0 or np.sum(right_mask) == 0: + continue + + y_left, y_right = y[left_mask], y[right_mask] + gain = self._information_gain(y, y_left, y_right) + + if gain > best_gain: + best_gain = gain + best_feature_idx = feature_idx + best_threshold = threshold + + return best_feature_idx, best_threshold, best_gain + + def _build_tree(self, X: np.ndarray, y: np.ndarray, depth: int = 0) -> DecisionTreeNode: + """构建决策树""" + node = DecisionTreeNode() + + # 停止条件 + if (depth >= self.max_depth or + len(np.unique(y)) == 1 or + len(y) < self.min_samples_split): + node.is_leaf = True + node.prediction = max(set(y), key=list(y).count) + return node + + # 找到最佳分割 + feature_idx, threshold, gain = self._best_split(X, y) + + if gain == 0: + node.is_leaf = True + node.prediction = max(set(y), key=list(y).count) + return node + + # 分割数据 + left_mask = X[:, feature_idx] <= threshold + right_mask = ~left_mask + + node.feature_idx = feature_idx + node.threshold = threshold + node.left = self._build_tree(X[left_mask], y[left_mask], depth + 1) + node.right = self._build_tree(X[right_mask], y[right_mask], depth + 1) + + return node + + def fit(self, X: np.ndarray, y: np.ndarray): + """训练决策树""" + self.root = self._build_tree(X, y) + + def _predict_sample(self, sample: np.ndarray, node: DecisionTreeNode): + """预测单个样本""" + if node.is_leaf: + return node.prediction + + if sample[node.feature_idx] <= node.threshold: + return self._predict_sample(sample, node.left) + else: + return self._predict_sample(sample, node.right) + + def predict(self, X: np.ndarray) -> np.ndarray: + """预测""" + predictions = [] + for sample in X: + prediction = self._predict_sample(sample, self.root) + predictions.append(prediction) + + return np.array(predictions) diff --git a/ensemble.py b/ensemble.py new file mode 100644 index 0000000..6a611ad --- /dev/null +++ b/ensemble.py @@ -0,0 +1,142 @@ +import numpy as np +from typing import List, Any +from classifiers import DecisionTreeClassifier, NaiveBayesClassifier, KNNClassifier + +class BaggingClassifier: + """Bagging集成分类器""" + + def __init__(self, base_classifier, n_estimators: int = 10, random_state: int = 42): + self.base_classifier = base_classifier + self.n_estimators = n_estimators + self.random_state = random_state + self.estimators = [] + + 
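+    # Bootstrap aggregation, as implemented in fit() below with
+    # np.random.choice(..., replace=True): each base learner is trained on n
+    # rows drawn with replacement, so on average roughly 63.2% (1 - 1/e) of
+    # the distinct training rows appear in any single bootstrap sample, and
+    # the per-estimator predictions are combined by majority vote in predict().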
def fit(self, X: np.ndarray, y: np.ndarray): + """训练Bagging分类器""" + np.random.seed(self.random_state) + n_samples = X.shape[0] + + self.estimators = [] + + for i in range(self.n_estimators): + # Bootstrap采样 + bootstrap_indices = np.random.choice(n_samples, size=n_samples, replace=True) + X_bootstrap = X[bootstrap_indices] + y_bootstrap = y[bootstrap_indices] + + # 训练基分类器 + if self.base_classifier == 'decision_tree': + estimator = DecisionTreeClassifier(max_depth=8) + elif self.base_classifier == 'naive_bayes': + estimator = NaiveBayesClassifier() + elif self.base_classifier == 'knn': + estimator = KNNClassifier(k=5) + + estimator.fit(X_bootstrap, y_bootstrap) + self.estimators.append(estimator) + + def predict(self, X: np.ndarray) -> np.ndarray: + """预测""" + # 收集所有基分类器的预测 + predictions = np.zeros((X.shape[0], self.n_estimators)) + + for i, estimator in enumerate(self.estimators): + predictions[:, i] = estimator.predict(X) + + # 投票决定最终预测 + final_predictions = [] + for i in range(X.shape[0]): + votes = predictions[i, :] + prediction = max(set(votes), key=list(votes).count) + final_predictions.append(prediction) + + return np.array(final_predictions) + +class AdaBoostClassifier: + """AdaBoost集成分类器""" + + def __init__(self, n_estimators: int = 10, random_state: int = 42): + self.n_estimators = n_estimators + self.random_state = random_state + self.estimators = [] + self.estimator_weights = [] + + def fit(self, X: np.ndarray, y: np.ndarray): + """训练AdaBoost分类器""" + np.random.seed(self.random_state) + n_samples = X.shape[0] + + # 初始化样本权重 + sample_weights = np.ones(n_samples) / n_samples + + self.estimators = [] + self.estimator_weights = [] + + for i in range(self.n_estimators): + # 根据样本权重采样 + sample_indices = np.random.choice( + n_samples, size=n_samples, replace=True, p=sample_weights + ) + X_weighted = X[sample_indices] + y_weighted = y[sample_indices] + + # 训练弱分类器(决策树桩) + estimator = DecisionTreeClassifier(max_depth=1) + estimator.fit(X_weighted, y_weighted) + + # 计算预测错误率 + y_pred = estimator.predict(X) + error_mask = y_pred != y + error_rate = np.average(error_mask, weights=sample_weights) + + # 如果错误率太高,停止 + if error_rate >= 0.5: + break + + # 计算分类器权重 + alpha = 0.5 * np.log((1 - error_rate) / (error_rate + 1e-10)) + + # 更新样本权重 + sample_weights *= np.exp(-alpha * y * y_pred) + sample_weights /= np.sum(sample_weights) + + self.estimators.append(estimator) + self.estimator_weights.append(alpha) + + def predict(self, X: np.ndarray) -> np.ndarray: + """预测""" + n_samples = X.shape[0] + predictions = np.zeros(n_samples) + + for estimator, weight in zip(self.estimators, self.estimator_weights): + y_pred = estimator.predict(X) + predictions += weight * y_pred + + return np.sign(predictions) + +class VotingClassifier: + """投票集成分类器""" + + def __init__(self, estimators: List[Any]): + self.estimators = estimators + + def fit(self, X: np.ndarray, y: np.ndarray): + """训练所有分类器""" + for estimator in self.estimators: + estimator.fit(X, y) + + def predict(self, X: np.ndarray) -> np.ndarray: + """预测""" + predictions = np.zeros((X.shape[0], len(self.estimators))) + + for i, estimator in enumerate(self.estimators): + predictions[:, i] = estimator.predict(X) + + # 投票决定最终预测 + final_predictions = [] + for i in range(X.shape[0]): + votes = predictions[i, :] + prediction = max(set(votes), key=list(votes).count) + final_predictions.append(prediction) + + return np.array(final_predictions) diff --git a/experiments.py b/experiments.py new file mode 100644 index 0000000..8f7d32c --- /dev/null +++ b/experiments.py @@ -0,0 
+1,319 @@ +import numpy as np +import matplotlib.pyplot as plt +import time +from typing import Dict, List, Tuple + +from utils import train_test_split, normalize_data, accuracy_score, cross_validation +from feature_extraction import PCA, FeatureSelector +from improved_bp import ImprovedBPNetwork, StandardBPNetwork +from classifiers import NaiveBayesClassifier, KNNClassifier, DecisionTreeClassifier +from ensemble import BaggingClassifier, AdaBoostClassifier, VotingClassifier + +class ExperimentRunner: + """实验运行器""" + + def __init__(self): + self.results = {} + + def generate_synthetic_data(self, n_samples: int = 1000, n_features: int = 20, n_classes: int = 3, + random_state: int = 42) -> Tuple[np.ndarray, np.ndarray]: + """生成合成数据集""" + np.random.seed(random_state) + + # 为每个类生成不同的均值和协方差 + class_means = np.random.randn(n_classes, n_features) * 2 + X = [] + y = [] + + samples_per_class = n_samples // n_classes + + for class_idx in range(n_classes): + # 生成该类的数据 + class_data = np.random.randn(samples_per_class, n_features) + class_means[class_idx] + X.append(class_data) + y.extend([class_idx] * samples_per_class) + + X = np.vstack(X) + y = np.array(y) + + # 添加噪声特征 + noise_features = np.random.randn(len(X), n_features // 2) + X = np.hstack([X, noise_features]) + + return X, y + + def run_bp_comparison(self, X: np.ndarray, y: np.ndarray, dataset_name: str): + """运行BP算法比较实验""" + print(f"\n=== BP算法比较实验 - {dataset_name} ===") + + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) + X_train_norm, X_test_norm = normalize_data(X_train, X_test) + + # 改进的BP网络 + print("训练改进的BP网络...") + start_time = time.time() + improved_bp = ImprovedBPNetwork(hidden_layers=[10, 5], learning_rate=0.01, max_epochs=500) + improved_bp.fit(X_train_norm, y_train) + improved_train_time = time.time() - start_time + + y_pred_improved = improved_bp.predict(X_test_norm) + improved_accuracy = accuracy_score(y_test, y_pred_improved) + + # 标准BP网络 + print("训练标准BP网络...") + start_time = time.time() + standard_bp = StandardBPNetwork(hidden_layers=[10, 5], learning_rate=0.01, max_epochs=500) + standard_bp.fit(X_train_norm, y_train) + standard_train_time = time.time() - start_time + + y_pred_standard = standard_bp.predict(X_test_norm) + standard_accuracy = accuracy_score(y_test, y_pred_standard) + + # 结果 + print(f"改进BP - 准确率: {improved_accuracy:.4f}, 训练时间: {improved_train_time:.2f}s") + print(f"标准BP - 准确率: {standard_accuracy:.4f}, 训练时间: {standard_train_time:.2f}s") + + # 绘制损失曲线 + plt.figure(figsize=(10, 6)) + plt.plot(improved_bp.loss_history, label='改进BP', alpha=0.8) + plt.plot(standard_bp.loss_history, label='标准BP', alpha=0.8) + plt.xlabel('训练轮次') + plt.ylabel('损失') + plt.title(f'BP算法损失曲线对比 - {dataset_name}') + plt.legend() + plt.grid(True) + plt.savefig(f'c:/Users/grtsi/ml-homework/bp_comparison_{dataset_name.lower()}.png') + plt.show() + + return { + 'improved_bp': {'accuracy': improved_accuracy, 'time': improved_train_time}, + 'standard_bp': {'accuracy': standard_accuracy, 'time': standard_train_time} + } + + def run_feature_extraction_comparison(self, X: np.ndarray, y: np.ndarray, dataset_name: str): + """运行特征提取比较实验""" + print(f"\n=== 特征提取比较实验 - {dataset_name} ===") + + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) + X_train_norm, X_test_norm = normalize_data(X_train, X_test) + + classifiers = { + 'NaiveBayes': NaiveBayesClassifier(), + 'KNN': KNNClassifier(k=5), + 'DecisionTree': DecisionTreeClassifier(max_depth=8) + } + + results = {} + + for clf_name, clf in classifiers.items(): + 
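+            # Each feature-extraction setting below gets a fresh, untrained
+            # copy of the classifier (type(clf)() re-instantiates with default
+            # constructor arguments; KNN is rebuilt explicitly so k=5 is kept),
+            # so the three accuracies come from independently trained models.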
print(f"\n{clf_name} 分类器:") + + # 无特征提取 + clf_no_fe = type(clf)() if clf_name != 'KNN' else KNNClassifier(k=5) + clf_no_fe.fit(X_train_norm, y_train) + y_pred_no_fe = clf_no_fe.predict(X_test_norm) + acc_no_fe = accuracy_score(y_test, y_pred_no_fe) + + # PCA特征提取 + pca = PCA(n_components=min(10, X.shape[1] // 2)) + X_train_pca = pca.fit_transform(X_train_norm) + X_test_pca = pca.transform(X_test_norm) + + clf_pca = type(clf)() if clf_name != 'KNN' else KNNClassifier(k=5) + clf_pca.fit(X_train_pca, y_train) + y_pred_pca = clf_pca.predict(X_test_pca) + acc_pca = accuracy_score(y_test, y_pred_pca) + + # 特征选择 + feature_selector = FeatureSelector(k=min(10, X.shape[1] // 2)) + X_train_fs = feature_selector.fit_transform(X_train_norm, y_train) + X_test_fs = feature_selector.transform(X_test_norm) + + clf_fs = type(clf)() if clf_name != 'KNN' else KNNClassifier(k=5) + clf_fs.fit(X_train_fs, y_train) + y_pred_fs = clf_fs.predict(X_test_fs) + acc_fs = accuracy_score(y_test, y_pred_fs) + + print(f" 无特征提取: {acc_no_fe:.4f}") + print(f" PCA特征提取: {acc_pca:.4f}") + print(f" 特征选择: {acc_fs:.4f}") + + results[clf_name] = { + 'no_feature_extraction': acc_no_fe, + 'pca': acc_pca, + 'feature_selection': acc_fs + } + + return results + + def run_classifier_comparison(self, X: np.ndarray, y: np.ndarray, dataset_name: str): + """运行分类器比较实验""" + print(f"\n=== 分类器比较实验 - {dataset_name} ===") + + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) + X_train_norm, X_test_norm = normalize_data(X_train, X_test) + + classifiers = { + 'NaiveBayes': NaiveBayesClassifier(), + 'KNN': KNNClassifier(k=5), + 'DecisionTree': DecisionTreeClassifier(max_depth=8) + } + + results = {} + + for clf_name, clf in classifiers.items(): + print(f"\n{clf_name} 分类器:") + + # 训练和测试 + start_time = time.time() + clf.fit(X_train_norm, y_train) + train_time = time.time() - start_time + + y_pred = clf.predict(X_test_norm) + accuracy = accuracy_score(y_test, y_pred) + + # 交叉验证 + cv_scores = cross_validation(type(clf)() if clf_name != 'KNN' else KNNClassifier(k=5), + X_train_norm, y_train, k=5) + + print(f" 准确率: {accuracy:.4f}") + print(f" 训练时间: {train_time:.4f}s") + print(f" 交叉验证均值: {np.mean(cv_scores):.4f} ± {np.std(cv_scores):.4f}") + + results[clf_name] = { + 'accuracy': accuracy, + 'train_time': train_time, + 'cv_mean': np.mean(cv_scores), + 'cv_std': np.std(cv_scores) + } + + return results + + def run_ensemble_comparison(self, X: np.ndarray, y: np.ndarray, dataset_name: str): + """运行集成算法比较实验""" + print(f"\n=== 集成算法比较实验 - {dataset_name} ===") + + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) + X_train_norm, X_test_norm = normalize_data(X_train, X_test) + + # 基础分类器 + base_classifiers = { + 'DecisionTree': DecisionTreeClassifier(max_depth=8), + 'NaiveBayes': NaiveBayesClassifier(), + 'KNN': KNNClassifier(k=5) + } + + # 集成分类器 + ensemble_classifiers = { + 'Bagging_DT': BaggingClassifier('decision_tree', n_estimators=10), + 'Voting': VotingClassifier([ + DecisionTreeClassifier(max_depth=8), + NaiveBayesClassifier(), + KNNClassifier(k=5) + ]) + } + + results = {} + + # 测试基础分类器 + print("基础分类器:") + for clf_name, clf in base_classifiers.items(): + start_time = time.time() + clf.fit(X_train_norm, y_train) + train_time = time.time() - start_time + + y_pred = clf.predict(X_test_norm) + accuracy = accuracy_score(y_test, y_pred) + + print(f" {clf_name}: {accuracy:.4f} (训练时间: {train_time:.4f}s)") + results[clf_name] = {'accuracy': accuracy, 'train_time': train_time} + + # 测试集成分类器 + print("\n集成分类器:") + for clf_name, 
clf in ensemble_classifiers.items(): + start_time = time.time() + clf.fit(X_train_norm, y_train) + train_time = time.time() - start_time + + y_pred = clf.predict(X_test_norm) + accuracy = accuracy_score(y_test, y_pred) + + print(f" {clf_name}: {accuracy:.4f} (训练时间: {train_time:.4f}s)") + results[clf_name] = {'accuracy': accuracy, 'train_time': train_time} + + return results + + def run_all_experiments(self): + """运行所有实验""" + print("开始机器学习算法比较实验...") + + # 生成两个不同的数据集 + print("生成数据集...") + X1, y1 = self.generate_synthetic_data(n_samples=800, n_features=20, n_classes=3, random_state=42) + X2, y2 = self.generate_synthetic_data(n_samples=1000, n_features=25, n_classes=4, random_state=123) + + datasets = [ + (X1, y1, "Dataset1"), + (X2, y2, "Dataset2") + ] + + all_results = {} + + for X, y, dataset_name in datasets: + print(f"\n{'='*50}") + print(f"处理数据集: {dataset_name}") + print(f"样本数: {X.shape[0]}, 特征数: {X.shape[1]}, 类别数: {len(np.unique(y))}") + + # 运行各种实验 + bp_results = self.run_bp_comparison(X, y, dataset_name) + fe_results = self.run_feature_extraction_comparison(X, y, dataset_name) + clf_results = self.run_classifier_comparison(X, y, dataset_name) + ensemble_results = self.run_ensemble_comparison(X, y, dataset_name) + + all_results[dataset_name] = { + 'bp_comparison': bp_results, + 'feature_extraction': fe_results, + 'classifier_comparison': clf_results, + 'ensemble_comparison': ensemble_results + } + + # 生成总结报告 + self.generate_summary_report(all_results) + + return all_results + + def generate_summary_report(self, results: Dict): + """生成总结报告""" + print(f"\n{'='*60}") + print("实验总结报告") + print(f"{'='*60}") + + for dataset_name, dataset_results in results.items(): + print(f"\n{dataset_name} 结果总结:") + print("-" * 40) + + # BP算法比较 + bp_results = dataset_results['bp_comparison'] + print(f"BP算法比较:") + print(f" 改进BP: 准确率 {bp_results['improved_bp']['accuracy']:.4f}, 时间 {bp_results['improved_bp']['time']:.2f}s") + print(f" 标准BP: 准确率 {bp_results['standard_bp']['accuracy']:.4f}, 时间 {bp_results['standard_bp']['time']:.2f}s") + + # 特征提取比较 + fe_results = dataset_results['feature_extraction'] + print(f"\n特征提取效果 (最佳结果):") + for clf_name, clf_results in fe_results.items(): + best_method = max(clf_results, key=clf_results.get) + best_acc = clf_results[best_method] + print(f" {clf_name}: {best_method} ({best_acc:.4f})") + + # 集成算法比较 + ensemble_results = dataset_results['ensemble_comparison'] + print(f"\n分类器性能排名:") + sorted_classifiers = sorted(ensemble_results.items(), + key=lambda x: x[1]['accuracy'], reverse=True) + for i, (clf_name, clf_result) in enumerate(sorted_classifiers[:5]): + print(f" {i+1}. 
{clf_name}: {clf_result['accuracy']:.4f}") + +if __name__ == "__main__": + runner = ExperimentRunner() + results = runner.run_all_experiments() diff --git a/feature_extraction.py b/feature_extraction.py new file mode 100644 index 0000000..e492562 --- /dev/null +++ b/feature_extraction.py @@ -0,0 +1,96 @@ +import numpy as np +from typing import Tuple + +class PCA: + """主成分分析""" + + def __init__(self, n_components: int): + self.n_components = n_components + self.components_ = None + self.mean_ = None + self.explained_variance_ratio_ = None + + def fit(self, X: np.ndarray) -> 'PCA': + """训练PCA模型""" + self.mean_ = np.mean(X, axis=0) + X_centered = X - self.mean_ + + # 计算协方差矩阵 + cov_matrix = np.cov(X_centered, rowvar=False) + + # 计算特征值和特征向量 + eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix) + + # 按特征值降序排列 + idx = np.argsort(eigenvalues)[::-1] + eigenvalues = eigenvalues[idx] + eigenvectors = eigenvectors[:, idx] + + # 选择前n_components个主成分 + self.components_ = eigenvectors[:, :self.n_components].T + self.explained_variance_ratio_ = eigenvalues[:self.n_components] / np.sum(eigenvalues) + + return self + + def transform(self, X: np.ndarray) -> np.ndarray: + """应用PCA变换""" + X_centered = X - self.mean_ + return np.dot(X_centered, self.components_.T) + + def fit_transform(self, X: np.ndarray) -> np.ndarray: + """训练并变换""" + return self.fit(X).transform(X) + +class FeatureSelector: + """基于信息增益的特征选择""" + + def __init__(self, k: int): + self.k = k + self.selected_features_ = None + + def _entropy(self, y: np.ndarray) -> float: + """计算熵""" + _, counts = np.unique(y, return_counts=True) + probabilities = counts / len(y) + return -np.sum(probabilities * np.log2(probabilities + 1e-10)) + + def _information_gain(self, X_feature: np.ndarray, y: np.ndarray) -> float: + """计算信息增益""" + # 对连续特征进行离散化 + if len(np.unique(X_feature)) > 10: + bins = np.linspace(np.min(X_feature), np.max(X_feature), 11) + X_feature = np.digitize(X_feature, bins) + + total_entropy = self._entropy(y) + + values, counts = np.unique(X_feature, return_counts=True) + weighted_entropy = 0 + + for value, count in zip(values, counts): + subset_y = y[X_feature == value] + weighted_entropy += (count / len(y)) * self._entropy(subset_y) + + return total_entropy - weighted_entropy + + def fit(self, X: np.ndarray, y: np.ndarray) -> 'FeatureSelector': + """训练特征选择器""" + n_features = X.shape[1] + feature_scores = [] + + for i in range(n_features): + score = self._information_gain(X[:, i], y) + feature_scores.append((i, score)) + + # 按信息增益排序 + feature_scores.sort(key=lambda x: x[1], reverse=True) + self.selected_features_ = [idx for idx, _ in feature_scores[:self.k]] + + return self + + def transform(self, X: np.ndarray) -> np.ndarray: + """应用特征选择""" + return X[:, self.selected_features_] + + def fit_transform(self, X: np.ndarray, y: np.ndarray) -> np.ndarray: + """训练并变换""" + return self.fit(X, y).transform(X) diff --git a/improved_bp.py b/improved_bp.py new file mode 100644 index 0000000..eee7cd5 --- /dev/null +++ b/improved_bp.py @@ -0,0 +1,248 @@ +import numpy as np +from typing import List, Tuple + +class ImprovedBPNetwork: + """改进的BP神经网络,支持动态学习率调整""" + + def __init__(self, hidden_layers: List[int], learning_rate: float = 0.01, + max_epochs: int = 1000, tolerance: float = 1e-6): + self.hidden_layers = hidden_layers + self.initial_lr = learning_rate + self.learning_rate = learning_rate + self.max_epochs = max_epochs + self.tolerance = tolerance + self.weights = [] + self.biases = [] + self.loss_history = [] + + def _sigmoid(self, x: np.ndarray) -> 
np.ndarray: + """Sigmoid激活函数""" + x = np.clip(x, -500, 500) # 防止溢出 + return 1 / (1 + np.exp(-x)) + + def _sigmoid_derivative(self, x: np.ndarray) -> np.ndarray: + """Sigmoid函数的导数""" + s = self._sigmoid(x) + return s * (1 - s) + + def _initialize_weights(self, input_size: int, output_size: int): + """初始化权重和偏置""" + self.weights = [] + self.biases = [] + + # 构建网络结构 + layers = [input_size] + self.hidden_layers + [output_size] + + # Xavier初始化 + for i in range(len(layers) - 1): + w = np.random.normal(0, np.sqrt(2.0 / (layers[i] + layers[i+1])), + (layers[i], layers[i+1])) + b = np.zeros((1, layers[i+1])) + self.weights.append(w) + self.biases.append(b) + + def _forward_pass(self, X: np.ndarray) -> List[np.ndarray]: + """前向传播""" + activations = [X] + + for i in range(len(self.weights)): + z = np.dot(activations[-1], self.weights[i]) + self.biases[i] + a = self._sigmoid(z) + activations.append(a) + + return activations + + def _backward_pass(self, X: np.ndarray, y: np.ndarray, activations: List[np.ndarray]) -> Tuple[List[np.ndarray], List[np.ndarray]]: + """反向传播""" + m = X.shape[0] + dw = [np.zeros_like(w) for w in self.weights] + db = [np.zeros_like(b) for b in self.biases] + + # 输出层误差 + delta = activations[-1] - y + + # 从输出层向输入层反向传播 + for i in range(len(self.weights) - 1, -1, -1): + dw[i] = np.dot(activations[i].T, delta) / m + db[i] = np.mean(delta, axis=0, keepdims=True) + + if i > 0: + delta = np.dot(delta, self.weights[i].T) * self._sigmoid_derivative( + np.dot(activations[i], self.weights[i]) + self.biases[i]) + + return dw, db + + def _adaptive_learning_rate(self, epoch: int, current_loss: float, prev_loss: float): + """动态调整学习率""" + if epoch > 0: + if current_loss > prev_loss: + # 损失增加,降低学习率 + self.learning_rate *= 0.9 + elif (prev_loss - current_loss) / prev_loss < 0.001: + # 损失下降缓慢,增加学习率 + self.learning_rate *= 1.05 + + # 限制学习率范围 + self.learning_rate = np.clip(self.learning_rate, + self.initial_lr * 0.01, + self.initial_lr * 10) + + def fit(self, X: np.ndarray, y: np.ndarray): + """训练神经网络""" + # 处理标签 + if len(y.shape) == 1: + y_encoded = np.zeros((len(y), len(np.unique(y)))) + for i, label in enumerate(np.unique(y)): + y_encoded[y == label, i] = 1 + y = y_encoded + + self._initialize_weights(X.shape[1], y.shape[1]) + + prev_loss = float('inf') + + for epoch in range(self.max_epochs): + # 前向传播 + activations = self._forward_pass(X) + + # 计算损失 + loss = np.mean((activations[-1] - y) ** 2) + self.loss_history.append(loss) + + # 动态调整学习率 + self._adaptive_learning_rate(epoch, loss, prev_loss) + + # 反向传播 + dw, db = self._backward_pass(X, y, activations) + + # 更新权重和偏置 + for i in range(len(self.weights)): + self.weights[i] -= self.learning_rate * dw[i] + self.biases[i] -= self.learning_rate * db[i] + + # 检查收敛 + if abs(prev_loss - loss) < self.tolerance: + print(f"训练在第{epoch+1}轮收敛") + break + + prev_loss = loss + + if epoch % 100 == 0: + print(f"Epoch {epoch}, Loss: {loss:.6f}, LR: {self.learning_rate:.6f}") + + def predict(self, X: np.ndarray) -> np.ndarray: + """预测""" + activations = self._forward_pass(X) + return np.argmax(activations[-1], axis=1) + + def predict_proba(self, X: np.ndarray) -> np.ndarray: + """预测概率""" + activations = self._forward_pass(X) + return activations[-1] + +class StandardBPNetwork: + """标准BP神经网络(固定学习率)""" + + def __init__(self, hidden_layers: List[int], learning_rate: float = 0.01, + max_epochs: int = 1000, tolerance: float = 1e-6): + self.hidden_layers = hidden_layers + self.learning_rate = learning_rate + self.max_epochs = max_epochs + self.tolerance = tolerance + 
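+        # StandardBPNetwork mirrors ImprovedBPNetwork above (same Xavier-style
+        # initialization, sigmoid activations, and MSE-based backpropagation);
+        # the only difference is that the learning rate stays fixed, since
+        # there is no _adaptive_learning_rate step. This is the baseline the
+        # BP comparison experiment measures against.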
self.weights = [] + self.biases = [] + self.loss_history = [] + + def _sigmoid(self, x: np.ndarray) -> np.ndarray: + """Sigmoid激活函数""" + x = np.clip(x, -500, 500) + return 1 / (1 + np.exp(-x)) + + def _sigmoid_derivative(self, x: np.ndarray) -> np.ndarray: + """Sigmoid函数的导数""" + s = self._sigmoid(x) + return s * (1 - s) + + def _initialize_weights(self, input_size: int, output_size: int): + """初始化权重和偏置""" + self.weights = [] + self.biases = [] + + layers = [input_size] + self.hidden_layers + [output_size] + + for i in range(len(layers) - 1): + w = np.random.normal(0, np.sqrt(2.0 / (layers[i] + layers[i+1])), + (layers[i], layers[i+1])) + b = np.zeros((1, layers[i+1])) + self.weights.append(w) + self.biases.append(b) + + def _forward_pass(self, X: np.ndarray) -> List[np.ndarray]: + """前向传播""" + activations = [X] + + for i in range(len(self.weights)): + z = np.dot(activations[-1], self.weights[i]) + self.biases[i] + a = self._sigmoid(z) + activations.append(a) + + return activations + + def _backward_pass(self, X: np.ndarray, y: np.ndarray, activations: List[np.ndarray]) -> Tuple[List[np.ndarray], List[np.ndarray]]: + """反向传播""" + m = X.shape[0] + dw = [np.zeros_like(w) for w in self.weights] + db = [np.zeros_like(b) for b in self.biases] + + delta = activations[-1] - y + + for i in range(len(self.weights) - 1, -1, -1): + dw[i] = np.dot(activations[i].T, delta) / m + db[i] = np.mean(delta, axis=0, keepdims=True) + + if i > 0: + delta = np.dot(delta, self.weights[i].T) * self._sigmoid_derivative( + np.dot(activations[i], self.weights[i]) + self.biases[i]) + + return dw, db + + def fit(self, X: np.ndarray, y: np.ndarray): + """训练神经网络""" + if len(y.shape) == 1: + y_encoded = np.zeros((len(y), len(np.unique(y)))) + for i, label in enumerate(np.unique(y)): + y_encoded[y == label, i] = 1 + y = y_encoded + + self._initialize_weights(X.shape[1], y.shape[1]) + + prev_loss = float('inf') + + for epoch in range(self.max_epochs): + activations = self._forward_pass(X) + loss = np.mean((activations[-1] - y) ** 2) + self.loss_history.append(loss) + + dw, db = self._backward_pass(X, y, activations) + + for i in range(len(self.weights)): + self.weights[i] -= self.learning_rate * dw[i] + self.biases[i] -= self.learning_rate * db[i] + + if abs(prev_loss - loss) < self.tolerance: + print(f"标准BP训练在第{epoch+1}轮收敛") + break + + prev_loss = loss + + if epoch % 100 == 0: + print(f"Standard BP Epoch {epoch}, Loss: {loss:.6f}") + + def predict(self, X: np.ndarray) -> np.ndarray: + """预测""" + activations = self._forward_pass(X) + return np.argmax(activations[-1], axis=1) + + def predict_proba(self, X: np.ndarray) -> np.ndarray: + """预测概率""" + activations = self._forward_pass(X) + return activations[-1] diff --git a/main.ipynb b/main.ipynb index eded0c5..6a48537 100644 --- a/main.ipynb +++ b/main.ipynb @@ -2,42 +2,15 @@ "cells": [ { "cell_type": "code", + "execution_count": 1, "id": "initial_id", "metadata": { - "collapsed": true, "ExecuteTime": { "end_time": "2025-05-17T12:44:09.056878Z", "start_time": "2025-05-17T12:44:06.294335Z" - } + }, + "collapsed": true }, - "source": [ - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "import pandas as pd\n", - "\n", - "try:\n", - " df = pd.read_csv(\"hf://datasets/schooly/online-shoppers-intention/online_shoppers_intention.csv\")\n", - "except FileNotFoundError:\n", - " print(f\"错误: 数据集联网加载失败\")\n", - " exit()\n", - "\n", - "# --- 初步数据探索 ---\n", - "print(\"--- 数据集概览 ---\")\n", - "print(df.head())\n", - "print(\"\\n--- 数据信息 ---\")\n", - "df.info()\n", - 
"print(\"\\n--- 描述性统计 ---\")\n", - "print(df.describe())\n", - "print(\"\\n--- 缺失值检查 ---\")\n", - "print(df.isnull().sum())\n", - "\n", - "# 目标变量分布\n", - "print(\"\\n--- 目标变量 'Revenue' 分布 ---\")\n", - "print(df['Revenue'].value_counts(normalize=True))\n", - "sns.countplot(x='Revenue', data=df)\n", - "plt.title('Revenue Distribution')\n", - "plt.show()" - ], "outputs": [ { "name": "stderr", @@ -178,25 +151,67 @@ }, { "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAk0AAAHHCAYAAACiOWx7AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjEsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvc2/+5QAAAAlwSFlzAAAPYQAAD2EBqD+naQAAN7xJREFUeJzt3Xl8TXfi//H3vVlsIZPFUqJKVFBCVIs0mhmlRUqLobUrRVstVXtprcXodBqhQ5pO00q1aC21a1EtY6vUPqi1hJnKgggVWe7vDz/n69b2SZrIDa/n45HHI/eczz3nc+iVV88598bmcDgcAgAAwC3ZC3oCAAAAhQHRBAAAYIBoAgAAMEA0AQAAGCCaAAAADBBNAAAABogmAAAAA0QTAACAAaIJAADAANEEALnUtWtXde3a9Y7sKygoSNOmTbMeT5s2TUFBQUpJSbkj+2/SpImGDx9+R/YFuCr3gp4AgLy1YMECjRgxwnrs5uYmPz8/PfbYYxo4cKDKli1bgLNzXcOHD9fChQutx8WLF5evr68eeughRUREqFmzZrLb//j/Z/7000/697//re7du6tUqVJ/eHt5yZXnBrgCogm4S/Xv318BAQG6fPmyduzYoYULFyo+Pl5Lly5VkSJFCnp6LsnT01MTJkyQJKWnp+vkyZP67rvv1L9/fz366KOaMWOGvLy8rPH/+te/cryP7du3a/r06WrTpk2OwmTXrl1yc3PL8f5y4lZzW7lypWw2W77uH3B1RBNwl3r88cdVu3ZtSVL79u3l4+OjmJgYrVmzRi1btizg2bkmd3d3PfPMM07LBg4cqA8//FDvvfeeRo0apcjISGudp6dnvs4nOztbGRkZKlKkSIGHbn4fK1AYcE8TcI+oX7++JOnEiRNOyw8fPmydSaldu7batm2rNWvWWOt3796toKAgp0tXV61fv15BQUH67rvvrGW//vqrRowYodDQUNWqVUsRERH66quvnJ63ZcsWBQUFafny5ZoxY4YVeN27d9cvv/ziNPZm99Lc6H6iy5cvKyoqSs2aNVOtWrUUHh6uKVOm6PLly4Z/SjfWp08fhYWFaeXKlTp69Ogt5xAXF6eIiAjVqVNHjzzyiNq2baslS5ZIunIf0pQpUyRJTzzxhIKCghQUFKSEhARJV+5bGjdunBYvXqyIiAjVrl1b69evt9Zde0/TVWfOnNGAAQNUr149NWjQQBMmTFB6erq1PiEhQUFBQVqwYMF1z712m7eb243+Hk6cOGH9t1OnTh116NBB69atcxqTk79rwNVxpgm4R5w8eVKSnC67HDx4UB07dlTZsmXVu3dvFS9eXCtWrFC/fv00bdo0NWvWTLVr11bFihW1YsUKtWnTxmmby5cvl7e3t8LCwiRJSUlJ6tChg2w2mzp37ixfX1/98MMPGjlypNLS0tSjRw+n58fExMhms6lnz55KS0vTRx99pMGDB+vLL7/M8fFlZ2fr5ZdfVnx8vDp06KDAwED9/PPP+vTTT3Xs2DH985//zPE2r9W6dWtt2LBBGzduVOXKlW84Zt68eZowYYKeeuopdevWTenp6Tpw4IB27typVq1aqVmzZjp27JiWLl2qESNGyMfHR5Lk6+trbWPz5s1asWKFOnfuLB8fH1WoUOGW83r99ddVoUIFDRo0SDt27FBcXJxSU1OtADJlMrdrJSUl6fnnn9dvv/2mrl27ysfHRwsXLtTLL79sheu18vLvGigoRBNwl0pLS1NKSoouX76snTt3avr06fL09NRf/vIXa8w777yj++67T/Pnz7cuv3Tq1EkdO3bU3//+d+sHX8uWLfXxxx/r3Llz8vb2lnTlrM7q1avVrFkzeXh4SJLef/99ZWVlacmSJdYP3Y4dO+qNN97Q9OnT9fzzz6to0aLW/tPT07Vo0SJr36VKldI777yjn3/+WdWqVcvR8S5ZskQbN25UXFycdVZNkh588EGNHj1aP/30k+rVq5fTP0bL1fkcP378pmPWrVunBx98UFFRUTdcX716ddWsWVNLly5V06ZNFRAQcN2Yo0ePasmSJapatarRvAICAjRjxgxJUufOneXl5aXPP/9cPXv2VPXq1Y22YTq3a3344YdKSkrS7NmzrT/v9u3bq3Xr1po0aZKeeOIJpxvn8/LvGigoXJ4D7lI9evRQo0aNFB4erv79+6tYsWKaMWOGypUrJ0k6e/asNm/erBYtWliBlZKSojNnzigsLEzHjh3Tr7/+KulKNGVkZOibb76xtv/vf/9bqamp1v1RDodD33zzjZo0aSKHw2FtLyUlRWFhYTp//rz27t3rNMe2bds63Stzs0uIJlauXKnAwEBVqVLFad8NGzaUdOUy0R9RvHhxSdKFCxduOqZUqVL63//+p127duV6P4888ohxMElXQulaXbp0kST98MMPuZ6Die+//17BwcFOgVqiRAk999xzOnnypA4dOuQ0Pi//roGCwpkm4C719ttvq3Llyjp//rzmz5+vH3/80emH1vHjx+VwODR16lRNnTr1httITk5W2bJlVb16dVWpUkUrVqxQ+/btJV25NOfj42NFSUpKilJTUzV37lzNnTv3htv7/WcKlS9f3unx1UuHqampOT7eX375RYcPH1ajRo1ueix/xMWLFyVdCYOb6d27tzZu3Kj27durUqVKeuyxx/T000/r4YcfNt7P7c7w/F6lSpWcHt9///2y2+3WvUj55dSpU6pTp851y6tUqWKtv/YMUl7+XQMFhWgC7lLBwcHWu+eaNm2qTp06adCgQVq5cqVKlCih7OxsSVLPnj3VuHHjG27j/vvvt75v2bKlZs6cqZSUFHl5eWnt2rWKiIiQu/uVf0aubq9169bX3ft0VVBQkNPjm33ukcPhuO3xZWVlOb0FPzs7W9WqVXP6jKprXT3Dlls///yzJOc/k98LDAzUypUrtW7dOq1fv17ffPONPv/8c/Xr10/9+/c32s+1ly9z4/cfC3CzjwnIysr6Q/vJqT/ydw24CqIJuAe4ubnpjTfeULdu3TR79mz16dNHFStWlCR5eHgoNDT0ttto2bKlpk+frm+++Ub+/v5KS0tTRESEtd7X19eKMZPtmfL29r7h2YhTp05Zx
[... base64-encoded image/png output data for the "Revenue Distribution" countplot (and the associated text/plain figure placeholders) omitted from the main.ipynb output diff; the patch is truncated at this point ...]
tVrVo1ubs7b6Zbt255MjkAAABXkatomjt3rooXL66tW7dq69atTuuufuAkAADA3SRX0bR27dq8ngcAAIBLy9U9TQAAAPeaXJ1put3vZJs0aVKuJgMAAOCqchVNqampTo8zMzN18OBBpaamqmHDhnkyMQAAAFeSq2j64IMPrluWnZ2tMWPGqGLFin94UgAAAK4mz+5pstvt6tGjhz799NO82iQAAIDLyNMbwU+cOKHMzMy83CQAAIBLyNXlud/f6O1wOJSYmKh169apTZs2eTIxAAAAV5KraPrPf/7j9Nhut8vX11fDhw9Xu3bt8mRiAAAAriRX0RQXF5fX8wAAAHBpuYqmq1JSUnTkyBFJUpUqVeTr65snkwIAAHA1uYqmixcvavz48fr666+VnZ0tSXJzc9Mzzzyjt956S8WKFcvTSQIAABS0XL17bvLkyfrxxx81Y8YMbdu2Tdu2bdM///lP/fjjj5o8eXJezxEAAKDA5SqaVq1apXfeeUfh4eHy8vKSl5eXwsPDNX78eK1atSqv5wgAAFDgchVNly5dkr+//3XL/fz8dOnSpT88KQAAAFeTq2iqW7euoqKilJ6ebi27dOmSpk+frrp16+bV3AAAAFxGrm4Ef/PNN/Xiiy/q8ccfV/Xq1SVJ+/fvl6enpz7++OM8nSAAAIAryFU0BQUF6ZtvvtGSJUusjxx4+umn1apVKxUtWjRPJwgAAOAKchVN0dHR8vPzU4cOHZyWf/XVV0pJSVGfPn3yZHIAAACuIlf3NM2dO1dVqlS5bvmDDz6oOXPm/OFJAQAAuJpcRVNiYqJKly593XJfX18lJib+4UkBAAC4mlxF03333aeffvrpuuXx8fEqU6bMH54UAACAq8nVPU3t27fXxIkTlZmZqYYNG0qSNm3apHfffVc9e/bM0wkCAAC4glxF04svvqizZ89q7NixysjIkCQVKVJEL774ovr27ZunEwQAAHAFuYomm82mIUOG6JVXXtHhw4dVtGhRPfDAA/L09Mzr+QEAALiEXEXTVSVKlFBwcHBezQUAAMBl5epGcAAAgHsN0QQAAGCAaAIAADBANAEAABggmgAAAAwQTQAAAAaIJgAAAANEEwAAgAGiCQAAwADRBAAAYIBoAgAAMEA0AQAAGCCaAAAADBBNAAAABogmAAAAA0QTAACAAaIJAADAANEEAABggGgCAAAwQDQBAAAYIJoAAAAMEE0AAAAGiCYAAAADhSqaPvzwQwUFBemdd96xlqWnp2vs2LFq0KCBQkJC9NprrykpKcnpeadOnVKfPn1Up04dNWrUSH/729+UmZnpNGbLli1q06aNatWqpWbNmmnBggV35JgAAEDhUGiiadeuXZozZ46CgoKclk+cOFHfffedIiMjFRcXp9OnT+vVV1+11mdlZalv377KyMjQnDlzNHnyZC1cuFBRUVHWmBMnTqhv375q0KCBvv76a3Xv3l2jRo3S+vXr79jxAQAA11YoounChQsaMmSIJkyYIG9vb2v5+fPnNX/+fA0fPlyNGjVSrVq1NHHiRG3fvl07duyQJG3YsEGHDh3Su+++qxo1aig8PFwDBgzQ7NmzdfnyZUnSnDlzFBAQoOHDhyswMFBdunTRU089pU8++aQAjhYAALiiQhFN48aNU3h4uEJDQ52W79mzRxkZGU7LAwMDVb58eSuaduzYoWrVqsnf398aExYWprS0NB06dMga06hRI6dth4WFWdsAAABwL+gJ3M6yZcv0n//8R1999dV165KSkuTh4aFSpUo5Lffz81NiYqI15tpgkmQ9vt2YtLQ0Xbp0SUWLFjWer81mPBRAHuP1ByCncvLvhktH03//+1+98847+vjjj1WkSJGCno4RP7+SBT0F4J7k41OioKcA4C7n0tG0d+9eJScnq23bttayrKws/fjjj5o9e7b+9a9/KSMjQ6mpqU5nm5KTk1W6dGlJV84Y7dq1y2m7V99dd+2Y37/jLikpSV5eXjk6y3Rl3+flcOToKcbc3Oz8YABu4syZC8rKyi7oaQAoZGw28xMeLh1NDRs21JIlS5yWjRgxQlWqVFHv3r113333ycPDQ5s2bdJTTz0lSTpy5IhOnTqlunXrSpLq1q2rmTNnKjk5WX5+fpKkjRs3ysvLS1WrVrXG/PDDD0772bhxo7WNnHA4lG/RBODWeO0ByE8uHU1eXl6qVq2a07LixYvrT3/6k7W8Xbt2mjx5sry9veXl5aUJEyYoJCTECp6wsDBVrVpVQ4cO1ZAhQ5SYmKjIyEh17txZnp6ekqTnn39es2fP1pQpU9SuXTtt3rxZK1asUHR09B09XgAA4LpcOppMvPnmm7Lb7erfv78uX76ssLAwjR492lrv5uammTNnasyYMXruuedUrFgxtWnTRv3797fGVKxYUdHR0Zo0aZJmzZqlcuXKacKECWrcuHFBHBIAAHBBNoeDE9p5KSkp/+5pcne/ck9T58il2n8yJX92AhQy1Sv4avbrT+vMmQvKzOSeJgA5Y7NJ/v5m9zQVis9pAgAAKGhEEwAAgAGiCQAAwADRBAAAYIBoAgAAMEA0AQAAGCCaAAAADBBNAAAABogmAAAAA0QTAACAAaIJAADAANEEAABggGgCAAAwQDQBAAAYIJoAAAAMEE0AAAAGiCYAAAADRBMAAIABogkAAMAA0QQAAGCAaAIAADBANAEAABggmgAAAAwQTQAAAAaIJgAAAANEEwAAgAGiCQAAwADRBAAAYIBoAgAAMEA0AQAAGCCaAAAADBBNAAAABogmAAAAA0QTAACAAaIJAADAANEEAABggGgCAAAwQDQBAAAYIJoAAAAMEE0AAAAGiCYAAAADRBMAAIABogkAAMAA0QQAAGCAaAIAADBANAEAABggmgAAAAwQTQAAAAaIJgAAAANEEwAAgAGiCQAAwADRBAAAYIBoAgAAMEA0AQAAGCCaAAAADBBNAAAABogmAAAAAy4dTdHR0WrXrp1CQkLUqFEjvfLKKzpy5IjTmPT0dI0dO1YNGjRQSEiIXnvtNSUlJTmNOXXqlPr06aM6deqoUaNG+tvf/qbMzEynMVu2bFGbNm1Uq1YtNWvWTAsWLMj34wMAAIWHS0fT1q1b1blzZ82bN0+xsbHKzMxUr169dPHiRWvMxIkT9d133ykyMlJxcXE6ffq0Xn31VWt9VlaW+vbtq4yMDM2ZM0eTJ0/WwoULFRUVZY05ceKE+vbtqwYNGujrr79W9+7dNWrUKK1fv/6OHi8AAHBd7gU9gVv517/+5fR48uTJatSokfbu3atHHnlE58+f1/z58/X3v/9djRo1knQlolq2bKkdO3aobt262rBhgw4dOqTY2Fj5+/urRo0aGjBggP7+97/r1Vdflaenp+bMmaOAgAANHz5ckhQYGKj4+Hh98sknaty48R0/bgAA4Hpc+kzT750/f16S5O3tLUnas2ePMjIyFBoaao0JDAxU+fLltWPHDknSjh07VK1aNfn7+1tjwsLClJaWpkOHDlljrkbXtWOubiMnbLb8+wJwa/n5+uOLL77u3i9TLn2m6VrZ2
dmaOHGi6tWrp2rVqkmSkpKS5OHhoVKlSjmN9fPzU2JiojXm2mCSZD2+3Zi0tDRdunRJRYsWNZ6nn1/JnB0YgDzh41OioKcA4C5XaKJp7NixOnjwoD7//POCnsotJSefl8ORP9t2c7PzgwG4iTNnLigrK7ugpwGgkLHZzE94FIpoGjdunNatW6fPPvtM5cqVs5b7+/srIyNDqampTmebkpOTVbp0aWvMrl27nLZ39d111475/TvukpKS5OXllaOzTJLkcCjfognArRX2157dbpPdnoNrBcA9IDvboexs13hxu3Q0ORwOjR8/Xt9++63i4uJUsWJFp/W1atWSh4eHNm3apKeeekqSdOTIEZ06dUp169aVJNWtW1czZ85UcnKy/Pz8JEkbN26Ul5eXqlatao354YcfnLa9ceNGaxsAkN/sdpv+9KficnMrVLeaAvkuKytbZ89edIlwculoGjt2rJYuXap//vOfKlGihHUPUsmSJVW0aFGVLFlS7dq10+TJk+Xt7S0vLy9NmDBBISEhVvCEhYWpatWqGjp0qIYMGaLExERFRkaqc+fO8vT0lCQ9//zzmj17tqZMmaJ27dpp8+bNWrFihaKjowvq0AHcY+x2m9zc7Br1+XodPX2uoKcDuITKZbw1oVNj2e02oul2vvjiC0lS165dnZZPmjRJbdu2lSS9+eabstvt6t+/vy5fvqywsDCNHj3aGuvm5qaZM2dqzJgxeu6551SsWDG1adNG/fv3t8ZUrFhR0dHRmjRpkmbNmqVy5cppwoQJfNwAgDvu6Olz2n8ypaCnAeAGXDqaDhw4cNsxRYoU0ejRo51C6fcqVKigmJiYW26nQYMGWrRoUU6nCAAA7hFcPAcAADBANAEAABggmgAAAAwQTQAAAAaIJgAAAANEEwAAgAGiCQAAwADRBAAAYIBoAgAAMEA0AQAAGCCaAAAADBBNAAAABogmAAAAA0QTAACAAaIJAADAANEEAABggGgCAAAwQDQBAAAYIJoAAAAMEE0AAAAGiCYAAAADRBMAAIABogkAAMAA0QQAAGCAaAIAADBANAEAABggmgAAAAwQTQAAAAaIJgAAAANEEwAAgAGiCQAAwADRBAAAYIBoAgAAMEA0AQAAGCCaAAAADBBNAAAABogmAAAAA0QTAACAAaIJAADAANEEAABggGgCAAAwQDQBAAAYIJoAAAAMEE0AAAAGiCYAAAADRBMAAIABogkAAMAA0QQAAGCAaAIAADBANAEAABggmgAAAAwQTQAAAAaIJgAAAANEEwAAgAGiCQAAwADRBAAAYIBo+p3Zs2erSZMmql27ttq3b69du3YV9JQAAIALIJqusXz5ck2aNEn9+vXTwoULVb16dfXq1UvJyckFPTUAAFDAiKZrxMbGqkOHDmrXrp2qVq2qsWPHqmjRopo/f35BTw0AABQwoun/u3z5svbu3avQ0FBrmd1uV2hoqLZv316AMwMAAK7AvaAn4CrOnDmjrKws+fn5OS338/PTkSNHjLdjt0sOR17Pzln18r4q5slfHSBJlfxLWd/b74L/DeT1DfyfO/H6ttnMx/LKzGO+viXzfR9vdQi9/SDgHuPjU6Kgp5AneH0D13OV1/dd8P9lecPHx0dubm7X3fSdnJwsf3//ApoVAABwFUTT/+fp6amHHnpImzZtspZlZ2dr06ZNCgkJKcCZAQAAV8DluWu88MILGjZsmGrVqqXg4GB9+umn+u2339S2bduCnhoAAChgRNM1WrZsqZSUFEVFRSkxMVE1atTQRx99xOU5AAAgm8OR3+/1AgAAKPy4pwkAAMAA0QQAAGCAaAIAADBANAEAABggmoAcWrBggerXr1/Q0wAA3GF85ADuWcOHD9fChQuvW/7NN9+oUqVKBTAjAHkpKCjolutfffVVvfbaa3doNrgbEE24pzVu3FiTJk1yWubr61tAswGQlzZs2GB9v3z5ckVFRWnlypXWsuLFi1vfOxwOZWVlyd2dH4u4OS7P4Z7m6emp0qVLO33NmjVLrVq1Ut26dRUeHq4xY8bowoULN93G/v371bVrV4WEhKhevXpq27atdu/eba3ftm2bOnXqpODgYIWHh2vChAm6ePHinTg84J527eu6ZMmSstls1uMjR46oXr16+v7779W2bVvVrl1b8fHxGj58uF555RWn7bzzzjvq2rWr9Tg7O1vR0dFq0qSJgoOD1bp1a6cYw92LaAJ+x2azaeTIkVq6dKkmT56szZs36913373p+MGDB6tcuXL66quvtGDBAvXu3VseHh6SpOPHj6t379568skntXjxYr3//vuKj4/X+PHj79ThALiF9957T4MGDdLy5ctveznvqujoaC1atEhjx47VsmXL1KNHDw0ZMkRbt27N59mioHEeEve0devWOf1C5saNGysqKsp6HBAQoNdff12jR4/WmDFjbriNU6dOqVevXgoMDJQkPfDAA9a66OhotWrVSj169LDWjRw5Ul27dtWYMWNUpEiRPD8mAOb69++vxx57zHj85cuXFR0drdjYWOvfjooVKyo+Pl5z587Vo48+ml9ThQsgmnBPa9CggVMMFStWTBs3blR0dLSOHDmitLQ0ZWVlKT09Xb/99puKFSt23TZeeOEFjRo1Sl9//bVCQ0PVvHlz3X///ZKuXLo7cOCAlixZYo13OBzKzs5WQkKCFVoACkbt2rVzNP6XX37Rb7/9pp49ezotz8jIUI0aNfJyanBBRBPuacWKFXN6p1xCQoL69u2rjh07auDAgfL29lZ8fLxGjhypjIyMG0bTa6+9pqefflrff/+9fvjhB0VFRen9999Xs2bNdPHiRT3//PNO90Ncdd999+XrsQG4vd+/pm02m37/K1kzMzOt76/ejxgdHa2yZcs6jfP09MynWcJVEE3ANfbu3SuHw6Hhw4fLbr9yy9+KFStu+7zKlSurcuXK6tGjh9544w3Nnz9fzZo1U82aNXXo0CE+wgAoJHx9fXXw4EGnZfv27bPuUwwMDJSnp6dOnTrFpbh7EDeCA9eoVKmSMjIyFBcXpxMnTmjRokWaM2fOTcdfunRJ48aN05YtW3Ty5EnFx8dr9+7d1mW33r17a/v27Ro3bpz27dunY8eOafXq1Ro3btydOiQAOdCwYUPt2bNHixYt0rFjxxQVFeUUUV5eXurZs6cmTZqkhQsX6vjx49q7d6/i4uJu+LlvuLtwpgm4RvXq1TVixAjFxMToH//4h+rXr6833nhDw4YNu+F4u92us2fPatiwYUpKSpKPj4+efPJJ9e/f39peXFycIiMj1alTJ0lXbhpt2bLlHTsmAOYaN26sV155Re+++67S09PVrl07Pfvss/r555+tMa+//rp8fX0VHR2thIQElSxZUjVr1tRLL71UgDPHnWBz/P7iLQAAAK7D5TkAAAADRBMAAIABogkAAMAA0QQAAGCAaAIAADBANAEAABggmgAAAAwQTQAAAAb4RHAAhc7w4cOtX1nh7u6usmXLqnnz5howYICKFClSwLMDcLcimgAUSo0bN9akSZOUmZmpvXv3atiwYbLZ
bBoyZEhBTw3AXYrLcwAKJU9PT5UuXVr33XefmjZtqtDQUG3cuFGSlJ2drejoaDVp0kTBwcFq3bq1Vq5caa17/PHH9fnnnztt7z//+Y+qV6+ukydPSpJSU1M1cuRINWzYUPXq1VO3bt20f/9+a/y0adP0zDPPaNGiRWrSpIkefvhhDRw4UGlpadaYJk2a6JNPPnHazzPPPKNp06ZZj2+3HwCug2gCUOj9/PPP2r59uzw8PCRJ0dHRWrRokcaOHatly5apR48eGjJkiLZu3Sq73a6IiAgtXbrUaRtLlixRvXr1VKFCBUnSgAEDlJycrJiYGC1YsEAPPfSQunfvrrNnz1rPOX78uNasWaOZM2cqOjpaP/74o2JiYnI0d5P9AHANXJ4DUCitW7dOISEhyszM1OXLl2W32/XWW2/p8uXLio6OVmxsrEJCQiRJFStWVHx8vObOnatHH31UrVu3VmxsrE6dOqXy5csrOztby5Yt08svvyxJ2rZtm3bt2qVNmzbJ09NTkjRs2DCtXr1aq1at0nPPPSdJcjgcmjRpkry8vCRJrVu31qZNmzRw4ECjYzDdDwDXQDQBKJQaNGigMWPG6LffftMnn3wiNzc3PfXUUzp48KB+++039ezZ02l8RkaGatSoIUmqUaOGAgMDtXTpUvXp00dbt25VSkqKmjdvLkk6cOCALl68qAYNGjht49KlSzp+/Lj1uEKFClYwSVKZMmWUnJxsfAym+wHgGogmAIVSsWLFVKlSJUnSxIkT9cwzz+jLL79UtWrVJF25RFe2bFmn51w9myNJrVq10pIlS9SnTx8tXbpUYWFh8vHxkSRduHBBpUuXVlxc3HX7LVmypPW9u/v1/4Q6HA7re5vNdt36zMxM63vT/QBwDUQTgELPbrerb9++mjx5slauXClPT0+dOnVKjz766E2f8/TTTysyMlJ79uzRqlWrNHbsWGvdQw89pKSkJLm5uSkgICDX8/L19dXp06etx2lpaUpISMjz/QC4M7gRHMBdoXnz5rLb7Zo7d6569uypSZMmaeHChTp+/Lj27t2ruLg467OdJCkgIEAhISEaOXKksrKy1KRJE2tdaGio6tatq379+mnDhg1KSEjQTz/9pPfff1+7d+82nlPDhg21ePFibdu2TQcOHNCwYcNkt//fP7t5tR8AdwZnmgDcFdzd3dWlSxd99NFHWrNmjXx9fRUdHa2EhASVLFlSNWvW1EsvveT0nFatWmns2LF69tlnVbRoUWu5zWbThx9+qMjISI0YMUJnzpyRv7+/6tevL39/f+M59e3bVwkJCerbt69KliypAQMGOJ1pyqv9ALgzbI5rL8ADAADghrg8BwAAYIBoAgAAMEA0AQAAGCCaAAAADBBNAAAABogmAAAAA0QTAACAAaIJAADAANEEAABggGgCAAAwQDQBAAAYIJoAAAAM/D/NUOgAU/bqrgAAAABJRU5ErkJggg==" + ] }, "metadata": {}, "output_type": "display_data" } ], - "execution_count": 1 + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import pandas as pd\n", + "\n", + "try:\n", + " df = pd.read_csv(\"hf://datasets/schooly/online-shoppers-intention/online_shoppers_intention.csv\")\n", + "except FileNotFoundError:\n", + " print(f\"错误: 数据集联网加载失败\")\n", + " exit()\n", + "\n", + "# --- 初步数据探索 ---\n", + "print(\"--- 数据集概览 ---\")\n", + "print(df.head())\n", + "print(\"\\n--- 数据信息 ---\")\n", + "df.info()\n", + "print(\"\\n--- 描述性统计 ---\")\n", + "print(df.describe())\n", + "print(\"\\n--- 缺失值检查 ---\")\n", + "print(df.isnull().sum())\n", + "\n", + "# 目标变量分布\n", + "print(\"\\n--- 目标变量 'Revenue' 分布 ---\")\n", + "print(df['Revenue'].value_counts(normalize=True))\n", + "sns.countplot(x='Revenue', data=df)\n", + "plt.title('Revenue Distribution')\n", + "plt.show()" + ] }, { + "cell_type": "code", + "execution_count": 2, + "id": "1945b351cafe24fb", "metadata": { "ExecuteTime": { "end_time": "2025-05-17T12:44:17.327021Z", "start_time": "2025-05-17T12:44:17.235025Z" } }, - "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- 处理后的特征维度 ---\n", + "(12330, 74)\n", + "训练集大小: X_train: (9864, 74), y_train: (9864,)\n", + "测试集大小: X_test: (2466, 74), y_test: (2466,)\n" + ] + } + ], "source": [ "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n", @@ -250,28 +265,14 @@ "\n", "print(f\"训练集大小: X_train: {X_train.shape}, y_train: {y_train.shape}\")\n", "print(f\"测试集大小: X_test: {X_test.shape}, y_test: {y_test.shape}\")" - ], - "id": "1945b351cafe24fb", - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "--- 处理后的特征维度 ---\n", - "(12330, 74)\n", - "训练集大小: X_train: (9864, 74), y_train: (9864,)\n", - "测试集大小: X_test: (2466, 74), y_test: (2466,)\n" - ] - } - ], - "execution_count": 2 + ] }, { - "metadata": {}, "cell_type": "code", - "outputs": [], "execution_count": null, + "id": 
"7b1a931cb634d14b", + "metadata": {}, + "outputs": [], "source": [ "# --- 从零实现逻辑回归 ---\n", "class MyLogisticRegression:\n", @@ -328,14 +329,14 @@ " y_predicted_proba = self.predict_proba(X)\n", " y_predicted_labels = [1 if i > threshold else 0 for i in y_predicted_proba]\n", " return np.array(y_predicted_labels)" - ], - "id": "7b1a931cb634d14b" + ] }, { - "metadata": {}, "cell_type": "code", - "outputs": [], "execution_count": null, + "id": "ed11b643a3bf6061", + "metadata": {}, + "outputs": [], "source": [ "# --- 训练自定义逻辑回归模型 ---\n", "print(\"\\n--- 训练自定义逻辑回归模型 ---\")\n", @@ -353,14 +354,14 @@ "# --- 进行预测 ---\n", "y_pred_proba = log_reg_model.predict_proba(X_test) # 获取概率用于ROC曲线\n", "y_pred_labels = log_reg_model.predict(X_test) # 获取类别标签" - ], - "id": "ed11b643a3bf6061" + ] }, { - "metadata": {}, "cell_type": "code", - "outputs": [], "execution_count": null, + "id": "1b9c8a29f662d051", + "metadata": {}, + "outputs": [], "source": [ "from sklearn.metrics import accuracy_score, roc_curve, auc, precision_score, recall_score, f1_score, confusion_matrix, \\\n", " classification_report\n", @@ -397,14 +398,14 @@ "plt.show()\n", "\n", "print(f\"AUC: {roc_auc:.4f}\")" - ], - "id": "1b9c8a29f662d051" + ] }, { - "metadata": {}, "cell_type": "code", - "outputs": [], "execution_count": null, + "id": "6b16c3aa37baadb4", + "metadata": {}, + "outputs": [], "source": [ "import numpy as np\n", "from tqdm import tqdm\n", @@ -520,14 +521,14 @@ "\n", " def predict(self, X):\n", " return np.where(self.project(X) >= 0, 1, 0)" - ], - "id": "6b16c3aa37baadb4" + ] }, { - "metadata": {}, "cell_type": "code", - "outputs": [], "execution_count": null, + "id": "439fe97afc87ab54", + "metadata": {}, + "outputs": [], "source": [ "# --- 训练自定义线性SVM模型 ---\n", "print(\"\\n--- 训练自定义线性SVM模型 ---\")\n", @@ -562,14 +563,14 @@ "plt.show()\n", "\n", "print(f\"AUC: {roc_auc_svm:.4f}\")" - ], - "id": "439fe97afc87ab54" + ] }, { - "metadata": {}, "cell_type": "code", - "outputs": [], "execution_count": null, + "id": "81881edd3d6dac8c", + "metadata": {}, + "outputs": [], "source": [ "import numpy as np\n", "from collections import Counter\n", @@ -705,14 +706,14 @@ "\n", " def predict(self, X):\n", " return np.array([self._traverse_tree(x, self.tree) for x in X])" - ], - "id": "81881edd3d6dac8c" + ] }, { - "metadata": {}, "cell_type": "code", - "outputs": [], "execution_count": null, + "id": "f82f5a58da4117e1", + "metadata": {}, + "outputs": [], "source": [ "# --- 训练自定义决策树模型 ---\n", "print(\"\\n--- 训练自定义决策树模型 ---\")\n", @@ -747,14 +748,14 @@ "plt.show()\n", "\n", "print(f\"AUC: {roc_auc_tree:.4f}\")" - ], - "id": "f82f5a58da4117e1" + ] }, { - "metadata": {}, "cell_type": "code", - "outputs": [], "execution_count": null, + "id": "b12fa40a4de770e5", + "metadata": {}, + "outputs": [], "source": [ "import numpy as np\n", "\n", @@ -830,14 +831,14 @@ " posteriors.append(log_posterior)\n", " predictions.append(self.classes_[np.argmax(posteriors)])\n", " return np.array(predictions)" - ], - "id": "b12fa40a4de770e5" + ] }, { - "metadata": {}, "cell_type": "code", - "outputs": [], "execution_count": null, + "id": "e8c167999fb13f6b", + "metadata": {}, + "outputs": [], "source": [ "# --- 训练自定义朴素贝叶斯模型 ---\n", "print(\"\\n--- 训练自定义朴素贝叶斯模型 ---\")\n", @@ -872,14 +873,14 @@ "plt.show()\n", "\n", "print(f\"AUC: {roc_auc_nb:.4f}\")" - ], - "id": "e8c167999fb13f6b" + ] }, { - "metadata": {}, "cell_type": "code", - "outputs": [], "execution_count": null, + "id": "718bb29ac00a859c", + "metadata": {}, + "outputs": [], "source": [ "import numpy as np\n", "from 
collections import Counter\n", @@ -911,14 +912,14 @@ " def predict(self, X_test):\n", " predictions = [self._predict_single(x_test_sample) for x_test_sample in X_test]\n", " return np.array(predictions)" - ], - "id": "718bb29ac00a859c" + ] }, { - "metadata": {}, "cell_type": "code", - "outputs": [], "execution_count": null, + "id": "fc3b39b5d46a42a", + "metadata": {}, + "outputs": [], "source": [ "# --- 训练自定义KNN模型 ---\n", "print(\"\\n--- 训练自定义KNN模型 ---\")\n", @@ -953,8 +954,7 @@ "plt.show()\n", "\n", "print(f\"AUC: {roc_auc_knn:.4f}\")\n" - ], - "id": "fc3b39b5d46a42a" + ] } ], "metadata": { diff --git a/main.py b/main.py new file mode 100644 index 0000000..34acf5b --- /dev/null +++ b/main.py @@ -0,0 +1,29 @@ +import numpy as np +import matplotlib.pyplot as plt +from experiments import ExperimentRunner + +def main(): + """主函数""" + print("机器学习算法实现与比较系统") + print("="*50) + print("本系统实现了以下内容:") + print("1. 改进的BP神经网络 vs 标准BP网络") + print("2. 特征提取(PCA, 特征选择)对分类性能的影响") + print("3. 多种分类算法比较(朴素贝叶斯, KNN, 决策树)") + print("4. 集成学习算法(Bagging, Voting)") + print("5. 所有算法均为自主实现,未使用任何ML库") + print("="*50) + + # 设置matplotlib中文显示 + plt.rcParams['font.sans-serif'] = ['SimHei'] + plt.rcParams['axes.unicode_minus'] = False + + # 运行实验 + runner = ExperimentRunner() + results = runner.run_all_experiments() + + print("\n实验完成!结果已保存在实验报告中。") + print("图表已保存到本地文件。") + +if __name__ == "__main__": + main() diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..60a150d --- /dev/null +++ b/utils.py @@ -0,0 +1,72 @@ +import numpy as np +import pandas as pd +from typing import Tuple, List +import math + +def load_data(filepath: str) -> Tuple[np.ndarray, np.ndarray]: + """加载数据集""" + data = pd.read_csv(filepath) + X = data.iloc[:, :-1].values + y = data.iloc[:, -1].values + return X, y + +def train_test_split(X: np.ndarray, y: np.ndarray, test_size: float = 0.3, random_state: int = 42) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + """数据集划分""" + np.random.seed(random_state) + n_samples = X.shape[0] + n_test = int(n_samples * test_size) + + indices = np.random.permutation(n_samples) + test_indices = indices[:n_test] + train_indices = indices[n_test:] + + return X[train_indices], X[test_indices], y[train_indices], y[test_indices] + +def normalize_data(X_train: np.ndarray, X_test: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: + """数据标准化""" + mean = np.mean(X_train, axis=0) + std = np.std(X_train, axis=0) + std[std == 0] = 1 # 避免除零 + + X_train_norm = (X_train - mean) / std + X_test_norm = (X_test - mean) / std + + return X_train_norm, X_test_norm + +def accuracy_score(y_true: np.ndarray, y_pred: np.ndarray) -> float: + """计算准确率""" + return np.mean(y_true == y_pred) + +def confusion_matrix(y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray: + """计算混淆矩阵""" + classes = np.unique(np.concatenate([y_true, y_pred])) + n_classes = len(classes) + matrix = np.zeros((n_classes, n_classes), dtype=int) + + for i, true_class in enumerate(classes): + for j, pred_class in enumerate(classes): + matrix[i, j] = np.sum((y_true == true_class) & (y_pred == pred_class)) + + return matrix + +def cross_validation(classifier, X: np.ndarray, y: np.ndarray, k: int = 5) -> List[float]: + """K折交叉验证""" + n_samples = X.shape[0] + fold_size = n_samples // k + scores = [] + + for i in range(k): + start_idx = i * fold_size + end_idx = start_idx + fold_size if i < k - 1 else n_samples + + test_indices = np.arange(start_idx, end_idx) + train_indices = np.concatenate([np.arange(0, start_idx), np.arange(end_idx, n_samples)]) + + 
X_train_fold, X_test_fold = X[train_indices], X[test_indices] + y_train_fold, y_test_fold = y[train_indices], y[test_indices] + + classifier.fit(X_train_fold, y_train_fold) + y_pred = classifier.predict(X_test_fold) + scores.append(accuracy_score(y_test_fold, y_pred)) + + return scores
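
---
Usage note (appended for review, not part of the patch itself): a minimal sketch of how the helpers added in utils.py could be combined with one of the classifiers from classifiers.py. The CSV path below is a placeholder, and any object exposing fit/predict can stand in for KNNClassifier.

    import numpy as np
    from utils import load_data, train_test_split, normalize_data, accuracy_score, cross_validation
    from classifiers import KNNClassifier

    # load_data assumes the last CSV column is the label; "shoppers.csv" is a placeholder path.
    X, y = load_data("shoppers.csv")

    # Hold-out split, then standardize with statistics fitted on the training portion only.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    X_train, X_test = normalize_data(X_train, X_test)

    # Fit a single model and report hold-out accuracy.
    knn = KNNClassifier(k=3)
    knn.fit(X_train, y_train)
    print("hold-out accuracy:", accuracy_score(y_test, knn.predict(X_test)))

    # 5-fold cross-validation over the same (standardized) data.
    scores = cross_validation(KNNClassifier(k=3),
                              np.vstack([X_train, X_test]),
                              np.concatenate([y_train, y_test]),
                              k=5)
    print("cv mean accuracy:", np.mean(scores))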