1、支持向量机支持向量机(SVM)%matplotlib inlineimport numpy as npimport matplotlib.pyplot as pltfrom scipy import stats# use seaborn plotting defaultsimport seaborn as sns; sns.set()#随机来点数据from sklearn.datasets.samples_generator import make_blobsX, y = make_blobs(n_samples=50, centers=2, random_state=0, cluster_s

2、td=0.60)plt.scatter(X:, 0, X:, 1, c=y, s=50, cmap=autumn)随便画几条分割线,哪一条更好呢?xfit = np.linspace(-1, 3.5)plt.scatter(X:, 0, X:, 1, c=y, s=50, cmap=autumn)plt.plot(0.6, 2.1, x, color=red, markeredgewidth=2, markersize=10)for m, b in (1, 0.65), (0.5, 1.6), (-0.2, 2.9): plt.plot(xfit, m * xfit + b, -k)plt.x

3、lim(-1, 3.5);Support Vector Machines: 最小化雷区xfit = np.linspace(-1, 3.5)plt.scatter(X:, 0, X:, 1, c=y, s=50, cmap=autumn)for m, b, d in (1, 0.65, 0.33), (0.5, 1.6, 0.55), (-0.2, 2.9, 0.2): yfit = m * xfit + b plt.plot(xfit, yfit, -k) plt.fill_between(xfit, yfit - d, yfit + d, edgecolor=none, color=#AA

4、AAAA, alpha=0.4)plt.xlim(-1, 3.5);训练一个基本的SVMfrom sklearn.svm import SVC # Support vector classifiermodel = SVC(kernel=linear), y)#绘图函数def plot_svc_decision_function(model, ax=None, plot_support=True): Plot the decision function for a 2D SVC if ax is None: ax = plt.gca() xlim = ax.get_xlim

5、() ylim = ax.get_ylim() # create grid to evaluate model x = np.linspace(xlim0, xlim1, 30) y = np.linspace(ylim0, ylim1, 30) Y, X = np.meshgrid(y, x) xy = np.vstack(X.ravel(), Y.ravel().T P = model.decision_function(xy).reshape(X.shape) # plot decision boundary and margins ax.contour(X, Y, P, colors=

6、k, levels=-1, 0, 1, alpha=0.5, linestyles=-, -, -) # plot support vectors if plot_support: ax.scatter(model.support_vectors_:, 0, model.support_vectors_:, 1, s=300, linewidth=1, facecolors=none); ax.set_xlim(xlim)ax.set_ylim(ylim)plt.scatter(X:, 0, X:, 1, c=y, s=50, cmap=autumn)plot_svc_decision_fun

7、ction(model);model.support_vectors_def plot_svm(N=10, ax=None): X, y = make_blobs(n_samples=200, centers=2, random_state=0, cluster_std=0.60) X = X:N y = y:N model = SVC(kernel=linear, C=1E10), y) ax = ax or plt.gca() ax.scatter(X:, 0, X:, 1, c=y, s=50, cmap=autumn) ax.set_xlim(-1, 4) ax

8、.set_ylim(-1, 6) plot_svc_decision_function(model, ax)fig, ax = plt.subplots(1, 2, figsize=(16, 6)fig.subplots_adjust(left=0.0625, right=0.95, wspace=0.1)for axi, N in zip(ax, 60, 120): plot_svm(N, axi) axi.set_title(N = 0.format(N)引入核函数的SVM 首先我们用线性核来看一下,在下面这样比较难的数据集上还能分开吗?from sklearn.datasets.sam

9、ples_generator import make_circlesX, y = make_circles(100, factor=.1, noise=.1)clf = SVC(kernel=linear).fit(X, y)plt.scatter(X:, 0, X:, 1, c=y, s=50, cmap=autumn)plot_svc_decision_function(clf, plot_support=False);#加入了新的维度rfrom mpl_toolkits import mplot3dr = np.exp(-(X * 2).sum(1)def plot_3D(elev=30

10、, azim=30, X=X, y=y): ax = plt.subplot(projection=3d) ax.scatter3D(X:, 0, X:, 1, r, c=y, s=50, cmap=autumn) ax.view_init(elev=elev, azim=azim) ax.set_xlabel(x) ax.set_ylabel(y) ax.set_zlabel(r)plot_3D(elev=45, azim=45, X=X, y=y)#加入径向基函数clf = SVC(kernel=rbf, C=1E6), y)#这回牛逼了!plt.scatter(X:,

11、0, X:, 1, c=y, s=50, cmap=autumn)plot_svc_decision_function(clf)plt.scatter(clf.support_vectors_:, 0, clf.support_vectors_:, 1, s=300, lw=1, facecolors=none);使用这种核支持向量机,我们学到了一个合适的非线性决策边界。这种核变换策略在机器学习中经常被使用!调节SVM参数: Soft Margin问题调节C参数 当C趋近于无穷大时:意味着分类严格不能有错误 当C趋近于很小时:意味着可以有更大的错误容忍X, y = make_blobs(n_s

12、amples=100, centers=2, random_state=0, cluster_std=0.8)plt.scatter(X:, 0, X:, 1, c=y, s=50, cmap=autumn);X, y = make_blobs(n_samples=100, centers=2, random_state=0, cluster_std=0.8)fig, ax = plt.subplots(1, 2, figsize=(16, 6)fig.subplots_adjust(left=0.0625, right=0.95, wspace=0.1)for axi, C in zip(a

13、x, 10.0, 0.1): model = SVC(kernel=linear, C=C).fit(X, y) axi.scatter(X:, 0, X:, 1, c=y, s=50, cmap=autumn) plot_svc_decision_function(model, axi) axi.scatter(model.support_vectors_:, 0, model.support_vectors_:, 1, s=300, lw=1, facecolors=none);axi.set_title(C = 0:.1f.format(C), size=14)X, y = make_b

14、lobs(n_samples=100, centers=2, random_state=0, cluster_std=1.1)fig, ax = plt.subplots(1, 2, figsize=(16, 6)fig.subplots_adjust(left=0.0625, right=0.95, wspace=0.1)for axi, gamma in zip(ax, 10.0, 0.1): model = SVC(kernel=rbf, gamma=gamma).fit(X, y) axi.scatter(X:, 0, X:, 1, c=y, s=50, cmap=autumn) pl

15、ot_svc_decision_function(model, axi) axi.scatter(model.support_vectors_:, 0, model.support_vectors_:, 1, s=300, lw=1, facecolors=none); axi.set_title(gamma = 0:.1f.format(gamma), size=14)Example: Face RecognitionAs an example of support vector machines in action, let's take a look at the facial recog

16、nition problem. We will use the Labeled Faces in the Wild dataset, which consists of several thousand collated photos of various public figures. A fetcher for the dataset is built into Scikit-Learn:from sklearn.datasets import fetch_lfw_peoplefaces = fetch_lfw_people(min_faces_per_person=60)print(fa

17、ces.target_names)print(faces.images.shape)fig, ax = plt.subplots(3, 5)for i, axi in enumerate(ax.flat): axi.imshow(faces.imagesi, cmap=bone) axi.set(xticks=, yticks=, xlabel=faces.target_namesfaces.targeti) 每个图的大小是 62×47 在这里我们就把每一个像素点当成了一个特征,但是这样特征太多了,用PCA降维一下吧!from sklearn.svm import SVC#from sklear

18、n.decomposition import RandomizedPCAfrom sklearn.decomposition import PCAfrom sklearn.pipeline import make_pipelinepca = PCA(n_components=150, whiten=True, random_state=42)svc = SVC(kernel=rbf, class_weight=balanced)model = make_pipeline(pca, svc)from sklearn.model_selection import train_test_splitX

19、train, Xtest, ytrain, ytest = train_test_split(,, random_state=40)使用grid search cross-validation来选择我们的参数from sklearn.model_selection import GridSearchCVparam_grid = svc_C: 1, 5, 10, svc_gamma: 0.0001, 0.0005, 0.001grid = GridSearchCV(model, param_grid)%time, yt

20、rain)print(grid.best_params_)model = grid.best_estimator_yfit = model.predict(Xtest)yfit.shapefig, ax = plt.subplots(4, 6)for i, axi in enumerate(ax.flat): axi.imshow(Xtesti.reshape(62, 47), cmap=bone) axi.set(xticks=, yticks=) axi.set_ylabel(faces.target_namesyfiti.split()-1, color=black if yfiti =

21、 ytesti else red)fig.suptitle(Predicted Names; Incorrect Labels in Red, size=14);from sklearn.metrics import classification_reportprint(classification_report(ytest, yfit, target_names=faces.target_names) 精度(precision) = 正确预测为正的个数(TP)/被预测为正的个数(TP+FP) 召回率(recall) = 正确预测为正的个数(TP)/实际为正的个数(TP+FN) F1 = 2×精度×召回率/(精度+召回率)from sklearn.metrics import confusion_matrixmat = confusion_matrix(ytest, yfit)sns.heatmap(mat.T, square=True, annot=True, fmt=d, cbar=False, xticklabels=faces.target_names, yticklabels=faces.target_names)plt.xlabel(true label)plt.ylabel(predicted label);

