本文共 1700 字,大约阅读时间需要 5 分钟。
# Complement naive Bayes (ComplementNB) is a complement to multinomial naive
# Bayes, so it likewise requires discrete (non-continuous) features.
#
# This script explores how complement naive Bayes handles imbalanced data by
# fitting four naive Bayes variants on the same heavily imbalanced two-class
# dataset and printing Brier score, accuracy, recall, AUC and elapsed time
# for each of them.
import datetime
from time import time

from sklearn.datasets import make_blobs
from sklearn.metrics import (
    brier_score_loss as bs,
    recall_score,
    roc_auc_score as auc,
)
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import (
    BernoulliNB,
    ComplementNB,
    GaussianNB,
    MultinomialNB,
)
from sklearn.preprocessing import KBinsDiscretizer

# Create the dataset: two blobs of very different sizes (50000 vs. 500
# samples) to reproduce the class-imbalance problem.
# NOTE(review): the 500-sample blob is presumably labelled 1 (labels following
# the order of ``n_samples``), which is the positive class used by the metrics
# below -- confirm against the make_blobs documentation.
class1 = 50000
class2 = 500
centers = [[0.0, 0.0], [5.0, 5.0]]
std = [3, 1]
X, y = make_blobs(
    n_samples=[class1, class2],
    centers=centers,
    cluster_std=std,
    random_state=0,
    shuffle=False,
)

names = ["Multinomial", "Gaussian", "Bernoulli", "Complement"]
models = [MultinomialNB(), GaussianNB(), BernoulliNB(), ComplementNB()]

for clf, name in zip(models, names):
    times = time()

    # The split is redone on every iteration (fixed random_state) because
    # Xtrain/Xtest are overwritten below with their discretized versions for
    # every model except GaussianNB.
    Xtrain, Xtest, ytrain, ytest = train_test_split(
        X, y, test_size=0.3, random_state=420
    )

    # Every model except GaussianNB is fed binned, one-hot encoded features;
    # the discretizer is fitted on the training split only.
    if name != "Gaussian":
        kbs = KBinsDiscretizer(n_bins=10, encode="onehot").fit(Xtrain)
        Xtrain = kbs.transform(Xtrain)
        Xtest = kbs.transform(Xtest)

    clf.fit(Xtrain, ytrain)
    y_pred = clf.predict(Xtest)
    # Second column of predict_proba is used as the score for label 1.
    proba = clf.predict_proba(Xtest)[:, 1]
    score = clf.score(Xtest, ytest)

    print(name)
    print("\tBrier:{:.3f}".format(bs(ytest, proba, pos_label=1)))
    print("\tAccuracy:{:.3f}".format(score))
    print("\tRecall:{:.3f}".format(recall_score(ytest, y_pred)))
    print("\tAUC:{:.3f}".format(auc(ytest, proba)))

    # Format the elapsed duration as minutes:seconds:microseconds. The
    # duration is interpreted in UTC so the local time zone offset (e.g. a
    # +05:30 zone) cannot leak into the minutes field.
    elapsed = time() - times
    print(
        datetime.datetime.fromtimestamp(
            elapsed, tz=datetime.timezone.utc
        ).strftime("%M:%S:%f")
    )
在这里插入代码片
转载地址:http://ptrmz.baihongyu.com/