# Recognizing handwritten digits. Machine learning project 3
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, SVR, NuSVC, NuSVR  # only SVC is used below, as a commented-out alternative
from sklearn import utils, metrics
# utils provides shuffle() for mixing the rows of a DataFrame
import pandas as pd
# 0. Prepare the data: training set and test set --> supervised learning (samples, labels)
study_df = pd.read_csv("train_5k.csv")  # CSV --> DataFrame
# study_df = utils.shuffle(study_df)
test_df = pd.read_csv("train_1k.csv")  # CSV --> DataFrame
# test_df = utils.shuffle(test_df)
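If the rows of a CSV happen to be sorted by label, the commented-out shuffle above is worth enabling. A minimal sketch, with a fixed random_state (an assumption, added here only for reproducibility):

```python
# Optional: shuffle the rows. Row order does not affect KNN itself,
# but a fixed random_state keeps a shuffled run reproducible.
study_df = utils.shuffle(study_df, random_state=42)
test_df = utils.shuffle(test_df, random_state=42)
```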
# Row counts of the training and test sets
studySize = study_df.shape[0]
testSize = test_df.shape[0]
# Separate the features (pixels) from the answers (labels)
study_data = study_df.iloc[0:studySize, 1:]  # pixel columns (column 1 onward)
study_label = study_df.iloc[0:studySize, [0]]  # label column (column 0)
test_data = test_df.iloc[0:testSize, 1:]  # pixel columns (column 1 onward)
test_label = test_df.iloc[0:testSize, [0]]  # label column (column 0)
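Before training, it is worth confirming the data layout. The sketch below assumes the Kaggle-style MNIST CSV format (a label column followed by 784 pixel columns, each row a flattened 28x28 grayscale image), which is what the slicing above implies:

```python
# Sanity check: reshape the first training row back into a 28x28 image
# and print a crude ASCII preview ('#' = dark stroke, '.' = background).
import numpy as np

pixels = study_data.iloc[0].to_numpy().reshape(28, 28)
print("label:", study_label.iloc[0, 0])
for row in pixels:
    print("".join("#" if v > 128 else "." for v in row))
```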
# 1. Choose a machine learning algorithm (KNN, SVM, decision tree, ...; deep learning is also possible)
#    A comparison of these candidates is sketched after the output below.
clf = KNeighborsClassifier(n_neighbors=3)
# clf = SVC(gamma="auto")  # alternative: a support vector classifier
# 2. Train the model on the training data ---> this can take a long time (a fast machine + GPU helps)
# ---> the result is a trained model!
clf.fit(study_data, study_label.values.ravel())  # learn from (training samples, answers); ravel() passes the 1-D label array sklearn expects
# 3. <like a mock exam> How reliable is the model? Check what percentage of the test data it gets right.
answers = clf.predict(test_data)  # solve the problems / predict
score = metrics.accuracy_score(test_label, answers) * 100  # (true labels, predictions)
print("Accuracy : %5.2f %%" % (score))
# 4. Predict a sample whose answer is unknown: this is the final goal
anwser =clf.predict([[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31,40,129,234,234,159,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,68,150,239,254,253,253,253,215,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,156,201,254,254,254,241,150,98,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,154,254,236,203,83,39,30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,144,253,145,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,129,222,78,79,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,134,253,167,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,254,78,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,201,253,226,69,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,55,6,0,18,128,253,241,41,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,25,205,235,100,0,0,20,253,253,58,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,231,245,108,0,0,0,132,253,185,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,121,245,254,254,254,217,254,223,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,116,165,233,233,234,180,39,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] ])
print("이 글은", anwser, " 입니다..","단," ,score, "%의 확률입니다.")
Accuracy : 96.30 %
This digit is [5] - note: the model's test accuracy is 96.3 %.
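As a follow-up to step 1, the same train/test split can be used to compare the candidate algorithms (KNN, SVM, decision tree). This is a rough sketch, not a tuned benchmark; SVC on 5,000 raw-pixel rows is noticeably slower than KNN:

```python
# Compare the candidate algorithms from step 1 on the same split.
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics

candidates = {
    "KNN (k=3)": KNeighborsClassifier(n_neighbors=3),
    "SVC (gamma='auto')": SVC(gamma="auto"),
    "Decision tree": DecisionTreeClassifier(random_state=0),
}
for name, model in candidates.items():
    model.fit(study_data, study_label.values.ravel())
    predicted = model.predict(test_data)
    acc = metrics.accuracy_score(test_label.values.ravel(), predicted) * 100
    print("%-20s accuracy: %5.2f %%" % (name, acc))
```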