1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
| import numpy as np import random import warnings warnings.filterwarnings("ignore")
def load_breast_cancer():
    """Read comma-separated samples from stdin until a blank line or EOF.

    Each input line holds the feature values followed by the integer
    class label as its last field.

    Returns:
        (X, y): X is an (n_samples, n_features) float array, y is an
        int array of class labels.
    """
    X = []
    y = []
    try:
        line = input()
        while line:
            data = [np.float64(tok) for tok in line.strip().split(',')]
            X.append(np.array(data[:-1]))
            y.append(int(data[-1]))
            line = input()
    except EOFError:
        # Bug fix: the original crashed with EOFError when stdin ended
        # without a trailing blank line; treat EOF as end-of-data.
        pass
    return np.array(X), np.array(y)
def train_test_split(X, Y, test_size=0.2, random_state=5):
    """Split arrays into random train and test subsets.

    Args:
        X, Y: indexable arrays of equal length.
        test_size: fraction of the samples assigned to the test split.
        random_state: seed for the split. Bug fix: the original accepted
            this parameter but never used it, so splits were not
            reproducible.

    Returns:
        X_train, X_test, Y_train, Y_test
    """
    n_samples = len(X)
    indices = np.arange(n_samples)
    # Private RNG: reproducible split without touching global random state.
    rng = random.Random(random_state)
    train_indexs = rng.sample(indices.tolist(), int(n_samples * (1 - test_size)))
    # Set for O(1) membership tests instead of O(n) list scans per index.
    train_set = set(train_indexs)
    test_indexs = [k for k in indices if k not in train_set]
    return X[train_indexs], X[test_indexs], Y[train_indexs], Y[test_indexs]
X,y = load_breast_cancer() x_train,x_test,y_train,y_test = train_test_split(X,y) class Logisticregression():
def __init__(self, learn_rate = 0.001, max_iteration=10000):
self.learn_rate = learn_rate self.max_iteration = max_iteration self._X_train = None self._y_train = None self._w = None
def fit(self, X_train, y_train):
m_samples, n_features = X_train.shape self._X_train = np.insert(X_train, 0, 1, axis=1) self._y_train = np.reshape(y_train, (m_samples, 1)) limit = np.sqrt(1 / n_features) w = np.random.uniform(-limit, limit, (n_features, 1)) b = 0 self.w = np.insert(w, 0, b, axis=0) iteration = 0 while iteration < self.max_iteration: h_x = self._X_train.dot(self.w) y_pred = 1/(1+np.exp(- h_x)) w_grad = self._X_train.T.dot(y_pred - self._y_train) self.w = self.w - self.learn_rate * w_grad iteration = iteration + 1
def predict(self, X_test):
X_test = np.insert(X_test, 0, 1, axis=1) h_x = X_test.dot(self.w) y_pripr_1 = (1/(1+np.exp(-h_x))) y_pripr_0 = 1 - y_pripr_1 y_cal = y_pripr_1 - y_pripr_0 y_class = np.where(y_cal > 0, 1, 0) return y_class
def score(self, X_test, y_test):
j = 0 y_test = np.reshape(y_test,(len(y_test),1)) y_hat = self.predict(X_test) for i in range(y_test.shape[0]): if y_hat[i,0] == y_test[i,0]: j += 1 acc = j / len(y_test) y_test = list(y_test.reshape((1,-1))[0]) y_hat = list(y_hat.reshape((1,-1))[0])
precision = self.get_precision(y_test,y_hat) recall = self.get_recall(y_test,y_hat) auc = self.get_auc(y_test,y_hat) return acc,precision,recall,auc
def get_precision(self,y,y_hat): true_positive = sum(yi and yi_hat for yi,yi_hat in zip(y,y_hat)) predicted_positive = sum(y_hat) return true_positive/predicted_positive
def get_recall(self,y,y_hat): true_positive = sum(yi and yi_hat for yi,yi_hat in zip(y,y_hat)) actual_positive = sum(y) return true_positive/actual_positive
def get_tnr(self,y,y_hat): true_negative = sum(1-(yi or yi_hat) for yi,yi_hat in zip(y,y_hat)) actual_negative = len(y) - sum(y) return true_negative/actual_negative
def get_roc(self,y,y_hat): thresholds = sorted(set(y_hat),reverse=True) ret = [[0,0]] for threshold in thresholds: y_hat = [int(yi_hat >= threshold) for yi_hat in y_hat] ret.append([self.get_recall(y,y_hat),1-self.get_tnr(y,y_hat)]) return ret
def get_auc(self,y,y_hat): roc = iter(self.get_roc(y,y_hat)) tpr_pre, fpr_pre = next(roc) auc = 0 for tpr,fpr in roc: auc += (tpr+tpr_pre)*(fpr-fpr_pre)/2 tpr_pre = tpr fpr_pre = fpr return auc
# Train on the training split and collect evaluation metrics.
lr = Logisticregression()
lr.fit(x_train, y_train)
acc, precision, recall, auc = lr.score(x_test, y_test)
def test(acc, auc):
    """Print True when either accuracy or AUC clears the 0.8 bar."""
    if acc > 0.8 or auc > 0.8:
        print(True)
    else:
        print(False)


# Bug fix: stray print("debug_begin")/print("debug_end") debug statements
# were left around this call and polluted the program's output.
test(acc, auc)