python 部分代码2

class List:
    """Thin wrapper around a built-in list.

    The backing storage is exposed as the ``lst`` attribute.
    """

    def __init__(self, lst=None):
        """Wrap ``lst``, or start with a fresh empty list when it is omitted.

        Args:
            lst: Existing list to wrap. It is stored as-is (not copied), so
                the wrapper and the caller share the same object.
        """
        self.lst = [] if lst is None else lst


def length(l):
    """Return the number of elements held in ``l.lst``."""
    items = l.lst
    return len(items)


def isempty(l):
    """Return True when ``l.lst`` has no elements, otherwise False."""
    return len(l.lst) == 0

def get(l, i):
    """Return the element at 1-based position ``i`` of ``l``.

    When the list is empty, or ``i`` lies outside ``1..length(l)``, a
    two-line diagnostic is printed and -1 is returned instead of an element.
    """
    if isempty(l):
        reason = "The list is empty!"
    elif i < 1 or i > length(l):
        reason = "The index given is out of range!"
    else:
        # Positions are 1-based for callers, 0-based in the backing list.
        return l.lst[i - 1]

    print("The get() is unsuccessful!")
    print(reason)
    return -1

def locatebyvalue(l, x):
    """Return the 1-based position of the first element equal to ``x``.

    Returns -1 when no element of ``l.lst`` compares equal to ``x``.
    """
    for position, item in enumerate(l.lst, start=1):
        if x == item:
            return position
    return -1

def locatebyid(l, idd):
    """Return the 1-based position of the first element whose ``id`` is ``idd``.

    Every element of ``l.lst`` is expected to expose an ``id`` attribute.
    Returns -1 when no element matches.
    """
    for position, item in enumerate(l.lst, start=1):
        if item.id == idd:
            return position
    return -1

def insert(l, i, x):
    """Insert ``x`` so that it occupies 1-based position ``i`` of ``l``.

    Valid positions run from 1 to ``length(l) + 1`` (the latter appends).
    For any other position a two-line diagnostic is printed and the list is
    left unchanged. Nothing is returned.
    """
    if not (i < 1 or i > length(l) + 1):
        # Translate the 1-based position to the backing list's 0-based index.
        l.lst.insert(i - 1, x)
        return

    print("The insert() is unsuccessful!")
    print("The index given is out of range!")

def delete(l, i):
    """Remove the element at 1-based position ``i`` of ``l``.

    When the list is empty, or ``i`` lies outside ``1..length(l)``, a
    two-line diagnostic is printed and nothing is removed. Nothing is
    returned in either case.
    """
    if isempty(l):
        reason = "The list is empty!"
    elif i < 1 or i > length(l):
        reason = "The index given is out of range!"
    else:
        # Positions are 1-based for callers, 0-based in the backing list.
        del l.lst[i - 1]
        return

    print("The delete() is unsuccessful!")
    print(reason)


def display(l):
    """Print the backing list of ``l`` to standard output."""
    contents = l.lst
    print(contents)



    

 

 

 

from queuestackm import *

def main():
    """Walk through a short demonstration of the stack helpers.

    Creates an empty stack, pushes three items, pops two, pushes two more,
    and prints the stack's state after each group of operations.
    """

    def report_emptiness(stack):
        # Say whether the stack currently holds any element.
        if isemptys(stack):
            print("The stack is empty now.")
        else:
            print("The stack is not empty now.")

    def report_contents(stack):
        # Show the current top element followed by the whole stack.
        top = gettop(stack)
        print(f"Now, the top element is '{top}'.")
        print("Now, the stack is containing ", end="")
        displays(stack)

    qs = Stack()
    report_emptiness(qs)

    for item in ("a", "b", "c"):
        push(qs, item)
    print("The push operations has been performed three times.")
    report_emptiness(qs)
    report_contents(qs)

    # Only the second popped value is reported below.
    pop(qs)
    de = pop(qs)
    print("The pop operations has been performed two times.")
    print(f"The last popped element is '{de}'.")

    for item in ("d", "e"):
        push(qs, item)
    report_contents(qs)


# Run the stack demonstration. There is no ``__main__`` guard, so this call
# executes whenever the module is loaded, including on import.
main()

 

from listm import *


class Queue(List):
    """FIFO queue built on top of ``List``.

    ``rear`` holds the 1-based position of the last element in the backing
    list (0 when the queue is empty); ``enqueue`` inserts at ``rear + 1``.
    """

    def __init__(self, que=None):
        """Create a queue, optionally seeded with an existing list.

        Args:
            que: Initial contents, front element first. Stored as-is (not
                copied) by ``List.__init__``. When omitted the queue starts
                empty.
        """
        super().__init__(que)
        # ``rear`` must match the number of elements already present.
        # Starting from a hard-coded 0 made the first enqueue on a pre-filled
        # queue insert at the front instead of the rear. This mirrors how
        # ``Stack`` initialises ``top``.
        self.rear = length(self)


def isemptyq(q):
    """Return True when queue ``q`` holds no elements, otherwise False."""
    return bool(isempty(q))

def enqueue(q, x):
    """Add ``x`` at the rear of queue ``q`` and advance ``q.rear``."""
    new_rear = q.rear + 1
    q.rear = new_rear
    insert(q, new_rear, x)
    


def dequeue(q):
    """Remove and return the front element of queue ``q``.

    On an empty queue an underflow message is printed and -1 is returned.
    """
    if not isemptyq(q):
        front = get(q, 1)
        delete(q, 1)
        # One element fewer, so the rear position moves down by one.
        q.rear -= 1
        return front

    print("Dequeue error, queue underflow!!!\n")
    return -1



def getfront(q):
    """Return the front element of queue ``q`` without removing it.

    On an empty queue an underflow message is printed and -1 is returned.
    """
    if not isemptyq(q):
        return get(q, 1)

    print("Getfront error, queue underflow!!!\n")
    return -1


def displayq(q):
    """Print the contents of queue ``q`` by delegating to ``display``."""
    return display(q)

 

from listm import *

class Stack(List):
    """LIFO stack built on top of ``List``.

    ``top`` holds the 1-based position of the top element in the backing
    list (0 when the stack is empty).
    """

    def __init__(self, stk=None):
        """Create a stack, optionally seeded with an existing list.

        Args:
            stk: Initial contents, bottom element first. When omitted the
                stack starts empty.
        """
        super().__init__(stk)
        # The top position equals the number of elements already present.
        self.top = length(self)


def isemptys(s):
    """Return True when stack ``s`` holds no elements, otherwise False."""
    return bool(isempty(s))

def push(s, x):
    """Place ``x`` on top of stack ``s`` and advance ``s.top``."""
    new_top = s.top + 1
    s.top = new_top
    insert(s, new_top, x)
    


def pop(s):
    """Remove and return the top element of stack ``s``.

    On an empty stack an underflow message is printed and -1 is returned.
    """
    if not isemptys(s):
        position = s.top
        removed = get(s, position)
        delete(s, position)
        # One element fewer, so the top position moves down by one.
        s.top = position - 1
        return removed

    print("Pop error, stack underflow!!!\n")
    return -1



def gettop(s):
    """Return the top element of stack ``s`` without removing it.

    On an empty stack an underflow message is printed and -1 is returned.
    """
    if not isemptys(s):
        return get(s, s.top)

    print("Gettop error, stack underflow!!!\n")
    return -1


def displays(s):
    """Print the contents of stack ``s`` by delegating to ``display``."""
    return display(s)



from sklearn.model_selection import StratifiedKFold, cross_val_score, train_test_split, GridSearchCV
# Shared 10-fold stratified cross-validation splitter: shuffled, with a fixed
# random_state of 0. It is passed as ``cv`` to every evaluation in this script.
cvKFold=StratifiedKFold(n_splits=10, shuffle=True, random_state=0)
# K-Nearest Neighbour
from sklearn.neighbors import KNeighborsClassifier
def kNNClassifier(X, y, K):
    """Cross-validate a K-nearest-neighbours classifier and print its mean score.

    Args:
        X: Feature matrix; converted to a float64 array before evaluation.
        y: Class labels, one per row of ``X``.
        K: Number of neighbours.

    The mean of the per-fold scores obtained with the shared ``cvKFold``
    splitter is printed with four decimals and no trailing newline. Nothing
    is returned.
    """
    model = KNeighborsClassifier(n_neighbors=K)
    features = np.asarray(X, dtype='float64')
    fold_scores = cross_val_score(model, features, y, cv=cvKFold)
    mean_score = fold_scores.mean()
    print("{:.4f}".format(mean_score), end='')

# Logistic Regression
from sklearn.linear_model import LogisticRegression
def logregClassifier(X, y):
    """Cross-validate logistic regression and print its mean score.

    Args:
        X: Feature matrix; converted to a float64 array before evaluation.
        y: Class labels, one per row of ``X``.

    The mean of the per-fold scores obtained with the shared ``cvKFold``
    splitter is printed with four decimals and no trailing newline. Nothing
    is returned.
    """
    model = LogisticRegression(random_state=0)
    features = np.asarray(X, dtype='float64')
    fold_scores = cross_val_score(model, features, y, cv=cvKFold)
    mean_score = fold_scores.mean()
    print("{:.4f}".format(mean_score), end='')
# Naïve Bayes
from sklearn.naive_bayes import GaussianNB
def nbClassifier(X, y):
    """Cross-validate Gaussian naive Bayes and print its mean score.

    Args:
        X: Feature matrix; converted to a float64 array before evaluation.
        y: Class labels, one per row of ``X``.

    The mean of the per-fold scores obtained with the shared ``cvKFold``
    splitter is printed with four decimals and no trailing newline. Nothing
    is returned.
    """
    model = GaussianNB()
    features = np.asarray(X, dtype='float64')
    fold_scores = cross_val_score(model, features, y, cv=cvKFold)
    mean_score = fold_scores.mean()
    print("{:.4f}".format(mean_score), end='')
# Decision Tree
from sklearn.tree import DecisionTreeClassifier
def dtClassifier(X, y):
    """Cross-validate an entropy-based decision tree and print its mean score.

    Args:
        X: Feature matrix; converted to a float64 array before evaluation.
        y: Class labels, one per row of ``X``.

    The mean of the per-fold scores obtained with the shared ``cvKFold``
    splitter is printed with four decimals and no trailing newline. Nothing
    is returned.
    """
    model = DecisionTreeClassifier(criterion='entropy', random_state=0)
    features = np.asarray(X, dtype='float64')
    fold_scores = cross_val_score(model, features, y, cv=cvKFold)
    mean_score = fold_scores.mean()
    print("{:.4f}".format(mean_score), end='')
# Bagging
from sklearn.ensemble import BaggingClassifier
def bagDTClassifier(X, y, n_estimators, max_samples, max_depth):
    """Cross-validate bagged decision trees and print the mean score.

    Args:
        X: Feature matrix; converted to a float64 array before evaluation.
        y: Class labels, one per row of ``X``.
        n_estimators: Forwarded to ``BaggingClassifier``.
        max_samples: Forwarded to ``BaggingClassifier``.
        max_depth: Depth limit of each entropy-based base tree.

    The mean of the per-fold scores obtained with the shared ``cvKFold``
    splitter is printed with four decimals and no trailing newline. Nothing
    is returned.
    """
    base_tree = DecisionTreeClassifier(
        max_depth=max_depth, criterion='entropy', random_state=0)
    ensemble = BaggingClassifier(
        base_tree,
        n_estimators=n_estimators,
        max_samples=max_samples,
        random_state=0,
    )
    features = np.asarray(X, dtype='float64')
    fold_scores = cross_val_score(ensemble, features, y, cv=cvKFold)
    mean_score = fold_scores.mean()
    print("{:.4f}".format(mean_score), end='')
# Ada Boost
from sklearn.ensemble import AdaBoostClassifier
def adaDTClassifier(X, y, n_estimators, learning_rate, max_depth):
    """Cross-validate AdaBoost over decision trees and print the mean score.

    Args:
        X: Feature matrix; converted to a float64 array before evaluation.
        y: Class labels, one per row of ``X``.
        n_estimators: Forwarded to ``AdaBoostClassifier``.
        learning_rate: Forwarded to ``AdaBoostClassifier``.
        max_depth: Depth limit of each entropy-based base tree.

    The mean of the per-fold scores obtained with the shared ``cvKFold``
    splitter is printed with four decimals and no trailing newline. Nothing
    is returned.
    """
    base_tree = DecisionTreeClassifier(
        max_depth=max_depth, criterion='entropy', random_state=0)
    ensemble = AdaBoostClassifier(
        base_tree,
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        random_state=0,
    )
    features = np.asarray(X, dtype='float64')
    fold_scores = cross_val_score(ensemble, features, y, cv=cvKFold)
    mean_score = fold_scores.mean()
    print("{:.4f}".format(mean_score), end='')
# Gradient Boosting
from sklearn.ensemble import GradientBoostingClassifier
def gbClassifier(X, y, n_estimators, learning_rate):
    """Cross-validate gradient boosting and print the mean score.

    Args:
        X: Feature matrix; converted to a float64 array before evaluation.
        y: Class labels, one per row of ``X``.
        n_estimators: Forwarded to ``GradientBoostingClassifier``.
        learning_rate: Forwarded to ``GradientBoostingClassifier``.

    The mean of the per-fold scores obtained with the shared ``cvKFold``
    splitter is printed with four decimals and no trailing newline. Nothing
    is returned.
    """
    model = GradientBoostingClassifier(
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        random_state=0,
    )
    features = np.asarray(X, dtype='float64')
    fold_scores = cross_val_score(model, features, y, cv=cvKFold)
    mean_score = fold_scores.mean()
    print("{:.4f}".format(mean_score), end='')
# Linear SVM
from sklearn.svm import SVC
def bestLinClassifier(X,y):
    """Grid-search a linear-kernel SVC and print the best settings and scores.

    The data is split into stratified train/test parts (random_state=0). The
    grid search runs on the training part with the shared ``cvKFold``
    splitter. Printed, one per line: the best ``C``, the best ``gamma``, the
    best cross-validation score (four decimals) and the score on the held-out
    test part (four decimals, no trailing newline). Nothing is returned.

    Args:
        X: Feature matrix.
        y: Class labels, one per row of ``X``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, stratify=y, random_state=0)

    # NOTE(review): gamma is searched although the kernel is linear; it
    # presumably has no effect on the fitted model - confirm before trimming,
    # since the best gamma is part of the printed output.
    param_grid = {
        'C': [0.001, 0.01, 0.1, 1, 10, 100],
        'gamma': [0.001, 0.01, 0.1, 1, 10, 100],
    }
    estimator = SVC(kernel="linear", random_state=0)
    grid_search = GridSearchCV(
        estimator, param_grid, cv=cvKFold, return_train_score=True)
    grid_search.fit(X_train, y_train)

    best = grid_search.best_params_
    print(best['C'])
    print(best['gamma'])
    print("{:.4f}".format(grid_search.best_score_))
    test_score = grid_search.score(X_test, y_test)
    print("{:.4f}".format(test_score), end='')
# Random Forest
from sklearn.ensemble import RandomForestClassifier
def bestRFClassifier(X,y):
    """Grid-search a random forest and print the best settings and scores.

    The data is split into stratified train/test parts (random_state=0). The
    grid search runs on the training part with the shared ``cvKFold``
    splitter. Printed, one per line: the best ``n_estimators``,
    ``max_features`` and ``max_leaf_nodes``, the best cross-validation score
    (four decimals) and the score on the held-out test part (four decimals,
    no trailing newline). Nothing is returned.

    Args:
        X: Feature matrix.
        y: Class labels, one per row of ``X``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, stratify=y, random_state=0)

    # NOTE(review): 'auto' for max_features appears to have been removed in
    # recent scikit-learn releases - confirm against the installed version.
    param_grid = {
        'n_estimators': [10, 20, 50, 100],
        'max_features': ['auto', 'sqrt', 'log2'],
        'max_leaf_nodes': [10, 20, 30],
    }
    estimator = RandomForestClassifier(random_state=0, criterion='entropy')
    grid_search = GridSearchCV(
        estimator, param_grid, cv=cvKFold, return_train_score=True)
    grid_search.fit(X_train, y_train)

    best = grid_search.best_params_
    print(best['n_estimators'])
    print(best['max_features'])
    print(best['max_leaf_nodes'])
    print("{:.4f}".format(grid_search.best_score_))
    test_score = grid_search.score(X_test, y_test)
    print("{:.4f}".format(test_score), end='')
import sys
import numpy as np
import pandas as pd
# Read the data file whose path is the first command-line argument.
df = pd.read_csv(sys.argv[1])
import numpy as np
# Preprocess data.
# Replace the missing-value marker '?' with np.nan so it can be imputed.
df = df.replace('?', np.nan)
# Every column except the last is a feature; the last column is the class.
df_feature = df.iloc[:, 0:-1]
# Replace missing values with the mean value of their column.
from sklearn.impute import SimpleImputer
imp = SimpleImputer(missing_values=np.nan, strategy='mean')
features = imp.fit_transform(df_feature)
# Normalise each attribute with MinMaxScaler (default settings).
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
# np.set_printoptions(formatter={'float_kind':"{:.4f}".format})
features1 = scaler.fit_transform(features)
# Keep a string copy of every value formatted with '%0.4f' (rounded to four
# decimals, not truncated); used only when printing the preprocessed data.
data_list = []
for i in features1:
    temp = []
    for j in i:
        temp.append('%0.4f' % j)
    data_list.append(temp)
# Get a list of all the class values (last column).
classes = df.iloc[:, -1].tolist()
# Label encoding: map each distinct class value to an integer code.
from sklearn.preprocessing import LabelEncoder
labels = np.unique(classes)
lEnc = LabelEncoder()
lEnc.fit(labels)
label_encoder = lEnc.transform(classes)
numClass = len(labels)
# The encoded labels are stored as float64 from here on.
label_encoder=label_encoder.astype(np.float64)
def p():
    """Print the preprocessed data set, one record per line.

    Each line holds the record's formatted feature strings from the
    module-level ``data_list``, each followed by a comma, and then the
    record's integer class code from the module-level ``label_encoder``.
    Lines are separated by newlines; the last line has no trailing newline.
    Nothing is printed when ``data_list`` is empty.
    """
    last_index = len(data_list) - 1
    for row_index, row in enumerate(data_list):
        for value in row:
            print(value, end=',')
        # The label must be printed once per record, inside the row loop.
        # Previously this step sat after the loop, so all rows ran together
        # on one line and only the final label was printed.
        label = int(label_encoder[row_index])
        if row_index < last_index:
            print(label)
        else:
            print(label, end='')
# Read the config file
def conf_file(file):
    """Return the first data row of a CSV configuration file as a list.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``. The
            first line is treated as the header.

    Returns:
        The values of the first row after the header, in column order.
    """
    return pd.read_csv(file).iloc[0].tolist()
# Dispatch on the algorithm code given as the second command-line argument.
# Algorithms that take parameters read them, in column order, from the first
# data row of the config file named by the third argument.
algorithm = sys.argv[2]
if algorithm == 'NN':
    parameter_list = conf_file(sys.argv[3])
    K = int(parameter_list[0])
    kNNClassifier(features1, label_encoder, K)
elif algorithm == 'LR':
    logregClassifier(features1, label_encoder)
elif algorithm == 'NB':
    nbClassifier(features1, label_encoder)
elif algorithm == 'DT':
    dtClassifier(features1, label_encoder)
elif algorithm == 'BAG':
    parameter_list = conf_file(sys.argv[3])
    n_estimators = int(parameter_list[0])
    max_samples = int(parameter_list[1])
    max_depth = int(parameter_list[2])
    bagDTClassifier(features1, label_encoder, n_estimators, max_samples,
                    max_depth)
elif algorithm == 'ADA':
    parameter_list = conf_file(sys.argv[3])
    n_estimators = int(parameter_list[0])
    # The learning rate is passed through as read, without an int cast.
    learning_rate = parameter_list[1]
    max_depth = int(parameter_list[2])
    adaDTClassifier(features1, label_encoder, n_estimators,
                    learning_rate, max_depth)
elif algorithm == 'GB':
    parameter_list = conf_file(sys.argv[3])
    n_estimators = int(parameter_list[0])
    learning_rate = parameter_list[1]
    gbClassifier(features1, label_encoder, n_estimators, learning_rate)
elif algorithm == 'RF':
    bestRFClassifier(features1, label_encoder)
elif algorithm == 'SVM':
    bestLinClassifier(features1, label_encoder)
elif algorithm == 'P':
    # Print the preprocessed data instead of evaluating a classifier.
    p()

 

你可能感兴趣的:(python)