Kaggle Titanic

# coding: utf-8
import time

import numpy as np               # numerical computing
import pandas as pd              # data analysis
import matplotlib.pyplot as plt

plt.rcParams['font.sans-serif'] = ['SimHei']   # render Chinese labels correctly
plt.rcParams['axes.unicode_minus'] = False     # render the minus sign correctly

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import (GridSearchCV, KFold, cross_val_score,
                                     learning_curve, train_test_split)
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import precision_score
from xgboost import XGBClassifier

# warnings.simplefilter('ignore', DeprecationWarning)
import warnings
warnings.filterwarnings('ignore')


data_train = pd.read_csv("D:/titanic/train.csv")
data_test = pd.read_csv("D:/titanic/test.csv")

# print(data_train.Sex.value_counts())
# print('***************')
# print(pd.DataFrame(data_train.groupby(['Sex','Survived']).count()['PassengerId']))
# print(pd.DataFrame(data_train.groupby(['Sex']).mean()['Survived']))
# print(data_train[data_train.Sex == 'male'].Survived.value_counts())
# print('*******************************************************')


# df = pd.DataFrame({'key1':list('aabba'),
#                   'key2': ['one','two','one','two','one'],
#                   'data1': np.random.randn(5),
#                   'data2': np.random.randn(5)})
# print(df)
# g = df.groupby(['key1','key2'])
# print(pd.DataFrame(g.count()))

# data_train.Age.value_counts().plot(kind = 'kde')
# plt.show()
#
# data_train.Age[data_train.Survived == 0].plot(kind = 'kde')
# data_train.Age[data_train.Survived == 1].plot(kind = 'kde')
# plt.legend((u'0',u'1'),loc = 'best')
# plt.show()
#
# data_train.Fare[data_train.Survived == 0].plot(kind = 'kde')
# data_train.Fare[data_train.Survived == 1].plot(kind = 'kde')
# plt.legend((u'0',u'1'),loc = 'best')
# plt.gca().set_xticklabels(('0','50','100','150','200','250'))
# plt.show()

# data_train.Age[data_train.Embarked == 'C'].plot(kind = 'kde')
# data_train.Age[data_train.Embarked == 'Q'].plot(kind = 'kde')
# data_train.Age[data_train.Embarked == 'S'].plot(kind = 'kde')
# plt.legend((u'C',u'Q',u'S'),loc = 'best')
# plt.show()

# for i in ['C','Q','S']:
#     for j in range(1,4,1):
#         data_train.Survived[data_train.Embarked == i][data_train.Pclass == j].value_counts().plot(kind = 'bar',label = '{}/{}'.format(i,j))
#
# plt.show()

# fig=plt.figure()
# fig.set(alpha=0.65)  # figure transparency; not important
# plt.title(u"Survival by cabin class and sex")
#
# ax1=fig.add_subplot(141)
# data_train.Survived[data_train.Sex == 'female'][data_train.Pclass != 3].value_counts().plot(kind='bar', label="female highclass", color='#FA2479')
# print(data_train.Survived[data_train.Sex == 'female'][data_train.Pclass != 3].value_counts())
# ax1.set_xticklabels([u"survived", u"died"], rotation=0)
# ax1.legend([u"female/high class"], loc='best')
#
# ax2=fig.add_subplot(142, sharey=ax1)
# data_train.Survived[data_train.Sex == 'female'][data_train.Pclass == 3].value_counts().plot(kind='bar', label='female, low class', color='pink')
# print(data_train.Survived[data_train.Sex == 'female'][data_train.Pclass == 3].value_counts())
# ax2.set_xticklabels([u"died", u"survived"], rotation=0)
# plt.legend([u"female/low class"], loc='best')
#
# ax3=fig.add_subplot(143, sharey=ax1)
# data_train.Survived[data_train.Sex == 'male'][data_train.Pclass != 3].value_counts().plot(kind='bar', label='male, high class',color='lightblue')
# print(data_train.Survived[data_train.Sex == 'male'][data_train.Pclass != 3].value_counts())
# ax3.set_xticklabels([u"died", u"survived"], rotation=0)
# plt.legend([u"male/high class"], loc='best')
#
# ax4=fig.add_subplot(144, sharey=ax1)
# data_train.Survived[data_train.Sex == 'male'][data_train.Pclass == 3].value_counts().plot(kind='bar', label='male low class', color='steelblue')
# print(data_train.Survived[data_train.Sex == 'male'][data_train.Pclass == 3].value_counts())
# ax4.set_xticklabels([u"died", u"survived"], rotation=0)
# plt.legend([u"male/low class"], loc='best')
# print(type(data_train.Survived[data_train.Sex == 'male'][data_train.Pclass == 3].value_counts()))
# plt.show()

#
# survived_1 = data_train.SibSp[data_train.Survived == 1].value_counts()
# survived_0 = data_train.SibSp[data_train.Survived == 0].value_counts()
# df = pd.DataFrame({u'1':survived_1,u'0':survived_0})
# df.plot(kind = 'bar',stacked = True)
# plt.show()




def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None, n_jobs=1,
                        train_sizes=np.linspace(.05, 1., 20), verbose=0, plot=True):
    """
    Plot the learning curve of a model on the given data.
    Parameters
    ----------
    estimator : the classifier to evaluate
    title : plot title
    X : input features, numpy array
    y : target vector
    ylim : tuple (ymin, ymax) fixing the y-axis limits of the plot
    cv : number of cross-validation folds; one fold is held out for validation
         and the rest are used for training (None uses the library default)
    n_jobs : number of parallel jobs (default 1)
    """
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes, verbose=verbose)

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)

    if plot:
        plt.figure()
        plt.title(title)
        if ylim is not None:
            plt.ylim(*ylim)
        plt.xlabel("number of training samples")
        plt.ylabel("score")
        plt.gca().invert_yaxis()
        plt.grid()

        plt.fill_between(train_sizes, train_scores_mean - train_scores_std, train_scores_mean + train_scores_std,
                         alpha=0.1, color="b")
        plt.fill_between(train_sizes, test_scores_mean - test_scores_std, test_scores_mean + test_scores_std,
                         alpha=0.1, color="r")
        plt.plot(train_sizes, train_scores_mean, 'o-', color="b", label="training score")
        plt.plot(train_sizes, test_scores_mean, 'o-', color="r", label="cross-validation score")

        plt.legend(loc="best")

        plt.draw()
        plt.show()
        plt.gca().invert_yaxis()

    midpoint = ((train_scores_mean[-1] + train_scores_std[-1]) + (test_scores_mean[-1] - test_scores_std[-1])) / 2
    diff = (train_scores_mean[-1] + train_scores_std[-1]) - (test_scores_mean[-1] - test_scores_std[-1])
    return midpoint, diff
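
# A minimal usage sketch (hypothetical call; X_all/Y_all are only built in
# __main__ below, so this would run after the feature-engineering step):
# midpoint, diff = plot_learning_curve(LogisticRegression(solver='liblinear'),
#                                      "learning curve", X_all, Y_all, cv=5)
# print(midpoint, diff)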



# # Stacking approach from Zhihu; it does not use 5-fold cross-training, so it scores slightly worse
# class Ensemble(object):
#     def __init__(self, estimators):
#         self.estimator_names = []
#         self.estimators = []
#         for i in estimators:
#             self.estimator_names.append(i[0])
#             self.estimators.append(i[1])
#         self.clf = LogisticRegression()
#
#     def fit(self, train_x, train_y):
#         for i in self.estimators:
#             i.fit(train_x, train_y)
#         x = np.array([i.predict(train_x) for i in self.estimators]).T
#         y = train_y
#         self.clf.fit(x, y)
#
#     def predict(self, x):
#         x = np.array([i.predict(x) for i in self.estimators]).T
#         # print(x)
#         return self.clf.predict(x)
#
#     def score(self, x, y):
#         s = precision_score(y, self.predict(x))
#         return s

# Stacking with k-fold cross-training
class Stacking(object):
    def __init__(self, first_estimators):
        self.estimators = []
        self.first_esti_nums = len(first_estimators)
        for i in first_estimators:
            self.estimators.append(i[1])

    def get_oof_predictions(self, clf, train_x, train_y, test, n_folds):
        """Out-of-fold predictions: each training row is predicted by a model
        that never saw it, which keeps the second-level features unbiased."""
        ntrain = train_x.shape[0]
        ntest = test.shape[0]
        oof_train = np.zeros((ntrain,))
        oof_test_skf = np.empty((n_folds, ntest))
        kf = KFold(n_splits=n_folds)
        for i, (train_index, test_index) in enumerate(kf.split(train_x)):
            kf_x_train = train_x[train_index]
            kf_y_train = train_y[train_index]
            kf_x_test = train_x[test_index]

            clf.fit(kf_x_train, kf_y_train)

            oof_train[test_index] = clf.predict(kf_x_test)
            oof_test_skf[i, :] = clf.predict(test)

        # average the n_folds predictions on the test set
        oof_test = oof_test_skf.mean(axis=0)
        return oof_train, oof_test

    def fit_predict(self, train_x, train_y, test, n_folds):
        ntrain = train_x.shape[0]
        ntest = test.shape[0]
        self.middle_train = np.zeros((ntrain, self.first_esti_nums))
        self.middle_test = np.zeros((ntest, self.first_esti_nums))
        for i, clf in enumerate(self.estimators):
            self.middle_train[:, i], self.middle_test[:, i] = self.get_oof_predictions(clf, train_x, train_y, test, n_folds)
        # second-level model: a grid-searched logistic regression over the
        # first-level predictions (the grid is defined here so the class does
        # not depend on a global variable)
        param_lr = {'penalty': ['l1', 'l2'], 'C': [0.1, 0.5, 5.0]}
        second_estimator = GridSearchCV(estimator=LogisticRegression(solver='liblinear'),
                                        param_grid=param_lr, cv=5, n_jobs=-1)
        second_estimator.fit(self.middle_train, train_y)
        predictions = second_estimator.best_estimator_.predict(self.middle_test)
        return predictions

    def score(self, x, y):
        # note: this predicts on the same data it was trained on,
        # so the precision reported here is optimistic
        s = precision_score(y, self.fit_predict(x, y, x, 10))
        return s
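
# The idea behind get_oof_predictions as a standalone sketch (illustrative
# only; X, y, model and oof below are hypothetical names with toy data):
# X, y = np.random.randn(100, 5), np.random.randint(0, 2, 100)
# model = LogisticRegression(solver='liblinear')
# oof = np.zeros(100)
# for train_idx, val_idx in KFold(n_splits=5).split(X):
#     model.fit(X[train_idx], y[train_idx])
#     oof[val_idx] = model.predict(X[val_idx])  # each row predicted by a model that never saw it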

if __name__ == '__main__':
    # stack train and test so every transformation is applied to both at once;
    # rows 0-890 are the training set, the rest are the test set
    combine_df = pd.concat([data_train, data_test])
    # Name feature: name length, binned into 5 equal-frequency buckets
    combine_df['Name_Len'] = combine_df.Name.apply(lambda x: len(x))
    combine_df['Name_Len'] = pd.qcut(combine_df.Name_Len, 5)
    df_name = pd.get_dummies(combine_df.Name_Len, prefix='Name_Len')
    combine_df = pd.concat([combine_df, df_name], axis=1).drop('Name_Len', axis=1)
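
    # The qcut + get_dummies pattern recurs throughout this script: equal-frequency
    # bins, then one-hot indicators. Illustrative only (toy data):
    # s = pd.Series(range(1, 11))
    # pd.get_dummies(pd.qcut(s, 5), prefix='bin')  # 5 indicator columns, 2 rows per bin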

    # Different titles show clearly different survival rates
    # combine_df.groupby(combine_df['Name'].apply(lambda x: x.split(', ')[1]).apply(lambda x: x.split('.')[0]))[
    #     'Survived'].mean().plot(kind = 'bar')
    # plt.show()

    combine_df['Title'] = combine_df['Name'].apply(lambda x: x.split(', ')[1]).apply(lambda x: x.split('.')[0])
    combine_df['Title'] = combine_df['Title'].replace(
        ['Don', 'Dona', 'Major', 'Capt', 'Jonkheer', 'Rev', 'Col', 'Sir', 'Dr'], 'Mr')
    combine_df['Title'] = combine_df['Title'].replace(['Mlle', 'Ms'], 'Miss')
    # 'Dr' was already mapped to 'Mr' above, so it is dropped from this list
    combine_df['Title'] = combine_df['Title'].replace(['the Countess', 'Mme', 'Lady'], 'Mrs')
    df = pd.get_dummies(combine_df['Title'], prefix='Title')
    combine_df = pd.concat([combine_df, df], axis=1)
    # print(combine_df.Title)


    # Survival within a family tends to follow the same pattern: if one woman in a
    # family died, the other women in that family were also more likely to die.
    # So we flag those special families.
    combine_df['Fname'] = combine_df['Name'].apply(lambda x: x.split(',')[0])
    combine_df['Familysize'] = combine_df['SibSp'] + combine_df['Parch']
    dead_female_Fname = list(set(combine_df[(combine_df.Sex == 'female') & (combine_df.Age >= 12)
                                            & (combine_df.Survived == 0) & (combine_df.Familysize > 1)][
                                     'Fname'].values))
    survive_male_Fname = list(set(combine_df[(combine_df.Sex == 'male') & (combine_df.Age >= 12)
                                             & (combine_df.Survived == 1) & (combine_df.Familysize > 1)][
                                      'Fname'].values))
    combine_df['Dead_female_family'] = np.where(combine_df['Fname'].isin(dead_female_Fname), 1, 0)
    combine_df['Survive_male_family'] = np.where(combine_df['Fname'].isin(survive_male_Fname), 1, 0)
    combine_df = combine_df.drop(['Name', 'Fname'], axis=1)
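
    # np.where + isin is the binary-flag idiom used above. Illustrative only:
    # names = pd.Series(['Smith', 'Jones'])
    # np.where(names.isin(['Smith']), 1, 0)  # -> array([1, 0])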


    # Age: fill missing values with the median age of the same (Title, Pclass)
    # group, then add a child flag
    group = combine_df.groupby(['Title', 'Pclass'])['Age']
    combine_df['Age'] = group.transform(lambda x: x.fillna(x.median()))

    combine_df = combine_df.drop('Title', axis=1)

    combine_df['IsChild'] = np.where(combine_df['Age'] <= 12, 1, 0)
    # the binned Age column was never used downstream, so the pd.cut step is skipped
    combine_df = combine_df.drop('Age', axis=1)
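
    # How group.transform broadcasts the per-group median back onto each row,
    # illustrative only (toy frame):
    # toy = pd.DataFrame({'g': ['a', 'a', 'b'], 'v': [1.0, np.nan, 3.0]})
    # toy['v'] = toy.groupby('g')['v'].transform(lambda s: s.fillna(s.median()))  # NaN -> 1.0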

    # Familysize: discretize the Familysize column extracted above
    combine_df['Familysize'] = np.where(combine_df['Familysize'] == 0, 'solo',
                                        np.where(combine_df['Familysize'] <= 3, 'normal', 'big'))
    df = pd.get_dummies(combine_df['Familysize'], prefix='Familysize')
    combine_df = pd.concat([combine_df, df], axis=1).drop(['SibSp', 'Parch', 'Familysize'],axis = 1)
    # print(combine_df.columns)
    # print(len(combine_df.columns))


    # Ticket: keep only the first character of the ticket number
    combine_df['Ticket_Lett'] = combine_df['Ticket'].apply(lambda x: str(x)[0])

    combine_df['High_Survival_Ticket'] = np.where(combine_df['Ticket_Lett'].isin(['1', '2', 'P']), 1, 0)
    combine_df['Low_Survival_Ticket'] = np.where(combine_df['Ticket_Lett'].isin(['A', 'W', '3', '7']), 1, 0)
    combine_df = combine_df.drop(['Ticket', 'Ticket_Lett'], axis=1)

    #Embarked
    combine_df.Embarked = combine_df.Embarked.fillna('S')
    df = pd.get_dummies(combine_df['Embarked'], prefix='Embarked')
    combine_df = pd.concat([combine_df, df], axis=1).drop('Embarked', axis=1)

    # Cabin: flag whether a cabin is recorded (1) or missing (0)
    combine_df['Cabin_isNull'] = np.where(combine_df['Cabin'].isnull(), 0, 1)
    combine_df = combine_df.drop('Cabin', axis=1)


    #Pclass
    df = pd.get_dummies(combine_df['Pclass'],prefix = 'Pclass')
    combine_df = pd.concat([combine_df,df],axis = 1).drop('Pclass',axis = 1)

    #Sex
    df = pd.get_dummies(combine_df['Sex'], prefix='Sex')
    combine_df = pd.concat([combine_df, df], axis=1).drop('Sex', axis=1)

    #
    # # Fare: fill missing values with the mode, then discretize
    # combine_df['Fare'] = pd.qcut(combine_df.Fare, 3)
    # # print(combine_df.Fare)
    # df = pd.get_dummies(combine_df.Fare, prefix='Fare').drop('Fare_(-0.001, 8.662]', axis=1)
    # combine_df = pd.concat([combine_df, df], axis=1).drop('Fare', axis=1)


    # Fare: fill the missing value with the median, then keep two binary flags
    combine_df['Fare'] = combine_df['Fare'].fillna(combine_df['Fare'].dropna().median())
    combine_df['Low_Fare'] = np.where(combine_df['Fare'] <= 8.662, 1, 0)
    combine_df['High_Fare'] = np.where(combine_df['Fare'] >= 26, 1, 0)
    combine_df = combine_df.drop('Fare', axis=1)



    print(combine_df.columns.values)
    print(len(combine_df.columns.values))


    print('before LabelEncoder.....')
    print(combine_df)
    print(len(combine_df.columns))

    # Encode every feature as an integer
    features = combine_df.drop(["PassengerId", "Survived"], axis=1).columns
    le = LabelEncoder()
    for feature in features:
        le = le.fit(combine_df[feature])
        combine_df[feature] = le.transform(combine_df[feature])
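
    # What LabelEncoder does, illustrative only (toy values):
    # le_demo = LabelEncoder().fit(['S', 'C', 'Q', 'S'])  # classes_ -> ['C', 'Q', 'S']
    # le_demo.transform(['S', 'C'])                       # -> array([2, 0])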

    print('After LabelEncoder....')
    print(combine_df)

    # XGBoost rejects feature names containing '[' or ']', so strip them
    combine_df.rename(columns=lambda x: x.replace('[', '').replace(']',''), inplace=True)

    # Split back into train/test: the first 891 rows are the training set
    X_all = combine_df.iloc[:891, :].drop(["PassengerId", "Survived"], axis=1)
    Y_all = combine_df.iloc[:891, :]["Survived"]
    X_test = combine_df.iloc[891:, :].drop(["PassengerId", "Survived"], axis=1)
    # print('X_all = ')
    # print(X_all)
    # print(Y_all)
    # print(X_test)
    # print(X_all.columns.values)
    # print(X_test.columns.values)
    print('*******************************************************')


    #
    # Benchmark several algorithms: logistic regression, SVM, k-NN, decision tree,
    # random forest, GBDT and XGBoost.
    param_lr = {'penalty': ['l1', 'l2'], 'C': [0.1, 0.5, 5.0]}
    # solver='liblinear' supports both the l1 and l2 penalties
    grd_lr = GridSearchCV(estimator=LogisticRegression(solver='liblinear'), param_grid=param_lr, cv=5, n_jobs=-1)
    grd_lr.fit(X_all, Y_all)
    # print(grd_lr.best_estimator_, grd_lr.best_score_)
    lr = grd_lr.best_estimator_

    param_svc = {'C':[0.1,1.0,10.0],'gamma':['auto',1,0.1,0.01],'kernel':['linear','rbf','sigmoid']}
    grd_svc = GridSearchCV(estimator = SVC(),param_grid = param_svc,cv = 5,n_jobs = -1)
    grd_svc.fit(X_all,Y_all)
    # print(grd_svc.best_estimator_,grd_svc.best_score_)
    svc = grd_svc.best_estimator_

    k_range = list(range(1, 10))
    weight_options = ['uniform', 'distance']
    algorithm_options = ['auto', 'ball_tree', 'kd_tree', 'brute']
    param_knn = dict(n_neighbors=k_range, weights=weight_options, algorithm=algorithm_options)
    grd_knn = GridSearchCV(estimator = KNeighborsClassifier(),param_grid = param_knn,cv = 5,n_jobs = -1)
    grd_knn.fit(X_all,Y_all)
    # print(grd_knn.best_estimator_,grd_knn.best_score_)
    knn = grd_knn.best_estimator_

    param_dt = {'max_depth': [1, 2, 3, 4, 5], 'max_features': [1, 2, 3, 4]}
    grd_dt = GridSearchCV(estimator=DecisionTreeClassifier(), param_grid=param_dt, cv=5, n_jobs=-1)
    grd_dt.fit(X_all, Y_all)
    # print(grd_dt.best_estimator_, grd_dt.best_score_)
    dt = grd_dt.best_estimator_

    param_rf = {'n_estimators': [50,100,300,500], 'min_samples_leaf': [1, 2, 3, 4],'class_weight':[{0: 0.745, 1: 0.255}]}
    grd_rf = GridSearchCV(estimator=RandomForestClassifier(), param_grid=param_rf, cv=5, n_jobs=-1)
    grd_rf.fit(X_all, Y_all)
    # print(grd_rf.best_estimator_, grd_rf.best_score_)
    rf = grd_rf.best_estimator_

    gbdt = GradientBoostingClassifier(n_estimators=500, learning_rate=0.03, max_depth=3)
    xgb = XGBClassifier(max_depth=3, n_estimators=300, learning_rate=0.05)
    # clfs = [('lr', lr), ('svc', svc), ('knn', knn), ('dt', dt), ('rf', rf), ('gbdt', gbdt), ('xgb', xgb)]
    #
    # kfold = 10
    # cv_results = []
    # for classifier in clfs:
    #     cv_results.append(cross_val_score(classifier[1], X_all, y=Y_all, scoring="accuracy", cv=kfold, n_jobs=4))

    # for classifier in clfs:
    #     print('the cross_value of {} is:'.format(classifier[0]))
    #     cross_result = list(cross_val_score(classifier[1], X_all, Y_all, cv=5))
    #     cross_result.append(np.mean(cross_result))
    #     print(cross_result)





    #
    # # XGBClassifier chokes on feature names containing '[' or ']'
    # demo_train = X_all.iloc[:,:]           # this is a deep copy; how to make a shallow one?
    # demo_train['Fare_1'] = X_all['Fare_(8.662, 26.0]']
    # demo_train['Fare_2'] = X_all['Fare_(26.0, 512.329]']
    # demo_test = X_test.iloc[:,:]
    # demo_test['Fare_1'] = X_all['Fare_(8.662, 26.0]']
    # demo_test['Fare_2'] = X_all['Fare_(26.0, 512.329]']
    # demo_test = demo_test.drop(['Fare_(8.662, 26.0]', 'Fare_(26.0, 512.329]'], axis=1)
    # print(X_all.columns.values)
    # print(demo_train.columns.values)
    # demo_train = demo_train.drop(['Fare_(8.662, 26.0]','Fare_(26.0, 512.329]'],axis = 1)
    # print(demo_train.columns.values)
    # print(cross_val_score(rf, demo_train, y=Y_all, scoring="accuracy", cv=kfold, n_jobs=4))
    # rf.fit(demo_train,Y_all)
    # print(rf.feature_importances_)
    # print(type(rf.feature_importances_))
    # print(rf.feature_importances_.sum())

    #
    # # a shallow-copy workaround for X_all
    # demo = X_all.drop(['Fare_(8.662, 26.0]','Fare_(26.0, 512.329]'],axis = 1)
    # print(demo.columns.values)
    # print(X_all.columns.values)
    # demo['Fare1'] = X_all['Fare_(8.662, 26.0]']
    # demo['Fare2'] = X_all['Fare_(26.0, 512.329]']
    # print(demo.columns.values)
    # print(X_all.columns.values)

    # cv_means = []
    # cv_std = []
    # for cv_result in cv_results:
    #     cv_means.append(cv_result.mean())
    #     cv_std.append(cv_result.std())

    # cv_res = pd.DataFrame({"CrossValMeans": cv_means, "CrossValerrors": cv_std,
    #                        "Algorithm": ["LR", "SVC", 'KNN', 'decision_tree', "random_forest", "GBDT", "xgbGBDT"]})

    # # g = sns.barplot("CrossValMeans", "Algorithm", data=cv_res, palette="Set3", orient="h", **{'xerr': cv_std})
    # # g.set_xlabel("Mean Accuracy")
    # # g = g.set_title("Cross validation scores")






    # The models' feature importances differ quite a bit; would combining them do better?



    # Ensemble framework: drop the base classifiers in.
    # bag = Ensemble([('xgb', xgb), ('lr', lr), ('rf', rf), ('svc', svc), ('gbdt', gbdt)])
    # bag = Ensemble([('xgb', xgb), ('lr', lr), ('gbdt', gbdt), ('rf', rf)])
    # score = 0
    # for i in range(0, 10):
    #     num_test = 0.20
    #     X_train, X_cv, Y_train, Y_cv = train_test_split(X_all, Y_all, test_size=num_test)
    #     bag.fit(X_train, Y_train)
    #     # Y_test = bag.predict(X_test)
    #     acc_xgb = round(bag.score(X_cv, Y_cv) * 100, 2)
    #     score += acc_xgb
    # print(score / 10)  # 0.8786
    #
    # print(X_all.values)

    # .values replaces the deprecated .as_matrix()
    x = X_all.values
    y = Y_all.values
    test = X_test.values
    first_estimators = [('lr', lr), ('svc', svc), ('knn', knn), ('dt', dt), ('rf', rf), ('gbdt', gbdt), ('xgb', xgb)]

    sta = Stacking(first_estimators)
    print(sta.score(x, y))   # optimistic in-sample precision; see Stacking.score
    predictions = sta.fit_predict(x, y, test, n_folds=10)

    # save the stacking predictions in Kaggle's submission format
    result = pd.DataFrame(
        {'PassengerId': data_test['PassengerId'].values, 'Survived': predictions.astype(np.int32)})
    result.to_csv('D:/titanic/results/' + 'stacking' + '_predictions' + time.strftime('%Y-%m-%d %H-%M-%S',
                                                                                      time.localtime(time.time())) + '.csv',
                  index=False)
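
    # Sanity check (optional, assuming the path above is writable): Kaggle's
    # Titanic test set has 418 rows, so the submission should be 418 x 2.
    # assert result.shape == (418, 2)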







    # Ensemble (bagging-style) predictions, kept for reference:
    # bag.fit(X_all, Y_all)
    # predictions = bag.predict(X_test)
    # result = pd.DataFrame(
    #     {'PassengerId': data_test['PassengerId'].values, 'Survived': predictions.astype(np.int32)})
    # result.to_csv('D:/titanic/results/' + 'bag' + '_predictions' + time.strftime('%Y-%m-%d %H-%M-%S',
    #                                                                              time.localtime(time.time())) + '.csv',
    #               index=False)

    # Predict with each base classifier separately and write the results to csv:
    # for i in clfs:
    #     i[1].fit(X_all, Y_all)
    #     predictions = i[1].predict(X_test)
    #     result = pd.DataFrame(
    #         {'PassengerId': data_test['PassengerId'].values, 'Survived': predictions.astype(np.int32)})
    #     result.to_csv('D:/titanic/results/' + i[0] + '_predictions' + time.strftime('%Y-%m-%d %H-%M-%S',
    #                                                                                 time.localtime(time.time())) + '.csv',
    #                   index=False)