python常用语句总结(机器学习...)
# 导入pandas用于数据分析
import pandas as pd
# 导入numpy,并命名为 np
import numpy as np
# Load a data file with pd.read_csv and keep the returned DataFrame in `data`.
# Test data can be read the same way or split off from `data` (see the
# train_test_split step further down). The three read_csv calls below are
# alternative examples; each assignment overwrites the previous `data`.
data = pd.read_csv('../Datasets/Breast-Cancer/breast-cancer-train.csv')  # the source may be a local path
data = pd.read_csv('http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/titanic.txt')  # the source may also be a URL
# Sometimes the column (feature) names have to be supplied by hand
column_names = ['Sample code number', 'Clump Thickness', 'Uniformity of Cell Size', 'Uniformity of Cell Shape', 'Marginal Adhesion', 'Single Epithelial Cell Size', 'Bare Nuclei', 'Bland Chromatin', 'Normal Nucleoli', 'Mitoses', 'Class']
data = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data', names = column_names )
# Show the number of samples and the number of columns
data.shape
# Look at the first few rows to see what kinds of values are present:
# numeric, categorical, or even missing
data.head()
# Missing-value handling: this snippet treats '?' as the missing-value marker,
# so convert it to NaN first so that dropna/fillna below can act on it.
data = data.replace(to_replace = '?', value = np.nan)
## Option 1: drop every sample (row) that contains at least one missing value
data = data.dropna(how = 'any')
## Option 2: fill missing values with the column mean.
## Options 1 and 2 are alternatives; after option 1 there is nothing left to fill.
## numeric_only=True restricts the mean to numeric columns. Without it,
## pandas >= 2.0 raises TypeError when the frame contains string columns.
data.fillna(data.mean(numeric_only=True), inplace=True)
# Print a summary of the frame (columns, dtypes, non-null counts)
data.info()
# Feature selection: keep three columns as the inputs X and use the
# 'survived' column as the target y
X = data[['pclass', 'age', 'sex']]
y = data['survived']
# train_test_split from sklearn.model_selection splits the data;
# test_size=0.25 holds out 25% of the samples as the test set and
# random_state fixes the shuffle seed
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state = 33)
# Use the feature transformer from sklearn.feature_extraction
from sklearn.feature_extraction import DictVectorizer
vec = DictVectorizer(sparse=False)
# After the transform every categorical feature is split out into its own
# column(s), while numeric features are kept unchanged.
# DictVectorizer consumes one dict per sample, which is what
# to_dict(orient='records') produces. The abbreviation 'record' was removed
# in pandas 2.0, so the full spelling is required.
X_train = vec.fit_transform(X_train.to_dict(orient='records'))
# Only transform (no fit) the test set so it uses the training-set columns
X_test = vec.transform(X_test.to_dict(orient='records'))
print(vec.feature_names_)
# Check the number of samples per class in the training and test targets
y_train.value_counts()
y_test.value_counts()
# Sometimes the data should be standardised so that every feature has
# variance 1 and mean 0; this keeps features with large values from
# dominating the prediction.
# Import StandardScaler from sklearn.preprocessing
from sklearn.preprocessing import StandardScaler
ss = StandardScaler()
# Fit the scaler on the training data only, then apply the same
# transformation to the test data
X_train = ss.fit_transform(X_train)
X_test = ss.transform(X_test)
#使用不同的模型进行训练 .
***.fit(X_train, y_train)
#使用训练好的模型进行预测
y_predict = ***.predict(X_test)
# 从sklearn.metrics导入classification_report
from sklearn.metrics import classification_report
# 输出预测准确性
print('The Accuracy is :', ***.score(X_test, y_test))
# 输出更加详细的分类性能
print(classification_report(y_predict, y_test, target_names = ['died', 'survived']))
十个最常用机器学习算法编码指南(Python版)
Machine Learning Algorithms Python Code
LinearRegression线性回归
#Import Library
#Import other necessary libraries like pandas,
#numpy...
from sklearn import linear_model
#Load the train and test datasets.
#Identify the feature and response variable(s); the values must be
#numeric and numpy arrays.
#NOTE: the right-hand names below are placeholders that this snippet does
#not define; replace them with your own arrays.
x_train=input_variables_values_training_datasets
y_train=target_variables_values_training_datasets
x_test=input_variables_values_test_datasets
#Create linear regression object
linear = linear_model.LinearRegression()
#Train the model using the training set, then check its score
#on that same training set
linear.fit(x_train, y_train)
linear.score(x_train, y_train)
#Print the fitted equation's coefficients and intercept
print('Coefficient: \n', linear.coef_)
print('Intercept: \n', linear.intercept_)
#Predict the output for the test inputs
predicted= linear.predict(x_test)
Logistic Regression逻辑回归
#Import Library
from sklearn.linear_model import LogisticRegression
#Assumes you already have X (predictors) and y (target) for the
#training data set and x_test (predictors) of the test data set;
#none of them is defined in this snippet
#Create logistic regression object
model = LogisticRegression()
#Train the model using the training set, then check its score
#on that same training set
model.fit(X, y)
model.score(X, y)
#Print the fitted equation's coefficients and intercept
print('Coefficient: \n', model.coef_)
print('Intercept: \n', model.intercept_)
#Predict the output for the test inputs
predicted= model.predict(x_test)
Decision Tree决策树
#Import Library
#Import other necessary libraries like pandas, numpy...
from sklearn import tree
#Assumes you already have X (predictors) and y (target) for the
#training data set and x_test (predictors) of the test data set;
#none of them is defined in this snippet
#Create tree object
model = tree.DecisionTreeClassifier(criterion='gini')
#For classification the split criterion can be 'gini' or
#'entropy' (information gain); the default is 'gini'.
#For regression use instead:
#model = tree.DecisionTreeRegressor()
#Train the model using the training set, then check its score
#on that same training set
model.fit(X, y)
model.score(X, y)
#Predict the output for the test inputs
predicted= model.predict(x_test)
SVM (Support Vector Machine)支持向量机
#Import Library
from sklearn import svm
#Assumes you already have X (predictors) and y (target) for the
#training data set and x_test (predictors) of the test data set
#Create SVM classification object. The class name is SVC (upper case);
#sklearn.svm has no attribute named `svc`.
model = svm.SVC()
#There are various options associated with it; this is the simplest
#form, for classification.
#Train the model using the training set, then check its score
#on that same training set
model.fit(X, y)
model.score(X, y)
#Predict the output for the test inputs
predicted = model.predict(x_test)
Naive Bayes朴素贝叶斯
#Import Library
from sklearn.naive_bayes import GaussianNB
#Assumes you already have X (predictors) and y (target) for the
#training data set and x_test (predictors) of the test data set
#Create the Gaussian Naive Bayes classification object
model = GaussianNB()
#There are other distributions for multinomial classes,
#like Bernoulli Naive Bayes
#Train the model using the training set
model.fit(X, y)
#Predict the output for the test inputs
predicted = model.predict(x_test)
kNN (k- Nearest Neighbors)K临近节点算法
#Import Library
from sklearn.neighbors import KNeighborsClassifier
#Assumes you already have X (predictors) and y (target) for the
#training data set and x_test (predictors) of the test data set
#Create the KNeighbors classifier object and bind it to `model`,
#which the fit/predict calls below use
model = KNeighborsClassifier(n_neighbors=6)
#default value for n_neighbors is 5
#Train the model using the training set
model.fit(X, y)
#Predict the output for the test inputs
predicted = model.predict(x_test)
k-Means聚类算法
#Import Library
from sklearn.cluster import KMeans
#Assumes you already have X (attributes) for the training data set
#and x_test (attributes) of the test data set
#Create the KMeans clustering object with 3 clusters; random_state
#fixes the seed used for initialisation
k_means = KMeans(n_clusters=3, random_state=0)
#Fit the clustering on the training set. Clustering is unsupervised,
#so no target is passed. Use the `k_means` object created above
#rather than a stale `model` left over from an earlier snippet.
k_means.fit(X)
#Assign each test sample to a cluster
predicted = k_means.predict(x_test)
Random Forest随机森林
#Import Library
from sklearn.ensemble import RandomForestClassifier
#Assumes you already have X (predictors) and y (target) for the
#training data set and x_test (predictors) of the test data set;
#none of them is defined in this snippet
#Create Random Forest object with its default settings
model= RandomForestClassifier()
#Train the model using the training set
model.fit(X, y)
#Predict the output for the test inputs
predicted= model.predict(x_test)
Dimensionality Reduction Algorithms降维算法
#Import Library
from sklearn import decomposition
#Assumes you have the training and test data sets as `train` and
#`test`, and that `k` holds the number of components to keep;
#none of them is defined in this snippet
#Create PCA object
pca = decomposition.PCA(n_components=k)
#default value of k = min(n_sample, n_features)
#For Factor analysis
#fa= decomposition.FactorAnalysis()
#Reduce the dimension of the training dataset using PCA
train_reduced = pca.fit_transform(train)
#Reduce the dimension of the test dataset with the projection
#fitted on the training data
test_reduced = pca.transform(test)
Gradient Boosting & AdaBoost梯度增加&演算法
#Import Library
from sklearn.ensemble import GradientBoostingClassifier
#Assumes you already have X (predictors) and y (target) for the
#training data set and x_test (predictors) of the test data set
#Build the Gradient Boosting classifier. The arguments sit inside the
#call's parentheses, so no line-continuation backslash is needed.
model = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=1.0,
    max_depth=1,
    random_state=0,
)
#Fit the classifier on the training set
model.fit(X, y)
#Predict the output for the test inputs
predicted = model.predict(x_test)
Python语句总结
简单地说,语句就是告诉Python程序应该做什么的句子。Python是面向过程的、基于语句的语言,通过组合这些语句,可以指定一个过程。
Python程序结构:
1、程序由模块构成
2、模块包含语句
3、语句包含表达式
4、表达式建立并处理对象
Python的语法实际上是由语句和表达式组成的。表达式处理对象并嵌套在语句中,语句还是对象生成的地方。语句总是存在于模块当中,而模块本身是由语句来管理的。
Python的语句:
语句 |
角色 |
例子 |
赋值 |
创建引用值 |
a,b,c = 'good','bad','ugly' |
调用 |
执行函数 |
log.write("spam,ham") |
打印调用 |
打印对象 |
print('The Killer',joke) |
if/elif/else |
选择动作 |
if 'Python' in text: |
for/else |
序列迭代 |
for x in mylist: |
while/else |
一般循环 |
while X > Y: |
pass |
空占位符 |
while True: |
break |
循环退出 |
while True: |
continue |
循环继续 |
while True: |
def |
函数和方法 |
def f(a, b, c=1,*d): |
return |
函数结果 |
def f(a, b, c=1,*d): |
yield |
生成器函数 |
def gen(n): |
global |
命名空间 |
x = 'old' |
nonlocal |
Namespace(3.0+) |
def outer(): |
import |
模块访问 |
import sys |
from |
属性访问 |
from sys import stdin |
class |
创建对象 |
class Subclass (Superclass): |
try/except/finally |
捕捉异常 |
try: |
raise |
触发异常 |
raise EndSearch(location) |
assert |
调试检查 |
assert X>Y, 'X too small' |
with/as |
环境管理器(2.6) |
with open('data') as myfile: |
del |
删除引用 |
del data[k] |
两个if的故事:
类C语言语法写出来的:
if (x > y) {
x = 1;
y = 2;
}
if (x > y) {
x = 1;
y = 2;
}
Python 语言中与之等价的句子:
if x > y:
    x = 1
    y = 2
if x > y:
    x = 1
    y = 2
注意:等价的Python语句没有那么杂乱。也就是说,语法成分较少,这是刻意设计的。作为脚本语言,Python的目标之一就是让程序员少打一些字让生活轻松一点。
Python增加了什么?
Python中新的语法成分是冒号:。所有Python的复合语句(也就是语句中嵌套了语句的)都有相同的一般形式,也就是以冒号结尾,首行下一行嵌套的代码按照缩进的形式书写,如下所示:
[python] view plain copy print?
Header Line:
    Nested statement block
Header Line:
    Nested statement block
冒号是不可或缺的,遗漏冒号是新手最常见的错误之一。
Python删除了什么?
虽然在Python中需要额外的冒号,但是你必须在类C语言中加入,而通常不需要在Python中加入的语法成分有三项。
1)、括号是可选的
(但是一般没有一个Python程序员会给表达式加上多余的括号——这只会让你的键盘坏得更快)
类C:
if (x < y)
if (x < y)
Python:
if x < y:
if x < y:
2)、终止行就是终止语句
不会在Python中出现的第二个重要的语法成分是分号。Python中不需要使用分号终止语句。
类C:
X = 1;
X = 1;
Python:(在Python中,一般的原则是一行的结束会自动终止出现在该行的语句。)
X = 1
X = 1
Python的风格就是完全不需要分号。
3)、缩进的结束就是代码块的结束
在Python中不用输入任何语法上用来表明嵌套代码块的开头和结尾的东西。
if (x > y) {
x = 1;
y = 2;
}
if (x > y) {
x = 1;
y = 2;
}
取而代之的是,在Python中,我们一致的把嵌套块里的所有语句向右缩进相同的距离,Python能够使用语句的实际缩进来确定代码块的开头与结尾。
if x > y:
    x = 1
    y = 2
if x > y:
    x = 1
    y = 2
所谓缩进,是指这里的两个嵌套语句至左侧的所有空白。语法规则规定一个单独的嵌套块中的所有语句都必须缩进相同的距离。
几个特殊的实例:
在Python语法模型中:
一行的结束就是终止该行的语句(木有分号);
嵌套语句是代码块与实际的缩进相关(木有大括号)
语句规则的特殊情况:
1)、虽然语句都是一行一个,但是Python中也有可能出现某一行挤进多个语句的情况,这时它们由分号隔开:
[python] view plain copy print?
a = 1; b = 2; print(a +b)
a = 1; b = 2; print(a +b)
这是Python中唯一需要分号的地方——作为语句界定符。不过,只有当在一起的语句本身不是复合语句才行,即:只能把简单语句放到一起。例如:赋值操作、打印操作、函数调用。复合语句还是必须出现在自己的行里。
2)、语句的另一个特殊规则基本上是相反的——可以让一个语句的范围横跨多行。为了实现这一操作,你只需要用一对括号把语句括起来就好了:圆括号(())、方括号([])、或者是字典的大括号({})。任何包含在这些符号里的代码都可以横跨好几行。语句将一直运行,直到遇到包含闭合括号的那一行。
Eg:列表常量
>>> mylist = ['hello',
...           'I',
...           'Love',
...           'Python']
>>> mylist = ['hello',
...           'I',
...           'Love',
...           'Python']
一个简单的交互式循环:
>>> while True:
...     reply = input("Enter text:")
...     if reply == 'stop':
...         break  # 完全跳出循环语句,程序会继续执行循环之后的部分
...     print(reply.upper())
...
Enter text:'abc'
ABC
Enter text:'stop'
>>> while True:
...     reply = input("Enter text:")
...     if reply == 'stop':
...         break  # 完全跳出循环语句,程序会继续执行循环之后的部分
...     print(reply.upper())
...
Enter text:'abc'
ABC
Enter text:'stop'
对用户输入数据作数学运算:
>>> while True:
...     reply = input("Enter text:")
...     if reply == 'stop':
...         break
...     print (int(reply)**2)
>>> while True:
...     reply = input("Enter text:")
...     if reply == 'stop':
...         break
...     print (int(reply)**2)
[python] view plain copy print?
Enter text:42
1764
Enter text:2
4
Enter text:stop
Enter text:42
1764
Enter text:2
4
Enter text:stop
用测试输入数据来处理错误:
>>> while True:
...     reply = input("Enter text:")
...     if reply == 'stop':
...         break
...     elif not reply.isdigit():
...         print('bad!' * 8)
...     else:
...         print(int(reply)**2)
...
>>> while True:
...     reply = input("Enter text:")
...     if reply == 'stop':
...         break
...     elif not reply.isdigit():
...         print('bad!' * 8)
...     else:
...         print(int(reply)**2)
...
使用try语句处理错误:
之前介绍的简单方法能够工作,但是在Python中,处理错误的方法是使用try语句,它能够捕捉并完全复原错误。
>>> while True:
...     reply = input('Enter text:')
...     if reply == 'stop':break
...     try:
...         num = int(reply)
...     except:
...         print('Bad!'*8)
...     else:
...         print(int(reply) ** 2)
...
Enter text:12
144
Enter text:9
81
Enter text:'stop'
>>> while True:
...     reply = input('Enter text:')
...     if reply == 'stop':break
...     try:
...         num = int(reply)
...     except:
...         print('Bad!'*8)
...     else:
...         print(int(reply) ** 2)
...
Enter text:12
144
Enter text:9
81
Enter text:'stop'
try语句的组成:
try关键字后面跟代码的主要代码块(我们尝试运行的代码),再跟except部分,给出异常处理器代码,再接else部分,如果try部分没有引发异常就执行这一部分代码。Python会先执行try部分,然后执行except部分(如果有异常发生)或else部分(如果没有异常发生)。
嵌套代码三层:
[python] view plain copy print?
>>> while True:
...     reply = input('Enter text:')
...     if reply == 'stop':break
...     elif not reply.isdigit():
...         print('Bad!'*8)
...     else:
...         num = int(reply)
...         if num < 20:
...             print('low')
...         else:
...             print(num ** 2)
>>> while True:
...     reply = input('Enter text:')
...     if reply == 'stop':break
...     elif not reply.isdigit():
...         print('Bad!'*8)
...     else:
...         num = int(reply)
...         if num < 20:
...             print('low')
...         else:
...             print(num ** 2)
总结:
上述内容快速浏览了Python的基本语法。介绍了语句和代码块编写的通用规则:
在Python中,一般情况下是每一行编写一条语句,而嵌套代码块中的所有语句都缩进相同的量(缩进是Python语法的一部分)。
参考:
Python学习手册