Concept
Logistic regression is a classification algorithm: despite the "regression" in its name, it predicts discrete class labels rather than continuous values.
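The model passes a linear combination of the input features through the sigmoid function, yielding a probability in (0, 1); with several classes, one binary classifier is trained per class and the most probable class wins. A minimal, self-contained sketch of this hypothesis (plain numpy, for illustration only):

import numpy as np

def sigmoid(z):
    # Squash any real number into the interval (0, 1).
    return 1 / (1 + np.exp(-z))

def hypothesis(data, theta):
    # h(x) = sigmoid(theta^T x): the probability that each sample
    # belongs to the positive class.
    return sigmoid(np.dot(data, theta))

# Two samples with a leading bias feature of 1.
X = np.array([[1.0, 2.0, 3.0],
              [1.0, -1.0, 0.5]])
theta = np.array([[0.1], [0.2], [-0.3]])
print(hypothesis(X, theta))  # two values strictly between 0 and 1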
Linear Logistic Regression
logistic_regression.py
import numpy as np
from scipy.optimize import minimize

from utils.features import prepare_for_training
from utils.hypothesis import sigmoid


class LogisticRegression:
    def __init__(self, data, labels, polynomial_degree=0, sinusoid_degree=0, normalize_data=False):
        """
        1. Preprocess the data.
        2. Get the number of features.
        3. Initialize the parameter matrix.
        """
        (data_processed,
         features_mean,
         features_deviation) = prepare_for_training(data, polynomial_degree, sinusoid_degree,
                                                    normalize_data=normalize_data)  # pass the flag through instead of hard-coding False

        self.data = data_processed
        self.labels = labels
        self.unique_labels = np.unique(labels)        # the distinct label values
        self.features_mean = features_mean            # per-feature means
        self.features_deviation = features_deviation  # per-feature standard deviations
        self.polynomial_degree = polynomial_degree
        self.sinusoid_degree = sinusoid_degree
        self.normalize_data = normalize_data

        num_features = self.data.shape[1]               # number of features (including the bias column)
        num_unique_labels = np.unique(labels).shape[0]  # number of classes
        # One row of parameters per class (one-vs-all).
        self.theta = np.zeros((num_unique_labels, num_features))

    # Training: fit one binary classifier per class.
    def train(self, max_iterations=1000):
        cost_histories = []  # record the loss of each binary classifier
        num_features = self.data.shape[1]
        for label_index, unique_label in enumerate(self.unique_labels):
            current_initial_theta = np.copy(self.theta[label_index].reshape(num_features, 1))
            # 1.0 where the sample belongs to the current class, 0.0 otherwise.
            current_labels = (self.labels == unique_label).astype(float)
            (current_theta, cost_history) = LogisticRegression.gradient_descent(
                self.data, current_labels, current_initial_theta, max_iterations)
            self.theta[label_index] = current_theta.T
            cost_histories.append(cost_history)
        return self.theta, cost_histories

    # Gradient descent via scipy's optimizer.
    @staticmethod
    def gradient_descent(data, labels, current_initial_theta, max_iterations):
        cost_history = []  # losses recorded along the way
        num_features = data.shape[1]
        result = minimize(
            # Objective to minimize:
            lambda current_theta: LogisticRegression.cost_function(
                data, labels, current_theta.reshape(num_features, 1)),
            # Initial parameters (minimize expects a 1-D array):
            current_initial_theta.flatten(),
            # Optimization strategy (conjugate gradient):
            method='CG',
            # Gradient of the objective:
            jac=lambda current_theta: LogisticRegression.gradient_step(
                data, labels, current_theta.reshape(num_features, 1)),
            # Record the loss after every iteration:
            callback=lambda current_theta: cost_history.append(
                LogisticRegression.cost_function(data, labels, current_theta.reshape((num_features, 1)))),
            # Maximum number of iterations:
            options={'maxiter': max_iterations}
        )
        if not result.success:
            raise ArithmeticError('Can not minimize cost function: ' + result.message)
        optimized_theta = result.x.reshape(num_features, 1)
        return optimized_theta, cost_history

    # Loss function.
    @staticmethod
    def cost_function(data, labels, theta):
        num_examples = data.shape[0]
        predictions = LogisticRegression.hypothesis(data, theta)
        # Cross-entropy, split into the y = 1 and y = 0 samples.
        y_is_set_cost = np.dot(labels[labels == 1].T, np.log(predictions[labels == 1]))
        y_is_not_set_cost = np.dot((1 - labels[labels == 0]).T, np.log(1 - predictions[labels == 0]))
        cost = (-1 / num_examples) * (y_is_set_cost + y_is_not_set_cost)
        return cost

    @staticmethod
    def hypothesis(data, theta):
        predictions = sigmoid(np.dot(data, theta))
        return predictions

    # Gradient of the loss function.
    @staticmethod
    def gradient_step(data, labels, theta):
        num_examples = labels.shape[0]
        predictions = LogisticRegression.hypothesis(data, theta)
        label_diff = predictions - labels
        gradients = (1 / num_examples) * np.dot(data.T, label_diff)
        return gradients.T.flatten()

    # Prediction: pick the class with the highest probability.
    def predict(self, data):
        num_examples = data.shape[0]
        data_processed = prepare_for_training(
            data, self.polynomial_degree, self.sinusoid_degree, self.normalize_data)[0]
        prob = LogisticRegression.hypothesis(data_processed, self.theta.T)
        # Index of the most probable class for each sample.
        max_prob_index = np.argmax(prob, axis=1)
        class_prediction = np.empty(max_prob_index.shape, dtype=object)
        for index, label in enumerate(self.unique_labels):
            class_prediction[max_prob_index == index] = label
        return class_prediction.reshape((num_examples, 1))
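The class depends on two helpers not listed in this section: utils.hypothesis.sigmoid (as sketched in the Concept section) and utils.features.prepare_for_training. Judging from how it is called here, the latter returns the processed data together with the per-feature means and standard deviations, prepends a bias column of ones (which is why the theta visualizations later skip index 0), and expands the features when polynomial_degree or sinusoid_degree is above zero. The following is a minimal stand-in written as an assumption, not the actual helper:

import numpy as np

def prepare_for_training(data, polynomial_degree=0, sinusoid_degree=0, normalize_data=True):
    """Minimal stand-in: (optionally) normalize, then prepend a bias column of ones.
    The real helper also appends sinusoid and polynomial feature combinations when
    the corresponding degrees are above zero; that part is omitted here."""
    data_processed = np.copy(data).astype(float)
    features_mean = np.mean(data_processed, axis=0)
    features_deviation = np.std(data_processed, axis=0)
    if normalize_data:
        features_deviation[features_deviation == 0] = 1  # avoid dividing by zero on constant features
        data_processed = (data_processed - features_mean) / features_deviation
    num_examples = data_processed.shape[0]
    data_processed = np.hstack((np.ones((num_examples, 1)), data_processed))
    return data_processed, features_mean, features_deviation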
logistic_regression_with_linear_boundary.py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from logistic_regression import LogisticRegression

data = pd.read_csv('../data/iris.csv')
# np.unique sorts the labels, so the class indices are 0 = SETOSA, 1 = VERSICOLOR, 2 = VIRGINICA.
iris_types = ['SETOSA', 'VERSICOLOR', 'VIRGINICA']

x_axis = 'petal_length'  # petal length
y_axis = 'petal_width'   # petal width

# Scatter plot of the training data, one color per class.
for iris_type in iris_types:
    plt.scatter(data[x_axis][data['class'] == iris_type],
                data[y_axis][data['class'] == iris_type],
                label=iris_type)
plt.show()

num_examples = data.shape[0]  # number of samples
x_train = data[[x_axis, y_axis]].values.reshape((num_examples, 2))
y_train = data['class'].values.reshape((num_examples, 1))

max_iterations = 1000
polynomial_degree = 0
sinusoid_degree = 0

logistic_regression = LogisticRegression(x_train, y_train, polynomial_degree, sinusoid_degree)
thetas, cost_histories = logistic_regression.train(max_iterations)
labels = logistic_regression.unique_labels

# Loss curves of the three binary (one-vs-all) classifiers.
plt.plot(range(len(cost_histories[0])), cost_histories[0], label=labels[0])
plt.plot(range(len(cost_histories[1])), cost_histories[1], label=labels[1])
plt.plot(range(len(cost_histories[2])), cost_histories[2], label=labels[2])
plt.show()

y_train_predictions = logistic_regression.predict(x_train)
# Overall accuracy in percent: correct predictions / total number of samples.
precision = np.sum(y_train_predictions == y_train) / y_train.shape[0] * 100
print('precision:', precision)

x_min = np.min(x_train[:, 0])
x_max = np.max(x_train[:, 0])
y_min = np.min(x_train[:, 1])
y_max = np.max(x_train[:, 1])

samples = 150
X = np.linspace(x_min, x_max, samples)
Y = np.linspace(y_min, y_max, samples)
Z_SETOSA = np.zeros((samples, samples))
Z_VERSICOLOR = np.zeros((samples, samples))
Z_VIRGINICA = np.zeros((samples, samples))

# Classify every point on the grid. contour expects Z indexed as Z[row = y][column = x],
# so the indices are written as [y_index][x_index].
for x_index, x in enumerate(X):
    for y_index, y in enumerate(Y):
        grid_point = np.array([[x, y]])
        prediction = logistic_regression.predict(grid_point)[0][0]
        if prediction == 'SETOSA':
            Z_SETOSA[y_index][x_index] = 1
        elif prediction == 'VERSICOLOR':
            Z_VERSICOLOR[y_index][x_index] = 1
        elif prediction == 'VIRGINICA':
            Z_VIRGINICA[y_index][x_index] = 1

for iris_type in iris_types:
    plt.scatter(
        x_train[(y_train == iris_type).flatten(), 0],  # samples of the current class only
        x_train[(y_train == iris_type).flatten(), 1],
        label=iris_type)

# Decision boundaries drawn as contour lines, one per classifier.
plt.contour(X, Y, Z_SETOSA)
plt.contour(X, Y, Z_VERSICOLOR)
plt.contour(X, Y, Z_VIRGINICA)
plt.show()
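Two details worth noticing in this script: train handles the three-class problem one-vs-all, fitting one binary classifier per label, which is why cost_histories holds three separate curves; and each Z grid is a 0/1 membership map for one class, so each of the three plt.contour calls traces the decision boundary of a single classifier.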
mnist.py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math

from logistic_regression import LogisticRegression

data = pd.read_csv('../data/mnist-demo.csv')

# Plot settings.
numbers_to_display = 25
num_cells = math.ceil(math.sqrt(numbers_to_display))
plt.figure(figsize=(10, 10))

# Display the first 25 digits.
for plot_index in range(numbers_to_display):
    # Read one row: the first column is the label, the rest are pixels.
    digit = data[plot_index:plot_index + 1].values
    digit_label = digit[0][0]
    digit_pixels = digit[0][1:]
    # Each image is square.
    image_size = int(math.sqrt(digit_pixels.shape[0]))
    # Reshape the pixel vector into an image.
    frame = digit_pixels.reshape((image_size, image_size))
    # Show the image.
    plt.subplot(num_cells, num_cells, plot_index + 1)
    plt.imshow(frame, cmap='Greys')
    plt.title(digit_label)
    plt.tick_params(axis='both', which='both', bottom=False, left=False, labelbottom=False, labelleft=False)
plt.subplots_adjust(hspace=0.5, wspace=0.5)
plt.show()

# Train/test split.
pd_train_data = data.sample(frac=0.8)
pd_test_data = data.drop(pd_train_data.index)

# Convert to ndarrays.
train_data = pd_train_data.values
test_data = pd_test_data.values

num_training_examples = 6000
x_train = train_data[:num_training_examples, 1:]
y_train = train_data[:num_training_examples, [0]]
x_test = test_data[:, 1:]
y_test = test_data[:, [0]]

# Training parameters.
max_iterations = 10000
polynomial_degree = 0
sinusoid_degree = 0
normalize_data = True

# Logistic regression.
logistic_regression = LogisticRegression(x_train, y_train, polynomial_degree, sinusoid_degree, normalize_data)
(thetas, costs) = logistic_regression.train(max_iterations)
pd.DataFrame(thetas)

# How many theta images to display.
numbers_to_display = 9
# Number of grid cells needed to hold them.
num_cells = math.ceil(math.sqrt(numbers_to_display))
# Make the plot a little bigger than the default.
plt.figure(figsize=(10, 10))

# Visualize the learned parameters of each classifier as an image.
for plot_index in range(numbers_to_display):
    # Skip the bias term; the remaining parameters form a square image.
    digit_pixels = thetas[plot_index][1:]
    image_size = int(math.sqrt(digit_pixels.shape[0]))
    frame = digit_pixels.reshape((image_size, image_size))
    plt.subplot(num_cells, num_cells, plot_index + 1)
    plt.imshow(frame, cmap='Greys')
    plt.title(plot_index)
    plt.tick_params(axis='both', which='both', bottom=False, left=False, labelbottom=False, labelleft=False)
plt.subplots_adjust(hspace=0.5, wspace=0.5)
plt.show()

# Training curves.
labels = logistic_regression.unique_labels
for index, label in enumerate(labels):
    plt.plot(range(len(costs[index])), costs[index], label=labels[index])
plt.xlabel('Gradient Steps')
plt.ylabel('Cost')
plt.legend()
plt.show()

# Evaluation: overall accuracy in percent on the train and test sets.
y_train_predictions = logistic_regression.predict(x_train)
y_test_predictions = logistic_regression.predict(x_test)

train_precision = np.sum(y_train_predictions == y_train) / y_train.shape[0] * 100
test_precision = np.sum(y_test_predictions == y_test) / y_test.shape[0] * 100

print('Training Precision: {:5.4f}%'.format(train_precision))
print('Test Precision: {:5.4f}%'.format(test_precision))

# How many test digits to display.
numbers_to_display = 64
num_cells = math.ceil(math.sqrt(numbers_to_display))
plt.figure(figsize=(15, 15))

# Plot the first test digits: green if predicted correctly, red otherwise.
for plot_index in range(numbers_to_display):
    digit_label = y_test[plot_index, 0]
    digit_pixels = x_test[plot_index, :]
    predicted_label = y_test_predictions[plot_index][0]
    image_size = int(math.sqrt(digit_pixels.shape[0]))
    frame = digit_pixels.reshape((image_size, image_size))
    color_map = 'Greens' if predicted_label == digit_label else 'Reds'
    plt.subplot(num_cells, num_cells, plot_index + 1)
    plt.imshow(frame, cmap=color_map)
    plt.title(predicted_label)
    plt.tick_params(axis='both', which='both', bottom=False, left=False, labelbottom=False, labelleft=False)
plt.subplots_adjust(hspace=0.5, wspace=0.5)
plt.show()
Nonlinear Logistic Regression
NonLinearBoundary.py
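A nonlinear decision boundary falls out of the same LogisticRegression class once polynomial_degree (and, if desired, sinusoid_degree) is set above zero: prepare_for_training then expands the input features, and a model that is linear in the expanded space traces a curve in the original two dimensions. The following is a minimal sketch rather than a verbatim listing; the dataset path ../data/microchips-tests.csv and the column names param_1, param_2, and validity are illustrative assumptions.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from logistic_regression import LogisticRegression

# Hypothetical two-feature dataset with a binary label; path and columns are assumptions.
data = pd.read_csv('../data/microchips-tests.csv')
x_axis = 'param_1'
y_axis = 'param_2'

num_examples = data.shape[0]
x_train = data[[x_axis, y_axis]].values.reshape((num_examples, 2))
y_train = data['validity'].values.reshape((num_examples, 1))

# polynomial_degree > 0 makes prepare_for_training append polynomial combinations
# (x1^2, x1*x2, x2^2, ...); the classifier stays linear in the expanded features,
# but its boundary becomes a curve in the original two dimensions.
max_iterations = 1000
polynomial_degree = 5
sinusoid_degree = 0
normalize_data = False

logistic_regression = LogisticRegression(x_train, y_train, polynomial_degree, sinusoid_degree, normalize_data)
thetas, costs = logistic_regression.train(max_iterations)

# Classify a grid of points and draw the (curved) decision boundary as a contour.
samples = 150
X = np.linspace(np.min(x_train[:, 0]), np.max(x_train[:, 0]), samples)
Y = np.linspace(np.min(x_train[:, 1]), np.max(x_train[:, 1]), samples)
Z = np.zeros((samples, samples))
for x_index, x in enumerate(X):
    for y_index, y in enumerate(Y):
        # contour expects Z indexed as Z[row = y][column = x].
        Z[y_index][x_index] = logistic_regression.predict(np.array([[x, y]]))[0][0]

for label in logistic_regression.unique_labels:
    plt.scatter(x_train[(y_train == label).flatten(), 0],
                x_train[(y_train == label).flatten(), 1],
                label=label)
plt.contour(X, Y, Z)
plt.legend()
plt.show()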