任菜菜学编程

动手学深度学习(tensorflow)---学习笔记整理（十、计算机视觉篇）

有关公式、基本理论等大量内容摘自《动手学深度学习》(TF2.0版)）

这一部分主要是计算机视觉内容，之前说的cnn模型也是和视觉联系很大的～

通过cnn的学习，我们了解了图片的结构和图片分类等内容，计算机视觉还有两个非常重要的内容，一个是类似在图片内对目标进行检测（目标检测），另一个是生成图片（迁移学习）。

再说上述内容之前，先了解一下计算机视觉其他的基础的一些知识。

图像增广

常见的方法：翻转、裁剪和变换颜色

原图：

翻转

剪裁

变换颜色

亮度

色调

代码如下：

import tensorflow as tf
import numpy as np
print(tf.__version__)
from matplotlib import pyplot as plt

img = plt.imread('img/girl.jpg')
print(img.shape)
plt.imshow(img)
plt.show()
#绘图函数
def show_images(imgs, num_rows, num_cols, scale=2):
    figsize = (num_cols * scale, num_rows * scale)
    _, axes = plt.subplots(num_rows, num_cols, figsize=figsize)
    for i in range(num_rows):
        for j in range(num_cols):
            axes[i][j].imshow(imgs[i * num_cols + j])
            axes[i][j].axes.get_xaxis().set_visible(False)
            axes[i][j].axes.get_yaxis().set_visible(False)
    plt.show()
    return axes
#运行图像增广方法aug并展示所有的结果，默认2*4个
def apply(img, aug, num_rows=2, num_cols=4, scale=1.5):
    Y = [aug(img) for _ in range(num_rows * num_cols)]
    show_images(Y, num_rows, num_cols, scale)
#左右翻转
apply(img, tf.image.random_flip_left_right)
#上下翻转
apply(img, tf.image.random_flip_up_down)
#随机剪裁10%-100%，像素缩放到600*600
aug=tf.image.random_crop
num_rows=2
num_cols=4
scale=1.5
#图像尺寸
crop_size=600
Y = [aug(img, (crop_size, crop_size, 3)) for _ in range(num_rows * num_cols)]
show_images(Y, num_rows, num_cols, scale)
#变换颜色亮度，50%-150%
#tf.image.random_brightness函数实现
aug=tf.image.random_brightness
num_rows=2
num_cols=4
scale=1.5
max_delta=0.5
Y = [aug(img, max_delta) for _ in range(num_rows * num_cols)]
show_images(Y, num_rows, num_cols, scale)
#变换颜色色调
#tf.image.random_hue实现
aug=tf.image.random_hue
num_rows=2
num_cols=4
scale=1.5
max_delta=0.5

Y = [aug(img, max_delta) for _ in range(num_rows * num_cols)]
show_images(Y, num_rows, num_cols, scale)
#可以从4个方面改变图像的颜色：亮度、对比度、饱和度和色调，还有两个没介绍

使用图像增广训练模型

（代码有点点小问题，后续修改）

import tensorflow as tf
import numpy as np
print(tf.__version__)
from matplotlib import pyplot as plt
#绘图函数
def show_images(imgs, num_rows, num_cols, scale=2):
    figsize = (num_cols * scale, num_rows * scale)
    _, axes = plt.subplots(num_rows, num_cols, figsize=figsize)
    for i in range(num_rows):
        for j in range(num_cols):
            axes[i][j].imshow(imgs[i * num_cols + j])
            axes[i][j].axes.get_xaxis().set_visible(False)
            axes[i][j].axes.get_yaxis().set_visible(False)
    plt.show()
    return axes
#获取数据集合
(x, y), (test_x, test_y) = tf.keras.datasets.cifar10.load_data()
print(x.shape, test_x.shape)
#绘制前8个
show_images(x[0:8][0], 2, 4, scale=0.8)
#定义残差神经网络
from tensorflow.keras import layers,activations
class Residual(tf.keras.Model):
    def __init__(self, num_channels, use_1x1conv=False, strides=1, **kwargs):
        super(Residual, self).__init__(**kwargs)
        self.conv1 = layers.Conv2D(num_channels,
                                   padding='same',
                                   kernel_size=3,
                                   strides=strides)
        self.conv2 = layers.Conv2D(num_channels, kernel_size=3,padding='same')
        if use_1x1conv:
            self.conv3 = layers.Conv2D(num_channels,
                                       kernel_size=1,
                                       strides=strides)
        else:
            self.conv3 = None
        self.bn1 = layers.BatchNormalization()
        self.bn2 = layers.BatchNormalization()

    def call(self, X):
        Y = activations.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        return activations.relu(Y + X)

class ResnetBlock(tf.keras.layers.Layer):
    def __init__(self,num_channels, num_residuals, first_block=False,**kwargs):
        super(ResnetBlock, self).__init__(**kwargs)
        self.listLayers=[]
        for i in range(num_residuals):
            if i == 0 and not first_block:
                self.listLayers.append(Residual(num_channels, use_1x1conv=True, strides=2))
            else:
                self.listLayers.append(Residual(num_channels))

    def call(self, X):
        for layer in self.listLayers.layers:
            X = layer(X)
        return X

class ResNet(tf.keras.Model):
    def __init__(self,num_blocks,**kwargs):
        super(ResNet, self).__init__(**kwargs)
        self.conv=tf.keras.layers.Conv2D(64, kernel_size=7, strides=2, padding='same')
        self.bn=tf.keras.layers.BatchNormalization()
        self.relu=tf.keras.layers.Activation('relu')
        self.mp=tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same')
        self.resnet_block1=ResnetBlock(64,num_blocks[0], first_block=True)
        self.resnet_block2=ResnetBlock(128,num_blocks[1])
        self.resnet_block3=ResnetBlock(256,num_blocks[2])
        self.resnet_block4=ResnetBlock(512,num_blocks[3])
        self.gap=tf.keras.layers.GlobalAvgPool2D()
        self.fc=tf.keras.layers.Dense(units=10,activation=tf.keras.activations.softmax)

    def call(self, x):
        x=self.conv(x)
        x=self.bn(x)
        x=self.relu(x)
        x=self.mp(x)
        x=self.resnet_block1(x)
        x=self.resnet_block2(x)
        x=self.resnet_block3(x)
        x=self.resnet_block4(x)
        x=self.gap(x)
        x=self.fc(x)
        return x

net = ResNet([2,2,2,2])
#使用随机左右翻转的图像增广来训练模型
print(type(x))
x = np.array([tf.image.random_flip_left_right(i) for i in x])
print(type(x))
net.compile(loss='sparse_categorical_crossentropy',
              optimizer=tf.keras.optimizers.Adam(),
              metrics=['accuracy'])

history = net.fit(x, y,
                    batch_size=64,
                    epochs=2,
                    validation_split=0.2)
test_scores = net.evaluate(test_x, test_y, verbose=2)

微调

说白了就是加载别人已经训练好的，然后进行修改。原理上来说，就是别人训练好的模型可以分20个类别，你仅需要预测其中的几个类别，但是数据集不太一样，但是别人的模型已经具备识别能力了，在自己的模型上进行训练（微调即可）。

热狗识别微调代码：

import tensorflow as tf
import numpy as np
import os
import zipfile
import wget
#使用的热狗数据集是从网上抓取的，它含有1400张包含热狗的正类图像，和同样多包含其他食品的负类图像。
#各类的1000张图像被用于训练，其余则用于测试。其实就是热狗类和其他类
#我们首先将压缩后的数据集下载到路径../data之下，然后在该路径将下载好的数据集解压，得到两个文件夹hotdog/train和hotdog/test。
# 这两个文件夹下面均有hotdog和not-hotdog两个类别文件夹，每个类别文件夹里面是图像文件
def download_data():
    data = os.getcwd()+'/data'
    #下载压缩文件
    if not os.path.exists(data+'/hotdog.zip'):
        base_url = 'https://apache-mxnet.s3-accelerate.amazonaws.com/'
        wget.download(
            base_url + 'gluon/dataset/hotdog.zip',
            data)
        print("已存在")
    #解压文件
    with zipfile.ZipFile(data+'/hotdog.zip', 'r') as z:
        z.extractall(os.getcwd())
download_data()

import pathlib
#解压后的文件路径
train_dir = 'hotdog/train'
test_dir = 'hotdog/test'
#加载文件
train_dir = pathlib.Path(train_dir)
# train_count = len(list(train_dir.glob('*/*.jpg')))
test_dir = pathlib.Path(test_dir)
# test_count = len(list(test_dir.glob('*/*.jpg')))
#获取所有便签种类
CLASS_NAMES = np.array([item.name for item in train_dir.glob('*') if item.name != 'LICENSE.txt' and item.name[0] != '.'])
#总共两类
print(len(CLASS_NAMES))
print(CLASS_NAMES)
#ImageDataGenerator是tf用于图像增广的库，下属操作将训练集和测试集归一化且调整为224*224的图片
image_generator = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
BATCH_SIZE = 32
IMG_HEIGHT = 224
IMG_WIDTH = 224

train_data_gen = image_generator.flow_from_directory(directory=str(train_dir),
                                                    batch_size=BATCH_SIZE,
                                                    target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                    shuffle=True,
                                                    classes = list(CLASS_NAMES))

test_data_gen = image_generator.flow_from_directory(directory=str(test_dir),
                                                    batch_size=BATCH_SIZE,
                                                    target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                    shuffle=True,
                                                    classes = list(CLASS_NAMES))
import matplotlib.pyplot as plt

def show_batch(image_batch, label_batch):
    plt.figure(figsize=(10,10))
    for n in range(15):
        ax = plt.subplot(5,5,n+1)
        plt.imshow(image_batch[n])
        plt.title(CLASS_NAMES[label_batch[n]==1][0].title())
        plt.axis('off')
    plt.show()
#train_data_gen是image_generator结构，如果要提取图片需要用next函数，返回一下
image_batch, label_batch = next(train_data_gen)
#32*224*224*3
print(image_batch.shape)
show_batch(image_batch, label_batch)
#使用在ImageNet数据集上预训练的ResNet-50作为源模型。
# 这里指定weights='imagenet'来自动下载并加载预训练的模型参数。
#一个神经网络包括两部分，一部分是features，另一部分是classifier，后者是全连接层
#仅仅加载features
ResNet50 = tf.keras.applications.resnet_v2.ResNet50V2(weights='imagenet', input_shape=(224,224,3))
#也可以只是用结构
#ResNet50 = tf.keras.applications.resnet_v2.ResNet50V2(input_shape=(224,224,3))
for layer in ResNet50.layers:
    layer.trainable = False
net = tf.keras.models.Sequential()
net.add(ResNet50)
net.add(tf.keras.layers.Flatten())
net.add(tf.keras.layers.Dense(2, activation='softmax'))
net.compile(optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy'])

history = net.fit_generator(
                    train_data_gen,
                    steps_per_epoch=10,
                    epochs=3,
                    validation_data=test_data_gen,
                    validation_steps=10
                    )

没有初始化参数的模型收敛速度更慢。微调的模型因为参数初始值更好，往往在相同迭代周期下取得更高的精度。

小结：

迁移学习将从源数据集学到的知识迁移到目标数据集上。微调是迁移学习的一种常用技术。
目标模型复制了源模型上除了输出层外的所有模型设计及其参数，并基于目标数据集微调这些参数。而目标模型的输出层需要从头训练。
一般来说，微调参数会使用较小的学习率，而从头训练输出层可以使用较大的学习率。

目标检测和边界框

边界框

代码实现（只是人为的弄个框，并不是自动检测出来的）：

import tensorflow as tf
import os
print(tf.__version__)
import matplotlib.pyplot as plt

img = plt.imread(os.getcwd()+'/img/girl2.jpg')
plt.imshow(img)
# bbox是bounding box的缩写
dog_bbox, cat_bbox = [20, 235, 300, 790], [520, 100, 750, 790]
#绘制函数
def bbox_to_rect(bbox, color):
    # 将边界框(左上x, 左上y, 右下x, 右下y)格式转换成matplotlib格式：
    # ((左上x, 左上y), 宽, 高)
    return plt.Rectangle(
        xy=(bbox[0], bbox[1]), width=bbox[2]-bbox[0], height=bbox[3]-bbox[1],
        fill=False, edgecolor=color, linewidth=2)
#进行绘制
fig = plt.imshow(img)
fig.axes.add_patch(bbox_to_rect(dog_bbox, 'blue'))
fig.axes.add_patch(bbox_to_rect(cat_bbox, 'red'))
plt.show()

锚框

说白了就是以某个像素为中心可以生成特别多的边界框，这些边界框就叫做锚框。

代码如下：

import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
import math
import os
img_raw = tf.io.read_file(os.getcwd()+'/img/girl2.jpg')
print(type(img_raw))
#转numpy
img = tf.image.decode_jpeg(img_raw).numpy()
print(type(img))
h, w = img.shape[0:2]
print(h, w)
#定义生成所有锚框的函数。对于整个输入图像，我们将一共生成 wh(n+m−1) 个锚框
def MultiBoxPrior(feature_map, sizes=[0.75, 0.5, 0.25], ratios=[1, 2, 0.5]):
    """
    # 按照「9.4.1. 生成多个锚框」所讲的实现, anchor表示成(xmin, ymin, xmax, ymax).
    https://zh.d2l.ai/chapter_computer-vision/anchor.html
    Args:
        feature_map: torch tensor, Shape: [N, C, H, W].
        sizes: List of sizes (0~1) of generated MultiBoxPriores.
        ratios: List of aspect ratios (non-negative) of generated MultiBoxPriores.
    Returns:
        anchors of shape (1, num_anchors, 4). 由于batch里每个都一样, 所以第一维为1
    """
    pairs = [] # pair of (size, sqrt(ratio))
    for r in ratios:
        pairs.append([sizes[0], np.sqrt(r)])
    for s in sizes[1:]:
        pairs.append([s, np.sqrt(ratios[0])])

    pairs = np.array(pairs)

    ss1 = pairs[:, 0] * pairs[:, 1] # size * sqrt(ration)
    ss2 = pairs[:, 0] / pairs[:, 1] # size / sqrt(retion)

    base_anchors = tf.stack([-ss1, -ss2, ss1, ss2], axis=1) / 2

    h, w = feature_map.shape[-2:]
    shifts_x = tf.divide(tf.range(0, w), w)
    shifts_y = tf.divide(tf.range(0, h), h)
    shift_x, shift_y = tf.meshgrid(shifts_x, shifts_y)
    shift_x = tf.reshape(shift_x, (-1,))
    shift_y = tf.reshape(shift_y, (-1,))
    shifts = tf.stack((shift_x, shift_y, shift_x, shift_y), axis=1)

    anchors = tf.add(tf.reshape(shifts, (-1,1,4)), tf.reshape(base_anchors, (1,-1,4)))
    return tf.cast(tf.reshape(anchors, (1,-1,4)), tf.float32)

x = tf.zeros((1,3,h,w))
y = MultiBoxPrior(x)
print(y.shape)
#3064000=800*766*5,5=len(sizes)+len(ratios)-1
#访问以（250，250）为中心的第一个锚框。
#它有4个元素，分别是锚框左上角的xx和yy轴坐标和右下角的xx和yy轴坐标，其中xx和yy轴的坐标值分别已除以图像的宽和高，因此值域均为0和1之间。
boxes = tf.reshape(y, (h,w,5,4))
print(boxes[250,250,0,:])

#描绘图像中以某个像素为中心的所有锚框的函数
def bbox_to_rect(bbox, color):
    # 将边界框(左上x, 左上y, 右下x, 右下y)格式转换成matplotlib格式：
    # ((左上x, 左上y), 宽, 高)
    return plt.Rectangle(
        xy=(bbox[0], bbox[1]), width=bbox[2]-bbox[0], height=bbox[3]-bbox[1],
        fill=False, edgecolor=color, linewidth=2)
def show_bboxes(axes, bboxes, labels=None, colors=None):
    def _make_list(obj, default_values=None):
        if obj is None:
            obj = default_values
        elif not isinstance(obj, (list, tuple)):
            obj = [obj]
        return obj

    labels = _make_list(labels)
    colors = _make_list(colors, ['b', 'g', 'r', 'm', 'c'])
    for i, bbox in enumerate(bboxes):
        color = colors[i % len(colors)]
        rect = bbox_to_rect(bbox.numpy(), color)
        axes.add_patch(rect)
        if labels and len(labels) > i:
            text_color = 'k' if color == 'w' else 'w'
            axes.text(rect.xy[0], rect.xy[1], labels[i],
                va='center', ha='center', fontsize=6,
                color=text_color, bbox=dict(facecolor=color, lw=0))
    plt.show()
from IPython import display
def use_svg_display():
    """Use svg format to display plot in jupyter"""
    display.set_matplotlib_formats('svg')

use_svg_display()
# 设置图的尺寸
plt.rcParams['figure.figsize'] = (3.5, 2.5)


fig = plt.imshow(img)
bbox_scale = tf.constant([[w,h,w,h]], dtype=tf.float32)
show_bboxes(fig.axes, tf.multiply(boxes[200,250,:,:], bbox_scale),
    ['s=0.75, r=1', 's=0.75, r=2', 's=0.55, r=0.5',
     's=0.5, r=1', 's=0.25, r=1'])

交并比

标注训练集的锚框

真实框：

生成框：

抑制后的框：

具体过程就不详细介绍了，直接看代码研究研究吧（其实我也不是很懂，大概光会用）

import tensorflow as tf
import matplotlib.pyplot as plt
import os
import numpy as np
#描绘图像中以某个像素为中心的所有锚框的函数
def bbox_to_rect(bbox, color):
    # 将边界框(左上x, 左上y, 右下x, 右下y)格式转换成matplotlib格式：
    # ((左上x, 左上y), 宽, 高)
    return plt.Rectangle(
        xy=(bbox[0], bbox[1]), width=bbox[2]-bbox[0], height=bbox[3]-bbox[1],
        fill=False, edgecolor=color, linewidth=2)
def show_bboxes(axes, bboxes, labels=None, colors=None):
    def _make_list(obj, default_values=None):
        if obj is None:
            obj = default_values
        elif not isinstance(obj, (list, tuple)):
            obj = [obj]
        return obj

    labels = _make_list(labels)
    colors = _make_list(colors, ['b', 'g', 'r', 'm', 'c'])
    for i, bbox in enumerate(bboxes):
        color = colors[i % len(colors)]
        rect = bbox_to_rect(bbox.numpy(), color)
        axes.add_patch(rect)
        if labels and len(labels) > i:
            text_color = 'k' if color == 'w' else 'w'
            axes.text(rect.xy[0], rect.xy[1], labels[i],
                va='center', ha='center', fontsize=6,
                color=text_color, bbox=dict(facecolor=color, lw=0))
    plt.show()

set_1 = [[1,2,3,4],[5,6,7,8]]
set_2 = [[1,1,1,1],[2,2,2,2]]
lower_bounds = tf.maximum(tf.expand_dims(set_1, axis=1), tf.expand_dims(set_2, axis=0)) # (n1, n2, 2)
upper_bounds = tf.minimum(tf.expand_dims(set_1, axis=1), tf.expand_dims(set_2, axis=0)) # (n1, n2, 2)

print(tf.expand_dims(set_1, axis=1), tf.expand_dims(set_2, axis=0), lower_bounds, tf.multiply(set_1, set_2), tf.subtract(set_1, set_2))
#交并比
# 参考https://github.com/sgrvinod/a-PyTorch-Tutorial-to-Object-Detection/blob/master/utils.py#L356
def compute_intersection(set_1, set_2):
    """
    计算anchor之间的交集
    Args:
        set_1: a tensor of dimensions (n1, 4), anchor表示成(xmin, ymin, xmax, ymax)
        set_2: a tensor of dimensions (n2, 4), anchor表示成(xmin, ymin, xmax, ymax)
    Returns:
        intersection of each of the boxes in set 1 with respect to each of the boxes in set 2, shape: (n1, n2)
    """
    # tensorflow auto-broadcasts singleton dimensions
    lower_bounds = tf.maximum(tf.expand_dims(set_1[:,:2], axis=1), tf.expand_dims(set_2[:,:2], axis=0)) # (n1, n2, 2)
    upper_bounds = tf.minimum(tf.expand_dims(set_1[:,2:], axis=1), tf.expand_dims(set_2[:,2:], axis=0)) # (n1, n2, 2)
    # 设置最小值
    intersection_dims = tf.clip_by_value(upper_bounds - lower_bounds, clip_value_min=0, clip_value_max=3) # (n1, n2, 2)
    return tf.multiply(intersection_dims[:, :, 0], intersection_dims[:, :, 1]) # (n1, n2)

def compute_jaccard(set_1, set_2):
    """
    计算anchor之间的Jaccard系数(IoU)
    Args:
        set_1: a tensor of dimensions (n1, 4), anchor表示成(xmin, ymin, xmax, ymax)
        set_2: a tensor of dimensions (n2, 4), anchor表示成(xmin, ymin, xmax, ymax)
    Returns:
        Jaccard Overlap of each of the boxes in set 1 with respect to each of the boxes in set 2, shape: (n1, n2)
    """
    # Find intersections
    intersection = compute_intersection(set_1, set_2)

    # Find areas of each box in both sets
    areas_set_1 = tf.multiply(tf.subtract(set_1[:, 2], set_1[:, 0]), tf.subtract(set_1[:, 3], set_1[:, 1]))  # (n1)
    areas_set_2 = tf.multiply(tf.subtract(set_2[:, 2], set_2[:, 0]), tf.subtract(set_2[:, 3], set_2[:, 1]))  # (n2)

    # Find the union
    union = tf.add(tf.expand_dims(areas_set_1, axis=1), tf.expand_dims(areas_set_2, axis=0))  # (n1, n2)
    union = tf.subtract(union, intersection)  # (n1, n2)

    return tf.divide(intersection, union) #(n1, n2)
def MultiBoxPrior(feature_map, sizes=[0.75, 0.5, 0.25], ratios=[1, 2, 0.5]):
    """
    # 按照「9.4.1. 生成多个锚框」所讲的实现, anchor表示成(xmin, ymin, xmax, ymax).
    https://zh.d2l.ai/chapter_computer-vision/anchor.html
    Args:
        feature_map: torch tensor, Shape: [N, C, H, W].
        sizes: List of sizes (0~1) of generated MultiBoxPriores.
        ratios: List of aspect ratios (non-negative) of generated MultiBoxPriores.
    Returns:
        anchors of shape (1, num_anchors, 4). 由于batch里每个都一样, 所以第一维为1
    """
    pairs = [] # pair of (size, sqrt(ratio))
    for r in ratios:
        pairs.append([sizes[0], np.sqrt(r)])
    for s in sizes[1:]:
        pairs.append([s, np.sqrt(ratios[0])])

    pairs = np.array(pairs)

    ss1 = pairs[:, 0] * pairs[:, 1] # size * sqrt(ration)
    ss2 = pairs[:, 0] / pairs[:, 1] # size / sqrt(retion)

    base_anchors = tf.stack([-ss1, -ss2, ss1, ss2], axis=1) / 2

    h, w = feature_map.shape[-2:]
    shifts_x = tf.divide(tf.range(0, w), w)
    shifts_y = tf.divide(tf.range(0, h), h)
    shift_x, shift_y = tf.meshgrid(shifts_x, shifts_y)
    shift_x = tf.reshape(shift_x, (-1,))
    shift_y = tf.reshape(shift_y, (-1,))
    shifts = tf.stack((shift_x, shift_y, shift_x, shift_y), axis=1)

    anchors = tf.add(tf.reshape(shifts, (-1,1,4)), tf.reshape(base_anchors, (1,-1,4)))
    return tf.cast(tf.reshape(anchors, (1,-1,4)), tf.float32)

img_raw = tf.io.read_file(os.getcwd()+'/img/girl2.jpg')
img = tf.image.decode_jpeg(img_raw).numpy()
h, w = img.shape[0:2]
x = tf.zeros((1,3,h,w))
y = MultiBoxPrior(x)
print(y.shape)
boxes = tf.reshape(y, (h,w,5,4))
print(tf.expand_dims(boxes[200,250,:,:][:, :2], axis=1), tf.expand_dims(boxes[210,260,1:2,:][:, :2], axis=0)
)
print(tf.maximum(tf.expand_dims(boxes[200,250,:,:][:, :2], axis=1), tf.expand_dims(boxes[210,260,1:2,:][:, :2], axis=0))
)
#使用交并比来衡量锚框与真实边界框以及锚框与锚框之间的相似度
bbox_scale = tf.constant([[w,h,w,h]], dtype=tf.float32)
#真实框
ground_truth = tf.constant([[0, 0, 0.3, 0.39, 0.99],
                [1, 0.57, 0.15, 0.99, 1]])
#生成的锚框
anchors = tf.constant([[0, 0.1, 0.2, 0.3],
            [0.15, 0.2, 0.4, 0.4],
            [0.63, 0.05, 0.88, 0.98],
            [0.66, 0.45, 0.8, 0.8],
            [0.57, 0.3,  0.92, 0.9]])

fig = plt.imshow(img)
show_bboxes(fig.axes, tf.multiply(ground_truth[:, 1:], bbox_scale),
        ['girl', 'boy'], 'k')
show_bboxes(fig.axes, tf.multiply(anchors, bbox_scale),
        ['0', '1', '2', '3', '4'])


def assign_anchor(bb, anchor, jaccard_threshold=0.5):
    """
    # 按照「9.4.1. 生成多个锚框」图9.3所讲为每个anchor分配真实的bb, anchor表示成归一化(xmin, ymin, xmax, ymax).
    https://zh.d2l.ai/chapter_computer-vision/anchor.html
    Args:
        bb: 真实边界框(bounding box), shape:（nb, 4）
        anchor: 待分配的anchor, shape:（na, 4）
        jaccard_threshold: 预先设定的阈值
    Returns:
        assigned_idx: shape: (na, ), 每个anchor分配的真实bb对应的索引, 若未分配任何bb则为-1
    """
    na = anchor.shape[0]
    nb = bb.shape[0]
    jaccard = compute_jaccard(anchor, bb).numpy()   # shape: (na, nb)
    assigned_idx = np.ones(na) * -1 # 初始全为-1

    # 先为每个bb分配一个anchor（不要求满足jaccard_threshold）
    jaccard_cp = jaccard.copy()
    for j in range(nb):
        i = np.argmax(jaccard_cp[:, j])
        assigned_idx[i] = j
        jaccard_cp[i, :] = float("-inf")    # 赋值为负无穷, 相当于去掉这一行

    # 处理还未被分配的anchor， 要求满足jaccard_threshold
    for i in range(na):
        if assigned_idx[i] == -1:
            j = np.argmax(jaccard[i, :])
            if jaccard[i, j] >= jaccard_threshold:
                assigned_idx[i] = j
    return tf.cast(assigned_idx, tf.int32)

def xy_to_cxcy(xy):
    """
    将(x_min, y_min, x_max, y_max)形式的anchor转换成(center_x, center_y, w, h)形式的.
    https://github.com/sgrvinod/a-PyTorch-Tutorial-to-Object-Detection/blob/master/utils.py
    Args:
        xy: bounding boxes in boundary coordinates, a tensor of size (n_boxes, 4)
    Returns:
        bounding boxes in center-size coordinates, a tensor of size (n_boxes, 4)
    """
    return tf.concat(((xy[:, 2:] + xy[:, :2]) / 2,  #c_x, c_y
              xy[:, 2:] - xy[:, :2]), axis=1)



def MultiBoxTarget(anchor, label):
    """
    # 按照「9.4.1. 生成多个锚框」所讲的实现, anchor表示成归一化(xmin, ymin, xmax, ymax).
    https://zh.d2l.ai/chapter_computer-vision/anchor.html
    Args:
        anchor: torch tensor, 输入的锚框, 一般是通过MultiBoxPrior生成, shape:（1，锚框总数，4）
        label: 真实标签, shape为(bn, 每张图片最多的真实锚框数, 5)
               第二维中，如果给定图片没有这么多锚框, 可以先用-1填充空白, 最后一维中的元素为[类别标签, 四个坐标值]
    Returns:
        列表, [bbox_offset, bbox_mask, cls_labels]
        bbox_offset: 每个锚框的标注偏移量，形状为(bn，锚框总数*4)
        bbox_mask: 形状同bbox_offset, 每个锚框的掩码, 一一对应上面的偏移量, 负类锚框(背景)对应的掩码均为0, 正类锚框的掩码均为1
        cls_labels: 每个锚框的标注类别, 其中0表示为背景, 形状为(bn，锚框总数)
    """
    assert len(anchor.shape) == 3 and len(label.shape) == 3
    bn = label.shape[0]

    def MultiBoxTarget_one(anchor, label, eps=1e-6):
        """
        MultiBoxTarget函数的辅助函数, 处理batch中的一个
        Args:
            anchor: shape of (锚框总数, 4)
            label: shape of (真实锚框数, 5), 5代表[类别标签, 四个坐标值]
            eps: 一个极小值, 防止log0
        Returns:
            offset: (锚框总数*4, )
            bbox_mask: (锚框总数*4, ), 0代表背景, 1代表非背景
            cls_labels: (锚框总数, 4), 0代表背景
        """
        an = anchor.shape[0]
        assigned_idx = assign_anchor(label[:, 1:], anchor) ## (锚框总数, )
        # 决定anchor留下或者舍去
        bbox_mask = tf.repeat(tf.expand_dims(tf.cast((assigned_idx >= 0), dtype=tf.double), axis=-1), repeats=4, axis=1)

        cls_labels = np.zeros(an, dtype=int) # 0表示背景
        assigned_bb = np.zeros((an, 4), dtype=float) # 所有anchor对应的bb坐标
        for i in range(an):
            bb_idx = assigned_idx[i]
            if bb_idx >= 0: # 即非背景
                cls_labels[i] = label.numpy()[bb_idx, 0] + 1 # 要注意加1
                assigned_bb[i, :] = label.numpy()[bb_idx, 1:]

        center_anchor = tf.cast(xy_to_cxcy(anchor), dtype=tf.double)  # (center_x, center_y, w, h)
        center_assigned_bb = tf.cast(xy_to_cxcy(assigned_bb), dtype=tf.double) # (center_x, center_y, w, h)

        offset_xy = 10.0 * (center_assigned_bb[:,:2] - center_anchor[:,:2]) / center_anchor[:,2:]
        offset_wh = 5.0 * tf.math.log(eps + center_assigned_bb[:, 2:] / center_anchor[:, 2:])
        offset = tf.multiply(tf.concat((offset_xy, offset_wh), axis=1), bbox_mask)    # (锚框总数, 4)

        return tf.reshape(offset, (-1,)), tf.reshape(bbox_mask, (-1,)), cls_labels

    batch_offset = []
    batch_mask = []
    batch_cls_labels = []
    for b in range(bn):
        offset, bbox_mask, cls_labels = MultiBoxTarget_one(anchor[0, :, :], label[b,:,:])

        batch_offset.append(offset)
        batch_mask.append(bbox_mask)
        batch_cls_labels.append(cls_labels)

    batch_offset = tf.convert_to_tensor(batch_offset)
    batch_mask = tf.convert_to_tensor(batch_mask)
    batch_cls_labels = tf.convert_to_tensor(batch_cls_labels)

    return [batch_offset, batch_mask, batch_cls_labels]
labels = MultiBoxTarget(tf.expand_dims(anchors, axis=0),tf.expand_dims(ground_truth, axis=0))
print(labels[2],labels[1],labels[0])


anchors = tf.convert_to_tensor([[0.1, 0.28, 0.38, 0.99],
                [0.08, 0.2, 0.56, 0.95],
                [0.15, 0.3, 0.62, 0.91],
                [0.7, 0.2, 0.98, 0.96]])
offset_preds = tf.convert_to_tensor([0.0] * (4 * len(anchors)))
cls_probs = tf.convert_to_tensor([[0., 0., 0., 0.], # 背景的预测概率
                [0.9, 0.6, 0.4, 0.1],    # 女孩的预测概率
                [0.1, 0.2, 0.3, 0.9]])   # 男孩的预测概率
print(anchors, offset_preds, cls_probs)
fig = plt.imshow(img)
show_bboxes(fig.axes, anchors * bbox_scale,
        ['girl=0.9', 'girl=0.6', 'girl=0.4', 'boy=0.9'])
#非极大值抑制
from collections import namedtuple
Pred_BB_Info = namedtuple("Pred_BB_Info",
        ["index", "class_id", "confidence", "xyxy"])

def non_max_suppression(bb_info_list, nms_threshold=0.5):
    """
    非极大抑制处理预测的边界框
    Args:
        bb_info_list: Pred_BB_Info的列表, 包含预测类别、置信度等信息
        nms_threshold: 阈值
    Returns:
        output: Pred_BB_Info的列表, 只保留过滤后的边界框信息
    """
    output = []
    # 现根据置信度从高到底排序
    sorted_bb_info_list = sorted(bb_info_list,
                    key = lambda x: x.confidence,
                    reverse=True)
    while len(sorted_bb_info_list) != 0:
        best = sorted_bb_info_list.pop(0)
        output.append(best)

        if len(sorted_bb_info_list) == 0:
            break
        bb_xyxy = []
        for bb in sorted_bb_info_list:
            bb_xyxy.append(bb.xyxy)

        iou = compute_jaccard(tf.convert_to_tensor(best.xyxy),
                    tf.squeeze(tf.convert_to_tensor(bb_xyxy), axis=1))[0] # shape: (len(sorted_bb_info_list), )
        n = len(sorted_bb_info_list)
        sorted_bb_info_list = [
                    sorted_bb_info_list[i] for i in
                    range(n) if iou[i] <= nms_threshold]
    return output
def MultiBoxDetection(cls_prob, loc_pred, anchor, nms_threshold=0.5):
    """
    # 按照「9.4.1. 生成多个锚框」所讲的实现, anchor表示成归一化(xmin, ymin, xmax, ymax).
    https://zh.d2l.ai/chapter_computer-vision/anchor.html
    Args:
        cls_prob: 经过softmax后得到的各个锚框的预测概率, shape:(bn, 预测总类别数+1, 锚框个数)
        loc_pred: 预测的各个锚框的偏移量, shape:(bn, 锚框个数*4)
        anchor: MultiBoxPrior输出的默认锚框, shape: (1, 锚框个数, 4)
        nms_threshold: 非极大抑制中的阈值
    Returns:
        所有锚框的信息, shape: (bn, 锚框个数, 6)
        每个锚框信息由[class_id, confidence, xmin, ymin, xmax, ymax]表示
        class_id=-1 表示背景或在非极大值抑制中被移除了
    """
    assert len(cls_prob.shape) == 3 and len(loc_pred.shape) == 2 and len(anchor.shape) == 3
    bn = cls_prob.shape[0]

    def MultiBoxDetection_one(c_p, l_p, anc, nms_threshold=0.5):
        """
        MultiBoxDetection的辅助函数, 处理batch中的一个
        Args:
            c_p: (预测总类别数+1, 锚框个数)
            l_p: (锚框个数*4, )
            anc: (锚框个数, 4)
            nms_threshold: 非极大抑制中的阈值
        Return:
            output: (锚框个数, 6)
        """
        pred_bb_num = c_p.shape[1]
        # 加上偏移量
        anc = tf.add(anc, tf.reshape(l_p, (pred_bb_num, 4))).numpy()

        # 最大的概率
        confidence = tf.reduce_max(c_p, axis=0)
        # 最大概率对应的id
        class_id = tf.argmax(c_p, axis=0)
        confidence = confidence.numpy()
        class_id = class_id.numpy()

        pred_bb_info = [Pred_BB_Info(index=i,
                    class_id=class_id[i]-1,
                    confidence=confidence[i],
                    xyxy=[anc[i]]) # xyxy是个列表
                for i in range(pred_bb_num)]
        # 正类的index
        obj_bb_idx = [bb.index for bb
                in non_max_suppression(pred_bb_info,
                            nms_threshold)]
        output = []
        for bb in pred_bb_info:
            output.append(np.append([
                (bb.class_id if bb.index in obj_bb_idx
                        else -1.0),
                bb.confidence],
                bb.xyxy))

        return tf.convert_to_tensor(output) # shape: (锚框个数， 6)

    batch_output = []
    for b in range(bn):
        batch_output.append(MultiBoxDetection_one(cls_prob[b],
                        loc_pred[b], anchor[0],
                        nms_threshold))

    return tf.convert_to_tensor(batch_output)
output = MultiBoxDetection(
    tf.expand_dims(cls_probs, 0),
    tf.expand_dims(offset_preds, 0),
    tf.expand_dims(anchors, 0),
    nms_threshold=0.5)
print(output)
fig = plt.imshow(img)
list_label=[]
list_anchors=[]
for i in output[0].numpy():
    if i[0] == -1:
        continue
    if i[1]<0.5:
        continue
    label = ('girl=', 'boy=')[int(i[0])] + str(i[1])
    list_label.append(label)
    print(i,label)
    list_anchors.append((i[2:]))
    #show_bboxes(fig.axes, tf.multiply(i[2:], bbox_scale), label)

show_bboxes(fig.axes, list_anchors * bbox_scale,
        list_label)

多尺度目标检测

设锚框大小为0.15，特征图的高和宽分别为4。

将特征图的高和宽分别减半，并用更大的锚框检测更大的目标。当锚框大小设0.4时，有些锚框的区域有重合。

将特征图的高和宽进一步减半至1，并将锚框大小增至0.8。此时锚框中心即图像中心。

小结：

可以在多个尺度下生成不同数量和不同大小的锚框，从而在多个尺度下检测不同大小的目标。
特征图的形状能确定任一图像上均匀采样的锚框中心。
用输入图像在某个感受野区域内的信息来预测输入图像上与该区域相近的锚框的类别和偏移量。

其他内容：tf版本尚未更新，后续更新

你可能感兴趣的:(深度学习,python,人工智能,深度学习,tensorflow,计算机视觉)

芯片的未来发展趋势 iccnewer
2024年，该行业将专注于AI/ML、RISC-V、量子、安全等发展趋势。今年年初，大多数人从未听说过生成式人工智能。现在整个世界都在竞相利用它，而这仅仅是个开始。量子计算、6G、智能基础设施等新市场领域专用处理正在加速对更快、更高效、更多数据的需求。与每隔几年等待下一个工艺节点的日子相比，未来几年的事件将与电话或汽车的引入一样重要。但可能不会只有一种创新技术，将会有很多技术一起以一种将让科技界惊
基于Python编程语言实现“机器学习”，用于车牌识别项目我的sun&shine Python python 机器学习计算机视觉
基于Python的验证码识别研究与实现1.摘要验证码的主要目的是区分人类和计算机，用来防止自动化脚本程序对网站的一些恶意行为，目前绝大部分网站都利用验证码来阻止恶意脚本程序的入侵。验证码的自动识别对于减少自动登录时长，识别难以识别的验证码图片有着重要的作用。对验证码图像进行灰度化、二值化、去离散噪声、字符分割、归一化、特征提取、训练和字符识别等过程可以实现验证码自动识别。首先将原图片进行灰度化处理
6.8:Python如何处理文件写入时出现的错误？小兔子平安 Python完整学习全解答 java windows html
Python是一种功能强大且易于学习的编程语言，已经成为了当今最流行的编程语言之一。随着Python应用领域的不断扩大，越来越多的人开始学习Python，希望能够掌握这个有用的工具，从而实现更多的创意和创新。而文件操作是Python编程中不可或缺的一部分，对于处理文件写入时的错误更是必须掌握的技能。本文主要介绍如何处理Python中文件写入时的错误。我们将详细讲解如何使用try-except语句、
Python3包开发的高效Cookiecutter模板：python-package-template 一只爪子
本文还有配套的精品资源，点击获取简介：本文介绍了一个名为python-package-template的Cookiecutter模板，用于简化Python包的开发过程。该模板遵循Python的最佳实践，并自动创建项目结构，包括setup.py、MANIFEST.in、LICENSE、README.md、.gitignore、requirements.txt、测试配置文件、CI配置文件、测试目录和文
python列表操作计算列表长度并输出,Python基础2：列表想吃草莓干
一、列表列表是按照特定顺序的排列组合，就像数学中的数列，列表中的元素具有⼀定的排列顺序。在Python中，列表用方括号[]来表示列表，比如：>>>a=['Python','C','Java']1、访问列表中的元素索引开始：0如果我们想要打印上述列表中Python，就需要我们访问列表中第一个元素，在Python中，列表的访问从0开始，索引数为元素的位置减去1，访问的元素位置放在方括号里面，如果我们想
Python项目自动化模板构建：深入理解Cookiecutter TEDDYYW
本文还有配套的精品资源，点击获取简介：Python项目的标准化构建过程对于代码的整洁和可维护性至关重要。本文将深入探讨如何利用"cookiecutter"这一Python命令行工具自动化项目的初始化过程。Cookiecutter通过读取预定义模板并根据用户输入自动生成项目结构，简化了项目设置。我们将详细了解"cookiecutter-python-master"模板的组成，包括标准项目结构、初始化
多阶段构建实现 Docker 加速与体积减小：含文件查看、上传及拷贝功能的 FastAPI 应用镜像构建九不多 Docker docker fastapi python YOLO
本文围绕使用Docker构建FastAPI应用镜像展开，着重介绍了多阶段构建的Dockerfile编写及相关操作。借助多阶段构建，不仅实现了Docker构建的加速，还有效减小了镜像体积。1.Dockerfile内容以下是我们要使用的Dockerfile内容：#第一个阶段-构建应用FROMdocker.1ms.run/python:3.9ASbuilder#设置工作目录WORKDIR/app#复制依
Python列表的创建只是没遇到 python
Python3列表序列是Python中最基本的数据结构。序列中的每个值都有对应的位置值，称之为索引，第一个索引是0，第二个索引是1，依此类推。Python有6个序列的内置类型，但最常见的是列表和元组。列表都可以进行的操作包括索引，切片，加，乘，检查成员。此外，Python已经内置确定序列的长度以及确定最大和最小的元素的方法。列表是最常用的Python数据类型，它可以作为一个方括号内的逗号分隔值出现
Python最佳实践项目Cookiecutter常见问题解决方案柯茵沙
Python最佳实践项目Cookiecutter常见问题解决方案python-best-practices-cookiecutterPythonbestpracticesprojectcookiecutter项目地址:https://gitcode.com/gh_mirrors/py/python-best-practices-cookiecutter项目基础介绍本项目是一个Python最佳实践的
Vision mamba(mamba_ssm)安装踩坑指南 ggitjcg 深度学习 python
在这篇博客中，我将分享我在linux环境安装和使用VisionMamba（mamba_ssm）过程中遇到的一些问题和解决方法。前置检查：PyTorch和Python版本在安装mamba_ssm前，请确保你的PyTorch和Python环境版本正确。以下代码可用来检查环境信息：importtorchprint("PyTorchVersion:{}".format(torch.__version__)
探秘 Cookiecutter：一个高效项目模板生成器尤琦珺Bess
探秘Cookiecutter：一个高效项目模板生成器cookiecutter项目地址:https://gitcode.com/gh_mirrors/coo/cookiecutter如果你是一位热衷于Python开发的程序员，或者你经常需要初始化新的项目，那么你可能会对感兴趣。这是一个强大的工具，它能够根据预定义的模板快速生成项目结构，极大地提高了开发效率。项目简介Cookiecutter是一个命令
Mac【卸载 Python】 - 3.12.2 Stongtang Python macos python
一、若使用官方安装包安装1.删除Python框架Python官方安装包会将Python安装到/Library/Frameworks/Python.framework/Versions/3.12目录下。你可以在终端中使用以下命令删除该目录：sudorm-rf/Library/Frameworks/Python.framework/Versions/3.12执行此命令时，系统会要求你输入管理员密码，输
Ubuntu和Windows系统之Mamba_ssm安装 Netceor Python ubuntu windows linux
Mamba的论文：https://arxiv.org/abs/2312.00752Mamba的github：https://github.com/state-spaces/mamba一、Ubuntu安装直接新建一个环境是最好的，不然很容易产生各种冲突#创建环境和相关包condacreate-nmambapython=3.10.13condaactivatemambacondainstallcuda
Python, C ++开发工厂管理APP Geeker-2025 python c++
开发一款通用的**工厂管理App**，结合Python和C++的优势，可以实现高效的后端数据处理、实时的生产监控以及用户友好的前端界面。以下是一个详细的开发方案，涵盖技术选型、功能模块、开发步骤等内容。##技术选型###后端（Python）-**编程语言**：Python-**Web框架**：Django或Flask-**数据库**：PostgreSQL或MySQL-**实时通信**：WebSoc
Python列表1 cfjybgkmf Python python 开发语言
#coding:utf-8print("————————————列表——————————————")'''列表是指一系列按照特定顺序排列的元素组成是Python中内置的可变序列使用[]定义列表，元素与元素之间使用英文的逗号分隔列表中的元素可以是任意的数据类型''''''列表的创建：（1）使用[]直接创建列表列表名=[element1,element2,...,elementN]（2）使用内置函数l
怎样才能把网页数据保存到网络上？ 2301_79698214 html javascript java 前端 html5
要将网页数据存放到网络中，一般可以通过以下几种常见的方式：1.使用后端服务器自建服务器：你可以搭建自己的服务器，例如使用Node.js的Express框架或者Python的Flask、Django框架。以下是一个使用Flask框架存储数据到服务器的简单示例：pythonApplyfromflaskimportFlask,requestapp=Flask(__name__)@app.route('/
Python列表2 cfjybgkmf Python python 开发语言
print("——————————列表的相关操作————————————")'''lst.append('x')在列表lst最后增加一个元素lst.insert(index,'x')在列表中第index位置增加一个元素lst.clear()清除列表lst中所有元素lst.pop(index)将列表lst中第index位置的元素取出，并从列表中将其删除lst.remove('x')将列表lst中出现
深入了解 Cookiecutter：Python 项目模板的强大工具 boringhex.top python 开源 python 开发语言
在软件开发过程中，创建新的项目往往需要重复执行一系列繁琐的步骤，尤其是在设置项目结构、配置文件和依赖方面。Cookiecutter是一个开源的命令行工具，旨在帮助开发者快速生成项目模板，从而提高开发效率。本文将深入探讨Cookiecutter的功能、工作原理、常见用法以及一些最佳实践。什么是Cookiecutter？Cookiecutter是一个用于创建项目模板的工具，支持多种语言和框架。它允许开
pyqt5报错：qt.qpa.plugin: Could not find the Qt platform plugin “xcb“（已解决）一问三不知_ 计算机知识 qt 开发语言 ubuntu bug conda python
我在使用pyqt库的时候报错：qt.qpa.plugin:CouldnotloadtheQtplatformplugin"xcb"in\"/mnt/private_disk/anaconda3/envs/aot-manip/lib/python3.8/site-packages/PyQt5/Qt5/plugins/platforms"eventhoughitwasfound.Thisapplica
Python程序设计（入门） xyyykx python 开发语言
目录一丶Python概述二丶Python数据类型三丶常用的进制四丶字符串型五丶程序控制结构六丶组合数据类型一丶Python概述Python是一种高级编程语言，由GuidovanRossum于1991年开发并发布。它具有简洁、易读、易学的语法特点，被广泛应用于多个领域，包括软件开发、数据科学、人工智能、网络编程等。以下是Python的一些主要特点和优势：简单易学：Python的语法简洁明了，易于理解
Python模块化设计 ——函数调用不解风情的老妖怪哎 Python程序设计题库 python windows 开发语言
1.以下代码的输出结果是()。defyoung(age):if25=60:print(“作为一个老师,你可以退休了”)else:print(“作为一个老师,你很有爱心”)young(42)A、作为一个老师,你很年轻B、作为一个老师,你太年轻了C、作为一个老师,你可以退休了D、作为一个老师,你很有爱心答案：D。将实参42传递给函数形参变量age,之后进入多分支结构,依次判断,因为30<42<60,故
LLM：软件测试的颠覆性力量 AI天才研究院 DeepSeek R1 &大数据AI人工智能大模型 AI大模型企业级应用开发实战计算科学神经计算深度学习神经网络大数据人工智能大型语言模型 AI AGI LLM Java Python 架构设计 Agent RPA
LLM：软件测试的颠覆性力量关键词：大语言模型（LLM）、软件测试、人工智能、测试自动化、测试效率、质量保证、测试革新1.背景介绍在当今快速发展的软件行业中，测试一直是确保产品质量的关键环节。随着人工智能技术的飞速进步，特别是大语言模型（LargeLanguageModels，简称LLM）的出现，软件测试领域正经历着前所未有的变革。LLM凭借其强大的自然语言处理能力和广泛的知识储备，正在重塑我们对
Bug:eventlet ImportError cannot import name ‘ALREADY HANDLED uncle_ll Bug合集
问题测试gunicorn不同work下的性能时候，在eventlet方式下报错误Error:classuri'eventlet'invalidornotfound:[Traceback(mostrecentcalllast):File"/app/venv/lib64/python3.6/site-packages/gunicorn/util.py",line99,inload_classmod=i
使用Dall-E生成图像：文本到图像的魔力 shuoac 计算机视觉人工智能 python
使用Dall-E生成图像：文本到图像的魔力技术背景介绍Dall-E是OpenAI开发的一个强大的文本到图像生成模型，它能够根据自然语言描述创造出全新的数字图像。这一技术基于深度学习的方法，使得创意与AI图像生成的结合更具可能性。本文将介绍如何调用Dall-EAPI来生成图像，从而使开发者能够将这一技术应用到自己的项目中。核心原理解析Dall-E利用大型语言模型（LLM）从用户提供的文本描述中提取详
Python语言程序设计 1 摸你就像摸自己 python
目录1Python基本语法元素￲1.1程序设计基本方法1.1.1计算机与程序设计1.1.2编译与解释1.1.3程序的基本编写方法—IPO1.1.4计算机编程1.2Python开发环境配置1.2.1Python语言概述1.2.2Python程序的编写与运行例1：计算圆面积例2：绘制同切圆例3：绘制五角星1.3实例一：温度转换1.3.1问题分析：实例编写：1.4Python程序语法元素分析1.4.1格
深入了解盘古大模型：技术、应用与未来 Hardess-god Literature review 人工智能
随着人工智能技术的迅猛发展，预训练大模型已成为AI领域最前沿、最热门的研究方向之一。近年来，中国自主研发的大模型之一——盘古模型（PanGuModel）逐渐进入公众视野，凭借其强大的性能和广泛的应用前景，引发了行业内外的广泛关注。什么是盘古大模型？盘古大模型是华为公司联合多家科研机构共同研发的超大规模预训练语言模型。该模型以中文数据为主进行训练，旨在推动中文自然语言处理（NLP）以及跨模态应用的技
华为OD机试 - 最佳对手（ Python） AsiaFT. Py 华为OD机试AB卷 python od
题目描述游戏里面，队伍通过匹配实力相近的对手进行对战。但是如果匹配的队伍实力相差太大，对于双方游戏体验都不会太好。给定n个队伍的实力值，对其进行两两实力匹配，两支队伍实例差距在允许的最大差距d内，则可以匹配。要求在匹配队伍最多的情况下匹配出的各组实力差距的总和最小。输入描述第一行，n，d。队伍个数n。允许的最大实力差距d。2<=n<=500<=d<=100第二行，n个队伍的实力值空格分割。0<=各
零基础上手Python数据分析 (6)：Python 异常处理，告别程序崩溃的烦恼！ kakaZhui python 数据分析数据库 excel 数据挖掘
回顾一下，前几篇博客我们学习了Python的基本语法、数据结构和文件操作。现在，我们已经掌握了Python编程的基础知识，可以开始编写更复杂的数据分析代码了。但是，在实际的数据分析工作中，程序并非总能一帆风顺地运行，总会遇到各种意外情况，例如：文件找不到：程序尝试读取一个不存在的数据文件。数据格式错误：数据文件中包含非预期的格式，例如本应是数字的列包含了文本。网络连接中断：程序尝试从网络获取数据，
2023华为OD机试真题-最佳对手(JAVA、Python、C++) huaweiod123 华为OD机试真题2023 java c++算法华为 python
题目描述：游戏里面，队伍通过匹配实力相近的对手进行对战。但是如果匹配的队伍实例相差太大，对于双方游戏体验都不会太好。给定n个队伍的实力值，对其进行两两实力匹配，两支队伍实例差距在允许的最大差距d内，则可以匹配。要求在匹配队伍最多的情况下，匹配出的各组实力差距的总和最小。输入描述：第一行，n，d。队伍个数n。允许的最大实力差距d。(2<=n<=50,0<=d<=100)。第二行，n个队伍的实力值，空
【深度学习|地学应用】滑坡灾害早期隐患的概念、特征及识别方法，同时解释其与人工边坡、滑坡易发性之间的联系与区别。 985小水博一枚呀深度学习人工智能
【深度学习|地学应用】滑坡灾害早期隐患的概念、特征及识别方法，同时解释其与人工边坡、滑坡易发性之间的联系与区别。【深度学习|地学应用】滑坡灾害早期隐患的概念、特征及识别方法，同时解释其与人工边坡、滑坡易发性之间的联系与区别。文章目录【深度学习|地学应用】滑坡灾害早期隐患的概念、特征及识别方法，同时解释其与人工边坡、滑坡易发性之间的联系与区别。1.滑坡灾害早期隐患的概念与特征概念主要特征2.通过光学
Hadoop(一) 朱辉辉33 hadoop linux
今天在诺基亚第一天开始培训大数据，因为之前没接触过Linux，所以这次一起学了，任务量还是蛮大的。首先下载安装了Xshell软件，然后公司给了账号密码连接上了河南郑州那边的服务器，接下来开始按照给的资料学习，全英文的，头也不讲解，说锻炼我们的学习能力，然后就开始跌跌撞撞的自学。这里写部分已经运行成功的代码吧. 在hdfs下，运行hadoop fs -mkdir /u
maven An error occurred while filtering resources blackproof maven 报错
转：http://stackoverflow.com/questions/18145774/eclipse-an-error-occurred-while-filtering-resources maven报错： maven An error occurred while filtering resources Maven -> Update Proje
jdk常用故障排查命令 daysinsun jvm
linux下常见定位命令： 1、jps 输出Java进程 -q 只输出进程ID的名称，省略主类的名称； -m 输出进程启动时传递给main函数的参数； &nb
java 位移运算与乘法运算周凡杨 java 位移运算乘法
对于 JAVA 编程中，适当的采用位移运算，会减少代码的运行时间，提高项目的运行效率。这个可以从一道面试题说起：问题：用最有效率的方法算出2 乘以8 等於几?” 答案：2 << 3 由此就引发了我的思考，为什么位移运算会比乘法运算更快呢？其实简单的想想，计算机的内存是用由 0 和 1 组成的二
java中的枚举(enmu) g21121 java
从jdk1.5开始，java增加了enum(枚举)这个类型，但是大家在平时运用中还是比较少用到枚举的，而且很多人和我一样对枚举一知半解，下面就跟大家一起学习下enmu枚举。先看一个最简单的枚举类型，一个返回类型的枚举： public enum ResultType { /** * 成功 */ SUCCESS, /** * 失败 */ FAIL,
MQ初级学习 510888780 activemq
1.下载ActiveMQ 去官方网站下载：http://activemq.apache.org/ 2.运行ActiveMQ 解压缩apache-activemq-5.9.0-bin.zip到C盘，然后双击apache-activemq-5.9.0-\bin\activemq-admin.bat运行ActiveMQ程序。启动ActiveMQ以后，登陆：http://localhos
Spring_Transactional_Propagation 布衣凌宇 spring transactional
//事务传播属性 @Transactional(propagation=Propagation.REQUIRED)//如果有事务，那么加入事务，没有的话新创建一个 @Transactional(propagation=Propagation.NOT_SUPPORTED)//这个方法不开启事务 @Transactional(propagation=Propagation.REQUIREDS_N
我的spring学习笔记12-idref与ref的区别 aijuans spring
idref用来将容器内其他bean的id传给<constructor-arg>/<property>元素，同时提供错误验证功能。例如： <bean id ="theTargetBean" class="..." /> <bean id ="theClientBean" class=&quo
Jqplot之折线图 antlove js jquery Web timeseries jqplot
timeseriesChart.html <script type="text/javascript" src="jslib/jquery.min.js"></script> <script type="text/javascript" src="jslib/excanvas.min.js&
JDBC中事务处理应用百合不是茶 java JDBC编程事务控制语句
解释事务的概念; 事务控制是sql语句中的核心之一;事务控制的作用就是保证数据的正常执行与异常之后可以恢复事务常用命令: Commit提交
[转]ConcurrentHashMap Collections.synchronizedMap和Hashtable讨论 bijian1013 java 多线程线程安全 HashMap
在Java类库中出现的第一个关联的集合类是Hashtable，它是JDK1.0的一部分。 Hashtable提供了一种易于使用的、线程安全的、关联的map功能，这当然也是方便的。然而，线程安全性是凭代价换来的――Hashtable的所有方法都是同步的。此时，无竞争的同步会导致可观的性能代价。Hashtable的后继者HashMap是作为JDK1.2中的集合框架的一部分出现的，它通过提供一个不同步的
ng-if与ng-show、ng-hide指令的区别和注意事项 bijian1013 JavaScript AngularJS
angularJS中的ng-show、ng-hide、ng-if指令都可以用来控制dom元素的显示或隐藏。ng-show和ng-hide根据所给表达式的值来显示或隐藏HTML元素。当赋值给ng-show指令的值为false时元素会被隐藏，值为true时元素会显示。ng-hide功能类似，使用方式相反。元素的显示或
【持久化框架MyBatis3七】MyBatis3定义typeHandler bit1129 TypeHandler
什么是typeHandler? typeHandler用于将某个类型的数据映射到表的某一列上，以完成MyBatis列跟某个属性的映射内置typeHandler MyBatis内置了很多typeHandler，这写typeHandler通过org.apache.ibatis.type.TypeHandlerRegistry进行注册，比如对于日期型数据的typeHandler，
上传下载文件rz,sz命令 bitcarter linux命令rz
刚开始使用rz上传和sz下载命令：因为我们是通过secureCRT终端工具进行使用的所以会有上传下载这样的需求：我遇到的问题： sz下载A文件10M左右，没有问题但是将这个文件A再传到另一天服务器上时就出现传不上去，甚至出现乱码，死掉现象，具体问题解决方法：上传命令改为;rz -ybe 下载命令改为：sz -be filename 如果还是有问题：那就是文
通过ngx-lua来统计nginx上的虚拟主机性能数据 ronin47 ngx-lua　统计解禁ip
介绍以前我们为nginx做统计,都是通过对日志的分析来完成.比较麻烦,现在基于ngx_lua插件,开发了实时统计站点状态的脚本,解放生产力.项目主页: https://github.com/skyeydemon/ngx-lua-stats 功能支持分不同虚拟主机统计, 同一个虚拟主机下可以分不同的location统计. 可以统计与query-times request-time
java-68-把数组排成最小的数。一个正整数数组，将它们连接起来排成一个数，输出能排出的所有数字中最小的。例如输入数组{32, 321}，则输出32132 bylijinnan java
import java.util.Arrays; import java.util.Comparator; public class MinNumFromIntArray { /** * Q68输入一个正整数数组，将它们连接起来排成一个数，输出能排出的所有数字中最小的一个。 * 例如输入数组{32, 321}，则输出这两个能排成的最小数字32132。请给出解决问题
Oracle基本操作 ccii Oracle SQL总结 Oracle SQL语法 Oracle基本操作 Oracle SQL
一、表操作 1. 常用数据类型 NUMBER(p,s)：可变长度的数字。p表示整数加小数的最大位数，s为最大小数位数。支持最大精度为38位 NVARCHAR2(size)：变长字符串，最大长度为4000字节（以字符数为单位） VARCHAR2(size)：变长字符串，最大长度为4000字节（以字节数为单位） CHAR(size)：定长字符串，最大长度为2000字节，最小为1字节，默认
[强人工智能]实现强人工智能的路线图 comsci 人工智能
1：创建一个用于记录拓扑网络连接的矩阵数据表 2:自动构造或者人工复制一个包含10万个连接(1000*1000)的流程图 3：将这个流程图导入到矩阵数据表中 4：在矩阵的每个有意义的节点中嵌入一段简单的
给Tomcat，Apache配置gzip压缩(HTTP压缩)功能 cwqcwqmax9 apache
背景： HTTP 压缩可以大大提高浏览网站的速度，它的原理是，在客户端请求网页后，从服务器端将网页文件压缩，再下载到客户端，由客户端的浏览器负责解压缩并浏览。相对于普通的浏览过程HTML ,CSS,Javascript , Text ，它可以节省40%左右的流量。更为重要的是，它可以对动态生成的，包括CGI、PHP , JSP , ASP , Servlet,SHTML等输出的网页也能进行压缩，
SpringMVC and Struts2 dashuaifu struts2 springMVC
SpringMVC VS Struts2 1: spring3开发效率高于struts 2: spring3 mvc可以认为已经100%零配置 3: struts2是类级别的拦截，一个类对应一个request上下文， springmvc是方法级别的拦截，一个方法对应一个request上下文，而方法同时又跟一个url对应所以说从架构本身上 spring3 mvc就容易实现r
windows常用命令行命令 dcj3sjt126com windows cmd command
在windows系统中，点击开始－运行，可以直接输入命令行，快速打开一些原本需要多次点击图标才能打开的界面，如常用的输入cmd打开dos命令行，输入taskmgr打开任务管理器。此处列出了网上搜集到的一些常用命令。winver 检查windows版本 wmimgmt.msc 打开windows管理体系结构(wmi) wupdmgr windows更新程序 wscrip
再看知名应用背后的第三方开源项目 dcj3sjt126com ios
知名应用程序的设计和技术一直都是开发者需要学习的，同样这些应用所使用的开源框架也是不可忽视的一部分。此前《 iOS第三方开源库的吐槽和备忘》中作者ibireme列举了国内多款知名应用所使用的开源框架，并对其中一些框架进行了分析，同样国外开发者 @iOSCowboy也在博客中给我们列出了国外多款知名应用使用的开源框架。另外txx's blog中详细介绍了 Facebook Paper使用的第三
Objective-c单例模式的正确写法 jsntghf 单例 ios iPhone
一般情况下，可能我们写的单例模式是这样的： #import <Foundation/Foundation.h> @interface Downloader : NSObject + (instancetype)sharedDownloader; @end #import "Downloader.h" @implementation
jquery easyui datagrid 加载成功，选中某一行 hae jquery easyui datagrid 数据加载
1.首先你需要设置datagrid的onLoadSuccess $( '#dg' ).datagrid({onLoadSuccess : function (data){ $( '#dg' ).datagrid( 'selectRow' ,3); }}); 2.onL
jQuery用户数字打分评价效果 ini JavaScript html jquery Web css
效果体验：http://hovertree.com/texiao/jquery/5.htmHTML文件代码： <!DOCTYPE html> <html xmlns="http://www.w3.org/1999/xhtml"> <head> <title>jQuery用户数字打分评分代码 - HoverTree</
mybatis的paramType kerryg DAO sql
MyBatis传多个参数： 1、采用#{0},#{1}获得参数： Dao层函数方法： public User selectUser(String name,String area); 对应的Mapper.xml <select id="selectUser" result
centos 7安装mysql5.5 MrLee23 centos
首先centos7 已经不支持mysql，因为收费了你懂得，所以内部集成了mariadb，而安装mysql的话会和mariadb的文件冲突，所以需要先卸载掉mariadb，以下为卸载mariadb，安装mysql的步骤。 #列出所有被安装的rpm package rpm -qa | grep mariadb #卸载 rpm -e mariadb-libs-5.
利用thrift来实现消息群发 qifeifei thrift
Thrift项目一般用来做内部项目接偶用的，还有能跨不同语言的功能，非常方便，一般前端系统和后台server线上都是3个节点，然后前端通过获取client来访问后台server，那么如果是多太server，就是有一个负载均衡的方法，然后最后访问其中一个节点。那么换个思路，能不能发送给所有节点的server呢，如果能就
实现一个sizeof获取Java对象大小 teasp java HotSpot 内存对象大小 sizeof
由于Java的设计者不想让程序员管理和了解内存的使用，我们想要知道一个对象在内存中的大小变得比较困难了。本文提供了可以获取对象的大小的方法，但是由于各个虚拟机在内存使用上可能存在不同，因此该方法不能在各虚拟机上都适用，而是仅在hotspot 32位虚拟机上，或者其它内存管理方式与hotspot 32位虚拟机相同的虚拟机上适用。
SVN错误及处理 xiangqian0505 SVN提交文件时服务器强行关闭
在SVN服务控制台打开资源库“SVN无法读取current” ---摘自网络写道 SVN无法读取current修复方法 Can't read file : End of file found 文件：repository/db/txn_current、repository/db/current 其中current记录当前最新版本号，txn_current记录版本库中版本