weixin_44140703

paddleOCR代码学习-详细注释

文章目录

- 可视化-代码
- predict_system.py
- det_db_head.py
- det_db_head.py
- predict_det.py
- db_postprocess.py

可视化-代码

def draw_ocr_box_txt(image,
                     boxes,
                     txts,
                     scores=None,
                     drop_score=0.5,
                     font_path="./doc/simfang.ttf"):
    """
    把结果画在图片上
    image,  图片
    boxes,  预测的点坐标 [[(171, 1308), (1440, 1462), (1438, 1692), (162, 1539)], ...]
    txts,   文本        ["888", ...]
    scores = None,  分数
    drop_score = 0.5, 丢弃阈值
    font_path = "./doc/simfang.ttf"  字体
    """
    h, w = image.height, image.width
    img_left = image.copy()
    # PIL.Image.new() 使用给定的模式和大小创建一个新图像
    # 新建一张白色的图
    img_right = Image.new('RGB', (w, h), (255, 255, 255))
    # ImageDraw模块提供了图像对象的简单2D绘制。用户可以使用这个模块创建新的图像，注释或润饰已存在图像，为web应用实时产生各种图形。 Draw()创建一个可以在给定图像上绘图的对象。
    draw_left = ImageDraw.Draw(img_left)
    draw_right = ImageDraw.Draw(img_right)
    for idx, (box, txt) in enumerate(zip(boxes, txts)):
        if scores is not None and scores[idx] < drop_score:
            continue
        # rgb 颜色设置 这里随机生成RGB颜色
        color = (random.randint(0, 255), random.randint(0, 255),
                 random.randint(0, 255))
        # 根据点坐标画多边形 fill为填充颜色
        draw_left.polygon(box, fill=color)
        # 根据点坐标画多边形，outline为边框颜色
        draw_right.polygon(
            [
                box[0][0], box[0][1], box[1][0], box[1][1], box[2][0],
                box[2][1], box[3][0], box[3][1]
            ],
            outline=color)
        # 高度计算 sqrt 开平方根 四个点的顺序是 左上点 右上点，右下点，左下点，宽度计算为使用前两个点坐标，利用勾股定理计算
        box_height = math.sqrt((box[0][0] - box[3][0])**2 + (box[0][1] - box[3][
            1])**2)
        # 宽度计算，开平方根
        box_width = math.sqrt((box[0][0] - box[1][0])**2 + (box[0][1] - box[1][
            1])**2)
        # 判断是竖着的还是横着的框 height大于2倍的width，认为框是竖着的
        if box_height > 2 * box_width:
            # 字体大小
            font_size = max(int(box_width * 0.9), 10)
            font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
            # 初始纵坐标
            cur_y = box[0][1]
            # 把字符分开 竖起来写书写
            for c in txt:
                char_size = font.getsize(c) # char_size,字符的尺寸，2个维度（width, height）
                draw_right.text(
                    (box[0][0] + 3, cur_y), c, fill=(0, 0, 0), font=font)
                cur_y += char_size[1]
        # 框横着
        else:
            font_size = max(int(box_height * 0.8), 10)
            font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
            draw_right.text(
                [box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font)
    # Image.blend 图像合成，两张图合成一张，0.5为不透明度，值越大，越不透明
    img = Image.blend(image, img_left, 0.5)
    # 新建一张图，设置RGB色彩(255, 255, 255)，设置宽度和高度，这里设置2倍的宽是为了粘贴2张图像
    img_show = Image.new('RGB', (w * 2, h), (255, 255, 255))
    # 把图像粘贴在img_show上
    img_show.paste(img, (0, 0, w, h))
    img_show.paste(img_right, (w, 0, w * 2, h))
    return np.array(img_show) # 转化为array格式后，opencv可以读取，也可以进入神经网络计算

predict_system.py

class TextSystem(object):
    def __init__(self, args):
        self.text_detector = predict_det.TextDetector(args)
        self.text_recognizer = predict_rec.TextRecognizer(args)
        self.use_angle_cls = args.use_angle_cls
        self.drop_score = args.drop_score
        if self.use_angle_cls:
            self.text_classifier = predict_cls.TextClassifier(args)

    def get_rotate_crop_image(self, img, points):
        '''
        剪切--做透视变换 和 旋转

        img_height, img_width = img.shape[0:2]
        left = int(np.min(points[:, 0]))
        right = int(np.max(points[:, 0]))
        top = int(np.min(points[:, 1]))
        bottom = int(np.max(points[:, 1]))
        img_crop = img[top:bottom, left:right, :].copy()
        points[:, 0] = points[:, 0] - left
        points[:, 1] = points[:, 1] - top
        '''
        img_crop_width = int(
            max(
                np.linalg.norm(points[0] - points[1]),
                np.linalg.norm(points[2] - points[3])))
        img_crop_height = int(
            max(
                np.linalg.norm(points[0] - points[3]),
                np.linalg.norm(points[1] - points[2])))
        pts_std = np.float32([[0, 0], [img_crop_width, 0],
                              [img_crop_width, img_crop_height],
                              [0, img_crop_height]])
        M = cv2.getPerspectiveTransform(points, pts_std)
        # cv2.warpPerspective()透视变换函数，可保持直线不变形，但是平行线可能不再平行
        dst_img = cv2.warpPerspective(
            img,
            M, (img_crop_width, img_crop_height),
            borderMode=cv2.BORDER_REPLICATE,
            flags=cv2.INTER_CUBIC)
        dst_img_height, dst_img_width = dst_img.shape[0:2]
        if dst_img_height * 1.0 / dst_img_width >= 1.5:
            dst_img = np.rot90(dst_img)
        return dst_img

    def print_draw_crop_rec_res(self, img_crop_list, rec_res):
        bbox_num = len(img_crop_list)
        for bno in range(bbox_num):
            cv2.imwrite("./output/img_crop_%d.jpg" % bno, img_crop_list[bno])
            logger.info(bno, rec_res[bno])

    def __call__(self, img):
        ori_im = img.copy()
        # 区域预测，返回预测区域四点和使用时间
        dt_boxes, elapse = self.text_detector(img)
        logger.info("dt_boxes num : {}, elapse : {}".format(
            len(dt_boxes), elapse))
        if dt_boxes is None:
            return None, None
        img_crop_list = []

        # 按从上到下、从左到右的顺序对文本框排序
        dt_boxes = sorted_boxes(dt_boxes)

        # 把预测的位置框裁剪出来一一做仿射变换，数据加入到列表中
        for bno in range(len(dt_boxes)):
            tmp_box = copy.deepcopy(dt_boxes[bno])
            # 裁剪--做透视变换和旋转
            img_crop = self.get_rotate_crop_image(ori_im, tmp_box)
            img_crop_list.append(img_crop)
        # 是否使用方向旋转
        if self.use_angle_cls:
            img_crop_list, angle_list, elapse = self.text_classifier(
                img_crop_list)
            logger.info("cls num  : {}, elapse : {}".format(
                len(img_crop_list), elapse))

        # 文本识别 返回结果和耗时
        rec_res, elapse = self.text_recognizer(img_crop_list)
        logger.info("rec_res num  : {}, elapse : {}".format(
            len(rec_res), elapse))
        # self.print_draw_crop_rec_res(img_crop_list, rec_res)
        filter_boxes, filter_rec_res = [], []
        # 这里根据 置信度 和 丢弃值 判断识别结果是否要丢弃，丢弃置信度低的结果，返回置信度高于丢弃值的结果
        for box, rec_reuslt in zip(dt_boxes, rec_res):
            text, score = rec_reuslt
            if score >= self.drop_score:
                filter_boxes.append(box)
                filter_rec_res.append(rec_reuslt)
        return filter_boxes, filter_rec_res


def sorted_boxes(dt_boxes):
    """
    Sort text boxes in order from top to bottom, left to right
    args:
        dt_boxes(array):detected text boxes with shape [4, 2]
    return:
        sorted boxes(array) with shape [4, 2]

    按从上到下、从左到右的顺序对文本框排序
    参数:
        dt_boxes(array):检测到形状为[4,2]的文本框
    返回:
        形状为[4,2]的排序框(数组)
    [[[ 275. 1996.],  [2664. 1829.],  [2697. 2295.],  [ 307. 2463.]],, [[ 623. 1566.],  [1523. 1520.],  [1540. 1854.],  [ 640. 1900.]],, [[ 392. 1540.],  [ 654. 1540.],  [ 654. 1859.],  [ 392. 1859.]]]
    """
    num_boxes = dt_boxes.shape[0]
    # 按照列表中x[0][1], x[0][0] 这两个元素的顺序-优先按照第一个，把列表排序
    sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0]))
    _boxes = list(sorted_boxes)

    # 这里实现了一个功能，排序后的几个标注框坐标，相邻两个点，满足给定条件，互换位置，重新排序
    for i in range(num_boxes - 1):
        if abs(_boxes[i + 1][0][1] - _boxes[i][0][1]) < 10 and \
                (_boxes[i + 1][0][0] < _boxes[i][0][0]):
            tmp = _boxes[i]
            _boxes[i] = _boxes[i + 1]
            _boxes[i + 1] = tmp
    return _boxes


def main(args):
    # 获取图片列表，返回一个list，可能有一个或者多个元素（图片）
    image_file_list = get_image_file_list(args.image_dir)
    # 实例化ocr模型
    text_sys = TextSystem(args)
    # 可视化
    is_visualize = True
    # 字体路径
    font_path = args.vis_font_path
    # 丢弃阈值 0.5
    drop_score = args.drop_score
    for image_file in image_file_list:
        # 读取图片，看是否是gif类型。若不是gif类型，返回 None, False
        img, flag = check_and_read_gif(image_file)
        # 如果flag为False 直接读取图片 ndarray类型数据，形状是(H, W, C)
        if not flag:
            img = cv2.imread(image_file)
        if img is None:
            logger.info("error in loading image:{}".format(image_file))
            continue
        # 开始时间
        starttime = time.time()
        # 进入ocr系统识别 返回识别出区域的点坐标 和 识别结果
        # dt_boxes：[[区域的四个点坐标],[区域的四个点坐标],[区域的四个点坐标]]
        # [array([[ 392., 1540.],
        #        [ 654., 1540.],
        #        [ 654., 1859.],
        #        [ 392., 1859.]], dtype=float32), array([[ 623., 1566.],
        #        [1523., 1520.],
        #        [1540., 1854.],
        #        [ 640., 1900.]], dtype=float32), array([[ 275., 1996.],
        #        [2664., 1829.],
        #        [2697., 2295.],
        #        [ 307., 2463.]], dtype=float32)]

        # rec_res：[(识别结果，置信度),(识别结果，置信度),(识别结果，置信度)]   [('P', 0.7642319), ('C2NT', 0.808735), ('3338490', 0.570314)]
        dt_boxes, rec_res = text_sys(img)
        # 用的时间
        elapse = time.time() - starttime
        # 日志信息,记录所用时间
        logger.info("Predict time of %s: %.3fs" % (image_file, elapse))
        # 日志信息，记录识别结果
        for text, score in rec_res:
            logger.info("{}, {:.3f}".format(text, score))

        # 可视化 如果True
        if is_visualize:
            # 把opencv读取的图片首先转化成RGB格式--cv2.cvtColor，再转化成PIL可以读取的格式--Image.fromarray
            image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            # 文字区域点坐标
            boxes = dt_boxes
            # 只取文本，不取置信度，获得文本列表
            txts = [rec_res[i][0] for i in range(len(rec_res))]
            # 获得置信度列表
            scores = [rec_res[i][1] for i in range(len(rec_res))]
            # 画图
            draw_img = draw_ocr_box_txt(
                image,  # 图片数据
                boxes,  # 区域点坐标
                txts,   # 识别出的文本结果列表
                scores, # 识别出的文本结果置信度列表
                drop_score=drop_score, # 丢弃阈值
                font_path=font_path)   # 字体路径
            # 结果图片保存路径
            draw_img_save = "./inference_results/"
            if not os.path.exists(draw_img_save):
                os.makedirs(draw_img_save)
            # 保存图片结果 最后的draw_img[:, :, ::-1]，用切片的方法把图片改成RGB格式
            cv2.imwrite(
                os.path.join(draw_img_save, os.path.basename(image_file)),
                draw_img[:, :, ::-1])
            # 日志信息
            logger.info("The visualized image saved in {}".format(
                os.path.join(draw_img_save, os.path.basename(image_file))))

det_db_head.py

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr


def get_bias_attr(k, name):
    stdv = 1.0 / math.sqrt(k * 1.0)
    initializer = paddle.nn.initializer.Uniform(-stdv, stdv)
    bias_attr = ParamAttr(initializer=initializer, name=name + "_b_attr")
    return bias_attr


class Head(nn.Layer):
    """
    这里，概率图和阈值图经过结构相同的网络（结构相同，参数不同）计算而来，二值图使用可微二值化公式计算而来，

    """
    def __init__(self, in_channels, name_list):
        """
        name_list:  ['conv2d_57', 'batch_norm_49', 'conv2d_transpose_2', 'batch_norm_50', 'conv2d_transpose_3', 'thresh']
        """
        super(Head, self).__init__()
        self.conv1 = nn.Conv2D(
            in_channels=in_channels,
            out_channels=in_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(name=name_list[0] + '.w_0'),   # name_list:让网络参数名字不重复且可控，并不影响实际的网络结构
            bias_attr=False)
        self.conv_bn1 = nn.BatchNorm(
            num_channels=in_channels // 4,
            param_attr=ParamAttr(
                name=name_list[1] + '.w_0',
                initializer=paddle.nn.initializer.Constant(value=1.0)),
            bias_attr=ParamAttr(
                name=name_list[1] + '.b_0',
                initializer=paddle.nn.initializer.Constant(value=1e-4)),
            moving_mean_name=name_list[1] + '.w_1',
            moving_variance_name=name_list[1] + '.w_2',
            act='relu')
        self.conv2 = nn.Conv2DTranspose(
            in_channels=in_channels // 4,
            out_channels=in_channels // 4,
            kernel_size=2,
            stride=2,
            weight_attr=ParamAttr(
                name=name_list[2] + '.w_0',
                initializer=paddle.nn.initializer.KaimingUniform()),
            bias_attr=get_bias_attr(in_channels // 4, name_list[-1] + "conv2"))
        self.conv_bn2 = nn.BatchNorm(
            num_channels=in_channels // 4,
            param_attr=ParamAttr(
                name=name_list[3] + '.w_0',
                initializer=paddle.nn.initializer.Constant(value=1.0)),
            bias_attr=ParamAttr(
                name=name_list[3] + '.b_0',
                initializer=paddle.nn.initializer.Constant(value=1e-4)),
            moving_mean_name=name_list[3] + '.w_1',
            moving_variance_name=name_list[3] + '.w_2',
            act="relu")
        # Conv2DTranspose：该层根据输入（input）、卷积核（kernel）和空洞大小（dilations）、步长（stride）、填充（padding）来计算输出特征层大小或者通过output_size指定输出特征层大小。
        # 输入(Input)和输出(Output)为NCHW或NHWC格式，其中N为批尺寸，C为通道数（channel），H为特征层高度，W为特征层宽度。卷积核是MCHW格式，M是输出图像通道数，C是输入图像通道数，H是卷积核高度，W是卷积核宽度。
        # 如果组数大于1，C等于输入图像通道数除以组数的结果。转置卷积的计算过程相当于卷积的反向计算。转置卷积又被称为反卷积（但其实并不是真正的反卷积）。
        self.conv3 = nn.Conv2DTranspose(
            in_channels=in_channels // 4,
            out_channels=1,
            kernel_size=2,
            stride=2,
            weight_attr=ParamAttr(
                name=name_list[4] + '.w_0',
                initializer=paddle.nn.initializer.KaimingUniform()),
            bias_attr=get_bias_attr(in_channels // 4, name_list[-1] + "conv3"),
        )

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv_bn1(x)
        x = self.conv2(x)
        x = self.conv_bn2(x)
        x = self.conv3(x)
        x = F.sigmoid(x)
        return x


class DBHead(nn.Layer):
    """
    Differentiable Binarization (DB) for text detection:
        see https://arxiv.org/abs/1911.08947
    args:
        params(dict): super parameters for build DB network
    """

    def __init__(self, in_channels, k=50, **kwargs):
        super(DBHead, self).__init__()
        self.k = k
        binarize_name_list = [
            'conv2d_56', 'batch_norm_47', 'conv2d_transpose_0', 'batch_norm_48',
            'conv2d_transpose_1', 'binarize'
        ]
        thresh_name_list = [
            'conv2d_57', 'batch_norm_49', 'conv2d_transpose_2', 'batch_norm_50',
            'conv2d_transpose_3', 'thresh'
        ]
        # 这里 binarize（收缩图-概率图） 和 thresh（阈值图）是完全一样的网络结构，只是在训练过程中，参数的调整，使其成具有不同的参数。
        self.binarize = Head(in_channels, binarize_name_list)
        self.thresh = Head(in_channels, thresh_name_list)

    def step_function(self, x, y):
        """
        根据收缩图-概率图和阈值图 求二值图
        """
        # paddle.reciprocal 对输入Tensor取倒数。这里是根据论文中可微二值化公式来的 k一般是50
        return paddle.reciprocal(1 + paddle.exp(-self.k * (x - y)))

    def forward(self, x):
        # 收缩图-概率图
        shrink_maps = self.binarize(x)
        if not self.training:
            return {'maps': shrink_maps}
        # 阈值图
        threshold_maps = self.thresh(x)
        # 二值图
        binary_maps = self.step_function(shrink_maps, threshold_maps)
        y = paddle.concat([shrink_maps, threshold_maps, binary_maps], axis=1)
        return {'maps': y}

det_db_head.py

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr


def get_bias_attr(k, name):
    stdv = 1.0 / math.sqrt(k * 1.0)
    initializer = paddle.nn.initializer.Uniform(-stdv, stdv)
    bias_attr = ParamAttr(initializer=initializer, name=name + "_b_attr")
    return bias_attr


class Head(nn.Layer):
    """
    这里，概率图和阈值图经过结构相同的网络（结构相同，参数不同）计算而来，二值图使用可微二值化公式计算而来

    """
    def __init__(self, in_channels, name_list):
        """
        name_list:  ['conv2d_57', 'batch_norm_49', 'conv2d_transpose_2', 'batch_norm_50', 'conv2d_transpose_3', 'thresh']
        """
        super(Head, self).__init__()
        self.conv1 = nn.Conv2D(
            in_channels=in_channels,
            out_channels=in_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(name=name_list[0] + '.w_0'),   # name_list:让网络参数名字不重复且可控，并不影响实际的网络结构
            bias_attr=False)
        self.conv_bn1 = nn.BatchNorm(
            num_channels=in_channels // 4,
            param_attr=ParamAttr(
                name=name_list[1] + '.w_0',
                initializer=paddle.nn.initializer.Constant(value=1.0)),
            bias_attr=ParamAttr(
                name=name_list[1] + '.b_0',
                initializer=paddle.nn.initializer.Constant(value=1e-4)),
            moving_mean_name=name_list[1] + '.w_1',
            moving_variance_name=name_list[1] + '.w_2',
            act='relu')
        self.conv2 = nn.Conv2DTranspose(
            in_channels=in_channels // 4,
            out_channels=in_channels // 4,
            kernel_size=2,
            stride=2,
            weight_attr=ParamAttr(
                name=name_list[2] + '.w_0',
                initializer=paddle.nn.initializer.KaimingUniform()),
            bias_attr=get_bias_attr(in_channels // 4, name_list[-1] + "conv2"))
        self.conv_bn2 = nn.BatchNorm(
            num_channels=in_channels // 4,
            param_attr=ParamAttr(
                name=name_list[3] + '.w_0',
                initializer=paddle.nn.initializer.Constant(value=1.0)),
            bias_attr=ParamAttr(
                name=name_list[3] + '.b_0',
                initializer=paddle.nn.initializer.Constant(value=1e-4)),
            moving_mean_name=name_list[3] + '.w_1',
            moving_variance_name=name_list[3] + '.w_2',
            act="relu")
        # Conv2DTranspose：该层根据输入（input）、卷积核（kernel）和空洞大小（dilations）、步长（stride）、填充（padding）来计算输出特征层大小或者通过output_size指定输出特征层大小。
        # 输入(Input)和输出(Output)为NCHW或NHWC格式，其中N为批尺寸，C为通道数（channel），H为特征层高度，W为特征层宽度。卷积核是MCHW格式，M是输出图像通道数，C是输入图像通道数，H是卷积核高度，W是卷积核宽度。
        # 如果组数大于1，C等于输入图像通道数除以组数的结果。转置卷积的计算过程相当于卷积的反向计算。转置卷积又被称为反卷积（但其实并不是真正的反卷积）。
        self.conv3 = nn.Conv2DTranspose(
            in_channels=in_channels // 4,
            out_channels=1,
            kernel_size=2,
            stride=2,
            weight_attr=ParamAttr(
                name=name_list[4] + '.w_0',
                initializer=paddle.nn.initializer.KaimingUniform()),
            bias_attr=get_bias_attr(in_channels // 4, name_list[-1] + "conv3"),
        )

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv_bn1(x)
        x = self.conv2(x)
        x = self.conv_bn2(x)
        x = self.conv3(x)
        x = F.sigmoid(x)
        return x


class DBHead(nn.Layer):
    """
    Differentiable Binarization (DB) for text detection:
        see https://arxiv.org/abs/1911.08947
    args:
        params(dict): super parameters for build DB network
    """

    def __init__(self, in_channels, k=50, **kwargs):
        super(DBHead, self).__init__()
        self.k = k
        binarize_name_list = [
            'conv2d_56', 'batch_norm_47', 'conv2d_transpose_0', 'batch_norm_48',
            'conv2d_transpose_1', 'binarize'
        ]
        thresh_name_list = [
            'conv2d_57', 'batch_norm_49', 'conv2d_transpose_2', 'batch_norm_50',
            'conv2d_transpose_3', 'thresh'
        ]
        # 这里 binarize（收缩图-概率图） 和 thresh（阈值图）是完全一样的网络结构，只是在训练过程中，参数的调整，使其成具有不同的参数。
        self.binarize = Head(in_channels, binarize_name_list)
        self.thresh = Head(in_channels, thresh_name_list)

    def step_function(self, x, y):
        """
        根据收缩图和阈值图 求二值图
        """
        # paddle.reciprocal 对输入Tensor取倒数。这里是根据论文中可微二值化公式来的 k一般是50
        return paddle.reciprocal(1 + paddle.exp(-self.k * (x - y)))

    def forward(self, x):
        # 收缩图-概率图,概率图和阈值图经过结构相同的网络（结构相同，参数不同）计算而来，二值图使用可微二值化公式计算而来，
        shrink_maps = self.binarize(x)
        if not self.training:
            return {'maps': shrink_maps}
        # 阈值图
        threshold_maps = self.thresh(x)
        # 二值图
        binary_maps = self.step_function(shrink_maps, threshold_maps)
        y = paddle.concat([shrink_maps, threshold_maps, binary_maps], axis=1)
        return {'maps': y}

predict_det.py

import os
import sys

__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))

os.environ["FLAGS_allocator_strategy"] = 'auto_growth'

import cv2
import numpy as np
import time
import sys

import tools.infer.utility as utility
from ppocr.utils.logging import get_logger
from ppocr.utils.utility import get_image_file_list, check_and_read_gif
from ppocr.data import create_operators, transform
from ppocr.postprocess import build_post_process

logger = get_logger()


class TextDetector(object):
    def __init__(self, args):
        self.args = args
        self.det_algorithm = args.det_algorithm
        pre_process_list = [{
            'DetResizeForTest': {
                'limit_side_len': args.det_limit_side_len,
                'limit_type': args.det_limit_type
            }
        }, {
            'NormalizeImage': {
                'std': [0.229, 0.224, 0.225],
                'mean': [0.485, 0.456, 0.406],
                'scale': '1./255.',
                'order': 'hwc'
            }
        }, {
            'ToCHWImage': None
        }, {
            'KeepKeys': {
                'keep_keys': ['image', 'shape']
            }
        }]
        postprocess_params = {}
        if self.det_algorithm == "DB":
            postprocess_params['name'] = 'DBPostProcess'
            postprocess_params["thresh"] = args.det_db_thresh
            postprocess_params["box_thresh"] = args.det_db_box_thresh
            postprocess_params["max_candidates"] = 1000
            postprocess_params["unclip_ratio"] = args.det_db_unclip_ratio
            postprocess_params["use_dilation"] = True
        elif self.det_algorithm == "EAST":
            postprocess_params['name'] = 'EASTPostProcess'
            postprocess_params["score_thresh"] = args.det_east_score_thresh
            postprocess_params["cover_thresh"] = args.det_east_cover_thresh
            postprocess_params["nms_thresh"] = args.det_east_nms_thresh
        elif self.det_algorithm == "SAST":
            pre_process_list[0] = {
                'DetResizeForTest': {
                    'resize_long': args.det_limit_side_len
                }
            }
            postprocess_params['name'] = 'SASTPostProcess'
            postprocess_params["score_thresh"] = args.det_sast_score_thresh
            postprocess_params["nms_thresh"] = args.det_sast_nms_thresh
            self.det_sast_polygon = args.det_sast_polygon
            if self.det_sast_polygon:
                postprocess_params["sample_pts_num"] = 6
                postprocess_params["expand_scale"] = 1.2
                postprocess_params["shrink_ratio_of_width"] = 0.2
            else:
                postprocess_params["sample_pts_num"] = 2
                postprocess_params["expand_scale"] = 1.0
                postprocess_params["shrink_ratio_of_width"] = 0.3
        else:
            logger.info("unknown det_algorithm:{}".format(self.det_algorithm))
            sys.exit(0)

        self.preprocess_op = create_operators(pre_process_list)
        self.postprocess_op = build_post_process(postprocess_params)
        self.predictor, self.input_tensor, self.output_tensors = utility.create_predictor(
            args, 'det', logger)  # paddle.jit.load(args.det_model_dir)
        # self.predictor.eval()

    def order_points_clockwise(self, pts):
        """
        把点坐标 按照顺时针方向排序
        
        reference from: https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py
        # sort the points based on their x-coordinates  根据这些点的x坐标对它们排序
        """
        # np.argsort将矩阵a按照axis排序，并返回排序后的下标  pts[np.argsort(pts[:, 0]), :] 根据返回的下标取值
        xSorted = pts[np.argsort(pts[:, 0]), :]

        # grab the left-most and right-most points from the sorted 从已排序的对象中获取最左和最右的点
        # x-roodinate points
        leftMost = xSorted[:2, :]
        rightMost = xSorted[2:, :]

        # now, sort the left-most coordinates according to their
        # y-coordinates so we can grab the top-left and bottom-left
        # points, respectively
        leftMost = leftMost[np.argsort(leftMost[:, 1]), :]
        (tl, bl) = leftMost

        rightMost = rightMost[np.argsort(rightMost[:, 1]), :]
        (tr, br) = rightMost

        rect = np.array([tl, tr, br, bl], dtype="float32")
        return rect

    def clip_det_res(self, points, img_height, img_width):
        """对点坐标进行裁剪，使其坐标不超出图片范围"""
        for pno in range(points.shape[0]):
            points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1))
            points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1))
        return points

    def filter_tag_det_res(self, dt_boxes, image_shape):
        """对文本框进行过滤  按顺时针排序-限制坐标-求两点距离，有两点间直线距离小于3的过滤掉"""
        img_height, img_width = image_shape[0:2]
        dt_boxes_new = []
        for box in dt_boxes:
            # 把点坐标按照顺时针方向排序
            box = self.order_points_clockwise(box)
            # 对点坐标进行裁剪，使其坐标不超出图片范围
            box = self.clip_det_res(box, img_height, img_width)
            # np.linalg.norm 求矩阵二范式，即求两点间直线距离
            rect_width = int(np.linalg.norm(box[0] - box[1]))
            rect_height = int(np.linalg.norm(box[0] - box[3]))
            if rect_width <= 3 or rect_height <= 3:
                continue
            dt_boxes_new.append(box)
        dt_boxes = np.array(dt_boxes_new)
        return dt_boxes

    def filter_tag_det_res_only_clip(self, dt_boxes, image_shape):
        img_height, img_width = image_shape[0:2]
        dt_boxes_new = []
        for box in dt_boxes:
            box = self.clip_det_res(box, img_height, img_width)
            dt_boxes_new.append(box)
        dt_boxes = np.array(dt_boxes_new)
        return dt_boxes

    def __call__(self, img):
        # ori_im 原始图片做备份
        ori_im = img.copy()
        data = {'image': img}
        # 对数据进行处理
        data = transform(data, self.preprocess_op)
        img, shape_list = data
        if img is None:
            return None, 0
        img = np.expand_dims(img, axis=0)
        shape_list = np.expand_dims(shape_list, axis=0)
        img = img.copy()
        starttime = time.time()
        # 获取输入的张量  copy_from_cpu--从cpu获取模型运行所需输入数据
        self.input_tensor.copy_from_cpu(img)
        # 运行predictor
        self.predictor.run()
        outputs = []
        # 获取输出  copy_to_cpu: 获取模型运行输出结果
        for output_tensor in self.output_tensors:
            output = output_tensor.copy_to_cpu()
            outputs.append(output)

        preds = {}
        if self.det_algorithm == "EAST":
            preds['f_geo'] = outputs[0]
            preds['f_score'] = outputs[1]
        elif self.det_algorithm == 'SAST':
            preds['f_border'] = outputs[0]
            preds['f_score'] = outputs[1]
            preds['f_tco'] = outputs[2]
            preds['f_tvo'] = outputs[3]
        elif self.det_algorithm == 'DB':
            preds['maps'] = outputs[0]
        else:
            raise NotImplementedError
        # postprocess_op：后处理，获取后处理结果post_result
        post_result = self.postprocess_op(preds, shape_list)
        # dt_boxes：预测结果 点坐标
        dt_boxes = post_result[0]['points']
        if self.det_algorithm == "SAST" and self.det_sast_polygon:
            dt_boxes = self.filter_tag_det_res_only_clip(dt_boxes, ori_im.shape)
        else:
            # 对文本框进行过滤  有两点间直线距离小于3（设置的阈值）的过滤掉
            dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape)
        elapse = time.time() - starttime
        return dt_boxes, elapse

db_postprocess.py

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import cv2
import paddle
from shapely.geometry import Polygon
import pyclipper


class DBPostProcess(object):
    """
    The post process for Differentiable Binarization (DB).
    可微二值化(DB)后处理。
    """

    def __init__(self,
                 thresh=0.3,
                 box_thresh=0.7,
                 max_candidates=1000,
                 unclip_ratio=2.0,
                 use_dilation=False,
                 **kwargs):
        self.thresh = thresh
        self.box_thresh = box_thresh
        self.max_candidates = max_candidates
        self.unclip_ratio = unclip_ratio
        self.min_size = 3
        self.dilation_kernel = None if not use_dilation else np.array(
            [[1, 1], [1, 1]])

    def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
        '''
        从二值图获取box

        _bitmap: single map with shape (1, H, W),
                whose values are binarized as {0, 1}

        _bitmap:带有形状(1,H, W)的二值化单图，
                其值二值化为{0,1}
        '''

        bitmap = _bitmap
        height, width = bitmap.shape
        # cv2.findContours()函数：查找检测物体的轮廓 接受的参数为二值图，即黑白的（不是灰度图）
        # findcontours函数会“原地”修改输入的图像。
        # 第一个参数是寻找轮廓的图像；
        # 第二个参数表示轮廓的检索模式，有四种（本文介绍的都是新的cv2接口）：
        # cv2.RETR_EXTERNAL
        # 表示只检测外轮廓
        # cv2.RETR_LIST
        # 检测的轮廓不建立等级关系
        # cv2.RETR_CCOMP
        # 建立两个等级的轮廓，上面的一层为外边界，里面的一层为内孔的边界信息。如果内孔内还有一个连通物体，这个物体的边界也在顶层。
        # cv2.RETR_TREE
        # 建立一个等级树结构的轮廓。

        # 第三个参数method为轮廓的近似办法
        # cv2.CHAIN_APPROX_NONE
        # 存储所有的轮廓点，相邻的两个点的像素位置差不超过1，即max（abs（x1 - x2），abs（y2 - y1）） == 1
        # cv2.CHAIN_APPROX_SIMPLE
        # 压缩水平方向，垂直方向，对角线方向的元素，只保留该方向的终点坐标，例如一个矩形轮廓只需4个点来保存轮廓信息
        # cv2.CHAIN_APPROX_TC89_L1，CV_CHAIN_APPROX_TC89_KCOS
        # 使用teh - Chinlchain近似算法

        # 返回两个值，一个是轮廓本身，还有一个是每条轮廓对应的属性。
        # cv2.findContours()函数首先返回一个list，list中每个元素都是图像中的一个轮廓，用numpy中的ndarray表示;每个轮廓是一个ndarray，每个ndarray是轮廓上的点的集合。
        outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST,
                                cv2.CHAIN_APPROX_SIMPLE)
        if len(outs) == 3:
            img, contours, _ = outs[0], outs[1], outs[2]
        elif len(outs) == 2:
            contours, _ = outs[0], outs[1]
        # num_contours 中每个元素是一个轮廓---是轮廓上的点的集合
        num_contours = min(len(contours), self.max_candidates)

        boxes = []
        scores = []
        for index in range(num_contours):
            contour = contours[index]
            # 获取区域的四点坐标，sside是h和w的其中较小的值
            points, sside = self.get_mini_boxes(contour)
            # 小于最小值min_size的过滤掉
            if sside < self.min_size:
                continue
            points = np.array(points)
            # 计算得到文本框分数
            score = self.box_score_fast(pred, points.reshape(-1, 2))
            # 根据分数和阈值过滤
            if self.box_thresh > score:
                continue

            # 根据points获取box
            box = self.unclip(points).reshape(-1, 1, 2)
            box, sside = self.get_mini_boxes(box)
            if sside < self.min_size + 2:
                continue
            box = np.array(box)
            # np.clip限制点坐标，使其不超出图片
            box[:, 0] = np.clip(
                np.round(box[:, 0] / width * dest_width), 0, dest_width)
            box[:, 1] = np.clip(
                np.round(box[:, 1] / height * dest_height), 0, dest_height)
            boxes.append(box.astype(np.int16))
            scores.append(score)
        return np.array(boxes, dtype=np.int16), scores


    def unclip(self, box):
        """"
        在推理时可以采用概率图或近似二值图来生成文本框，为了方便作者选择了概率图，具体步骤如下：
        1、使用固定阈值0.2将概率图做二值化得到二值化图；
        2、由二值化图得到收缩文字区域；
        3、将收缩文字区域按Vatti clipping算法的偏移系数D'进行扩张得到最终文本框。
        其中A'、L'是收缩区域的面积、周长，r‘依经验设置为1.5（对应收缩比例r=0.4）。
        """
        unclip_ratio = self.unclip_ratio
        poly = Polygon(box)
        distance = poly.area * unclip_ratio / poly.length
        # pco = pyclipper.PyclipperOffset()
        # pco.AddPath(subj, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
        # solution = pco.Execute(-7.0)   这三行代码做多边形的坐标偏移 在这里是做文本框的伸缩
        offset = pyclipper.PyclipperOffset()
        offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
        expanded = np.array(offset.Execute(distance))
        return expanded

    def get_mini_boxes(self, contour):
        """
        获取最小的box并且排序，返回box点坐标

        contour: 构成轮廓的几个点坐标
        """
        # cv2.minAreaRect(Points)
        # 其中points是点集，数据类型为ndarray，array((x1,y1),(x2,y2),....,(xn,yn))
        # 而minAreaRect就是求出在上述点集下的最小面积矩形。
        # 函数 cv2.minAreaRect() 返回一个Box2D结构 rect：（最小外接矩形的中心（x，y），（宽度，高度），旋转角度）。
        # 分别对应于返回值：(rect[0][0],  rect[0][1]),  (rect[1][0],  rect[1][1]),  rect[2]
        # cv2.boxPoints(rect)可以返回四个点的值，其中cv2.boxPoints(rect)[0]为point[0]，cv2.boxPoints(rect)[1]为point[1]......
        bounding_box = cv2.minAreaRect(contour)
        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])

        # 以下代码只是对点坐标做一个排序，按照左上，右上，右下，左下顺序排列
        index_1, index_2, index_3, index_4 = 0, 1, 2, 3
        if points[1][1] > points[0][1]:
            index_1 = 0
            index_4 = 1
        else:
            index_1 = 1
            index_4 = 0
        if points[3][1] > points[2][1]:
            index_2 = 2
            index_3 = 3
        else:
            index_2 = 3
            index_3 = 2

        box = [
            points[index_1], points[index_2], points[index_3], points[index_4]
        ]
        # bounding_box[1] 是(w, h)  这里返回h和w的最小值
        return box, min(bounding_box[1])

    def box_score_fast(self, bitmap, _box):
        """
        通过点坐标 和 mask填充，计算得到一个均值分数，这里不太懂 ？？？

        """
        h, w = bitmap.shape[:2]
        box = _box.copy()
        # np.clip这个函数将将数组中的元素限制在a_min, a_max之间，大于a_max的就使得它等于 a_max，小于a_min,的就使得它等于a_min。
        # 这里是对区域的几个点做一个限制，使其不超出图片区域！
        xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int), 0, w - 1)
        xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int), 0, w - 1)
        ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int), 0, h - 1)
        ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int), 0, h - 1)

        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
        box[:, 0] = box[:, 0] - xmin
        box[:, 1] = box[:, 1] - ymin
        # cv2.fillPoly()函数可以用来填充任意形状的图型.可以用来绘制多边形,工作中也经常使用非常多个边来近似的画一条曲线.cv2.fillPoly()函数可以一次填充多个图型.
        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]

    def __call__(self, outs_dict, shape_list):
        """
        outs_dict：{'maps': [1,1,960,544]}
        shape_list：[1, 4]
        """
        pred = outs_dict['maps']
        if isinstance(pred, paddle.Tensor):
            pred = pred.numpy()
        pred = pred[:, 0, :, :]
        # segmentation : 布尔值矩阵 由 pred > self.thresh 得到的结果矩阵
        # 使用固定阈值0.3将概率图做二值化得到二值化图；
        segmentation = pred > self.thresh

        boxes_batch = []
        for batch_index in range(pred.shape[0]):
            src_h, src_w, ratio_h, ratio_w = shape_list[batch_index]
            if self.dilation_kernel is not None:
                # cv2.dilate()函数对图片进行膨胀处理,-白色部分膨胀，黑色部分变小. 该函数的参数含义： img – 目标图片 kernel – 进行操作的内核，默认为3×3的矩阵 iterations – 腐蚀次数，默认为1
                mask = cv2.dilate(
                    np.array(segmentation[batch_index]).astype(np.uint8),
                    self.dilation_kernel)
            else:
                mask = segmentation[batch_index]
            # 从二值图得到文本框和分数
            boxes, scores = self.boxes_from_bitmap(pred[batch_index], mask,
                                                   src_w, src_h)
            boxes_batch.append({'points': boxes})
        return boxes_batch

你可能感兴趣的:(python学习,学习总结,计算机视觉,python,可视化,深度学习,神经网络)

PyWavelets shangjg3 PyTorch pytorch 人工智能 python
PyWavelets（pywt）是Python中用于小波变换的核心库，提供了丰富的信号处理和图像处理功能。以下是其核心功能的详细介绍：1.小波变换基础（1）离散小波变换（DWT）将信号分解为近似系数（Approximation）和细节系数（Detail）。importpywtimportnumpyasnp#示例信号signal=np.array([1
Anaconda插件开发 lyh1344 数据库开发
开发环境准备安装Anaconda或Miniconda，确保conda命令可用。推荐使用Python3.7及以上版本。创建独立的开发环境以避免依赖冲突：condacreate-nplugin_devpython=3.8condaactivateplugin_dev插件结构设计Anaconda插件通常采用Python包的标准结构。核心文件包括__init__.py和setup.py。典型目录结构如下：
Python3 数字(Number) froginwe11 开发语言
Python3数字(Number)引言在编程语言中，数字是构成程序的基础元素之一。Python3作为一种高级编程语言，提供了丰富的数字类型和操作方法。本文将详细介绍Python3中的数字类型，包括整数、浮点数、复数等，并探讨它们的特性和应用。整数（Integer）整数是Python3中最基本的数据类型之一，用于表示没有小数部分的数值。在Python3中，整数类型没有大小限制，可以表示任意大小的整数
Python编程：使用Opencv进行图像处理
【参考】https://github.com/opencv/opencv/tree/4.x/samples/pythonPython使用OpenCV进行图像处理OpenCV(OpenSourceComputerVisionLibrary)是一个开源的计算机视觉和机器学习软件库。下面将从基础到高阶介绍如何使用Python中的OpenCV进行图像处理。一、安装首先需要安装OpenCV库：pipinst
Web3前沿科技：开启数字资产交易新征程 AI天才研究院 AI大模型企业级应用开发实战 Agentic AI 实战 AI人工智能与大数据 web3 科技 ai
Web3前沿科技：开启数字资产交易新征程关键词：Web3、数字资产交易、区块链、智能合约、去中心化金融摘要：本文聚焦于Web3前沿科技在数字资产交易领域的应用与发展。详细阐述了Web3的核心概念、相关技术原理，包括区块链、智能合约等。通过具体的算法原理和Python代码示例，深入剖析了数字资产交易在Web3环境下的运行机制。同时，结合实际项目案例，讲解了开发环境搭建、代码实现与解读。探讨了Web3
区块链技术概述：从比特币到Web3.0 闲人编程 Python区块链50讲区块链 web3 python 元宇宙比特币安全
目录区块链技术概述：从比特币到Web3.0引言：数字革命的下一篇章1.区块链技术基础1.1区块链定义与核心特征1.2区块链数据结构可视化2.比特币：区块链的开端2.1比特币的核心创新2.2比特币交易生命周期3.以太坊与智能合约革命3.1以太坊的核心创新3.2智能合约执行流程4.Web3.0：互联网的新范式4.1Web3.0的核心特征4.2Web3技术栈5.Python实现简易区块链系统5.1区块类
【Python常用模块】_Pandas模块3-DataFrame对象失心疯_2023 Python常用模块数据分析 pandas 数据挖掘 python 数据统计数据处理
课程推荐我的个人主页：失心疯的个人主页入门教程推荐：Python零基础入门教程合集虚拟环境搭建：Python项目虚拟环境(超详细讲解)PyQt5系列教程：PythonGUI(PyQt5)教程合集Oracle数据库教程：Oracle数据库教程合集MySQL数据库教程：MySQL数据库教程合集优质资源下载：资源下载合集
《Python数据分析与挖掘实战》Chapter8中医证型关联规则挖掘笔记茫茫大地真干净机器学习 Python 数据挖掘
最近在学习《Python数据分析与挖掘实战》中的案例，写写自己的心得。代码分为两大部分：1.读取数据并进行聚类分析2.应用Apriori关联规则挖掘规律1.聚类部分函数分析：defprogrammer_1():datafile="C:/Users/longming/Desktop/chapter8/data/data.xls"processedfile="C:/Users/longming/Des
利用chatGPT提取复杂json数据到excel文件中 z日火工具使用 excel chatgpt json
利用chatGPT提取复杂json数据到excel文件中1利用swagger导出json类型的接口数据2使用hiJson工具查看json结构3利用ChatGPT写python代码解析数据4复制代码到vscode运行任务说明：整理一个项目的所有接口，保存到excel文档中。在这里插入图片描述1利用swagger导出json类型的接口数据2使用hiJson工具查看json结构我需要json数据的"pa
【Python深度学习】零基础掌握Pytorch Pooling layers nn.MaxPool方法 Mr数据杨 Python 深度学习 python 深度学习 pytorch
在深度学习的世界中，MaxPooling是一种关键的操作，用于降低数据的维度并保留重要特征。这就像是从一堆照片中挑选出最能代表某个场景的那张。PyTorch提供了多种MaxPooling层，包括nn.MaxPool1d、nn.MaxPool2d和nn.MaxPool3d，它们分别适用于不同维度的数据处理。如果处理的是声音信号（一维数据），就会用到nn.MaxPool1d。而处理图像（二维数据）时，
误差的回响：反向传播算法与神经网络的惊天逆转田园Coder 人工智能科普人工智能科普
当专家系统在20世纪80年代初期大放异彩，成为人工智能实用化的耀眼明星时，另一股曾经被宣判“死刑”的力量——连接主义（神经网络）——正在寒冬的冻土下悄然涌动，孕育着一场惊天动地的复苏。马文·明斯基和西摩·帕尔特在1969年《感知机》专著中那精准而冷酷的理论批判，如同沉重的封印，将多层神经网络的研究禁锢了近二十年。他们指出的核心死结——缺乏有效算法来训练具有隐藏层的网络——仿佛一道无法逾越的天堑。单
根包含文件——Luaconf.h (src) LLLLLLLLLLLLLL265161 Inside Lua lua integer 编译器 alignment c++dll
Luaconf.h是配置的总集，定义了平台相关的设置，是所有文件都包含的，即RootlyIncluded。0.前言开始关注Lua也是06年六月的事情，《程序员》的2006年第六期中，我独独看中了Lua，而不是当时我已经比较熟悉的Python和Ruby，即使它们我都关注了好几年，但是都没有Lua给我的震撼大。于是那个夏天，稍微地尝试读了Lua的代码。开学后，我突然觉得自己有点受唆使，轻信了动态的福音
推荐使用：Vue-Cron —— 简易CRON表达式生成器乌昱有Melanie
推荐使用：Vue-Cron——简易CRON表达式生成器项目地址:https://gitcode.com/gh_mirrors/vu/vue-cronVue-Cron是一款专为Vue.js和Element-UI设计的高效、易用的CRON表达式生成插件。它能帮助开发者轻松创建和管理定时任务策略，提供直观的界面和完善的国际化支持。项目介绍Vue-Cron提供了一个简洁的交互界面，让用户能够通过可视化的操
Python3获取5000个元素的单字符表 DechinPhy
技术背景此前考虑过一个问题，有没有办法获取到python里面所有定义好的单字符的表，比如我们获取5000个不一样的单字符，但是常用的chr(number)的方法里面包含了太多的非字母条目，比如缩进换行符等，也会被识别为长度为1的符号。因此需要在此基础上加一个isalpha()的判断。输出5000个字符示例先解释一下思路，我们还是遍历chr中所包含的字符，此时得到的是所有的长度为1的字符，再用str
【安装Stable Diffusion以及遇到问题和总结】岁月玲珑 AI stable diffusion AI编程 AI作画
在本地安装部署StableDiffusion，需要准备好硬件环境，安装相关依赖，然后配置模型。下面为你详细介绍安装部署的步骤：一、硬件要求显卡：需要NVIDIAGPU，显存至少6GB，推荐8GB及以上。系统：Windows10/11、Linux（Ubuntu等）或macOS（需要Rosetta2）。内存：至少16GBRAM。存储空间：准备10GB以上的可用空间。二、软件准备首先要安装Python和
力扣网C语言编程题：接雨水（双指针法）魏劭逻辑编程题 C语言 c语言 leetcode 算法
一.简介前面文章是以动态规划方法实现的，文章如下：力扣网C语言编程题：接雨水（动态规划实现）-CSDN博客本文继续针对力扣网的接雨水问题，以另一种解题思路（双指针）以C语言实现和Python实现。二.力扣网C语言编程题：接雨水（双指针法）题目：接雨水给定n个非负整数表示每个宽度为1的柱子的高度图，计算按此排列的柱子，下雨之后能接多少雨水。示例2：输入：height=[4,2,0,3,2,5]输出：
阅读笔记(2) 单层网络:回归 a2507283885 笔记
阅读笔记(2)单层网络:回归该笔记是DataWhale组队学习计划（共度AI新圣经：深度学习基础与概念）的Task02以下内容为个人理解，可能存在不准确或疏漏之处，请以教材为主。1.从泛函视角来看线性回归还记得线性代数里学过的“基”这个概念吗？一组基向量是一组线性无关的向量，它们通过线性组合可以张成一个向量空间。也就是说，这个空间里的任意一个向量，都可以表示成这组基的线性组合。函数其实也可以看作是
Python各版本发布时间和重要特性 mosquito_lover1 python
1.Python1.x:-Python1.0(1994年1月):第一个正式版本。-Python1.6(2000年9月):最后一个1.x版本。2.Python2.x:-Python2.0(2000年10月):引入了列表推导、垃圾回收等特性。-Python2.7(2010年7月):Python2.x系列的最后一个版本，长期支持至2020年1月1日。3.Python3.x:-Python3.0(2008
python中用matplotlib画图解决中文问题！！！！！！！终于ok了 luckylbb python 爬虫
在网上用了很多方法基本一样最后终于解决了，分享一下，前面几步似曾相识，但是依旧我发解决问题，重点在最后一步，亲测有效！！！！1、首先在Windons\Fonts下面找到simhei的字体没有就去下载，其实就是黑体，将它拖到桌面备用2、importmatplotlibprint(matplotlib.matplotlib_fname())输入命令查找到自己下载的matplotlib配置文件的位置我的
Python使用matplotlib绘制图像时，中文图例或标题无法正常显示问题独不懂 Python python matplotlib 开发语言
Python使用matplotlib绘制图像时，中文图例或标题无法显示问题解决方法一、问题描述二、解决方法欢迎学习交流！邮箱：z…@1…6.com网站：https://zephyrhours.github.io/一、问题描述Matplotlib库是Python中经常使用的绘图工具，但是有时候我们在使用plt绘制图像，需要将英文标题或者图例显示为中文样式，总会出现无法显示的问题，具体情况如下：imp
Python 文件操作与 wc 工具的重构：从文件对象到输入重定向的全面指南面朝大海，春不暖，花不开 Python基础 python 重构开发语言
文章大纲引言在编程世界中，文件操作是一项基础且至关重要的技能。无论是读取配置文件、处理日志，还是实现数据持久化，文件操作都扮演着核心角色。Python作为一门简洁而强大的语言，提供了直观的文件处理接口，其中open函数和文件对象是开发者最常使用的工具。通过这些工具，我们可以轻松实现文件的读写操作。本文将深入探讨Python文件操作的各个方面，从open函数的基本用法到文件对象的操作方法，再到资源管
AI助力基因数据分析：用Python玩转生命密码的秘密 Echo_Wish 前沿技术人工智能人工智能数据分析 python
AI助力基因数据分析：用Python玩转生命密码的秘密说到基因数据，听起来是不是感觉有点高大上？其实，基因数据分析正变得越来越“接地气”，而AI正是这条路上的神奇钥匙。今天，咱们就用Python聊聊如何利用AI技术做基因数据分析与建模，帮你破解生命的密码，找到疾病预测、个性化医疗的新路子。一、基因数据为何如此特别？基因组测序技术让我们能够获取人体细胞内数以百万计的DNA序列变异信息。但数据量巨大、
python中使用grpc方法示例_Python中使用grpc与consul weixin_39719077
gRPC客户端和服务端可以在多种环境中运行和交互，并且可以用任何gRPC支持的语言来编写。gRPC支持C++JavaPythonGoRubyC#Node.jsPHPDart等语言gRPC默认使用protocolbuffers，这是Google开源的一种轻便高效的结构化数据存储格式，可以用于结构化数据串行化，或者说序列化。它很适合做数据存储或RPC数据交换格式。安装GoogleProtocolBuf
python做生物信息学分析_Python从零开始第五章生物信息学①提取差异基因吴敬欣 python做生物信息学分析
目前来说，做生物信息学的人越来越多，但是我觉得目前而言做生信的主要有三类人：老本行是做实验的，做生信可能是为了辅助研究或者是为了发paper(有非常多的临床生选择趟生信这波水)主要是做生信的，主要涵盖高通量测序数据分析，组学数据分析等等，专门从事生物学数据分析的这群人，其大部分也是本科生物狗作为强大的生力军，以调包写R，python为主。那么这群人就要熟悉看各种包的tutorial以及如何进行常规
用Python实现生信分析——功能预测详解写代码的M教授生信分析 python 开发语言
功能预测是生物信息学中的一项重要任务，通过分析基因或蛋白质序列的特征，推测它们的生物学功能。功能预测通常涉及多种方法，包括序列比对、基序识别、机器学习模型等。这些方法可以帮助科学家推断未知基因的功能，从而加速生物学研究的进展。1.功能预测的主要方法（1）同源性比对：通过将未知基因或蛋白质序列与数据库中的已知序列进行比对，识别出同源序列，并推测它们的功能。常用工具包括BLAST、HMMER等。（2）
用Python实现生信分析——序列搜索和比对工具详解写代码的M教授生信分析 python
1.什么是序列搜索和比对工具？序列搜索和比对工具在生物信息学中用于在大型序列数据库中搜索与查询序列相似的序列，并进行比对分析。这些工具可以帮助研究人员识别与目标序列相关的已知序列，从而推测其功能、结构和进化关系。常见的序列搜索和比对工具包括：BLAST（BasicLocalAlignmentSearchTool）：最常用的序列搜索工具，能够快速找到与查询序列相似的序列。FASTA：另一个常用的序列
DeepSeek-V3混合精度推理（FP8/BF16）原理与实战全解析 CarlowZJ DEEPSEEK-V3
目录摘要混合精度推理的背景与意义DeepSeek-V3混合精度架构设计FP8与BF16核心原理详解混合精度推理核心实现实践案例：FP8权重转BF16与推理部署常见问题与注意事项最佳实践与扩展建议总结参考资料附录：可视化图表1.摘要本文系统梳理DeepSeek-V3在FP8/BF16混合精度推理方面的架构设计与工程实现，结合源码与实际案例，帮助开发者深入理解其混合精度推理原理、工程落地方法与性能优化
python 实战 grpc Avaricious_Bear python 开发语言
title:grpc|python实战grpcdescription:只要代码可以跑起来,很多难题都会迎刃而解.so,keepcodingandstayhungry.grpc的基础:protobufgrpchelloworld:python实战grpc环境配置grpcbasic:grpc4种通信方式grpc的基础:protobufgrpc使用protobuf进行数据传输.protobuf是一种数据
gRPC技术解析与python示例漫谈网络 NetDevOps 智联空间 python 开发语言 rpc grpc
一、gRPC核心架构1.多语言存根2.HTTP/2传输3.多语言实现生成生成客户端gRPC客户端库gRPC服务端库服务端业务逻辑ProtobufIDL二、成为「多语言RPC标准」的5大支柱1.语言无关的接口定义使用Protobuf作为接口定义语言（IDL）：//服务定义(hello.proto)syntax="proto3";serviceGreeter{rpcSayHello(HelloRequ
Python打卡训练营day31 weixin_70153780 Python打卡训练营 python 开发语言
规范的文件命名目标：将文件夹中的文件按规则重命名（如添加前缀、序号等）。#示例importos#定义文件夹路径folder_path=r'C:\Users\Name\Documents\Project\Files'#遍历文件夹中的文件forindex,filenameinenumerate(os.listdir(folder_path)):#拆分文件名和后缀file_ext=os.path.spl
web前段跨域nginx代理配置刘正强 nginx cms Web
nginx代理配置可参考server部分 server { listen 80; server_name localhost;
spring学习笔记 caoyong spring
一、概述 a>、核心技术 : IOC与AOP b>、开发为什么需要面向接口而不是实现接口降低一个组件与整个系统的藕合程度，当该组件不满足系统需求时，可以很容易的将该组件从系统中替换掉，而不会对整个系统产生大的影响 c>、面向接口编口编程的难点在于如何对接口进行初始化,(使用工厂设计模式)
Eclipse打开workspace提示工作空间不可用 0624chenhong eclipse
做项目的时候，难免会用到整个团队的代码，或者上一任同事创建的workspace， 1.电脑切换账号后，Eclipse打开时，会提示Eclipse对应的目录锁定，无法访问，根据提示，找到对应目录，G:\eclipse\configuration\org.eclipse.osgi\.manager，其中文件.fileTableLock提示被锁定。解决办法，删掉.fileTableLock文件，重
Javascript 面向对面写法的必要性？一炮送你回车库 JavaScript
现在Javascript面向对象的方式来写页面很流行，什么纯javascript的mvc框架都出来了：ember 这是javascript层的mvc框架哦,不是j2ee的mvc框架我想说的是，javascript本来就不是一门面向对象的语言，用它写出来的面向对象的程序，本身就有些别扭，很多人提到js的面向对象首先提的是：复用性。那么我请问你写的js里有多少是可以复用的，用fu
js array对象的迭代方法换个号韩国红果果 array
1.forEach 该方法接受一个函数作为参数，对数组中的每个元素使用该函数 return 语句失效 function square(num) { print(num, num * num); } var nums = [1,2,3,4,5,6,7,8,9,10]; nums.forEach(square); 2.every 该方法接受一个返回值为布尔类型
对Hibernate缓存机制的理解归来朝歌 session 一级缓存对象持久化
在hibernate中session一级缓存机制中，有这么一种情况：问题描述：我需要new一个对象，对它的几个字段赋值，但是有一些属性并没有进行赋值，然后调用 session.save()方法，在提交事务后，会出现这样的情况： 1：在数据库中有默认属性的字段的值为空 2：既然是持久化对象，为什么在最后对象拿不到默认属性的值？通过调试后解决方案如下：对于问题一，如你在数据库里设置了
WebService调用错误合集 darkranger webservice
Java.Lang.NoClassDefFoundError: Org/Apache/Commons/Discovery/Tools/DiscoverSingleton 调用接口出错，一个简单的WebService import org.apache.axis.client.Call;import org.apache.axis.client.Service; 首先必不可
JSP和Servlet的中文乱码处理 aijuans Java Web
JSP和Servlet的中文乱码处理前几天学习了JSP和Servlet中有关中文乱码的一些问题，写成了博客，今天进行更新一下。应该是可以解决日常的乱码问题了。现在作以下总结希望对需要的人有所帮助。我也是刚学，所以有不足之处希望谅解。一、表单提交时出现乱码：在进行表单提交的时候，经常提交一些中文，自然就避免不了出现中文乱码的情况，对于表单来说有两种提交方式：get和post提交方式。所以
面试经典六问 atongyeye 工作面试
题记：因为我不善沟通，所以在面试中经常碰壁，看了网上太多面试宝典，基本上不太靠谱。只好自己总结，并试着根据最近工作情况完成个人答案。以备不时之需。以下是人事了解应聘者情况的最典型的六个问题： 1 简单自我介绍关于这个问题，主要为了弄清两件事，一是了解应聘者的背景，二是应聘者将这些背景信息组织成合适语言的能力。我的回答：(针对技术面试回答，如果是人事面试，可以就掌
contentResolver.query()参数详解百合不是茶 android query()详解
收藏csdn的博客,介绍的比较详细,新手值得一看 1.获取联系人姓名一个简单的例子，这个函数获取设备上所有的联系人ID和联系人NAME。 [java] view plain copy public void fetchAllContacts() {
ora-00054:resource busy and acquire with nowait specified解决方法 bijian1013 oracle 数据库 kill nowait
当某个数据库用户在数据库中插入、更新、删除一个表的数据，或者增加一个表的主键时或者表的索引时，常常会出现ora-00054:resource busy and acquire with nowait specified这样的错误。主要是因为有事务正在执行（或者事务已经被锁），所有导致执行不成功。 1.下面的语句
web 开发乱码征客丶 spring Web
以下前端都是 utf-8 字符集编码一、后台接收 1.1、 get 请求乱码 get 请求中，请求参数在请求头中；乱码解决方法： a、通过在web 服务器中配置编码格式：tomcat 中，在 Connector 中添加URIEncoding="UTF-8"； 1.2、post 请求乱码 post 请求中，请求参数分两部份， 1.2.1、url？参数，
【Spark十六】： Spark SQL第二部分数据源和注册表的几种方式 bit1129 spark
Spark SQL数据源和表的Schema case class apply schema parquet json JSON数据源准备源数据 {"name":"Jack", "age": 12, "addr":{"city":"beijing&
JVM学习之:调优总结 -Xms -Xmx -Xmn -Xss BlueSkator -Xss -Xmn -Xms -Xmx
堆大小设置JVM 中最大堆大小有三方面限制：相关操作系统的数据模型（32-bt还是64-bit）限制；系统的可用虚拟内存限制；系统的可用物理内存限制。32位系统下，一般限制在1.5G~2G；64为操作系统对内存无限制。我在Windows Server 2003 系统，3.5G物理内存，JDK5.0下测试，最大可设置为1478m。典型设置： java -Xmx355
jqGrid 各种参数详解(转帖) BreakingBad jqGrid
jqGrid 各种参数详解分类：源代码分享个人随笔请勿参考解决开发问题 2012-05-09 20:29 84282人阅读评论(22) 收藏举报 jquery 服务器 parameters function ajax string
读《研磨设计模式》-代码笔记-代理模式-Proxy bylijinnan java 设计模式
声明：本文只为方便我个人查阅和理解，详细的分析以及源代码请移步原作者的博客http://chjavach.iteye.com/ import java.lang.reflect.InvocationHandler; import java.lang.reflect.Method; import java.lang.reflect.Proxy; /* * 下面
应用升级iOS8中遇到的一些问题 chenhbc ios8 升级iOS8
1、很奇怪的问题，登录界面，有一个判断，如果不存在某个值，则跳转到设置界面，ios8之前的系统都可以正常跳转，iOS8中代码已经执行到下一个界面了，但界面并没有跳转过去，而且这个值如果设置过的话，也是可以正常跳转过去的，这个问题纠结了两天多，之前的判断我是在 -(void)viewWillAppear:(BOOL)animated 中写的，最终的解决办法是把判断写在 -(void
工作流与自组织的关系？ comsci 设计模式工作
目前的工作流系统中的节点及其相互之间的连接是事先根据管理的实际需要而绘制好的，这种固定的模式在实际的运用中会受到很多限制，特别是节点之间的依存关系是固定的，节点的处理不考虑到流程整体的运行情况，细节和整体间的关系是脱节的，那么我们提出一个新的观点，一个流程是否可以通过节点的自组织运动来自动生成呢？这种流程有什么实际意义呢？这里有篇论文，摘要是：“针对网格中的服务
Oracle11.2新特性之INSERT提示IGNORE_ROW_ON_DUPKEY_INDEX daizj oracle
insert提示IGNORE_ROW_ON_DUPKEY_INDEX 转自：http://space.itpub.net/18922393/viewspace-752123 在 insert into tablea ...select * from tableb中，如果存在唯一约束，会导致整个insert操作失败。使用IGNORE_ROW_ON_DUPKEY_INDEX提示，会忽略唯一
二叉树:堆 dieslrae 二叉树
这里说的堆其实是一个完全二叉树,每个节点都不小于自己的子节点,不要跟jvm的堆搞混了.由于是完全二叉树,可以用数组来构建.用数组构建树的规则很简单: 一个节点的父节点下标为: (当前下标 - 1)/2 一个节点的左节点下标为: 当前下标 * 2 + 1 &
C语言学习八结构体 dcj3sjt126com c
为什么需要结构体，看代码 # include <stdio.h> struct Student //定义一个学生类型，里面有age, score, sex, 然后可以定义这个类型的变量 { int age; float score; char sex; } int main(void) { struct Student st = {80, 66.6,
centos安装golang dcj3sjt126com centos
#在国内镜像下载二进制包 wget -c http://www.golangtc.com/static/go/go1.4.1.linux-amd64.tar.gz tar -C /usr/local -xzf go1.4.1.linux-amd64.tar.gz #把golang的bin目录加入全局环境变量 cat >>/etc/profile<
10.性能优化-监控-MySQL慢查询 frank1234 性能优化 MySQL慢查询
1.记录慢查询配置 show variables where variable_name like 'slow%' ; --查看默认日志路径查询结果：--不用的机器可能不同 slow_query_log_file=/var/lib/mysql/centos-slow.log 修改mysqld配置文件：/usr /my.cnf[一般在/etc/my.cnf，本机在/user/my.cn
Java父类取得子类类名 happyqing java this 父类子类类名
在继承关系中，不管父类还是子类，这些类里面的this都代表了最终new出来的那个类的实例对象，所以在父类中你可以用this获取到子类的信息！ package com.urthinker.module.test; import org.junit.Test; abstract class BaseDao<T> { public void
Spring3.2新注解@ControllerAdvice jinnianshilongnian @Controller
@ControllerAdvice，是spring3.2提供的新注解，从名字上可以看出大体意思是控制器增强。让我们先看看@ControllerAdvice的实现： @Target(ElementType.TYPE) @Retention(RetentionPolicy.RUNTIME) @Documented @Component public @interface Co
Java spring mvc多数据源配置 liuxihope spring
转自：http://www.itpub.net/thread-1906608-1-1.html 1、首先配置两个数据库 <bean id="dataSourceA" class="org.apache.commons.dbcp.BasicDataSource" destroy-method="close&quo
第12章 Ajax（下） onestopweb Ajax
index.html <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/
BW / Universe Mappings blueoxygen BO
BW Element OLAP Universe Element Cube Dimension Class Charateristic A class with dimension and detail objects (Detail objects for key and desription) Hi
Java开发熟手该当心的11个错误 tomcat_oracle java 多线程工作单元测试
#1、不在属性文件或XML文件中外化配置属性。比如，没有把批处理使用的线程数设置成可在属性文件中配置。你的批处理程序无论在DEV环境中，还是UAT（用户验收测试）环境中，都可以顺畅无阻地运行，但是一旦部署在PROD 上，把它作为多线程程序处理更大的数据集时，就会抛出IOException，原因可能是JDBC驱动版本不同，也可能是#2中讨论的问题。如果线程数目可以在属性文件中配置，那么使它成为
推行国产操作系统的优劣 yananay windows linux 国产操作系统
最近刮起了一股风，就是去“国外货”。从应用程序开始，到基础的系统，数据库，现在已经刮到操作系统了。原因就是“棱镜计划”，使我们终于认识到了国外货的危害，开始重视起了信息安全。操作系统是计算机的灵魂。既然是灵魂，为了信息安全，那我们就自然要使用和推行国货。可是，一味地推行，是否就一定正确呢？先说说信息安全。其实从很早以来大家就在讨论信息安全。很多年以前，就据传某世界级的网络设备制造商生产的交