求大佬帮帮忙看看这个问题:'utf-8' codec can't decode byte 0xd5 的问题

在学习神经网络的时候,用cifar-10做测试。
结果报错,并且没有给出具体的位置,debug也没有找到确切的位置

Traceback (most recent call last):
  File "D:\Program Files\anaconda\envs\tensorflow\lib\threading.py", line 917, in _bootstrap_inner
    self.run()
  File "D:\Program Files\anaconda\envs\tensorflow\lib\threading.py", line 865, in run
    self._target(*self._args, **self._kwargs)
  File "D:\Program Files\anaconda\envs\tensorflow\lib\site-packages\tensorflow\python\training\queue_runner_impl.py", line 257, in _run
    enqueue_callable()
  File "D:\Program Files\anaconda\envs\tensorflow\lib\site-packages\tensorflow\python\client\session.py", line 1287, in _single_operation_run
    self._call_tf_sessionrun(None, {}, [], target_list, None)
  File "D:\Program Files\anaconda\envs\tensorflow\lib\site-packages\tensorflow\python\client\session.py", line 1443, in _call_tf_sessionrun
    run_metadata)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xd5 in position 119: invalid continuation byte

各种方法都尝试过了,包括在文件开头添加编码声明,以及把训练文件和测试文件改为 UTF-8 编码,都不行。下面附两段代码:

import os
import tensorflow as tf
import tensorflow.compat.v1 as tf1
# Switch TensorFlow to graph mode before any ops are built.
tf1.disable_eager_execution()

# Number of target classes to classify into.
num_classes = 10

# Total number of examples used for training and for evaluation.
num_examples_pre_epoch_for_train = 50_000
num_examples_pre_epoch_for_eval = 10_000
class CIFAR10Record:
    """Empty attribute container used to return the CIFAR-10 data that was read."""
def read_cifar10(file_queue):
    """Build the graph ops that read one CIFAR-10 record from a file queue.

    A record is ``label_size`` label bytes followed by the image bytes, which
    are reshaped as [depth, height, width] and then transposed.

    Args:
        file_queue: queue of filenames passed to the reader's ``read`` call.

    Returns:
        A ``CIFAR10Record`` with ``height``, ``width``, ``depth``, ``key``,
        ``label`` (int32 tensor sliced from the first label byte) and
        ``uint8image`` (tensor laid out as [height, width, depth]).
    """
    record = CIFAR10Record()

    # One label byte per record (for CIFAR-100 this would be 2).
    label_size = 1

    # Three-channel 32x32 image.
    record.height, record.width, record.depth = 32, 32, 3
    image_size = record.height * record.width * record.depth  # = 3072
    record_size = label_size + image_size

    # Fixed-length reader for the .bin files: each read returns one record.
    fixed_reader = tf1.FixedLengthRecordReader(record_bytes=record_size)
    record.key, raw_value = fixed_reader.read(file_queue)

    # Reinterpret the raw byte string as a flat uint8 vector.
    raw_bytes = tf1.decode_raw(raw_value, tf.uint8)

    # Bytes [0, label_size) hold the label; cast it to int32.
    label_slice = tf.strided_slice(raw_bytes, [0], [label_size])
    record.label = tf1.cast(label_slice, tf.int32)

    # The remaining bytes hold the image, reshaped with depth as the first axis.
    image_slice = tf.strided_slice(
        raw_bytes, [label_size], [label_size + image_size])
    channels_first = tf1.reshape(
        image_slice, [record.depth, record.height, record.width])

    # Move the depth axis last: [depth, height, width] -> [height, width, depth].
    record.uint8image = tf.transpose(channels_first, [1, 2, 0])
    return record
def inputs(data_dir, batch_size, distorted):
    """Build the queue-based CIFAR-10 input pipeline and return one batch.

    Args:
        data_dir: directory containing ``data_batch_1.bin`` to
            ``data_batch_5.bin``.
        batch_size: number of examples per batch.
        distorted: ``None`` selects the plain pipeline (crop/pad to 24x24 and
            standardize, batched in order). Any other value enables data
            augmentation (random crop, flip, brightness, contrast) and
            shuffled batching.

    Returns:
        A tuple ``(images, labels)``: a batch of 24x24x3 float images and the
        matching labels reshaped to ``[batch_size]``.
    """
    # range(1, 6) yields the five files data_batch_1.bin .. data_batch_5.bin.
    # NOTE(review): the same training files are read when distorted is None,
    # so the "test" batches come from training data. Confirm whether
    # test_batch.bin was intended for that branch.
    filenames = [os.path.join(data_dir, "data_batch_%d.bin" % i)
                 for i in range(1, 6)]

    # Feed the filenames into an input queue and read records from it.
    file_queue = tf1.train.string_input_producer(filenames)
    read_input = read_cifar10(file_queue)

    # Convert the image data to float32.
    reshaped_image = tf1.cast(read_input.uint8image, tf.float32)

    # Number of training examples.
    num_examples_pre_epoch = num_examples_pre_epoch_for_train

    if distorted is not None:
        # Data augmentation: randomly crop the 32x32x3 image to 24x24x3.
        cropped_image = tf1.random_crop(reshaped_image, [24, 24, 3])
        # Random horizontal flip.
        flipped_image = tf1.image.random_flip_left_right(cropped_image)
        # Random brightness adjustment.
        adjusted_brightness = tf1.image.random_brightness(
            flipped_image, max_delta=0.8)
        # Random contrast adjustment between the lower and upper factors.
        adjusted_contrast = tf1.image.random_contrast(
            adjusted_brightness, lower=0.2, upper=1.8)
        # Standardize the image (zero mean, unit variance).
        float_image = tf.image.per_image_standardization(adjusted_contrast)

        # Set the static shapes of the image and the label.
        float_image.set_shape([24, 24, 3])
        read_input.label.set_shape([1])

        # NOTE(review): this branch sizes the queue from the *eval* example
        # count while the other branch uses the *train* count. They look
        # swapped, but are kept as in the original. Confirm the intent.
        min_queue_examples = int(num_examples_pre_epoch_for_eval * 0.4)
        print('Filling queue with %d CIFAR images before starting to train. '
              'This will take a few minutes.' % min_queue_examples)

        # Produce a randomly shuffled batch of images and labels.
        image_train, labels_train = tf1.train.shuffle_batch(
            [float_image, read_input.label],
            batch_size=batch_size,
            num_threads=16,
            capacity=min_queue_examples + 3 * batch_size,
            min_after_dequeue=min_queue_examples)
        return image_train, tf.reshape(labels_train, [batch_size])
    else:
        # No augmentation: crop or pad the image to the target size.
        resized_image = tf.image.resize_with_crop_or_pad(
            reshaped_image, 24, 24)
        # Standardize the image (zero mean, unit variance).
        float_image = tf.image.per_image_standardization(resized_image)

        # Set the static shapes of the image and the label.
        float_image.set_shape([24, 24, 3])
        read_input.label.set_shape([1])

        min_queue_examples = int(num_examples_pre_epoch * 0.4)

        # Produce an in-order batch of images and labels.
        images_test, labels_test = tf1.train.batch(
            [float_image, read_input.label],
            batch_size=batch_size,
            capacity=min_queue_examples + 3 * batch_size)
        return images_test, tf.reshape(labels_test, [batch_size])

import tensorflow as tf
import tensorflow.compat.v1 as tf1
import Cifar10_data
import time
import math
import numpy as np
# -*- coding: gbk -*-
tf1.disable_eager_execution()
max_steps = 4000
batch_size = 100
num_examples_for_eval = 10000
data_dir = "D/公用程序部分/tensor/7-4/cifar-10-batches-bin"
def variable_with_weight_loss(shape, stddev, w1):
    """Create a truncated-normal variable, optionally adding an L2 penalty.

    Args:
        shape: shape of the variable to create.
        stddev: standard deviation of the truncated normal initializer.
        w1: L2 loss coefficient. When not ``None``, ``l2_loss(var) * w1`` is
            added to the "losses" collection.

    Returns:
        The newly created variable.
    """
    initial_value = tf1.truncated_normal(shape, stddev=stddev)
    weights = tf1.Variable(initial_value)

    # Without a coefficient there is nothing to register.
    if w1 is None:
        return weights

    penalty = tf.multiply(tf.nn.l2_loss(weights), w1, name="weights_loss")
    tf1.add_to_collection("losses", penalty)
    return weights
# Input pipelines: augmented + shuffled batches for training, plain batches
# for evaluation.
images_train, labels_train = Cifar10_data.inputs(
    data_dir=data_dir, batch_size=batch_size, distorted=True)
images_test, labels_test = Cifar10_data.inputs(
    data_dir=data_dir, batch_size=batch_size, distorted=None)

# Placeholders fed with the image and label batches fetched from the queues.
x = tf1.placeholder(tf.float32, [batch_size, 24, 24, 3])
y_ = tf1.placeholder(tf.int32, [batch_size])

# Conv layer 1: 5x5 kernel, 3 -> 64 channels, then ReLU and 3x3/2 max-pool.
kernel1 = variable_with_weight_loss(shape=[5, 5, 3, 64], stddev=5e-2, w1=0.0)
conv1 = tf.nn.conv2d(x, kernel1, [1, 1, 1, 1], padding="SAME")
bias1 = tf1.Variable(tf.constant(0.0, shape=[64]))
relu1 = tf.nn.relu(tf.nn.bias_add(conv1, bias1))
pool1 = tf.nn.max_pool(relu1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                       padding="SAME")

# Conv layer 2: 5x5 kernel, 64 -> 64 channels, then ReLU and 3x3/2 max-pool.
# Named kernel2 so it no longer overwrites the first layer's kernel1.
kernel2 = variable_with_weight_loss(shape=[5, 5, 64, 64], stddev=5e-2, w1=0.0)
conv2 = tf.nn.conv2d(pool1, kernel2, [1, 1, 1, 1], padding="SAME")
bias2 = tf1.Variable(tf.constant(0.1, shape=[64]))
relu2 = tf.nn.relu(tf.nn.bias_add(conv2, bias2))
pool2 = tf.nn.max_pool(relu2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                       padding="SAME")

# Flatten each example to a vector for the fully connected layers.
reshape = tf.reshape(pool2, [batch_size, -1])
dim = reshape.get_shape()[1]

# Fully connected layer 1 (dim -> 384) with an L2 penalty on the weights.
weigh1 = variable_with_weight_loss(shape=[dim, 384], stddev=0.04, w1=0.004)
fc_bias1 = tf1.Variable(tf.constant(0.1, shape=[384]))
fc_1 = tf.nn.relu(tf.matmul(reshape, weigh1) + fc_bias1)

# Fully connected layer 2 (384 -> 192) with an L2 penalty on the weights.
# Uses tf1.Variable like every other variable in this script.
weigh2 = variable_with_weight_loss(shape=[384, 192], stddev=0.04, w1=0.004)
fc_bias2 = tf1.Variable(tf.constant(0.1, shape=[192]))
local4 = tf.nn.relu(tf.matmul(fc_1, weigh2) + fc_bias2)

# Output layer (192 -> 10): raw logits, no activation.
weigh3 = variable_with_weight_loss(shape=[192, 10], stddev=1 / 192.0, w1=0.0)
fc_bias3 = tf1.Variable(tf.constant(0.0, shape=[10]))
result = tf.add(tf.matmul(local4, weigh3), fc_bias3)

# Loss = mean softmax cross-entropy + sum of the registered weight penalties.
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=result, labels=tf.cast(y_, tf.int64))
weights_with_l2_loss = tf.add_n(tf1.get_collection("losses"))
loss = tf.reduce_mean(cross_entropy) + weights_with_l2_loss
train_op = tf1.train.AdamOptimizer(1e-3).minimize(loss)

# True for each example whose label is the top-1 prediction.
top_k_op = tf1.nn.in_top_k(result, y_, 1)

with tf1.Session() as sess:
    tf1.global_variables_initializer().run()

    # Start the input-queue threads under a Coordinator so they are stopped
    # and joined when the run ends. coord.join() also re-raises an exception
    # that was reported by a queue thread, instead of leaving it only in that
    # thread's traceback.
    coord = tf1.train.Coordinator()
    threads = tf1.train.start_queue_runners(sess=sess, coord=coord)

    try:
        # Training loop.
        for step in range(max_steps):
            start_time = time.time()
            image_batch, label_batch = sess.run([images_train, labels_train])
            _, loss_value = sess.run([train_op, loss],
                                     feed_dict={x: image_batch,
                                                y_: label_batch})
            duration = time.time() - start_time

            if step % 100 == 0:
                examples_per_sec = batch_size / duration
                sec_per_batch = float(duration)

                # Report the loss and the time taken by this step.
                print("step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)" %
                      (step, loss_value, examples_per_sec, sec_per_batch))

        # Number of evaluation batches, rounded up.
        num_batch = int(math.ceil(num_examples_for_eval / batch_size))
        true_count = 0
        # Was "num_iter * batch_size": num_iter is undefined (NameError).
        total_sample_count = num_batch * batch_size

        # Count the correctly predicted examples over all evaluation batches.
        for j in range(num_batch):
            image_batch, label_batch = sess.run([images_test, labels_test])
            predictions = sess.run([top_k_op], feed_dict={x: image_batch,
                                                          y_: label_batch})
            true_count += np.sum(predictions)

        # Print the accuracy.
        print("accuracy = %.3f%%" % ((true_count / total_sample_count) * 100))
    finally:
        coord.request_stop()
        coord.join(threads)

上面第一段代码是 Cifar10_data.py 文件(被第二段代码通过 import Cifar10_data 导入),第二段是训练脚本。
求大佬帮帮忙看看这个问题:‘utf-8‘ codec can‘t decode byte 0xd5的问题_第1张图片
求帮帮忙,卡两天了!!~~

你可能感兴趣的:(求助)