numpy的loadtxt导入文件时,怎样跳过第一行标题

在用numpy的loadtxt导入数据文件时,遇到了报错。
排查过程如下:
第一次:

#数据准备
raw_data = np.loadtxt('D:/jupter_workspace/chapter4/cluster.txt') #导入数据文件
X = raw_data[:, :-1] # 分割要聚类的数据
y_true = raw_data[:, -1]
print(X)
-----------------------------
打印输出结果报错:
raw_data = np.loadtxt('D:/jupter_workspace/chapter4/cluster.txt') #导入数据文件
  File "E:\Anaconda3\lib\site-packages\numpy\lib\npyio.py", line 1093, in loadtxt
    first_line = next(fh)
UnicodeDecodeError: 'gbk' codec can't decode byte 0x83 in position 88: illegal multibyte sequence

第二次:
上边报错是UnicodeDecodeError:Windows下默认按gbk解码,而数据文件不是gbk编码,所以解码失败。那我就加上encoding='utf-8'试一试


#数据准备
raw_data = np.loadtxt('D:/jupter_workspace/chapter4/cluster.txt',encoding='utf-8') #导入数据文件
X = raw_data[:, :-1] # 分割要聚类的数据
y_true = raw_data[:, -1]
print(X)
-------------------------------------------------
打印输出结果还是报错:
  File "E:/tsf/655295/clusterAnalysisCode.py", line 14, in <module>
    raw_data = np.loadtxt('D:/jupter_workspace/chapter4/cluster.txt',encoding='utf-8') #导入数据文件
  File "E:\Anaconda3\lib\site-packages\numpy\lib\npyio.py", line 1141, in loadtxt
    for x in read_data(_loadtxt_chunksize):
  File "E:\Anaconda3\lib\site-packages\numpy\lib\npyio.py", line 1068, in read_data
    items = [conv(val) for (conv, val) in zip(converters, vals)]
  File "E:\Anaconda3\lib\site-packages\numpy\lib\npyio.py", line 1068, in <listcomp>
    items = [conv(val) for (conv, val) in zip(converters, vals)]
  File "E:\Anaconda3\lib\site-packages\numpy\lib\npyio.py", line 775, in floatconv
    return float(x)
ValueError: could not convert string to float: 'USER_ID,AVG_ORDERS,AVG_MONEY,IS_ACTIVE,SEX'

第三次:
上面报错是ValueError吧啦吧啦的一堆,仔细一看,那不是数据文件中第一行的列名称吗?那我就用skiprows=1来跳过第一行试一试

#数据准备
raw_data = np.loadtxt('D:/jupter_workspace/chapter4/cluster.txt', encoding='utf-8', skiprows=1) #导入数据文件
X = raw_data[:, :-1] # 分割要聚类的数据
y_true = raw_data[:, -1]
print(X)
------------------------
打印输出还是错误,欲哭无泪了。原因是loadtxt默认以空白字符作为分隔符,而文件是用逗号分隔的,于是整行被当成一个字段去转换成float:
ValueError: could not convert string to float: '1,3.58,40.43,活跃,1'

第四次:
最后加一个dtype='str'才能成功读入。但要注意:这样读出来的每个元素是一整行字符串(结果是一维数组),并没有按列拆分;如果要按列拆分,还需要再加上delimiter=','

#数据准备
raw_data = np.loadtxt('D:/jupter_workspace/chapter4/cluster.txt', dtype='str', encoding='utf-8', skiprows=1 ) #导入数据文件
print(raw_data)
------------
打印输出结果
['1,3.58,40.43,活跃,1' '2,4.71,41.16,不活跃,1' '3,3.80,39.49,不活跃,2'
 '4,2.85,38.36,不活跃,1' '5,3.71,38.34,活跃,1' '6,2.37,39.18,不活跃,0'
 '7,4.90,40.98,活跃,0' '8,3.10,41.16,活跃,0' '9,2.80,39.35,不活跃,2'
 '10,1.95,38.89,不活跃,1' '11,2.46,39.07,不活跃,1' '12,3.96,41.55,不活跃,0'
 '13,4.58,39.02,不活跃,2' '14,4.34,40.90,活跃,0' '15,3.84,39.31,活跃,2'
 '16,4.49,41.48,活跃,1' '17,1.69,39.22,活跃,1' '18,3.55,38.99,活跃,2'
 '19,3.66,38.45,活跃,2' '20,3.69,39.35,活跃,1' '21,4.14,38.81,活跃,2'
 '22,3.45,39.78,活跃,2' '23,3.47,40.82,活跃,0' '24,3.33,39.32,活跃,2'
 '25,3.95,41.45,不活跃,0' '26,2.12,39.33,不活跃,1' '27,2.15,39.59,不活跃,1'
 '28,3.22,40.74,活跃,0' '29,4.32,41.03,不活跃,0' '30,1.35,38.97,不活跃,1'
 '31,2.70,38.95,活跃,1' '32,4.40,39.24,不活跃,2' '33,4.03,38.67,活跃,2'
 '34,4.01,39.02,活跃,2' '35,1.79,38.53,不活跃,1' '36,2.01,38.97,不活跃,1'
 '37,3.93,39.36,不活跃,2' '38,1.88,38.20,不活跃,1' '39,4.43,40.82,不活跃,0'
 '40,3.45,39.20,不活跃,2' '41,4.08,39.28,活跃,2' '42,4.00,39.50,不活跃,2'
 '43,3.33,40.64,不活跃,0' '44,3.05,41.35,活跃,0' '45,3.77,39.13,活跃,2'
 '46,4.01,38.26,不活跃,2' '47,3.81,39.00,不活跃,2' '48,3.75,40.85,不活跃,0'
 '49,3.98,40.96,不活跃,0' '50,2.08,39.35,活跃,1' '51,2.29,38.97,活跃,1'
 '52,4.21,40.93,活跃,0' '53,3.08,41.10,不活跃,0' '54,3.52,40.80,不活跃,0'
 '55,2.31,38.95,不活跃,1' '56,3.61,39.57,活跃,2' '57,1.86,38.49,活跃,1'
 '58,3.32,39.29,活跃,2' '59,4.18,40.61,不活跃,0' '60,2.33,39.13,活跃,1'
 '61,3.82,40.74,活跃,0' '62,1.86,39.81,活跃,1' '63,3.48,41.11,活跃,0'
 '64,4.30,40.52,活跃,0' '65,2.57,39.16,不活跃,1' '66,3.27,39.08,活跃,1'
 '67,4.43,39.98,活跃,0' '68,4.35,39.59,活跃,2' '69,3.98,40.89,不活跃,0'
 '70,4.02,40.34,不活跃,0' '71,4.70,41.40,活跃,0' '72,2.03,39.01,不活跃,1'
 '73,3.97,41.69,不活跃,0' '74,4.09,41.13,不活跃,0' '75,1.53,38.54,活跃,1'
 '76,4.30,41.05,不活跃,0' '77,3.94,41.90,不活跃,0' '78,4.34,38.73,不活跃,2'
 '79,3.98,40.53,活跃,0' '80,3.99,41.17,不活跃,0' '81,3.95,40.73,活跃,0'
 '82,1.70,38.98,活跃,1' '83,4.22,38.57,活跃,2' '84,3.85,40.62,活跃,0'
 '85,4.64,38.92,不活跃,2' '86,1.80,39.01,活跃,1' '87,2.53,38.93,活跃,1'
 '88,4.07,41.67,不活跃,0' '89,1.94,39.42,不活跃,1' '90,4.17,41.27,不活跃,0'
 '91,3.56,38.99,活跃,2' '92,0.89,39.46,活跃,1' '93,2.02,38.28,活跃,1'
 '94,4.75,41.36,活跃,0' '95,4.33,41.54,不活跃,0' '96,4.25,40.36,活跃,0'
 '97,3.88,38.82,活跃,2' '98,1.71,38.64,不活跃,1' '99,1.96,39.01,活跃,1'
 '100,4.48,41.13,活跃,0' '101,3.73,38.77,不活跃,2' '102,3.84,40.54,活跃,0'
 '103,3.58,41.48,活跃,0' '104,1.99,39.33,不活跃,1' '105,3.92,39.63,活跃,2'
 '106,3.85,40.90,活跃,0' '107,1.88,38.86,不活跃,1' '108,4.27,39.52,活跃,2'
 '109,4.59,41.34,不活跃,0' '110,3.38,41.03,不活跃,0' '111,3.67,39.07,活跃,2'
 '112,4.12,41.53,不活跃,0' '113,3.98,39.11,不活跃,2' '114,4.25,41.37,活跃,0'
 '115,4.17,38.50,活跃,2' '116,4.02,39.25,不活跃,2' '117,1.73,39.13,活跃,1'
 '118,1.40,38.29,不活跃,1' '119,3.76,40.55,不活跃,0' '120,4.35,40.70,不活跃,0'
 '121,4.17,38.80,不活跃,2' '122,3.98,38.76,活跃,2' '123,3.71,41.08,不活跃,0'
 '124,3.95,41.04,活跃,0' '125,2.01,38.66,活跃,1' '126,4.47,40.75,活跃,0'
 '127,4.26,40.35,不活跃,0' '128,4.60,41.76,不活跃,0' '129,3.01,39.71,不活跃,1'
 '130,4.31,41.33,不活跃,0' '131,3.54,40.82,不活跃,0' '132,2.38,39.16,活跃,1'
 '133,3.86,39.22,不活跃,2' '134,3.27,38.72,不活跃,2' '135,4.37,41.14,活跃,0'
 '136,5.05,38.70,不活跃,2' '137,1.97,38.77,不活跃,1' '138,3.85,41.04,不活跃,0'
 '139,4.42,39.07,不活跃,2' '140,4.95,41.38,活跃,0' '141,2.20,39.14,不活跃,1'
 '142,4.18,38.86,活跃,2' '143,4.02,39.74,不活跃,2' '144,3.72,41.61,不活跃,0'
 '145,3.51,39.11,活跃,2' '146,4.40,38.93,不活跃,2' '147,3.74,38.55,活跃,2'
 '148,4.11,40.96,不活跃,0' '149,3.85,39.31,活跃,2' '150,3.66,41.76,不活跃,0'
 '151,3.77,41.11,活跃,0' '152,2.09,39.56,活跃,1' '153,3.47,40.46,活跃,0'
 '154,3.83,41.74,活跃,0' '155,2.04,39.15,不活跃,1' '156,2.08,39.12,不活跃,1'
 '157,3.82,38.30,不活跃,2' '158,4.33,41.00,不活跃,0' '159,3.78,41.17,活跃,0'
 '160,4.28,40.94,不活跃,0' '161,3.86,40.45,活跃,0' '162,3.43,40.80,活跃,0'
 '163,3.40,39.60,不活跃,2' '164,4.57,39.58,不活跃,2' '165,4.20,40.95,不活跃,0'
 '166,3.97,40.47,不活跃,0' '167,2.13,38.94,不活跃,1' '168,2.31,38.89,不活跃,1'
 '169,4.07,41.04,活跃,0' '170,1.95,39.12,不活跃,1' '171,3.62,39.41,不活跃,2'
 '172,4.24,38.35,活跃,2' '173,3.81,41.25,不活跃,0' '174,3.52,39.17,不活跃,2'
 '175,1.54,39.02,活跃,1' '176,4.28,41.28,活跃,0' '177,1.66,38.87,活跃,1'
 '178,3.75,40.81,活跃,0' '179,3.92,41.35,不活跃,0' '180,3.84,41.15,活跃,0'
 '181,1.20,39.15,活跃,1' '182,1.93,38.80,不活跃,1' '183,1.97,38.48,不活跃,1'
 '184,4.19,38.96,活跃,2' '185,2.20,38.95,活跃,1' '186,3.63,40.06,不活跃,2'
 '187,1.66,39.13,活跃,1' '188,2.09,39.03,活跃,1' '189,3.45,38.79,不活跃,2'
 '190,1.46,38.98,不活跃,1' '191,3.54,41.31,活跃,0' '192,4.37,38.98,不活跃,2'
 '193,1.69,39.11,不活跃,1' '194,3.76,41.62,不活跃,0' '195,2.71,38.84,不活跃,1'
 '196,1.86,38.68,活跃,1' '197,1.27,39.12,活跃,1' '198,4.46,41.43,不活跃,0'
 '199,3.81,38.66,不活跃,2' '200,1.94,39.31,活跃,1' '201,4.23,39.06,不活跃,2'
 '202,4.47,40.93,活跃,0' '203,3.76,38.54,不活跃,2' '204,2.38,39.09,不活跃,1'
 '205,4.01,38.83,不活跃,2' '206,4.00,41.71,不活跃,0' '207,3.62,41.10,不活跃,0'
 '208,4.53,38.87,活跃,2' '209,3.42,41.32,活跃,0' '210,3.63,40.49,活跃,0'
 '211,4.53,40.65,活跃,0' '212,4.31,40.81,活跃,0' '213,3.19,40.78,不活跃,0'
 '214,4.02,41.20,活跃,0' '215,3.94,38.67,活跃,2' '216,4.10,39.61,活跃,2'
 '217,2.19,39.06,不活跃,1' '218,4.45,38.37,不活跃,2' '219,1.84,39.29,不活跃,1'
 '220,4.27,40.82,活跃,0' '221,2.81,38.88,活跃,1' '222,3.32,41.78,不活跃,0'
 '223,4.60,40.92,不活跃,0' '224,3.44,40.76,不活跃,0' '225,3.77,39.19,不活跃,2'
 '226,4.01,39.42,活跃,2' '227,3.02,39.45,活跃,2' '228,1.59,38.76,活跃,1'
 '229,3.54,40.88,不活跃,0' '230,4.01,38.99,活跃,2' '231,1.86,39.43,不活跃,1'
 '232,1.54,38.51,不活跃,1' '233,4.09,39.23,不活跃,2' '234,4.00,39.35,不活跃,2'
 '235,3.53,41.52,不活跃,0' '236,1.43,39.77,不活跃,1' '237,4.01,39.89,活跃,2'
 '238,3.36,38.45,活跃,2' '239,3.84,41.49,不活跃,0' '240,4.24,41.36,不活跃,0'
 '241,3.96,38.58,活跃,2' '242,4.17,38.98,活跃,2' '243,4.10,40.64,活跃,0'
 '244,4.35,39.48,不活跃,2' '245,3.47,39.57,不活跃,2' '246,1.84,38.87,活跃,1'
 '247,3.95,38.41,不活跃,2' '248,4.31,39.17,活跃,2' '249,3.42,39.15,不活跃,2'
 '250,4.92,40.58,活跃,0' '251,1.62,39.17,活跃,1' '252,3.86,41.06,活跃,0'
 '253,4.25,39.36,活跃,2' '254,4.06,38.91,活跃,2' '255,3.96,38.58,活跃,2'
 '256,3.54,41.31,活跃,0' '257,3.60,38.60,活跃,2' '258,2.09,39.08,不活跃,1'
 '259,2.09,38.96,活跃,1' '260,4.21,41.04,不活跃,0' '261,4.08,39.04,不活跃,2'
 '262,4.37,38.44,不活跃,2' '263,4.38,39.22,不活跃,2' '264,1.66,38.76,活跃,1'
 '265,3.30,41.18,活跃,0' '266,3.98,38.62,不活跃,2' '267,3.61,39.19,活跃,2'
 '268,3.79,38.62,活跃,2' '269,3.93,38.97,不活跃,2' '270,3.74,39.09,不活跃,2'
 '271,2.31,38.34,不活跃,1' '272,2.63,39.26,活跃,1' '273,4.37,41.23,不活跃,0'
 '274,1.62,39.41,不活跃,1' '275,2.10,38.84,不活跃,1' '276,4.24,40.85,不活跃,0'
 '277,4.55,40.72,活跃,0' '278,1.46,39.51,活跃,1' '279,1.69,39.25,活跃,1'
 '280,3.88,41.04,不活跃,0' '281,4.14,39.26,不活跃,2' '282,1.65,38.91,活跃,1'
 '283,4.21,37.98,活跃,2' '284,1.74,39.19,活跃,1' '285,4.66,41.07,活跃,0'
 '286,4.04,39.22,不活跃,2' '287,4.05,38.21,不活跃,2' '288,1.43,39.80,活跃,1'
 '289,2.55,38.80,活跃,1' '290,3.91,40.63,不活跃,0' '291,3.74,40.79,活跃,0'
 '292,1.48,39.51,活跃,1' '293,3.05,41.01,活跃,0' '294,3.93,39.33,活跃,2'
 '295,4.35,39.07,不活跃,2' '296,1.72,39.27,活跃,1' '297,1.66,38.64,不活跃,1'
 '298,3.84,39.38,不活跃,2' '299,3.59,40.97,不活跃,0' '300,4.28,41.52,活跃,0'
 '301,2.51,39.20,不活跃,1' '302,3.96,41.16,活跃,0' '303,4.05,41.16,不活跃,0'
 '304,3.99,41.15,活跃,0' '305,4.33,38.88,活跃,2' '306,4.05,40.85,不活跃,0'
 '307,3.75,38.84,活跃,2' '308,4.61,41.59,活跃,0' '309,3.15,38.26,不活跃,2'
 '310,2.02,39.12,不活跃,1' '311,4.51,38.72,不活跃,2' '312,2.76,38.99,不活跃,1'
 '313,3.73,41.01,不活跃,0' '314,1.68,39.10,活跃,1' '315,3.73,41.52,活跃,0'
 '316,2.02,38.37,活跃,1' '317,2.00,39.00,活跃,1' '318,4.36,41.13,活跃,0'
 '319,3.79,38.96,活跃,2' '320,2.17,39.22,不活跃,1' '321,3.33,38.95,不活跃,2'
 '322,4.50,38.17,不活跃,2' '323,3.92,40.96,不活跃,0' '324,2.13,38.94,不活跃,1'
 '325,1.64,38.21,不活跃,1' '326,2.06,38.62,活跃,1' '327,4.35,38.27,活跃,2'
 '328,2.12,39.17,不活跃,1' '329,4.15,40.56,不活跃,0' '330,2.24,39.45,不活跃,1'
 '331,4.04,41.23,活跃,0' '332,2.53,39.08,活跃,1' '333,4.07,41.25,不活跃,0'
 '334,4.16,39.00,活跃,2' '335,4.44,41.52,活跃,0' '336,3.70,38.42,活跃,2'
 '337,3.87,39.00,不活跃,2' '338,1.42,38.82,不活跃,1' '339,2.57,39.17,不活跃,1'
 '340,3.90,41.61,不活跃,0' '341,2.41,38.78,不活跃,1' '342,3.90,40.93,不活跃,0'
 '343,3.80,38.89,活跃,2' '344,3.84,39.78,不活跃,0' '345,4.75,40.46,不活跃,0'
 '346,3.13,38.75,活跃,2' '347,2.40,38.74,活跃,1' '348,4.12,39.02,不活跃,2'
 '349,4.46,38.64,活跃,2' '350,4.23,40.91,活跃,0' '351,4.75,40.61,活跃,0'
 '352,4.11,41.24,不活跃,0' '353,4.45,41.20,活跃,0' '354,3.99,38.95,不活跃,2'
 '355,3.87,38.99,活跃,2' '356,4.50,38.96,不活跃,2' '357,3.68,38.56,不活跃,2'
 '358,3.58,38.87,不活跃,2' '359,4.16,39.48,不活跃,2' '360,4.21,40.77,不活跃,0'
 '361,4.05,39.26,活跃,2' '362,3.49,40.77,活跃,0' '363,1.93,38.95,活跃,1'
 '364,4.16,39.77,活跃,2' '365,3.86,39.81,活跃,2' '366,2.30,39.23,活跃,1'
 '367,3.62,40.68,不活跃,0' '368,4.39,38.97,活跃,2' '369,2.18,39.68,活跃,1'
 '370,4.56,39.13,活跃,2' '371,3.95,40.88,不活跃,0' '372,3.79,39.07,活跃,2'
 '373,4.54,38.70,活跃,2' '374,3.85,40.82,不活跃,0' '375,3.89,38.47,活跃,2'
 '376,4.22,39.29,不活跃,2' '377,4.62,38.62,不活跃,2' '378,3.37,39.34,活跃,2'
 '379,4.11,38.72,活跃,2' '380,4.86,39.19,不活跃,2' '381,3.00,38.10,活跃,1'
 '382,3.67,39.72,活跃,2' '383,2.08,38.54,活跃,1' '384,1.98,39.10,活跃,1'
 '385,4.09,39.65,不活跃,2' '386,2.09,39.16,活跃,1' '387,4.19,40.85,活跃,0'
 '388,4.01,39.13,不活跃,2' '389,3.28,38.68,不活跃,2' '390,4.18,39.38,活跃,2'

你可能感兴趣的:(日常问题解决杂记)