The following are code examples showing how to use numpy.bytes_. They are extracted from open source Python projects. You can vote up the examples you like or vote down the examples you don’t like. You can also save this page to your account.
Example 1
def test_iter_buffering_string():
    # Buffered iteration may widen fixed-width strings but must refuse to
    # shrink them, for byte strings and unicode strings alike.

    # Safe casting disallows shrinking strings
    a = np.array(['abc', 'a', 'abcd'], dtype=np.bytes_)
    assert_equal(a.dtype, np.dtype('S4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='S2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='S6')
    assert_equal(i[0], asbytes('abc'))
    assert_equal(i[0].dtype, np.dtype('S6'))

    # np.unicode_ is the NumPy scalar type; the bare ``np.unicode`` alias of
    # the builtin was deprecated in NumPy 1.20 and later removed.
    a = np.array(['abc', 'a', 'abcd'], dtype=np.unicode_)
    assert_equal(a.dtype, np.dtype('U4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='U2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='U6')
    assert_equal(i[0], sixu('abc'))
    assert_equal(i[0].dtype, np.dtype('U6'))
Example 2
def read_atlas_annot(atlas_dir, hemi_list=None):
    """Read the ``<hemi>.aparc.annot`` file of each requested hemisphere.

    Parameters
    ----------
    atlas_dir : path of the atlas directory; annotation files are looked up
        in its ``label`` sub-directory.
    hemi_list : iterable of hemisphere prefixes, defaults to ``['lh', 'rh']``.

    Returns
    -------
    dict mapping each hemisphere to a dict with the keys ``'labels'``,
    ``'ctab'`` and ``'names'`` as returned by ``read_annot``; byte-string
    names are decoded to ``str``.
    """
    hemispheres = ['lh', 'rh'] if hemi_list is None else hemi_list

    annot = {}
    for hemi in hemispheres:
        annot_path = pjoin(atlas_dir, 'label', '{}.aparc.annot'.format(hemi))
        labels, ctab, names = nib.freesurfer.io.read_annot(annot_path,
                                                           orig_ids=True)
        # The first entry decides whether the names need decoding.
        if isinstance(names[0], np.bytes_):
            names = [raw.decode('UTF-8') for raw in names]
        annot[hemi] = {'labels': labels, 'ctab': ctab, 'names': names}
    return annot
Example 3
def test_iter_buffering_string():
    # Buffered iteration may widen fixed-width strings but must refuse to
    # shrink them, for byte strings and unicode strings alike.

    # Safe casting disallows shrinking strings
    a = np.array(['abc', 'a', 'abcd'], dtype=np.bytes_)
    assert_equal(a.dtype, np.dtype('S4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='S2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='S6')
    assert_equal(i[0], asbytes('abc'))
    assert_equal(i[0].dtype, np.dtype('S6'))

    # np.unicode_ is the NumPy scalar type; the bare ``np.unicode`` alias of
    # the builtin was deprecated in NumPy 1.20 and later removed.
    a = np.array(['abc', 'a', 'abcd'], dtype=np.unicode_)
    assert_equal(a.dtype, np.dtype('U4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='U2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='U6')
    assert_equal(i[0], sixu('abc'))
    assert_equal(i[0].dtype, np.dtype('U6'))
Example 4
def hdf_attr_to_dict(attr):
    """
    Convert from HDF attributes to valid dict

    Parameters
    ----------
    attr : mapping-like object
        Anything ``dict()`` accepts, or an object that can be iterated for
        keys and indexed with them.

    Returns
    -------
    dict
        Copy of the attributes in which every ``numpy.bytes_`` value has been
        decoded to a Python ``str`` (UTF-8). Keys whose value cannot be read
        are reported on stdout and skipped.
    """
    try:
        output_dict = dict(attr)
    except Exception:
        # Bulk conversion failed: copy item by item so that a single
        # unreadable attribute does not lose all the others.
        output_dict = {}
        for key in attr:
            try:
                output_dict[key] = attr[key]
            except Exception:
                print('Fail: {}'.format(key))

    # Strings in HDF are treated as numpy bytes_ literals.
    # We want our instance in memory to have Python strings.
    # Only values are replaced, so the dict size is stable while iterating.
    for key, value in output_dict.items():
        if isinstance(value, _np.bytes_):
            output_dict[key] = value.decode('UTF-8')
    return output_dict
Example 5
def test_iter_buffering_string():
    # Buffered iteration may widen fixed-width strings but must refuse to
    # shrink them, for byte strings and unicode strings alike.

    # Safe casting disallows shrinking strings
    a = np.array(['abc', 'a', 'abcd'], dtype=np.bytes_)
    assert_equal(a.dtype, np.dtype('S4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='S2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='S6')
    assert_equal(i[0], asbytes('abc'))
    assert_equal(i[0].dtype, np.dtype('S6'))

    # np.unicode_ is the NumPy scalar type; the bare ``np.unicode`` alias of
    # the builtin was deprecated in NumPy 1.20 and later removed.
    a = np.array(['abc', 'a', 'abcd'], dtype=np.unicode_)
    assert_equal(a.dtype, np.dtype('U4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='U2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='U6')
    assert_equal(i[0], sixu('abc'))
    assert_equal(i[0].dtype, np.dtype('U6'))
Example 6
def test_iter_buffering_string():
    # Buffered iteration may widen fixed-width strings but must refuse to
    # shrink them, for byte strings and unicode strings alike.

    # Safe casting disallows shrinking strings
    a = np.array(['abc', 'a', 'abcd'], dtype=np.bytes_)
    assert_equal(a.dtype, np.dtype('S4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='S2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='S6')
    assert_equal(i[0], asbytes('abc'))
    assert_equal(i[0].dtype, np.dtype('S6'))

    # np.unicode_ is the NumPy scalar type; the bare ``np.unicode`` alias of
    # the builtin was deprecated in NumPy 1.20 and later removed.
    a = np.array(['abc', 'a', 'abcd'], dtype=np.unicode_)
    assert_equal(a.dtype, np.dtype('U4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='U2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='U6')
    assert_equal(i[0], sixu('abc'))
    assert_equal(i[0].dtype, np.dtype('U6'))
Example 7
def test_iter_buffering_string():
    # Buffered iteration may widen fixed-width strings but must refuse to
    # shrink them, for byte strings and unicode strings alike.

    # Safe casting disallows shrinking strings
    a = np.array(['abc', 'a', 'abcd'], dtype=np.bytes_)
    assert_equal(a.dtype, np.dtype('S4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='S2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='S6')
    assert_equal(i[0], asbytes('abc'))
    assert_equal(i[0].dtype, np.dtype('S6'))

    # np.unicode_ is the NumPy scalar type; the bare ``np.unicode`` alias of
    # the builtin was deprecated in NumPy 1.20 and later removed.
    a = np.array(['abc', 'a', 'abcd'], dtype=np.unicode_)
    assert_equal(a.dtype, np.dtype('U4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='U2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='U6')
    assert_equal(i[0], sixu('abc'))
    assert_equal(i[0].dtype, np.dtype('U6'))
Example 8
def test_iter_buffering_string():
    # Buffered iteration may widen fixed-width strings but must refuse to
    # shrink them, for byte strings and unicode strings alike.

    # Safe casting disallows shrinking strings
    a = np.array(['abc', 'a', 'abcd'], dtype=np.bytes_)
    assert_equal(a.dtype, np.dtype('S4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='S2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='S6')
    assert_equal(i[0], asbytes('abc'))
    assert_equal(i[0].dtype, np.dtype('S6'))

    # np.unicode_ is the NumPy scalar type; the bare ``np.unicode`` alias of
    # the builtin was deprecated in NumPy 1.20 and later removed.
    a = np.array(['abc', 'a', 'abcd'], dtype=np.unicode_)
    assert_equal(a.dtype, np.dtype('U4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='U2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='U6')
    assert_equal(i[0], sixu('abc'))
    assert_equal(i[0].dtype, np.dtype('U6'))
Example 9
def _extract_edge_value(tree, edge):
    """Return the printable value carried by ``edge``.

    Numeric splits are rendered as ``">pivot"`` or ``"<=pivot"`` with two
    decimals. For other splits the encoded value is mapped back through the
    feature's encoder when ``tree.X_encoders`` is set (byte strings are
    decoded as UTF-8); otherwise the encoded value is returned unchanged.
    """
    record = edge.calc_record
    feature = record.feature_idx
    kind = record.split_type
    encoded = edge.value_encoded
    threshold = record.pivot

    if kind is CalcRecord.NUM:
        fmt = ">{0:.2f}" if encoded == SplitRecord.GREATER else "<={0:.2f}"
        return fmt.format(threshold)

    if tree.X_encoders is None:
        return encoded

    decoded = tree.X_encoders[feature].single_inv_transform(encoded)
    return decoded.decode('UTF-8') if isinstance(decoded, np.bytes_) else decoded
Example 10
def normalize_attr_strings(a: np.ndarray) -> np.ndarray:
    """
    Take an np.ndarray of all kinds of string-like elements, and return an array of ascii (np.bytes_) objects

    Args:
        a: array with an object, byte-string or unicode dtype.

    Returns:
        A byte-string array. Unicode text is encoded as ASCII with
        non-ASCII characters written as XML character references; an array
        that already has a byte-string dtype is returned as-is.

    Raises:
        ValueError: if an object array holds anything other than all-text or
            all-bytes elements, or if the dtype is not string-like.
    """
    if np.issubdtype(a.dtype, np.object_):
        # np.str_ / np.bytes_ scalars subclass str / bytes, so isinstance
        # covers Python and NumPy string objects (including a mix of both).
        if all(isinstance(x, str) for x in a):
            return np.array([x.encode('ascii', 'xmlcharrefreplace') for x in a])
        if all(isinstance(x, bytes) for x in a):
            return a.astype(np.bytes_)
        print(type(a[0]))
        raise ValueError("Arbitrary numpy object arrays not supported (all elements must be string objects).")
    if np.issubdtype(a.dtype, np.bytes_):
        return a
    if np.issubdtype(a.dtype, np.str_):
        return np.array([x.encode('ascii', 'xmlcharrefreplace') for x in a])
    raise ValueError("String values must be object, ascii or unicode.")
Example 11
def h5py_dataset_iterator(self, g, prefix=''):
    """Walk group ``g`` recursively and yield one dict per dataset-holding group.

    A child whose first member is an ``h5py.Dataset`` is treated as a leaf:
    a dict is yielded with its ``'path'`` (``prefix/key``) plus one entry
    per non-group member holding that member's data. One-dimensional arrays
    of ``np.bytes_`` are converted to lists of ASCII-decoded ``str``. Any
    other child is descended into with its path as the new prefix.
    """
    for key in g.keys():
        item = g[key]
        path = '{}/{}'.format(prefix, key)
        keys = list(item.keys())
        if isinstance(item[keys[0]], h5py.Dataset):  # test for dataset
            data = {'path': path}
            for k in keys:
                if not isinstance(item[k], h5py.Group):
                    # ``Dataset.value`` was removed in h5py 3.0; ``[()]``
                    # reads the whole dataset on old and new versions.
                    dataset = np.array(item[k][()])
                    # Scalar (0-d) datasets cannot be indexed with [0].
                    if (dataset.ndim > 0 and dataset.size != 0
                            and type(dataset[0]) is np.bytes_):
                        dataset = [a.decode('ascii') for a in dataset]
                    data.update({k: dataset})
            yield data
        else:  # test for group (go down)
            yield from self.h5py_dataset_iterator(item, path)
Example 12
def get_data(self, path, prefix=''):
    """Load every non-group member stored under ``self.store[path]``.

    Returns a dict with ``'path'`` (``prefix/path``) plus one entry per
    member holding that member's data. One-dimensional arrays of
    ``np.bytes_`` are converted to lists of ASCII-decoded ``str``.
    """
    item = self.store[path]
    path = '{}/{}'.format(prefix, path)
    data = {'path': path}
    for k in list(item.keys()):
        if not isinstance(item[k], h5py.Group):
            # ``Dataset.value`` was removed in h5py 3.0; ``[()]`` reads the
            # whole dataset on old and new versions.
            dataset = np.array(item[k][()])
            # Scalar (0-d) datasets cannot be indexed with [0].
            if (dataset.ndim > 0 and dataset.size != 0
                    and type(dataset[0]) is np.bytes_):
                dataset = [a.decode('ascii') for a in dataset]
            data.update({k: dataset})
    return data
Example 13
def test_iter_buffering_string():
    # Buffered iteration may widen fixed-width strings but must refuse to
    # shrink them, for byte strings and unicode strings alike.

    # Safe casting disallows shrinking strings
    a = np.array(['abc', 'a', 'abcd'], dtype=np.bytes_)
    assert_equal(a.dtype, np.dtype('S4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='S2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='S6')
    assert_equal(i[0], asbytes('abc'))
    assert_equal(i[0].dtype, np.dtype('S6'))

    # np.unicode_ is the NumPy scalar type; the bare ``np.unicode`` alias of
    # the builtin was deprecated in NumPy 1.20 and later removed.
    a = np.array(['abc', 'a', 'abcd'], dtype=np.unicode_)
    assert_equal(a.dtype, np.dtype('U4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='U2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='U6')
    assert_equal(i[0], sixu('abc'))
    assert_equal(i[0].dtype, np.dtype('U6'))
Example 14
def _getconv(dtype):
""" Find the correct dtype converter. Adapted from matplotlib """
def floatconv(x):
x.lower()
if b'0x' in x:
return float.fromhex(asstr(x))
return float(x)
typ = dtype.type
if issubclass(typ, np.bool_):
return lambda x: bool(int(x))
if issubclass(typ, np.uint64):
return np.uint64
if issubclass(typ, np.int64):
return np.int64
if issubclass(typ, np.integer):
return lambda x: int(float(x))
elif issubclass(typ, np.longdouble):
return np.longdouble
elif issubclass(typ, np.floating):
return floatconv
elif issubclass(typ, np.complex):
return lambda x: complex(asstr(x))
elif issubclass(typ, np.bytes_):
return bytes
else:
return str
Example 15
def decode_qtypes(cls, value):
    """
    Decode all the QCollection items to normal python types

    Byte strings (``bytes`` as well as ``numpy.bytes_``) are decoded as
    UTF-8, lists are returned untouched, objects exposing ``item()`` (NumPy
    scalars) are unwrapped through it, and anything else is already a plain
    Python value and is returned as-is.
    """
    if isinstance(value, (bytes, numpy.bytes_)):
        return value.decode("utf-8")
    if isinstance(value, list):
        return value
    # Plain Python values have no ``item`` method; pass them through
    # instead of failing with AttributeError.
    unwrap = getattr(value, "item", None)
    return unwrap() if callable(unwrap) else value
Example 16
def _getconv(dtype):
""" Find the correct dtype converter. Adapted from matplotlib """
def floatconv(x):
x.lower()
if b'0x' in x:
return float.fromhex(asstr(x))
return float(x)
typ = dtype.type
if issubclass(typ, np.bool_):
return lambda x: bool(int(x))
if issubclass(typ, np.uint64):
return np.uint64
if issubclass(typ, np.int64):
return np.int64
if issubclass(typ, np.integer):
return lambda x: int(float(x))
elif issubclass(typ, np.longdouble):
return np.longdouble
elif issubclass(typ, np.floating):
return floatconv
elif issubclass(typ, np.complex):
return lambda x: complex(asstr(x))
elif issubclass(typ, np.bytes_):
return bytes
else:
return str
Example 17
def encode_ascii(s):
    """Encode ``str`` data to ASCII bytes.

    A ``str`` is encoded directly. A unicode ndarray is encoded element-wise
    into a byte-string array of the same array type, keeping the original
    character width. Any other object is returned unchanged.
    """
    if isinstance(s, str):
        return s.encode('ascii')
    elif (isinstance(s, numpy.ndarray) and
          issubclass(s.dtype.type, numpy.str_)):
        # np.char.encode does not preserve the array type, hence the view()
        ns = numpy.char.encode(s, 'ascii').view(type(s))
        # A unicode item takes 4 bytes per character. Use floor division so
        # the width handed to astype() is an int, not a float.
        width = s.dtype.itemsize // 4
        if ns.dtype.itemsize != width:
            ns = ns.astype((numpy.bytes_, width))
        return ns
    return s
Example 18
def decode_ascii(s):
    """Decode ASCII bytes to ``str``.

    ``bytes`` are decoded directly. A byte-string ndarray is decoded
    element-wise into a unicode array of the same array type and the same
    character width. Any other object is returned unchanged.
    """
    if isinstance(s, bytes):
        return s.decode('ascii')

    is_bytes_array = (isinstance(s, numpy.ndarray)
                      and issubclass(s.dtype.type, numpy.bytes_))
    if not is_bytes_array:
        return s

    # np.char.encode/decode don't preserve the type of the array, which is
    # why the result is viewed as type(s) ...
    decoded = numpy.char.decode(s, 'ascii').view(type(s))
    # ... nor necessarily the width of the strings, which is why the result
    # may have to be cast back to the input's width.
    width = s.dtype.itemsize
    if decoded.dtype.itemsize / 4 != width:
        decoded = decoded.astype((numpy.str_, width))
    return decoded
Example 19
def _ensure_decoded(s):
""" if we have bytes, decode them to unicode """
if isinstance(s, (np.bytes_, bytes)):
s = s.decode(pd.get_option('display.encoding'))
return s
Example 20
def test_isscalar_numpy_array_scalars(self):
    # Each of these NumPy scalar flavours must be reported as a scalar.
    scalars = (
        np.int64(1),
        np.float64(1.),
        np.int32(1),
        np.object_('foobar'),
        np.str_('foobar'),
        np.unicode_(u('foobar')),
        np.bytes_(b'foobar'),
        np.datetime64('2014-01-01'),
        np.timedelta64(1, 'h'),
    )
    for scalar in scalars:
        self.assertTrue(lib.isscalar(scalar))
Example 21
def _ensure_decoded(s):
""" if we have bytes, decode them to unicode """
if isinstance(s, np.bytes_):
s = s.decode('UTF-8')
return s
Example 22
def _getconv(dtype):
""" Find the correct dtype converter. Adapted from matplotlib """
def floatconv(x):
x.lower()
if b'0x' in x:
return float.fromhex(asstr(x))
return float(x)
typ = dtype.type
if issubclass(typ, np.bool_):
return lambda x: bool(int(x))
if issubclass(typ, np.uint64):
return np.uint64
if issubclass(typ, np.int64):
return np.int64
if issubclass(typ, np.integer):
return lambda x: int(float(x))
elif issubclass(typ, np.floating):
return floatconv
elif issubclass(typ, np.complex):
return lambda x: complex(asstr(x))
elif issubclass(typ, np.bytes_):
return bytes
else:
return str
Example 23
def _getconv(dtype):
""" Find the correct dtype converter. Adapted from matplotlib """
def floatconv(x):
x.lower()
if b'0x' in x:
return float.fromhex(asstr(x))
return float(x)
typ = dtype.type
if issubclass(typ, np.bool_):
return lambda x: bool(int(x))
if issubclass(typ, np.uint64):
return np.uint64
if issubclass(typ, np.int64):
return np.int64
if issubclass(typ, np.integer):
return lambda x: int(float(x))
elif issubclass(typ, np.longdouble):
return np.longdouble
elif issubclass(typ, np.floating):
return floatconv
elif issubclass(typ, np.complex):
return lambda x: complex(asstr(x))
elif issubclass(typ, np.bytes_):
return bytes
else:
return str
Example 24
def _getconv(dtype):
""" Find the correct dtype converter. Adapted from matplotlib """
def floatconv(x):
x.lower()
if b'0x' in x:
return float.fromhex(asstr(x))
return float(x)
typ = dtype.type
if issubclass(typ, np.bool_):
return lambda x: bool(int(x))
if issubclass(typ, np.uint64):
return np.uint64
if issubclass(typ, np.int64):
return np.int64
if issubclass(typ, np.integer):
return lambda x: int(float(x))
elif issubclass(typ, np.longdouble):
return np.longdouble
elif issubclass(typ, np.floating):
return floatconv
elif issubclass(typ, np.complex):
return lambda x: complex(asstr(x))
elif issubclass(typ, np.bytes_):
return bytes
else:
return str
Example 25
def export_text(decision_tree, feature_names=None):
    """Render a decision tree as a WEKA-like string.

    Parameters
    ----------
    decision_tree : decision tree classifier
        Tree to export, starting at its ``root`` node.
    feature_names : list of strings, optional (default=None)
        Names of each of the features; the feature index is printed when
        this is None.

    Returns
    -------
    ret : string
        One line per edge; leaves end the line with the decoded class value
        and its class count. Nodes deeper than 500 levels are omitted.
    """
    max_depth = 500

    def build_string(node, indent, depth):
        if node is None or depth > max_depth:
            return ''

        if not node.is_feature:
            # Leaf: print the class label in its original (decoded) form.
            value = decision_tree.y_encoder.single_inv_transform(node.value)
            if isinstance(value, np.bytes_):
                value = value.decode('UTF-8')
            return ': {} {} \n'.format(value, _extract_class_count(node))

        idx = node.details.feature_idx
        label = str(idx) if feature_names is None else feature_names[idx]
        template = '| ' * indent + label + ' {}'

        pieces = ['\n']
        for child in node.children:
            # Each child is indexed as (sub-node, edge).
            edge_value = _extract_edge_value(decision_tree, child[1])
            pieces.append(template.format(edge_value))
            pieces.append(build_string(child[0], indent + 1, depth + 1))
        return ''.join(pieces)

    return build_string(decision_tree.root, 0, 0)
Example 26
def _getconv(dtype):
""" Find the correct dtype converter. Adapted from matplotlib """
def floatconv(x):
x.lower()
if b'0x' in x:
return float.fromhex(asstr(x))
return float(x)
typ = dtype.type
if issubclass(typ, np.bool_):
return lambda x: bool(int(x))
if issubclass(typ, np.uint64):
return np.uint64
if issubclass(typ, np.int64):
return np.int64
if issubclass(typ, np.integer):
return lambda x: int(float(x))
elif issubclass(typ, np.longdouble):
return np.longdouble
elif issubclass(typ, np.floating):
return floatconv
elif issubclass(typ, np.complex):
return lambda x: complex(asstr(x))
elif issubclass(typ, np.bytes_):
return bytes
else:
return str
Example 27
def test_pickle_py2_scalar_latin1_hack(self):
    # Check that scalar unpickling hack in Py3 that supports
    # encoding='latin1' work correctly.

    # Python2 output for pickle.dumps(...)
    # NOTE(review): the "S'<'\np5\nNNN..." byte-order/size segments of the
    # first two pickles were truncated in this copy and are restored here
    # following the dtype state layout of the intact third entry -- confirm
    # against the upstream NumPy test.
    datas = [
        # (original, python2_pickle, koi8r_validity)
        (np.unicode_('\u6bd2'),
         asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n"
                 "(S'U1'\np2\nI0\nI1\ntp3\nRp4\n(I3\nS'<'\np5\nNNNI4\nI4\nI0\n"
                 "tp6\nbS'\\xd2k\\x00\\x00'\np7\ntp8\nRp9\n."),
         'invalid'),

        (np.float64(9e123),
         asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n(S'f8'\n"
                 "p2\nI0\nI1\ntp3\nRp4\n(I3\nS'<'\np5\nNNNI-1\nI-1\nI0\ntp6\n"
                 "bS'O\\x81\\xb7Z\\xaa:\\xabY'\np7\ntp8\nRp9\n."),
         'invalid'),

        (np.bytes_(asbytes('\x9c')),  # different 8-bit code point in KOI8-R vs latin1
         asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n(S'S1'\np2\n"
                 "I0\nI1\ntp3\nRp4\n(I3\nS'|'\np5\nNNNI1\nI1\nI0\ntp6\nbS'\\x9c'\np7\n"
                 "tp8\nRp9\n."),
         'different'),
    ]
    if sys.version_info[0] >= 3:
        for original, data, koi8r_validity in datas:
            result = pickle.loads(data, encoding='latin1')
            assert_equal(result, original)

            # Decoding under non-latin1 encoding (e.g.) KOI8-R can
            # produce bad results, but should not segfault.
            if koi8r_validity == 'different':
                # Unicode code points happen to lie within latin1,
                # but are different in koi8-r, resulting to silent
                # bogus results
                result = pickle.loads(data, encoding='koi8-r')
                assert_(result != original)
            elif koi8r_validity == 'invalid':
                # Unicode code points outside latin1, so results
                # to an encoding exception
                assert_raises(ValueError, pickle.loads, data, encoding='koi8-r')
            else:
                raise ValueError(koi8r_validity)
Example 28
def test_pickle_py2_scalar_latin1_hack(self):
    # Check that scalar unpickling hack in Py3 that supports
    # encoding='latin1' work correctly.

    # Python2 output for pickle.dumps(...)
    # NOTE(review): the "S'<'\np5\nNNN..." byte-order/size segments of the
    # first two pickles were truncated in this copy and are restored here
    # following the dtype state layout of the intact third entry -- confirm
    # against the upstream NumPy test.
    datas = [
        # (original, python2_pickle, koi8r_validity)
        (np.unicode_('\u6bd2'),
         asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n"
                 "(S'U1'\np2\nI0\nI1\ntp3\nRp4\n(I3\nS'<'\np5\nNNNI4\nI4\nI0\n"
                 "tp6\nbS'\\xd2k\\x00\\x00'\np7\ntp8\nRp9\n."),
         'invalid'),

        (np.float64(9e123),
         asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n(S'f8'\n"
                 "p2\nI0\nI1\ntp3\nRp4\n(I3\nS'<'\np5\nNNNI-1\nI-1\nI0\ntp6\n"
                 "bS'O\\x81\\xb7Z\\xaa:\\xabY'\np7\ntp8\nRp9\n."),
         'invalid'),

        (np.bytes_(asbytes('\x9c')),  # different 8-bit code point in KOI8-R vs latin1
         asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n(S'S1'\np2\n"
                 "I0\nI1\ntp3\nRp4\n(I3\nS'|'\np5\nNNNI1\nI1\nI0\ntp6\nbS'\\x9c'\np7\n"
                 "tp8\nRp9\n."),
         'different'),
    ]
    if sys.version_info[0] >= 3:
        for original, data, koi8r_validity in datas:
            result = pickle.loads(data, encoding='latin1')
            assert_equal(result, original)

            # Decoding under non-latin1 encoding (e.g.) KOI8-R can
            # produce bad results, but should not segfault.
            if koi8r_validity == 'different':
                # Unicode code points happen to lie within latin1,
                # but are different in koi8-r, resulting to silent
                # bogus results
                result = pickle.loads(data, encoding='koi8-r')
                assert_(result != original)
            elif koi8r_validity == 'invalid':
                # Unicode code points outside latin1, so results
                # to an encoding exception
                assert_raises(ValueError, pickle.loads, data, encoding='koi8-r')
            else:
                raise ValueError(koi8r_validity)
Example 29
def datasetselected(self):
    """ Action : One or more DataSets were selected from the list

    Stores the current dataset path in ``self.currentdset`` and the paths of
    all selected datasets in ``self.allselect`` (None when nothing is
    selected), then refreshes the dataset label, the attribute table and
    the memo field from the current dataset's attributes.
    """
    group = self.ui.dataGroupSelect.currentText()
    self.currentdset = group + '/' + self.ui.dataSetList.currentItem().text()

    self.allselect = ['/' + str(group + '/' + i.text())
                      for i in self.ui.dataSetList.selectedItems()]

    if len(self.allselect) == 0:
        # Nothing selected: reset every dependent widget.
        self.allselect = None
        self.ui.currentDatasetText.setText('')
        attrs = {}
        self.ui.dataSetAttribs.setRowCount(0)
        self.ui.dataSetMemo.setText('')
    else:
        if len(self.allselect) == 1:
            self.ui.currentDatasetText.setText(self.currentdset)
        else:
            self.ui.currentDatasetText.setText(self.currentdset + ' ( + ' +
                                               str(len(self.allselect)-1) + ' others)')

        # Sorting is switched off while the table is rebuilt and switched
        # back on once every row has been inserted.
        self.ui.dataSetAttribs.setSortingEnabled(False)
        self.ui.dataSetAttribs.setRowCount(0)
        self.ui.dataSetAttribs.setColumnCount(2)

        attrs = _h5utils.retrieve_dataset_attribute_dict(self.path + self.filename, self.currentdset)

        for count, key in enumerate(attrs.keys()):
            self.ui.dataSetAttribs.insertRow(self.ui.dataSetAttribs.rowCount())
            self.ui.dataSetAttribs.setItem(count, 0, _QTableWidgetItem(str(key)))
            temp = attrs[key]
            if isinstance(temp, _np.bytes_):
                self.ui.dataSetAttribs.setItem(count, 1, _QTableWidgetItem(temp.decode()))
            else:
                self.ui.dataSetAttribs.setItem(count, 1, _QTableWidgetItem(str(temp)))

        self.ui.dataSetAttribs.setSortingEnabled(True)
        self.ui.dataSetAttribs.sortItems(0)

        # The memo is best-effort: skip it when there is no 'Memo'
        # attribute, it is not a byte string, or it cannot be decoded.
        # Any other error is left to propagate.
        try:
            self.ui.dataSetMemo.setText(attrs['Memo'].decode())
        except (KeyError, AttributeError, UnicodeDecodeError):
            pass
Example 30
def test_pickle_py2_scalar_latin1_hack(self):
    # Check that scalar unpickling hack in Py3 that supports
    # encoding='latin1' work correctly.

    # Python2 output for pickle.dumps(...)
    # NOTE(review): the "S'<'\np5\nNNN..." byte-order/size segments of the
    # first two pickles were truncated in this copy and are restored here
    # following the dtype state layout of the intact third entry -- confirm
    # against the upstream NumPy test.
    datas = [
        # (original, python2_pickle, koi8r_validity)
        (np.unicode_('\u6bd2'),
         asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n"
                 "(S'U1'\np2\nI0\nI1\ntp3\nRp4\n(I3\nS'<'\np5\nNNNI4\nI4\nI0\n"
                 "tp6\nbS'\\xd2k\\x00\\x00'\np7\ntp8\nRp9\n."),
         'invalid'),

        (np.float64(9e123),
         asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n(S'f8'\n"
                 "p2\nI0\nI1\ntp3\nRp4\n(I3\nS'<'\np5\nNNNI-1\nI-1\nI0\ntp6\n"
                 "bS'O\\x81\\xb7Z\\xaa:\\xabY'\np7\ntp8\nRp9\n."),
         'invalid'),

        (np.bytes_(asbytes('\x9c')),  # different 8-bit code point in KOI8-R vs latin1
         asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n(S'S1'\np2\n"
                 "I0\nI1\ntp3\nRp4\n(I3\nS'|'\np5\nNNNI1\nI1\nI0\ntp6\nbS'\\x9c'\np7\n"
                 "tp8\nRp9\n."),
         'different'),
    ]
    if sys.version_info[0] >= 3:
        for original, data, koi8r_validity in datas:
            result = pickle.loads(data, encoding='latin1')
            assert_equal(result, original)

            # Decoding under non-latin1 encoding (e.g.) KOI8-R can
            # produce bad results, but should not segfault.
            if koi8r_validity == 'different':
                # Unicode code points happen to lie within latin1,
                # but are different in koi8-r, resulting to silent
                # bogus results
                result = pickle.loads(data, encoding='koi8-r')
                assert_(result != original)
            elif koi8r_validity == 'invalid':
                # Unicode code points outside latin1, so results
                # to an encoding exception
                assert_raises(ValueError, pickle.loads, data, encoding='koi8-r')
            else:
                raise ValueError(koi8r_validity)
Example 31
def test_pickle_py2_scalar_latin1_hack(self):
    # Check that scalar unpickling hack in Py3 that supports
    # encoding='latin1' work correctly.

    # Python2 output for pickle.dumps(...)
    # NOTE(review): the "S'<'\np5\nNNN..." byte-order/size segments of the
    # first two pickles were truncated in this copy and are restored here
    # following the dtype state layout of the intact third entry -- confirm
    # against the upstream NumPy test.
    datas = [
        # (original, python2_pickle, koi8r_validity)
        (np.unicode_('\u6bd2'),
         asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n"
                 "(S'U1'\np2\nI0\nI1\ntp3\nRp4\n(I3\nS'<'\np5\nNNNI4\nI4\nI0\n"
                 "tp6\nbS'\\xd2k\\x00\\x00'\np7\ntp8\nRp9\n."),
         'invalid'),

        (np.float64(9e123),
         asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n(S'f8'\n"
                 "p2\nI0\nI1\ntp3\nRp4\n(I3\nS'<'\np5\nNNNI-1\nI-1\nI0\ntp6\n"
                 "bS'O\\x81\\xb7Z\\xaa:\\xabY'\np7\ntp8\nRp9\n."),
         'invalid'),

        (np.bytes_(asbytes('\x9c')),  # different 8-bit code point in KOI8-R vs latin1
         asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n(S'S1'\np2\n"
                 "I0\nI1\ntp3\nRp4\n(I3\nS'|'\np5\nNNNI1\nI1\nI0\ntp6\nbS'\\x9c'\np7\n"
                 "tp8\nRp9\n."),
         'different'),
    ]
    if sys.version_info[0] >= 3:
        for original, data, koi8r_validity in datas:
            result = pickle.loads(data, encoding='latin1')
            assert_equal(result, original)

            # Decoding under non-latin1 encoding (e.g.) KOI8-R can
            # produce bad results, but should not segfault.
            if koi8r_validity == 'different':
                # Unicode code points happen to lie within latin1,
                # but are different in koi8-r, resulting to silent
                # bogus results
                result = pickle.loads(data, encoding='koi8-r')
                assert_(result != original)
            elif koi8r_validity == 'invalid':
                # Unicode code points outside latin1, so results
                # to an encoding exception
                assert_raises(ValueError, pickle.loads, data, encoding='koi8-r')
            else:
                raise ValueError(koi8r_validity)
Example 32
def test_pickle_py2_scalar_latin1_hack(self):
    # Check that scalar unpickling hack in Py3 that supports
    # encoding='latin1' work correctly.

    # Python2 output for pickle.dumps(...)
    # NOTE(review): the "S'<'\np5\nNNN..." byte-order/size segments of the
    # first two pickles were truncated in this copy and are restored here
    # following the dtype state layout of the intact third entry -- confirm
    # against the upstream NumPy test.
    datas = [
        # (original, python2_pickle, koi8r_validity)
        (np.unicode_('\u6bd2'),
         asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n"
                 "(S'U1'\np2\nI0\nI1\ntp3\nRp4\n(I3\nS'<'\np5\nNNNI4\nI4\nI0\n"
                 "tp6\nbS'\\xd2k\\x00\\x00'\np7\ntp8\nRp9\n."),
         'invalid'),

        (np.float64(9e123),
         asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n(S'f8'\n"
                 "p2\nI0\nI1\ntp3\nRp4\n(I3\nS'<'\np5\nNNNI-1\nI-1\nI0\ntp6\n"
                 "bS'O\\x81\\xb7Z\\xaa:\\xabY'\np7\ntp8\nRp9\n."),
         'invalid'),

        (np.bytes_(asbytes('\x9c')),  # different 8-bit code point in KOI8-R vs latin1
         asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n(S'S1'\np2\n"
                 "I0\nI1\ntp3\nRp4\n(I3\nS'|'\np5\nNNNI1\nI1\nI0\ntp6\nbS'\\x9c'\np7\n"
                 "tp8\nRp9\n."),
         'different'),
    ]
    if sys.version_info[0] >= 3:
        for original, data, koi8r_validity in datas:
            result = pickle.loads(data, encoding='latin1')
            assert_equal(result, original)

            # Decoding under non-latin1 encoding (e.g.) KOI8-R can
            # produce bad results, but should not segfault.
            if koi8r_validity == 'different':
                # Unicode code points happen to lie within latin1,
                # but are different in koi8-r, resulting to silent
                # bogus results
                result = pickle.loads(data, encoding='koi8-r')
                assert_(result != original)
            elif koi8r_validity == 'invalid':
                # Unicode code points outside latin1, so results
                # to an encoding exception
                assert_raises(ValueError, pickle.loads, data, encoding='koi8-r')
            else:
                raise ValueError(koi8r_validity)
Example 33
def test_pickle_py2_scalar_latin1_hack(self):
    # Check that scalar unpickling hack in Py3 that supports
    # encoding='latin1' work correctly.

    # Python2 output for pickle.dumps(...)
    # NOTE(review): the "S'<'\np5\nNNN..." byte-order/size segments of the
    # first two pickles were truncated in this copy and are restored here
    # following the dtype state layout of the intact third entry -- confirm
    # against the upstream NumPy test.
    datas = [
        # (original, python2_pickle, koi8r_validity)
        (np.unicode_('\u6bd2'),
         asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n"
                 "(S'U1'\np2\nI0\nI1\ntp3\nRp4\n(I3\nS'<'\np5\nNNNI4\nI4\nI0\n"
                 "tp6\nbS'\\xd2k\\x00\\x00'\np7\ntp8\nRp9\n."),
         'invalid'),

        (np.float64(9e123),
         asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n(S'f8'\n"
                 "p2\nI0\nI1\ntp3\nRp4\n(I3\nS'<'\np5\nNNNI-1\nI-1\nI0\ntp6\n"
                 "bS'O\\x81\\xb7Z\\xaa:\\xabY'\np7\ntp8\nRp9\n."),
         'invalid'),

        (np.bytes_(asbytes('\x9c')),  # different 8-bit code point in KOI8-R vs latin1
         asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n(S'S1'\np2\n"
                 "I0\nI1\ntp3\nRp4\n(I3\nS'|'\np5\nNNNI1\nI1\nI0\ntp6\nbS'\\x9c'\np7\n"
                 "tp8\nRp9\n."),
         'different'),
    ]
    if sys.version_info[0] >= 3:
        for original, data, koi8r_validity in datas:
            result = pickle.loads(data, encoding='latin1')
            assert_equal(result, original)

            # Decoding under non-latin1 encoding (e.g.) KOI8-R can
            # produce bad results, but should not segfault.
            if koi8r_validity == 'different':
                # Unicode code points happen to lie within latin1,
                # but are different in koi8-r, resulting to silent
                # bogus results
                result = pickle.loads(data, encoding='koi8-r')
                assert_(result != original)
            elif koi8r_validity == 'invalid':
                # Unicode code points outside latin1, so results
                # to an encoding exception
                assert_raises(ValueError, pickle.loads, data, encoding='koi8-r')
            else:
                raise ValueError(koi8r_validity)
Example 34
def test_pickle_py2_scalar_latin1_hack(self):
    # Check that scalar unpickling hack in Py3 that supports
    # encoding='latin1' work correctly.

    # Python2 output for pickle.dumps(...)
    # NOTE(review): the "S'<'\np5\nNNN..." byte-order/size segments of the
    # first two pickles were truncated in this copy and are restored here
    # following the dtype state layout of the intact third entry -- confirm
    # against the upstream NumPy test.
    datas = [
        # (original, python2_pickle, koi8r_validity)
        (np.unicode_('\u6bd2'),
         asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n"
                 "(S'U1'\np2\nI0\nI1\ntp3\nRp4\n(I3\nS'<'\np5\nNNNI4\nI4\nI0\n"
                 "tp6\nbS'\\xd2k\\x00\\x00'\np7\ntp8\nRp9\n."),
         'invalid'),

        (np.float64(9e123),
         asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n(S'f8'\n"
                 "p2\nI0\nI1\ntp3\nRp4\n(I3\nS'<'\np5\nNNNI-1\nI-1\nI0\ntp6\n"
                 "bS'O\\x81\\xb7Z\\xaa:\\xabY'\np7\ntp8\nRp9\n."),
         'invalid'),

        (np.bytes_(asbytes('\x9c')),  # different 8-bit code point in KOI8-R vs latin1
         asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n(S'S1'\np2\n"
                 "I0\nI1\ntp3\nRp4\n(I3\nS'|'\np5\nNNNI1\nI1\nI0\ntp6\nbS'\\x9c'\np7\n"
                 "tp8\nRp9\n."),
         'different'),
    ]
    if sys.version_info[0] >= 3:
        for original, data, koi8r_validity in datas:
            result = pickle.loads(data, encoding='latin1')
            assert_equal(result, original)

            # Decoding under non-latin1 encoding (e.g.) KOI8-R can
            # produce bad results, but should not segfault.
            if koi8r_validity == 'different':
                # Unicode code points happen to lie within latin1,
                # but are different in koi8-r, resulting to silent
                # bogus results
                result = pickle.loads(data, encoding='koi8-r')
                assert_(result != original)
            elif koi8r_validity == 'invalid':
                # Unicode code points outside latin1, so results
                # to an encoding exception
                assert_raises(ValueError, pickle.loads, data, encoding='koi8-r')
            else:
                raise ValueError(koi8r_validity)