Example 1
def roll_zeropad(a, shift, axis=None):
    a = np.asanyarray(a)
    if shift == 0:
        return a
    if axis is None:
        n = a.size
        reshape = True
    else:
        n = a.shape[axis]
        reshape = False
    if np.abs(shift) > n:
        res = np.zeros_like(a)
    elif shift < 0:
        shift += n
        zeros = np.zeros_like(a.take(np.arange(n - shift), axis))
        res = np.concatenate((a.take(np.arange(n - shift, n), axis), zeros), axis)
    else:
        zeros = np.zeros_like(a.take(np.arange(n - shift, n), axis))
        res = np.concatenate((zeros, a.take(np.arange(n - shift), axis)), axis)
    if reshape:
        return res.reshape(a.shape)
    else:
        return res
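A quick usage sketch (assuming numpy is imported as np): unlike np.roll, elements shifted past the edge are dropped and the vacated positions are zero-filled.

a = np.arange(1, 6)
print(roll_zeropad(a, 2))   # [0 0 1 2 3]
print(roll_zeropad(a, -2))  # [3 4 5 0 0]
print(np.roll(a, 2))        # [4 5 1 2 3] -- wraps instead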
Example 2
def rhoA(self):
    # rhoA
    rhoA = pd.DataFrame(0, index=np.arange(1), columns=self.latent)
    for i in range(self.lenlatent):
        weights = pd.DataFrame(self.outer_weights[self.latent[i]])
        weights = weights[(weights.T != 0).any()]
        result = pd.DataFrame.dot(weights.T, weights)
        result_ = pd.DataFrame.dot(weights, weights.T)
        S = self.data_[self.Variables['measurement'][
            self.Variables['latent'] == self.latent[i]]]
        S = pd.DataFrame.dot(S.T, S) / S.shape[0]
        numerador = (
            np.dot(np.dot(weights.T, (S - np.diag(np.diag(S)))), weights))
        denominador = (
            (np.dot(np.dot(weights.T, (result_ - np.diag(np.diag(result_)))), weights)))
        rhoA_ = ((result) ** 2) * (numerador / denominador)
        if np.isnan(rhoA_.values):
            rhoA[self.latent[i]] = 1
        else:
            rhoA[self.latent[i]] = rhoA_.values
    return rhoA.T
Example 3
def plot_sent_trajectories(sents, decode_plot):
    font = {'family': 'normal',
            'size': 14}
    matplotlib.rc('font', **font)
    i = 0
    l = ["Portuguese", "Catalan"]
    axes = plt.gca()
    # axes.set_xlim([xmin, xmax])
    axes.set_ylim([-1, 1])
    for sent, enc in zip(sents, decode_plot):
        if i == 2:
            continue
        i += 1
        # times = np.arange(len(enc))
        times = np.linspace(0, 1, len(enc))
        plt.plot(times, enc, label=l[i - 1])
    plt.title("Hidden Node Trajectories")
    plt.xlabel('timestep')
    plt.ylabel('trajectories')
    plt.legend(loc='best')
    plt.savefig("final_tests/cr_por_cat_hidden_cell_trajectories", bbox_inches="tight")
    plt.close()
Example 4
def _generate_data():
    """
    Generate training data.
    (The original, commented-out variant computed y(k) from u(k-1) and
    y(k-1); the current version uses y(k) = sin(u1(k)) + u2(k).)
    """
    # u = np.random.uniform(-1, 1, 200)
    # y = []
    # former_y_value = 0
    # for i in np.arange(0, 200):
    #     y.append(former_y_value)
    #     next_y_value = (29.0 / 40) * np.sin(
    #         (16.0 * u[i] + 8 * former_y_value) / (3.0 + 4.0 * (u[i] ** 2) + 4 * (former_y_value ** 2))) \
    #         + (2.0 / 10) * u[i] + (2.0 / 10) * former_y_value
    #     former_y_value = next_y_value
    # return u, y
    u1 = np.random.uniform(-np.pi, np.pi, 200)
    u2 = np.random.uniform(-1, 1, 200)
    y = np.zeros(200)
    for i in range(200):
        y[i] = np.sin(u1[i]) + u2[i]
    return u1, u2, y
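A minimal smoke test for the generator above (shapes only, since the data are random):

u1, u2, y = _generate_data()
assert u1.shape == u2.shape == y.shape == (200,)
assert np.allclose(y, np.sin(u1) + u2)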
Example 5
def plot_counts(counts, gene_type):
    """Plot expression counts. Return a Figure object"""
    import matplotlib
    matplotlib.use('agg')
    import matplotlib.pyplot as plt
    import seaborn as sns
    import numpy as np
    fig = plt.figure(figsize=((50 + len(counts) * 5) / 25.4, 210 / 25.4))
    matplotlib.rcParams.update({'font.size': 14})
    ax = fig.gca()
    ax.set_title('{} gene usage'.format(gene_type))
    ax.set_xlabel('{} gene'.format(gene_type))
    ax.set_ylabel('Count')
    ax.set_xticks(np.arange(len(counts)) + 0.5)
    ax.set_xticklabels(counts.index, rotation='vertical')
    ax.grid(axis='x')
    ax.set_xlim((-0.25, len(counts)))
    ax.bar(np.arange(len(counts)), counts['count'])
    fig.set_tight_layout(True)
    return fig
Example 6
def _create_figure(predictions_dict):
    """Creates and returns a new figure that visualizes
    attention scores for a single model prediction.
    """
    # Find out how long the predicted sequence is
    target_words = list(predictions_dict["predicted_tokens"])
    prediction_len = _get_prediction_length(predictions_dict)
    # Get source words
    source_len = predictions_dict["features.source_len"]
    source_words = predictions_dict["features.source_tokens"][:source_len]
    # Plot
    fig = plt.figure(figsize=(8, 8))
    plt.imshow(
        X=predictions_dict["attention_scores"][:prediction_len, :source_len],
        interpolation="nearest",
        cmap=plt.cm.Blues)
    plt.xticks(np.arange(source_len), source_words, rotation=45)
    plt.yticks(np.arange(prediction_len), target_words, rotation=-45)
    fig.tight_layout()
    return fig
Example 7
def mypsd(Rates, time_range, bin_w=5., nmax=4000):
    bins = np.arange(0, len(time_range), 1)
    # print(bins)
    a, b = np.histogram(Rates, bins)
    ff = (1. / len(bins)) * abs(np.fft.fft(Rates - np.mean(Rates))) ** 2
    Fs = 1. / (1 * 0.001)
    # Integer division for indexing: the original wrote len(bins/2), which
    # divides the array (same length), and len(ff)/2, a float under Python 3.
    freq2 = np.fft.fftfreq(len(bins))[0:len(bins) // 2 + 1]  # d = dt
    freq = np.fft.fftfreq(len(bins))[:len(ff) // 2 + 1]
    px = ff[0:len(ff) // 2 + 1]
    max_px = np.max(px[1:])
    idx = px == max_px
    corr_freq = freq[np.flatnonzero(idx)]  # np.flatnonzero replaces the removed pl.find
    new_px = px
    max_pow = new_px[np.flatnonzero(idx)]
    return new_px, freq, corr_freq[0], freq2, max_pow
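A hedged smoke test: a 10 Hz sine sampled at 1 kHz should put the spectral peak at 10 cycles per 1000 samples (the returned frequencies are in cycles per sample, so multiply by the 1 kHz sampling rate to get Hz).

t = np.arange(0, 1, 0.001)
rates = np.sin(2 * np.pi * 10 * t)
px, freq, corr_freq, freq2, max_pow = mypsd(rates, t)
print(corr_freq * 1000.0)  # ~10.0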
Example 8
def remove_artifacts(self, image):
    """
    Remove the connected components that are not within the parameters
    Operates in place
    :param image: sudoku's thresholded image w/o grid
    :return: None
    """
    labeled, features = label(image, structure=CROSS)
    lbls = np.arange(1, features + 1)
    areas = extract_feature(image, labeled, lbls, np.sum,
                            np.uint32, 0)
    sides = extract_feature(image, labeled, lbls, min_side,
                            np.float32, 0, True)
    diags = extract_feature(image, labeled, lbls, diagonal,
                            np.float32, 0, True)
    for index in lbls:
        area = areas[index - 1] / 255
        side = sides[index - 1]
        diag = diags[index - 1]
        if side < 5 or side > 20 \
                or diag < 15 or diag > 25 \
                or area < 40:
            image[labeled == index] = 0
    return None
Example 9
def next_batch(self, batch_size, fake_data=False):
    """Return the next `batch_size` examples from this data set."""
    if fake_data:
        fake_image = [1.0 for _ in xrange(784)]
        fake_label = 0
        return [fake_image for _ in xrange(batch_size)], [
            fake_label for _ in xrange(batch_size)]
    start = self._index_in_epoch
    self._index_in_epoch += batch_size
    if self._index_in_epoch > self._num_examples:
        # Finished epoch
        self._epochs_completed += 1
        # Shuffle the data
        perm = numpy.arange(self._num_examples)
        numpy.random.shuffle(perm)
        self._images = self._images[perm]
        self._labels = self._labels[perm]
        # Start next epoch
        start = 0
        self._index_in_epoch = batch_size
        assert batch_size <= self._num_examples
    end = self._index_in_epoch
    return self._images[start:end], self._labels[start:end]
Example 10
def split_dataset(dataset, split_ratio, mode):
    if mode == 'SPLIT_CLASSES':
        nrof_classes = len(dataset)
        class_indices = np.arange(nrof_classes)
        np.random.shuffle(class_indices)
        split = int(round(nrof_classes * split_ratio))
        train_set = [dataset[i] for i in class_indices[0:split]]
        test_set = [dataset[i] for i in class_indices[split:-1]]
    elif mode == 'SPLIT_IMAGES':
        train_set = []
        test_set = []
        min_nrof_images = 2
        for cls in dataset:
            paths = cls.image_paths
            np.random.shuffle(paths)
            split = int(round(len(paths) * split_ratio))
            if split < min_nrof_images:
                continue  # Not enough images for test set. Skip class...
            train_set.append(ImageClass(cls.name, paths[0:split]))
            test_set.append(ImageClass(cls.name, paths[split:-1]))
    else:
        raise ValueError('Invalid train/test split mode "%s"' % mode)
    return train_set, test_set
Example 11
def quantize_from_probs2(probs, resolution):
    """Quantize multiple non-normalized probs to given resolution.
    Args:
        probs: An [N, M]-shaped numpy array of non-normalized probabilities.
        resolution: The integer total mass each row is quantized to.
    Returns:
        An [N, M]-shaped array of quantized probabilities such that
        np.all(result.sum(axis=1) == resolution).
    """
    assert len(probs.shape) == 2
    N, M = probs.shape
    probs = probs / probs.sum(axis=1, keepdims=True)
    result = np.zeros(probs.shape, np.int8)
    range_N = np.arange(N, dtype=np.int32)
    for _ in range(resolution):
        sample = probs.argmax(axis=1)
        result[range_N, sample] += 1
        probs[range_N, sample] -= 1.0 / resolution
    return result
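A small worked example: each row of unnormalized probabilities is quantized to a total mass of 4 (the exact split can shift by one unit on floating-point ties).

probs = np.array([[0.1, 0.2, 0.7],
                  [1.0, 1.0, 2.0]])
q = quantize_from_probs2(probs, resolution=4)
print(q)              # e.g. [[0 1 3], [1 1 2]]
print(q.sum(axis=1))  # [4 4]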
Example 12
def get_train_data():
    # define the dataset as two inputs, one output
    DS = SupervisedDataSet(2, 1)
    u1, u2, y = _generate_data()
    # add data elements to the dataset
    for i in np.arange(199):
        DS.addSample([u1[i], u2[i]], [y[i + 1]])
    # you can get your input/output this way:
    # X = DS['input']
    # Y = DS['target']
    # split the dataset into train dataset and test dataset
    dataTrain, dataTest = DS.splitWithProportion(0.8)
    return dataTrain, dataTest
Example 13
def read_chunk(self, idx, chunk_size, padding=(0, 0), nodes=None):
    self._open()
    t_start, t_stop = self._get_t_start_t_stop(idx, chunk_size, padding)
    if self.time_axis == 0:
        local_chunk = self.data[t_start:t_stop, :]
    elif self.time_axis == 1:
        local_chunk = self.data[:, t_start:t_stop].T
    self._close()
    if nodes is not None:
        if not numpy.all(nodes == numpy.arange(self.nb_channels)):
            local_chunk = numpy.take(local_chunk, nodes, axis=1)
    return self._scale_data_to_float32(local_chunk)
Example 14
def _get_slice_(self, t_start, t_stop):
    x_beg = numpy.int64(t_start // self.SAMPLES_PER_RECORD)
    r_beg = numpy.mod(t_start, self.SAMPLES_PER_RECORD)
    x_end = numpy.int64(t_stop // self.SAMPLES_PER_RECORD)
    r_end = numpy.mod(t_stop, self.SAMPLES_PER_RECORD)
    if x_beg == x_end:
        g_offset = x_beg * self.bytes_per_block_div + self.block_offset_div
        data_slice = numpy.arange(g_offset + r_beg * self.nb_channels,
                                  g_offset + r_end * self.nb_channels, dtype=numpy.int64)
        yield data_slice
    else:
        for count, nb_blocks in enumerate(numpy.arange(x_beg, x_end + 1, dtype=numpy.int64)):
            g_offset = nb_blocks * self.bytes_per_block_div + self.block_offset_div
            if count == 0:
                data_slice = numpy.arange(g_offset + r_beg * self.nb_channels,
                                          g_offset + self.block_size_div, dtype=numpy.int64)
            elif count == (x_end - x_beg):
                data_slice = numpy.arange(g_offset,
                                          g_offset + r_end * self.nb_channels, dtype=numpy.int64)
            else:
                data_slice = numpy.arange(g_offset,
                                          g_offset + self.block_size_div, dtype=numpy.int64)
            yield data_slice
Example 15
def _get_slice_(self, t_start, t_stop):
    x_beg = numpy.int64(t_start // self.SAMPLES_PER_RECORD)
    r_beg = numpy.mod(t_start, self.SAMPLES_PER_RECORD)
    x_end = numpy.int64(t_stop // self.SAMPLES_PER_RECORD)
    r_end = numpy.mod(t_stop, self.SAMPLES_PER_RECORD)
    data_slice = []
    if x_beg == x_end:
        g_offset = x_beg * self.SAMPLES_PER_RECORD + self.OFFSET_PER_BLOCK[0] * (x_beg + 1) + self.OFFSET_PER_BLOCK[1] * x_beg
        data_slice = numpy.arange(g_offset + r_beg, g_offset + r_end, dtype=numpy.int64)
    else:
        for count, nb_blocks in enumerate(numpy.arange(x_beg, x_end + 1, dtype=numpy.int64)):
            g_offset = nb_blocks * self.SAMPLES_PER_RECORD + self.OFFSET_PER_BLOCK[0] * (nb_blocks + 1) + self.OFFSET_PER_BLOCK[1] * nb_blocks
            if count == 0:
                data_slice += numpy.arange(g_offset + r_beg, g_offset + self.SAMPLES_PER_RECORD, dtype=numpy.int64).tolist()
            elif count == (x_end - x_beg):
                data_slice += numpy.arange(g_offset, g_offset + r_end, dtype=numpy.int64).tolist()
            else:
                data_slice += numpy.arange(g_offset, g_offset + self.SAMPLES_PER_RECORD, dtype=numpy.int64).tolist()
    return data_slice
Example 16
def read_chunk(self, idx, chunk_size, padding=(0, 0), nodes=None):
    t_start, t_stop = self._get_t_start_t_stop(idx, chunk_size, padding)
    local_shape = t_stop - t_start
    if nodes is None:
        nodes = numpy.arange(self.nb_channels)
    local_chunk = numpy.zeros((local_shape, len(nodes)), dtype=self.data_dtype)
    data_slice = self._get_slice_(t_start, t_stop)
    self._open()
    for count, i in enumerate(nodes):
        local_chunk[:, count] = self.data[i][data_slice]
    self._close()
    return self._scale_data_to_float32(local_chunk)
Example 17
def view_trigger_snippets_bis(trigger_snippets, elec_index, save=None):
    fig = pylab.figure()
    ax = fig.add_subplot(1, 1, 1)
    for n in xrange(0, trigger_snippets.shape[2]):
        y = trigger_snippets[:, elec_index, n]
        x = numpy.arange(-(y.size - 1) / 2, (y.size - 1) / 2 + 1)
        b = 0.5 + 0.5 * numpy.random.rand()
        ax.plot(x, y, color=(0.0, 0.0, b), linestyle='solid')
    ax.grid(True)
    ax.set_xlim([numpy.amin(x), numpy.amax(x)])
    ax.set_xlabel("time")
    ax.set_ylabel("amplitude")
    if save is None:
        pylab.show()
    else:
        pylab.savefig(save)
        pylab.close(fig)
    return
Example 18
def cost(self, x):
    Rdx = dl.Vector()
    self.Prior.init_vector(Rdx, 0)
    dx = x[PARAMETER] - self.Prior.mean
    self.Prior.R.mult(dx, Rdx)
    reg = .5 * Rdx.inner(dx)
    u = dl.Vector()
    ud = dl.Vector()
    self.Q.init_vector(u, 0)
    self.Q.init_vector(ud, 0)
    misfit = 0
    for t in np.arange(self.t_1, self.t_final + (.5 * self.dt), self.dt):
        x[STATE].retrieve(u, t)
        self.ud.retrieve(ud, t)
        diff = u - ud
        Qdiff = self.Q * diff
        misfit += .5 / self.noise_variance * Qdiff.inner(diff)
    c = misfit + reg
    return [c, reg, misfit]
Example 19
def _flow_index(self, n, batch_size=32, shuffle=False, seed=None):
    # ensure self.batch_index is 0
    self.reset()
    while 1:
        if seed is not None:
            np.random.seed(seed + self.total_batches_seen)
        if self.batch_index == 0:
            index_array = np.arange(n)
            if shuffle:
                index_array = np.random.permutation(n)
        current_index = (self.batch_index * batch_size) % n
        if n >= current_index + batch_size:
            current_batch_size = batch_size
            self.batch_index += 1
        else:
            current_batch_size = n - current_index
            self.batch_index = 0
        self.total_batches_seen += 1
        yield (index_array[current_index: current_index + current_batch_size],
               current_index, current_batch_size)
Example 20
def make_split(X_full, Y_full, split):
    N = X_full.shape[0]
    n = int(N * PROPORTION_TRAIN)
    ind = np.arange(N)
    np.random.seed(split + SEED)
    np.random.shuffle(ind)
    train_ind = ind[:n]
    test_ind = ind[n:]
    X = X_full[train_ind]
    Xs = X_full[test_ind]
    Y = Y_full[train_ind]
    Ys = Y_full[test_ind]
    return X, Y, Xs, Ys
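A usage sketch, filling in the module-level constants this function reads but the snippet does not define (PROPORTION_TRAIN and SEED are hypothetical values here):

PROPORTION_TRAIN = 0.9
SEED = 0
X_full = np.random.rand(100, 5)
Y_full = np.random.rand(100, 1)
X, Y, Xs, Ys = make_split(X_full, Y_full, split=0)
assert len(X) == 90 and len(Xs) == 10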
Example 21
def plot_difference_histogram(group, gene_name, bins=np.arange(20.1)):
    """
    Plot a histogram of percentage differences for a specific gene.
    """
    exact_matches = group[group.V_SHM == 0]
    CDR3s_exact = len(set(s for s in exact_matches.CDR3_nt if s))
    Js_exact = len(set(exact_matches.J_gene))
    fig = Figure(figsize=(100 / 25.4, 60 / 25.4))
    ax = fig.gca()
    ax.set_xlabel('Percentage difference')
    ax.set_ylabel('Frequency')
    fig.suptitle('Gene ' + gene_name, y=1.08, fontsize=16)
    ax.set_title('{:,} sequences assigned'.format(len(group)))
    ax.text(0.25, 0.95,
            '{:,} ({:.1%}) exact matches\n {} unique CDR3\n {} unique J'.format(
                len(exact_matches), len(exact_matches) / len(group),
                CDR3s_exact, Js_exact),
            transform=ax.transAxes, fontsize=10,
            bbox=dict(boxstyle='round', facecolor='white', alpha=0.5),
            horizontalalignment='left', verticalalignment='top')
    _ = ax.hist(list(group.V_SHM), bins=bins)
    return fig
Example 22
def create_decoder(self, helper, mode):
    attention_fn = AttentionLayerDot(
        params={"num_units": self.attention_dim},
        mode=tf.contrib.learn.ModeKeys.TRAIN)
    attention_values = tf.convert_to_tensor(
        np.random.randn(self.batch_size, self.input_seq_len, 32),
        dtype=tf.float32)
    attention_keys = tf.convert_to_tensor(
        np.random.randn(self.batch_size, self.input_seq_len, 32),
        dtype=tf.float32)
    params = AttentionDecoder.default_params()
    params["max_decode_length"] = self.max_decode_length
    return AttentionDecoder(
        params=params,
        mode=mode,
        vocab_size=self.vocab_size,
        attention_keys=attention_keys,
        attention_values=attention_values,
        attention_values_length=np.arange(self.batch_size) + 1,
        attention_fn=attention_fn)
Example 23
def make_copy(num_examples, min_len, max_len):
    """
    Generates a dataset where the target is equal to the source.
    Sequence lengths are chosen randomly from [min_len, max_len].
    Args:
        num_examples: Number of examples to generate
        min_len: Minimum sequence length
        max_len: Maximum sequence length
    Returns:
        An iterator of (source, target) string tuples.
    """
    for _ in range(num_examples):
        turn_length = np.random.choice(np.arange(min_len, max_len + 1))
        source_tokens = np.random.choice(
            list(VOCABULARY), size=turn_length, replace=True)
        target_tokens = source_tokens
        yield " ".join(source_tokens), " ".join(target_tokens)
Example 24
def make_reverse(num_examples, min_len, max_len):
    """
    Generates a dataset where the target is equal to the source reversed.
    Sequence lengths are chosen randomly from [min_len, max_len].
    Args:
        num_examples: Number of examples to generate
        min_len: Minimum sequence length
        max_len: Maximum sequence length
    Returns:
        An iterator of (source, target) string tuples.
    """
    for _ in range(num_examples):
        turn_length = np.random.choice(np.arange(min_len, max_len + 1))
        source_tokens = np.random.choice(
            list(VOCABULARY), size=turn_length, replace=True)
        target_tokens = source_tokens[::-1]
        yield " ".join(source_tokens), " ".join(target_tokens)
Example 25
def update_dividends(self, new_dividends):
    """
    Update our dividend frame with new dividends. @new_dividends should be
    a DataFrame with columns containing at least the entries in
    zipline.protocol.DIVIDEND_FIELDS.
    """
    # Mark each new dividend with a unique integer id. This ensures that
    # we can differentiate dividends whose date/sid fields are otherwise
    # identical.
    new_dividends['id'] = np.arange(
        self._dividend_count,
        self._dividend_count + len(new_dividends),
    )
    self._dividend_count += len(new_dividends)
    self.dividend_frame = sort_values(pd.concat(
        [self.dividend_frame, new_dividends]
    ), ['pay_date', 'ex_date']).set_index('id', drop=False)
Example 26
def create_test_panel_ohlc_source(sim_params, env):
    start = sim_params.first_open \
        if sim_params else pd.datetime(1990, 1, 3, 0, 0, 0, 0, pytz.utc)
    end = sim_params.last_close \
        if sim_params else pd.datetime(1990, 1, 8, 0, 0, 0, 0, pytz.utc)
    index = env.days_in_range(start, end)
    price = np.arange(0, len(index)) + 100
    high = price * 1.05
    low = price * 0.95
    open_ = price + .1 * (price % 2 - .5)
    volume = np.ones(len(index)) * 1000
    arbitrary = np.ones(len(index))
    df = pd.DataFrame({'price': price,
                       'high': high,
                       'low': low,
                       'open': open_,
                       'volume': volume,
                       'arbitrary': arbitrary},
                      index=index)
    panel = pd.Panel.from_dict({0: df})
    return DataPanelSource(panel), panel
Example 27
def test_expect_dtypes_with_tuple(self):
    allowed_dtypes = (dtype('datetime64[ns]'), dtype('float'))

    @expect_dtypes(a=allowed_dtypes)
    def foo(a, b):
        return a, b

    for d in allowed_dtypes:
        good_a = arange(3).astype(d)
        good_b = object()
        ret_a, ret_b = foo(good_a, good_b)
        self.assertIs(good_a, ret_a)
        self.assertIs(good_b, ret_b)

    with self.assertRaises(TypeError) as e:
        foo(arange(3, dtype='uint32'), object())
    expected_message = (
        "{qualname}() expected a value with dtype 'datetime64[ns]' "
        "or 'float64' for argument 'a', but got 'uint32' instead."
    ).format(qualname=qualname(foo))
    self.assertEqual(e.exception.args[0], expected_message)
Example 28
def test_bad_input(self):
    data = arange(100).reshape(self.ndates, self.nsids)
    baseline = DataFrame(data, index=self.dates, columns=self.sids)
    loader = DataFrameLoader(
        USEquityPricing.close,
        baseline,
    )
    with self.assertRaises(ValueError):
        # Wrong column.
        loader.load_adjusted_array(
            [USEquityPricing.open], self.dates, self.sids, self.mask
        )
    with self.assertRaises(ValueError):
        # Too many columns.
        loader.load_adjusted_array(
            [USEquityPricing.open, USEquityPricing.close],
            self.dates,
            self.sids,
            self.mask,
        )
Example 29
def test_baseline(self):
    data = arange(100).reshape(self.ndates, self.nsids)
    baseline = DataFrame(data, index=self.dates, columns=self.sids)
    loader = DataFrameLoader(USEquityPricing.close, baseline)
    dates_slice = slice(None, 10, None)
    sids_slice = slice(1, 3, None)
    [adj_array] = loader.load_adjusted_array(
        [USEquityPricing.close],
        self.dates[dates_slice],
        self.sids[sids_slice],
        self.mask[dates_slice, sids_slice],
    ).values()
    for idx, window in enumerate(adj_array.traverse(window_length=3)):
        expected = baseline.values[dates_slice, sids_slice][idx:idx + 3]
        assert_array_equal(window, expected)
Example 30
def get_normalized_dispersion(mat_mean, mat_var, nbins=20):
    mat_disp = (mat_var - mat_mean) / np.square(mat_mean)
    quantiles = np.percentile(mat_mean, np.arange(0, 100, 100 / nbins))
    quantiles = np.append(quantiles, mat_mean.max())
    # merge bins with no difference in value
    quantiles = np.unique(quantiles)
    if len(quantiles) <= 1:
        # pathological case: the means are all identical. just return raw dispersion.
        return mat_disp
    # calc median dispersion per bin
    (disp_meds, _, disp_bins) = scipy.stats.binned_statistic(mat_mean, mat_disp, statistic='median', bins=quantiles)
    # calc median absolute deviation of dispersion per bin
    disp_meds_arr = disp_meds[disp_bins - 1]  # 0th bin is empty since our quantiles start from 0
    disp_abs_dev = abs(mat_disp - disp_meds_arr)
    (disp_mads, _, disp_bins) = scipy.stats.binned_statistic(mat_mean, disp_abs_dev, statistic='median', bins=quantiles)
    # calculate normalized dispersion
    disp_mads_arr = disp_mads[disp_bins - 1]
    disp_norm = (mat_disp - disp_meds_arr) / disp_mads_arr
    return disp_norm
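A synthetic sketch (the mean/variance arrays are invented here, not from the original pipeline): dispersions with a mean-dependent trend come back centered and scaled per mean-bin.

mat_mean = np.random.gamma(2.0, 1.0, size=1000)
mat_var = mat_mean + (0.5 + 0.1 * np.random.rand(1000)) * mat_mean ** 2
disp_norm = get_normalized_dispersion(mat_mean, mat_var)
print(disp_norm.shape)  # (1000,)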
Example 31
def compute_nearest_neighbors(submatrix, balltree, k, row_start):
    """Compute k nearest neighbors on a submatrix.
    Args:
        submatrix (np.ndarray): Data submatrix
        balltree: Nearest neighbor index (from sklearn)
        k: number of nearest neighbors to compute
        row_start: row offset into larger matrix
    Returns a COO sparse adjacency matrix of nearest neighbor relations as (i, j, x)."""
    nn_dist, nn_idx = balltree.query(submatrix, k=k + 1)
    # Remove the self-as-neighbors
    nn_idx = nn_idx[:, 1:]
    nn_dist = nn_dist[:, 1:]
    # Construct a COO sparse matrix of edges and distances
    i = np.repeat(row_start + np.arange(nn_idx.shape[0]), k)
    j = nn_idx.ravel().astype(int)
    return (i, j, nn_dist.ravel())
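Hedged usage with scikit-learn's BallTree (note the submatrix rows should also be rows of the indexed data, or the "remove self" step drops a real neighbor):

from sklearn.neighbors import BallTree
data = np.random.rand(100, 10)
tree = BallTree(data)
i, j, dist = compute_nearest_neighbors(data[:50], tree, k=5, row_start=0)
print(i.shape, j.shape, dist.shape)  # (250,) (250,) (250,)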
Example 32
def preprocess_matrix(matrix, num_bcs=None, use_bcs=None, use_genes=None, force_cells=None):
    if force_cells is not None:
        bc_counts = matrix.get_reads_per_bc()
        bc_indices, _, _ = cr_stats.filter_cellular_barcodes_fixed_cutoff(bc_counts, force_cells)
        matrix = matrix.select_barcodes(bc_indices)
    elif use_bcs is not None:
        bc_seqs = cr_utils.load_csv_rownames(use_bcs)
        bc_indices = matrix.bcs_to_ints(bc_seqs)
        matrix = matrix.select_barcodes(bc_indices)
    elif num_bcs is not None and num_bcs < matrix.bcs_dim:
        bc_indices = np.sort(np.random.choice(np.arange(matrix.bcs_dim), size=num_bcs, replace=False))
        matrix = matrix.select_barcodes(bc_indices)
    if use_genes is not None:
        gene_ids = cr_utils.load_csv_rownames(use_genes)
        gene_indices = matrix.gene_ids_to_ints(gene_ids)
        matrix = matrix.select_genes(gene_indices)
    matrix, _, _ = matrix.select_nonzero_axes()
    return matrix
Example 33
def get_depth_info(read_iter, chrom, cstart, cend):
    depths = np.zeros(cend - cstart, np.int32)
    for read in read_iter:
        pos = read.pos
        rstart = max(pos, cstart)
        # Increment to the end of the window or the end of the
        # alignment, whichever comes first
        rend = min(read.aend, cend)
        depths[(rstart - cstart):(rend - cstart)] += 1
    positions = np.arange(cstart, cend, dtype=np.int32)
    depth_df = pd.DataFrame({"chrom": chrom, "pos": positions, "coverage": depths})
    return depth_df
Example 34
def getDataRecorderConfiguration(self):
    nRecorders = self.getNumberOfRecorderTables()
    sourceBufSize = 256
    source = ctypes.create_string_buffer('\000', sourceBufSize)
    option = CIntArray(np.zeros(nRecorders, dtype=np.int32))
    table = CIntArray(np.arange(1, nRecorders + 1))
    self._lib.PI_qDRC.argtypes = [c_int, CIntArray, c_char_p,
                                  CIntArray, c_int, c_int]
    self._convertErrorToException(
        self._lib.PI_qDRC(self._id, table, source,
                          option, sourceBufSize, nRecorders))
    sources = [x.strip() for x in source.value.split('\n')]
    cfg = DataRecorderConfiguration()
    for i in range(nRecorders):
        cfg.setTable(table.toNumpyArray()[i],
                     sources[i],
                     option.toNumpyArray()[i])
    return cfg
Example 35
def loadLogoSet(path, rows, cols, test_data_rate=0.15):
    random.seed(612)
    _, imgID = readItems('data.txt')
    y, _ = modelDict(path)
    nPics = len(y)
    faceassset = np.zeros((nPics, rows, cols), dtype=np.uint8)  # gray images
    noImg = []
    for i in range(nPics):
        temp = cv2.imread(path + 'logo/' + imgID[i] + '.jpg', 0)
        if temp is None:  # cv2.imread returns None on failure; '== None' breaks on arrays
            noImg.append(i)
        elif temp.size < 1000:
            noImg.append(i)
        else:
            temp = cv2.resize(temp, (cols, rows), interpolation=cv2.INTER_CUBIC)
            faceassset[i, :, :] = temp
    y = np.delete(y, noImg, 0)
    faceassset = np.delete(faceassset, noImg, 0)
    nPics = len(y)
    index = random.sample(range(nPics), int(nPics * test_data_rate))
    x_test = faceassset[index, :, :]
    x_train = np.delete(faceassset, index, 0)
    y_test = y[index]
    y_train = np.delete(y, index, 0)
    return (x_train, y_train), (x_test, y_test)
Example 36
def batch_iter(data, batch_size, num_epochs, shuffle=True):
    """
    Generates a batch iterator for a dataset.
    """
    data = np.array(data)
    data_size = len(data)
    # (data_size - 1) avoids yielding an empty final batch when
    # data_size is an exact multiple of batch_size
    num_batches_per_epoch = int((data_size - 1) / batch_size) + 1
    for epoch in range(num_epochs):
        # Shuffle the data at each epoch
        if shuffle:
            shuffle_indices = np.random.permutation(np.arange(data_size))
            shuffled_data = data[shuffle_indices]
        else:
            shuffled_data = data
        for batch_num in range(num_batches_per_epoch):
            start_index = batch_num * batch_size
            end_index = min((batch_num + 1) * batch_size, data_size)
            yield shuffled_data[start_index:end_index]
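A minimal sketch of one epoch over toy data (shuffle disabled so the output is deterministic):

for batch in batch_iter(list(range(10)), batch_size=4, num_epochs=1, shuffle=False):
    print(batch)
# [0 1 2 3]  [4 5 6 7]  [8 9]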
Example 37
def _gen_centroids():
    a = np.arange(SSIZE / 18, SSIZE, SSIZE / 9)
    x, y = np.meshgrid(a, a)
    return np.dstack((y, x)).reshape((81, 2))
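SSIZE is a module constant not shown in the snippet; with a hypothetical SSIZE = 900 this yields the centers of a 9x9 grid of 100-pixel cells:

SSIZE = 900
centroids = _gen_centroids()
print(centroids.shape)  # (81, 2)
print(centroids[0])     # [50. 50.]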
Example 38
def classify(self, image):
    """
    Given a 28x28 image, returns an array representing the two most
    probable predictions
    :param image:
    :return: array of 2 highest prob-digit tuples
    """
    if cv2.__version__[0] == '2':
        res = self.model.find_nearest(np.array([self.feature(image)]), k=11)
    else:
        res = self.model.findNearest(np.array([self.feature(image)]), k=11)
    # 'density' replaces the long-deprecated 'normed' argument; the bins
    # all have width 1, so the values are identical
    hist = np.histogram(res[2], bins=9, range=(1, 10), density=True)[0]
    zipped = sorted(zip(hist, np.arange(1, 10)), reverse=True)
    return np.array(zipped[:2])
Example 39
def blend2(x1, x2, y, metric, task, x1valid, x2valid, x1test, x2test):
    try:
        mm = no_transform()
        mbest_score = -2
        for w1 in np.arange(0.2, 1, 0.1):
            w2 = 1 - w1
            x = mm.fit_transform(x1) * w1 + mm.fit_transform(x2) * w2
            # getattr replaces the original exec() string, which cannot
            # rebind a local variable in Python 3
            score = getattr(libscores, metric)(y, x, task)
            try:
                if score <= 0:
                    score += libscores.auc_metric(y, x, task) / 10
            except Exception:
                pass
            if score > mbest_score:
                mbest_score = score
                mbest_w1 = w1
                mbest_x = x
        mbest_w2 = 1 - mbest_w1
        xvalid = mm.fit_transform(x1valid) * mbest_w1 + mm.fit_transform(x2valid) * mbest_w2
        xtest = mm.fit_transform(x1test) * mbest_w1 + mm.fit_transform(x2test) * mbest_w2
        return mbest_score, xvalid, xtest
    except Exception:
        return 0.01, x1valid, x1test
Example 40
def blend3(x1, x2, x3, y, metric, task, x1valid, x2valid, x3valid, x1test, x2test, x3test):
    try:
        mm = no_transform()
        mbest_score = -2
        for w1 in np.arange(0.2, 1, 0.2):
            for w2 in np.arange(0.1, 0.6, 0.2):
                w3 = 1 - w1 - w2
                if w3 > 0:
                    x = mm.fit_transform(x1) * w1 + mm.fit_transform(x2) * w2 + mm.fit_transform(x3) * w3
                    # getattr replaces the original exec() string (see blend2)
                    score = getattr(libscores, metric)(y, x, task)
                    try:
                        if score <= 0:
                            score += libscores.auc_metric(y, x, task) / 10
                    except Exception:
                        pass
                    if score > mbest_score:
                        mbest_score = score
                        mbest_w1 = w1
                        mbest_w2 = w2
        mbest_w3 = 1 - mbest_w1 - mbest_w2
        xvalid = mm.fit_transform(x1valid) * mbest_w1 + mm.fit_transform(x2valid) * mbest_w2 + mm.fit_transform(x3valid) * mbest_w3
        xtest = mm.fit_transform(x1test) * mbest_w1 + mm.fit_transform(x2test) * mbest_w2 + mm.fit_transform(x3test) * mbest_w3
        return mbest_score, xvalid, xtest
    except Exception:
        return 0.01, x1valid, x1test
Example 41
def tiedrank(a):
    '''Return the ranks (with base 1) of a list resolving ties by averaging.
    This works for numpy arrays.'''
    m = len(a)
    # Sort a in ascending order (sa=sorted vals, i=indices)
    i = a.argsort()
    sa = a[i]
    # Find unique values
    uval = np.unique(a)
    # Test whether there are ties
    R = np.arange(m, dtype=float) + 1  # Ranks with base 1
    if len(uval) != m:
        # Average the ranks for the ties
        oldval = sa[0]
        newval = sa[0]
        k0 = 0
        for k in range(1, m):
            newval = sa[k]
            if newval == oldval:
                # moving average
                R[k0:k + 1] = R[k - 1] * (k - k0) / (k - k0 + 1) + R[k] / (k - k0 + 1)
            else:
                k0 = k
                oldval = newval
    # Invert the index
    S = np.empty(m)
    S[i] = R
    return S
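A quick check of the tie-averaging behavior:

print(tiedrank(np.array([10., 20., 20., 30.])))  # [1.  2.5 2.5 4. ]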
Example 42
def plot_trajectories(src_sent, src_encoding, idx):
    # encoding is (time_steps, hidden_dim)
    # pca = PCA(n_components=1)
    # pca_result = pca.fit_transform(src_encoding)
    times = np.arange(src_encoding.shape[0])
    plt.plot(times, src_encoding)
    plt.title(" ".join(src_sent))
    plt.xlabel('timestep')
    plt.ylabel('trajectories')
    plt.savefig("misc_hidden_cell_trajectories_" + str(idx), bbox_inches="tight")
    plt.close()
Example 43
def dense_to_one_hot(labels_dense, num_classes=10):
    """Convert class labels from scalars to one-hot vectors."""
    num_labels = labels_dense.shape[0]
    index_offset = numpy.arange(num_labels) * num_classes
    labels_one_hot = numpy.zeros((num_labels, num_classes))
    labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
    return labels_one_hot
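Worked example (the result is float, matching numpy.zeros' default dtype):

labels = numpy.array([0, 2, 1])
print(dense_to_one_hot(labels, num_classes=3))
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]]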
Example 44
def iterate_minibatches(inputs, targets, batchsize, shuffle=False, augment=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        if augment:
            # as in the paper:
            # pad feature arrays with 4 pixels on each side
            # and do random cropping of 32x32
            padded = np.pad(inputs[excerpt], ((0, 0), (0, 0), (4, 4), (4, 4)), mode='constant')
            random_cropped = np.zeros(inputs[excerpt].shape, dtype=np.float32)
            # randint(0, 9) replaces the removed random_integers(0, high=8)
            crops = np.random.randint(0, 9, size=(batchsize, 2))
            for r in range(batchsize):
                random_cropped[r, :, :, :] = padded[r, :, crops[r, 0]:(crops[r, 0] + 32), crops[r, 1]:(crops[r, 1] + 32)]
            inp_exc = random_cropped
        else:
            inp_exc = inputs[excerpt]
        yield inp_exc, targets[excerpt]
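Sketch of a training loop over CIFAR-style arrays of shape (N, channels, 32, 32); augmentation pads to 40x40 and crops back to 32x32:

X = np.zeros((100, 3, 32, 32), dtype=np.float32)
y = np.zeros(100, dtype=np.int32)
for xb, yb in iterate_minibatches(X, y, batchsize=32, shuffle=True, augment=True):
    pass  # xb.shape == (32, 3, 32, 32); feed xb, yb to the train function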
Example 45
def __init__(self, env):
    self.env = env
    if isinstance(env.observation_space, Discrete):
        self.state_size = 1
    else:
        self.state_size = numel(env.observation_space.shape)
    if isinstance(self.env.action_space, Discrete):
        self.is_discrete = True
        self.action_size = env.action_space.n
        self.actions = np.arange(self.action_size)
    else:
        self.is_discrete = False
        self.action_size = numel(env.action_space.sample())