1. IMDB FastText
Explanation:
'''
This example demonstrates the use of FastText for text classification,
following Joulin et al., "Bag of Tricks for Efficient Text Classification".
'''
from __future__ import print_function
import numpy as np
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Embedding
from keras.layers import GlobalAveragePooling1D
from keras.datasets import imdb
def create_ngram_set(input_list, ngram_value=2):
    """
    Extract a set of n-grams from a list of integers.

    >>> create_ngram_set([1, 4, 9, 4, 1, 4], ngram_value=2)
    {(4, 9), (4, 1), (1, 4), (9, 4)}

    >>> create_ngram_set([1, 4, 9, 4, 1, 4], ngram_value=3)
    {(1, 4, 9), (4, 9, 4), (9, 4, 1), (4, 1, 4)}
    """
    return set(zip(*[input_list[i:] for i in range(ngram_value)]))
def add_ngram(sequences, token_indice, ngram_range=2):
    """
    Augment the input list of lists (sequences) by appending n-gram values.

    Example: adding bi-grams
    >>> sequences = [[1, 3, 4, 5], [1, 3, 7, 9, 2]]
    >>> token_indice = {(1, 3): 1337, (9, 2): 42, (4, 5): 2017}
    >>> add_ngram(sequences, token_indice, ngram_range=2)
    [[1, 3, 4, 5, 1337, 2017], [1, 3, 7, 9, 2, 1337, 42]]

    Example: adding tri-grams
    >>> sequences = [[1, 3, 4, 5], [1, 3, 7, 9, 2]]
    >>> token_indice = {(1, 3): 1337, (9, 2): 42, (4, 5): 2017, (7, 9, 2): 2018}
    >>> add_ngram(sequences, token_indice, ngram_range=3)
    [[1, 3, 4, 5, 1337, 2017], [1, 3, 7, 9, 2, 1337, 42, 2018]]
    """
    new_sequences = []
    for input_list in sequences:
        new_list = input_list[:]
        # Iterate over the n-gram size first so that shorter n-grams near
        # the end of the sequence are not skipped (the original inner/outer
        # loop order missed them whenever ngram_range > 2).
        for ngram_value in range(2, ngram_range + 1):
            for i in range(len(new_list) - ngram_value + 1):
                ngram = tuple(new_list[i:i + ngram_value])
                if ngram in token_indice:
                    new_list.append(token_indice[ngram])
        new_sequences.append(new_list)
    return new_sequences
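# Illustrative sanity check of the two helpers together (a sketch with
# toy data and hypothetical n-gram ids, not part of the original example):
# every bi-gram seen in "training" gets a fresh id just above the toy
# unigram vocabulary (here 10), and add_ngram appends those ids to each
# sequence as extra features.
toy_train = [[1, 4, 9, 4], [4, 9, 1]]
toy_ngrams = set()
for seq in toy_train:
    toy_ngrams.update(create_ngram_set(seq, ngram_value=2))
toy_indice = {ng: i + 10 for i, ng in enumerate(toy_ngrams)}
print(add_ngram(toy_train, toy_indice, ngram_range=2))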
# Set parameters:
# ngram_range = 2 will add bi-gram features
ngram_range = 1
max_features = 20000
maxlen = 400
batch_size = 32
embedding_dims = 50
epochs = 5
print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')
print('Average train sequence length: {}'.format(np.mean(list(map(len, x_train)), dtype=int)))
print('Average test sequence length: {}'.format(np.mean(list(map(len, x_test)), dtype=int)))
if ngram_range > 1:
    print('Adding {}-gram features'.format(ngram_range))
    # Create the set of unique n-grams from the training set.
    ngram_set = set()
    for input_list in x_train:
        for i in range(2, ngram_range + 1):
            set_of_ngram = create_ngram_set(input_list, ngram_value=i)
            ngram_set.update(set_of_ngram)
    # Dictionary mapping each n-gram token to a unique integer.
    # Integer values are greater than max_features in order
    # to avoid collisions with existing (unigram) features.
    start_index = max_features + 1
    token_indice = {v: k + start_index for k, v in enumerate(ngram_set)}
    indice_token = {token_indice[k]: k for k in token_indice}
    # max_features must now cover the largest n-gram id,
    # since it becomes the Embedding layer's input dimension.
    max_features = np.max(list(indice_token.keys())) + 1
    # Augment x_train and x_test with the n-gram features.
    x_train = add_ngram(x_train, token_indice, ngram_range)
    x_test = add_ngram(x_test, token_indice, ngram_range)
    print('Average train sequence length: {}'.format(
        np.mean(list(map(len, x_train)), dtype=int)))
    print('Average test sequence length: {}'.format(
        np.mean(list(map(len, x_test)), dtype=int)))
print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)
print('Build model...')
model = Sequential()
# we start off with an efficient embedding layer which maps
# our vocab indices into embedding_dims dimensions
model.add(Embedding(max_features,
                    embedding_dims,
                    input_length=maxlen))
# we add a GlobalAveragePooling1D, which will average the embeddings
# of all words in the document
model.add(GlobalAveragePooling1D())
# We project onto a single unit output layer, and squash it with a sigmoid:
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(x_test, y_test))
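# The original example stops after fitting; a minimal follow-up to
# report a held-out score (a sketch mirroring the evaluation step of
# the LSTM example below):
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)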
2. IMDB LSTM
Explanation:
'''
Trains an LSTM model on the IMDB sentiment classification task.
'''
from __future__ import print_function
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Embedding
from keras.layers import LSTM
from keras.datasets import imdb
max_features = 20000
maxlen = 80 # cut texts after this number of words (among top max_features most common words)
batch_size = 32
print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')
print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)
print('Build model...')
model = Sequential()
model.add(Embedding(max_features, 128))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))
# try using different optimizers and different optimizer configs
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
print('Train...')
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=15,
          validation_data=(x_test, y_test))
score, acc = model.evaluate(x_test, y_test,
                            batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)
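# To classify a new review, raw text must be encoded with the same word
# index the dataset uses. A sketch (the review string is hypothetical):
# imdb.load_data by default reserves 0 for padding, 1 for sequence
# start and 2 for out-of-vocabulary, shifting real word ids up by 3.
word_index = imdb.get_word_index()
review = 'this movie was a wonderful surprise'  # hypothetical input
ids = [word_index.get(w) for w in review.lower().split()]
ids = [1] + [i + 3 if i is not None and i + 3 < max_features else 2
             for i in ids]
x = sequence.pad_sequences([ids], maxlen=maxlen)
print('P(positive):', model.predict(x)[0][0])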
3. LSTM Stateful
Explanation:
'''Example script showing how to use a stateful LSTM model
and how its stateless counterpart performs.
'''
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, LSTM
# ----------------------------------------------------------
# EDITABLE PARAMETERS
# Read the documentation in the script head for more details
# ----------------------------------------------------------
# length of input
input_len = 1000
# The window length of the moving average used to generate
# the output from the input in the input/output pair used
# to train the LSTM
# e.g. if tsteps=2 and input=[1, 2, 3, 4, 5],
# then output=[1.5, 2.5, 3.5, 4.5]
tsteps = 2
# The input sequence length that the LSTM is trained on for each output point
lahead = 1
# training parameters passed to "model.fit(...)"
batch_size = 1
epochs = 10
# ------------
# MAIN PROGRAM
# ------------
print("*" * 33)
if lahead >= tsteps:
print("STATELESS LSTM WILL ALSO CONVERGE")
else:
print("STATELESS LSTM WILL NOT CONVERGE")
print("*" * 33)
np.random.seed(1986)
print('Generating Data...')
def gen_uniform_amp(amp=1, xn=10000):
    """Generates uniform random data between -amp and +amp,
    of length xn.

    Arguments:
        amp: maximum/minimum range of the uniform data
        xn: length of the series
    """
    data_input = np.random.uniform(-1 * amp, +1 * amp, xn)
    data_input = pd.DataFrame(data_input)
    return data_input
# Since the output is a moving average of the input, the first few
# points of the output will be NaN and are dropped from the generated
# data before training the LSTM.
# When lahead > 1, the "rolling window view" preprocessing step below
# also loses a few points. To keep the data length equal to input_len
# after preprocessing, generate a few extra points to cover the loss.
to_drop = max(tsteps - 1, lahead - 1)
data_input = gen_uniform_amp(amp=0.1, xn=input_len + to_drop)
# set the target to be a N-point average of the input
expected_output = data_input.rolling(window=tsteps, center=False).mean()
# when lahead > 1, convert the input to a "rolling window view":
# column i holds the input shifted down by i steps
# https://docs.scipy.org/doc/numpy/reference/generated/numpy.repeat.html
if lahead > 1:
    data_input = np.repeat(data_input.values, repeats=lahead, axis=1)
    data_input = pd.DataFrame(data_input)
    for i, c in enumerate(data_input.columns):
        data_input[c] = data_input[c].shift(i)
# drop the NaNs introduced by the moving average and the shifting
expected_output = expected_output[to_drop:]
data_input = data_input[to_drop:]
print('Input shape:', data_input.shape)
print('Output shape:', expected_output.shape)
print('Input head: ')
print(data_input.head())
print('Output head: ')
print(expected_output.head())
print('Input tail: ')
print(data_input.tail())
print('Output tail: ')
print(expected_output.tail())
print('Plotting input and expected output')
plt.plot(data_input[0][:10], '.')
plt.plot(expected_output[0][:10], '-')
plt.legend(['Input', 'Expected output'])
plt.title('Input and expected output')
plt.show()
def create_model(stateful):
    model = Sequential()
    model.add(LSTM(20,
                   input_shape=(lahead, 1),
                   batch_size=batch_size,
                   stateful=stateful))
    model.add(Dense(1))
    model.compile(loss='mse', optimizer='adam')
    return model
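# Why the layer takes batch_size: a stateful LSTM carries its hidden
# state from one batch to the next, so Keras must allocate state
# tensors for a fixed batch size when the graph is built. An
# equivalent spelling of the first layer (a sketch, not part of the
# original script) makes the full static shape explicit:
#
#     model.add(LSTM(20,
#                    batch_input_shape=(batch_size, lahead, 1),
#                    stateful=stateful))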
print('Creating Stateful Model...')
model_stateful = create_model(stateful=True)
# split train/test data
def split_data(x, y, ratio=0.8):
    to_train = int(input_len * ratio)
    # tweak to match batch_size
    to_train -= to_train % batch_size
    x_train = x[:to_train]
    y_train = y[:to_train]
    x_test = x[to_train:]
    y_test = y[to_train:]
    # tweak to match batch_size
    to_drop = x.shape[0] % batch_size
    if to_drop > 0:
        x_test = x_test[:-1 * to_drop]
        y_test = y_test[:-1 * to_drop]
    # some reshaping
    reshape_3 = lambda x: x.values.reshape((x.shape[0], x.shape[1], 1))
    x_train = reshape_3(x_train)
    x_test = reshape_3(x_test)
    reshape_2 = lambda x: x.values.reshape((x.shape[0], 1))
    y_train = reshape_2(y_train)
    y_test = reshape_2(y_test)
    return (x_train, y_train), (x_test, y_test)
(x_train, y_train), (x_test, y_test) = split_data(data_input, expected_output)
print('x_train.shape: ', x_train.shape)
print('y_train.shape: ', y_train.shape)
print('x_test.shape: ', x_test.shape)
print('y_test.shape: ', y_test.shape)
print('Training')
for i in range(epochs):
    print('Epoch', i + 1, '/', epochs)
    # Note that the last state for sample i in a batch will
    # be used as the initial state for sample i in the next batch.
    # Thus we are simultaneously training on batch_size series with
    # lower resolution than the original series contained in data_input.
    # Each of these series is offset by one step and can be
    # extracted with data_input[i::batch_size].
    model_stateful.fit(x_train,
                       y_train,
                       batch_size=batch_size,
                       epochs=1,
                       verbose=1,
                       validation_data=(x_test, y_test),
                       shuffle=False)
    # Reset the internal state between passes over the data so that
    # state from the end of one epoch does not leak into the next.
    model_stateful.reset_states()
print('Predicting')
predicted_stateful = model_stateful.predict(x_test, batch_size=batch_size)
print('Creating Stateless Model...')
model_stateless = create_model(stateful=False)
print('Training')
model_stateless.fit(x_train,
                    y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_test, y_test),
                    shuffle=False)
print('Predicting')
predicted_stateless = model_stateless.predict(x_test, batch_size=batch_size)
# ----------------------------
print('Plotting Results')
plt.subplot(3, 1, 1)
plt.plot(y_test)
plt.title('Expected')
plt.subplot(3, 1, 2)
# drop the first tsteps-1 points: they cannot be predicted, since the
# "previous" timesteps they would be computed from do not exist
plt.plot((y_test - predicted_stateful).flatten()[tsteps - 1:])
plt.title('Stateful: Expected - Predicted')
plt.subplot(3, 1, 3)
plt.plot((y_test - predicted_stateless).flatten())
plt.title('Stateless: Expected - Predicted')
plt.show()
4. Pretrained Word Embeddings
Explanation:
'''
This script loads pre-trained word embeddings (GloVe embeddings)
into a frozen Keras Embedding layer, and uses it to
train a text classification model on the 20 Newsgroups dataset
(classification of newsgroup messages into 20 different categories).
'''
from __future__ import print_function
import os
import sys
import numpy as np
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.layers import Dense, Input, GlobalMaxPooling1D
from keras.layers import Conv1D, MaxPooling1D, Embedding
from keras.models import Model
BASE_DIR = ''
GLOVE_DIR = os.path.join(BASE_DIR, 'glove.6B')
TEXT_DATA_DIR = os.path.join(BASE_DIR, '20_newsgroup')
MAX_SEQUENCE_LENGTH = 1000
MAX_NB_WORDS = 20000
EMBEDDING_DIM = 100
VALIDATION_SPLIT = 0.2
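# Neither input ships with Keras: GLOVE_DIR must hold the unzipped
# glove.6B vectors and TEXT_DATA_DIR the 20 Newsgroups messages (one
# subdirectory per category). A one-time setup sketch for the GloVe
# half, using only the standard library (assumes Python 3 and the
# standard Stanford NLP download location):
#
#     import zipfile
#     import urllib.request
#     if not os.path.isdir(GLOVE_DIR):
#         urllib.request.urlretrieve(
#             'http://nlp.stanford.edu/data/glove.6B.zip', 'glove.6B.zip')
#         with zipfile.ZipFile('glove.6B.zip') as zf:
#             zf.extractall(GLOVE_DIR)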
# first, build index mapping words in the embeddings set
# to their embedding vector
print('Indexing word vectors.')
embeddings_index = {}
f = open(os.path.join(GLOVE_DIR, 'glove.6B.100d.txt'))
for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float32')
    embeddings_index[word] = coefs
f.close()
print('Found %s word vectors.' % len(embeddings_index))
# second, prepare text samples and their labels
print('Processing text dataset')
texts = [] # list of text samples
labels_index = {} # dictionary mapping label name to numeric id
labels = [] # list of label ids
for name in sorted(os.listdir(TEXT_DATA_DIR)):
    path = os.path.join(TEXT_DATA_DIR, name)
    if os.path.isdir(path):
        label_id = len(labels_index)
        labels_index[name] = label_id
        for fname in sorted(os.listdir(path)):
            if fname.isdigit():
                fpath = os.path.join(path, fname)
                if sys.version_info < (3,):
                    f = open(fpath)
                else:
                    f = open(fpath, encoding='latin-1')
                t = f.read()
                i = t.find('\n\n')  # skip the message header
                if 0 < i:
                    t = t[i:]
                texts.append(t)
                f.close()
                labels.append(label_id)
print('Found %s texts.' % len(texts))
# finally, vectorize the text samples into a 2D integer tensor
tokenizer = Tokenizer(num_words=MAX_NB_WORDS)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
word_index = tokenizer.word_index
print('Found %s unique tokens.' % len(word_index))
data = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)
labels = to_categorical(np.asarray(labels))
print('Shape of data tensor:', data.shape)
print('Shape of label tensor:', labels.shape)
# split the data into a training set and a validation set
indices = np.arange(data.shape[0])
np.random.shuffle(indices)
data = data[indices]
labels = labels[indices]
num_validation_samples = int(VALIDATION_SPLIT * data.shape[0])
x_train = data[:-num_validation_samples]
y_train = labels[:-num_validation_samples]
x_val = data[-num_validation_samples:]
y_val = labels[-num_validation_samples:]
print('Preparing embedding matrix.')
# prepare embedding matrix
num_words = min(MAX_NB_WORDS, len(word_index))
embedding_matrix = np.zeros((num_words, EMBEDDING_DIM))
for word, i in word_index.items():
    if i >= MAX_NB_WORDS:
        continue
    # words not found in the embedding index stay all-zeros
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector
# load pre-trained word embeddings into an Embedding layer
# note that we set trainable = False so as to keep the embeddings fixed
embedding_layer = Embedding(num_words,
                            EMBEDDING_DIM,
                            weights=[embedding_matrix],
                            input_length=MAX_SEQUENCE_LENGTH,
                            trainable=False)
print('Training model.')
# train a 1D convnet with global maxpooling
sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)
x = Conv1D(128, 5, activation='relu')(embedded_sequences)
x = MaxPooling1D(5)(x)
x = Conv1D(128, 5, activation='relu')(x)
x = MaxPooling1D(5)(x)
x = Conv1D(128, 5, activation='relu')(x)
x = GlobalMaxPooling1D()(x)
x = Dense(128, activation='relu')(x)
preds = Dense(len(labels_index), activation='softmax')(x)
model = Model(sequence_input, preds)
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['acc'])
model.fit(x_train, y_train,
          batch_size=128,
          epochs=10,
          validation_data=(x_val, y_val))
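# Decoding a prediction back to a newsgroup name (a sketch, not part of
# the original script): labels_index maps directory names to class ids,
# so inverting it turns the argmax of the softmax output into a name.
index_to_label = {i: name for name, i in labels_index.items()}
pred = model.predict(x_val[:1])
print('Predicted newsgroup:', index_to_label[int(np.argmax(pred[0]))])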
5. Reuters MLP
Explanation:
'''
Trains and evaluates a simple MLP
on the Reuters newswire topic classification task.
'''
from __future__ import print_function
import numpy as np
import keras
from keras.datasets import reuters
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.preprocessing.text import Tokenizer
max_words = 1000
batch_size = 32
epochs = 5
print('Loading data...')
(x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=max_words,
                                                         test_split=0.2)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')
num_classes = np.max(y_train) + 1
print(num_classes, 'classes')
print('Vectorizing sequence data...')
tokenizer = Tokenizer(num_words=max_words)
x_train = tokenizer.sequences_to_matrix(x_train, mode='binary')
x_test = tokenizer.sequences_to_matrix(x_test, mode='binary')
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)
print('Convert class vector to binary class matrix '
      '(for use with categorical_crossentropy)')
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
print('y_train shape:', y_train.shape)
print('y_test shape:', y_test.shape)
print('Building model...')
model = Sequential()
model.add(Dense(512, input_shape=(max_words,)))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_split=0.1)
score = model.evaluate(x_test, y_test,
                       batch_size=batch_size, verbose=1)
print('Test score:', score[0])
print('Test accuracy:', score[1])
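# model.fit's return value is captured in history above but never used;
# a common follow-up is plotting the learning curves it recorded (a
# sketch; this Keras version logs the metric as 'acc', newer releases
# as 'accuracy', hence the fallback):
import matplotlib.pyplot as plt
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
plt.plot(history.history[acc_key], label='train')
plt.plot(history.history['val_' + acc_key], label='validation')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.show()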
References:
[1] Keras Examples: https://github.com/fchollet/keras/tree/master/examples