aspect_level情感分析(持续更新)

aspect_level情感分析(持续更新)_第1张图片

第一遍代码
import os

import numpy as np
import pandas as pd

# Seed NumPy's global random number generator so that every run of the
# script produces the same random numbers.
np.random.seed(7)
# define the raw dataset
def load_cor(fname, content=None, target=None, rating=None, data_dir=None):
    """Load a ``.cor`` corpus file into a DataFrame.

    The file is read as consecutive groups of three lines: content, target
    and rating. Each line is stripped of surrounding whitespace. Trailing
    lines that do not complete a group of three are ignored.

    Args:
        fname: File name without the ``.cor`` extension.
        content: Optional list that the content lines are appended to
            (in place). A fresh list is used when omitted.
        target: Optional list that the target lines are appended to.
        rating: Optional list that the rating lines are appended to.
        data_dir: Directory containing the file. When ``None`` the original
            hard-coded Windows data directory is used.

    Returns:
        A DataFrame with the columns ``content``, ``target`` and ``rating``,
        one row per record.
    """
    # Fresh lists per call: mutable default arguments would be shared between
    # calls and make every later call return the earlier records as well.
    content = [] if content is None else content
    target = [] if target is None else target
    rating = [] if rating is None else rating

    if data_dir is None:
        path = r'C:\Users\lujinyu\PycharmProjects\Attion\data\%s.cor' % fname
    else:
        path = os.path.join(data_dir, '%s.cor' % fname)

    with open(path) as f:
        sentences = f.readlines()
        print(f.name)

    print(len(sentences) / 3)
    for i in range(len(sentences) // 3):
        content.append(sentences[i * 3].strip())
        target.append(sentences[i * 3 + 1].strip())
        rating.append(sentences[i * 3 + 2].strip())

    # Build one row per field, then transpose so each record becomes a row.
    df = pd.DataFrame([content, target, rating],
                      index=['content', 'target', 'rating'])
    return df.T


if __name__ == '__main__':
    # Load the training split and print the resulting table for inspection.
    train_frame = load_cor('train')
    print(train_frame)
第二遍代码
import numpy as np
import pandas as pd
import re
import os
# Seed NumPy's global random number generator so that every run of the
# script produces the same random numbers.
np.random.seed(7)
# Show the working directory; 'train.cor' is opened relative to it.
print(os.getcwd())
def load_cor(content=None, target=None, rating=None, path='train.cor'):
    """Load a ``.cor`` corpus file into three parallel lists.

    The file is read as consecutive groups of three lines: content, target
    and rating. Each line is stripped of surrounding whitespace. Trailing
    lines that do not complete a group of three are ignored.

    Args:
        content: Optional list that the content lines are appended to
            (in place). A fresh list is used when omitted.
        target: Optional list that the target lines are appended to.
        rating: Optional list that the rating lines are appended to.
        path: File to read; defaults to ``train.cor`` in the working
            directory.

    Returns:
        ``[content, target, rating]``, the three lists in that order.
    """
    # Fresh lists per call: mutable default arguments would be shared between
    # calls and make every later call return the earlier records as well.
    content = [] if content is None else content
    target = [] if target is None else target
    rating = [] if rating is None else rating

    with open(path) as f:
        sentences = f.readlines()
        print(f.name)

    print(len(sentences) / 3)
    for i in range(len(sentences) // 3):
        content.append(sentences[i * 3].strip())
        target.append(sentences[i * 3 + 1].strip())
        rating.append(sentences[i * 3 + 2].strip())

    return [content, target, rating]
def transLabel(labels):
    """Convert raw sentiment labels to integer class indices, in place.

    The string labels ``'1'``, ``'0'`` and ``'-1'`` become the integers
    ``2``, ``1`` and ``0`` respectively. Any other value is reported on
    stdout and left unchanged.

    Args:
        labels: Mutable sequence of raw labels; it is modified in place.

    Returns:
        The same ``labels`` object, after conversion.
    """
    class_of = {'1': 2, '0': 1, '-1': 0}
    for i, label in enumerate(labels):
        # Only string labels are valid; the isinstance check also keeps
        # unhashable values away from the dict lookup.
        if isinstance(label, str) and label in class_of:
            # Every class index is an int ('0' used to become the string '1').
            labels[i] = class_of[label]
        else:
            print("label无效:", label)
    return labels
def changeListCode(b):
    """Return a new list with every item of *b* decoded as UTF-8."""
    return [raw.decode('utf8') for raw in b]
if __name__ == '__main__':
    # load_cor() returns [contents, targets, ratings] as three parallel lists.
    df = load_cor()
    opinion = transLabel(df[2])
    content = df[0]
    # The vocabulary is built from the sentences and their aspect targets.
    lint = df[0] + df[1]

    words = []
    for line in lint:
        for word in line.split():
            # Keep only runs of ASCII letters, lower-cased; punctuation,
            # digits and other characters act as separators.
            words.extend(w.lower() for w in re.findall(r'[a-zA-Z]+', word))

    # Everything below depends on `words`, so it stays inside the guard:
    # at module level it raised NameError whenever the file was imported.
    print('Preprocessing...')
    # Count how often each word occurs (most frequent first).
    word_counts = pd.DataFrame(pd.Series(words).value_counts())
    print(word_counts)
    del words

你可能感兴趣的:(aspect_level情感分析(持续更新))