推荐系统笔记——推荐引擎之实现简单的实时推荐算法(三)

同类目推荐

生成模拟日志logfile.txt,并按类目聚合其中的视频id得到cate.log,用于同类目推荐:

# coding=utf-8
import random

# Value pools from which the fake log fields are drawn.
user_list = ["one", "two", "three", "four", "five"]
num = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
log_type_array = ["1", "2", "3", "4", "5", "6", "7"]
albet_num = ["a", "b", "c", "d", "e", "f", "g", "h","A", "B", "C", "G", "T", "Y", "R", "U", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
topic_arary = ["空气净化器", "净水器", "加湿器", "空气净化滤芯"]

# Step 1: write 2000 synthetic records to logfile.txt, one per line, as eight
# "&"-separated fields:
#   cookie & uid & user agent & ip & video id & topic & order id & log type
# The file keeps the platform default encoding because the other snippets that
# read logfile.txt open it without an explicit encoding.
with open("./logfile.txt", "w") as file_object:
    for _ in range(0, 2000):
        cookie = "".join(random.sample(albet_num, 6))  # 6 distinct characters
        uid = random.choice(user_list)
        user_agent = "Macintosh Chrome Safari"
        ip = "192.168.89.177"
        video_id = "".join(random.sample(num, 7))  # 7 distinct digits
        topic = random.choice(topic_arary)
        order_id = '0'
        log_type = random.choice(log_type_array)
        fields = [cookie, uid, user_agent, ip, video_id, topic, order_id, log_type]
        file_object.write("&".join(fields) + "\n")


# Step 2: group the video ids by topic.
cate_its = {}  # key = topic (category), value = list of video ids logged under it

with open("./logfile.txt", "r") as file:
    for line in file:
        ls = line.strip().split("&")    # len(ls) = 8
        # ls[5] is the topic, ls[4] is the video id.
        cate_its.setdefault(ls[5], []).append(ls[4])

# Step 3: dump the index as "<topic>\t<id>&&<id>&&...", one topic per line.
# Written as UTF-8 explicitly because the server opens cate.log with
# encoding="utf-8" and the topic names are non-ASCII.
with open("./cate.log", "w", encoding="utf-8") as file_object:
    for k, v in cate_its.items():
        file_object.write(k + "\t" + "&&".join(v) + "\n")

同样需要用到Server和Client程序,Client程序不变,Server程序如下:

# -*- coding: utf-8 -*-
import socket

# Address the server binds to: an empty host string plus TCP port 8888.
HOST = ""
PORT = 8888

# Create an IPv4 stream (TCP) socket, set SO_REUSEADDR on it, bind it to
# (HOST, PORT) and start listening with a backlog of 1.
listen_socket = socket.socket(family=socket.AF_INET, type=socket.SOCK_STREAM)
listen_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
listen_socket.bind((HOST, PORT))
listen_socket.listen(1)
print("Serving HTTP on port %s ..." % PORT)


# Load the category index from cate.log. Each line is expected to look like
#   <category>\t<id>&&<id>&&...
cate_its = {}  # key = category name, value = list of video ids in that category
with open("./cate.log", "r", encoding="utf-8") as file:
    for line in file:
        ls = line.strip().split("\t")    # expected: [category, "id&&id&&..."]
        if len(ls) < 2:
            # Blank or malformed line: there is nothing to index, and indexing
            # ls[1] would raise IndexError.
            continue
        cate_its.setdefault(ls[0], []).extend(ls[1].split("&&"))

    
def log_process(request, tag):
    """Answer one recommendation request from the in-memory category index.

    Reads the module-level ``cate_its`` mapping (category -> list of ids).

    Args:
        request: The query string. For ``tag == 1`` it is a category name;
            for ``tag == 2`` it is an item id.
        tag: Request type.
            1 = text: return the items of the category named ``request``.
            2 = number: return the items of the category that contains the
                id ``request``.

    Returns:
        The matching ids joined with "&&"; "wrong request" when nothing
        matches; "wrong" when ``tag`` is neither 1 nor 2.
    """
    if tag == 1:
        if request in cate_its:
            return "&&".join(cate_its[request])
        return "wrong request"
    if tag == 2:
        # Find the category the requested id belongs to. Returning on the very
        # first category without looking at `request` would hand every caller
        # the same list regardless of what they asked for.
        for ids in cate_its.values():
            if request in ids:
                return "&&".join(ids)
        return "wrong request"
    return "wrong"


# Serve clients one at a time, forever. A request is expected to look like
# "<paras>&&<tag>EOF", where <tag> is an integer understood by log_process().
while True:
    client_connection, client_address = listen_socket.accept()
    try:
        request = client_connection.recv(1024)

        try:
            # recv() returns bytes on Python 3; decode before using str methods.
            paras, tag = request.decode("utf-8").split("&&")
            http_response = log_process(paras, int(tag.split("EOF")[0]))
        except ValueError:
            # Covers undecodable bytes, a missing or repeated "&&" separator
            # and a non-numeric tag. Answer with an error instead of letting
            # one bad request take the whole server down.
            http_response = "wrong request"

        # sendall() needs bytes on Python 3.
        client_connection.sendall(http_response.encode("utf-8"))
    finally:
        # Always release the connection, even if sending failed.
        client_connection.close()

一致性哈希

from hash_ring import *
import redis

# The machines that make up the hash ring, each written as "ip:port".
memcache_servers = ['127.0.0.1:6379', '127.0.0.1:6380']
# Build the ring over those machines.
ring = HashRing(memcache_servers)


def _connect(node):
    """Return a Redis client (db 0) for a ring node given as "host:port"."""
    parts = node.split(':')
    return redis.Redis(host=parts[0], port=int(parts[1]), db=0)


# Write: ask the ring which node owns each key, print that node, then store
# the value on it.
for key in ('my_key2232', 'my_key'):
    server = ring.get_node(key)
    print(server)
    r = _connect(server)
    r.set(key, "12312312312")

# Read: look the owning node up again through the ring and fetch the value.
server = ring.get_node('my_key')
r = _connect(server)
print("get content")
print(r.get("my_key"))

如果在Python3.x中用hash_ring模块会遇到问题,具体解决参考:https://blog.csdn.net/qq_14997473/article/details/87855481

多线程内存更新

设置两个线程,一个读操作一个写操作,并设置读锁和写锁保护:

import threading
from time import ctime, sleep, time

cate_its = {}  # key = topic (category), value = list of video ids
file = open("./logfile.txt", "r")

# A single mutex shared by the reader and the writer thread. Both functions
# below bind this same object as their default `lock` argument, so acquiring
# it in one thread is visible to the other. (A plain integer flag bound as a
# default argument would be copied into each function's own local variable and
# would protect nothing.)
lock = threading.Lock()


def read(lock=lock):
    """Once per second, print a sample of ``cate_its`` while holding ``lock``.

    Only the first category is printed, with at most its first 10 ids,
    followed by "read succeed" and the current timestamp. If the lock is
    currently held by the writer, "read failed" is printed instead. Never
    returns.

    Args:
        lock: Mutex guarding ``cate_its``; defaults to the module-level lock.
    """
    while True:
        # Non-blocking acquire keeps the original try-or-report behaviour.
        if lock.acquire(blocking=False):
            try:
                for k, v in cate_its.items():
                    line = k + "\t" + "\t" + "&&".join(v[:10]) + "\n"
                    print(line)
                    print("read succeed")
                    print(time())  # show the timestamp
                    break  # one category is enough for the demo output
            finally:
                lock.release()
        else:
            print("read failed")
        sleep(1)  # unit: seconds


def write(lock=lock):
    """Once per second, fold unread lines of ``file`` into ``cate_its``.

    Each pass reads from where the previous pass stopped, so after the first
    pass only lines appended to the log since then are added. Prints
    "write succeed" and a timestamp after a pass, or "write failed" when the
    lock is held by the reader. Never returns.

    Args:
        lock: Mutex guarding ``cate_its``; defaults to the module-level lock.
    """
    while True:
        if lock.acquire(blocking=False):
            try:
                for line in file.readlines():
                    ls = line.strip().split("&")    # len(ls) = 8
                    if len(ls) < 6:
                        continue  # blank or truncated line: no topic field
                    # ls[5] is the topic, ls[4] is the video id.
                    cate_its.setdefault(ls[5], []).append(ls[4])
                print("write succeed")
                print(time())
            finally:
                lock.release()
        else:
            print("write failed")
        sleep(1)


# Start one reader thread and one writer thread. They are not daemon threads,
# so the process keeps running until it is killed.
threads = []
t1 = threading.Thread(target=read, args=())
threads.append(t1)
t2 = threading.Thread(target=write, args=())
threads.append(t2)

for t in threads:
    t.start()

用类实现多线程内存更新:

import threading
from time import ctime, sleep, time
    

class Lock():
    """In-memory category index refreshed from a log file by a writer thread.

    ``read`` and ``write`` are meant to run forever in separate threads on the
    same instance. Access to ``cate_its`` is serialised by a real mutex;
    ``lock`` is kept as a plain 0/1 flag that mirrors whether a writer pass is
    in progress.

    Attributes:
        lock: 0 when no writer pass is running, 1 while one is.
        cate_its: dict mapping topic -> list of video ids (per instance).
        file: the open log file this instance reads from.
    """

    lock = 0  # 0 = no writer pass in progress, 1 = writer pass in progress

    def __init__(self, path="./logfile.txt"):
        """Open ``path`` and load every line currently in it.

        Args:
            path: Log file of "&"-separated records; field 5 is the topic and
                field 4 the video id.
        """
        # Per-instance state: a dict or file handle stored on the class would
        # be shared by every instance.
        self.cate_its = {}
        self.file = open(path, "r")
        self._mutex = threading.Lock()
        self._load_new_lines()

    def _load_new_lines(self):
        """Fold every not-yet-read line of ``self.file`` into ``cate_its``."""
        for line in self.file.readlines():
            ls = line.strip().split("&")
            if len(ls) < 6:
                continue  # blank or truncated line: no topic field
            self.cate_its.setdefault(ls[5], []).append(ls[4])

    def write(self):
        """Once per second, load newly appended log lines. Never returns.

        Prints "write succeed" and a timestamp after each pass, or
        "write failed" when the reader currently holds the mutex.
        """
        while True:
            # Non-blocking acquire: either we own the data or we report and retry.
            if self._mutex.acquire(blocking=False):
                self.lock = 1
                try:
                    self._load_new_lines()
                    print("write succeed")
                    print(time())
                finally:
                    self.lock = 0
                    self._mutex.release()
            else:
                print("write failed")
            sleep(1)  # unit: seconds

    def read(self):
        """Once per second, print a sample of the index. Never returns.

        Prints the first category with at most its first 10 ids, then
        "read succeed" and a timestamp; prints "read failed" when the writer
        currently holds the mutex.
        """
        while True:
            if self._mutex.acquire(blocking=False):
                try:
                    for k, v in self.cate_its.items():
                        line = k + "\t" + "\t" + "&&".join(v[:10]) + "\n"
                        print(line)
                        print("read succeed")
                        print(time())  # show the timestamp
                        break  # one category is enough for the demo output
                finally:
                    self._mutex.release()
            else:
                print("read failed")
            sleep(1)
    
    
if __name__ == "__main__":
    # One shared Lock instance; its read and write loops each get a thread.
    l = Lock()
    t1 = threading.Thread(target=l.read, args=())
    t2 = threading.Thread(target=l.write, args=())
    threads = [t1, t2]

    # Threads are started as regular (non-daemon) threads, reader first.
    for t in threads:
        t.start()

人工干预推荐

比如设定一些特定格式,修改Server端的log_process()函数:

def log_process(request, tag):
    """Answer a request from the module-level ``cate_its`` index.

    tag == 1 (text): ``request`` is a category name; returns that category's
        items joined with "&&", or "wrong request" if the category is unknown.
    tag == 2 (number): takes the first category in ``cate_its`` and returns
        its first item, then ``request`` followed by the "#spical#" marker,
        then the remaining items, i.e. ``request`` is injected in second
        position. Returns "wrong request" when ``cate_its`` is empty.
        NOTE(review): the category is not chosen based on ``request``;
        confirm whether that is intended.
    Any other tag returns "wrong".
    """
    if tag == 1:
        items = cate_its.get(request)
        if items is None:
            return "wrong request"
        return "&&".join(items)

    if tag == 2:
        try:
            first_items = next(iter(cate_its.values()))
        except StopIteration:
            # Empty index: nothing to recommend.
            return "wrong request"
        pieces = [first_items[0], "&&", request, "#spical#", "&&".join(first_items[1:])]
        return "".join(pieces)

    return "wrong"

 

你可能感兴趣的:(推荐系统)