项目地址:https://github.com/AaronEC/CBR-House-Price-Recommender
# TODO:
# Impliment distance scoring?距离评分
# Calculation for bool value weighting? 布尔值所占比重的计算
# RETAIN new case (customers house) and add it to existing database 保留新案例并将新案例添加到数据库
# This import is just for showing monetary values formatted in local currency
# reeditor:早知晓
# time:2023.04.10
import locale
locale.setlocale(locale.LC_ALL, '') # 用于国际货币换算的,不用理会这个
class house:
""" 保存每个房子相关的所有信息 """
def __init__(self, row):
self.name = row[0]
self.price = float(row[1]) # 售价
self.date = int(row[2]) # 在售时长
self.distance = float(row[3]) # 距离
self.area = int(row[4]) # 使用面积
self.rooms = int(row[5]) # 房间数
self.bedrooms = int(row[6]) # 卧室数
self.detached = row[7] # 是否独立式
self.garage = row[8] # 是否可刀
self.energy = str(row[9]).strip('\n') #能源评级
# 根据通货膨胀调整价格 ( 每三个月增加3% )
for i in range(0, self.date, 3):
self.price += self.price * 0.03
# 设置初始值
self.value = 0
def value(house):
""" 基于权重值计算和更新已出售房子与顾客房子之间的关联度。
:type house: house[class]
:rtype: void(返回类型)
"""
adjustment = 0 #
if house.distance > 0.25:
print(f"\nHouse {house.name} too far away, disregarded")
house.value, house.price = 0, 0
else:
print(f"\nHouse {house.name} within distance, calculating...")
value = weights["distance"] # 获取distance的权重值直接赋给value
if house.area and customerHouse.area:
value += weights["area"] * (house.area / customerHouse.area)
if house.rooms and customerHouse.rooms:
value += weights["rooms"] * (house.rooms / customerHouse.rooms)
if house.bedrooms and customerHouse.bedrooms:
value += weights["bedrooms"] * (house.bedrooms / customerHouse.bedrooms)
if house.energy and customerHouse.energy:
value += weights["energy"] * (energyRating[house.energy] / energyRating[customerHouse.energy])
if house.detached == 'Y': # 对于有限的布尔值采用枚举法处理
value += weights["detached"]
if house.garage == 'Y':
value += weights["garage"]
if customerHouse.detached == 'N':
adjustment += weights["detached"]
if customerHouse.garage == 'N':
adjustment += weights["garage"]
house.value = round(value / (potential - adjustment), 2) # 四舍五入处理关联度
print(f"Relative value: {house.value}")
def saveHouse(file, savedHouse):
""" 将带有推荐值的顾客房子存储到数据库中,留着以后使用
:type file: 数据库文件名
:type savedHouse: house[class]
:rtype: void
"""
# Format house object ready for saving
savedHouse.name = len(houseDatabase) + 1 # 房子序号
savedHouse.price = round(savedHouse.price) # 四舍五入取整
savedHouse.energy = savedHouse.energy + "\n"
# Convert object to list
savedHouse = list(savedHouse.__dict__.values())
savedHouse.pop() # 删除 value 属性
# Convert list to string
outputString = ','.join(str(x) for x in savedHouse)
# Save string to .csv file
with open('Database.csv', 'a') as databaseOut:
# Check if exact house is already in database (to prevent double saving)
for line in databaseIn:
line = ','.join(str(x) for x in line)
if outputString.split(',', 1)[1] == line.split(',', 1)[1]: # 对比除房子序号外的其他属性值,若已经存在相同案例,则 break
print("Exact house already in database, not saving...")
break
# Save to database, if it is a unique entry
else:
print("House not already in database, saving...")
databaseOut.write(outputString)
# 定义计算关联度使用的权重 (基于专家知识)
weights = {
"distance": 4,
"area": 2,
"rooms": 2,
"bedrooms": 2,
"detached": 3,
"garage": 1,
"energy": 1
}
potential = sum(weights.values()) # 计算案例相似的参数
# 定义能源评价对应数值
energyRating = {
"A": 6,
"B": 5,
"C": 4,
"D": 3,
"E": 2,
"F": 1
}
# Send database files to 2d arrays ready for processing
houseIn = [line.split(',') for line in open('House.csv')] # 将待售房子信息转换为二维数组,[[表头信息],[数据信息]]
databaseIn = [line.split(',') for line in open('Database.csv')] # 将已售房子信息转换为二维数组,[[表头信息],[数据信息],…]
# Define object of class 'house' for customer house and reset price
customerHouse = house(houseIn[1])
customerHouse.price = 0
# Define comparison houses (array of objects of class 'house')
houseDatabase = []
for row in databaseIn[1:]: # 跳过表头
houseDatabase.append(house(row))
# Weighted comparisons between customer house and database houses
valueTotals = [] # 存储所有已售房子与顾客房子的关联度
for house in houseDatabase:
value(house)
valueTotals.append(house.value)
# Find closest database house value match to customer house value
bestMatchIndex = valueTotals.index(min(valueTotals, key=lambda x:abs(x-1)))
# Calculate estimated customer house price based on value adjusted price of best match house
customerHouse.price = houseDatabase[bestMatchIndex].price / min(valueTotals, key=lambda x:abs(x-1))
# Output results summary to terminal
print(f"""
------------------------------------------------------------------------------------
Closest match: House {houseDatabase[bestMatchIndex].name}
Relative weighted value: {houseDatabase[bestMatchIndex].value}
------------------------------------------------------------------------------------
Estimated customer house value: {locale.currency(customerHouse.price, grouping=True)}p
------------------------------------------------------------------------------------
""")
# Save customer house to database to improve future recommendation accuracy
userSave = ""
while userSave.lower() != "y" or userSave.lower() != "n":
userSave = input("Save valuation to database? (Y/N): \n")
if userSave.lower() == "y":
saveHouse('Database.csv', customerHouse)
break
elif userSave.lower() == "n":
print("Not saving")
break
else:
print("Invalid input, enter Y for yes, or N for no")
麻雀虽小,五脏俱全。因为这个项目是大学布置的作业所以比较简单,但基本的框架都实现了。计算案例相似度部分有对于文本类型数据的处理,但由于文本型数据取值个数较少,所以原作者采用的类似枚举的方法按值分别计算相似度,所以这个地方的实现还要再看一下别的代码。
加油加油一直在路上,冲冲冲!!!