数据来源可见我的上篇博文《Python数据分析实战:上海二手房价分析》。使用Navicat将CSV文件中的数据导入到数据库中,导入过程中需选择“10008(MAC-Simplified Chinese GB 2312)”这个编码格式,否则中文会出现乱码。
导入成功,如下所示。
导入后,需要将area、price这两列的数据类型改为decimal,以便后续对其进行数值计算(如求平均值、最大值、最小值)。
-- Per-district price summary: average, highest and lowest price plus the
-- number of listings, sorted so the district with the highest average
-- price comes first.
SELECT
    addr_dist            AS 区,
    -- ROUND keeps the average numeric. The previous
    -- SUBSTRING(AVG(price) FROM 1 FOR 5) cut the value's string form after
    -- five characters, which silently drops digits when the average has six
    -- or more integer digits (123456.78 -> '12345') and can leave a trailing
    -- '.' (1234.5678 -> '1234.').
    -- NOTE(review): 2 decimals is a display choice; adjust to the price unit.
    ROUND(AVG(price), 2) AS 平均房价,
    MAX(price)           AS 最贵房价,
    MIN(price)           AS 最低房价,
    COUNT(f1)            AS 房源数量  -- counts rows whose f1 is not NULL
FROM house_info
GROUP BY addr_dist
ORDER BY AVG(price) DESC;
-- Average price for every (district, orientation) pair, restricted to
-- listings whose face value is '南' or '南北'; rows are ordered by district.
SELECT
    addr_dist,
    face       AS 朝向,
    AVG(price) AS 平均房价
FROM house_info
WHERE face = '南'
   OR face = '南北'
GROUP BY
    addr_dist,
    face
ORDER BY addr_dist;
部分查询结果如上。由于结果显示起来很长,因此改用以下查询语句,使查询结果更美观一些。
-- One row per district, with the average price of '南' listings and of
-- '南北' listings side by side.
--
-- Written as a single conditional aggregation instead of joining two
-- per-orientation subqueries:
--   * house_info is scanned once instead of twice;
--   * a district that has listings for only one of the two orientations is
--     kept (the missing average is NULL) rather than being dropped by the
--     INNER JOIN;
--   * averages are rounded numerically. The previous SUBSTRING(AVG(price),1,5)
--     truncated the string form and lost digits for averages with six or more
--     integer digits.
-- NOTE(review): 2 decimals is a display choice; adjust to the price unit.
SELECT
    addr_dist AS 区,
    -- CASE without ELSE yields NULL for other rows, and AVG ignores NULLs.
    ROUND(AVG(CASE WHEN face = '南'   THEN price END), 2) AS 南朝向的平均房价,
    ROUND(AVG(CASE WHEN face = '南北' THEN price END), 2) AS 南北朝向的平均房价
FROM house_info
WHERE face IN ('南', '南北')
GROUP BY addr_dist
ORDER BY addr_dist;
-- One row per district, with the average price of five layout types
-- (values of the type column) shown as separate columns.
--
-- Written as a single conditional aggregation instead of a five-way
-- INNER JOIN of per-type subqueries:
--   * house_info is scanned once instead of five times;
--   * a district that lacks one of the five layouts is kept (that column is
--     NULL) rather than disappearing from the result entirely;
--   * averages are rounded numerically. The previous SUBSTRING(AVG(price),1,5)
--     truncated the string form and lost digits for averages with six or more
--     integer digits.
-- Column aliases start with a digit, so they are backtick-quoted (MySQL
-- identifier quoting) to keep them unambiguous to the parser.
-- NOTE(review): 2 decimals is a display choice; adjust to the price unit.
SELECT
    addr_dist AS 区,
    -- CASE without ELSE yields NULL for other rows, and AVG ignores NULLs.
    ROUND(AVG(CASE WHEN type = '1室1厅1卫' THEN price END), 2) AS `1室1厅1卫`,
    ROUND(AVG(CASE WHEN type = '2室1厅1卫' THEN price END), 2) AS `2室1厅1卫`,
    ROUND(AVG(CASE WHEN type = '2室2厅1卫' THEN price END), 2) AS `2室2厅1卫`,
    ROUND(AVG(CASE WHEN type = '3室2厅1卫' THEN price END), 2) AS `3室2厅1卫`,
    ROUND(AVG(CASE WHEN type = '3室2厅2卫' THEN price END), 2) AS `3室2厅2卫`
FROM house_info
WHERE type IN ('1室1厅1卫', '2室1厅1卫', '2室2厅1卫', '3室2厅1卫', '3室2厅2卫')
GROUP BY addr_dist
ORDER BY addr_dist;