先上图
我的环境python2.7
运行后生成csv文件rent.csv,内容为从58同城品牌公寓页面爬取的合租房源数据
使用火狐浏览器访问(谷歌浏览器加载csv时显示跨域问题)
爬取网站:http://bj.58.com/pinpaigongyu/pn/5/?minprice=2000_4000
抓取代码:
#!usr/bin/python
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
import requests
import csv
import codecs
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
url = "http://bj.58.com/pinpaigongyu/pn/{page}/?minprice=2000_4000"
'''已完成的页数序号,初时为0'''
page = 0
csv_file = open("rent.csv","w")
csv_file.write(codecs.BOM_UTF8)
csv_writer = csv.writer(csv_file, delimiter=',')
while True:
page += 1
print("fetch: ", url.format(page=page))
response = requests.get(url.format(page=page))
html = BeautifulSoup(response.text, "html.parser")
house_list = html.select(".list > li")
# 循环在读不到新的房源时结束
if not house_list:
break
for house in house_list:
house_title = house.select("h2")[0].string
house_url = "http://bj.58.com/%s"%(house.select("a")[0]["href"])
house_info_list = house_title.split()
# 如果第二列是公寓名则取第一列作为地址
if "公寓" in house_info_list[1] or "青年社区" in house_info_list[1]:
house_location = house_info_list[0]
else:
house_location = house_info_list[1]
house_money = house.select(".money")[0].select("b")[0].string
print 'house_title=', house_title, 'house_location=', house_location
csv_writer.writerow([house_title, house_location, house_money, house_url])
csv_file.close()
运行python脚本,抓取过程...
将抓取数据存入csv文件:
页面js代码:
// ---- Map and shared state --------------------------------------------------

// Main map instance, rendered into the element with id "container".
var map = new AMap.Map("container", {
    resizeEnable: true,
    zoomEnable: true,
    center: [116.397428, 39.90923],
    zoom: 11
});

// Scale control attached to the map.
var scale = new AMap.Scale();
map.addControl(scale);

// Service used by loadWorkRange to query reachable areas.
var arrivalRange = new AMap.ArrivalRange();

// Longitude / latitude of the work location (assigned in loadWorkLocation).
var x;
var y;
// Declared but not assigned anywhere in the visible code.
var t;
// Transit policy string forwarded to the arrival-range search.
var vehicle = "SUBWAY,BUS";

// Work address text and the marker placed for it.
var workAddress;
var workMarker;

// Overlays currently on the map, kept so they can be removed later.
var rentMarkerArray = [];
var polygonArray = [];

// Route planner created when a rental marker is clicked.
var amapTransfer;

// Shared info window, offset so it sits above the clicked marker.
var infoWindow = new AMap.InfoWindow({
    offset: new AMap.Pixel(0, -30)
});

// Autocomplete bound to the "work-location" input; picking a suggestion
// triggers workLocationSelected.
var auto = new AMap.Autocomplete({
    input: "work-location"
});
AMap.event.addListener(auto, "select", workLocationSelected);
/**
 * Handler for the "bus" option: store the control's value as the transit
 * policy and redraw the work location.
 *
 * @param {Object} radio - control whose `value` is stored in `vehicle`.
 */
function takeBus(radio) {
    var selectedPolicy = radio.value;
    vehicle = selectedPolicy;
    loadWorkLocation();
}
/**
 * Handler for the "subway" option: store the control's value as the transit
 * policy and redraw the work location.
 *
 * @param {Object} radio - control whose `value` is stored in `vehicle`.
 */
function takeSubway(radio) {
    var selectedPolicy = radio.value;
    vehicle = selectedPolicy;
    loadWorkLocation();
}
/**
 * Load rental locations from the file chosen by the user.
 *
 * Only the file's *name* is used: it is handed to loadRentLocationByFile,
 * which requests it as a URL, so the file must be reachable under that name
 * relative to the page.
 *
 * @param {Object} fileInfo - object exposing a `files` list (presumably an
 *     <input type="file"> element; confirm against the page markup).
 */
function importRentInfo(fileInfo) {
    var files = fileInfo && fileInfo.files;
    // Nothing selected (e.g. the dialog was cancelled): files[0] would be
    // undefined and reading .name from it would throw.
    if (!files || files.length === 0) {
        return;
    }
    loadRentLocationByFile(files[0].name);
}
/**
 * Autocomplete "select" listener: remember the chosen place name as the work
 * address and redraw the work location.
 *
 * @param {Object} e - event whose `poi.name` holds the selected place name.
 */
function workLocationSelected(e) {
    var selectedPoi = e.poi;
    workAddress = selectedPoi.name;
    loadWorkLocation();
}
/**
 * Place the work-location marker on the map and store it in `workMarker`.
 *
 * @param {number} x - longitude of the marker.
 * @param {number} y - latitude of the marker.
 * @param {string} locationName - text used as the marker title.
 */
function loadWorkMarker(x, y, locationName) {
    var markerOptions = {
        map: map,
        title: locationName,
        icon: 'http://webapi.amap.com/theme/v1.3/markers/n/mark_r.png',
        position: [x, y]
    };
    workMarker = new AMap.Marker(markerOptions);
}
/**
 * Query the arrival range around a point and draw each returned boundary as
 * a polygon, recording the polygons in `polygonArray`.
 *
 * @param {number} x - longitude of the origin.
 * @param {number} y - latitude of the origin.
 * @param {number} t - time budget passed to the arrival-range search.
 * @param {string} color - fill and stroke colour of the polygons.
 * @param {string} v - value passed as the search `policy` option.
 */
function loadWorkRange(x, y, t, color, v) {
    var origin = [x, y];
    var searchOptions = {
        policy: v
    };

    // Draw one polygon per boundary in the search result.
    function drawBounds(status, result) {
        if (!result.bounds) {
            return;
        }
        for (var idx = 0; idx < result.bounds.length; idx++) {
            var area = new AMap.Polygon({
                map: map,
                fillColor: color,
                fillOpacity: "0.4",
                strokeColor: color,
                strokeOpacity: "0.8",
                strokeWeight: 1
            });
            area.setPath(result.bounds[idx]);
            polygonArray.push(area);
        }
    }

    arrivalRange.search(origin, t, drawBounds, searchOptions);
}
/**
 * Geocode a rental address and add a marker for it to the map.
 *
 * The marker is recorded in `rentMarkerArray`.  Clicking it opens the shared
 * info window and starts a least-time transit route search between the
 * current work address and this rental address, rendered into the
 * 'transfer-panel' element.
 *
 * @param {string} address - rental address text; empty or undefined values
 *     are ignored.
 */
function addMarkerByAddress(address) {
    // Blank CSV rows produce empty or undefined addresses; nothing to geocode.
    if (!address) {
        return;
    }
    var geocoder = new AMap.Geocoder({
        city: "北京",
        radius: 1000
    });
    geocoder.getLocation(address, function(status, result) {
        if (status !== "complete" || result.info !== 'OK') {
            return;
        }
        var geocode = result.geocodes && result.geocodes[0];
        if (!geocode) {
            return;
        }
        // Keep the marker local: assigning to an undeclared name would create
        // an implicit global (and throw in strict mode).
        var rentMarker = new AMap.Marker({
            map: map,
            title: address,
            icon: 'http://webapi.amap.com/theme/v1.3/markers/n/mark_b.png',
            position: [geocode.location.getLng(), geocode.location.getLat()]
        });
        rentMarkerArray.push(rentMarker);
        rentMarker.content = "房源:" + address + "";
        rentMarker.on('click', function(e) {
            infoWindow.setContent(e.target.content);
            infoWindow.open(map, e.target.getPosition());
            // Drop the previous route before planning a new one.
            if (amapTransfer) amapTransfer.clear();
            amapTransfer = new AMap.Transfer({
                map: map,
                policy: AMap.TransferPolicy.LEAST_TIME,
                city: "北京市",
                panel: 'transfer-panel'
            });
            amapTransfer.search([{
                keyword: workAddress
            }, {
                keyword: address
            }], function(status, result) {});
        });
    });
}
/**
 * Remove the arrival-range polygons and the work marker from the map, then
 * reset `polygonArray` to an empty list.
 */
function delWorkLocation() {
    if (polygonArray) {
        map.remove(polygonArray);
    }
    if (workMarker) {
        map.remove(workMarker);
    }
    polygonArray = [];
}
/**
 * Remove all rental markers from the map and reset `rentMarkerArray` to an
 * empty list.
 */
function delRentLocation() {
    if (rentMarkerArray) {
        map.remove(rentMarkerArray);
    }
    rentMarkerArray = [];
}
/**
 * Redraw the work location: clear the previous work marker and range
 * polygons, geocode the current `workAddress`, then place the marker, draw
 * the 60-unit arrival range for the selected `vehicle` policy and re-centre
 * the map on the result.
 *
 * Does nothing beyond clearing when no work address has been chosen yet.
 */
function loadWorkLocation() {
    delWorkLocation();
    // Snapshot the address so the asynchronous callback titles the marker
    // with the address that was actually geocoded.
    var address = workAddress;
    // The transit radios can fire before any address is selected.
    if (!address) {
        return;
    }
    var geocoder = new AMap.Geocoder({
        city: "北京",
        radius: 1000
    });
    geocoder.getLocation(address, function(status, result) {
        if (status === "complete" && result.info === 'OK') {
            var geocode = result.geocodes[0];
            x = geocode.location.getLng();
            y = geocode.location.getLat();
            // Pass the address as the marker title; omitting the third
            // argument leaves the title undefined.
            loadWorkMarker(x, y, address);
            loadWorkRange(x, y, 60, "#3f67a5", vehicle);
            map.setZoomAndCenter(12, [x, y]);
        }
    });
}
/**
 * Fetch a CSV of rental listings and add one map marker per distinct
 * location (the second comma-separated column of each line).
 *
 * Existing rental markers are removed first.
 *
 * NOTE(review): lines are split on bare commas, so a listing title that
 * itself contains a comma (or a quoted CSV field) would shift the columns.
 * NOTE(review): the file is requested with POST; confirm this is intentional
 * for the way the page is served.
 *
 * @param {string} fileName - URL (relative to the page) of the CSV file.
 */
function loadRentLocationByFile(fileName) {
    delRentLocation();
    var rent_locations = new Set();
    $.ajax({
        url: fileName,
        contentType: "text/plain; charset=utf-8",
        type: "post",
        dataType: 'text',
        success: function (data) {
            console.log(data);
            // Accept both "\n" and "\r\n" line endings.
            data.split(/\r?\n/).forEach(function(line) {
                var rentAddress = line.split(",")[1];
                // Blank lines (including the one after the final newline)
                // have no second column; skip them instead of queuing an
                // undefined address.
                if (rentAddress && rentAddress.trim()) {
                    rent_locations.add(rentAddress.trim());
                }
            });
            rent_locations.forEach(function(rentAddress) {
                addMarkerByAddress(rentAddress);
            });
        }
    });
}
全部源码下载:
https://download.csdn.net/download/ardo_pass/10648354