发表时间:2007年5月7日 16时36分45秒 评论/阅读(1/0)
没写过DB和XML解析,手忙脚乱地搞了5~6个小时才搞出来.
发现解析xml有两个需要注意的地方.
1.文件必须是采用utf-8编码存储,否则解析失败(这里害我浪费了好多时间)
2.取出来以后要采用iconv进行utf-8->gbk的转码,否则显示乱码.(这里至今不懂为什么,只是看了网上那样做)
发现程序还是没有超过70行,而且我还不知道写一些高级的语法,否则可能还短一点.
require 'rexml/document'
require 'iconv'
require 'mysql'
include REXML
# Import script: for every *.html file in the current directory, read all <td>
# cells, insert one row into `train` and one row per stop into `train_station`,
# then move the processed file into the `complete` sub-directory.
#
# The cell positions used below are fixed offsets into the flat list of <td>
# texts; they encode the layout of the pages this script was written for.

# Wrap a value as a SQL string literal, escaping it through the open
# connection so quotes/backslashes in scraped text cannot break the statement.
# nil becomes the empty string, matching what plain interpolation produced.
def sql_str(value)
  "'#{$my.escape_string(value.to_s)}'"
end

$host = "localhost"
$user = "root"
$passwd = ""
$db = "friends"
$my = Mysql.connect($host, $user, $passwd)

begin
  $my.select_db $db

  # Iconv.new takes (to, from): cell text is converted from UTF-8 to GBK.
  # NOTE(review): the string comparisons below are made against the converted
  # text, so this source file presumably has to be saved as GBK -- confirm.
  cov = Iconv.new('gbk', 'utf-8')

  Dir.glob("*.html").sort.each do |fname|
    puts fname
    File.open(fname) do |f|
      xml = Document.new(f)
      tds = []
      xml.elements.each("//td") { |x| tds.push cov.iconv(x.text) }

      # Header fields sit at every second cell starting from index 1
      # (presumably label/value pairs -- verify against a sample page).
      i = 1
      train_no = tds[i]
      start_station, end_station = tds[i += 2].split(" — ")
      train_type = tds[i += 2]
      dongli = (train_type == "空调动力车组")
      # Drops the last four positions of the cell (presumably a unit suffix).
      distance = tds[i += 2][0..-5]
      duration = tds[i += 2].sub("小时", ":").sub("分钟", "")

      # Collect the price list.
      prices = []
      i += 1
      if dongli
        # Variable-length list: every cell up to the "站次" marker is a price.
        while tds[i += 1] != "站次"
          # Without this bound a page lacking the marker would loop forever.
          raise "#{fname}: price list terminator not found" if i >= tds.size
          prices << tds[i]
        end
      else
        # Fixed layout: seven prices, each the concatenation of two cells
        # that are seven positions apart.
        0.upto(6) { |x| prices[x] = tds[i + x + 1] + tds[i + x + 8] }
        i += 15
      end

      # NOTE(review): distance is interpolated unquoted (numeric column), so it
      # is not covered by the escaping applied to the string fields.
      price_values = (0..7).map { |k| sql_str(prices[k]) }.join(",")
      $my.query("insert into train (train_no,start_station,end_station,train_type,distance,duration,p0,p1,p2,p3,p4,p5,p6,p7) values (#{sql_str(train_no)},#{sql_str(start_station)},#{sql_str(end_station)},#{sql_str(train_type)},#{distance},#{sql_str(duration)},#{price_values})")

      # Skip five cells before the first stop row (presumably the remaining
      # column headers of the stop table -- confirm).
      i += 5
      # Kept as "" so the summary line below prints 1 when no stop row is read.
      station_num = ""
      while i < tds.size - 1
        # Each stop occupies six consecutive cells.
        station_num = tds[i += 1]
        station_name = tds[i += 1]
        arrive_time = tds[i += 1]
        arrive_time = "0:00" if arrive_time == "--"
        leave_time = tds[i += 1]
        leave_time = "0:00" if leave_time == "--"
        # Map the day label to an offset; any unrecognised label becomes "2".
        day_num = case tds[i += 1]
                  when "当日" then "0"
                  when "第2日" then "1"
                  else "2"
                  end
        distance = tds[i += 1]
        # station_num, day_num and distance are numeric columns and stay unquoted.
        $my.query("insert into train_station (train_no,station_num,station_name,arrive_time,leave_time,day_num,distance) values (#{sql_str(train_no)},#{station_num},#{sql_str(station_name)},#{sql_str(arrive_time)},#{sql_str(leave_time)},#{day_num},#{distance})")
      end
      puts "process train_no:#{train_no},train_stations:#{station_num.to_i+1}"
    end
    # The file handle is closed by now, so the rename is safe on Windows.
    File.rename(fname, "complete\\#{fname}")
  end
ensure
  # Release the connection even when a file fails to parse or a query raises.
  $my.close
end