生成sitemap文件

有一个sitemap文件,非常有利于google等搜索引擎收录你的网站内容,我也来搞一搞

1,首先到https://www.google.com/webmasters,注册一下我的网站www.weekface.info,并验证所有权.具体步骤按照google的提示做就行了.

2,制作sitemap.xml文件
首先我要编写两个类:SitemapPrepare用来传递参数,SitemapJob用来生成sitemap文件,这样我们可以编写一个单独的rake任务,依次调用SitemapJob来生成post和tag的sitemap
先生成post和tag的sitemap文件,分别位于public/sitemap_gz_files/sitemap_tag.xml.gz,和 public/sitemap_gz_files/sitemap_post.xml.gz
再在网站根目录public下面生成一个总的sitemap.xml,用来标明上面两个文件(gzip压缩过的)的存放地址:

#public/sitemap.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Sitemap index: each <sitemap> entry points at one gzip-compressed sitemap file under /sitemap_gz_files/ rather than at individual pages. -->
<sitemapindex xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9
        http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd"
        xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<sitemap>
  <loc>http://www.weekface.info/sitemap_gz_files/sitemap_tag.xml.gz</loc>
  <lastmod>2010-02-10</lastmod>
 </sitemap>
<sitemap>
  <loc>http://www.weekface.info/sitemap_gz_files/sitemap_post.xml.gz</loc>
  <lastmod>2010-02-10</lastmod>
 </sitemap>
</sitemapindex>

#lib/sitemap_job.rb
# SitemapPrepare is a plain Struct used to hand the per-model settings to
# SitemapJob:
#
#   model_name  - model name as a string, e.g. "post"
#   url_format  - callable that receives one record and returns its URL
#   changefreq  - value written into each <changefreq> element
#   select_name - value passed as :select to the model's find
#   condition   - value passed as :conditions to the model's find
#
# The Struct is assigned to the constant instead of being subclassed: subclassing
# Struct.new(...) adds a useless anonymous superclass and raises
# "superclass mismatch" if this file is ever loaded twice.
SitemapPrepare = Struct.new(:model_name, :url_format, :changefreq, :select_name, :condition)

# Zlib is part of Ruby's standard library; it is required here because the
# archive is written in-process instead of shelling out to gzip.
require 'zlib'

# Builds the sitemap files served from public/.
#
# An instance renders the records of one model into
# public/sitemap_gz_files/sitemap_<model>.xml.gz; SitemapJob.result then writes
# public/sitemap.xml, the index that lists those archives.
class SitemapJob
  # sp       - settings object responding to model_name, url_format,
  #            changefreq, select_name and condition
  # ins      - records rendered into the sitemap (filled in by #sitemap)
  # template - ERB template producing one <urlset> document
  attr_accessor :sp, :ins, :template

  # Public base URL of the site, used for the entries of the sitemap index.
  SITE_URL = "http://www.weekface.info"
  # Directory below public/ that holds the compressed sitemaps.
  SITEMAP_SUBDIR = "sitemap_gz_files"

  # Returns the absolute path of the directory holding the compressed sitemaps.
  def self.sitemap_dir
    "#{RAILS_ROOT}/public/#{SITEMAP_SUBDIR}"
  end

  # sp - settings object (see attribute documentation above).
  def initialize(sp)
    @sp = sp
    @ins = []
    # <loc> is XML-escaped: the sitemap protocol requires entity-escaped URLs,
    # and an unescaped "&" would make the whole document invalid.
    # <lastmod> is the generation date, not the record's own timestamp.
    # NOTE(review): <priority> is chosen at random (1.0 or 0.9) for every URL
    # on every run; kept unchanged here, confirm that this is intended.
    @template = ERB.new <<TMP
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9
        http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
        xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">

<% @ins.each do |c| %>
<url>
  <loc><%= ERB::Util.html_escape(@sp.url_format.call(c)) %></loc>
<lastmod><%= Time.now.strftime("%Y-%m-%d") %></lastmod>
<priority><%= rand > 0.5 ? 1.0 : 0.9 %></priority>
<changefreq><%= @sp.changefreq%></changefreq>
</url>
<% end %>
</urlset>
TMP
  end

  # Loads the records described by sp from the model and writes their sitemap.
  def sitemap
    model = @sp.model_name.camelize.constantize
    @ins = model.find :all, :select => @sp.select_name, :conditions => @sp.condition
    perform
  end

  # Renders the template for the current records and stores the result as
  # sitemap_<model>.xml.gz, replacing any previous archive.
  #
  # The original built the archive with system(`...`): the backticks already
  # ran the shell command and system was then handed its (empty) output, so
  # failures went unnoticed. Compressing with Zlib needs neither a shell nor an
  # intermediate .xml file.
  def perform
    dir = self.class.sitemap_dir
    archive = "#{dir}/sitemap_#{@sp.model_name.downcase}.xml.gz"
    FileUtils.mkdir_p dir
    xml = @template.result(binding)
    Zlib::GzipWriter.open(archive) { |gz| gz.write xml }
  end

  # Writes public/sitemap.xml, the sitemap index with one <sitemap> entry per
  # compressed sitemap found in the sitemap directory.
  def self.result
    header = %Q{<?xml version="1.0" encoding="UTF-8"?>
<sitemapindex xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9
        http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd"
        xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">}

    # Same date format as the <lastmod> values produced by the template.
    today = Time.now.strftime("%Y-%m-%d")
    # Only list real archives (no stray files), in a stable order.
    entries = Dir["#{sitemap_dir}/*.xml.gz"].sort.map do |path|
      %Q{<sitemap>
  <loc>#{SITE_URL}/#{SITEMAP_SUBDIR}/#{File.basename(path)}</loc>
  <lastmod>#{today}</lastmod>
 </sitemap>}
    end

    index = ([header] + entries + ["</sitemapindex>"]).join("\n")
    File.open("#{RAILS_ROOT}/public/sitemap.xml", "w") { |f| f.puts index }
  end

end

上面的SitemapJob类负责生成sitemap,我们新加一个rake任务来调用它:

#lib/tasks/enki.rake 增加如下内容
desc "sitemap generate"
  task :sitemap => :environment do
    # Posts: SitemapJob calls the lambda once per record to build its URL,
    # so every model can have its own URL format.
    post_url = lambda { |post| "http://www.weekface.info#{post.published_at.strftime("/%Y/%m/%d/") + post.slug}" }
    # Bind the time as a query parameter instead of interpolating it into the
    # SQL string: interpolation embeds Time#to_s, which is not the database's
    # datetime format and may carry a zone offset.
    post_condition = ["published_at < ?", Time.zone.now]
    post = SitemapPrepare.new("post", post_url, "daily", "id,slug,published_at", post_condition)
    SitemapJob.new(post).sitemap

    # Tags: every tag is listed, so no condition is needed (nil instead of the
    # SQL literal "true", which not every database accepts).
    tag_url = lambda { |tag| "http://www.weekface.info/#{tag.name}" }
    tag = SitemapPrepare.new("tag", tag_url, "daily", "id,name", nil)
    SitemapJob.new(tag).sitemap

    # Generate public/sitemap.xml, the index of the files written above.
    SitemapJob.result
  end

很简单的,仔细看看很容易理解的.

最后将这个任务加入crontab(注意:cron并不在Rails应用目录下执行命令,实际使用时需要先cd到应用根目录,例如 cd /path/to/app && rake ...,否则rake找不到Rakefile)

#crontab -e 每天生成一次
0 0 * * * /usr/local/system/ruby/bin/rake enki:sitemap RAILS_ENV=production

 

你可能感兴趣的:(职场,Ruby,Rails,休闲)