#!/usr/local/bin/ruby

require 'cgi'
require 'net/http'
require 'rexml/document'

# Fetch the Slashdot headline feed and regenerate headlines/slash.inc, an
# HTML fragment containing one link per story.

# Request headers asking any intermediate cache (e.g. a caching proxy) for a
# fresh copy of the feed rather than a stored one.
#
head = {
  'Pragma' => 'no-cache',
  'Cache-Control' => 'no-cache'
}

# go get the XML file with the headlines in it
#
begin
  h = Net::HTTP.new('slashdot.org')
  resp = h.get('/slashdot.xml', head)
  # Net::HTTP#get does not raise on 4xx/5xx by itself; #value does, so an
  # error page is never mistaken for the feed.
  resp.value
  xml = resp.body.to_s
rescue StandardError => e
  print "HTTP request failed: #{e}\n"
  exit 1
end

# Escape bare ampersands, which are illegal in XML and break the parser.
# Ampersands that already start an entity or character reference
# (&amp;, &quot;, &#38;, &#x26;, ...) are left alone so they are not
# double-escaped.
#
xml = xml.gsub(/&(?!(?:[A-Za-z][A-Za-z0-9]*|#[0-9]+|#x[0-9A-Fa-f]+);)/, '&amp;')

doc = REXML::Document.new(xml)

# break up the stories and only extract the <url> and <title> portions
# of each story. Element#text returns unescaped text, so it is HTML-escaped
# again before being placed in the attribute value / link body.
# NOTE(review): escaping does not validate the URL scheme (e.g. javascript:).
#
lines = []
doc.elements.each('backslash/story') do |story|
  line = ''.dup
  story.elements.each('url') do |url|
    line << "<a href=\"#{CGI.escapeHTML(url.text.to_s)}\">"
  end
  story.elements.each('title') do |title|
    line << CGI.escapeHTML(title.text.to_s)
  end
  line << "</a><br>\n"
  lines << line
end

# Write the headlines to a temporary file first; the block form closes the
# file even if the write fails.
#
tmp_path = 'headlines/slash.inc.tmp'
File.open(tmp_path, 'w') { |f| f.write(lines.join) }

# now the file's complete, replace the existing headline file with
# the new one
#
File.rename(tmp_path, 'headlines/slash.inc')