#!/usr/local/bin/ruby
require 'cgi'
require 'net/http'
require 'rexml/document'
# Fetches the Slashdot headline feed and regenerates headlines/slash.inc,
# an HTML fragment with one "<a href=...>title</a><br>" line per story.
# The fragment is written to a temporary file first and then renamed over
# the existing file, so a failed run does not clobber the previous output.

# Request headers asking intermediate caches/proxies not to serve a stored
# copy of the feed.
head = {
  'Pragma' => 'no-cache',
  'Cache-Control' => 'no-cache'
}

# Go get the XML file with the headlines in it.
begin
  h = Net::HTTP.new 'slashdot.org'
  resp = h.get('/slashdot.xml', head)
  # Net::HTTPResponse#value raises unless the status is 2xx, so redirects
  # and error pages are reported here instead of being parsed as the feed.
  resp.value
  xml = resp.body.to_s
rescue StandardError => e
  print "HTTP request failed: #{e}\n"
  exit 1
end

# A bare "&" is illegal in XML and breaks the parser. Escape every
# ampersand that does not already start one of the predefined XML entities
# or a numeric character reference, leaving legitimate entities untouched.
xml = xml.gsub(/&(?!(?:amp|lt|gt|quot|apos|#[0-9]+|#x[0-9A-Fa-f]+);)/, '&amp;')

begin
  doc = REXML::Document.new xml
rescue REXML::ParseException => e
  print "XML parse failed: #{e.message}\n"
  exit 1
end

# Break up the stories and only extract the <url> and <title> portions of
# each story. The feed is remote input, so both values are HTML-escaped
# before being placed in the markup. Stories missing either element are
# skipped rather than emitted as a dangling "</a>".
headlines = []
doc.elements.each('backslash/story') do |story|
  url = story.elements['url']
  title = story.elements['title']
  next if url.nil? || title.nil?

  href = CGI.escapeHTML(url.text.to_s)
  label = CGI.escapeHTML(title.text.to_s)
  headlines << "<a href=\"#{href}\">#{label}</a><br>\n"
end

# Write the complete fragment to a temporary file (the block form closes it
# even if the write raises), then replace the existing headline file with
# the new one.
tmp_path = 'headlines/slash.inc.tmp'
File.open(tmp_path, 'w') { |f| f.write headlines.join }
File.rename(tmp_path, 'headlines/slash.inc')