[JDEV] New Transport Architecture - Idea
Jared Rhine
jared at wordzoo.com
Mon Feb 5 14:14:18 CST 2001
[Citation date: Mon, 5 Feb 2001 11:46:17 -0800]
>>>>> Mark == Mark Zamoyta <jdev at jabber.org>
Mark> I was unaware of what RSS was, but I'll look into it! The
Mark> key thing is to leverage the content and services of
Mark> existing websites, and make it instantly available to Jabber
Mark> users.
I'm currently using the script below to act as a pseudo-user which
collects RSS information as posts it as a headline to my Jabber
server, which shows up on the headlines tab of my winjab client. To
receive headlines, I subscribe to the 'headline' user's presence.
It's based on the framework by DJ Adams at:
http://www.pipetree.com/jabber/
I too agree that this is a superior approach to new transports. I'm
not familiar with the RSS transport, though. I think I'll prefer to
implement a pseudo-user client-based approach like this so I can have
simple control over posting logic, such as filtering (and mapping from
moreover.com 'click here' URLs to the actual URL).
Note, although DJ Adams' original framework was presence-sensitive
(only sent headlines if you were available) this version isn't (I like
to wake up in the morning with a headlines tab full of new stories).
Don't let this thing run too unattended, as it will happily keep
posting headlines even if you're not logged in.
-- begin --
#!/usr/bin/perl
use Net::Jabber;
use LWP::Simple;
use LWP::UserAgent;
use XML::RSS;
use File::Path;
use URI::Escape;
use strict;
use constant CACHEDIR => '/tmp/jabber-headlines-cache';
use constant DEBUGFILE => '/tmp/jabber-headlines.log';
use constant SERVER => 'xxxxx; # Obscured
use constant PORT => 5222;
use constant USER => 'headlines';
use constant PASSWORD => 'xxxxx; # Obscured
use constant RESOURCE => 'bot';
use constant DELAY => 600;
use constant VERBOSE => 3;
my %present;
my %cache;
my %fullcache;
my %sources = (
'http://www.jabber.org/rss/articles.xml' => 5,
'http://slashdot.org/slashdot.rdf' => 2,
'http://search.cpan.org/recent.rdf' => 4,
'http://udell.roninhouse.com/udell.rdf' => 7,
'http://freshmeat.net/backend/fm.rdf' => 9,
'http://www.mozilla.org/news.rdf' => 9,
'http://www.newsforge.com/newsforge.rdf' => 6,
'http://www.wired.com/news_drop/netcenter/netcenter.rdf' => 3,
'http://www.moreover.com/cgi-local/page?index_home+rss' => 2,
# 'http://www.moreover.com/cgi-local/page?index_computerservices+rss' => 3,
'http://www.moreover.com/cgi-local/page?index_enterprisecomputing+rss' => 3,
# 'http://www.moreover.com/cgi-local/page?index_e-commerce+rss' => 3,
'http://www.moreover.com/cgi-local/page?index_personaltechnology+rss' => 2,
'http://www.moreover.com/cgi-local/page?index_wireless+rss' => 2,
'http://www.xenite.org/channels/lordoftherings.rss' => 8,
'http://xml.com/xml/scriptingnews.rdf' => 4,
# Betanews?
'http://www.nwfusion.com:8080/rss/wireless/query.html?qt=%2Bwireless&nh=10&rf=1' => 5,
# 'http://www.xmltree.com/whatsnew/rss.cfm' => 17,
'http://www.moreover.com/cgi-local/page?index_xml+rss' => 7,
'http://ilrt.org/discovery/rdf/resources/rss.rdf' => 19,
'http://www.xmlhack.com/rsscat.php' => 3,
'http://rootprompt.org/rss/' => 4,
'http://www.apacheweek.com/issues/apacheweek-headlines.xml' => 8,
'http://www.oreillynet.com/pub/q/32' => 7,
'http://www.soap-wrc.com/webservices/rss.asp' => 10,
'http://www.kuro5hin.org/backend.rdf' => 9,
'http://cnn.com/cnn.rss' => 5,
'http://p.moreover.com/cgi-local/page?index_java+rss' => 8,
'http://p.moreover.com/cgi-local/page?index_science+rss' => 9,
'http://memepool.com/memepool.rss' => 11,
'http://www.geekpress.com/index.xml' => 13,
);
# Redirect stdout
my $debug = DEBUGFILE;
open STDERR, ">$debug" or die "Can't redirect stdout: $!";
my @sources = keys %sources;
my $count = 1;
mkpath(CACHEDIR,1,0700);
my $connection = Net::Jabber::Client->new();
log3("Making connection to Jabber server");
$connection->Connect( hostname => SERVER,
port => PORT )
or die "Cannot connect ($!)\n";
log3("Attempting Ident/Auth");
my @result = $connection->AuthSend( username => USER,
password => PASSWORD,
resource => RESOURCE );
if ($result[0] ne "ok") {
die "Ident/Auth with server failed: $result[0] - $result[1]\n";
}
log3("Setting headline handler");
$SIG{ALRM} = \&do_headlines;
log3("Setting presence handler");
$connection->SetCallBacks( presence => \&handle_presence );
log3("Requesting roster");
$connection->RosterGet();
log3("Sending presence");
$connection->PresenceSend();
log3("Retrieving RSS for first time and setting alarm");
do_headlines();
log3("Entering main loop");
while(defined($connection->Process())) { }
log3("Cancelling alarm");
alarm(0);
print "ERROR: The connection was killed...\n";
exit(0);
sub do_headlines {
$count += 1;
# foreach my $source (@sources) {
foreach my $source (grep { ($count/$sources{$_}) == int($count/$sources{$_}) } (sort keys %sources)) {
# Retrieve the RSS
log3("Getting $source");
my $data = get($source);
# Skip if cannot retrieve
unless (defined($data)) {
log1("Cannot retrieve $source - skipping");
next;
}
# Process any messages
$connection->Process(1);
my $rss = XML::RSS->new();
# Parse the RSS and get the items
$rss->parse($data);
my @items = @{$rss->{items}};
# Discover any new items
log3("Looking for new items");
foreach my $item (@items) {
# Stop looking if we reach an item we've
# already seen
last if exists $cache{$source} and $cache{$source} eq $item->{link};
my $orglink = $item->{link};
my $link = $orglink;
# Check cache
my $file = CACHEDIR . "/" . uri_escape($link,"^A-Za-z0-9\\.:\\?");
if ($fullcache{$link} or -r $file) {
log2("Skipping because previously seen: $link");
next;
}
log2("New item from $source - $item->{title}");
# Create headline message
my $msg = Net::Jabber::Message->new();
$msg->SetMessage(
type => 'headline',
subject => $item->{title},
body => $item->{description},
);
# Add to cache
log3("Adding $link ($file) to cache");
$fullcache{$link} = 1;
open TOUCH, ">$file" or warn "Couldn't create/touch cache file $file: $!";
close TOUCH;
my $oob = $msg->NewX('jabber:x:oob');
$oob->SetDesc($item->{title});
# If moreover, find the real URL.
if ($orglink =~ m{^http://[a-z]\.moreover\.com/}) {
my $ua = LWP::UserAgent->new;
my $request = HTTP::Request->new('GET',$orglink);
my $response = $ua->simple_request($request);
if ($response->header('location')) {
log3("Successful retrieval redirected of $orglink");
$link = $response->header('location');
} else {
my $headers = $response->headers_as_string;
log3("Unsuccessful retrieval of $orglink");
log3("Headers: $headers");
$link = $orglink;
}
log2("Replacing moreover URL $orglink with $link");
}
$oob->SetURL($link);
my @sendees;
# Send the headline to all that are present
foreach my $recipient (keys %present) {
sleep 2;
$msg->SetTo($recipient);
$connection->Send($msg);
push @sendees, $recipient;
}
log2("Sent to ".(join(", ", @sendees) || "nobody"));
# This will prevent all the items being
# counted as new the first time through
# the loop (but allows the first item in
# the RSS to be sent).
#last unless exists($cache{$source});
}
# Remember the latest new item
$cache{$source} = $items[0]->{link};
}
log3("Setting alarm");
alarm(DELAY);
}
sub handle_presence {
my $presence = new Net::Jabber::Presence(@_);
my $jid = $presence->GetFrom();
my $show = $presence->GetShow();
my $type = $presence->GetType();
$jid =~ s!\/.*$!!; # remove any resource suffix from JID
log3("Presence from $jid:\n".$presence->GetXML());
# Subscription request:
# Accept, and request subscription to them.
if ($type eq "subscribe") {
log3("$jid requests subscription");
$connection->Send($presence->Reply(type => 'subscribed'));
$connection->Send($presence->Reply(type => 'subscribe'));
}
# Request to unsubscribe:
# Acknowledge, and request unsubscription from them.
# Don't forget to remove them from the present list, too.
if ($type eq "unsubscribe") {
log3("$jid requests unsubscription");
$connection->Send($presence->Reply(type => 'unsubscribed'));
$connection->Send($presence->Reply(type => 'unsubscribe'));
delete $present{$jid};
}
# User has disconnected
if ($type eq "unavailable") {
log3("$jid unavailable");
delete $present{$jid};
}
# Default presence information (type is blank)
$present{$jid} = 1 if $type eq "";
}
sub log1 {
# WARN
my $msg = shift;
return unless VERBOSE >= 1;
print STDERR "WARN: $msg\n";
}
sub log2 {
# INFO
my $msg = shift;
return unless VERBOSE >= 2;
print STDERR "INFO: $msg\n";
}
sub log3 {
# DBUG
my $msg = shift;
return unless VERBOSE >= 3;
print STDERR "DBUG: $msg\n";
}
-- end --
-- jared at wordzoo.com
"Come, let us retract the foreskin of misconception and apply the wire
brush of enlightenment." -- Geoff Miller
More information about the JDev
mailing list