# Copyright (c) 2005 Jose Nazario

# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:

# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

__copyright__ = 'Copyright (c) 2005 Jose Nazario'

import rsssax
import os, sys, xml.sax, urllib

class Rss:
    """Fetch an RSS search feed and print its items as a numbered list.

    The instance stores a base URL; search() appends the search terms to
    it, downloads the document and parses it with rsssax.rssHandler.
    """

    def __init__(self, baseUrl):
        """Remember the URL prefix that search terms are appended to."""
        self.url = baseUrl

    def search(self, terms):
        """Download and parse the feed for *terms*.

        *terms* is appended verbatim to the base URL, so the caller must
        pass it already URL-encoded (e.g. 'john%20kerry').

        Returns the ``out`` attribute of the rsssax handler after parsing.
        """
        # urlopen lives in urllib on Python 2 and urllib.request on Python 3.
        try:
            from urllib.request import urlopen
        except ImportError:
            from urllib import urlopen

        query = '%s%s' % (self.url, terms)
        connection = urlopen(query)
        try:
            response = connection.read()
        finally:
            # Release the socket even when the read fails.
            connection.close()

        p = xml.sax.make_parser()
        handler = rsssax.rssHandler()
        p.setContentHandler(handler)
        # NOTE(review): the parser is fed but never close()d, so a truncated
        # document is not reported as an error. Left unchanged so that
        # partial feeds keep returning whatever was parsed -- confirm.
        p.feed(response)
        return handler.out

    def unHtml(self, input):
        """Return *input* with everything between '<' and '>' removed.

        A '>' seen outside a tag is kept; an unterminated '<' drops the
        rest of the text.
        """
        visible = []
        inTag = False
        for letter in input:
            if inTag:
                # Inside a tag everything is discarded; only '>' ends it.
                if letter == '>':
                    inTag = False
            elif letter == '<':
                inTag = True
            else:
                visible.append(letter)
        # Joining once avoids rebuilding the string for every character.
        return ''.join(visible)

    def _emit(self, text):
        """Write *text* to stdout, degrading unencodable characters to '?'."""
        try:
            sys.stdout.write(text)
        except UnicodeEncodeError:
            # Retry with the characters the stream cannot represent
            # replaced, instead of dropping the rest of the entry.
            encoding = getattr(sys.stdout, 'encoding', None) or 'ascii'
            data = text.encode(encoding, 'replace')
            if sys.version_info[0] >= 3:
                # Python 3 text streams take str, not bytes.
                data = data.decode(encoding)
            sys.stdout.write(data)

    def printRss(self, results):
        """Print *results* to stdout as a numbered list.

        Each entry is expected to be dict-like. The title, the first of
        'source' / 'dc:creator' / 'dc:source', the description (with markup
        stripped), the first of 'pubDate' / 'dc:date' and the link are
        printed when present; missing fields are skipped.
        """
        number = 0
        for entry in results:
            number = number + 1

            # The spacing of the heading line reproduces what the former
            # print statements emitted (one separating space per item).
            self._emit('\n%d. ' % number)
            if 'title' in entry:
                self._emit(' %s' % entry['title'])
            for key in ('source', 'dc:creator', 'dc:source'):
                if key in entry:
                    self._emit('  (%s)\n' % entry[key])
                    break
            else:
                self._emit(' \n')

            if 'description' in entry:
                self._emit('    %s\n' % self.unHtml(entry['description']))

            if 'pubDate' in entry:
                self._emit('    %s\n' % entry['pubDate'])
            elif 'dc:date' in entry:
                # Five leading spaces, as in the existing output format.
                self._emit('     %s\n' % entry['dc:date'])

            # An entry without a link used to abort the listing with KeyError.
            if 'link' in entry:
                self._emit('    %s\n' % entry['link'])

if __name__ == '__main__':
    # Demo run: list every hit of a Topix news search ...
    topix = Rss('http://rss.topix.net/search/?xml=1&q=')
    hits = topix.search('john%20kerry')
    topix.printRss(hits)
    del topix

    # ... followed by the first ten del.icio.us bookmarks for a tag.
    delicious = Rss('http://del.icio.us/rss/tag/')
    hits = delicious.search('democrats')
    firstTen = hits[:10]
    delicious.printRss(firstTen)
    del delicious
