#!/home2/depalati/python/bin/python
"""genfeed.py version 0.1.0 (27 July 2009)

Generates an Atom 1.0 feed from the ``.txt`` entries found in a directory.
The feed is written to the current working directory as
``<directory name>.xml``.

RSS 2.0 output is planned but not implemented; only Atom feeds can be
generated at present.

Timestamps are formatted with the rfc3339 module when it is installed
(http://henry.precheur.org/2008/9/3/RFC_3339_formatting_in_Python.html).
When it is not, a small UTC-only stand-in defined below is used instead.

genfeed.py is free software and distributed under the GNU GPL v. 2.
"""

import os, os.path
import re
import shelve
import time
from datetime import datetime
from xml.sax.saxutils import escape, quoteattr

try:
    from rfc3339 import rfc3339
except ImportError:
    def rfc3339(date):
        """Format *date* as an RFC 3339 timestamp in UTC.

        Stand-in used only when the third-party rfc3339 module is missing.
        *date* is either a naive local ``datetime`` or a POSIX timestamp,
        which are the two forms this script passes in.
        """
        if isinstance(date, datetime):
            # Naive datetimes are taken to be local time, as datetime.today()
            # returns them.
            date = time.mktime(date.timetuple())
        return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(date))


## Settings
#############

# Path to check for updates as well as the URL of the path
path = "/home2/depalati/mike/docs"
path_url_base = "http://mike.depalatis.net/docs"

# Maximum number of entries per feed.
max_feed_items = 20

# Set to True to use RSS 2.0 instead of Atom 1.0.
use_rss = False

# Feed title, author, etc. settings
title = "Unblog"
author = "Michael V. DePalatis"
email = "mvd@fake.email.com"
url = "http://mike.depalatis.net/docs.html"
feed_url = "http://mike.depalatis.net/docs.xml"

# Atom 1.0 skeletons. Each template has exactly one placeholder per value
# supplied by genFeed. Attribute placeholders receive quoteattr() output,
# which already includes the surrounding quotes.
_FEED_HEADER = """<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
  <title>%s</title>
  <link href=%s/>
  <link rel="self" href=%s/>
  <author>
    <name>%s</name>
  </author>
  <updated>%s</updated>
  <id>%s</id>
"""

_FEED_ENTRY = """  <entry>
    <title>%s</title>
    <link href=%s/>
    <id>%s</id>
    <updated>%s</updated>
    <summary>%s</summary>
  </entry>
"""

_FEED_FOOTER = "</feed>\n"


## Functions
##############

def sanitize(text):
    """Escape text for inclusion in XML character data.

    Replaces ``&``, ``<`` and ``>`` with ``&amp;``, ``&lt;`` and ``&gt;``
    respectively so that the result cannot break the surrounding markup.
    """
    return escape(text)


def genFeed(path, use_rss=False):
    """Generate the feed for the directory *path* and return it as a string.

    Every ``*.txt`` file in *path* except ``header.txt`` and ``footer.txt``
    is an entry. Entries are ordered newest first by modification time and
    at most ``max_feed_items`` of them are included. The first line of each
    file, minus its first two characters, is the entry title; the next 800
    characters form the summary.

    A shelve database ``entries.db`` inside *path* is opened to hold
    per-entry timestamps.

    Raises NotImplementedError if *use_rss* is true, because RSS 2.0 output
    has not been written yet.
    """
    if use_rss:
        # Fail loudly instead of handing back an empty document.
        raise NotImplementedError("RSS 2.0 output is not implemented yet")

    # NOTE(review): this pattern is empty, so it matches every entry and the
    # stored-timestamp branch below never runs. It looks like the original
    # marker text was lost; restore it before relying on entries.db.
    re_update = re.compile(r"", re.IGNORECASE)

    # Collect the entry files, newest first.
    entries = [name for name in os.listdir(path)
               if name.lower().endswith(".txt")
               and name not in ("header.txt", "footer.txt")]
    entries.sort(key=lambda name: os.path.getmtime(os.path.join(path, name)),
                 reverse=True)

    # Feed-wide elements.
    parts = [_FEED_HEADER % (sanitize(title), quoteattr(url),
                             quoteattr(feed_url), sanitize(author),
                             rfc3339(datetime.today()), sanitize(url))]

    # Open entry database for entry times; closed even if an entry fails.
    entry_db = shelve.open(os.path.join(path, "entries.db"))
    try:
        for entry in entries[:max_feed_items]:
            entry_path = os.path.join(path, entry)
            with open(entry_path, 'r') as entry_file:
                # Presumably the first line carries a two-character heading
                # prefix such as "# " -- confirm against the entry format.
                entry_title = entry_file.readline()[2:].strip()
                entry_text = entry_file.read(800) + "..."
            entry_link = path_url_base + "/" + entry[:-3] + "html"

            do_update = not re_update.search(entry_text)
            if do_update and entry in entry_db:
                updated = entry_db[entry]
            else:
                updated = rfc3339(os.path.getmtime(entry_path))
                if do_update:
                    entry_db[entry] = updated

            parts.append(_FEED_ENTRY % (sanitize(entry_title),
                                        quoteattr(entry_link),
                                        sanitize(entry_link),
                                        updated,
                                        sanitize(entry_text)))
    finally:
        entry_db.close()

    parts.append(_FEED_FOOTER)
    return "".join(parts)


## Main
#########

if __name__ == "__main__":
    print("Updating feed...")
    xml = genFeed(path, use_rss)
    xml_file_name = os.path.basename(os.path.normpath(path)) + ".xml"
    with open(xml_file_name, 'w') as xml_file:
        xml_file.write(xml)