feedgenerator: more rss stuff
This commit is contained in:
parent
70e7666eaf
commit
1b80528ec6
1 changed files with 191 additions and 23 deletions
214
feedgenerator.py
214
feedgenerator.py
|
@ -11,6 +11,8 @@
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
import dateutil.parser
|
||||||
|
import dateutil.tz
|
||||||
|
|
||||||
|
|
||||||
class FeedGenerator:
|
class FeedGenerator:
|
||||||
|
@ -20,7 +22,7 @@ class FeedGenerator:
|
||||||
# required
|
# required
|
||||||
__atom_id = None
|
__atom_id = None
|
||||||
__atom_title = None
|
__atom_title = None
|
||||||
__atom_updated = datetime.utcnow().isoformat('T')+'Z'
|
__atom_updated = datetime.now(dateutil.tz.tzutc())
|
||||||
|
|
||||||
# recommended
|
# recommended
|
||||||
__atom_author = None # {name*, uri, email}
|
__atom_author = None # {name*, uri, email}
|
||||||
|
@ -35,34 +37,33 @@ class FeedGenerator:
|
||||||
__atom_rights = None
|
__atom_rights = None
|
||||||
__atom_subtitle = None
|
__atom_subtitle = None
|
||||||
|
|
||||||
|
# other
|
||||||
|
__atom_feed_xml_lang = None
|
||||||
|
|
||||||
## RSS
|
## RSS
|
||||||
|
# http://www.rssboard.org/rss-specification
|
||||||
__rss_title = None
|
__rss_title = None
|
||||||
__rss_link = None
|
__rss_link = None
|
||||||
__rss_description = None
|
__rss_description = None
|
||||||
|
|
||||||
'''
|
__rss_category = None
|
||||||
category
|
__rss_cloud = None
|
||||||
cloud
|
__rss_copyright = None
|
||||||
copyright
|
__rss_docs = 'http://www.rssboard.org/rss-specification'
|
||||||
docs
|
__rss_generator = None
|
||||||
generator
|
__rss_image = None
|
||||||
image
|
__rss_language = None
|
||||||
language
|
__rss_lastBuildDate = datetime.now(dateutil.tz.tzutc())
|
||||||
lastBuildDate
|
__rss_managingEditor = None
|
||||||
managingEditor
|
__rss_pubDate = None
|
||||||
pubDate
|
__rss_rating = None
|
||||||
rating
|
__rss_skipHours = None
|
||||||
skipHours
|
__rss_skipDays = None
|
||||||
skipDays
|
__rss_textInput = None
|
||||||
textInput
|
__rss_ttl = None
|
||||||
ttl
|
__rss_webMaster = None
|
||||||
webMaster
|
|
||||||
|
|
||||||
|
|
||||||
# feed
|
|
||||||
# rss -> channel
|
|
||||||
'''
|
|
||||||
|
|
||||||
|
|
||||||
def __ensure_format(self, val, allowed, required, allowed_values={}):
|
def __ensure_format(self, val, allowed, required, allowed_values={}):
|
||||||
if not val:
|
if not val:
|
||||||
|
@ -85,6 +86,10 @@ class FeedGenerator:
|
||||||
|
|
||||||
def atom_str(self):
|
def atom_str(self):
|
||||||
feed = etree.Element('feed', xmlns='http://www.w3.org/2005/Atom')
|
feed = etree.Element('feed', xmlns='http://www.w3.org/2005/Atom')
|
||||||
|
if self.__atom_feed_xml_lang:
|
||||||
|
feed.attrib['{http://www.w3.org/XML/1998/namespace}lang'] = \
|
||||||
|
self.__atom_feed_xml_lang
|
||||||
|
|
||||||
doc = etree.ElementTree(feed)
|
doc = etree.ElementTree(feed)
|
||||||
if not ( self.__atom_id and self.__atom_title and self.__atom_updated ):
|
if not ( self.__atom_id and self.__atom_title and self.__atom_updated ):
|
||||||
raise ValueError('Required fields not set')
|
raise ValueError('Required fields not set')
|
||||||
|
@ -93,7 +98,7 @@ class FeedGenerator:
|
||||||
title = etree.SubElement(feed, 'title')
|
title = etree.SubElement(feed, 'title')
|
||||||
title.text = self.__atom_title
|
title.text = self.__atom_title
|
||||||
updated = etree.SubElement(feed, 'updated')
|
updated = etree.SubElement(feed, 'updated')
|
||||||
updated.text = self.__atom_updated
|
updated.text = self.__atom_updated.isoformat()
|
||||||
|
|
||||||
# Add author elements
|
# Add author elements
|
||||||
for a in self.__atom_author or []:
|
for a in self.__atom_author or []:
|
||||||
|
@ -189,6 +194,34 @@ class FeedGenerator:
|
||||||
return self.__atom_id
|
return self.__atom_id
|
||||||
|
|
||||||
|
|
||||||
|
def updated(self, updated=None):
    '''Set or get the updated value which indicates the last time the feed
    was modified in a significant way.

    The value can either be a string, which will automatically be parsed, or
    a datetime.datetime object. In any case it is necessary that the value
    includes timezone information.

    Setting the value also sets the RSS lastBuildDate to the same datetime.

    :param updated: The modification date.
    :returns: Modification date as datetime.datetime
    :raises ValueError: If the value is neither a string nor a datetime, or
                        if the resulting datetime has no timezone info.
    '''
    if updated is not None:
        # `basestring` only exists on Python 2; fall back to `str` elsewhere.
        try:
            string_types = basestring
        except NameError:
            string_types = str
        if isinstance(updated, string_types):
            updated = dateutil.parser.parse(updated)
        # The module imports the class itself (`from datetime import
        # datetime`), so the check is against `datetime`, not
        # `datetime.datetime`.
        if not isinstance(updated, datetime):
            raise ValueError('Invalid datetime format')
        if updated.tzinfo is None:
            raise ValueError('Datetime object has no timezone info')
        self.__atom_updated = updated
        self.__rss_lastBuildDate = updated

    return self.__atom_updated
|
||||||
|
|
||||||
|
|
||||||
|
def lastBuildDate(self, lastBuildDate=None):
    '''Set or get the lastBuildDate value. This is an alias for updated()
    and simply forwards the argument to it.

    :param lastBuildDate: The modification date.
    :returns: Whatever updated() returns.
    '''
    # Must go through `self`; a bare `updated(...)` is not in scope here.
    return self.updated(lastBuildDate)
|
||||||
|
|
||||||
|
|
||||||
def author(self, author=None, replace=False, **kwargs):
|
def author(self, author=None, replace=False, **kwargs):
|
||||||
'''Get or set autor data. An author element is a dict containing a name,
|
'''Get or set autor data. An author element is a dict containing a name,
|
||||||
an email adress and a uri. Name is mandatory for ATOM, email is mandatory
|
an email adress and a uri. Name is mandatory for ATOM, email is mandatory
|
||||||
|
@ -257,9 +290,30 @@ class FeedGenerator:
|
||||||
category,
|
category,
|
||||||
set(['term', 'schema', 'label']),
|
set(['term', 'schema', 'label']),
|
||||||
set(['term']) )
|
set(['term']) )
|
||||||
|
# Map the ATOM categories to RSS categories. Use the atom:label as
|
||||||
|
# name or if not present the atom:term. The atom:schema is the
|
||||||
|
# rss:domain.
|
||||||
|
self.__rss_category = []
|
||||||
|
for cat in self.__atom_category:
|
||||||
|
rss_cat = {}
|
||||||
|
rss_cat['value'] = cat['label'] if cat.get('label') else cat['term']
|
||||||
|
if cat.get('schema'):
|
||||||
|
rss_cat['domain'] = cat['schema']
|
||||||
return self.__atom_category
|
return self.__atom_category
|
||||||
|
|
||||||
|
|
||||||
|
def cloud(self, domain=None, port=None, path=None, registerProcedure=None,
        protocol=None):
    '''Set or get the cloud data of the feed. It is an RSS only attribute. It
    specifies a web service that supports the rssCloud interface which can be
    implemented in HTTP-POST, XML-RPC or SOAP 1.1.

    The data is only replaced if a domain is given; otherwise all other
    arguments are ignored and the stored value is returned unchanged.

    :param domain: The domain of the web service.
    :param port: The port of the web service.
    :param path: The path of the web service.
    :param registerProcedure: The procedure to call.
    :param protocol: The protocol to use.
    :returns: Dictionary containing the cloud data.
    '''
    if domain is not None:
        self.__rss_cloud = {
            'domain': domain,
            'port': port,
            'path': path,
            'registerProcedure': registerProcedure,
            'protocol': protocol,
        }
    return self.__rss_cloud
|
||||||
|
|
||||||
|
|
||||||
def contributor(self, contributor=None, replace=False, **kwargs):
|
def contributor(self, contributor=None, replace=False, **kwargs):
|
||||||
if contributor is None and kwargs:
|
if contributor is None and kwargs:
|
||||||
contributor = kwargs
|
contributor = kwargs
|
||||||
|
@ -278,6 +332,7 @@ class FeedGenerator:
|
||||||
self.__atom_generator['version'] = version
|
self.__atom_generator['version'] = version
|
||||||
if not uri in None:
|
if not uri in None:
|
||||||
self.__atom_generator['uri'] = uri
|
self.__atom_generator['uri'] = uri
|
||||||
|
self.__rss_generator = generator
|
||||||
return self.__atom_generator
|
return self.__atom_generator
|
||||||
|
|
||||||
|
|
||||||
|
@ -290,15 +345,48 @@ class FeedGenerator:
|
||||||
def logo(self, logo=None):
    '''Set or get the logo of the feed.

    Setting the logo also replaces the RSS image data with a dictionary
    holding only the URL of the logo.

    :param logo: The logo value (used as the URL of the RSS image).
    :returns: The stored ATOM logo.
    '''
    if logo is not None:
        self.__atom_logo = logo
        self.__rss_image = {'url': logo}
    return self.__atom_logo
|
||||||
|
|
||||||
|
|
||||||
|
def image(self, url=None, title=None, link=None, width=None, height=None,
        description=None):
    '''Set the image of the feed. This element is roughly equivalent to
    atom:logo.

    The image data is only replaced if a URL is given; without a URL all
    other arguments are ignored and the stored value is returned. Setting
    the image also sets the ATOM logo to the given URL.

    :param url: The URL of a GIF, JPEG or PNG image.
    :param title: Describes the image. The default value is the feeds title.
    :param link: URL of the site the image will link to. The default is to
                 use the feeds first alternate link.
    :param width: Width of the image in pixel. The maximum is 144.
    :param height: The height of the image. The maximum is 400.
    :param description: Title of the link.
    :returns: Dictionary containing the image data.
    '''
    if url is not None:
        image = {'url': url}
        if title is not None:
            image['title'] = title
        if link is not None:
            image['link'] = link
        if width:
            image['width'] = width
        if height:
            image['height'] = height
        # The description was accepted and documented but never stored.
        if description is not None:
            image['description'] = description
        self.__rss_image = image
        self.__atom_logo = url
    return self.__rss_image
|
||||||
|
|
||||||
|
|
||||||
def rights(self, rights=None):
    '''Set or get the rights value of the feed.

    Setting the value also sets the RSS copyright to the same value.

    :param rights: The rights value to store.
    :returns: The stored ATOM rights value.
    '''
    if rights is not None:
        self.__atom_rights = rights
        self.__rss_copyright = rights
    return self.__atom_rights
|
||||||
|
|
||||||
|
|
||||||
|
def copyright(self, copyright=None):
    '''Set or get the copyright value. This is an alias for rights() and
    simply forwards the argument to it.

    :param copyright: The rights value to store.
    :returns: Whatever rights() returns.
    '''
    # Must go through `self`; a bare `rights(...)` is not in scope here.
    return self.rights(copyright)
|
||||||
|
|
||||||
|
|
||||||
def subtitle(self, subtitle=None):
|
def subtitle(self, subtitle=None):
|
||||||
if not subtitle is None:
|
if not subtitle is None:
|
||||||
self.__atom_subtitle = subtitle
|
self.__atom_subtitle = subtitle
|
||||||
|
@ -316,6 +404,85 @@ class FeedGenerator:
|
||||||
return subtitle( description )
|
return subtitle( description )
|
||||||
|
|
||||||
|
|
||||||
|
def docs(self, docs=None):
    '''Set or get the docs value of the feed, which is stored as the RSS
    docs value only.

    This method was previously defined under the name `subtitle`, which
    shadowed the real subtitle() method of the class.

    :param docs: The docs value to store.
    :returns: The stored docs value.
    '''
    if docs is not None:
        self.__rss_docs = docs
    return self.__rss_docs
|
||||||
|
|
||||||
|
|
||||||
|
def language(self, language=None):
    '''Set or get the language of the feed.

    The value is stored as the RSS language and also as the xml:lang value
    of the ATOM feed.

    :param language: The language value to store.
    :returns: The stored RSS language.
    '''
    if language is not None:
        self.__rss_language = language
        self.__atom_feed_xml_lang = language
    return self.__rss_language
|
||||||
|
|
||||||
|
|
||||||
|
def managingEditor(self, managingEditor=None):
    '''Set or get the value for managingEditor, which is the email address
    of the person responsible for editorial content. This is an RSS only
    value.

    :param managingEditor: Email address of the managing editor.
    :returns: The stored managingEditor value.
    '''
    if managingEditor is not None:
        self.__rss_managingEditor = managingEditor
    return self.__rss_managingEditor
|
||||||
|
|
||||||
|
|
||||||
|
def pubDate(self, pubDate=None):
    '''Set or get the publication date of the feed. The value is stored as
    the RSS pubDate only.

    The value can either be a string, which will automatically be parsed, or
    a datetime.datetime object. In any case it is necessary that the value
    includes timezone information.

    :param pubDate: The publication date.
    :returns: Publication date as datetime.datetime
    :raises ValueError: If the value is neither a string nor a datetime, or
                        if the resulting datetime has no timezone info.
    '''
    if pubDate is not None:
        # `basestring` only exists on Python 2; fall back to `str` elsewhere.
        try:
            string_types = basestring
        except NameError:
            string_types = str
        if isinstance(pubDate, string_types):
            pubDate = dateutil.parser.parse(pubDate)
        # The module imports the class itself (`from datetime import
        # datetime`), so the check is against `datetime`, not
        # `datetime.datetime`.
        if not isinstance(pubDate, datetime):
            raise ValueError('Invalid datetime format')
        if pubDate.tzinfo is None:
            raise ValueError('Datetime object has no timezone info')
        self.__rss_pubDate = pubDate

    return self.__rss_pubDate
|
||||||
|
|
||||||
|
|
||||||
|
def rating(self, rating=None):
    '''Set or get the PICS rating for the channel. It is an RSS only value.

    :param rating: The rating value to store.
    :returns: The stored rating value.
    '''
    if rating is not None:
        self.__rss_rating = rating
    return self.__rss_rating
|
||||||
|
|
||||||
|
|
||||||
|
def skipHours(self, hours=None, replace=False):
    '''Set or get the value of skipHours, a hint for aggregators telling them
    which hours they can skip. This is an RSS only value.

    :param hours: A single hour or a list or set of hours, each in the
                  range 0 to 23.
    :param replace: If true, the given hours replace the stored ones
                    instead of being added to them.
    :returns: Set of hours, or the initial value if none were ever set.
    :raises ValueError: If a given hour is not in the range 0 to 23.
    '''
    if hours is not None:
        if not isinstance(hours, (list, set)):
            hours = [hours]
        # Validate everything first so a bad value leaves the stored set
        # untouched.
        for h in hours:
            if h not in range(24):
                raise ValueError('Invalid hour %s' % h)
        if replace or not self.__rss_skipHours:
            self.__rss_skipHours = set()
        self.__rss_skipHours |= set(hours)
    return self.__rss_skipHours
|
||||||
|
|
||||||
|
|
||||||
|
def skipDays(self, days=None, replace=False):
    '''Set or get the value of skipDays, a hint for aggregators telling them
    which days they can skip. This is an RSS only value.

    :param days: A single day name or a list or set of day names. Valid
                 names are Monday, Tuesday, Wednesday, Thursday, Friday,
                 Saturday and Sunday.
    :param replace: If true, the given days replace the stored ones instead
                    of being added to them.
    :returns: Set of days, or the initial value if none were ever set.
    :raises ValueError: If a given day is not a valid day name.
    '''
    if days is not None:
        if not isinstance(days, (list, set)):
            days = [days]
        valid_days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                      'Friday', 'Saturday', 'Sunday']
        # Validate everything first so a bad value leaves the stored set
        # untouched.
        for d in days:
            if d not in valid_days:
                raise ValueError('Invalid day %s' % d)
        if replace or not self.__rss_skipDays:
            self.__rss_skipDays = set()
        self.__rss_skipDays |= set(days)
    return self.__rss_skipDays
|
||||||
|
|
||||||
|
|
||||||
class FeedEntry:
|
class FeedEntry:
|
||||||
|
|
||||||
'''
|
'''
|
||||||
|
@ -370,4 +537,5 @@ if __name__ == '__main__':
|
||||||
fg.rights('cc-by')
|
fg.rights('cc-by')
|
||||||
fg.subtitle('This is a cool feed!')
|
fg.subtitle('This is a cool feed!')
|
||||||
fg.link( href='http://larskiesow.de/test.atom', rel='self' )
|
fg.link( href='http://larskiesow.de/test.atom', rel='self' )
|
||||||
|
fg.language('de')
|
||||||
print fg.atom_str()
|
print fg.atom_str()
|
||||||
|
|
Loading…
Reference in a new issue