feedgenerator: ATOM part of FeedEntry finished

2013-04-22 17:51:51 +02:00 · 2013-04-22 17:51:51 +02:00 · 471ea2b5a3
commit 471ea2b5a3
parent 751867d393
1 changed files with 448 additions and 24 deletions
--- a/feedgenerator.py
+++ b/feedgenerator.py
@ -17,6 +17,8 @@ import dateutil.tz
 class FeedGenerator:
 	__feed_entries = []
 	## ATOM
 	# http://www.atomenabled.org/developers/syndication/
 	# required
@ -50,7 +52,7 @@ class FeedGenerator:
 	__rss_cloud          = None
 	__rss_copyright      = None
 	__rss_docs           = 'http://www.rssboard.org/rss-specification'
-	__rss_generator      = None
+	__rss_generator      = 'Lernfunk3 FeedGenerator'
 	__rss_image          = None
 	__rss_language       = None
 	__rss_lastBuildDate  = datetime.now(dateutil.tz.tzutc())
@ -84,7 +86,7 @@ class FeedGenerator:
 		return val
-	def atom_str(self):
+	def __create_atom(self):
 		feed    = etree.Element('feed',  xmlns='http://www.w3.org/2005/Atom')
 		if self.__atom_feed_xml_lang:
 			feed.attrib['{http://www.w3.org/XML/1998/namespace}lang'] = \
@ -174,11 +176,123 @@ class FeedGenerator:
 			subtitle = etree.SubElement(feed, 'subtitle')
 			subtitle.text = self.__atom_subtitle
-		return etree.tostring(feed, pretty_print=True)
+		for entry in self.__feed_entries:
-		'''
+			entry.atom_entry(feed)
-		outFile = open('homemade.xml', 'w')
+
-		doc.write(outFile)
+		return feed, doc
-		'''
+
 	def atom_str(self, pretty=False):
 		'''Generate the ATOM feed and return it in serialized form.
 		:param pretty: If the output should be pretty printed.
 		:returns: The result of etree.tostring for the root element of the feed.
 		'''
 		root, _tree = self.__create_atom()
 		return etree.tostring(root, pretty_print=pretty)
 	def atom_file(self, filename):
 		'''Generate the ATOM feed and write it to a file.
 		:param filename: Name of the file to write. It is opened in mode 'w'.
 		'''
 		_root, tree = self.__create_atom()
 		with open(filename, 'w') as target:
 			tree.write(target)
 	def __create_rss(self):
 		feed    = etree.Element('rss', version='2.0')
 		doc     = etree.ElementTree(feed)
 		channel = etree.SubElement(feed, 'channel')
 		if not ( self.__rss_title and self.__rss_link and self.__rss_description ):
 			raise ValueError('Required fields not set')
 		title = etree.SubElement(channel, 'title')
 		title.text = self.__rss_title
 		link = etree.SubElement(channel, 'link')
 		link.text = self.__rss_link
 		link = etree.SubElement(channel, 'description')
 		link.text = self.__rss_description
 		if self.__rss_category:
 			for cat in self.__rss_category:
 				category = etree.SubElement(channel, 'category')
 				category.text = cat['value']
 				if cat.get('domain'):
 					category.attrib['domain'] = cat['domain']
 		if self.__rss_cloud:
 			cloud = etree.SubElement(channel, 'cloud')
 			cloud.attrib['domain'] = self.__rss_cloud.get('domain')
 			cloud.attrib['port'] = self.__rss_cloud.get('port')
 			cloud.attrib['path'] = self.__rss_cloud.get('path')
 			cloud.attrib['registerProcedure'] = self.__rss_cloud.get(
 					'registerProcedure')
 			cloud.attrib['protocol'] = self.__rss_cloud.get('protocol')
 		if self.__rss_copyright:
 			copyright = etree.SubElement(channel, 'copyright')
 			copyright.text = self.__rss_copyright
 		if self.__rss_docs:
 			docs = etree.SubElement(channel, 'docs')
 			docs.text = self.__rss_docs
 		if self.__rss_generator:
 			generator = etree.SubElement(channel, 'generator')
 			generator.text = self.__rss_generator
 		if self.__rss_image:
 			image = etree.SubElement(channel, 'image')
 			image.attrib['url'] = self.__rss_image.get('url')
 			image.attrib['title'] = self.__rss_image['title'] \
 					if self.__rss_image.get('title') else self.__rss_title
 			image.attrib['link'] = self.__rss_image['link'] \
 					if self.__rss_image.get('link') else self.__rss_link
 			if self.__rss_image.get('width'):
 				image.attrib['width'] = self.__rss_image.get('width')
 			if self.__rss_image.get('height'):
 				image.attrib['height'] = self.__rss_image.get('height')
 			if self.__rss_image.get('description'):
 				image.attrib['description'] = self.__rss_image.get('description')
 		if self.__rss_language:
 			language = etree.SubElement(channel, 'language')
 			language.text = self.__rss_language
 		if self.__rss_lastBuildDate:
 			lastBuildDate = etree.SubElement(channel, 'lastBuildDate')
 			lastBuildDate.text = self.__rss_lastBuildDate.strftime(
 					'%a, %e %b %Y %H:%M:%S %z')
 		if self.__rss_managingEditor:
 			managingEditor = etree.SubElement(channel, 'managingEditor')
 			managingEditor.text = self.__rss_managingEditor
 		if self.__rss_pubDate:
 			pubDate = etree.SubElement(channel, 'pubDate')
 			pubDate.text = self.__rss_pubDate.strftime(
 					'%a, %e %b %Y %H:%M:%S %z')
 		if self.__rss_rating:
 			rating = etree.SubElement(channel, 'rating')
 			rating.text = self.__rss_rating
 		if self.__rss_skipHours:
 			skipHours = etree.SubElement(channel, 'skipHours')
 			for h in self.__rss_skipHours:
 				hour = etree.SubElement(skipHours, 'hour')
 				hour.text = str(h)
 		if self.__rss_skipDays:
 			skipDays = etree.SubElement(channel, 'skipDays')
 			for d in self.__rss_skipDays:
 				day = etree.SubElement(skipDays, 'day')
 				day.text = d
 		if self.__rss_textInput:
 			textInput = etree.SubElement(channel, 'textInput')
 			textInput.attrib['title'] = self.__rss_textInput.get('title')
 			textInput.attrib['description'] = self.__rss_textInput.get('description')
 			textInput.attrib['name'] = self.__rss_textInput.get('name')
 			textInput.attrib['link'] = self.__rss_textInput.get('link')
 		if self.__rss_ttl:
 			ttl = etree.SubElement(channel, 'ttl')
 			ttl.text = self.__rss_ttl
 		if self.__rss_webMaster:
 			webMaster = etree.SubElement(channel, 'webMaster')
 			webMaster.text = self.__rss_webMaster
 		return feed, doc
 	def rss_str(self, pretty=False):
 		'''Generate the RSS feed and return it in serialized form.
 		:param pretty: If the output should be pretty printed.
 		:returns: The result of etree.tostring for the root element of the feed.
 		'''
 		root, _tree = self.__create_rss()
 		return etree.tostring(root, pretty_print=pretty)
 	def rss_file(self, filename):
 		'''Generate the RSS feed and write it to a file.
 		:param filename: Name of the file to write. It is opened in mode 'w'.
 		'''
 		_root, tree = self.__create_rss()
 		with open(filename, 'w') as target:
 			tree.write(target)
 	def title(self, title=None):
@ -257,6 +371,8 @@ class FeedGenerator:
 		'''Get or set link data. An link element is a dict with the fields href,
 		rel, type, hreflang, title, and length. Href is mandatory for ATOM.
 		RSS only supports one link with URL only.
 		:param link:    Dict or list of dicts with data.
 		:param replace: Add or replace old data.
@ -274,8 +390,9 @@ class FeedGenerator:
 					set(['href', 'rel', 'type', 'hreflang', 'title', 'length']),
 					set(['href']), 
 					{'rel':['alternate', 'enclosure', 'related', 'self', 'via']} )
-			# RSS only needs the URL:
+			# RSS only needs one URL. We use the first link for RSS:
-			self.__rss_link = [ l['href'] for l in self.__atom_link ]
+			if len(self.__atom_link) > 0:
 				self.__rss_link = self.__atom_link[0]['href']
 		# return the set with more information (atom)
 		return self.__atom_link
@ -299,6 +416,7 @@ class FeedGenerator:
 				rss_cat['value'] = cat['label'] if cat.get('label') else cat['term']
 				if cat.get('schema'):
 					rss_cat['domain'] = cat['schema']
 				self.__rss_category.append( rss_cat )
 		return self.__atom_category
@ -401,10 +519,10 @@ class FeedGenerator:
 		:param description: Description/Subtitle of the channel.
 		'''
-		return subtitle( description )
+		return self.subtitle( description )
-	def subtitle(self, docs=None):
+	def docs(self, docs=None):
 		if not docs is None:
 			self.__rss_docs = docs
 		return self.__rss_docs
@ -523,29 +641,38 @@ class FeedGenerator:
 		return self.__rss_webMaster
 	def add_entry(self, feedEntry=None):
 		if feedEntry is None:
 			feedEntry = FeedEntry()
 		self.__feed_entries.append( feedEntry )
 		return feedEntry
 	def add_item(self, item=None):
 		return self.add_entry(item)
 class FeedEntry:
 	'''
 	# ATOM
 	# required
-	id
+	__atom_id = None
-	title
+	__atom_title = None
-	updated
+	__atom_updated = datetime.now(dateutil.tz.tzutc())
 	# recommended
-	author
+	__atom_author = None
-	content
+	__atom_content = None
-	link
+	__atom_link = None
-	summary
+	__atom_summary = None
 	# optional
-	category
+	__atom_category = None
-	contributor
+	__atom_contributor = None
-	source
+	__atom_source = None
-	rights
+	__atom_rights = None
 	'''
 	# RSS
 	author
 	category
@ -565,6 +692,288 @@ class FeedEntry:
 	title
 	'''
 	def __ensure_format(self, val, allowed, required, allowed_values={}):
 		if not val:
 			return None
 		# Make shure that we have a list of dicts. Even if there is only one.
 		if not isinstance(val, list):
 			val = [val]
 		for elem in val:
 			if not isinstance(elem, dict):
 				raise ValueError('Invalid data (value is no dictionary)')
 			if not set(elem.keys()) <= allowed:
 				raise ValueError('Data contains invalid keys')
 			if not set(elem.keys()) >= required:
 				raise ValueError('Data contains not all required keys')
 			for k,v in allowed_values.iteritems():
 				if elem.get(k) and not elem[k] in v:
 					raise ValueError('Invalid value for %s' % k )
 		return val
 	def atom_entry(self, feed):
 		entry = etree.SubElement(feed, 'entry')
 		if not ( self.__atom_id and self.__atom_title and self.__atom_updated ):
 			raise ValueError('Required fields not set')
 		id      = etree.SubElement(entry, 'id')
 		id.text = self.__atom_id
 		title   = etree.SubElement(entry, 'title')
 		title.text = self.__atom_title
 		updated   = etree.SubElement(entry, 'updated')
 		updated.text = self.__atom_updated.isoformat()
 		# An entry must contain an alternate link if there is no content element.
 		if not self.__atom_content:
 			if not True in [ l.get('type') == 'alternate' \
 					for l in self.__atom_link or [] ]:
 				raise ValueError('Entry must contain an alternate link or '
 						+ 'a content element.')
 		# Add author elements
 		for a in self.__atom_author or []:
 			# Atom requires a name. Skip elements without.
 			if not a.get('name'):
 				continue
 			author = etree.SubElement(entry, 'author')
 			name = etree.SubElement(author, 'name')
 			name.text = a.get('name')
 			if a.get('email'):
 				email = etree.SubElement(author, 'email')
 				email.text = a.get('email')
 			if a.get('uri'):
 				email = etree.SubElement(author, 'url')
 				email.text = a.get('uri')
 		if self.__atom_content:
 			content = etree.SubElement(entry, 'content')
 			if self.__atom_content.get('src'):
 				content.attrib['src'] = self.__atom_content['src']
 			elif self.__atom_content.get('content'):
 				content.text = self.__atom_content.get('content')
 		for l in self.__atom_link or []:
 			link = etree.SubElement(entry, 'link', href=l['href'])
 			if l.get('rel'):
 				link.attrib['rel'] = l['rel']
 			if l.get('type'):
 				link.attrib['type'] = l['type']
 			if l.get('hreflang'):
 				link.attrib['hreflang'] = l['hreflang']
 			if l.get('title'):
 				link.attrib['title'] = l['title']
 			if l.get('length'):
 				link.attrib['length'] = l['length']
 		if self.__atom_summary:
 			summary = etree.SubElement(entry, 'summary')
 			summary.text = self.__atom_summary
 		for c in self.__atom_category or []:
 			cat = etree.SubElement(feed, 'category', term=c['term'])
 			if c.get('schema'):
 				cat.attrib['schema'] = c['schema']
 			if c.get('label'):
 				cat.attrib['label'] = c['label']
 		# Add author elements
 		for c in self.__atom_contributor or []:
 			# Atom requires a name. Skip elements without.
 			if not c.get('name'):
 				continue
 			contrib = etree.SubElement(feed, 'contributor')
 			name = etree.SubElement(contrib, 'name')
 			name.text = c.get('name')
 			if c.get('email'):
 				email = etree.SubElement(contrib, 'email')
 				email.text = c.get('email')
 			if c.get('uri'):
 				email = etree.SubElement(contrib, 'url')
 				email.text = c.get('uri')
 		if self.__atom_rights:
 			rights = etree.SubElement(feed, 'rights')
 			rights.text = self.__atom_rights
 	def title(self, title=None):
 		if not title is None:
 			self.__atom_title = title
 			self.__rss_title = title
 		return self.__atom_title
 	def id(self, id=None):
 		if not id is None:
 			self.__atom_id = id
 		return self.__atom_id
 	def updated(self, updated=None):
 		'''Set or get the updated value which indicates the last time the entry
 		was modified in a significant way.
 		The value can either be a string which will automatically be parsed or a
 		datetime.datetime object. In any case it is necessary that the value
 		include timezone information.
 		:param updated: The modification date.
 		:returns: Modification date as datetime.datetime
 		'''
 		if not updated is None:
 			if isinstance(updated, basestr):
 				updated = dateutil.parser.parse(updated)
 			if not isinstance(updated, datetime.datetime):
 				ValueError('Invalid datetime format')
 			if updated.tzinfo is None:
 				ValueError('Datetime object has no timezone info')
 			self.__atom_updated = updated
 			self.__rss_lastBuildDate = updated
 		return self.__atom_updated
 	def author(self, author=None, replace=False, **kwargs):
 		'''Get or set autor data. An author element is a dict containing a name,
 		an email adress and a uri. Name is mandatory for ATOM, email is mandatory
 		for RSS.
 		:param author:  Dict or list of dicts with author data.
 		:param replace: Add or replace old data.
 		Example::
 			>>> author( { 'name':'John Doe', 'email':'jdoe@example.com' } )
 			[{'name':'John Doe','email':'jdoe@example.com'}]
 			>>> author([{'name':'Mr. X'},{'name':'Max'}])
 			[{'name':'John Doe','email':'jdoe@example.com'},
 					{'name':'John Doe'}, {'name':'Max'}]
 			>>> author( name='John Doe', email='jdoe@example.com', replace=True )
 			[{'name':'John Doe','email':'jdoe@example.com'}]
 		'''
 		if author is None and kwargs:
 			author = kwargs
 		if not author is None:
 			if replace or self.__atom_author is None:
 				self.__atom_author = []
 			self.__atom_author += self.__ensure_format( author, 
 					set(['name', 'email', 'uri']), set(['name']))
 		return self.__atom_author
 	def content(self, content=None, src=None):
 		if not src is None:
 			self.__atom_content = {'src':src}
 		elif not content is None:
 			self.__atom_content = {'content':content}
 		return self.__atom_content
 	def link(self, link=None, replace=False, **kwargs):
 		'''Get or set link data. An link element is a dict with the fields href,
 		rel, type, hreflang, title, and length. Href is mandatory for ATOM.
 		RSS only supports one link with URL only.
 		:param link:    Dict or list of dicts with data.
 		:param replace: Add or replace old data.
 		Example::
 			link(...)
 		'''
 		if link is None and kwargs:
 			link = kwargs
 		if not link is None:
 			if replace or self.__atom_link is None:
 				self.__atom_link = []
 			self.__atom_link += self.__ensure_format( link, 
 					set(['href', 'rel', 'type', 'hreflang', 'title', 'length']),
 					set(['href']), 
 					{'rel':['alternate', 'enclosure', 'related', 'self', 'via']} )
 			# RSS only needs one URL. We use the first link for RSS:
 			if len(self.__atom_link) > 0:
 				self.__rss_link = self.__atom_link[0]['href']
 		# return the set with more information (atom)
 		return self.__atom_link
 	def summary(self, summary=None):
 		if not summary is None:
 			self.__atom_summary = summary
 		return self.__atom_summary
 	def category(self, category=None, replace=False, **kwargs):
 		if category is None and kwargs:
 			category = kwargs
 		if not category is None:
 			if replace or self.__atom_category is None:
 				self.__atom_category = []
 			self.__atom_category += self.__ensure_format( 
 					category, 
 					set(['term', 'schema', 'label']),
 					set(['term']) )
 			# Map the ATOM categories to RSS categories. Use the atom:label as
 			# name or if not present the atom:term. The atom:schema is the
 			# rss:domain.
 			self.__rss_category = []
 			for cat in self.__atom_category:
 				rss_cat = {}
 				rss_cat['value'] = cat['label'] if cat.get('label') else cat['term']
 				if cat.get('schema'):
 					rss_cat['domain'] = cat['schema']
 				self.__rss_category.append( rss_cat )
 		return self.__atom_category
 	def contributor(self, contributor=None, replace=False, **kwargs):
 		if contributor is None and kwargs:
 			contributor = kwargs
 		if not contributor is None:
 			if replace or self.__atom_contributor is None:
 				self.__atom_contributor = []
 			self.__atom_contributor += self.__ensure_format( contributor, 
 					set(['name', 'email', 'uri']), set(['name']))
 		return self.__atom_contributor
 	def published(self, published=None):
 		'''Set or get the published value which ontains the time of the initial
 		creation or first availability of the entry.
 		The value can either be a string which will automatically be parsed or a
 		datetime.datetime object. In any case it is necessary that the value
 		include timezone information.
 		:param published: The creation date.
 		:returns: Creation date as datetime.datetime
 		'''
 		if not published is None:
 			if isinstance(published, basestr):
 				published = dateutil.parser.parse(published)
 			if not isinstance(published, datetime.datetime):
 				ValueError('Invalid datetime format')
 			if published.tzinfo is None:
 				ValueError('Datetime object has no timezone info')
 			self.__atom_published = published
 			self.__rss_lastBuildDate = published
 		return self.__atom_published
 	def rights(self, rights=None):
 		if not rights is None:
 			self.__atom_rights = rights
 		return self.__atom_rights
 if __name__ == '__main__':
 	fg = FeedGenerator()
 	fg.id('http://lernfunk.de/_MEDIAID_123')
@ -580,4 +989,19 @@ if __name__ == '__main__':
 	fg.subtitle('This is a cool feed!')
 	fg.link( href='http://larskiesow.de/test.atom', rel='self' )
 	fg.language('de')
-	print fg.atom_str()
+	fe = fg.add_entry()
 	fe.id('http://lernfunk.de/_MEDIAID_123#1')
 	fe.title('First Element')
 	fe.content('''Lorem ipsum dolor sit amet, consectetur adipiscing elit. Tamen
 			aberramus a proposito, et, ne longius, prorsus, inquam, Piso, si ista
 			mala sunt, placet. Aut etiam, ut vestitum, sic sententiam habeas aliam
 			domesticam, aliam forensem, ut in fronte ostentatio sit, intus veritas
 			occultetur? Cum id fugiunt, re eadem defendunt, quae Peripatetici,
 			verba.''')
 	fe.summary('Lorem ipsum dolor sit amet, consectetur adipiscing elit...')
 	fe.link( href='http://example.com', rel='alternate' )
 	fe.author( name='Lars Kiesow', email='lkiesow@uos.de' )
 	print fg.atom_str(pretty=True)
 	#print fg.rss_str(pretty=True)