Prevent XML Denial of Service Attacks

This patch prevents entity expansion for provided XML content to guard
against XML denial-of-service attacks such as the XML bomb, also known as
the billion laughs attack.
This commit is contained in:
Lars Kiesow 2020-01-25 15:58:49 +01:00
parent 9440ccaffe
commit 0eb12f9133
No known key found for this signature in database
GPG key ID: 5DAFE8D9C823CE73
10 changed files with 180 additions and 180 deletions

View file

@ -3,7 +3,7 @@
feedgen.entry feedgen.entry
~~~~~~~~~~~~~ ~~~~~~~~~~~~~
:copyright: 2013, Lars Kiesow <lkiesow@uos.de> :copyright: 2013-2020, Lars Kiesow <lkiesow@uos.de>
:license: FreeBSD and LGPL, see license.* for more details. :license: FreeBSD and LGPL, see license.* for more details.
''' '''
@ -13,10 +13,11 @@ from datetime import datetime
import dateutil.parser import dateutil.parser
import dateutil.tz import dateutil.tz
import warnings import warnings
from lxml import etree
from lxml.etree import CDATA # nosec - adding CDATA entry is safe
from feedgen.compat import string_types from feedgen.compat import string_types
from feedgen.util import ensure_format, formatRFC2822 from feedgen.util import ensure_format, formatRFC2822, xml_fromstring, xml_elem
def _add_text_elm(entry, data, name): def _add_text_elm(entry, data, name):
@ -24,7 +25,7 @@ def _add_text_elm(entry, data, name):
if not data: if not data:
return return
elm = etree.SubElement(entry, name) elm = xml_elem(name, entry)
type_ = data.get('type') type_ = data.get('type')
if data.get('src'): if data.get('src'):
if name != 'content': if name != 'content':
@ -34,16 +35,14 @@ def _add_text_elm(entry, data, name):
elif data.get(name): elif data.get(name):
# Surround xhtml with a div tag, parse it and embed it # Surround xhtml with a div tag, parse it and embed it
if type_ == 'xhtml': if type_ == 'xhtml':
elm.append(etree.fromstring( xhtml = '<div xmlns="http://www.w3.org/1999/xhtml">' \
'<div xmlns="http://www.w3.org/1999/xhtml">' + + data.get(name) + '</div>'
data.get(name) + '</div>')) elm.append(xml_fromstring(xhtml))
elif type_ == 'CDATA': elif type_ == 'CDATA':
elm.text = etree.CDATA( elm.text = CDATA(data.get(name))
data.get(name))
# Parse XML and embed it # Parse XML and embed it
elif type_ and (type_.endswith('/xml') or type_.endswith('+xml')): elif type_ and (type_.endswith('/xml') or type_.endswith('+xml')):
elm.append(etree.fromstring( elm.append(xml_fromstring(data[name]))
data[name]))
# Embed the text in escaped form # Embed the text in escaped form
elif not type_ or type_.startswith('text') or type_ == 'html': elif not type_ or type_.startswith('text') or type_ == 'html':
elm.text = data.get(name) elm.text = data.get(name)
@ -102,14 +101,14 @@ class FeedEntry(object):
def atom_entry(self, extensions=True): def atom_entry(self, extensions=True):
'''Create an ATOM entry and return it.''' '''Create an ATOM entry and return it.'''
entry = etree.Element('entry') entry = xml_elem('entry')
if not (self.__atom_id and self.__atom_title and self.__atom_updated): if not (self.__atom_id and self.__atom_title and self.__atom_updated):
raise ValueError('Required fields not set') raise ValueError('Required fields not set')
id = etree.SubElement(entry, 'id') id = xml_elem('id', entry)
id.text = self.__atom_id id.text = self.__atom_id
title = etree.SubElement(entry, 'title') title = xml_elem('title', entry)
title.text = self.__atom_title title.text = self.__atom_title
updated = etree.SubElement(entry, 'updated') updated = xml_elem('updated', entry)
updated.text = self.__atom_updated.isoformat() updated.text = self.__atom_updated.isoformat()
# An entry must contain an alternate link if there is no content # An entry must contain an alternate link if there is no content
@ -125,20 +124,20 @@ class FeedEntry(object):
# Atom requires a name. Skip elements without. # Atom requires a name. Skip elements without.
if not a.get('name'): if not a.get('name'):
continue continue
author = etree.SubElement(entry, 'author') author = xml_elem('author', entry)
name = etree.SubElement(author, 'name') name = xml_elem('name', author)
name.text = a.get('name') name.text = a.get('name')
if a.get('email'): if a.get('email'):
email = etree.SubElement(author, 'email') email = xml_elem('email', author)
email.text = a.get('email') email.text = a.get('email')
if a.get('uri'): if a.get('uri'):
uri = etree.SubElement(author, 'uri') uri = xml_elem('uri', author)
uri.text = a.get('uri') uri.text = a.get('uri')
_add_text_elm(entry, self.__atom_content, 'content') _add_text_elm(entry, self.__atom_content, 'content')
for l in self.__atom_link or []: for l in self.__atom_link or []:
link = etree.SubElement(entry, 'link', href=l['href']) link = xml_elem('link', entry, href=l['href'])
if l.get('rel'): if l.get('rel'):
link.attrib['rel'] = l['rel'] link.attrib['rel'] = l['rel']
if l.get('type'): if l.get('type'):
@ -153,7 +152,7 @@ class FeedEntry(object):
_add_text_elm(entry, self.__atom_summary, 'summary') _add_text_elm(entry, self.__atom_summary, 'summary')
for c in self.__atom_category or []: for c in self.__atom_category or []:
cat = etree.SubElement(entry, 'category', term=c['term']) cat = xml_elem('category', entry, term=c['term'])
if c.get('scheme'): if c.get('scheme'):
cat.attrib['scheme'] = c['scheme'] cat.attrib['scheme'] = c['scheme']
if c.get('label'): if c.get('label'):
@ -164,32 +163,31 @@ class FeedEntry(object):
# Atom requires a name. Skip elements without. # Atom requires a name. Skip elements without.
if not c.get('name'): if not c.get('name'):
continue continue
contrib = etree.SubElement(entry, 'contributor') contrib = xml_elem('contributor', entry)
name = etree.SubElement(contrib, 'name') name = xml_elem('name', contrib)
name.text = c.get('name') name.text = c.get('name')
if c.get('email'): if c.get('email'):
email = etree.SubElement(contrib, 'email') email = xml_elem('email', contrib)
email.text = c.get('email') email.text = c.get('email')
if c.get('uri'): if c.get('uri'):
uri = etree.SubElement(contrib, 'uri') uri = xml_elem('uri', contrib)
uri.text = c.get('uri') uri.text = c.get('uri')
if self.__atom_published: if self.__atom_published:
published = etree.SubElement(entry, 'published') published = xml_elem('published', entry)
published.text = self.__atom_published.isoformat() published.text = self.__atom_published.isoformat()
if self.__atom_rights: if self.__atom_rights:
rights = etree.SubElement(entry, 'rights') rights = xml_elem('rights', entry)
rights.text = self.__atom_rights rights.text = self.__atom_rights
if self.__atom_source: if self.__atom_source:
source = etree.SubElement(entry, 'source') source = xml_elem('source', entry)
if self.__atom_source.get('title'): if self.__atom_source.get('title'):
source_title = etree.SubElement(source, 'title') source_title = xml_elem('title', source)
source_title.text = self.__atom_source['title'] source_title.text = self.__atom_source['title']
if self.__atom_source.get('link'): if self.__atom_source.get('link'):
etree.SubElement(source, 'link', xml_elem('link', source, href=self.__atom_source['link'])
href=self.__atom_source['link'])
if extensions: if extensions:
for ext in self.__extensions.values() or []: for ext in self.__extensions.values() or []:
@ -200,60 +198,59 @@ class FeedEntry(object):
def rss_entry(self, extensions=True): def rss_entry(self, extensions=True):
'''Create a RSS item and return it.''' '''Create a RSS item and return it.'''
entry = etree.Element('item') entry = xml_elem('item')
if not (self.__rss_title or if not (self.__rss_title or
self.__rss_description or self.__rss_description or
self.__rss_content): self.__rss_content):
raise ValueError('Required fields not set') raise ValueError('Required fields not set')
if self.__rss_title: if self.__rss_title:
title = etree.SubElement(entry, 'title') title = xml_elem('title', entry)
title.text = self.__rss_title title.text = self.__rss_title
if self.__rss_link: if self.__rss_link:
link = etree.SubElement(entry, 'link') link = xml_elem('link', entry)
link.text = self.__rss_link link.text = self.__rss_link
if self.__rss_description and self.__rss_content: if self.__rss_description and self.__rss_content:
description = etree.SubElement(entry, 'description') description = xml_elem('description', entry)
description.text = self.__rss_description description.text = self.__rss_description
XMLNS_CONTENT = 'http://purl.org/rss/1.0/modules/content/' XMLNS_CONTENT = 'http://purl.org/rss/1.0/modules/content/'
content = etree.SubElement(entry, '{%s}encoded' % XMLNS_CONTENT) content = xml_elem('{%s}encoded' % XMLNS_CONTENT, entry)
content.text = etree.CDATA(self.__rss_content['content']) \ content.text = CDATA(self.__rss_content['content']) \
if self.__rss_content.get('type', '') == 'CDATA' \ if self.__rss_content.get('type', '') == 'CDATA' \
else self.__rss_content['content'] else self.__rss_content['content']
elif self.__rss_description: elif self.__rss_description:
description = etree.SubElement(entry, 'description') description = xml_elem('description', entry)
description.text = self.__rss_description description.text = self.__rss_description
elif self.__rss_content: elif self.__rss_content:
description = etree.SubElement(entry, 'description') description = xml_elem('description', entry)
description.text = etree.CDATA(self.__rss_content['content']) \ description.text = CDATA(self.__rss_content['content']) \
if self.__rss_content.get('type', '') == 'CDATA' \ if self.__rss_content.get('type', '') == 'CDATA' \
else self.__rss_content['content'] else self.__rss_content['content']
for a in self.__rss_author or []: for a in self.__rss_author or []:
author = etree.SubElement(entry, 'author') author = xml_elem('author', entry)
author.text = a author.text = a
if self.__rss_guid.get('guid'): if self.__rss_guid.get('guid'):
guid = etree.SubElement(entry, 'guid') guid = xml_elem('guid', entry)
guid.text = self.__rss_guid['guid'] guid.text = self.__rss_guid['guid']
permaLink = str(self.__rss_guid.get('permalink', False)).lower() permaLink = str(self.__rss_guid.get('permalink', False)).lower()
guid.attrib['isPermaLink'] = permaLink guid.attrib['isPermaLink'] = permaLink
for cat in self.__rss_category or []: for cat in self.__rss_category or []:
category = etree.SubElement(entry, 'category') category = xml_elem('category', entry)
category.text = cat['value'] category.text = cat['value']
if cat.get('domain'): if cat.get('domain'):
category.attrib['domain'] = cat['domain'] category.attrib['domain'] = cat['domain']
if self.__rss_comments: if self.__rss_comments:
comments = etree.SubElement(entry, 'comments') comments = xml_elem('comments', entry)
comments.text = self.__rss_comments comments.text = self.__rss_comments
if self.__rss_enclosure: if self.__rss_enclosure:
enclosure = etree.SubElement(entry, 'enclosure') enclosure = xml_elem('enclosure', entry)
enclosure.attrib['url'] = self.__rss_enclosure['url'] enclosure.attrib['url'] = self.__rss_enclosure['url']
enclosure.attrib['length'] = self.__rss_enclosure['length'] enclosure.attrib['length'] = self.__rss_enclosure['length']
enclosure.attrib['type'] = self.__rss_enclosure['type'] enclosure.attrib['type'] = self.__rss_enclosure['type']
if self.__rss_pubDate: if self.__rss_pubDate:
pubDate = etree.SubElement(entry, 'pubDate') pubDate = xml_elem('pubDate', entry)
pubDate.text = formatRFC2822(self.__rss_pubDate) pubDate.text = formatRFC2822(self.__rss_pubDate)
if self.__rss_source: if self.__rss_source:
source = etree.SubElement(entry, 'source', source = xml_elem('source', entry, url=self.__rss_source['url'])
url=self.__rss_source['url'])
source.text = self.__rss_source['title'] source.text = self.__rss_source['title']
if extensions: if extensions:

View file

@ -13,9 +13,8 @@
:license: FreeBSD and LGPL, see license.* for more details. :license: FreeBSD and LGPL, see license.* for more details.
''' '''
from lxml import etree
from feedgen.ext.base import BaseExtension from feedgen.ext.base import BaseExtension
from feedgen.util import xml_elem
class DcBaseExtension(BaseExtension): class DcBaseExtension(BaseExtension):
@ -45,10 +44,10 @@ class DcBaseExtension(BaseExtension):
def extend_ns(self): def extend_ns(self):
return {'dc': 'http://purl.org/dc/elements/1.1/'} return {'dc': 'http://purl.org/dc/elements/1.1/'}
def _extend_xml(self, xml_elem): def _extend_xml(self, xml_element):
'''Extend xml_elem with set DC fields. '''Extend xml_element with set DC fields.
:param xml_elem: etree element :param xml_element: etree element
''' '''
DCELEMENTS_NS = 'http://purl.org/dc/elements/1.1/' DCELEMENTS_NS = 'http://purl.org/dc/elements/1.1/'
@ -58,8 +57,8 @@ class DcBaseExtension(BaseExtension):
'identifier']: 'identifier']:
if hasattr(self, '_dcelem_%s' % elem): if hasattr(self, '_dcelem_%s' % elem):
for val in getattr(self, '_dcelem_%s' % elem) or []: for val in getattr(self, '_dcelem_%s' % elem) or []:
node = etree.SubElement(xml_elem, node = xml_elem('{%s}%s' % (DCELEMENTS_NS, elem),
'{%s}%s' % (DCELEMENTS_NS, elem)) xml_element)
node.text = val node.text = val
def extend_atom(self, atom_feed): def extend_atom(self, atom_feed):

View file

@ -12,8 +12,8 @@
import numbers import numbers
import warnings import warnings
from lxml import etree
from feedgen.ext.base import BaseEntryExtension from feedgen.ext.base import BaseEntryExtension
from feedgen.util import xml_elem
class GeoRSSPolygonInteriorWarning(Warning): class GeoRSSPolygonInteriorWarning(Warning):
@ -86,49 +86,43 @@ class GeoEntryExtension(BaseEntryExtension):
GEO_NS = 'http://www.georss.org/georss' GEO_NS = 'http://www.georss.org/georss'
if self.__point: if self.__point:
point = etree.SubElement(entry, '{%s}point' % GEO_NS) point = xml_elem('{%s}point' % GEO_NS, entry)
point.text = self.__point point.text = self.__point
if self.__line: if self.__line:
line = etree.SubElement(entry, '{%s}line' % GEO_NS) line = xml_elem('{%s}line' % GEO_NS, entry)
line.text = self.__line line.text = self.__line
if self.__polygon: if self.__polygon:
polygon = etree.SubElement(entry, '{%s}polygon' % GEO_NS) polygon = xml_elem('{%s}polygon' % GEO_NS, entry)
polygon.text = self.__polygon polygon.text = self.__polygon
if self.__box: if self.__box:
box = etree.SubElement(entry, '{%s}box' % GEO_NS) box = xml_elem('{%s}box' % GEO_NS, entry)
box.text = self.__box box.text = self.__box
if self.__featuretypetag: if self.__featuretypetag:
featuretypetag = etree.SubElement( featuretypetag = xml_elem('{%s}featuretypetag' % GEO_NS, entry)
entry,
'{%s}featuretypetag' % GEO_NS
)
featuretypetag.text = self.__featuretypetag featuretypetag.text = self.__featuretypetag
if self.__relationshiptag: if self.__relationshiptag:
relationshiptag = etree.SubElement( relationshiptag = xml_elem('{%s}relationshiptag' % GEO_NS, entry)
entry,
'{%s}relationshiptag' % GEO_NS
)
relationshiptag.text = self.__relationshiptag relationshiptag.text = self.__relationshiptag
if self.__featurename: if self.__featurename:
featurename = etree.SubElement(entry, '{%s}featurename' % GEO_NS) featurename = xml_elem('{%s}featurename' % GEO_NS, entry)
featurename.text = self.__featurename featurename.text = self.__featurename
if self.__elev: if self.__elev:
elevation = etree.SubElement(entry, '{%s}elev' % GEO_NS) elevation = xml_elem('{%s}elev' % GEO_NS, entry)
elevation.text = str(self.__elev) elevation.text = str(self.__elev)
if self.__floor: if self.__floor:
floor = etree.SubElement(entry, '{%s}floor' % GEO_NS) floor = xml_elem('{%s}floor' % GEO_NS, entry)
floor.text = str(self.__floor) floor.text = str(self.__floor)
if self.__radius: if self.__radius:
radius = etree.SubElement(entry, '{%s}radius' % GEO_NS) radius = xml_elem('{%s}radius' % GEO_NS, entry)
radius.text = str(self.__radius) radius.text = str(self.__radius)
return entry return entry

View file

@ -10,10 +10,8 @@
:license: FreeBSD and LGPL, see license.* for more details. :license: FreeBSD and LGPL, see license.* for more details.
''' '''
from lxml import etree
from feedgen.ext.base import BaseEntryExtension, BaseExtension from feedgen.ext.base import BaseEntryExtension, BaseExtension
from feedgen.util import ensure_format from feedgen.util import ensure_format, xml_elem
MEDIA_NS = 'http://search.yahoo.com/mrss/' MEDIA_NS = 'http://search.yahoo.com/mrss/'
@ -45,10 +43,10 @@ class MediaEntryExtension(BaseEntryExtension):
# Define current media:group # Define current media:group
group = groups.get(media_content.get('group')) group = groups.get(media_content.get('group'))
if group is None: if group is None:
group = etree.SubElement(entry, '{%s}group' % MEDIA_NS) group = xml_elem('{%s}group' % MEDIA_NS, entry)
groups[media_content.get('group')] = group groups[media_content.get('group')] = group
# Add content # Add content
content = etree.SubElement(group, '{%s}content' % MEDIA_NS) content = xml_elem('{%s}content' % MEDIA_NS, group)
for attr in ('url', 'fileSize', 'type', 'medium', 'isDefault', for attr in ('url', 'fileSize', 'type', 'medium', 'isDefault',
'expression', 'bitrate', 'framerate', 'samplingrate', 'expression', 'bitrate', 'framerate', 'samplingrate',
'channels', 'duration', 'height', 'width', 'lang'): 'channels', 'duration', 'height', 'width', 'lang'):
@ -59,10 +57,10 @@ class MediaEntryExtension(BaseEntryExtension):
# Define current media:group # Define current media:group
group = groups.get(media_thumbnail.get('group')) group = groups.get(media_thumbnail.get('group'))
if group is None: if group is None:
group = etree.SubElement(entry, '{%s}group' % MEDIA_NS) group = xml_elem('{%s}group' % MEDIA_NS, entry)
groups[media_thumbnail.get('group')] = group groups[media_thumbnail.get('group')] = group
# Add thumbnails # Add thumbnails
thumbnail = etree.SubElement(group, '{%s}thumbnail' % MEDIA_NS) thumbnail = xml_elem('{%s}thumbnail' % MEDIA_NS, group)
for attr in ('url', 'height', 'width', 'time'): for attr in ('url', 'height', 'width', 'time'):
if media_thumbnail.get(attr): if media_thumbnail.get(attr):
thumbnail.set(attr, media_thumbnail[attr]) thumbnail.set(attr, media_thumbnail[attr])

View file

@ -10,11 +10,9 @@
:license: FreeBSD and LGPL, see license.* for more details. :license: FreeBSD and LGPL, see license.* for more details.
''' '''
from lxml import etree
from feedgen.compat import string_types from feedgen.compat import string_types
from feedgen.ext.base import BaseExtension from feedgen.ext.base import BaseExtension
from feedgen.util import ensure_format from feedgen.util import ensure_format, xml_elem
class PodcastExtension(BaseExtension): class PodcastExtension(BaseExtension):
@ -47,11 +45,11 @@ class PodcastExtension(BaseExtension):
channel = rss_feed[0] channel = rss_feed[0]
if self.__itunes_author: if self.__itunes_author:
author = etree.SubElement(channel, '{%s}author' % ITUNES_NS) author = xml_elem('{%s}author' % ITUNES_NS, channel)
author.text = self.__itunes_author author.text = self.__itunes_author
if self.__itunes_block is not None: if self.__itunes_block is not None:
block = etree.SubElement(channel, '{%s}block' % ITUNES_NS) block = xml_elem('{%s}block' % ITUNES_NS, channel)
block.text = 'yes' if self.__itunes_block else 'no' block.text = 'yes' if self.__itunes_block else 'no'
for c in self.__itunes_category or []: for c in self.__itunes_category or []:
@ -60,45 +58,42 @@ class PodcastExtension(BaseExtension):
category = channel.find( category = channel.find(
'{%s}category[@text="%s"]' % (ITUNES_NS, c.get('cat'))) '{%s}category[@text="%s"]' % (ITUNES_NS, c.get('cat')))
if category is None: if category is None:
category = etree.SubElement(channel, category = xml_elem('{%s}category' % ITUNES_NS, channel)
'{%s}category' % ITUNES_NS)
category.attrib['text'] = c.get('cat') category.attrib['text'] = c.get('cat')
if c.get('sub'): if c.get('sub'):
subcategory = etree.SubElement(category, subcategory = xml_elem('{%s}category' % ITUNES_NS, category)
'{%s}category' % ITUNES_NS)
subcategory.attrib['text'] = c.get('sub') subcategory.attrib['text'] = c.get('sub')
if self.__itunes_image: if self.__itunes_image:
image = etree.SubElement(channel, '{%s}image' % ITUNES_NS) image = xml_elem('{%s}image' % ITUNES_NS, channel)
image.attrib['href'] = self.__itunes_image image.attrib['href'] = self.__itunes_image
if self.__itunes_explicit in ('yes', 'no', 'clean'): if self.__itunes_explicit in ('yes', 'no', 'clean'):
explicit = etree.SubElement(channel, '{%s}explicit' % ITUNES_NS) explicit = xml_elem('{%s}explicit' % ITUNES_NS, channel)
explicit.text = self.__itunes_explicit explicit.text = self.__itunes_explicit
if self.__itunes_complete in ('yes', 'no'): if self.__itunes_complete in ('yes', 'no'):
complete = etree.SubElement(channel, '{%s}complete' % ITUNES_NS) complete = xml_elem('{%s}complete' % ITUNES_NS, channel)
complete.text = self.__itunes_complete complete.text = self.__itunes_complete
if self.__itunes_new_feed_url: if self.__itunes_new_feed_url:
new_feed_url = etree.SubElement(channel, new_feed_url = xml_elem('{%s}new-feed-url' % ITUNES_NS, channel)
'{%s}new-feed-url' % ITUNES_NS)
new_feed_url.text = self.__itunes_new_feed_url new_feed_url.text = self.__itunes_new_feed_url
if self.__itunes_owner: if self.__itunes_owner:
owner = etree.SubElement(channel, '{%s}owner' % ITUNES_NS) owner = xml_elem('{%s}owner' % ITUNES_NS, channel)
owner_name = etree.SubElement(owner, '{%s}name' % ITUNES_NS) owner_name = xml_elem('{%s}name' % ITUNES_NS, owner)
owner_name.text = self.__itunes_owner.get('name') owner_name.text = self.__itunes_owner.get('name')
owner_email = etree.SubElement(owner, '{%s}email' % ITUNES_NS) owner_email = xml_elem('{%s}email' % ITUNES_NS, owner)
owner_email.text = self.__itunes_owner.get('email') owner_email.text = self.__itunes_owner.get('email')
if self.__itunes_subtitle: if self.__itunes_subtitle:
subtitle = etree.SubElement(channel, '{%s}subtitle' % ITUNES_NS) subtitle = xml_elem('{%s}subtitle' % ITUNES_NS, channel)
subtitle.text = self.__itunes_subtitle subtitle.text = self.__itunes_subtitle
if self.__itunes_summary: if self.__itunes_summary:
summary = etree.SubElement(channel, '{%s}summary' % ITUNES_NS) summary = xml_elem('{%s}summary' % ITUNES_NS, channel)
summary.text = self.__itunes_summary summary.text = self.__itunes_summary
return rss_feed return rss_feed

View file

@ -10,9 +10,8 @@
:license: FreeBSD and LGPL, see license.* for more details. :license: FreeBSD and LGPL, see license.* for more details.
''' '''
from lxml import etree
from feedgen.ext.base import BaseEntryExtension from feedgen.ext.base import BaseEntryExtension
from feedgen.util import xml_elem
class PodcastEntryExtension(BaseEntryExtension): class PodcastEntryExtension(BaseEntryExtension):
@ -40,43 +39,43 @@ class PodcastEntryExtension(BaseEntryExtension):
ITUNES_NS = 'http://www.itunes.com/dtds/podcast-1.0.dtd' ITUNES_NS = 'http://www.itunes.com/dtds/podcast-1.0.dtd'
if self.__itunes_author: if self.__itunes_author:
author = etree.SubElement(entry, '{%s}author' % ITUNES_NS) author = xml_elem('{%s}author' % ITUNES_NS, entry)
author.text = self.__itunes_author author.text = self.__itunes_author
if self.__itunes_block is not None: if self.__itunes_block is not None:
block = etree.SubElement(entry, '{%s}block' % ITUNES_NS) block = xml_elem('{%s}block' % ITUNES_NS, entry)
block.text = 'yes' if self.__itunes_block else 'no' block.text = 'yes' if self.__itunes_block else 'no'
if self.__itunes_image: if self.__itunes_image:
image = etree.SubElement(entry, '{%s}image' % ITUNES_NS) image = xml_elem('{%s}image' % ITUNES_NS, entry)
image.attrib['href'] = self.__itunes_image image.attrib['href'] = self.__itunes_image
if self.__itunes_duration: if self.__itunes_duration:
duration = etree.SubElement(entry, '{%s}duration' % ITUNES_NS) duration = xml_elem('{%s}duration' % ITUNES_NS, entry)
duration.text = self.__itunes_duration duration.text = self.__itunes_duration
if self.__itunes_explicit in ('yes', 'no', 'clean'): if self.__itunes_explicit in ('yes', 'no', 'clean'):
explicit = etree.SubElement(entry, '{%s}explicit' % ITUNES_NS) explicit = xml_elem('{%s}explicit' % ITUNES_NS, entry)
explicit.text = self.__itunes_explicit explicit.text = self.__itunes_explicit
if self.__itunes_is_closed_captioned is not None: if self.__itunes_is_closed_captioned is not None:
is_closed_captioned = etree.SubElement( is_closed_captioned = xml_elem(
entry, '{%s}isClosedCaptioned' % ITUNES_NS) '{%s}isClosedCaptioned' % ITUNES_NS, entry)
if self.__itunes_is_closed_captioned: if self.__itunes_is_closed_captioned:
is_closed_captioned.text = 'yes' is_closed_captioned.text = 'yes'
else: else:
is_closed_captioned.text = 'no' is_closed_captioned.text = 'no'
if self.__itunes_order is not None and self.__itunes_order >= 0: if self.__itunes_order is not None and self.__itunes_order >= 0:
order = etree.SubElement(entry, '{%s}order' % ITUNES_NS) order = xml_elem('{%s}order' % ITUNES_NS, entry)
order.text = str(self.__itunes_order) order.text = str(self.__itunes_order)
if self.__itunes_subtitle: if self.__itunes_subtitle:
subtitle = etree.SubElement(entry, '{%s}subtitle' % ITUNES_NS) subtitle = xml_elem('{%s}subtitle' % ITUNES_NS, entry)
subtitle.text = self.__itunes_subtitle subtitle.text = self.__itunes_subtitle
if self.__itunes_summary: if self.__itunes_summary:
summary = etree.SubElement(entry, '{%s}summary' % ITUNES_NS) summary = xml_elem('{%s}summary' % ITUNES_NS, entry)
summary.text = self.__itunes_summary summary.text = self.__itunes_summary
return entry return entry

View file

@ -10,9 +10,8 @@ See below for details
http://web.resource.org/rss/1.0/modules/syndication/ http://web.resource.org/rss/1.0/modules/syndication/
''' '''
from lxml import etree
from feedgen.ext.base import BaseExtension from feedgen.ext.base import BaseExtension
from feedgen.util import xml_elem
SYNDICATION_NS = 'http://purl.org/rss/1.0/modules/syndication/' SYNDICATION_NS = 'http://purl.org/rss/1.0/modules/syndication/'
PERIOD_TYPE = ('hourly', 'daily', 'weekly', 'monthly', 'yearly') PERIOD_TYPE = ('hourly', 'daily', 'weekly', 'monthly', 'yearly')
@ -20,7 +19,7 @@ PERIOD_TYPE = ('hourly', 'daily', 'weekly', 'monthly', 'yearly')
def _set_value(channel, name, value): def _set_value(channel, name, value):
if value: if value:
newelem = etree.SubElement(channel, '{%s}' % SYNDICATION_NS + name) newelem = xml_elem('{%s}' % SYNDICATION_NS + name, channel)
newelem.text = value newelem.text = value

View file

@ -10,9 +10,8 @@
:license: FreeBSD and LGPL, see license.* for more details. :license: FreeBSD and LGPL, see license.* for more details.
''' '''
from lxml import etree
from feedgen.ext.base import BaseEntryExtension, BaseExtension from feedgen.ext.base import BaseEntryExtension, BaseExtension
from feedgen.util import xml_elem
TORRENT_NS = 'http://xmlns.ezrss.it/0.1/dtd/' TORRENT_NS = 'http://xmlns.ezrss.it/0.1/dtd/'
@ -41,30 +40,29 @@ class TorrentEntryExtension(BaseEntryExtension):
:param feed: The RSS item XML element to use. :param feed: The RSS item XML element to use.
''' '''
if self.__torrent_filename: if self.__torrent_filename:
filename = etree.SubElement(entry, '{%s}filename' % TORRENT_NS) filename = xml_elem('{%s}filename' % TORRENT_NS, entry)
filename.text = self.__torrent_filename filename.text = self.__torrent_filename
if self.__torrent_contentlength: if self.__torrent_contentlength:
contentlength = etree.SubElement(entry, contentlength = xml_elem('{%s}contentlength' % TORRENT_NS, entry)
'{%s}contentlength' % TORRENT_NS)
contentlength.text = self.__torrent_contentlength contentlength.text = self.__torrent_contentlength
if self.__torrent_infohash: if self.__torrent_infohash:
infohash = etree.SubElement(entry, '{%s}infohash' % TORRENT_NS) infohash = xml_elem('{%s}infohash' % TORRENT_NS, entry)
infohash.text = self.__torrent_infohash infohash.text = self.__torrent_infohash
magnet = etree.SubElement(entry, '{%s}magneturi' % TORRENT_NS) magnet = xml_elem('{%s}magneturi' % TORRENT_NS, entry)
magnet.text = 'magnet:?xt=urn:btih:' + self.__torrent_infohash magnet.text = 'magnet:?xt=urn:btih:' + self.__torrent_infohash
if self.__torrent_seeds: if self.__torrent_seeds:
seeds = etree.SubElement(entry, '{%s}seed' % TORRENT_NS) seeds = xml_elem('{%s}seed' % TORRENT_NS, entry)
seeds.text = self.__torrent_seeds seeds.text = self.__torrent_seeds
if self.__torrent_peers: if self.__torrent_peers:
peers = etree.SubElement(entry, '{%s}peers' % TORRENT_NS) peers = xml_elem('{%s}peers' % TORRENT_NS, entry)
peers.text = self.__torrent_peers peers.text = self.__torrent_peers
if self.__torrent_verified: if self.__torrent_verified:
verified = etree.SubElement(entry, '{%s}verified' % TORRENT_NS) verified = xml_elem('{%s}verified' % TORRENT_NS, entry)
verified.text = self.__torrent_verified verified.text = self.__torrent_verified
def filename(self, torrent_filename=None): def filename(self, torrent_filename=None):

View file

@ -3,7 +3,7 @@
feedgen.feed feedgen.feed
~~~~~~~~~~~~ ~~~~~~~~~~~~
:copyright: 2013-2016, Lars Kiesow <lkiesow@uos.de> :copyright: 2013-2020, Lars Kiesow <lkiesow@uos.de>
:license: FreeBSD and LGPL, see license.* for more details. :license: FreeBSD and LGPL, see license.* for more details.
@ -14,12 +14,12 @@ from datetime import datetime
import dateutil.parser import dateutil.parser
import dateutil.tz import dateutil.tz
from lxml import etree from lxml import etree # nosec - not using this for parsing
import feedgen.version import feedgen.version
from feedgen.compat import string_types from feedgen.compat import string_types
from feedgen.entry import FeedEntry from feedgen.entry import FeedEntry
from feedgen.util import ensure_format, formatRFC2822 from feedgen.util import ensure_format, formatRFC2822, xml_elem
_feedgen_version = feedgen.version.version_str _feedgen_version = feedgen.version.version_str
@ -47,7 +47,7 @@ class FeedGenerator(object):
self.__atom_contributor = None self.__atom_contributor = None
self.__atom_generator = { self.__atom_generator = {
'value': 'python-feedgen', 'value': 'python-feedgen',
'uri': 'http://lkiesow.github.io/python-feedgen', 'uri': 'https://lkiesow.github.io/python-feedgen',
'version': feedgen.version.version_str} # {value*,uri,version} 'version': feedgen.version.version_str} # {value*,uri,version}
self.__atom_icon = None self.__atom_icon = None
self.__atom_logo = None self.__atom_logo = None
@ -95,9 +95,9 @@ class FeedGenerator(object):
if ext.get('atom'): if ext.get('atom'):
nsmap.update(ext['inst'].extend_ns()) nsmap.update(ext['inst'].extend_ns())
feed = etree.Element('feed', feed = xml_elem('feed',
xmlns='http://www.w3.org/2005/Atom', xmlns='http://www.w3.org/2005/Atom',
nsmap=nsmap) nsmap=nsmap)
if self.__atom_feed_xml_lang: if self.__atom_feed_xml_lang:
feed.attrib['{http://www.w3.org/XML/1998/namespace}lang'] = \ feed.attrib['{http://www.w3.org/XML/1998/namespace}lang'] = \
self.__atom_feed_xml_lang self.__atom_feed_xml_lang
@ -108,11 +108,11 @@ class FeedGenerator(object):
([] if self.__atom_updated else ['updated']) ([] if self.__atom_updated else ['updated'])
missing = ', '.join(missing) missing = ', '.join(missing)
raise ValueError('Required fields not set (%s)' % missing) raise ValueError('Required fields not set (%s)' % missing)
id = etree.SubElement(feed, 'id') id = xml_elem('id', feed)
id.text = self.__atom_id id.text = self.__atom_id
title = etree.SubElement(feed, 'title') title = xml_elem('title', feed)
title.text = self.__atom_title title.text = self.__atom_title
updated = etree.SubElement(feed, 'updated') updated = xml_elem('updated', feed)
updated.text = self.__atom_updated.isoformat() updated.text = self.__atom_updated.isoformat()
# Add author elements # Add author elements
@ -120,18 +120,18 @@ class FeedGenerator(object):
# Atom requires a name. Skip elements without. # Atom requires a name. Skip elements without.
if not a.get('name'): if not a.get('name'):
continue continue
author = etree.SubElement(feed, 'author') author = xml_elem('author', feed)
name = etree.SubElement(author, 'name') name = xml_elem('name', author)
name.text = a.get('name') name.text = a.get('name')
if a.get('email'): if a.get('email'):
email = etree.SubElement(author, 'email') email = xml_elem('email', author)
email.text = a.get('email') email.text = a.get('email')
if a.get('uri'): if a.get('uri'):
uri = etree.SubElement(author, 'uri') uri = xml_elem('uri', author)
uri.text = a.get('uri') uri.text = a.get('uri')
for l in self.__atom_link or []: for l in self.__atom_link or []:
link = etree.SubElement(feed, 'link', href=l['href']) link = xml_elem('link', feed, href=l['href'])
if l.get('rel'): if l.get('rel'):
link.attrib['rel'] = l['rel'] link.attrib['rel'] = l['rel']
if l.get('type'): if l.get('type'):
@ -144,7 +144,7 @@ class FeedGenerator(object):
link.attrib['length'] = l['length'] link.attrib['length'] = l['length']
for c in self.__atom_category or []: for c in self.__atom_category or []:
cat = etree.SubElement(feed, 'category', term=c['term']) cat = xml_elem('category', feed, term=c['term'])
if c.get('scheme'): if c.get('scheme'):
cat.attrib['scheme'] = c['scheme'] cat.attrib['scheme'] = c['scheme']
if c.get('label'): if c.get('label'):
@ -155,18 +155,18 @@ class FeedGenerator(object):
# Atom requires a name. Skip elements without. # Atom requires a name. Skip elements without.
if not c.get('name'): if not c.get('name'):
continue continue
contrib = etree.SubElement(feed, 'contributor') contrib = xml_elem('contributor', feed)
name = etree.SubElement(contrib, 'name') name = xml_elem('name', contrib)
name.text = c.get('name') name.text = c.get('name')
if c.get('email'): if c.get('email'):
email = etree.SubElement(contrib, 'email') email = xml_elem('email', contrib)
email.text = c.get('email') email.text = c.get('email')
if c.get('uri'): if c.get('uri'):
uri = etree.SubElement(contrib, 'uri') uri = xml_elem('uri', contrib)
uri.text = c.get('uri') uri.text = c.get('uri')
if self.__atom_generator and self.__atom_generator.get('value'): if self.__atom_generator and self.__atom_generator.get('value'):
generator = etree.SubElement(feed, 'generator') generator = xml_elem('generator', feed)
generator.text = self.__atom_generator['value'] generator.text = self.__atom_generator['value']
if self.__atom_generator.get('uri'): if self.__atom_generator.get('uri'):
generator.attrib['uri'] = self.__atom_generator['uri'] generator.attrib['uri'] = self.__atom_generator['uri']
@ -174,19 +174,19 @@ class FeedGenerator(object):
generator.attrib['version'] = self.__atom_generator['version'] generator.attrib['version'] = self.__atom_generator['version']
if self.__atom_icon: if self.__atom_icon:
icon = etree.SubElement(feed, 'icon') icon = xml_elem('icon', feed)
icon.text = self.__atom_icon icon.text = self.__atom_icon
if self.__atom_logo: if self.__atom_logo:
logo = etree.SubElement(feed, 'logo') logo = xml_elem('logo', feed)
logo.text = self.__atom_logo logo.text = self.__atom_logo
if self.__atom_rights: if self.__atom_rights:
rights = etree.SubElement(feed, 'rights') rights = xml_elem('rights', feed)
rights.text = self.__atom_rights rights.text = self.__atom_rights
if self.__atom_subtitle: if self.__atom_subtitle:
subtitle = etree.SubElement(feed, 'subtitle') subtitle = xml_elem('subtitle', feed)
subtitle.text = self.__atom_subtitle subtitle.text = self.__atom_subtitle
if extensions: if extensions:
@ -255,8 +255,8 @@ class FeedGenerator(object):
nsmap.update({'atom': 'http://www.w3.org/2005/Atom', nsmap.update({'atom': 'http://www.w3.org/2005/Atom',
'content': 'http://purl.org/rss/1.0/modules/content/'}) 'content': 'http://purl.org/rss/1.0/modules/content/'})
feed = etree.Element('rss', version='2.0', nsmap=nsmap) feed = xml_elem('rss', version='2.0', nsmap=nsmap)
channel = etree.SubElement(feed, 'channel') channel = xml_elem('channel', feed)
if not (self.__rss_title and if not (self.__rss_title and
self.__rss_link and self.__rss_link and
self.__rss_description): self.__rss_description):
@ -265,18 +265,17 @@ class FeedGenerator(object):
([] if self.__rss_description else ['description']) ([] if self.__rss_description else ['description'])
missing = ', '.join(missing) missing = ', '.join(missing)
raise ValueError('Required fields not set (%s)' % missing) raise ValueError('Required fields not set (%s)' % missing)
title = etree.SubElement(channel, 'title') title = xml_elem('title', channel)
title.text = self.__rss_title title.text = self.__rss_title
link = etree.SubElement(channel, 'link') link = xml_elem('link', channel)
link.text = self.__rss_link link.text = self.__rss_link
desc = etree.SubElement(channel, 'description') desc = xml_elem('description', channel)
desc.text = self.__rss_description desc.text = self.__rss_description
for ln in self.__atom_link or []: for ln in self.__atom_link or []:
# It is recommended to include a atom self link in rss documents… # It is recommended to include a atom self link in rss documents…
if ln.get('rel') == 'self': if ln.get('rel') == 'self':
selflink = etree.SubElement( selflink = xml_elem('{http://www.w3.org/2005/Atom}link',
channel, '{http://www.w3.org/2005/Atom}link', channel, href=ln['href'], rel='self')
href=ln['href'], rel='self')
if ln.get('type'): if ln.get('type'):
selflink.attrib['type'] = ln['type'] selflink.attrib['type'] = ln['type']
if ln.get('hreflang'): if ln.get('hreflang'):
@ -288,12 +287,12 @@ class FeedGenerator(object):
break break
if self.__rss_category: if self.__rss_category:
for cat in self.__rss_category: for cat in self.__rss_category:
category = etree.SubElement(channel, 'category') category = xml_elem('category', channel)
category.text = cat['value'] category.text = cat['value']
if cat.get('domain'): if cat.get('domain'):
category.attrib['domain'] = cat['domain'] category.attrib['domain'] = cat['domain']
if self.__rss_cloud: if self.__rss_cloud:
cloud = etree.SubElement(channel, 'cloud') cloud = xml_elem('cloud', channel)
cloud.attrib['domain'] = self.__rss_cloud.get('domain') cloud.attrib['domain'] = self.__rss_cloud.get('domain')
cloud.attrib['port'] = self.__rss_cloud.get('port') cloud.attrib['port'] = self.__rss_cloud.get('port')
cloud.attrib['path'] = self.__rss_cloud.get('path') cloud.attrib['path'] = self.__rss_cloud.get('path')
@ -301,69 +300,69 @@ class FeedGenerator(object):
'registerProcedure') 'registerProcedure')
cloud.attrib['protocol'] = self.__rss_cloud.get('protocol') cloud.attrib['protocol'] = self.__rss_cloud.get('protocol')
if self.__rss_copyright: if self.__rss_copyright:
copyright = etree.SubElement(channel, 'copyright') copyright = xml_elem('copyright', channel)
copyright.text = self.__rss_copyright copyright.text = self.__rss_copyright
if self.__rss_docs: if self.__rss_docs:
docs = etree.SubElement(channel, 'docs') docs = xml_elem('docs', channel)
docs.text = self.__rss_docs docs.text = self.__rss_docs
if self.__rss_generator: if self.__rss_generator:
generator = etree.SubElement(channel, 'generator') generator = xml_elem('generator', channel)
generator.text = self.__rss_generator generator.text = self.__rss_generator
if self.__rss_image: if self.__rss_image:
image = etree.SubElement(channel, 'image') image = xml_elem('image', channel)
url = etree.SubElement(image, 'url') url = xml_elem('url', image)
url.text = self.__rss_image.get('url') url.text = self.__rss_image.get('url')
title = etree.SubElement(image, 'title') title = xml_elem('title', image)
title.text = self.__rss_image.get('title', self.__rss_title) title.text = self.__rss_image.get('title', self.__rss_title)
link = etree.SubElement(image, 'link') link = xml_elem('link', image)
link.text = self.__rss_image.get('link', self.__rss_link) link.text = self.__rss_image.get('link', self.__rss_link)
if self.__rss_image.get('width'): if self.__rss_image.get('width'):
width = etree.SubElement(image, 'width') width = xml_elem('width', image)
width.text = self.__rss_image.get('width') width.text = self.__rss_image.get('width')
if self.__rss_image.get('height'): if self.__rss_image.get('height'):
height = etree.SubElement(image, 'height') height = xml_elem('height', image)
height.text = self.__rss_image.get('height') height.text = self.__rss_image.get('height')
if self.__rss_image.get('description'): if self.__rss_image.get('description'):
description = etree.SubElement(image, 'description') description = xml_elem('description', image)
description.text = self.__rss_image.get('description') description.text = self.__rss_image.get('description')
if self.__rss_language: if self.__rss_language:
language = etree.SubElement(channel, 'language') language = xml_elem('language', channel)
language.text = self.__rss_language language.text = self.__rss_language
if self.__rss_lastBuildDate: if self.__rss_lastBuildDate:
lastBuildDate = etree.SubElement(channel, 'lastBuildDate') lastBuildDate = xml_elem('lastBuildDate', channel)
lastBuildDate.text = formatRFC2822(self.__rss_lastBuildDate) lastBuildDate.text = formatRFC2822(self.__rss_lastBuildDate)
if self.__rss_managingEditor: if self.__rss_managingEditor:
managingEditor = etree.SubElement(channel, 'managingEditor') managingEditor = xml_elem('managingEditor', channel)
managingEditor.text = self.__rss_managingEditor managingEditor.text = self.__rss_managingEditor
if self.__rss_pubDate: if self.__rss_pubDate:
pubDate = etree.SubElement(channel, 'pubDate') pubDate = xml_elem('pubDate', channel)
pubDate.text = formatRFC2822(self.__rss_pubDate) pubDate.text = formatRFC2822(self.__rss_pubDate)
if self.__rss_rating: if self.__rss_rating:
rating = etree.SubElement(channel, 'rating') rating = xml_elem('rating', channel)
rating.text = self.__rss_rating rating.text = self.__rss_rating
if self.__rss_skipHours: if self.__rss_skipHours:
skipHours = etree.SubElement(channel, 'skipHours') skipHours = xml_elem('skipHours', channel)
for h in self.__rss_skipHours: for h in self.__rss_skipHours:
hour = etree.SubElement(skipHours, 'hour') hour = xml_elem('hour', skipHours)
hour.text = str(h) hour.text = str(h)
if self.__rss_skipDays: if self.__rss_skipDays:
skipDays = etree.SubElement(channel, 'skipDays') skipDays = xml_elem('skipDays', channel)
for d in self.__rss_skipDays: for d in self.__rss_skipDays:
day = etree.SubElement(skipDays, 'day') day = xml_elem('day', skipDays)
day.text = d day.text = d
if self.__rss_textInput: if self.__rss_textInput:
textInput = etree.SubElement(channel, 'textInput') textInput = xml_elem('textInput', channel)
textInput.attrib['title'] = self.__rss_textInput.get('title') textInput.attrib['title'] = self.__rss_textInput.get('title')
textInput.attrib['description'] = \ textInput.attrib['description'] = \
self.__rss_textInput.get('description') self.__rss_textInput.get('description')
textInput.attrib['name'] = self.__rss_textInput.get('name') textInput.attrib['name'] = self.__rss_textInput.get('name')
textInput.attrib['link'] = self.__rss_textInput.get('link') textInput.attrib['link'] = self.__rss_textInput.get('link')
if self.__rss_ttl: if self.__rss_ttl:
ttl = etree.SubElement(channel, 'ttl') ttl = xml_elem('ttl', channel)
ttl.text = str(self.__rss_ttl) ttl.text = str(self.__rss_ttl)
if self.__rss_webMaster: if self.__rss_webMaster:
webMaster = etree.SubElement(channel, 'webMaster') webMaster = xml_elem('webMaster', channel)
webMaster.text = self.__rss_webMaster webMaster.text = self.__rss_webMaster
if extensions: if extensions:

View file

@ -10,6 +10,28 @@
''' '''
import locale import locale
import sys import sys
# Import the submodule explicitly: a bare ``import lxml`` only loads the
# package and does not guarantee that ``lxml.etree`` is available as an
# attribute (it would only work if another module imported it first).
import lxml.etree  # nosec - we configure a safe parser below

# Configure a safe parser which does not allow XML entity expansion.
# Every option is spelled out so the hardening does not depend on the
# defaults of the installed lxml version.
parser = lxml.etree.XMLParser(
    attribute_defaults=False,  # do not add default attributes from a DTD
    dtd_validation=False,      # do not validate against a DTD
    load_dtd=False,            # do not load or parse a DTD at all
    no_network=True,           # no network access for external lookups
    recover=False,             # fail on broken XML instead of guessing
    remove_pis=True,           # discard processing instructions
    resolve_entities=False,    # leave entities unexpanded (XML bomb guard)
    huge_tree=False)           # keep libxml2's depth/size limits enabled
def xml_fromstring(xmlstring):
    '''Parse an XML string with the module-level ``parser``.

    :param xmlstring: XML document or fragment to parse.
    :returns: The result of ``lxml.etree.fromstring`` for the given input.
    '''
    root = lxml.etree.fromstring(xmlstring, parser)  # nosec - safe parser
    return root
def xml_elem(name, parent=None, **kwargs):
    '''Create an XML element, optionally attached to a parent element.

    :param name: Tag name of the new element.
    :param parent: Element to append the new element to. If None, a
                   stand-alone element is created.
    :param kwargs: Additional keyword arguments, passed on unchanged to
                   ``lxml.etree.Element`` or ``lxml.etree.SubElement``.
    :returns: The newly created element.
    '''
    # No parent given: build a free-standing (root) element.
    if parent is None:
        return lxml.etree.Element(name, **kwargs)
    # Otherwise create the element as a child of ``parent``.
    return lxml.etree.SubElement(parent, name, **kwargs)
def ensure_format(val, allowed, required, allowed_values=None, defaults=None): def ensure_format(val, allowed, required, allowed_values=None, defaults=None):