Prevent XML Denial of Service Attacks

This patch disables entity expansion when parsing user-provided XML content
to guard against XML denial-of-service attacks such as XML bombs (for
example, the billion laughs attack).
This commit is contained in:
Lars Kiesow 2020-01-25 15:58:49 +01:00
parent 9440ccaffe
commit 0eb12f9133
No known key found for this signature in database
GPG key ID: 5DAFE8D9C823CE73
10 changed files with 180 additions and 180 deletions

View file

@ -3,7 +3,7 @@
feedgen.entry
~~~~~~~~~~~~~
:copyright: 2013, Lars Kiesow <lkiesow@uos.de>
:copyright: 2013-2020, Lars Kiesow <lkiesow@uos.de>
:license: FreeBSD and LGPL, see license.* for more details.
'''
@ -13,10 +13,11 @@ from datetime import datetime
import dateutil.parser
import dateutil.tz
import warnings
from lxml import etree
from lxml.etree import CDATA # nosec - adding CDATA entry is safe
from feedgen.compat import string_types
from feedgen.util import ensure_format, formatRFC2822
from feedgen.util import ensure_format, formatRFC2822, xml_fromstring, xml_elem
def _add_text_elm(entry, data, name):
@ -24,7 +25,7 @@ def _add_text_elm(entry, data, name):
if not data:
return
elm = etree.SubElement(entry, name)
elm = xml_elem(name, entry)
type_ = data.get('type')
if data.get('src'):
if name != 'content':
@ -34,16 +35,14 @@ def _add_text_elm(entry, data, name):
elif data.get(name):
# Surround xhtml with a div tag, parse it and embed it
if type_ == 'xhtml':
elm.append(etree.fromstring(
'<div xmlns="http://www.w3.org/1999/xhtml">' +
data.get(name) + '</div>'))
xhtml = '<div xmlns="http://www.w3.org/1999/xhtml">' \
+ data.get(name) + '</div>'
elm.append(xml_fromstring(xhtml))
elif type_ == 'CDATA':
elm.text = etree.CDATA(
data.get(name))
elm.text = CDATA(data.get(name))
# Parse XML and embed it
elif type_ and (type_.endswith('/xml') or type_.endswith('+xml')):
elm.append(etree.fromstring(
data[name]))
elm.append(xml_fromstring(data[name]))
# Embed the text in escaped form
elif not type_ or type_.startswith('text') or type_ == 'html':
elm.text = data.get(name)
@ -102,14 +101,14 @@ class FeedEntry(object):
def atom_entry(self, extensions=True):
'''Create an ATOM entry and return it.'''
entry = etree.Element('entry')
entry = xml_elem('entry')
if not (self.__atom_id and self.__atom_title and self.__atom_updated):
raise ValueError('Required fields not set')
id = etree.SubElement(entry, 'id')
id = xml_elem('id', entry)
id.text = self.__atom_id
title = etree.SubElement(entry, 'title')
title = xml_elem('title', entry)
title.text = self.__atom_title
updated = etree.SubElement(entry, 'updated')
updated = xml_elem('updated', entry)
updated.text = self.__atom_updated.isoformat()
# An entry must contain an alternate link if there is no content
@ -125,20 +124,20 @@ class FeedEntry(object):
# Atom requires a name. Skip elements without.
if not a.get('name'):
continue
author = etree.SubElement(entry, 'author')
name = etree.SubElement(author, 'name')
author = xml_elem('author', entry)
name = xml_elem('name', author)
name.text = a.get('name')
if a.get('email'):
email = etree.SubElement(author, 'email')
email = xml_elem('email', author)
email.text = a.get('email')
if a.get('uri'):
uri = etree.SubElement(author, 'uri')
uri = xml_elem('uri', author)
uri.text = a.get('uri')
_add_text_elm(entry, self.__atom_content, 'content')
for l in self.__atom_link or []:
link = etree.SubElement(entry, 'link', href=l['href'])
link = xml_elem('link', entry, href=l['href'])
if l.get('rel'):
link.attrib['rel'] = l['rel']
if l.get('type'):
@ -153,7 +152,7 @@ class FeedEntry(object):
_add_text_elm(entry, self.__atom_summary, 'summary')
for c in self.__atom_category or []:
cat = etree.SubElement(entry, 'category', term=c['term'])
cat = xml_elem('category', entry, term=c['term'])
if c.get('scheme'):
cat.attrib['scheme'] = c['scheme']
if c.get('label'):
@ -164,32 +163,31 @@ class FeedEntry(object):
# Atom requires a name. Skip elements without.
if not c.get('name'):
continue
contrib = etree.SubElement(entry, 'contributor')
name = etree.SubElement(contrib, 'name')
contrib = xml_elem('contributor', entry)
name = xml_elem('name', contrib)
name.text = c.get('name')
if c.get('email'):
email = etree.SubElement(contrib, 'email')
email = xml_elem('email', contrib)
email.text = c.get('email')
if c.get('uri'):
uri = etree.SubElement(contrib, 'uri')
uri = xml_elem('uri', contrib)
uri.text = c.get('uri')
if self.__atom_published:
published = etree.SubElement(entry, 'published')
published = xml_elem('published', entry)
published.text = self.__atom_published.isoformat()
if self.__atom_rights:
rights = etree.SubElement(entry, 'rights')
rights = xml_elem('rights', entry)
rights.text = self.__atom_rights
if self.__atom_source:
source = etree.SubElement(entry, 'source')
source = xml_elem('source', entry)
if self.__atom_source.get('title'):
source_title = etree.SubElement(source, 'title')
source_title = xml_elem('title', source)
source_title.text = self.__atom_source['title']
if self.__atom_source.get('link'):
etree.SubElement(source, 'link',
href=self.__atom_source['link'])
xml_elem('link', source, href=self.__atom_source['link'])
if extensions:
for ext in self.__extensions.values() or []:
@ -200,60 +198,59 @@ class FeedEntry(object):
def rss_entry(self, extensions=True):
'''Create a RSS item and return it.'''
entry = etree.Element('item')
entry = xml_elem('item')
if not (self.__rss_title or
self.__rss_description or
self.__rss_content):
raise ValueError('Required fields not set')
if self.__rss_title:
title = etree.SubElement(entry, 'title')
title = xml_elem('title', entry)
title.text = self.__rss_title
if self.__rss_link:
link = etree.SubElement(entry, 'link')
link = xml_elem('link', entry)
link.text = self.__rss_link
if self.__rss_description and self.__rss_content:
description = etree.SubElement(entry, 'description')
description = xml_elem('description', entry)
description.text = self.__rss_description
XMLNS_CONTENT = 'http://purl.org/rss/1.0/modules/content/'
content = etree.SubElement(entry, '{%s}encoded' % XMLNS_CONTENT)
content.text = etree.CDATA(self.__rss_content['content']) \
content = xml_elem('{%s}encoded' % XMLNS_CONTENT, entry)
content.text = CDATA(self.__rss_content['content']) \
if self.__rss_content.get('type', '') == 'CDATA' \
else self.__rss_content['content']
elif self.__rss_description:
description = etree.SubElement(entry, 'description')
description = xml_elem('description', entry)
description.text = self.__rss_description
elif self.__rss_content:
description = etree.SubElement(entry, 'description')
description.text = etree.CDATA(self.__rss_content['content']) \
description = xml_elem('description', entry)
description.text = CDATA(self.__rss_content['content']) \
if self.__rss_content.get('type', '') == 'CDATA' \
else self.__rss_content['content']
for a in self.__rss_author or []:
author = etree.SubElement(entry, 'author')
author = xml_elem('author', entry)
author.text = a
if self.__rss_guid.get('guid'):
guid = etree.SubElement(entry, 'guid')
guid = xml_elem('guid', entry)
guid.text = self.__rss_guid['guid']
permaLink = str(self.__rss_guid.get('permalink', False)).lower()
guid.attrib['isPermaLink'] = permaLink
for cat in self.__rss_category or []:
category = etree.SubElement(entry, 'category')
category = xml_elem('category', entry)
category.text = cat['value']
if cat.get('domain'):
category.attrib['domain'] = cat['domain']
if self.__rss_comments:
comments = etree.SubElement(entry, 'comments')
comments = xml_elem('comments', entry)
comments.text = self.__rss_comments
if self.__rss_enclosure:
enclosure = etree.SubElement(entry, 'enclosure')
enclosure = xml_elem('enclosure', entry)
enclosure.attrib['url'] = self.__rss_enclosure['url']
enclosure.attrib['length'] = self.__rss_enclosure['length']
enclosure.attrib['type'] = self.__rss_enclosure['type']
if self.__rss_pubDate:
pubDate = etree.SubElement(entry, 'pubDate')
pubDate = xml_elem('pubDate', entry)
pubDate.text = formatRFC2822(self.__rss_pubDate)
if self.__rss_source:
source = etree.SubElement(entry, 'source',
url=self.__rss_source['url'])
source = xml_elem('source', entry, url=self.__rss_source['url'])
source.text = self.__rss_source['title']
if extensions:

View file

@ -13,9 +13,8 @@
:license: FreeBSD and LGPL, see license.* for more details.
'''
from lxml import etree
from feedgen.ext.base import BaseExtension
from feedgen.util import xml_elem
class DcBaseExtension(BaseExtension):
@ -45,10 +44,10 @@ class DcBaseExtension(BaseExtension):
def extend_ns(self):
return {'dc': 'http://purl.org/dc/elements/1.1/'}
def _extend_xml(self, xml_elem):
'''Extend xml_elem with set DC fields.
def _extend_xml(self, xml_element):
'''Extend xml_element with set DC fields.
:param xml_elem: etree element
:param xml_element: etree element
'''
DCELEMENTS_NS = 'http://purl.org/dc/elements/1.1/'
@ -58,8 +57,8 @@ class DcBaseExtension(BaseExtension):
'identifier']:
if hasattr(self, '_dcelem_%s' % elem):
for val in getattr(self, '_dcelem_%s' % elem) or []:
node = etree.SubElement(xml_elem,
'{%s}%s' % (DCELEMENTS_NS, elem))
node = xml_elem('{%s}%s' % (DCELEMENTS_NS, elem),
xml_element)
node.text = val
def extend_atom(self, atom_feed):

View file

@ -12,8 +12,8 @@
import numbers
import warnings
from lxml import etree
from feedgen.ext.base import BaseEntryExtension
from feedgen.util import xml_elem
class GeoRSSPolygonInteriorWarning(Warning):
@ -86,49 +86,43 @@ class GeoEntryExtension(BaseEntryExtension):
GEO_NS = 'http://www.georss.org/georss'
if self.__point:
point = etree.SubElement(entry, '{%s}point' % GEO_NS)
point = xml_elem('{%s}point' % GEO_NS, entry)
point.text = self.__point
if self.__line:
line = etree.SubElement(entry, '{%s}line' % GEO_NS)
line = xml_elem('{%s}line' % GEO_NS, entry)
line.text = self.__line
if self.__polygon:
polygon = etree.SubElement(entry, '{%s}polygon' % GEO_NS)
polygon = xml_elem('{%s}polygon' % GEO_NS, entry)
polygon.text = self.__polygon
if self.__box:
box = etree.SubElement(entry, '{%s}box' % GEO_NS)
box = xml_elem('{%s}box' % GEO_NS, entry)
box.text = self.__box
if self.__featuretypetag:
featuretypetag = etree.SubElement(
entry,
'{%s}featuretypetag' % GEO_NS
)
featuretypetag = xml_elem('{%s}featuretypetag' % GEO_NS, entry)
featuretypetag.text = self.__featuretypetag
if self.__relationshiptag:
relationshiptag = etree.SubElement(
entry,
'{%s}relationshiptag' % GEO_NS
)
relationshiptag = xml_elem('{%s}relationshiptag' % GEO_NS, entry)
relationshiptag.text = self.__relationshiptag
if self.__featurename:
featurename = etree.SubElement(entry, '{%s}featurename' % GEO_NS)
featurename = xml_elem('{%s}featurename' % GEO_NS, entry)
featurename.text = self.__featurename
if self.__elev:
elevation = etree.SubElement(entry, '{%s}elev' % GEO_NS)
elevation = xml_elem('{%s}elev' % GEO_NS, entry)
elevation.text = str(self.__elev)
if self.__floor:
floor = etree.SubElement(entry, '{%s}floor' % GEO_NS)
floor = xml_elem('{%s}floor' % GEO_NS, entry)
floor.text = str(self.__floor)
if self.__radius:
radius = etree.SubElement(entry, '{%s}radius' % GEO_NS)
radius = xml_elem('{%s}radius' % GEO_NS, entry)
radius.text = str(self.__radius)
return entry

View file

@ -10,10 +10,8 @@
:license: FreeBSD and LGPL, see license.* for more details.
'''
from lxml import etree
from feedgen.ext.base import BaseEntryExtension, BaseExtension
from feedgen.util import ensure_format
from feedgen.util import ensure_format, xml_elem
MEDIA_NS = 'http://search.yahoo.com/mrss/'
@ -45,10 +43,10 @@ class MediaEntryExtension(BaseEntryExtension):
# Define current media:group
group = groups.get(media_content.get('group'))
if group is None:
group = etree.SubElement(entry, '{%s}group' % MEDIA_NS)
group = xml_elem('{%s}group' % MEDIA_NS, entry)
groups[media_content.get('group')] = group
# Add content
content = etree.SubElement(group, '{%s}content' % MEDIA_NS)
content = xml_elem('{%s}content' % MEDIA_NS, group)
for attr in ('url', 'fileSize', 'type', 'medium', 'isDefault',
'expression', 'bitrate', 'framerate', 'samplingrate',
'channels', 'duration', 'height', 'width', 'lang'):
@ -59,10 +57,10 @@ class MediaEntryExtension(BaseEntryExtension):
# Define current media:group
group = groups.get(media_thumbnail.get('group'))
if group is None:
group = etree.SubElement(entry, '{%s}group' % MEDIA_NS)
group = xml_elem('{%s}group' % MEDIA_NS, entry)
groups[media_thumbnail.get('group')] = group
# Add thumbnails
thumbnail = etree.SubElement(group, '{%s}thumbnail' % MEDIA_NS)
thumbnail = xml_elem('{%s}thumbnail' % MEDIA_NS, group)
for attr in ('url', 'height', 'width', 'time'):
if media_thumbnail.get(attr):
thumbnail.set(attr, media_thumbnail[attr])

View file

@ -10,11 +10,9 @@
:license: FreeBSD and LGPL, see license.* for more details.
'''
from lxml import etree
from feedgen.compat import string_types
from feedgen.ext.base import BaseExtension
from feedgen.util import ensure_format
from feedgen.util import ensure_format, xml_elem
class PodcastExtension(BaseExtension):
@ -47,11 +45,11 @@ class PodcastExtension(BaseExtension):
channel = rss_feed[0]
if self.__itunes_author:
author = etree.SubElement(channel, '{%s}author' % ITUNES_NS)
author = xml_elem('{%s}author' % ITUNES_NS, channel)
author.text = self.__itunes_author
if self.__itunes_block is not None:
block = etree.SubElement(channel, '{%s}block' % ITUNES_NS)
block = xml_elem('{%s}block' % ITUNES_NS, channel)
block.text = 'yes' if self.__itunes_block else 'no'
for c in self.__itunes_category or []:
@ -60,45 +58,42 @@ class PodcastExtension(BaseExtension):
category = channel.find(
'{%s}category[@text="%s"]' % (ITUNES_NS, c.get('cat')))
if category is None:
category = etree.SubElement(channel,
'{%s}category' % ITUNES_NS)
category = xml_elem('{%s}category' % ITUNES_NS, channel)
category.attrib['text'] = c.get('cat')
if c.get('sub'):
subcategory = etree.SubElement(category,
'{%s}category' % ITUNES_NS)
subcategory = xml_elem('{%s}category' % ITUNES_NS, category)
subcategory.attrib['text'] = c.get('sub')
if self.__itunes_image:
image = etree.SubElement(channel, '{%s}image' % ITUNES_NS)
image = xml_elem('{%s}image' % ITUNES_NS, channel)
image.attrib['href'] = self.__itunes_image
if self.__itunes_explicit in ('yes', 'no', 'clean'):
explicit = etree.SubElement(channel, '{%s}explicit' % ITUNES_NS)
explicit = xml_elem('{%s}explicit' % ITUNES_NS, channel)
explicit.text = self.__itunes_explicit
if self.__itunes_complete in ('yes', 'no'):
complete = etree.SubElement(channel, '{%s}complete' % ITUNES_NS)
complete = xml_elem('{%s}complete' % ITUNES_NS, channel)
complete.text = self.__itunes_complete
if self.__itunes_new_feed_url:
new_feed_url = etree.SubElement(channel,
'{%s}new-feed-url' % ITUNES_NS)
new_feed_url = xml_elem('{%s}new-feed-url' % ITUNES_NS, channel)
new_feed_url.text = self.__itunes_new_feed_url
if self.__itunes_owner:
owner = etree.SubElement(channel, '{%s}owner' % ITUNES_NS)
owner_name = etree.SubElement(owner, '{%s}name' % ITUNES_NS)
owner = xml_elem('{%s}owner' % ITUNES_NS, channel)
owner_name = xml_elem('{%s}name' % ITUNES_NS, owner)
owner_name.text = self.__itunes_owner.get('name')
owner_email = etree.SubElement(owner, '{%s}email' % ITUNES_NS)
owner_email = xml_elem('{%s}email' % ITUNES_NS, owner)
owner_email.text = self.__itunes_owner.get('email')
if self.__itunes_subtitle:
subtitle = etree.SubElement(channel, '{%s}subtitle' % ITUNES_NS)
subtitle = xml_elem('{%s}subtitle' % ITUNES_NS, channel)
subtitle.text = self.__itunes_subtitle
if self.__itunes_summary:
summary = etree.SubElement(channel, '{%s}summary' % ITUNES_NS)
summary = xml_elem('{%s}summary' % ITUNES_NS, channel)
summary.text = self.__itunes_summary
return rss_feed

View file

@ -10,9 +10,8 @@
:license: FreeBSD and LGPL, see license.* for more details.
'''
from lxml import etree
from feedgen.ext.base import BaseEntryExtension
from feedgen.util import xml_elem
class PodcastEntryExtension(BaseEntryExtension):
@ -40,43 +39,43 @@ class PodcastEntryExtension(BaseEntryExtension):
ITUNES_NS = 'http://www.itunes.com/dtds/podcast-1.0.dtd'
if self.__itunes_author:
author = etree.SubElement(entry, '{%s}author' % ITUNES_NS)
author = xml_elem('{%s}author' % ITUNES_NS, entry)
author.text = self.__itunes_author
if self.__itunes_block is not None:
block = etree.SubElement(entry, '{%s}block' % ITUNES_NS)
block = xml_elem('{%s}block' % ITUNES_NS, entry)
block.text = 'yes' if self.__itunes_block else 'no'
if self.__itunes_image:
image = etree.SubElement(entry, '{%s}image' % ITUNES_NS)
image = xml_elem('{%s}image' % ITUNES_NS, entry)
image.attrib['href'] = self.__itunes_image
if self.__itunes_duration:
duration = etree.SubElement(entry, '{%s}duration' % ITUNES_NS)
duration = xml_elem('{%s}duration' % ITUNES_NS, entry)
duration.text = self.__itunes_duration
if self.__itunes_explicit in ('yes', 'no', 'clean'):
explicit = etree.SubElement(entry, '{%s}explicit' % ITUNES_NS)
explicit = xml_elem('{%s}explicit' % ITUNES_NS, entry)
explicit.text = self.__itunes_explicit
if self.__itunes_is_closed_captioned is not None:
is_closed_captioned = etree.SubElement(
entry, '{%s}isClosedCaptioned' % ITUNES_NS)
is_closed_captioned = xml_elem(
'{%s}isClosedCaptioned' % ITUNES_NS, entry)
if self.__itunes_is_closed_captioned:
is_closed_captioned.text = 'yes'
else:
is_closed_captioned.text = 'no'
if self.__itunes_order is not None and self.__itunes_order >= 0:
order = etree.SubElement(entry, '{%s}order' % ITUNES_NS)
order = xml_elem('{%s}order' % ITUNES_NS, entry)
order.text = str(self.__itunes_order)
if self.__itunes_subtitle:
subtitle = etree.SubElement(entry, '{%s}subtitle' % ITUNES_NS)
subtitle = xml_elem('{%s}subtitle' % ITUNES_NS, entry)
subtitle.text = self.__itunes_subtitle
if self.__itunes_summary:
summary = etree.SubElement(entry, '{%s}summary' % ITUNES_NS)
summary = xml_elem('{%s}summary' % ITUNES_NS, entry)
summary.text = self.__itunes_summary
return entry

View file

@ -10,9 +10,8 @@ See below for details
http://web.resource.org/rss/1.0/modules/syndication/
'''
from lxml import etree
from feedgen.ext.base import BaseExtension
from feedgen.util import xml_elem
SYNDICATION_NS = 'http://purl.org/rss/1.0/modules/syndication/'
PERIOD_TYPE = ('hourly', 'daily', 'weekly', 'monthly', 'yearly')
@ -20,7 +19,7 @@ PERIOD_TYPE = ('hourly', 'daily', 'weekly', 'monthly', 'yearly')
def _set_value(channel, name, value):
if value:
newelem = etree.SubElement(channel, '{%s}' % SYNDICATION_NS + name)
newelem = xml_elem('{%s}' % SYNDICATION_NS + name, channel)
newelem.text = value

View file

@ -10,9 +10,8 @@
:license: FreeBSD and LGPL, see license.* for more details.
'''
from lxml import etree
from feedgen.ext.base import BaseEntryExtension, BaseExtension
from feedgen.util import xml_elem
TORRENT_NS = 'http://xmlns.ezrss.it/0.1/dtd/'
@ -41,30 +40,29 @@ class TorrentEntryExtension(BaseEntryExtension):
:param feed: The RSS item XML element to use.
'''
if self.__torrent_filename:
filename = etree.SubElement(entry, '{%s}filename' % TORRENT_NS)
filename = xml_elem('{%s}filename' % TORRENT_NS, entry)
filename.text = self.__torrent_filename
if self.__torrent_contentlength:
contentlength = etree.SubElement(entry,
'{%s}contentlength' % TORRENT_NS)
contentlength = xml_elem('{%s}contentlength' % TORRENT_NS, entry)
contentlength.text = self.__torrent_contentlength
if self.__torrent_infohash:
infohash = etree.SubElement(entry, '{%s}infohash' % TORRENT_NS)
infohash = xml_elem('{%s}infohash' % TORRENT_NS, entry)
infohash.text = self.__torrent_infohash
magnet = etree.SubElement(entry, '{%s}magneturi' % TORRENT_NS)
magnet = xml_elem('{%s}magneturi' % TORRENT_NS, entry)
magnet.text = 'magnet:?xt=urn:btih:' + self.__torrent_infohash
if self.__torrent_seeds:
seeds = etree.SubElement(entry, '{%s}seed' % TORRENT_NS)
seeds = xml_elem('{%s}seed' % TORRENT_NS, entry)
seeds.text = self.__torrent_seeds
if self.__torrent_peers:
peers = etree.SubElement(entry, '{%s}peers' % TORRENT_NS)
peers = xml_elem('{%s}peers' % TORRENT_NS, entry)
peers.text = self.__torrent_peers
if self.__torrent_verified:
verified = etree.SubElement(entry, '{%s}verified' % TORRENT_NS)
verified = xml_elem('{%s}verified' % TORRENT_NS, entry)
verified.text = self.__torrent_verified
def filename(self, torrent_filename=None):

View file

@ -3,7 +3,7 @@
feedgen.feed
~~~~~~~~~~~~
:copyright: 2013-2016, Lars Kiesow <lkiesow@uos.de>
:copyright: 2013-2020, Lars Kiesow <lkiesow@uos.de>
:license: FreeBSD and LGPL, see license.* for more details.
@ -14,12 +14,12 @@ from datetime import datetime
import dateutil.parser
import dateutil.tz
from lxml import etree
from lxml import etree # nosec - not using this for parsing
import feedgen.version
from feedgen.compat import string_types
from feedgen.entry import FeedEntry
from feedgen.util import ensure_format, formatRFC2822
from feedgen.util import ensure_format, formatRFC2822, xml_elem
_feedgen_version = feedgen.version.version_str
@ -47,7 +47,7 @@ class FeedGenerator(object):
self.__atom_contributor = None
self.__atom_generator = {
'value': 'python-feedgen',
'uri': 'http://lkiesow.github.io/python-feedgen',
'uri': 'https://lkiesow.github.io/python-feedgen',
'version': feedgen.version.version_str} # {value*,uri,version}
self.__atom_icon = None
self.__atom_logo = None
@ -95,9 +95,9 @@ class FeedGenerator(object):
if ext.get('atom'):
nsmap.update(ext['inst'].extend_ns())
feed = etree.Element('feed',
xmlns='http://www.w3.org/2005/Atom',
nsmap=nsmap)
feed = xml_elem('feed',
xmlns='http://www.w3.org/2005/Atom',
nsmap=nsmap)
if self.__atom_feed_xml_lang:
feed.attrib['{http://www.w3.org/XML/1998/namespace}lang'] = \
self.__atom_feed_xml_lang
@ -108,11 +108,11 @@ class FeedGenerator(object):
([] if self.__atom_updated else ['updated'])
missing = ', '.join(missing)
raise ValueError('Required fields not set (%s)' % missing)
id = etree.SubElement(feed, 'id')
id = xml_elem('id', feed)
id.text = self.__atom_id
title = etree.SubElement(feed, 'title')
title = xml_elem('title', feed)
title.text = self.__atom_title
updated = etree.SubElement(feed, 'updated')
updated = xml_elem('updated', feed)
updated.text = self.__atom_updated.isoformat()
# Add author elements
@ -120,18 +120,18 @@ class FeedGenerator(object):
# Atom requires a name. Skip elements without.
if not a.get('name'):
continue
author = etree.SubElement(feed, 'author')
name = etree.SubElement(author, 'name')
author = xml_elem('author', feed)
name = xml_elem('name', author)
name.text = a.get('name')
if a.get('email'):
email = etree.SubElement(author, 'email')
email = xml_elem('email', author)
email.text = a.get('email')
if a.get('uri'):
uri = etree.SubElement(author, 'uri')
uri = xml_elem('uri', author)
uri.text = a.get('uri')
for l in self.__atom_link or []:
link = etree.SubElement(feed, 'link', href=l['href'])
link = xml_elem('link', feed, href=l['href'])
if l.get('rel'):
link.attrib['rel'] = l['rel']
if l.get('type'):
@ -144,7 +144,7 @@ class FeedGenerator(object):
link.attrib['length'] = l['length']
for c in self.__atom_category or []:
cat = etree.SubElement(feed, 'category', term=c['term'])
cat = xml_elem('category', feed, term=c['term'])
if c.get('scheme'):
cat.attrib['scheme'] = c['scheme']
if c.get('label'):
@ -155,18 +155,18 @@ class FeedGenerator(object):
# Atom requires a name. Skip elements without.
if not c.get('name'):
continue
contrib = etree.SubElement(feed, 'contributor')
name = etree.SubElement(contrib, 'name')
contrib = xml_elem('contributor', feed)
name = xml_elem('name', contrib)
name.text = c.get('name')
if c.get('email'):
email = etree.SubElement(contrib, 'email')
email = xml_elem('email', contrib)
email.text = c.get('email')
if c.get('uri'):
uri = etree.SubElement(contrib, 'uri')
uri = xml_elem('uri', contrib)
uri.text = c.get('uri')
if self.__atom_generator and self.__atom_generator.get('value'):
generator = etree.SubElement(feed, 'generator')
generator = xml_elem('generator', feed)
generator.text = self.__atom_generator['value']
if self.__atom_generator.get('uri'):
generator.attrib['uri'] = self.__atom_generator['uri']
@ -174,19 +174,19 @@ class FeedGenerator(object):
generator.attrib['version'] = self.__atom_generator['version']
if self.__atom_icon:
icon = etree.SubElement(feed, 'icon')
icon = xml_elem('icon', feed)
icon.text = self.__atom_icon
if self.__atom_logo:
logo = etree.SubElement(feed, 'logo')
logo = xml_elem('logo', feed)
logo.text = self.__atom_logo
if self.__atom_rights:
rights = etree.SubElement(feed, 'rights')
rights = xml_elem('rights', feed)
rights.text = self.__atom_rights
if self.__atom_subtitle:
subtitle = etree.SubElement(feed, 'subtitle')
subtitle = xml_elem('subtitle', feed)
subtitle.text = self.__atom_subtitle
if extensions:
@ -255,8 +255,8 @@ class FeedGenerator(object):
nsmap.update({'atom': 'http://www.w3.org/2005/Atom',
'content': 'http://purl.org/rss/1.0/modules/content/'})
feed = etree.Element('rss', version='2.0', nsmap=nsmap)
channel = etree.SubElement(feed, 'channel')
feed = xml_elem('rss', version='2.0', nsmap=nsmap)
channel = xml_elem('channel', feed)
if not (self.__rss_title and
self.__rss_link and
self.__rss_description):
@ -265,18 +265,17 @@ class FeedGenerator(object):
([] if self.__rss_description else ['description'])
missing = ', '.join(missing)
raise ValueError('Required fields not set (%s)' % missing)
title = etree.SubElement(channel, 'title')
title = xml_elem('title', channel)
title.text = self.__rss_title
link = etree.SubElement(channel, 'link')
link = xml_elem('link', channel)
link.text = self.__rss_link
desc = etree.SubElement(channel, 'description')
desc = xml_elem('description', channel)
desc.text = self.__rss_description
for ln in self.__atom_link or []:
# It is recommended to include a atom self link in rss documents…
if ln.get('rel') == 'self':
selflink = etree.SubElement(
channel, '{http://www.w3.org/2005/Atom}link',
href=ln['href'], rel='self')
selflink = xml_elem('{http://www.w3.org/2005/Atom}link',
channel, href=ln['href'], rel='self')
if ln.get('type'):
selflink.attrib['type'] = ln['type']
if ln.get('hreflang'):
@ -288,12 +287,12 @@ class FeedGenerator(object):
break
if self.__rss_category:
for cat in self.__rss_category:
category = etree.SubElement(channel, 'category')
category = xml_elem('category', channel)
category.text = cat['value']
if cat.get('domain'):
category.attrib['domain'] = cat['domain']
if self.__rss_cloud:
cloud = etree.SubElement(channel, 'cloud')
cloud = xml_elem('cloud', channel)
cloud.attrib['domain'] = self.__rss_cloud.get('domain')
cloud.attrib['port'] = self.__rss_cloud.get('port')
cloud.attrib['path'] = self.__rss_cloud.get('path')
@ -301,69 +300,69 @@ class FeedGenerator(object):
'registerProcedure')
cloud.attrib['protocol'] = self.__rss_cloud.get('protocol')
if self.__rss_copyright:
copyright = etree.SubElement(channel, 'copyright')
copyright = xml_elem('copyright', channel)
copyright.text = self.__rss_copyright
if self.__rss_docs:
docs = etree.SubElement(channel, 'docs')
docs = xml_elem('docs', channel)
docs.text = self.__rss_docs
if self.__rss_generator:
generator = etree.SubElement(channel, 'generator')
generator = xml_elem('generator', channel)
generator.text = self.__rss_generator
if self.__rss_image:
image = etree.SubElement(channel, 'image')
url = etree.SubElement(image, 'url')
image = xml_elem('image', channel)
url = xml_elem('url', image)
url.text = self.__rss_image.get('url')
title = etree.SubElement(image, 'title')
title = xml_elem('title', image)
title.text = self.__rss_image.get('title', self.__rss_title)
link = etree.SubElement(image, 'link')
link = xml_elem('link', image)
link.text = self.__rss_image.get('link', self.__rss_link)
if self.__rss_image.get('width'):
width = etree.SubElement(image, 'width')
width = xml_elem('width', image)
width.text = self.__rss_image.get('width')
if self.__rss_image.get('height'):
height = etree.SubElement(image, 'height')
height = xml_elem('height', image)
height.text = self.__rss_image.get('height')
if self.__rss_image.get('description'):
description = etree.SubElement(image, 'description')
description = xml_elem('description', image)
description.text = self.__rss_image.get('description')
if self.__rss_language:
language = etree.SubElement(channel, 'language')
language = xml_elem('language', channel)
language.text = self.__rss_language
if self.__rss_lastBuildDate:
lastBuildDate = etree.SubElement(channel, 'lastBuildDate')
lastBuildDate = xml_elem('lastBuildDate', channel)
lastBuildDate.text = formatRFC2822(self.__rss_lastBuildDate)
if self.__rss_managingEditor:
managingEditor = etree.SubElement(channel, 'managingEditor')
managingEditor = xml_elem('managingEditor', channel)
managingEditor.text = self.__rss_managingEditor
if self.__rss_pubDate:
pubDate = etree.SubElement(channel, 'pubDate')
pubDate = xml_elem('pubDate', channel)
pubDate.text = formatRFC2822(self.__rss_pubDate)
if self.__rss_rating:
rating = etree.SubElement(channel, 'rating')
rating = xml_elem('rating', channel)
rating.text = self.__rss_rating
if self.__rss_skipHours:
skipHours = etree.SubElement(channel, 'skipHours')
skipHours = xml_elem('skipHours', channel)
for h in self.__rss_skipHours:
hour = etree.SubElement(skipHours, 'hour')
hour = xml_elem('hour', skipHours)
hour.text = str(h)
if self.__rss_skipDays:
skipDays = etree.SubElement(channel, 'skipDays')
skipDays = xml_elem('skipDays', channel)
for d in self.__rss_skipDays:
day = etree.SubElement(skipDays, 'day')
day = xml_elem('day', skipDays)
day.text = d
if self.__rss_textInput:
textInput = etree.SubElement(channel, 'textInput')
textInput = xml_elem('textInput', channel)
textInput.attrib['title'] = self.__rss_textInput.get('title')
textInput.attrib['description'] = \
self.__rss_textInput.get('description')
textInput.attrib['name'] = self.__rss_textInput.get('name')
textInput.attrib['link'] = self.__rss_textInput.get('link')
if self.__rss_ttl:
ttl = etree.SubElement(channel, 'ttl')
ttl = xml_elem('ttl', channel)
ttl.text = str(self.__rss_ttl)
if self.__rss_webMaster:
webMaster = etree.SubElement(channel, 'webMaster')
webMaster = xml_elem('webMaster', channel)
webMaster.text = self.__rss_webMaster
if extensions:

View file

@ -10,6 +10,28 @@
'''
import locale
import sys
# Import the submodule explicitly: a bare ``import lxml`` does not load
# ``lxml.etree``, so ``lxml.etree`` would only resolve if some other module
# had already imported it. ``import lxml.etree`` still binds the name ``lxml``.
import lxml.etree  # nosec - we configure a safe parser below

# Configure a safe parser which does not allow XML entity expansion.
# It is shared by every call to xml_fromstring() in this module.
parser = lxml.etree.XMLParser(
    attribute_defaults=False,
    dtd_validation=False,
    load_dtd=False,
    no_network=True,
    recover=False,
    remove_pis=True,
    # Leave entity references unexpanded; this is the setting that blocks
    # entity-expansion attacks such as "billion laughs".
    resolve_entities=False,
    # Keep libxml2's built-in limits on tree depth and text size enabled.
    huge_tree=False)
def xml_fromstring(xmlstring):
    '''Parse an XML string using the module's hardened parser.

    :param xmlstring: XML document or fragment to parse.
    :returns: The root element of the parsed content.
    '''
    root = lxml.etree.fromstring(xmlstring, parser=parser)  # nosec - safe parser
    return root
def xml_elem(name, parent=None, **kwargs):
    '''Create a new XML element, optionally attached to a parent.

    :param name: Tag name of the element to create.
    :param parent: Element to append the new element to. If None, a
                   stand-alone element is created instead.
    :param kwargs: Passed through unchanged to the lxml element factory.
    :returns: The newly created element.
    '''
    if parent is None:
        # No parent given: build a detached (root) element.
        return lxml.etree.Element(name, **kwargs)
    return lxml.etree.SubElement(parent, name, **kwargs)
def ensure_format(val, allowed, required, allowed_values=None, defaults=None):