From 0eb12f9133ec2f977b88b0f8c07b3249b5b7051d Mon Sep 17 00:00:00 2001 From: Lars Kiesow Date: Sat, 25 Jan 2020 15:58:49 +0100 Subject: [PATCH] Prevent XML Denial of Service Attacks This patch prevents entity expansion for provided XML content to guard against XML denial of service attacks like XML bomb or Billion laughs attack. --- feedgen/entry.py | 93 ++++++++++++++-------------- feedgen/ext/dc.py | 13 ++-- feedgen/ext/geo_entry.py | 28 ++++----- feedgen/ext/media.py | 12 ++-- feedgen/ext/podcast.py | 33 +++++----- feedgen/ext/podcast_entry.py | 23 ++++--- feedgen/ext/syndication.py | 5 +- feedgen/ext/torrent.py | 18 +++--- feedgen/feed.py | 113 +++++++++++++++++------------------ feedgen/util.py | 22 +++++++ 10 files changed, 180 insertions(+), 180 deletions(-) diff --git a/feedgen/entry.py b/feedgen/entry.py index 8617bb2..66400ba 100644 --- a/feedgen/entry.py +++ b/feedgen/entry.py @@ -3,7 +3,7 @@ feedgen.entry ~~~~~~~~~~~~~ - :copyright: 2013, Lars Kiesow + :copyright: 2013-2020, Lars Kiesow :license: FreeBSD and LGPL, see license.* for more details. ''' @@ -13,10 +13,11 @@ from datetime import datetime import dateutil.parser import dateutil.tz import warnings -from lxml import etree + +from lxml.etree import CDATA # nosec - adding CDATA entry is safe from feedgen.compat import string_types -from feedgen.util import ensure_format, formatRFC2822 +from feedgen.util import ensure_format, formatRFC2822, xml_fromstring, xml_elem def _add_text_elm(entry, data, name): @@ -24,7 +25,7 @@ def _add_text_elm(entry, data, name): if not data: return - elm = etree.SubElement(entry, name) + elm = xml_elem(name, entry) type_ = data.get('type') if data.get('src'): if name != 'content': @@ -34,16 +35,14 @@ def _add_text_elm(entry, data, name): elif data.get(name): # Surround xhtml with a div tag, parse it and embed it if type_ == 'xhtml': - elm.append(etree.fromstring( - '
<div xmlns="http://www.w3.org/1999/xhtml">' + - data.get(name) + '</div>
')) + xhtml = '<div xmlns="http://www.w3.org/1999/xhtml">
' \ + + data.get(name) + '</div>
' + elm.append(xml_fromstring(xhtml)) elif type_ == 'CDATA': - elm.text = etree.CDATA( - data.get(name)) + elm.text = CDATA(data.get(name)) # Parse XML and embed it elif type_ and (type_.endswith('/xml') or type_.endswith('+xml')): - elm.append(etree.fromstring( - data[name])) + elm.append(xml_fromstring(data[name])) # Embed the text in escaped form elif not type_ or type_.startswith('text') or type_ == 'html': elm.text = data.get(name) @@ -102,14 +101,14 @@ class FeedEntry(object): def atom_entry(self, extensions=True): '''Create an ATOM entry and return it.''' - entry = etree.Element('entry') + entry = xml_elem('entry') if not (self.__atom_id and self.__atom_title and self.__atom_updated): raise ValueError('Required fields not set') - id = etree.SubElement(entry, 'id') + id = xml_elem('id', entry) id.text = self.__atom_id - title = etree.SubElement(entry, 'title') + title = xml_elem('title', entry) title.text = self.__atom_title - updated = etree.SubElement(entry, 'updated') + updated = xml_elem('updated', entry) updated.text = self.__atom_updated.isoformat() # An entry must contain an alternate link if there is no content @@ -125,20 +124,20 @@ class FeedEntry(object): # Atom requires a name. Skip elements without. 
if not a.get('name'): continue - author = etree.SubElement(entry, 'author') - name = etree.SubElement(author, 'name') + author = xml_elem('author', entry) + name = xml_elem('name', author) name.text = a.get('name') if a.get('email'): - email = etree.SubElement(author, 'email') + email = xml_elem('email', author) email.text = a.get('email') if a.get('uri'): - uri = etree.SubElement(author, 'uri') + uri = xml_elem('uri', author) uri.text = a.get('uri') _add_text_elm(entry, self.__atom_content, 'content') for l in self.__atom_link or []: - link = etree.SubElement(entry, 'link', href=l['href']) + link = xml_elem('link', entry, href=l['href']) if l.get('rel'): link.attrib['rel'] = l['rel'] if l.get('type'): @@ -153,7 +152,7 @@ class FeedEntry(object): _add_text_elm(entry, self.__atom_summary, 'summary') for c in self.__atom_category or []: - cat = etree.SubElement(entry, 'category', term=c['term']) + cat = xml_elem('category', entry, term=c['term']) if c.get('scheme'): cat.attrib['scheme'] = c['scheme'] if c.get('label'): @@ -164,32 +163,31 @@ class FeedEntry(object): # Atom requires a name. Skip elements without. 
if not c.get('name'): continue - contrib = etree.SubElement(entry, 'contributor') - name = etree.SubElement(contrib, 'name') + contrib = xml_elem('contributor', entry) + name = xml_elem('name', contrib) name.text = c.get('name') if c.get('email'): - email = etree.SubElement(contrib, 'email') + email = xml_elem('email', contrib) email.text = c.get('email') if c.get('uri'): - uri = etree.SubElement(contrib, 'uri') + uri = xml_elem('uri', contrib) uri.text = c.get('uri') if self.__atom_published: - published = etree.SubElement(entry, 'published') + published = xml_elem('published', entry) published.text = self.__atom_published.isoformat() if self.__atom_rights: - rights = etree.SubElement(entry, 'rights') + rights = xml_elem('rights', entry) rights.text = self.__atom_rights if self.__atom_source: - source = etree.SubElement(entry, 'source') + source = xml_elem('source', entry) if self.__atom_source.get('title'): - source_title = etree.SubElement(source, 'title') + source_title = xml_elem('title', source) source_title.text = self.__atom_source['title'] if self.__atom_source.get('link'): - etree.SubElement(source, 'link', - href=self.__atom_source['link']) + xml_elem('link', source, href=self.__atom_source['link']) if extensions: for ext in self.__extensions.values() or []: @@ -200,60 +198,59 @@ class FeedEntry(object): def rss_entry(self, extensions=True): '''Create a RSS item and return it.''' - entry = etree.Element('item') + entry = xml_elem('item') if not (self.__rss_title or self.__rss_description or self.__rss_content): raise ValueError('Required fields not set') if self.__rss_title: - title = etree.SubElement(entry, 'title') + title = xml_elem('title', entry) title.text = self.__rss_title if self.__rss_link: - link = etree.SubElement(entry, 'link') + link = xml_elem('link', entry) link.text = self.__rss_link if self.__rss_description and self.__rss_content: - description = etree.SubElement(entry, 'description') + description = xml_elem('description', entry) 
description.text = self.__rss_description XMLNS_CONTENT = 'http://purl.org/rss/1.0/modules/content/' - content = etree.SubElement(entry, '{%s}encoded' % XMLNS_CONTENT) - content.text = etree.CDATA(self.__rss_content['content']) \ + content = xml_elem('{%s}encoded' % XMLNS_CONTENT, entry) + content.text = CDATA(self.__rss_content['content']) \ if self.__rss_content.get('type', '') == 'CDATA' \ else self.__rss_content['content'] elif self.__rss_description: - description = etree.SubElement(entry, 'description') + description = xml_elem('description', entry) description.text = self.__rss_description elif self.__rss_content: - description = etree.SubElement(entry, 'description') - description.text = etree.CDATA(self.__rss_content['content']) \ + description = xml_elem('description', entry) + description.text = CDATA(self.__rss_content['content']) \ if self.__rss_content.get('type', '') == 'CDATA' \ else self.__rss_content['content'] for a in self.__rss_author or []: - author = etree.SubElement(entry, 'author') + author = xml_elem('author', entry) author.text = a if self.__rss_guid.get('guid'): - guid = etree.SubElement(entry, 'guid') + guid = xml_elem('guid', entry) guid.text = self.__rss_guid['guid'] permaLink = str(self.__rss_guid.get('permalink', False)).lower() guid.attrib['isPermaLink'] = permaLink for cat in self.__rss_category or []: - category = etree.SubElement(entry, 'category') + category = xml_elem('category', entry) category.text = cat['value'] if cat.get('domain'): category.attrib['domain'] = cat['domain'] if self.__rss_comments: - comments = etree.SubElement(entry, 'comments') + comments = xml_elem('comments', entry) comments.text = self.__rss_comments if self.__rss_enclosure: - enclosure = etree.SubElement(entry, 'enclosure') + enclosure = xml_elem('enclosure', entry) enclosure.attrib['url'] = self.__rss_enclosure['url'] enclosure.attrib['length'] = self.__rss_enclosure['length'] enclosure.attrib['type'] = self.__rss_enclosure['type'] if 
self.__rss_pubDate: - pubDate = etree.SubElement(entry, 'pubDate') + pubDate = xml_elem('pubDate', entry) pubDate.text = formatRFC2822(self.__rss_pubDate) if self.__rss_source: - source = etree.SubElement(entry, 'source', - url=self.__rss_source['url']) + source = xml_elem('source', entry, url=self.__rss_source['url']) source.text = self.__rss_source['title'] if extensions: diff --git a/feedgen/ext/dc.py b/feedgen/ext/dc.py index bc4cb7f..f731c0b 100644 --- a/feedgen/ext/dc.py +++ b/feedgen/ext/dc.py @@ -13,9 +13,8 @@ :license: FreeBSD and LGPL, see license.* for more details. ''' -from lxml import etree - from feedgen.ext.base import BaseExtension +from feedgen.util import xml_elem class DcBaseExtension(BaseExtension): @@ -45,10 +44,10 @@ class DcBaseExtension(BaseExtension): def extend_ns(self): return {'dc': 'http://purl.org/dc/elements/1.1/'} - def _extend_xml(self, xml_elem): - '''Extend xml_elem with set DC fields. + def _extend_xml(self, xml_element): + '''Extend xml_element with set DC fields. 
- :param xml_elem: etree element + :param xml_element: etree element ''' DCELEMENTS_NS = 'http://purl.org/dc/elements/1.1/' @@ -58,8 +57,8 @@ class DcBaseExtension(BaseExtension): 'identifier']: if hasattr(self, '_dcelem_%s' % elem): for val in getattr(self, '_dcelem_%s' % elem) or []: - node = etree.SubElement(xml_elem, - '{%s}%s' % (DCELEMENTS_NS, elem)) + node = xml_elem('{%s}%s' % (DCELEMENTS_NS, elem), + xml_element) node.text = val def extend_atom(self, atom_feed): diff --git a/feedgen/ext/geo_entry.py b/feedgen/ext/geo_entry.py index 2ad6611..bb06cc2 100644 --- a/feedgen/ext/geo_entry.py +++ b/feedgen/ext/geo_entry.py @@ -12,8 +12,8 @@ import numbers import warnings -from lxml import etree from feedgen.ext.base import BaseEntryExtension +from feedgen.util import xml_elem class GeoRSSPolygonInteriorWarning(Warning): @@ -86,49 +86,43 @@ class GeoEntryExtension(BaseEntryExtension): GEO_NS = 'http://www.georss.org/georss' if self.__point: - point = etree.SubElement(entry, '{%s}point' % GEO_NS) + point = xml_elem('{%s}point' % GEO_NS, entry) point.text = self.__point if self.__line: - line = etree.SubElement(entry, '{%s}line' % GEO_NS) + line = xml_elem('{%s}line' % GEO_NS, entry) line.text = self.__line if self.__polygon: - polygon = etree.SubElement(entry, '{%s}polygon' % GEO_NS) + polygon = xml_elem('{%s}polygon' % GEO_NS, entry) polygon.text = self.__polygon if self.__box: - box = etree.SubElement(entry, '{%s}box' % GEO_NS) + box = xml_elem('{%s}box' % GEO_NS, entry) box.text = self.__box if self.__featuretypetag: - featuretypetag = etree.SubElement( - entry, - '{%s}featuretypetag' % GEO_NS - ) + featuretypetag = xml_elem('{%s}featuretypetag' % GEO_NS, entry) featuretypetag.text = self.__featuretypetag if self.__relationshiptag: - relationshiptag = etree.SubElement( - entry, - '{%s}relationshiptag' % GEO_NS - ) + relationshiptag = xml_elem('{%s}relationshiptag' % GEO_NS, entry) relationshiptag.text = self.__relationshiptag if self.__featurename: - featurename 
= etree.SubElement(entry, '{%s}featurename' % GEO_NS) + featurename = xml_elem('{%s}featurename' % GEO_NS, entry) featurename.text = self.__featurename if self.__elev: - elevation = etree.SubElement(entry, '{%s}elev' % GEO_NS) + elevation = xml_elem('{%s}elev' % GEO_NS, entry) elevation.text = str(self.__elev) if self.__floor: - floor = etree.SubElement(entry, '{%s}floor' % GEO_NS) + floor = xml_elem('{%s}floor' % GEO_NS, entry) floor.text = str(self.__floor) if self.__radius: - radius = etree.SubElement(entry, '{%s}radius' % GEO_NS) + radius = xml_elem('{%s}radius' % GEO_NS, entry) radius.text = str(self.__radius) return entry diff --git a/feedgen/ext/media.py b/feedgen/ext/media.py index 25d561a..74a5317 100644 --- a/feedgen/ext/media.py +++ b/feedgen/ext/media.py @@ -10,10 +10,8 @@ :license: FreeBSD and LGPL, see license.* for more details. ''' -from lxml import etree - from feedgen.ext.base import BaseEntryExtension, BaseExtension -from feedgen.util import ensure_format +from feedgen.util import ensure_format, xml_elem MEDIA_NS = 'http://search.yahoo.com/mrss/' @@ -45,10 +43,10 @@ class MediaEntryExtension(BaseEntryExtension): # Define current media:group group = groups.get(media_content.get('group')) if group is None: - group = etree.SubElement(entry, '{%s}group' % MEDIA_NS) + group = xml_elem('{%s}group' % MEDIA_NS, entry) groups[media_content.get('group')] = group # Add content - content = etree.SubElement(group, '{%s}content' % MEDIA_NS) + content = xml_elem('{%s}content' % MEDIA_NS, group) for attr in ('url', 'fileSize', 'type', 'medium', 'isDefault', 'expression', 'bitrate', 'framerate', 'samplingrate', 'channels', 'duration', 'height', 'width', 'lang'): @@ -59,10 +57,10 @@ class MediaEntryExtension(BaseEntryExtension): # Define current media:group group = groups.get(media_thumbnail.get('group')) if group is None: - group = etree.SubElement(entry, '{%s}group' % MEDIA_NS) + group = xml_elem('{%s}group' % MEDIA_NS, entry) 
groups[media_thumbnail.get('group')] = group # Add thumbnails - thumbnail = etree.SubElement(group, '{%s}thumbnail' % MEDIA_NS) + thumbnail = xml_elem('{%s}thumbnail' % MEDIA_NS, group) for attr in ('url', 'height', 'width', 'time'): if media_thumbnail.get(attr): thumbnail.set(attr, media_thumbnail[attr]) diff --git a/feedgen/ext/podcast.py b/feedgen/ext/podcast.py index a8af118..4c7eb0b 100644 --- a/feedgen/ext/podcast.py +++ b/feedgen/ext/podcast.py @@ -10,11 +10,9 @@ :license: FreeBSD and LGPL, see license.* for more details. ''' -from lxml import etree - from feedgen.compat import string_types from feedgen.ext.base import BaseExtension -from feedgen.util import ensure_format +from feedgen.util import ensure_format, xml_elem class PodcastExtension(BaseExtension): @@ -47,11 +45,11 @@ class PodcastExtension(BaseExtension): channel = rss_feed[0] if self.__itunes_author: - author = etree.SubElement(channel, '{%s}author' % ITUNES_NS) + author = xml_elem('{%s}author' % ITUNES_NS, channel) author.text = self.__itunes_author if self.__itunes_block is not None: - block = etree.SubElement(channel, '{%s}block' % ITUNES_NS) + block = xml_elem('{%s}block' % ITUNES_NS, channel) block.text = 'yes' if self.__itunes_block else 'no' for c in self.__itunes_category or []: @@ -60,45 +58,42 @@ class PodcastExtension(BaseExtension): category = channel.find( '{%s}category[@text="%s"]' % (ITUNES_NS, c.get('cat'))) if category is None: - category = etree.SubElement(channel, - '{%s}category' % ITUNES_NS) + category = xml_elem('{%s}category' % ITUNES_NS, channel) category.attrib['text'] = c.get('cat') if c.get('sub'): - subcategory = etree.SubElement(category, - '{%s}category' % ITUNES_NS) + subcategory = xml_elem('{%s}category' % ITUNES_NS, category) subcategory.attrib['text'] = c.get('sub') if self.__itunes_image: - image = etree.SubElement(channel, '{%s}image' % ITUNES_NS) + image = xml_elem('{%s}image' % ITUNES_NS, channel) image.attrib['href'] = self.__itunes_image if 
self.__itunes_explicit in ('yes', 'no', 'clean'): - explicit = etree.SubElement(channel, '{%s}explicit' % ITUNES_NS) + explicit = xml_elem('{%s}explicit' % ITUNES_NS, channel) explicit.text = self.__itunes_explicit if self.__itunes_complete in ('yes', 'no'): - complete = etree.SubElement(channel, '{%s}complete' % ITUNES_NS) + complete = xml_elem('{%s}complete' % ITUNES_NS, channel) complete.text = self.__itunes_complete if self.__itunes_new_feed_url: - new_feed_url = etree.SubElement(channel, - '{%s}new-feed-url' % ITUNES_NS) + new_feed_url = xml_elem('{%s}new-feed-url' % ITUNES_NS, channel) new_feed_url.text = self.__itunes_new_feed_url if self.__itunes_owner: - owner = etree.SubElement(channel, '{%s}owner' % ITUNES_NS) - owner_name = etree.SubElement(owner, '{%s}name' % ITUNES_NS) + owner = xml_elem('{%s}owner' % ITUNES_NS, channel) + owner_name = xml_elem('{%s}name' % ITUNES_NS, owner) owner_name.text = self.__itunes_owner.get('name') - owner_email = etree.SubElement(owner, '{%s}email' % ITUNES_NS) + owner_email = xml_elem('{%s}email' % ITUNES_NS, owner) owner_email.text = self.__itunes_owner.get('email') if self.__itunes_subtitle: - subtitle = etree.SubElement(channel, '{%s}subtitle' % ITUNES_NS) + subtitle = xml_elem('{%s}subtitle' % ITUNES_NS, channel) subtitle.text = self.__itunes_subtitle if self.__itunes_summary: - summary = etree.SubElement(channel, '{%s}summary' % ITUNES_NS) + summary = xml_elem('{%s}summary' % ITUNES_NS, channel) summary.text = self.__itunes_summary return rss_feed diff --git a/feedgen/ext/podcast_entry.py b/feedgen/ext/podcast_entry.py index 4fa6128..2a3771f 100644 --- a/feedgen/ext/podcast_entry.py +++ b/feedgen/ext/podcast_entry.py @@ -10,9 +10,8 @@ :license: FreeBSD and LGPL, see license.* for more details. 
''' -from lxml import etree - from feedgen.ext.base import BaseEntryExtension +from feedgen.util import xml_elem class PodcastEntryExtension(BaseEntryExtension): @@ -40,43 +39,43 @@ class PodcastEntryExtension(BaseEntryExtension): ITUNES_NS = 'http://www.itunes.com/dtds/podcast-1.0.dtd' if self.__itunes_author: - author = etree.SubElement(entry, '{%s}author' % ITUNES_NS) + author = xml_elem('{%s}author' % ITUNES_NS, entry) author.text = self.__itunes_author if self.__itunes_block is not None: - block = etree.SubElement(entry, '{%s}block' % ITUNES_NS) + block = xml_elem('{%s}block' % ITUNES_NS, entry) block.text = 'yes' if self.__itunes_block else 'no' if self.__itunes_image: - image = etree.SubElement(entry, '{%s}image' % ITUNES_NS) + image = xml_elem('{%s}image' % ITUNES_NS, entry) image.attrib['href'] = self.__itunes_image if self.__itunes_duration: - duration = etree.SubElement(entry, '{%s}duration' % ITUNES_NS) + duration = xml_elem('{%s}duration' % ITUNES_NS, entry) duration.text = self.__itunes_duration if self.__itunes_explicit in ('yes', 'no', 'clean'): - explicit = etree.SubElement(entry, '{%s}explicit' % ITUNES_NS) + explicit = xml_elem('{%s}explicit' % ITUNES_NS, entry) explicit.text = self.__itunes_explicit if self.__itunes_is_closed_captioned is not None: - is_closed_captioned = etree.SubElement( - entry, '{%s}isClosedCaptioned' % ITUNES_NS) + is_closed_captioned = xml_elem( + '{%s}isClosedCaptioned' % ITUNES_NS, entry) if self.__itunes_is_closed_captioned: is_closed_captioned.text = 'yes' else: is_closed_captioned.text = 'no' if self.__itunes_order is not None and self.__itunes_order >= 0: - order = etree.SubElement(entry, '{%s}order' % ITUNES_NS) + order = xml_elem('{%s}order' % ITUNES_NS, entry) order.text = str(self.__itunes_order) if self.__itunes_subtitle: - subtitle = etree.SubElement(entry, '{%s}subtitle' % ITUNES_NS) + subtitle = xml_elem('{%s}subtitle' % ITUNES_NS, entry) subtitle.text = self.__itunes_subtitle if self.__itunes_summary: - 
summary = etree.SubElement(entry, '{%s}summary' % ITUNES_NS) + summary = xml_elem('{%s}summary' % ITUNES_NS, entry) summary.text = self.__itunes_summary return entry diff --git a/feedgen/ext/syndication.py b/feedgen/ext/syndication.py index 0141369..016b144 100644 --- a/feedgen/ext/syndication.py +++ b/feedgen/ext/syndication.py @@ -10,9 +10,8 @@ See below for details http://web.resource.org/rss/1.0/modules/syndication/ ''' -from lxml import etree - from feedgen.ext.base import BaseExtension +from feedgen.util import xml_elem SYNDICATION_NS = 'http://purl.org/rss/1.0/modules/syndication/' PERIOD_TYPE = ('hourly', 'daily', 'weekly', 'monthly', 'yearly') @@ -20,7 +19,7 @@ PERIOD_TYPE = ('hourly', 'daily', 'weekly', 'monthly', 'yearly') def _set_value(channel, name, value): if value: - newelem = etree.SubElement(channel, '{%s}' % SYNDICATION_NS + name) + newelem = xml_elem('{%s}' % SYNDICATION_NS + name, channel) newelem.text = value diff --git a/feedgen/ext/torrent.py b/feedgen/ext/torrent.py index e26d0bb..5548a81 100644 --- a/feedgen/ext/torrent.py +++ b/feedgen/ext/torrent.py @@ -10,9 +10,8 @@ :license: FreeBSD and LGPL, see license.* for more details. ''' -from lxml import etree - from feedgen.ext.base import BaseEntryExtension, BaseExtension +from feedgen.util import xml_elem TORRENT_NS = 'http://xmlns.ezrss.it/0.1/dtd/' @@ -41,30 +40,29 @@ class TorrentEntryExtension(BaseEntryExtension): :param feed: The RSS item XML element to use. 
''' if self.__torrent_filename: - filename = etree.SubElement(entry, '{%s}filename' % TORRENT_NS) + filename = xml_elem('{%s}filename' % TORRENT_NS, entry) filename.text = self.__torrent_filename if self.__torrent_contentlength: - contentlength = etree.SubElement(entry, - '{%s}contentlength' % TORRENT_NS) + contentlength = xml_elem('{%s}contentlength' % TORRENT_NS, entry) contentlength.text = self.__torrent_contentlength if self.__torrent_infohash: - infohash = etree.SubElement(entry, '{%s}infohash' % TORRENT_NS) + infohash = xml_elem('{%s}infohash' % TORRENT_NS, entry) infohash.text = self.__torrent_infohash - magnet = etree.SubElement(entry, '{%s}magneturi' % TORRENT_NS) + magnet = xml_elem('{%s}magneturi' % TORRENT_NS, entry) magnet.text = 'magnet:?xt=urn:btih:' + self.__torrent_infohash if self.__torrent_seeds: - seeds = etree.SubElement(entry, '{%s}seed' % TORRENT_NS) + seeds = xml_elem('{%s}seed' % TORRENT_NS, entry) seeds.text = self.__torrent_seeds if self.__torrent_peers: - peers = etree.SubElement(entry, '{%s}peers' % TORRENT_NS) + peers = xml_elem('{%s}peers' % TORRENT_NS, entry) peers.text = self.__torrent_peers if self.__torrent_verified: - verified = etree.SubElement(entry, '{%s}verified' % TORRENT_NS) + verified = xml_elem('{%s}verified' % TORRENT_NS, entry) verified.text = self.__torrent_verified def filename(self, torrent_filename=None): diff --git a/feedgen/feed.py b/feedgen/feed.py index b2a206f..9ebd219 100644 --- a/feedgen/feed.py +++ b/feedgen/feed.py @@ -3,7 +3,7 @@ feedgen.feed ~~~~~~~~~~~~ - :copyright: 2013-2016, Lars Kiesow + :copyright: 2013-2020, Lars Kiesow :license: FreeBSD and LGPL, see license.* for more details. 
@@ -14,12 +14,12 @@ from datetime import datetime import dateutil.parser import dateutil.tz -from lxml import etree +from lxml import etree # nosec - not using this for parsing import feedgen.version from feedgen.compat import string_types from feedgen.entry import FeedEntry -from feedgen.util import ensure_format, formatRFC2822 +from feedgen.util import ensure_format, formatRFC2822, xml_elem _feedgen_version = feedgen.version.version_str @@ -47,7 +47,7 @@ class FeedGenerator(object): self.__atom_contributor = None self.__atom_generator = { 'value': 'python-feedgen', - 'uri': 'http://lkiesow.github.io/python-feedgen', + 'uri': 'https://lkiesow.github.io/python-feedgen', 'version': feedgen.version.version_str} # {value*,uri,version} self.__atom_icon = None self.__atom_logo = None @@ -95,9 +95,9 @@ class FeedGenerator(object): if ext.get('atom'): nsmap.update(ext['inst'].extend_ns()) - feed = etree.Element('feed', - xmlns='http://www.w3.org/2005/Atom', - nsmap=nsmap) + feed = xml_elem('feed', + xmlns='http://www.w3.org/2005/Atom', + nsmap=nsmap) if self.__atom_feed_xml_lang: feed.attrib['{http://www.w3.org/XML/1998/namespace}lang'] = \ self.__atom_feed_xml_lang @@ -108,11 +108,11 @@ class FeedGenerator(object): ([] if self.__atom_updated else ['updated']) missing = ', '.join(missing) raise ValueError('Required fields not set (%s)' % missing) - id = etree.SubElement(feed, 'id') + id = xml_elem('id', feed) id.text = self.__atom_id - title = etree.SubElement(feed, 'title') + title = xml_elem('title', feed) title.text = self.__atom_title - updated = etree.SubElement(feed, 'updated') + updated = xml_elem('updated', feed) updated.text = self.__atom_updated.isoformat() # Add author elements @@ -120,18 +120,18 @@ class FeedGenerator(object): # Atom requires a name. Skip elements without. 
if not a.get('name'): continue - author = etree.SubElement(feed, 'author') - name = etree.SubElement(author, 'name') + author = xml_elem('author', feed) + name = xml_elem('name', author) name.text = a.get('name') if a.get('email'): - email = etree.SubElement(author, 'email') + email = xml_elem('email', author) email.text = a.get('email') if a.get('uri'): - uri = etree.SubElement(author, 'uri') + uri = xml_elem('uri', author) uri.text = a.get('uri') for l in self.__atom_link or []: - link = etree.SubElement(feed, 'link', href=l['href']) + link = xml_elem('link', feed, href=l['href']) if l.get('rel'): link.attrib['rel'] = l['rel'] if l.get('type'): @@ -144,7 +144,7 @@ class FeedGenerator(object): link.attrib['length'] = l['length'] for c in self.__atom_category or []: - cat = etree.SubElement(feed, 'category', term=c['term']) + cat = xml_elem('category', feed, term=c['term']) if c.get('scheme'): cat.attrib['scheme'] = c['scheme'] if c.get('label'): @@ -155,18 +155,18 @@ class FeedGenerator(object): # Atom requires a name. Skip elements without. 
if not c.get('name'): continue - contrib = etree.SubElement(feed, 'contributor') - name = etree.SubElement(contrib, 'name') + contrib = xml_elem('contributor', feed) + name = xml_elem('name', contrib) name.text = c.get('name') if c.get('email'): - email = etree.SubElement(contrib, 'email') + email = xml_elem('email', contrib) email.text = c.get('email') if c.get('uri'): - uri = etree.SubElement(contrib, 'uri') + uri = xml_elem('uri', contrib) uri.text = c.get('uri') if self.__atom_generator and self.__atom_generator.get('value'): - generator = etree.SubElement(feed, 'generator') + generator = xml_elem('generator', feed) generator.text = self.__atom_generator['value'] if self.__atom_generator.get('uri'): generator.attrib['uri'] = self.__atom_generator['uri'] @@ -174,19 +174,19 @@ class FeedGenerator(object): generator.attrib['version'] = self.__atom_generator['version'] if self.__atom_icon: - icon = etree.SubElement(feed, 'icon') + icon = xml_elem('icon', feed) icon.text = self.__atom_icon if self.__atom_logo: - logo = etree.SubElement(feed, 'logo') + logo = xml_elem('logo', feed) logo.text = self.__atom_logo if self.__atom_rights: - rights = etree.SubElement(feed, 'rights') + rights = xml_elem('rights', feed) rights.text = self.__atom_rights if self.__atom_subtitle: - subtitle = etree.SubElement(feed, 'subtitle') + subtitle = xml_elem('subtitle', feed) subtitle.text = self.__atom_subtitle if extensions: @@ -255,8 +255,8 @@ class FeedGenerator(object): nsmap.update({'atom': 'http://www.w3.org/2005/Atom', 'content': 'http://purl.org/rss/1.0/modules/content/'}) - feed = etree.Element('rss', version='2.0', nsmap=nsmap) - channel = etree.SubElement(feed, 'channel') + feed = xml_elem('rss', version='2.0', nsmap=nsmap) + channel = xml_elem('channel', feed) if not (self.__rss_title and self.__rss_link and self.__rss_description): @@ -265,18 +265,17 @@ class FeedGenerator(object): ([] if self.__rss_description else ['description']) missing = ', '.join(missing) raise 
ValueError('Required fields not set (%s)' % missing) - title = etree.SubElement(channel, 'title') + title = xml_elem('title', channel) title.text = self.__rss_title - link = etree.SubElement(channel, 'link') + link = xml_elem('link', channel) link.text = self.__rss_link - desc = etree.SubElement(channel, 'description') + desc = xml_elem('description', channel) desc.text = self.__rss_description for ln in self.__atom_link or []: # It is recommended to include a atom self link in rss documents… if ln.get('rel') == 'self': - selflink = etree.SubElement( - channel, '{http://www.w3.org/2005/Atom}link', - href=ln['href'], rel='self') + selflink = xml_elem('{http://www.w3.org/2005/Atom}link', + channel, href=ln['href'], rel='self') if ln.get('type'): selflink.attrib['type'] = ln['type'] if ln.get('hreflang'): @@ -288,12 +287,12 @@ class FeedGenerator(object): break if self.__rss_category: for cat in self.__rss_category: - category = etree.SubElement(channel, 'category') + category = xml_elem('category', channel) category.text = cat['value'] if cat.get('domain'): category.attrib['domain'] = cat['domain'] if self.__rss_cloud: - cloud = etree.SubElement(channel, 'cloud') + cloud = xml_elem('cloud', channel) cloud.attrib['domain'] = self.__rss_cloud.get('domain') cloud.attrib['port'] = self.__rss_cloud.get('port') cloud.attrib['path'] = self.__rss_cloud.get('path') @@ -301,69 +300,69 @@ class FeedGenerator(object): 'registerProcedure') cloud.attrib['protocol'] = self.__rss_cloud.get('protocol') if self.__rss_copyright: - copyright = etree.SubElement(channel, 'copyright') + copyright = xml_elem('copyright', channel) copyright.text = self.__rss_copyright if self.__rss_docs: - docs = etree.SubElement(channel, 'docs') + docs = xml_elem('docs', channel) docs.text = self.__rss_docs if self.__rss_generator: - generator = etree.SubElement(channel, 'generator') + generator = xml_elem('generator', channel) generator.text = self.__rss_generator if self.__rss_image: - image = 
etree.SubElement(channel, 'image') - url = etree.SubElement(image, 'url') + image = xml_elem('image', channel) + url = xml_elem('url', image) url.text = self.__rss_image.get('url') - title = etree.SubElement(image, 'title') + title = xml_elem('title', image) title.text = self.__rss_image.get('title', self.__rss_title) - link = etree.SubElement(image, 'link') + link = xml_elem('link', image) link.text = self.__rss_image.get('link', self.__rss_link) if self.__rss_image.get('width'): - width = etree.SubElement(image, 'width') + width = xml_elem('width', image) width.text = self.__rss_image.get('width') if self.__rss_image.get('height'): - height = etree.SubElement(image, 'height') + height = xml_elem('height', image) height.text = self.__rss_image.get('height') if self.__rss_image.get('description'): - description = etree.SubElement(image, 'description') + description = xml_elem('description', image) description.text = self.__rss_image.get('description') if self.__rss_language: - language = etree.SubElement(channel, 'language') + language = xml_elem('language', channel) language.text = self.__rss_language if self.__rss_lastBuildDate: - lastBuildDate = etree.SubElement(channel, 'lastBuildDate') + lastBuildDate = xml_elem('lastBuildDate', channel) lastBuildDate.text = formatRFC2822(self.__rss_lastBuildDate) if self.__rss_managingEditor: - managingEditor = etree.SubElement(channel, 'managingEditor') + managingEditor = xml_elem('managingEditor', channel) managingEditor.text = self.__rss_managingEditor if self.__rss_pubDate: - pubDate = etree.SubElement(channel, 'pubDate') + pubDate = xml_elem('pubDate', channel) pubDate.text = formatRFC2822(self.__rss_pubDate) if self.__rss_rating: - rating = etree.SubElement(channel, 'rating') + rating = xml_elem('rating', channel) rating.text = self.__rss_rating if self.__rss_skipHours: - skipHours = etree.SubElement(channel, 'skipHours') + skipHours = xml_elem('skipHours', channel) for h in self.__rss_skipHours: - hour = 
etree.SubElement(skipHours, 'hour') + hour = xml_elem('hour', skipHours) hour.text = str(h) if self.__rss_skipDays: - skipDays = etree.SubElement(channel, 'skipDays') + skipDays = xml_elem('skipDays', channel) for d in self.__rss_skipDays: - day = etree.SubElement(skipDays, 'day') + day = xml_elem('day', skipDays) day.text = d if self.__rss_textInput: - textInput = etree.SubElement(channel, 'textInput') + textInput = xml_elem('textInput', channel) textInput.attrib['title'] = self.__rss_textInput.get('title') textInput.attrib['description'] = \ self.__rss_textInput.get('description') textInput.attrib['name'] = self.__rss_textInput.get('name') textInput.attrib['link'] = self.__rss_textInput.get('link') if self.__rss_ttl: - ttl = etree.SubElement(channel, 'ttl') + ttl = xml_elem('ttl', channel) ttl.text = str(self.__rss_ttl) if self.__rss_webMaster: - webMaster = etree.SubElement(channel, 'webMaster') + webMaster = xml_elem('webMaster', channel) webMaster.text = self.__rss_webMaster if extensions: diff --git a/feedgen/util.py b/feedgen/util.py index ca4ad58..8b4e6e5 100644 --- a/feedgen/util.py +++ b/feedgen/util.py @@ -10,6 +10,28 @@ ''' import locale import sys +import lxml # nosec - we configure a safe parser below + +# Configure a safe parser which does not allow XML entity expansion +parser = lxml.etree.XMLParser( + attribute_defaults=False, + dtd_validation=False, + load_dtd=False, + no_network=True, + recover=False, + remove_pis=True, + resolve_entities=False, + huge_tree=False) + + +def xml_fromstring(xmlstring): + return lxml.etree.fromstring(xmlstring, parser) # nosec - safe parser + + +def xml_elem(name, parent=None, **kwargs): + if parent is not None: + return lxml.etree.SubElement(parent, name, **kwargs) + return lxml.etree.Element(name, **kwargs) def ensure_format(val, allowed, required, allowed_values=None, defaults=None):