Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
N
news
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Sartika Aritonang
news
Commits
b9540267
Commit
b9540267
authored
5 years ago
by
Sartika Aritonang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Upload New File
parent
50a6a872
master
…
patch-6
No related merge requests found
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
896 additions
and
0 deletions
+896
-0
sanitizer.py
...b/site-packages/pip/_vendor/html5lib/filters/sanitizer.py
+896
-0
No files found.
stbi/Lib/site-packages/pip/_vendor/html5lib/filters/sanitizer.py
0 → 100644
View file @
b9540267
from
__future__
import
absolute_import
,
division
,
unicode_literals
import
re
from
xml.sax.saxutils
import
escape
,
unescape
from
pip._vendor.six.moves
import
urllib_parse
as
urlparse
from
.
import
base
from
..constants
import
namespaces
,
prefixes
# Names exported by ``from <this module> import *``: only the ``Filter`` class.
__all__ = ["Filter"]
# Default element allow-list used by ``Filter``: a frozenset of
# (namespace URI, local name) pairs.  The tag names are grouped by the
# key of ``namespaces`` they belong to and expanded into pairs below.
allowed_elements = frozenset(
    (namespaces[ns_key], tag_name)
    for ns_key, tag_names in (
        ('html', (
            'a', 'abbr', 'acronym', 'address', 'area', 'article', 'aside',
            'audio', 'b', 'big', 'blockquote', 'br', 'button', 'canvas',
            'caption', 'center', 'cite', 'code', 'col', 'colgroup',
            'command', 'datagrid', 'datalist', 'dd', 'del', 'details',
            'dfn', 'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source',
            'fieldset', 'figcaption', 'figure', 'footer', 'font', 'form',
            'header', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img',
            'input', 'ins', 'keygen', 'kbd', 'label', 'legend', 'li', 'm',
            'map', 'menu', 'meter', 'multicol', 'nav', 'nextid', 'ol',
            'output', 'optgroup', 'option', 'p', 'pre', 'progress', 'q',
            's', 'samp', 'section', 'select', 'small', 'sound', 'source',
            'spacer', 'span', 'strike', 'strong', 'sub', 'sup', 'table',
            'tbody', 'td', 'textarea', 'time', 'tfoot', 'th', 'thead', 'tr',
            'tt', 'u', 'ul', 'var', 'video',
        )),
        ('mathml', (
            'maction', 'math', 'merror', 'mfrac', 'mi', 'mmultiscripts',
            'mn', 'mo', 'mover', 'mpadded', 'mphantom', 'mprescripts',
            'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub', 'msubsup',
            'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover',
            'none',
        )),
        ('svg', (
            'a', 'animate', 'animateColor', 'animateMotion',
            'animateTransform', 'clipPath', 'circle', 'defs', 'desc',
            'ellipse', 'font-face', 'font-face-name', 'font-face-src', 'g',
            'glyph', 'hkern', 'linearGradient', 'line', 'marker', 'metadata',
            'missing-glyph', 'mpath', 'path', 'polygon', 'polyline',
            'radialGradient', 'rect', 'set', 'stop', 'svg', 'switch', 'text',
            'title', 'tspan', 'use',
        )),
    )
    for tag_name in tag_names
)
# Default attribute allow-list used by ``Filter``: a frozenset of
# (namespace URI or None, local name) pairs.  Each group below pairs a key
# of ``namespaces`` (or None for un-namespaced attributes) with the
# attribute names to allow under it.  A name may appear in more than one
# group; the resulting set holds it once.
allowed_attributes = frozenset(
    (namespaces[ns_key] if ns_key is not None else None, attr_name)
    for ns_key, attr_names in (
        # HTML attributes
        (None, (
            'abbr', 'accept', 'accept-charset', 'accesskey', 'action',
            'align', 'alt', 'autocomplete', 'autofocus', 'axis',
            'background', 'balance', 'bgcolor', 'bgproperties', 'border',
            'bordercolor', 'bordercolordark', 'bordercolorlight',
            'bottompadding', 'cellpadding', 'cellspacing', 'ch', 'challenge',
            'char', 'charoff', 'choff', 'charset', 'checked', 'cite',
            'class', 'clear', 'color', 'cols', 'colspan', 'compact',
            'contenteditable', 'controls', 'coords', 'data', 'datafld',
            'datapagesize', 'datasrc', 'datetime', 'default', 'delay', 'dir',
            'disabled', 'draggable', 'dynsrc', 'enctype', 'end', 'face',
            'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers',
            'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang',
            'hspace', 'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label',
            'leftspacing', 'lang', 'list', 'longdesc', 'loop', 'loopcount',
            'loopend', 'loopstart', 'low', 'lowsrc', 'max', 'maxlength',
            'media', 'method', 'min', 'multiple', 'name', 'nohref',
            'noshade', 'nowrap', 'open', 'optimum', 'pattern', 'ping',
            'point-size', 'poster', 'pqg', 'preload', 'prompt', 'radiogroup',
            'readonly', 'rel', 'repeat-max', 'repeat-min', 'replace',
            'required', 'rev', 'rightspacing', 'rows', 'rowspan', 'rules',
            'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
            'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
            'template', 'title', 'toppadding', 'type', 'unselectable',
            'usemap', 'urn', 'valign', 'value', 'variable', 'volume',
            'vspace', 'vrml', 'width', 'wrap',
        )),
        ('xml', ('lang',)),
        # MathML attributes
        (None, (
            'actiontype', 'align', 'columnalign', 'columnlines',
            'columnspacing', 'columnspan', 'depth', 'display',
            'displaystyle', 'equalcolumns', 'equalrows', 'fence',
            'fontstyle', 'fontweight', 'frame', 'height', 'linethickness',
            'lspace', 'mathbackground', 'mathcolor', 'mathvariant',
            'maxsize', 'minsize', 'other', 'rowalign', 'rowlines',
            'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection',
            'separator', 'stretchy', 'width',
        )),
        ('xlink', ('href', 'show', 'type')),
        # SVG attributes
        (None, (
            'accent-height', 'accumulate', 'additive', 'alphabetic',
            'arabic-form', 'ascent', 'attributeName', 'attributeType',
            'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
            'class', 'clip-path', 'color', 'color-rendering', 'content',
            'cx', 'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end',
            'fill', 'fill-opacity', 'fill-rule', 'font-family', 'font-size',
            'font-stretch', 'font-style', 'font-variant', 'font-weight',
            'from', 'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits',
            'hanging', 'height', 'horiz-adv-x', 'horiz-origin-x', 'id',
            'ideographic', 'k', 'keyPoints', 'keySplines', 'keyTimes',
            'lang', 'marker-end', 'marker-mid', 'marker-start',
            'markerHeight', 'markerUnits', 'markerWidth', 'mathematical',
            'max', 'min', 'name', 'offset', 'opacity', 'orient', 'origin',
            'overline-position', 'overline-thickness', 'panose-1', 'path',
            'pathLength', 'points', 'preserveAspectRatio', 'r', 'refX',
            'refY', 'repeatCount', 'repeatDur', 'requiredExtensions',
            'requiredFeatures', 'restart', 'rotate', 'rx', 'ry', 'slope',
            'stemh', 'stemv', 'stop-color', 'stop-opacity',
            'strikethrough-position', 'strikethrough-thickness', 'stroke',
            'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
            'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
            'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to',
            'transform', 'type', 'u1', 'u2', 'underline-position',
            'underline-thickness', 'unicode', 'unicode-range',
            'units-per-em', 'values', 'version', 'viewBox', 'visibility',
            'width', 'widths', 'x', 'x-height', 'x1', 'x2',
        )),
        ('xlink', (
            'actuate', 'arcrole', 'href', 'role', 'show', 'title', 'type',
        )),
        ('xml', ('base', 'lang', 'space')),
        (None, ('y', 'y1', 'y2', 'zoomAndPan')),
    )
    for attr_name in attr_names
)
# Default set of attributes whose values ``Filter`` treats as URIs and
# checks against the allowed protocols.
attr_val_is_uri = frozenset(
    (None, plain_name) for plain_name in (
        'href', 'src', 'cite', 'action', 'longdesc', 'poster',
        'background', 'datasrc', 'dynsrc', 'lowsrc', 'ping',
    )
) | frozenset((
    (namespaces['xlink'], 'href'),
    (namespaces['xml'], 'base'),
))
# Default set of un-namespaced SVG attributes whose values may contain
# ``url(...)`` references; ``Filter`` rewrites the values of these.
svg_attr_val_allows_ref = frozenset(
    (None, ref_attr) for ref_attr in (
        'clip-path', 'color-profile', 'cursor', 'fill', 'filter', 'marker',
        'marker-start', 'marker-mid', 'marker-end', 'mask', 'stroke',
    )
)
# Default set of SVG element names, stored as (None, name) pairs, that
# ``Filter`` consults when deciding whether to drop an ``xlink:href``.
svg_allow_local_href = frozenset(
    (None, element_name) for element_name in (
        'altGlyph', 'animate', 'animateColor', 'animateMotion',
        'animateTransform', 'cursor', 'feImage', 'filter', 'linearGradient',
        'pattern', 'radialGradient', 'textpath', 'tref', 'set', 'use',
    )
)
# Default set of CSS property names that ``Filter.sanitize_css`` keeps
# without inspecting the value's keywords.
allowed_css_properties = frozenset("""
    azimuth background-color border-bottom-color border-collapse
    border-color border-left-color border-right-color border-top-color
    clear color cursor direction display elevation float font font-family
    font-size font-style font-variant font-weight height letter-spacing
    line-height overflow pause pause-after pause-before pitch pitch-range
    richness speak speak-header speak-numeral speak-punctuation speech-rate
    stress text-align text-decoration text-indent unicode-bidi
    vertical-align voice-family volume white-space width
""".split())
# Default set of CSS value keywords that ``Filter.sanitize_css`` accepts in
# background/border/margin/padding declarations.
allowed_css_keywords = frozenset("""
    auto aqua black block blue bold both bottom brown center collapse
    dashed dotted fuchsia gray green !important italic left lime maroon
    medium none navy normal nowrap olive pointer purple red right solid
    silver teal top transparent underline white yellow
""".split())
# Default set of SVG presentation properties that ``Filter.sanitize_css``
# keeps when they appear in a ``style`` value.
allowed_svg_properties = frozenset([
    'fill', 'fill-opacity', 'fill-rule',
    'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
    'stroke-opacity',
])
# Default set of URI schemes that ``Filter`` lets through in URI-valued
# attributes.
allowed_protocols = frozenset("""
    ed2k ftp http https irc mailto news gopher nntp telnet webcal xmpp
    callto feed urn aim rsync tag ssh sftp rtsp afs data
""".split())
# Default set of content types that ``Filter`` accepts in ``data:`` URIs.
allowed_content_types = frozenset([
    'image/png', 'image/jpeg', 'image/gif', 'image/webp', 'image/bmp',
    'text/plain',
])
# Matches the part of a ``data:`` URI that follows the scheme and exposes
# the leading content type as the named group ``content_type``.  Compiled
# in verbose mode, so whitespace and ``#`` comments inside the pattern are
# ignored by the regex engine.
data_content_type = re.compile(
    pattern=r'''
^
# Match a content type <application>/<type>
(?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+)
# Match any character set and encoding
(?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?)
|(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?)
# Assume the rest is data
,.*
$
''',
    flags=re.VERBOSE,
)
class Filter(base.Filter):
    """Sanitizes token stream of XHTML+MathML+SVG and of inline style attributes

    Tokens for elements outside ``allowed_elements`` are rewritten as
    ``Characters`` tokens carrying the tag's markup as text; attributes
    outside ``allowed_attributes`` are dropped; URI-valued attributes are
    checked against ``allowed_protocols``; ``style`` values are reduced by
    :meth:`sanitize_css`.  ``Comment`` tokens are discarded.
    """

    def __init__(self,
                 source,
                 allowed_elements=allowed_elements,
                 allowed_attributes=allowed_attributes,
                 allowed_css_properties=allowed_css_properties,
                 allowed_css_keywords=allowed_css_keywords,
                 allowed_svg_properties=allowed_svg_properties,
                 allowed_protocols=allowed_protocols,
                 allowed_content_types=allowed_content_types,
                 attr_val_is_uri=attr_val_is_uri,
                 svg_attr_val_allows_ref=svg_attr_val_allows_ref,
                 svg_allow_local_href=svg_allow_local_href):
        """Creates a Filter

        :arg source: the token stream to filter; passed to ``base.Filter``

        :arg allowed_elements: set of elements to allow--everything else will
            be escaped

        :arg allowed_attributes: set of attributes to allow in
            elements--everything else will be stripped

        :arg allowed_css_properties: set of CSS properties to allow--everything
            else will be stripped

        :arg allowed_css_keywords: set of CSS keywords to allow--everything
            else will be stripped

        :arg allowed_svg_properties: set of SVG properties to allow--everything
            else will be removed

        :arg allowed_protocols: set of allowed protocols for URIs

        :arg allowed_content_types: set of allowed content types for ``data`` URIs.

        :arg attr_val_is_uri: set of attributes that have URI values--values
            that have a scheme not listed in ``allowed_protocols`` are removed

        :arg svg_attr_val_allows_ref: set of SVG attributes that can have
            references

        :arg svg_allow_local_href: set of SVG elements that may only carry a
            local (``#fragment``) ``xlink:href``--any other ``xlink:href``
            value on these elements is removed.  Entries may be bare element
            names or ``(None, name)`` tuples.

        """
        super(Filter, self).__init__(source)
        self.allowed_elements = allowed_elements
        self.allowed_attributes = allowed_attributes
        self.allowed_css_properties = allowed_css_properties
        self.allowed_css_keywords = allowed_css_keywords
        self.allowed_svg_properties = allowed_svg_properties
        self.allowed_protocols = allowed_protocols
        self.allowed_content_types = allowed_content_types
        self.attr_val_is_uri = attr_val_is_uri
        self.svg_attr_val_allows_ref = svg_attr_val_allows_ref
        self.svg_allow_local_href = svg_allow_local_href

    def __iter__(self):
        """Yield the sanitized tokens, skipping any that sanitize to a falsy value."""
        for token in base.Filter.__iter__(self):
            token = self.sanitize_token(token)
            if token:
                yield token

    # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
    # stripping out all attributes not in ALLOWED_ATTRIBUTES. Style attributes
    # are parsed, and a restricted set, specified by ALLOWED_CSS_PROPERTIES and
    # ALLOWED_CSS_KEYWORDS, are allowed through. Attributes in ATTR_VAL_IS_URI
    # are scanned, and only URI schemes specified in ALLOWED_PROTOCOLS are
    # allowed.
    #
    #   sanitize_html('<script> do_nasty_stuff() </script>')
    #    => the script tags come out as text, not as elements
    #   sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
    #    => <a>Click here for $100</a>
    def sanitize_token(self, token):
        """Return the sanitized form of ``token``, or ``None`` to drop it.

        Tag tokens go to :meth:`allowed_token` or :meth:`disallowed_token`
        depending on whether ``(namespace, name)`` is in
        ``self.allowed_elements``; a tag with no namespace is looked up in the
        HTML namespace.  ``Comment`` tokens are dropped.  Every other token
        type is returned unchanged.
        """
        # accommodate filters which use token_type differently
        token_type = token["type"]
        if token_type in ("StartTag", "EndTag", "EmptyTag"):
            name = token["name"]
            namespace = token["namespace"]
            if ((namespace, name) in self.allowed_elements or
                    (namespace is None and
                     (namespaces["html"], name) in self.allowed_elements)):
                return self.allowed_token(token)
            return self.disallowed_token(token)
        if token_type == "Comment":
            # Comments never pass through the sanitizer.
            return None
        return token

    def allowed_token(self, token):
        """Sanitize the attributes of an allowed tag token in place and return it.

        Tokens without a ``"data"`` entry are returned untouched.  Otherwise:
        attributes not in ``self.allowed_attributes`` are deleted, URI-valued
        attributes that fail :meth:`_is_allowed_uri` are deleted, non-fragment
        ``url(...)`` references are blanked in the attributes listed in
        ``self.svg_attr_val_allows_ref``, a non-local ``xlink:href`` is deleted
        on elements listed in ``self.svg_allow_local_href``, and a ``style``
        attribute is replaced by the result of :meth:`sanitize_css`.
        """
        if "data" not in token:
            return token

        attrs = token["data"]
        attr_names = set(attrs.keys())

        # Remove forbidden attributes
        for to_remove in (attr_names - self.allowed_attributes):
            del attrs[to_remove]
            attr_names.remove(to_remove)

        # Remove attributes with disallowed URL values.  The decision is made
        # once per attribute so an attribute is never deleted twice (the
        # protocol check and the data-URI check can both reject a value).
        for attr in (attr_names & self.attr_val_is_uri):
            if not self._is_allowed_uri(attrs[attr]):
                del attrs[attr]

        for attr in self.svg_attr_val_allows_ref:
            if attr in attrs:
                attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                     ' ',
                                     unescape(attrs[attr]))

        # ``svg_allow_local_href`` may hold bare element names or
        # ``(None, name)`` tuples (the module default uses tuples), so test
        # both forms; comparing only the bare name never matches the default.
        element_name = token["name"]
        xlink_href = (namespaces['xlink'], 'href')
        if ((element_name in self.svg_allow_local_href or
             (None, element_name) in self.svg_allow_local_href) and
                xlink_href in attrs and
                re.search(r'^\s*[^#\s].*', attrs[xlink_href])):
            del attrs[xlink_href]

        if (None, 'style') in attrs:
            attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')])
        token["data"] = attrs
        return token

    def _is_allowed_uri(self, value):
        """Return True if the URI attribute ``value`` may be kept.

        The value is unescaped, stripped of backticks, control characters,
        whitespace and U+FFFD, lower-cased and parsed.  It is rejected when
        parsing raises ``ValueError``, when it has a scheme that is not in
        ``self.allowed_protocols``, or when it is a ``data`` URI whose path
        does not match ``data_content_type`` or whose content type is not in
        ``self.allowed_content_types``.  Values without a scheme are kept.
        """
        # I don't have a clue where this regexp comes from or why it matches those
        # characters, nor why we call unescape. I just know it's always been here.
        # Should you be worried by this comment in a sanitizer? Yes. On the other hand, all
        # this will do is remove *more* than it otherwise would.
        val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '',
                               unescape(value)).lower()
        # remove replacement characters from unescaped characters
        val_unescaped = val_unescaped.replace("\ufffd", "")
        try:
            uri = urlparse.urlparse(val_unescaped)
        except ValueError:
            return False
        if not uri.scheme:
            return True
        if uri.scheme not in self.allowed_protocols:
            return False
        if uri.scheme == 'data':
            m = data_content_type.match(uri.path)
            if not m:
                return False
            return m.group('content_type') in self.allowed_content_types
        return True

    def disallowed_token(self, token):
        """Turn a disallowed tag token into a ``Characters`` token and return it.

        The token's ``"data"`` becomes the tag's markup as a string (attribute
        values passed through ``escape``; namespaced attribute names prefixed
        via ``prefixes``), its ``"type"`` becomes ``"Characters"`` and its
        ``"name"`` entry is removed.  The token is modified in place.
        """
        token_type = token["type"]
        if token_type == "EndTag":
            token["data"] = "</%s>" % token["name"]
        elif token.get("data"):
            assert token_type in ("StartTag", "EmptyTag")
            attrs = []
            for (ns, name), v in token["data"].items():
                if ns is None:
                    qualified_name = name
                else:
                    qualified_name = "%s:%s" % (prefixes[ns], name)
                attrs.append(' %s="%s"' % (qualified_name, escape(v)))
            token["data"] = "<%s%s>" % (token["name"], ''.join(attrs))
        else:
            token["data"] = "<%s>" % token["name"]
        if token.get("selfClosing"):
            # Swap the trailing ">" for "/>".
            token["data"] = token["data"][:-1] + "/>"

        token["type"] = "Characters"

        del token["name"]
        return token

    def sanitize_css(self, style):
        """Return ``style`` reduced to the permitted CSS declarations.

        ``url(...)`` values are blanked first.  The whole string is then
        rejected (``''`` is returned) unless it consists only of the
        characters/tokens the first pattern accepts and has the shape of a
        ``property: value;`` list.  Of the remaining declarations, one is kept
        when its property is in ``self.allowed_css_properties``, or is a
        background/border/margin/padding property whose value words are all
        in ``self.allowed_css_keywords`` or look like a colour or length, or
        is in ``self.allowed_svg_properties``.  Kept declarations are joined
        with single spaces as ``"prop: value;"``.
        """
        # disallow urls
        style = re.sub(r'url\s*\(\s*[^\s)]+?\s*\)\s*', ' ', style)

        # gauntlet
        if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
            return ''
        if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
            return ''

        clean = []
        for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
            if not value:
                continue
            if prop.lower() in self.allowed_css_properties:
                clean.append(prop + ': ' + value + ';')
            elif prop.split('-')[0].lower() in ('background', 'border', 'margin',
                                                'padding'):
                for keyword in value.split():
                    if keyword not in self.allowed_css_keywords and \
                            not re.match(r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):  # noqa
                        break
                else:
                    # Every word of the value passed the keyword check.
                    clean.append(prop + ': ' + value + ';')
            elif prop.lower() in self.allowed_svg_properties:
                clean.append(prop + ': ' + value + ';')

        return ' '.join(clean)
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment