Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
N
news
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Sartika Aritonang
news
Commits
b9540267
Commit
b9540267
authored
May 29, 2020
by
Sartika Aritonang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Upload New File
parent
50a6a872
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
896 additions
and
0 deletions
+896
-0
sanitizer.py
...b/site-packages/pip/_vendor/html5lib/filters/sanitizer.py
+896
-0
No files found.
stbi/Lib/site-packages/pip/_vendor/html5lib/filters/sanitizer.py
0 → 100644
View file @
b9540267
from
__future__
import
absolute_import
,
division
,
unicode_literals
import
re
from
xml.sax.saxutils
import
escape
,
unescape
from
pip._vendor.six.moves
import
urllib_parse
as
urlparse
from
.
import
base
from
..constants
import
namespaces
,
prefixes
# Names exported by ``from <this module> import *``; only the filter class is public.
__all__ = ["Filter"]
# Default value of Filter's ``allowed_elements`` argument: a frozenset of
# (namespace URI, local name) pairs.  The pairs are generated from one tuple
# of local names per namespace prefix, looked up in ``namespaces``.
allowed_elements = frozenset(
    (namespaces[prefix], local_name)
    for prefix, local_names in (
        ('html', (
            'a', 'abbr', 'acronym', 'address', 'area', 'article', 'aside', 'audio',
            'b', 'big', 'blockquote', 'br', 'button',
            'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup', 'command',
            'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn', 'dialog', 'dir', 'div',
            'dl', 'dt',
            'em', 'event-source',
            'fieldset', 'figcaption', 'figure', 'footer', 'font', 'form',
            'header', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr',
            'i', 'img', 'input', 'ins',
            'keygen', 'kbd',
            'label', 'legend', 'li',
            'm', 'map', 'menu', 'meter', 'multicol',
            'nav', 'nextid',
            'ol', 'output', 'optgroup', 'option',
            'p', 'pre', 'progress',
            'q',
            's', 'samp', 'section', 'select', 'small', 'sound', 'source', 'spacer',
            'span', 'strike', 'strong', 'sub', 'sup',
            'table', 'tbody', 'td', 'textarea', 'time', 'tfoot', 'th', 'thead', 'tr', 'tt',
            'u', 'ul',
            'var', 'video',
        )),
        ('mathml', (
            'maction', 'math', 'merror', 'mfrac', 'mi', 'mmultiscripts', 'mn', 'mo',
            'mover', 'mpadded', 'mphantom', 'mprescripts', 'mroot', 'mrow', 'mspace',
            'msqrt', 'mstyle', 'msub', 'msubsup', 'msup', 'mtable', 'mtd', 'mtext',
            'mtr', 'munder', 'munderover', 'none',
        )),
        ('svg', (
            'a', 'animate', 'animateColor', 'animateMotion', 'animateTransform',
            'clipPath', 'circle', 'defs', 'desc', 'ellipse', 'font-face',
            'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern', 'linearGradient',
            'line', 'marker', 'metadata', 'missing-glyph', 'mpath', 'path', 'polygon',
            'polyline', 'radialGradient', 'rect', 'set', 'stop', 'svg', 'switch',
            'text', 'title', 'tspan', 'use',
        )),
    )
    for local_name in local_names
)
# Default value of Filter's ``allowed_attributes`` argument: a frozenset of
# (namespace URI or None, local name) pairs.  A prefix of None stands for
# "no namespace"; any other prefix is looked up in ``namespaces``.  Names that
# occur in more than one group collapse to a single entry in the frozenset.
allowed_attributes = frozenset(
    (None if prefix is None else namespaces[prefix], local_name)
    for prefix, local_names in (
        # HTML attributes
        (None, (
            'abbr', 'accept', 'accept-charset', 'accesskey', 'action', 'align', 'alt',
            'autocomplete', 'autofocus', 'axis',
            'background', 'balance', 'bgcolor', 'bgproperties', 'border', 'bordercolor',
            'bordercolordark', 'bordercolorlight', 'bottompadding',
            'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff', 'choff',
            'charset', 'checked', 'cite', 'class', 'clear', 'color', 'cols', 'colspan',
            'compact', 'contenteditable', 'controls', 'coords',
            'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default', 'delay',
            'dir', 'disabled', 'draggable', 'dynsrc',
            'enctype', 'end',
            'face', 'for', 'form', 'frame',
            'galleryimg', 'gutter',
            'headers', 'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang',
            'hspace',
            'icon', 'id', 'inputmode', 'ismap',
            'keytype',
            'label', 'leftspacing', 'lang', 'list', 'longdesc', 'loop', 'loopcount',
            'loopend', 'loopstart', 'low', 'lowsrc',
            'max', 'maxlength', 'media', 'method', 'min', 'multiple',
            'name', 'nohref', 'noshade', 'nowrap',
            'open', 'optimum',
            'pattern', 'ping', 'point-size', 'poster', 'pqg', 'preload', 'prompt',
            'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min', 'replace',
            'required', 'rev', 'rightspacing', 'rows', 'rowspan', 'rules',
            'scope', 'selected', 'shape', 'size', 'span', 'src', 'start', 'step',
            'style', 'summary', 'suppress',
            'tabindex', 'target', 'template', 'title', 'toppadding', 'type',
            'unselectable', 'usemap', 'urn',
            'valign', 'value', 'variable', 'volume', 'vspace', 'vrml',
            'width', 'wrap',
        )),
        ('xml', ('lang',)),
        # MathML attributes
        (None, (
            'actiontype', 'align', 'columnalign', 'columnlines', 'columnspacing',
            'columnspan', 'depth', 'display', 'displaystyle', 'equalcolumns',
            'equalrows', 'fence', 'fontstyle', 'fontweight', 'frame', 'height',
            'linethickness', 'lspace', 'mathbackground', 'mathcolor', 'mathvariant',
            'maxsize', 'minsize', 'other', 'rowalign', 'rowlines', 'rowspacing',
            'rowspan', 'rspace', 'scriptlevel', 'selection', 'separator', 'stretchy',
            'width',
        )),
        ('xlink', ('href', 'show', 'type')),
        # SVG attributes
        (None, (
            'accent-height', 'accumulate', 'additive', 'alphabetic', 'arabic-form',
            'ascent', 'attributeName', 'attributeType',
            'baseProfile', 'bbox', 'begin', 'by',
            'calcMode', 'cap-height', 'class', 'clip-path', 'color', 'color-rendering',
            'content', 'cx', 'cy',
            'd', 'dx', 'dy', 'descent', 'display', 'dur',
            'end',
            'fill', 'fill-opacity', 'fill-rule', 'font-family', 'font-size',
            'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from', 'fx',
            'fy',
            'g1', 'g2', 'glyph-name', 'gradientUnits',
            'hanging', 'height', 'horiz-adv-x', 'horiz-origin-x',
            'id', 'ideographic',
            'k', 'keyPoints', 'keySplines', 'keyTimes',
            'lang',
            'marker-end', 'marker-mid', 'marker-start', 'markerHeight', 'markerUnits',
            'markerWidth', 'mathematical', 'max', 'min',
            'name',
            'offset', 'opacity', 'orient', 'origin', 'overline-position',
            'overline-thickness',
            'panose-1', 'path', 'pathLength', 'points', 'preserveAspectRatio',
            'r', 'refX', 'refY', 'repeatCount', 'repeatDur', 'requiredExtensions',
            'requiredFeatures', 'restart', 'rotate', 'rx', 'ry',
            'slope', 'stemh', 'stemv', 'stop-color', 'stop-opacity',
            'strikethrough-position', 'strikethrough-thickness', 'stroke',
            'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
            'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity', 'stroke-width',
            'systemLanguage',
            'target', 'text-anchor', 'to', 'transform', 'type',
            'u1', 'u2', 'underline-position', 'underline-thickness', 'unicode',
            'unicode-range', 'units-per-em',
            'values', 'version', 'viewBox', 'visibility',
            'width', 'widths',
            'x', 'x-height', 'x1', 'x2',
        )),
        ('xlink', ('actuate', 'arcrole', 'href', 'role', 'show', 'title', 'type')),
        ('xml', ('base', 'lang', 'space')),
        (None, ('y', 'y1', 'y2', 'zoomAndPan')),
    )
    for local_name in local_names
)
# Default value of Filter's ``attr_val_is_uri`` argument: the attributes whose
# values are parsed as URIs and checked against the allowed protocols.
attr_val_is_uri = frozenset(
    (None if prefix is None else namespaces[prefix], local_name)
    for prefix, local_names in (
        (None, (
            'href', 'src', 'cite', 'action', 'longdesc', 'poster', 'background',
            'datasrc', 'dynsrc', 'lowsrc', 'ping',
        )),
        ('xlink', ('href',)),
        ('xml', ('base',)),
    )
    for local_name in local_names
)
# Default value of Filter's ``svg_attr_val_allows_ref`` argument: un-namespaced
# attributes whose ``url(...)`` references are rewritten by the filter.
svg_attr_val_allows_ref = frozenset(
    (None, local_name)
    for local_name in (
        'clip-path', 'color-profile', 'cursor', 'fill', 'filter', 'marker',
        'marker-start', 'marker-mid', 'marker-end', 'mask', 'stroke',
    )
)
# Default value of Filter's ``svg_allow_local_href`` argument, stored as
# (None, element name) pairs.
svg_allow_local_href = frozenset(
    (None, element_name)
    for element_name in (
        'altGlyph', 'animate', 'animateColor', 'animateMotion', 'animateTransform',
        'cursor', 'feImage', 'filter', 'linearGradient', 'pattern', 'radialGradient',
        'textpath', 'tref', 'set', 'use',
    )
)
# Default value of Filter's ``allowed_css_properties`` argument: CSS property
# names kept as-is when an inline ``style`` attribute is sanitized.
allowed_css_properties = frozenset([
    # aural
    'azimuth', 'elevation', 'pause', 'pause-after', 'pause-before', 'pitch',
    'pitch-range', 'richness', 'speak', 'speak-header', 'speak-numeral',
    'speak-punctuation', 'speech-rate', 'stress', 'voice-family', 'volume',
    # colours and borders
    'background-color', 'border-bottom-color', 'border-collapse', 'border-color',
    'border-left-color', 'border-right-color', 'border-top-color', 'color',
    # layout
    'clear', 'cursor', 'direction', 'display', 'float', 'height', 'overflow',
    'unicode-bidi', 'vertical-align', 'width',
    # fonts
    'font', 'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight',
    # text
    'letter-spacing', 'line-height', 'text-align', 'text-decoration', 'text-indent',
    'white-space',
])
# Default value of Filter's ``allowed_css_keywords`` argument: keywords accepted
# in the values of background/border/margin/padding style properties.
allowed_css_keywords = frozenset([
    # colour names
    'aqua', 'black', 'blue', 'brown', 'fuchsia', 'gray', 'green', 'lime', 'maroon',
    'navy', 'olive', 'purple', 'red', 'silver', 'teal', 'transparent', 'white',
    'yellow',
    # everything else
    'auto', 'block', 'bold', 'both', 'bottom', 'center', 'collapse', 'dashed',
    'dotted', '!important', 'italic', 'left', 'medium', 'none', 'normal', 'nowrap',
    'pointer', 'right', 'solid', 'top', 'underline',
])
# Default value of Filter's ``allowed_svg_properties`` argument: SVG
# presentation properties kept when an inline ``style`` attribute is sanitized.
allowed_svg_properties = frozenset([
    'fill', 'fill-opacity', 'fill-rule',
    'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin', 'stroke-opacity',
])
# Default value of Filter's ``allowed_protocols`` argument: URI schemes that may
# appear in URI-valued attributes (listed alphabetically).
allowed_protocols = frozenset([
    'afs', 'aim', 'callto', 'data', 'ed2k', 'feed', 'ftp', 'gopher', 'http',
    'https', 'irc', 'mailto', 'news', 'nntp', 'rsync', 'rtsp', 'sftp', 'ssh',
    'tag', 'telnet', 'urn', 'webcal', 'xmpp',
])
# Default value of Filter's ``allowed_content_types`` argument: media types
# accepted inside ``data:`` URIs.
allowed_content_types = frozenset([
    'image/png', 'image/jpeg', 'image/gif', 'image/webp', 'image/bmp',
    'text/plain',
])
# Verbose-mode pattern that Filter.allowed_token matches against the path of a
# ``data:`` URI.  The named group ``content_type`` captures the media type that
# is then compared with the allowed content types; an optional ``;charset=...``
# and/or ``;base64`` part may follow, in either order, before the comma that
# introduces the payload.
data_content_type = re.compile(r'''
^
# Match a content type <application>/<type>
(?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+)
# Match any character set and encoding
(?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?)
|(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?)
# Assume the rest is data
,.*
$
''',
re.VERBOSE)
class Filter(base.Filter):
    """Sanitizes token stream of XHTML+MathML+SVG and of inline style attributes

    Tags of elements that are not allowed are turned into ``Characters``
    tokens carrying the tag markup as text, comment tokens are dropped, and
    the attributes of allowed elements are filtered by name, URI scheme, SVG
    reference and inline CSS content.
    """

    def __init__(self,
                 source,
                 allowed_elements=allowed_elements,
                 allowed_attributes=allowed_attributes,
                 allowed_css_properties=allowed_css_properties,
                 allowed_css_keywords=allowed_css_keywords,
                 allowed_svg_properties=allowed_svg_properties,
                 allowed_protocols=allowed_protocols,
                 allowed_content_types=allowed_content_types,
                 attr_val_is_uri=attr_val_is_uri,
                 svg_attr_val_allows_ref=svg_attr_val_allows_ref,
                 svg_allow_local_href=svg_allow_local_href):
        """Creates a Filter

        :arg source: the token stream to sanitize

        :arg allowed_elements: set of elements to allow--everything else will
            be escaped

        :arg allowed_attributes: set of attributes to allow in
            elements--everything else will be stripped

        :arg allowed_css_properties: set of CSS properties to allow--everything
            else will be stripped

        :arg allowed_css_keywords: set of CSS keywords to allow--everything
            else will be stripped

        :arg allowed_svg_properties: set of SVG properties to allow--everything
            else will be removed

        :arg allowed_protocols: set of allowed protocols for URIs

        :arg allowed_content_types: set of allowed content types for ``data`` URIs.

        :arg attr_val_is_uri: set of attributes that have URI values--values
            that have a scheme not listed in ``allowed_protocols`` are removed

        :arg svg_attr_val_allows_ref: set of SVG attributes that can have
            references

        :arg svg_allow_local_href: set of SVG elements that may only carry
            local (``#fragment``) ``xlink:href`` values--any other
            ``xlink:href`` on them is removed.  Entries may be
            ``(None, name)`` pairs (as in the default set) or plain element
            names.

        """
        super(Filter, self).__init__(source)
        self.allowed_elements = allowed_elements
        self.allowed_attributes = allowed_attributes
        self.allowed_css_properties = allowed_css_properties
        self.allowed_css_keywords = allowed_css_keywords
        self.allowed_svg_properties = allowed_svg_properties
        self.allowed_protocols = allowed_protocols
        self.allowed_content_types = allowed_content_types
        self.attr_val_is_uri = attr_val_is_uri
        self.svg_attr_val_allows_ref = svg_attr_val_allows_ref
        self.svg_allow_local_href = svg_allow_local_href

    def __iter__(self):
        """Yield the sanitized tokens, skipping those the sanitizer dropped."""
        for token in base.Filter.__iter__(self):
            token = self.sanitize_token(token)
            if token:
                yield token

    # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
    # stripping out all attributes not in ALLOWED_ATTRIBUTES. Style attributes
    # are parsed, and a restricted set, specified by ALLOWED_CSS_PROPERTIES and
    # ALLOWED_CSS_KEYWORDS, are allowed through. attributes in ATTR_VAL_IS_URI
    # are scanned, and only URI schemes specified in ALLOWED_PROTOCOLS are
    # allowed.
    #
    #   sanitize_html('<script> do_nasty_stuff() </script>')
    #    => &lt;script> do_nasty_stuff() &lt;/script>
    #   sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
    #    => <a>Click here for $100</a>
    def sanitize_token(self, token):
        """Return the sanitized form of ``token``, or None to drop it.

        Tag tokens are dispatched to ``allowed_token`` or
        ``disallowed_token``; comment tokens are dropped; every other token
        type is returned unchanged.
        """
        # accommodate filters which use token_type differently
        token_type = token["type"]
        if token_type in ("StartTag", "EndTag", "EmptyTag"):
            name = token["name"]
            namespace = token["namespace"]
            # A tag without a namespace is looked up as an HTML element.
            if ((namespace, name) in self.allowed_elements or
                    (namespace is None and
                     (namespaces["html"], name) in self.allowed_elements)):
                return self.allowed_token(token)
            return self.disallowed_token(token)
        if token_type == "Comment":
            # Comments are never passed through.
            return None
        return token

    def _uri_value_is_allowed(self, value):
        """Return True if ``value`` may stay as the value of a URI attribute."""
        # Strip backticks, \x00-\x20, \x7f-\xa0 and whitespace from the
        # unescaped value before looking at the scheme.  The origin of this
        # character class is not documented; at worst it causes more values
        # to be rejected than strictly necessary.
        normalized = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '', unescape(value)).lower()
        # remove replacement characters from unescaped characters
        normalized = normalized.replace("\ufffd", "")
        try:
            uri = urlparse.urlparse(normalized)
        except ValueError:
            return False
        if not uri.scheme:
            # Values without a scheme are kept.
            return True
        if uri.scheme not in self.allowed_protocols:
            return False
        if uri.scheme == 'data':
            # data: URIs must additionally declare an allowed content type.
            match = data_content_type.match(uri.path)
            return (match is not None and
                    match.group('content_type') in self.allowed_content_types)
        return True

    def allowed_token(self, token):
        """Filter the attributes of an allowed element and return the token.

        The attribute dict in ``token["data"]`` (if present) is modified in
        place: attributes that are not allowed are removed, URI attributes
        with a rejected value are removed, ``url(...)`` references in SVG
        attributes are blanked unless they start with ``#``, non-local
        ``xlink:href`` values are removed from the elements listed in
        ``svg_allow_local_href``, and ``style`` is run through
        ``sanitize_css``.
        """
        if "data" not in token:
            return token

        attrs = token["data"]
        attr_names = set(attrs.keys())

        # Remove forbidden attributes
        for to_remove in (attr_names - self.allowed_attributes):
            del attrs[to_remove]
            attr_names.remove(to_remove)

        # Remove attributes with disallowed URL values.  Each attribute is
        # deleted at most once, so a ``data:`` URI that fails both the
        # protocol and the content-type check no longer raises KeyError.
        for attr in (attr_names & self.attr_val_is_uri):
            if not self._uri_value_is_allowed(attrs[attr]):
                del attrs[attr]

        # Blank out url(...) references that do not start with '#'.
        for attr in self.svg_attr_val_allows_ref:
            if attr in attrs:
                attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                     ' ',
                                     unescape(attrs[attr]))

        # The default svg_allow_local_href holds (None, name) pairs, so the
        # bare element name alone would never match; accept both forms.
        xlink_href = (namespaces['xlink'], 'href')
        element_name = token["name"]
        restricts_href = (element_name in self.svg_allow_local_href or
                          (None, element_name) in self.svg_allow_local_href)
        if (restricts_href and xlink_href in attrs and
                re.search(r'^\s*[^#\s].*', attrs[xlink_href])):
            del attrs[xlink_href]

        style_attr = (None, 'style')
        if style_attr in attrs:
            attrs[style_attr] = self.sanitize_css(attrs[style_attr])

        token["data"] = attrs
        return token

    def disallowed_token(self, token):
        """Turn a disallowed tag token into a ``Characters`` token.

        The token is modified in place: ``data`` becomes the tag rebuilt as
        text (attribute values passed through ``escape``), ``type`` becomes
        ``"Characters"`` and ``name`` is removed.
        """
        token_type = token["type"]
        if token_type == "EndTag":
            token["data"] = "</%s>" % token["name"]
        elif token["data"]:
            assert token_type in ("StartTag", "EmptyTag")
            attrs = []
            for (ns, name), v in token["data"].items():
                # Namespaced attributes are written as prefix:name.
                if ns is None:
                    qualified_name = name
                else:
                    qualified_name = "%s:%s" % (prefixes[ns], name)
                attrs.append(' %s="%s"' % (qualified_name, escape(v)))
            token["data"] = "<%s%s>" % (token["name"], ''.join(attrs))
        else:
            token["data"] = "<%s>" % token["name"]
        if token.get("selfClosing"):
            # Replace the closing '>' with '/>'.
            token["data"] = token["data"][:-1] + "/>"

        token["type"] = "Characters"

        del token["name"]
        return token

    def sanitize_css(self, style):
        """Return ``style`` reduced to its allowed declarations.

        ``url(...)`` values are removed first.  If the remaining text does
        not pass two whole-string checks an empty string is returned.
        Otherwise each ``property: value`` pair is kept when the property is
        in ``allowed_css_properties`` or ``allowed_svg_properties``, or when
        it is a background/border/margin/padding property whose value is
        made only of allowed keywords, hex colours, ``rgb(...)`` values or
        short lengths.  Kept declarations are joined with single spaces.
        """
        # disallow urls
        style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)

        # gauntlet
        if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""",
                        style):
            return ''
        if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
            return ''

        # Pattern for a single shorthand value that is not a plain keyword.
        value_pattern = (r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|"
                         r"\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$")

        clean = []
        for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
            if not value:
                continue
            if prop.lower() in self.allowed_css_properties:
                keep = True
            elif prop.split('-')[0].lower() in ['background', 'border', 'margin', 'padding']:
                # Shorthand families: every whitespace-separated part of the
                # value has to be acceptable on its own.
                keep = all(keyword in self.allowed_css_keywords or
                           re.match(value_pattern, keyword)
                           for keyword in value.split())
            else:
                keep = prop.lower() in self.allowed_svg_properties
            if keep:
                clean.append(prop + ': ' + value + ';')

        return ' '.join(clean)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment