Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
N
news
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Sartika Aritonang
news
Commits
6f1fee36
Commit
6f1fee36
authored
4 years ago
by
Sartika Aritonang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Upload New File
parent
990af9f9
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
417 additions
and
0 deletions
+417
-0
base.py
...b/site-packages/pip/_vendor/html5lib/treebuilders/base.py
+417
-0
No files found.
stbi/Lib/site-packages/pip/_vendor/html5lib/treebuilders/base.py
0 → 100644
View file @
6f1fee36
from
__future__
import
absolute_import
,
division
,
unicode_literals
from
pip._vendor.six
import
text_type
from
..constants
import
scopingElements
,
tableInsertModeElements
,
namespaces
# The scope markers are inserted when entering object elements,
# marquees, table cells, and table captions, and are used to prevent formatting
# from "leaking" into tables, object elements, and marquees.
Marker
=
None
listElementsMap
=
{
None
:
(
frozenset
(
scopingElements
),
False
),
"button"
:
(
frozenset
(
scopingElements
|
set
([(
namespaces
[
"html"
],
"button"
)])),
False
),
"list"
:
(
frozenset
(
scopingElements
|
set
([(
namespaces
[
"html"
],
"ol"
),
(
namespaces
[
"html"
],
"ul"
)])),
False
),
"table"
:
(
frozenset
([(
namespaces
[
"html"
],
"html"
),
(
namespaces
[
"html"
],
"table"
)]),
False
),
"select"
:
(
frozenset
([(
namespaces
[
"html"
],
"optgroup"
),
(
namespaces
[
"html"
],
"option"
)]),
True
)
}
class
Node
(
object
):
"""Represents an item in the tree"""
def
__init__
(
self
,
name
):
"""Creates a Node
:arg name: The tag name associated with the node
"""
# The tag name assocaited with the node
self
.
name
=
name
# The parent of the current node (or None for the document node)
self
.
parent
=
None
# The value of the current node (applies to text nodes and comments)
self
.
value
=
None
# A dict holding name -> value pairs for attributes of the node
self
.
attributes
=
{}
# A list of child nodes of the current node. This must include all
# elements but not necessarily other node types.
self
.
childNodes
=
[]
# A list of miscellaneous flags that can be set on the node.
self
.
_flags
=
[]
def
__str__
(
self
):
attributesStr
=
" "
.
join
([
"
%
s=
\"
%
s
\"
"
%
(
name
,
value
)
for
name
,
value
in
self
.
attributes
.
items
()])
if
attributesStr
:
return
"<
%
s
%
s>"
%
(
self
.
name
,
attributesStr
)
else
:
return
"<
%
s>"
%
(
self
.
name
)
def
__repr__
(
self
):
return
"<
%
s>"
%
(
self
.
name
)
def
appendChild
(
self
,
node
):
"""Insert node as a child of the current node
:arg node: the node to insert
"""
raise
NotImplementedError
def
insertText
(
self
,
data
,
insertBefore
=
None
):
"""Insert data as text in the current node, positioned before the
start of node insertBefore or to the end of the node's text.
:arg data: the data to insert
:arg insertBefore: True if you want to insert the text before the node
and False if you want to insert it after the node
"""
raise
NotImplementedError
def
insertBefore
(
self
,
node
,
refNode
):
"""Insert node as a child of the current node, before refNode in the
list of child nodes. Raises ValueError if refNode is not a child of
the current node
:arg node: the node to insert
:arg refNode: the child node to insert the node before
"""
raise
NotImplementedError
def
removeChild
(
self
,
node
):
"""Remove node from the children of the current node
:arg node: the child node to remove
"""
raise
NotImplementedError
def
reparentChildren
(
self
,
newParent
):
"""Move all the children of the current node to newParent.
This is needed so that trees that don't store text as nodes move the
text in the correct way
:arg newParent: the node to move all this node's children to
"""
# XXX - should this method be made more general?
for
child
in
self
.
childNodes
:
newParent
.
appendChild
(
child
)
self
.
childNodes
=
[]
def
cloneNode
(
self
):
"""Return a shallow copy of the current node i.e. a node with the same
name and attributes but with no parent or child nodes
"""
raise
NotImplementedError
def
hasContent
(
self
):
"""Return true if the node has children or text, false otherwise
"""
raise
NotImplementedError
class
ActiveFormattingElements
(
list
):
def
append
(
self
,
node
):
equalCount
=
0
if
node
!=
Marker
:
for
element
in
self
[::
-
1
]:
if
element
==
Marker
:
break
if
self
.
nodesEqual
(
element
,
node
):
equalCount
+=
1
if
equalCount
==
3
:
self
.
remove
(
element
)
break
list
.
append
(
self
,
node
)
def
nodesEqual
(
self
,
node1
,
node2
):
if
not
node1
.
nameTuple
==
node2
.
nameTuple
:
return
False
if
not
node1
.
attributes
==
node2
.
attributes
:
return
False
return
True
class
TreeBuilder
(
object
):
"""Base treebuilder implementation
* documentClass - the class to use for the bottommost node of a document
* elementClass - the class to use for HTML Elements
* commentClass - the class to use for comments
* doctypeClass - the class to use for doctypes
"""
# pylint:disable=not-callable
# Document class
documentClass
=
None
# The class to use for creating a node
elementClass
=
None
# The class to use for creating comments
commentClass
=
None
# The class to use for creating doctypes
doctypeClass
=
None
# Fragment class
fragmentClass
=
None
def
__init__
(
self
,
namespaceHTMLElements
):
"""Create a TreeBuilder
:arg namespaceHTMLElements: whether or not to namespace HTML elements
"""
if
namespaceHTMLElements
:
self
.
defaultNamespace
=
"http://www.w3.org/1999/xhtml"
else
:
self
.
defaultNamespace
=
None
self
.
reset
()
def
reset
(
self
):
self
.
openElements
=
[]
self
.
activeFormattingElements
=
ActiveFormattingElements
()
# XXX - rename these to headElement, formElement
self
.
headPointer
=
None
self
.
formPointer
=
None
self
.
insertFromTable
=
False
self
.
document
=
self
.
documentClass
()
def
elementInScope
(
self
,
target
,
variant
=
None
):
# If we pass a node in we match that. if we pass a string
# match any node with that name
exactNode
=
hasattr
(
target
,
"nameTuple"
)
if
not
exactNode
:
if
isinstance
(
target
,
text_type
):
target
=
(
namespaces
[
"html"
],
target
)
assert
isinstance
(
target
,
tuple
)
listElements
,
invert
=
listElementsMap
[
variant
]
for
node
in
reversed
(
self
.
openElements
):
if
exactNode
and
node
==
target
:
return
True
elif
not
exactNode
and
node
.
nameTuple
==
target
:
return
True
elif
(
invert
^
(
node
.
nameTuple
in
listElements
)):
return
False
assert
False
# We should never reach this point
def
reconstructActiveFormattingElements
(
self
):
# Within this algorithm the order of steps described in the
# specification is not quite the same as the order of steps in the
# code. It should still do the same though.
# Step 1: stop the algorithm when there's nothing to do.
if
not
self
.
activeFormattingElements
:
return
# Step 2 and step 3: we start with the last element. So i is -1.
i
=
len
(
self
.
activeFormattingElements
)
-
1
entry
=
self
.
activeFormattingElements
[
i
]
if
entry
==
Marker
or
entry
in
self
.
openElements
:
return
# Step 6
while
entry
!=
Marker
and
entry
not
in
self
.
openElements
:
if
i
==
0
:
# This will be reset to 0 below
i
=
-
1
break
i
-=
1
# Step 5: let entry be one earlier in the list.
entry
=
self
.
activeFormattingElements
[
i
]
while
True
:
# Step 7
i
+=
1
# Step 8
entry
=
self
.
activeFormattingElements
[
i
]
clone
=
entry
.
cloneNode
()
# Mainly to get a new copy of the attributes
# Step 9
element
=
self
.
insertElement
({
"type"
:
"StartTag"
,
"name"
:
clone
.
name
,
"namespace"
:
clone
.
namespace
,
"data"
:
clone
.
attributes
})
# Step 10
self
.
activeFormattingElements
[
i
]
=
element
# Step 11
if
element
==
self
.
activeFormattingElements
[
-
1
]:
break
def
clearActiveFormattingElements
(
self
):
entry
=
self
.
activeFormattingElements
.
pop
()
while
self
.
activeFormattingElements
and
entry
!=
Marker
:
entry
=
self
.
activeFormattingElements
.
pop
()
def
elementInActiveFormattingElements
(
self
,
name
):
"""Check if an element exists between the end of the active
formatting elements and the last marker. If it does, return it, else
return false"""
for
item
in
self
.
activeFormattingElements
[::
-
1
]:
# Check for Marker first because if it's a Marker it doesn't have a
# name attribute.
if
item
==
Marker
:
break
elif
item
.
name
==
name
:
return
item
return
False
def
insertRoot
(
self
,
token
):
element
=
self
.
createElement
(
token
)
self
.
openElements
.
append
(
element
)
self
.
document
.
appendChild
(
element
)
def
insertDoctype
(
self
,
token
):
name
=
token
[
"name"
]
publicId
=
token
[
"publicId"
]
systemId
=
token
[
"systemId"
]
doctype
=
self
.
doctypeClass
(
name
,
publicId
,
systemId
)
self
.
document
.
appendChild
(
doctype
)
def
insertComment
(
self
,
token
,
parent
=
None
):
if
parent
is
None
:
parent
=
self
.
openElements
[
-
1
]
parent
.
appendChild
(
self
.
commentClass
(
token
[
"data"
]))
def
createElement
(
self
,
token
):
"""Create an element but don't insert it anywhere"""
name
=
token
[
"name"
]
namespace
=
token
.
get
(
"namespace"
,
self
.
defaultNamespace
)
element
=
self
.
elementClass
(
name
,
namespace
)
element
.
attributes
=
token
[
"data"
]
return
element
def
_getInsertFromTable
(
self
):
return
self
.
_insertFromTable
def
_setInsertFromTable
(
self
,
value
):
"""Switch the function used to insert an element from the
normal one to the misnested table one and back again"""
self
.
_insertFromTable
=
value
if
value
:
self
.
insertElement
=
self
.
insertElementTable
else
:
self
.
insertElement
=
self
.
insertElementNormal
insertFromTable
=
property
(
_getInsertFromTable
,
_setInsertFromTable
)
def
insertElementNormal
(
self
,
token
):
name
=
token
[
"name"
]
assert
isinstance
(
name
,
text_type
),
"Element
%
s not unicode"
%
name
namespace
=
token
.
get
(
"namespace"
,
self
.
defaultNamespace
)
element
=
self
.
elementClass
(
name
,
namespace
)
element
.
attributes
=
token
[
"data"
]
self
.
openElements
[
-
1
]
.
appendChild
(
element
)
self
.
openElements
.
append
(
element
)
return
element
def
insertElementTable
(
self
,
token
):
"""Create an element and insert it into the tree"""
element
=
self
.
createElement
(
token
)
if
self
.
openElements
[
-
1
]
.
name
not
in
tableInsertModeElements
:
return
self
.
insertElementNormal
(
token
)
else
:
# We should be in the InTable mode. This means we want to do
# special magic element rearranging
parent
,
insertBefore
=
self
.
getTableMisnestedNodePosition
()
if
insertBefore
is
None
:
parent
.
appendChild
(
element
)
else
:
parent
.
insertBefore
(
element
,
insertBefore
)
self
.
openElements
.
append
(
element
)
return
element
def
insertText
(
self
,
data
,
parent
=
None
):
"""Insert text data."""
if
parent
is
None
:
parent
=
self
.
openElements
[
-
1
]
if
(
not
self
.
insertFromTable
or
(
self
.
insertFromTable
and
self
.
openElements
[
-
1
]
.
name
not
in
tableInsertModeElements
)):
parent
.
insertText
(
data
)
else
:
# We should be in the InTable mode. This means we want to do
# special magic element rearranging
parent
,
insertBefore
=
self
.
getTableMisnestedNodePosition
()
parent
.
insertText
(
data
,
insertBefore
)
def
getTableMisnestedNodePosition
(
self
):
"""Get the foster parent element, and sibling to insert before
(or None) when inserting a misnested table node"""
# The foster parent element is the one which comes before the most
# recently opened table element
# XXX - this is really inelegant
lastTable
=
None
fosterParent
=
None
insertBefore
=
None
for
elm
in
self
.
openElements
[::
-
1
]:
if
elm
.
name
==
"table"
:
lastTable
=
elm
break
if
lastTable
:
# XXX - we should really check that this parent is actually a
# node here
if
lastTable
.
parent
:
fosterParent
=
lastTable
.
parent
insertBefore
=
lastTable
else
:
fosterParent
=
self
.
openElements
[
self
.
openElements
.
index
(
lastTable
)
-
1
]
else
:
fosterParent
=
self
.
openElements
[
0
]
return
fosterParent
,
insertBefore
def
generateImpliedEndTags
(
self
,
exclude
=
None
):
name
=
self
.
openElements
[
-
1
]
.
name
# XXX td, th and tr are not actually needed
if
(
name
in
frozenset
((
"dd"
,
"dt"
,
"li"
,
"option"
,
"optgroup"
,
"p"
,
"rp"
,
"rt"
))
and
name
!=
exclude
):
self
.
openElements
.
pop
()
# XXX This is not entirely what the specification says. We should
# investigate it more closely.
self
.
generateImpliedEndTags
(
exclude
)
def
getDocument
(
self
):
"""Return the final tree"""
return
self
.
document
def
getFragment
(
self
):
"""Return the final fragment"""
# assert self.innerHTML
fragment
=
self
.
fragmentClass
()
self
.
openElements
[
0
]
.
reparentChildren
(
fragment
)
return
fragment
def
testSerializer
(
self
,
node
):
"""Serialize the subtree of node in the format required by unit tests
:arg node: the node from which to start serializing
"""
raise
NotImplementedError
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment