Skip to content

Commit

Permalink
Remove six dependency from html5lib (#618)
Browse files Browse the repository at this point in the history
This way lies madness, but at least we don't have a six dependency
anymore.

The way this works is that we vendored html5lib 1.1, and then we apply
01_html5lib_six.patch to it, which changes the imports from six into
imports from bleach.six_shim.

This updates the vendor management code and the vendor verification
script (scripts/vendor_verify.sh) to install html5lib 1.1, apply the
patch, and then compare the result with what's in the tree. If we end up
applying further patches in the future, we can use this model to do that.
  • Loading branch information
willkg committed Oct 28, 2024
1 parent 5a47907 commit 156c589
Show file tree
Hide file tree
Showing 16 changed files with 195 additions and 18 deletions.
167 changes: 167 additions & 0 deletions bleach/_vendor/01_html5lib_six.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
diff --git bleach/_vendor/html5lib/_inputstream.py bleach/_vendor/html5lib/_inputstream.py
index 0207dd2..0976251 100644
--- bleach/_vendor/html5lib/_inputstream.py
+++ bleach/_vendor/html5lib/_inputstream.py
@@ -1,7 +1,7 @@
from __future__ import absolute_import, division, unicode_literals

-from six import text_type
-from six.moves import http_client, urllib
+from bleach.six_shim import text_type
+from bleach.six_shim import http_client, urllib

import codecs
import re
diff --git bleach/_vendor/html5lib/_tokenizer.py bleach/_vendor/html5lib/_tokenizer.py
index 4748a19..d884801 100644
--- bleach/_vendor/html5lib/_tokenizer.py
+++ bleach/_vendor/html5lib/_tokenizer.py
@@ -1,6 +1,6 @@
from __future__ import absolute_import, division, unicode_literals

-from six import unichr as chr
+from bleach.six_shim import unichr as chr

from collections import deque, OrderedDict
from sys import version_info
diff --git bleach/_vendor/html5lib/_trie/py.py bleach/_vendor/html5lib/_trie/py.py
index c2ba3da..56f66bd 100644
--- bleach/_vendor/html5lib/_trie/py.py
+++ bleach/_vendor/html5lib/_trie/py.py
@@ -1,5 +1,5 @@
from __future__ import absolute_import, division, unicode_literals
-from six import text_type
+from bleach.six_shim import text_type

from bisect import bisect_left

diff --git bleach/_vendor/html5lib/_utils.py bleach/_vendor/html5lib/_utils.py
index 9ea5794..635bb02 100644
--- bleach/_vendor/html5lib/_utils.py
+++ bleach/_vendor/html5lib/_utils.py
@@ -7,7 +7,7 @@ try:
except ImportError:
from collections import Mapping

-from six import text_type, PY3
+from bleach.six_shim import text_type, PY3

if PY3:
import xml.etree.ElementTree as default_etree
diff --git bleach/_vendor/html5lib/filters/lint.py bleach/_vendor/html5lib/filters/lint.py
index acd4d7a..1340d97 100644
--- bleach/_vendor/html5lib/filters/lint.py
+++ bleach/_vendor/html5lib/filters/lint.py
@@ -1,6 +1,6 @@
from __future__ import absolute_import, division, unicode_literals

-from six import text_type
+from bleach.six_shim import text_type

from . import base
from ..constants import namespaces, voidElements
diff --git bleach/_vendor/html5lib/filters/sanitizer.py bleach/_vendor/html5lib/filters/sanitizer.py
index 70ef906..5c31e97 100644
--- bleach/_vendor/html5lib/filters/sanitizer.py
+++ bleach/_vendor/html5lib/filters/sanitizer.py
@@ -12,7 +12,7 @@ import re
import warnings
from xml.sax.saxutils import escape, unescape

-from six.moves import urllib_parse as urlparse
+from bleach.six_shim import urllib_parse as urlparse

from . import base
from ..constants import namespaces, prefixes
diff --git bleach/_vendor/html5lib/html5parser.py bleach/_vendor/html5lib/html5parser.py
index 74d829d..5427b7d 100644
--- bleach/_vendor/html5lib/html5parser.py
+++ bleach/_vendor/html5lib/html5parser.py
@@ -1,5 +1,5 @@
from __future__ import absolute_import, division, unicode_literals
-from six import with_metaclass, viewkeys
+from bleach.six_shim import viewkeys

import types

@@ -423,7 +423,7 @@ def getPhases(debug):
return type

# pylint:disable=unused-argument
- class Phase(with_metaclass(getMetaclass(debug, log))):
+ class Phase(metaclass=getMetaclass(debug, log)):
"""Base class for helper object that implements each phase of processing
"""
__slots__ = ("parser", "tree", "__startTagCache", "__endTagCache")
diff --git bleach/_vendor/html5lib/serializer.py bleach/_vendor/html5lib/serializer.py
index c66df68..5666f49 100644
--- bleach/_vendor/html5lib/serializer.py
+++ bleach/_vendor/html5lib/serializer.py
@@ -1,5 +1,5 @@
from __future__ import absolute_import, division, unicode_literals
-from six import text_type
+from bleach.six_shim import text_type

import re

diff --git bleach/_vendor/html5lib/treebuilders/base.py bleach/_vendor/html5lib/treebuilders/base.py
index e4a3d71..2869da0 100644
--- bleach/_vendor/html5lib/treebuilders/base.py
+++ bleach/_vendor/html5lib/treebuilders/base.py
@@ -1,5 +1,5 @@
from __future__ import absolute_import, division, unicode_literals
-from six import text_type
+from bleach.six_shim import text_type

from ..constants import scopingElements, tableInsertModeElements, namespaces

diff --git bleach/_vendor/html5lib/treebuilders/etree.py bleach/_vendor/html5lib/treebuilders/etree.py
index 086bed4..5ccfc4d 100644
--- bleach/_vendor/html5lib/treebuilders/etree.py
+++ bleach/_vendor/html5lib/treebuilders/etree.py
@@ -1,7 +1,7 @@
from __future__ import absolute_import, division, unicode_literals
# pylint:disable=protected-access

-from six import text_type
+from bleach.six_shim import text_type

import re

diff --git bleach/_vendor/html5lib/treebuilders/etree_lxml.py bleach/_vendor/html5lib/treebuilders/etree_lxml.py
index e73de61..f462232 100644
--- bleach/_vendor/html5lib/treebuilders/etree_lxml.py
+++ bleach/_vendor/html5lib/treebuilders/etree_lxml.py
@@ -28,7 +28,7 @@ from . import etree as etree_builders
from .. import _ihatexml

import lxml.etree as etree
-from six import PY3, binary_type
+from bleach.six_shim import PY3, binary_type


fullTree = True
diff --git bleach/_vendor/html5lib/treewalkers/etree.py bleach/_vendor/html5lib/treewalkers/etree.py
index 4465337..a9d9450 100644
--- bleach/_vendor/html5lib/treewalkers/etree.py
+++ bleach/_vendor/html5lib/treewalkers/etree.py
@@ -3,7 +3,7 @@ from __future__ import absolute_import, division, unicode_literals
from collections import OrderedDict
import re

-from six import string_types
+from bleach.six_shim import string_types

from . import base
from .._utils import moduleFactoryFactory
diff --git bleach/_vendor/html5lib/treewalkers/etree_lxml.py bleach/_vendor/html5lib/treewalkers/etree_lxml.py
index a614ac5..ef42163 100644
--- bleach/_vendor/html5lib/treewalkers/etree_lxml.py
+++ bleach/_vendor/html5lib/treewalkers/etree_lxml.py
@@ -1,5 +1,5 @@
from __future__ import absolute_import, division, unicode_literals
-from six import text_type
+from bleach.six_shim import text_type

from collections import OrderedDict

4 changes: 2 additions & 2 deletions bleach/_vendor/html5lib/_inputstream.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import absolute_import, division, unicode_literals

from six import text_type
from six.moves import http_client, urllib
from bleach.six_shim import text_type
from bleach.six_shim import http_client, urllib

import codecs
import re
Expand Down
2 changes: 1 addition & 1 deletion bleach/_vendor/html5lib/_tokenizer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import absolute_import, division, unicode_literals

from six import unichr as chr
from bleach.six_shim import unichr as chr

from collections import deque, OrderedDict
from sys import version_info
Expand Down
2 changes: 1 addition & 1 deletion bleach/_vendor/html5lib/_trie/py.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from __future__ import absolute_import, division, unicode_literals
from six import text_type
from bleach.six_shim import text_type

from bisect import bisect_left

Expand Down
2 changes: 1 addition & 1 deletion bleach/_vendor/html5lib/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
except ImportError:
from collections import Mapping

from six import text_type, PY3
from bleach.six_shim import text_type, PY3

if PY3:
import xml.etree.ElementTree as default_etree
Expand Down
2 changes: 1 addition & 1 deletion bleach/_vendor/html5lib/filters/lint.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import absolute_import, division, unicode_literals

from six import text_type
from bleach.six_shim import text_type

from . import base
from ..constants import namespaces, voidElements
Expand Down
2 changes: 1 addition & 1 deletion bleach/_vendor/html5lib/filters/sanitizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import warnings
from xml.sax.saxutils import escape, unescape

from six.moves import urllib_parse as urlparse
from bleach.six_shim import urllib_parse as urlparse

from . import base
from ..constants import namespaces, prefixes
Expand Down
4 changes: 2 additions & 2 deletions bleach/_vendor/html5lib/html5parser.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from __future__ import absolute_import, division, unicode_literals
from six import with_metaclass, viewkeys
from bleach.six_shim import viewkeys

import types

Expand Down Expand Up @@ -423,7 +423,7 @@ def getMetaclass(use_metaclass, metaclass_func):
return type

# pylint:disable=unused-argument
class Phase(with_metaclass(getMetaclass(debug, log))):
class Phase(metaclass=getMetaclass(debug, log)):
"""Base class for helper object that implements each phase of processing
"""
__slots__ = ("parser", "tree", "__startTagCache", "__endTagCache")
Expand Down
2 changes: 1 addition & 1 deletion bleach/_vendor/html5lib/serializer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from __future__ import absolute_import, division, unicode_literals
from six import text_type
from bleach.six_shim import text_type

import re

Expand Down
2 changes: 1 addition & 1 deletion bleach/_vendor/html5lib/treebuilders/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from __future__ import absolute_import, division, unicode_literals
from six import text_type
from bleach.six_shim import text_type

from ..constants import scopingElements, tableInsertModeElements, namespaces

Expand Down
2 changes: 1 addition & 1 deletion bleach/_vendor/html5lib/treebuilders/etree.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import absolute_import, division, unicode_literals
# pylint:disable=protected-access

from six import text_type
from bleach.six_shim import text_type

import re

Expand Down
2 changes: 1 addition & 1 deletion bleach/_vendor/html5lib/treebuilders/etree_lxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from .. import _ihatexml

import lxml.etree as etree
from six import PY3, binary_type
from bleach.six_shim import PY3, binary_type


fullTree = True
Expand Down
2 changes: 1 addition & 1 deletion bleach/_vendor/html5lib/treewalkers/etree.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from collections import OrderedDict
import re

from six import string_types
from bleach.six_shim import string_types

from . import base
from .._utils import moduleFactoryFactory
Expand Down
2 changes: 1 addition & 1 deletion bleach/_vendor/html5lib/treewalkers/etree_lxml.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from __future__ import absolute_import, division, unicode_literals
from six import text_type
from bleach.six_shim import text_type

from collections import OrderedDict

Expand Down
4 changes: 4 additions & 0 deletions bleach/_vendor/vendor_install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,12 @@ set -o pipefail
BLEACH_VENDOR_DIR=${BLEACH_VENDOR_DIR:-"."}
DEST=${DEST:-"."}

# Install with no dependencies
pip install --no-binary all --no-compile --no-deps -r "${BLEACH_VENDOR_DIR}/vendor.txt" --target "${DEST}"

# Apply patches
(cd "${DEST}" && patch -p2 < 01_html5lib_six.patch)

# install Python 3.6.14 urllib.urlparse for #536
curl --proto '=https' --tlsv1.2 -o "${DEST}/parse.py" https://raw.githubusercontent.com/python/cpython/v3.6.14/Lib/urllib/parse.py
(cd "${DEST}" && sha256sum parse.py > parse.py.SHA256SUM)
12 changes: 9 additions & 3 deletions scripts/vendor_verify.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ set -e
# Install vendored packages into /tmp and then compare with what's in
# bleach/_vendor/.

DEST=/tmp/vendor-test
export DEST=/tmp/vendor-test
export BLEACH_VENDOR_DIR=bleach/_vendor

if [[ -e "${DEST}" ]]; then
echo "${DEST} exists. Please remove."
Expand All @@ -14,11 +15,16 @@ fi

mkdir "${DEST}"

# Get versions of pip and python
# Get diagnostic information
pip --version
echo "DEST: ${DEST}"
echo "BLEACH_VENDOR_DIR: ${BLEACH_VENDOR_DIR}"

# Copy patch files to dest directory
cp bleach/_vendor/*.patch "${DEST}"

# Install vendored dependencies into temp directory
BLEACH_VENDOR_DIR=bleach/_vendor DEST="${DEST}" bleach/_vendor/vendor_install.sh
bleach/_vendor/vendor_install.sh

# Diff contents of temp directory and bleach/_vendor/ excluding vendoring
# infrastructure
Expand Down

0 comments on commit 156c589

Please sign in to comment.