Skip to content

Commit

Permalink
Remove six dependency from html5lib (#618)
Browse files Browse the repository at this point in the history
This way lies madness, but at least we don't have a six dependency
anymore.

The way this works is that we vendored html5lib 1.1, and then we apply
01_html5lib_six.patch to it, which changes the imports from six into
imports from bleach.six_shim.

This updates the vendor management code and vendorverify to install
html5lib 1.1 and then apply the patch and then compare with what's in
the tree. If we end up applying further patches in the future, we can
use this model to do that.
  • Loading branch information
willkg committed Oct 28, 2024
1 parent 365c1dc commit 677e4e8
Show file tree
Hide file tree
Showing 16 changed files with 177 additions and 15 deletions.
158 changes: 158 additions & 0 deletions bleach/_vendor/01_html5lib_six.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
diff --git bleach/_vendor/html5lib/_inputstream.py bleach/_vendor/html5lib/_inputstream.py
index 0207dd2..0976251 100644
--- bleach/_vendor/html5lib/_inputstream.py
+++ bleach/_vendor/html5lib/_inputstream.py
@@ -1,7 +1,7 @@
from __future__ import absolute_import, division, unicode_literals

-from six import text_type
-from six.moves import http_client, urllib
+from bleach.six_shim import text_type
+from bleach.six_shim import http_client, urllib

import codecs
import re
diff --git bleach/_vendor/html5lib/_tokenizer.py bleach/_vendor/html5lib/_tokenizer.py
index 4748a19..d884801 100644
--- bleach/_vendor/html5lib/_tokenizer.py
+++ bleach/_vendor/html5lib/_tokenizer.py
@@ -1,6 +1,6 @@
from __future__ import absolute_import, division, unicode_literals

-from six import unichr as chr
+from bleach.six_shim import unichr as chr

from collections import deque, OrderedDict
from sys import version_info
diff --git bleach/_vendor/html5lib/_trie/py.py bleach/_vendor/html5lib/_trie/py.py
index c2ba3da..56f66bd 100644
--- bleach/_vendor/html5lib/_trie/py.py
+++ bleach/_vendor/html5lib/_trie/py.py
@@ -1,5 +1,5 @@
from __future__ import absolute_import, division, unicode_literals
-from six import text_type
+from bleach.six_shim import text_type

from bisect import bisect_left

diff --git bleach/_vendor/html5lib/_utils.py bleach/_vendor/html5lib/_utils.py
index 9ea5794..635bb02 100644
--- bleach/_vendor/html5lib/_utils.py
+++ bleach/_vendor/html5lib/_utils.py
@@ -7,7 +7,7 @@ try:
except ImportError:
from collections import Mapping

-from six import text_type, PY3
+from bleach.six_shim import text_type, PY3

if PY3:
import xml.etree.ElementTree as default_etree
diff --git bleach/_vendor/html5lib/filters/lint.py bleach/_vendor/html5lib/filters/lint.py
index acd4d7a..1340d97 100644
--- bleach/_vendor/html5lib/filters/lint.py
+++ bleach/_vendor/html5lib/filters/lint.py
@@ -1,6 +1,6 @@
from __future__ import absolute_import, division, unicode_literals

-from six import text_type
+from bleach.six_shim import text_type

from . import base
from ..constants import namespaces, voidElements
diff --git bleach/_vendor/html5lib/filters/sanitizer.py bleach/_vendor/html5lib/filters/sanitizer.py
index 70ef906..5c31e97 100644
--- bleach/_vendor/html5lib/filters/sanitizer.py
+++ bleach/_vendor/html5lib/filters/sanitizer.py
@@ -12,7 +12,7 @@ import re
import warnings
from xml.sax.saxutils import escape, unescape

-from six.moves import urllib_parse as urlparse
+from bleach.six_shim import urllib_parse as urlparse

from . import base
from ..constants import namespaces, prefixes
diff --git bleach/_vendor/html5lib/html5parser.py bleach/_vendor/html5lib/html5parser.py
index 74d829d..b90a9fe 100644
--- bleach/_vendor/html5lib/html5parser.py
+++ bleach/_vendor/html5lib/html5parser.py
@@ -1,5 +1,5 @@
from __future__ import absolute_import, division, unicode_literals
-from six import with_metaclass, viewkeys
+from bleach.six_shim import with_metaclass, viewkeys

import types

diff --git bleach/_vendor/html5lib/serializer.py bleach/_vendor/html5lib/serializer.py
index c66df68..5666f49 100644
--- bleach/_vendor/html5lib/serializer.py
+++ bleach/_vendor/html5lib/serializer.py
@@ -1,5 +1,5 @@
from __future__ import absolute_import, division, unicode_literals
-from six import text_type
+from bleach.six_shim import text_type

import re

diff --git bleach/_vendor/html5lib/treebuilders/base.py bleach/_vendor/html5lib/treebuilders/base.py
index e4a3d71..2869da0 100644
--- bleach/_vendor/html5lib/treebuilders/base.py
+++ bleach/_vendor/html5lib/treebuilders/base.py
@@ -1,5 +1,5 @@
from __future__ import absolute_import, division, unicode_literals
-from six import text_type
+from bleach.six_shim import text_type

from ..constants import scopingElements, tableInsertModeElements, namespaces

diff --git bleach/_vendor/html5lib/treebuilders/etree.py bleach/_vendor/html5lib/treebuilders/etree.py
index 086bed4..5ccfc4d 100644
--- bleach/_vendor/html5lib/treebuilders/etree.py
+++ bleach/_vendor/html5lib/treebuilders/etree.py
@@ -1,7 +1,7 @@
from __future__ import absolute_import, division, unicode_literals
# pylint:disable=protected-access

-from six import text_type
+from bleach.six_shim import text_type

import re

diff --git bleach/_vendor/html5lib/treebuilders/etree_lxml.py bleach/_vendor/html5lib/treebuilders/etree_lxml.py
index e73de61..f462232 100644
--- bleach/_vendor/html5lib/treebuilders/etree_lxml.py
+++ bleach/_vendor/html5lib/treebuilders/etree_lxml.py
@@ -28,7 +28,7 @@ from . import etree as etree_builders
from .. import _ihatexml

import lxml.etree as etree
-from six import PY3, binary_type
+from bleach.six_shim import PY3, binary_type


fullTree = True
diff --git bleach/_vendor/html5lib/treewalkers/etree.py bleach/_vendor/html5lib/treewalkers/etree.py
index 4465337..a9d9450 100644
--- bleach/_vendor/html5lib/treewalkers/etree.py
+++ bleach/_vendor/html5lib/treewalkers/etree.py
@@ -3,7 +3,7 @@ from __future__ import absolute_import, division, unicode_literals
from collections import OrderedDict
import re

-from six import string_types
+from bleach.six_shim import string_types

from . import base
from .._utils import moduleFactoryFactory
diff --git bleach/_vendor/html5lib/treewalkers/etree_lxml.py bleach/_vendor/html5lib/treewalkers/etree_lxml.py
index a614ac5..ef42163 100644
--- bleach/_vendor/html5lib/treewalkers/etree_lxml.py
+++ bleach/_vendor/html5lib/treewalkers/etree_lxml.py
@@ -1,5 +1,5 @@
from __future__ import absolute_import, division, unicode_literals
-from six import text_type
+from bleach.six_shim import text_type

from collections import OrderedDict

4 changes: 2 additions & 2 deletions bleach/_vendor/html5lib/_inputstream.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import absolute_import, division, unicode_literals

from six import text_type
from six.moves import http_client, urllib
from bleach.six_shim import text_type
from bleach.six_shim import http_client, urllib

import codecs
import re
Expand Down
2 changes: 1 addition & 1 deletion bleach/_vendor/html5lib/_tokenizer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import absolute_import, division, unicode_literals

from six import unichr as chr
from bleach.six_shim import unichr as chr

from collections import deque, OrderedDict
from sys import version_info
Expand Down
2 changes: 1 addition & 1 deletion bleach/_vendor/html5lib/_trie/py.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from __future__ import absolute_import, division, unicode_literals
from six import text_type
from bleach.six_shim import text_type

from bisect import bisect_left

Expand Down
2 changes: 1 addition & 1 deletion bleach/_vendor/html5lib/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
except ImportError:
from collections import Mapping

from six import text_type, PY3
from bleach.six_shim import text_type, PY3

if PY3:
import xml.etree.ElementTree as default_etree
Expand Down
2 changes: 1 addition & 1 deletion bleach/_vendor/html5lib/filters/lint.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import absolute_import, division, unicode_literals

from six import text_type
from bleach.six_shim import text_type

from . import base
from ..constants import namespaces, voidElements
Expand Down
2 changes: 1 addition & 1 deletion bleach/_vendor/html5lib/filters/sanitizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import warnings
from xml.sax.saxutils import escape, unescape

from six.moves import urllib_parse as urlparse
from bleach.six_shim import urllib_parse as urlparse

from . import base
from ..constants import namespaces, prefixes
Expand Down
2 changes: 1 addition & 1 deletion bleach/_vendor/html5lib/html5parser.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from __future__ import absolute_import, division, unicode_literals
from six import with_metaclass, viewkeys
from bleach.six_shim import with_metaclass, viewkeys

import types

Expand Down
2 changes: 1 addition & 1 deletion bleach/_vendor/html5lib/serializer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from __future__ import absolute_import, division, unicode_literals
from six import text_type
from bleach.six_shim import text_type

import re

Expand Down
2 changes: 1 addition & 1 deletion bleach/_vendor/html5lib/treebuilders/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from __future__ import absolute_import, division, unicode_literals
from six import text_type
from bleach.six_shim import text_type

from ..constants import scopingElements, tableInsertModeElements, namespaces

Expand Down
2 changes: 1 addition & 1 deletion bleach/_vendor/html5lib/treebuilders/etree.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import absolute_import, division, unicode_literals
# pylint:disable=protected-access

from six import text_type
from bleach.six_shim import text_type

import re

Expand Down
2 changes: 1 addition & 1 deletion bleach/_vendor/html5lib/treebuilders/etree_lxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from .. import _ihatexml

import lxml.etree as etree
from six import PY3, binary_type
from bleach.six_shim import PY3, binary_type


fullTree = True
Expand Down
2 changes: 1 addition & 1 deletion bleach/_vendor/html5lib/treewalkers/etree.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from collections import OrderedDict
import re

from six import string_types
from bleach.six_shim import string_types

from . import base
from .._utils import moduleFactoryFactory
Expand Down
2 changes: 1 addition & 1 deletion bleach/_vendor/html5lib/treewalkers/etree_lxml.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from __future__ import absolute_import, division, unicode_literals
from six import text_type
from bleach.six_shim import text_type

from collections import OrderedDict

Expand Down
1 change: 1 addition & 0 deletions bleach/_vendor/vendor_install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ BLEACH_VENDOR_DIR=${BLEACH_VENDOR_DIR:-"."}
DEST=${DEST:-"."}

pip install --no-binary all --no-compile --no-deps -r "${BLEACH_VENDOR_DIR}/vendor.txt" --target "${DEST}"
(cd "${DEST}" && patch -p2 < 01_html5lib_six.patch)

# install Python 3.6.14 urllib.urlparse for #536
curl --proto '=https' --tlsv1.2 -o "${DEST}/parse.py" https://raw.githubusercontent.com/python/cpython/v3.6.14/Lib/urllib/parse.py
Expand Down
5 changes: 4 additions & 1 deletion scripts/vendor_verify.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,11 @@ mkdir "${DEST}"
# Get versions of pip and python
pip --version

# Copy patch files to dest directory
cp bleach/_vendor/*.patch "${DEST}"

# Install vendored dependencies into temp directory
BLEACH_VENDOR_DIR=bleach/_vendor DEST="${DEST}" bleach/_vendor/vendor_install.sh
BLEACH_VENDOR_DIR="bleach/_vendor" DEST="${DEST}" bleach/_vendor/vendor_install.sh

# Diff contents of temp directory and bleach/_vendor/ excluding vendoring
# infrastructure
Expand Down

0 comments on commit 677e4e8

Please sign in to comment.