From 91cf858f67111316441513958c1ca34b794bf9c3 Mon Sep 17 00:00:00 2001
From: Zac Hatfield-Dodds <zac.hatfield.dodds@gmail.com>
Date: Fri, 1 Sep 2023 17:17:09 -0700
Subject: [PATCH 1/6] Changelog markup

---
 hypothesis-python/docs/changes.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/hypothesis-python/docs/changes.rst b/hypothesis-python/docs/changes.rst
index 193530b021..e9ee15ce30 100644
--- a/hypothesis-python/docs/changes.rst
+++ b/hypothesis-python/docs/changes.rst
@@ -144,7 +144,7 @@ help narrow down any particularly weird bugs in complex environments.
 -------------------
 
 Fixes some lingering issues with inference of recursive types
-in `~hypothesis.strategies.from_type`. Closes :issue:`3525`.
+in :func:`~hypothesis.strategies.from_type`. Closes :issue:`3525`.
 
 .. _v6.81.0:
 
@@ -335,8 +335,8 @@ is strongly recommended.  You can ensure you have the dependencies with
 -------------------
 
 This patch continues the work started in :pull:`3651` by adding
-:pypi:`ruff` linter rules for pyflakes, flake8-comprehensions, and
-flake8-implicit-str-concat.
+:pypi:`ruff` linter rules for :pypi:`pyflakes`, :pypi:`flake8-comprehensions`,
+and :pypi:`flake8-implicit-str-concat`.
 
 .. _v6.75.5:
 
@@ -1184,7 +1184,7 @@ is really annoying.  See :issue:`2701` for details.
 6.48.0 - 2022-06-27
 -------------------
 
-This release raises :class:`~unittest.SkipTest` for which never executed any
+This release raises :class:`~unittest.SkipTest` for tests which never executed any
 examples, for example because the :obj:`~hypothesis.settings.phases` setting
 excluded the :obj:`~hypothesis.Phase.explicit`, :obj:`~hypothesis.Phase.reuse`,
 and :obj:`~hypothesis.Phase.generate` phases.  This helps to avoid cases where

From c0a60d7e47d9b10d071c881feab19aedda3c4a66 Mon Sep 17 00:00:00 2001
From: Zac Hatfield-Dodds <zac.hatfield.dodds@gmail.com>
Date: Fri, 1 Sep 2023 17:17:09 -0700
Subject: [PATCH 2/6] Fix pretty-printer typo

---
 hypothesis-python/src/hypothesis/vendor/pretty.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hypothesis-python/src/hypothesis/vendor/pretty.py b/hypothesis-python/src/hypothesis/vendor/pretty.py
index f9ffb128ea..5a1989182a 100644
--- a/hypothesis-python/src/hypothesis/vendor/pretty.py
+++ b/hypothesis-python/src/hypothesis/vendor/pretty.py
@@ -153,7 +153,7 @@ def __init__(self, output=None, *, context=None):
             ipp = sys.modules["IPython.lib.pretty"]
             self.singleton_pprinters.update(ipp._singleton_pprinters)
             self.type_pprinters.update(ipp._type_pprinters)
-            self.deferred_pprinters.update(ipp._deferred_pprinters)
+            self.deferred_pprinters.update(ipp._deferred_type_pprinters)
         # If there's overlap between our pprinters and IPython's, we'll use ours.
         self.singleton_pprinters.update(_singleton_pprinters)
         self.type_pprinters.update(_type_pprinters)

From 8ca6c3423640405afc1650f9cf5ff63a8ba61f80 Mon Sep 17 00:00:00 2001
From: Zac Hatfield-Dodds <zac.hatfield.dodds@gmail.com>
Date: Fri, 1 Sep 2023 17:17:09 -0700
Subject: [PATCH 3/6] Move helper fn to compat.py

---
 hypothesis-python/src/hypothesis/core.py            | 10 +---------
 hypothesis-python/src/hypothesis/internal/compat.py |  9 +++++++++
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/hypothesis-python/src/hypothesis/core.py b/hypothesis-python/src/hypothesis/core.py
index 113876aae0..bfccf55159 100644
--- a/hypothesis-python/src/hypothesis/core.py
+++ b/hypothesis-python/src/hypothesis/core.py
@@ -70,6 +70,7 @@
 from hypothesis.internal.compat import (
     PYPY,
     BaseExceptionGroup,
+    add_note,
     bad_django_TestCase,
     get_type_hints,
     int_from_bytes,
@@ -1008,15 +1009,6 @@ def run_engine(self):
         _raise_to_user(errors_to_report, self.settings, report_lines)
 
 
-def add_note(exc, note):
-    try:
-        exc.add_note(note)
-    except AttributeError:
-        if not hasattr(exc, "__notes__"):
-            exc.__notes__ = []
-        exc.__notes__.append(note)
-
-
 def _raise_to_user(errors_to_report, settings, target_lines, trailer=""):
     """Helper function for attaching notes and grouping multiple errors."""
     failing_prefix = "Falsifying example: "
diff --git a/hypothesis-python/src/hypothesis/internal/compat.py b/hypothesis-python/src/hypothesis/internal/compat.py
index 29baa7ea79..1f23ce1863 100644
--- a/hypothesis-python/src/hypothesis/internal/compat.py
+++ b/hypothesis-python/src/hypothesis/internal/compat.py
@@ -43,6 +43,15 @@
 WINDOWS = platform.system() == "Windows"
 
 
+def add_note(exc, note):
+    try:
+        exc.add_note(note)
+    except AttributeError:
+        if not hasattr(exc, "__notes__"):
+            exc.__notes__ = []
+        exc.__notes__.append(note)
+
+
 def escape_unicode_characters(s: str) -> str:
     return codecs.encode(s, "unicode_escape").decode("ascii")
 

From 49b0b80e77f5437c934f00cd119447fb939737cb Mon Sep 17 00:00:00 2001
From: Zac Hatfield-Dodds <zac.hatfield.dodds@gmail.com>
Date: Fri, 1 Sep 2023 17:17:09 -0700
Subject: [PATCH 4/6] Fix error messages

---
 hypothesis-python/src/hypothesis/strategies/_internal/core.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hypothesis-python/src/hypothesis/strategies/_internal/core.py b/hypothesis-python/src/hypothesis/strategies/_internal/core.py
index 2f39fc5376..f6638959e2 100644
--- a/hypothesis-python/src/hypothesis/strategies/_internal/core.py
+++ b/hypothesis-python/src/hypothesis/strategies/_internal/core.py
@@ -2035,11 +2035,11 @@ def register_type_strategy(
         )
     elif not (isinstance(strategy, SearchStrategy) or callable(strategy)):
         raise InvalidArgument(
-            "strategy=%r must be a SearchStrategy, or a function that takes "
+            f"{strategy=} must be a SearchStrategy, or a function that takes "
             "a generic type and returns a specific SearchStrategy"
         )
     elif isinstance(strategy, SearchStrategy) and strategy.is_empty:
-        raise InvalidArgument("strategy=%r must not be empty")
+        raise InvalidArgument(f"{strategy=} must not be empty")
     elif types.has_type_arguments(custom_type):
         raise InvalidArgument(
             f"Cannot register generic type {custom_type!r}, because it has type "

From 26ffda96325e790e4aee58783e20dcd910d0b3af Mon Sep 17 00:00:00 2001
From: Zac Hatfield-Dodds <zac.hatfield.dodds@gmail.com>
Date: Fri, 1 Sep 2023 17:17:09 -0700
Subject: [PATCH 5/6] Refactor charmap/IntervalSet logic

---
 .../src/hypothesis/internal/charmap.py        | 144 ++---------------
 .../src/hypothesis/internal/intervalsets.py   | 147 ++++++++++++++++++
 hypothesis-python/tests/cover/test_charmap.py |  25 +--
 .../tests/cover/test_intervalset.py           |  12 +-
 4 files changed, 184 insertions(+), 144 deletions(-)

diff --git a/hypothesis-python/src/hypothesis/internal/charmap.py b/hypothesis-python/src/hypothesis/internal/charmap.py
index 80e94e2846..fe09de5227 100644
--- a/hypothesis-python/src/hypothesis/internal/charmap.py
+++ b/hypothesis-python/src/hypothesis/internal/charmap.py
@@ -18,6 +18,7 @@
 
 from hypothesis.configuration import mkdir_p, storage_directory
 from hypothesis.errors import InvalidArgument
+from hypothesis.internal.intervalsets import IntervalSet
 
 intervals = Tuple[Tuple[int, int], ...]
 cache_type = Dict[Tuple[Tuple[str, ...], int, int, intervals], intervals]
@@ -146,126 +147,6 @@ def as_general_categories(cats, name="cats"):
     return tuple(c for c in cs if c in out)
 
 
-def _union_intervals(x, y):
-    """Merge two sequences of intervals into a single tuple of intervals.
-
-    Any integer bounded by `x` or `y` is also bounded by the result.
-
-    >>> _union_intervals([(3, 10)], [(1, 2), (5, 17)])
-    ((1, 17),)
-    """
-    if not x:
-        return tuple((u, v) for u, v in y)
-    if not y:
-        return tuple((u, v) for u, v in x)
-    intervals = sorted(x + y, reverse=True)
-    result = [intervals.pop()]
-    while intervals:
-        # 1. intervals is in descending order
-        # 2. pop() takes from the RHS.
-        # 3. (a, b) was popped 1st, then (u, v) was popped 2nd
-        # 4. Therefore: a <= u
-        # 5. We assume that u <= v and a <= b
-        # 6. So we need to handle 2 cases of overlap, and one disjoint case
-        #    |   u--v     |   u----v   |       u--v  |
-        #    |   a----b   |   a--b     |  a--b       |
-        u, v = intervals.pop()
-        a, b = result[-1]
-        if u <= b + 1:
-            # Overlap cases
-            result[-1] = (a, max(v, b))
-        else:
-            # Disjoint case
-            result.append((u, v))
-    return tuple(result)
-
-
-def _subtract_intervals(x, y):
-    """Set difference for lists of intervals. That is, returns a list of
-    intervals that bounds all values bounded by x that are not also bounded by
-    y. x and y are expected to be in sorted order.
-
-    For example _subtract_intervals([(1, 10)], [(2, 3), (9, 15)]) would
-    return [(1, 1), (4, 8)], removing the values 2, 3, 9 and 10 from the
-    interval.
-    """
-    if not y:
-        return tuple(x)
-    x = list(map(list, x))
-    i = 0
-    j = 0
-    result = []
-    while i < len(x) and j < len(y):
-        # Iterate in parallel over x and y. j stays pointing at the smallest
-        # interval in the left hand side that could still overlap with some
-        # element of x at index >= i.
-        # Similarly, i is not incremented until we know that it does not
-        # overlap with any element of y at index >= j.
-
-        xl, xr = x[i]
-        assert xl <= xr
-        yl, yr = y[j]
-        assert yl <= yr
-
-        if yr < xl:
-            # The interval at y[j] is strictly to the left of the interval at
-            # x[i], so will not overlap with it or any later interval of x.
-            j += 1
-        elif yl > xr:
-            # The interval at y[j] is strictly to the right of the interval at
-            # x[i], so all of x[i] goes into the result as no further intervals
-            # in y will intersect it.
-            result.append(x[i])
-            i += 1
-        elif yl <= xl:
-            if yr >= xr:
-                # x[i] is contained entirely in y[j], so we just skip over it
-                # without adding it to the result.
-                i += 1
-            else:
-                # The beginning of x[i] is contained in y[j], so we update the
-                # left endpoint of x[i] to remove this, and increment j as we
-                # now have moved past it. Note that this is not added to the
-                # result as is, as more intervals from y may intersect it so it
-                # may need updating further.
-                x[i][0] = yr + 1
-                j += 1
-        else:
-            # yl > xl, so the left hand part of x[i] is not contained in y[j],
-            # so there are some values we should add to the result.
-            result.append((xl, yl - 1))
-
-            if yr + 1 <= xr:
-                # If y[j] finishes before x[i] does, there may be some values
-                # in x[i] left that should go in the result (or they may be
-                # removed by a later interval in y), so we update x[i] to
-                # reflect that and increment j because it no longer overlaps
-                # with any remaining element of x.
-                x[i][0] = yr + 1
-                j += 1
-            else:
-                # Every element of x[i] other than the initial part we have
-                # already added is contained in y[j], so we move to the next
-                # interval.
-                i += 1
-    # Any remaining intervals in x do not overlap with any of y, as if they did
-    # we would not have incremented j to the end, so can be added to the result
-    # as they are.
-    result.extend(x[i:])
-    return tuple(map(tuple, result))
-
-
-def _intervals(s):
-    """Return a tuple of intervals, covering the codepoints of characters in
-    `s`.
-
-    >>> _intervals('abcdef0123456789')
-    ((48, 57), (97, 102))
-    """
-    intervals = tuple((ord(c), ord(c)) for c in sorted(s))
-    return _union_intervals(intervals, intervals)
-
-
 category_index_cache = {(): ()}
 
 
@@ -306,11 +187,14 @@ def _query_for_key(key):
         pass
     assert key
     if set(key) == set(categories()):
-        result = ((0, sys.maxunicode),)
+        result = IntervalSet([(0, sys.maxunicode)])
     else:
-        result = _union_intervals(_query_for_key(key[:-1]), charmap()[key[-1]])
-    category_index_cache[key] = result
-    return result
+        result = IntervalSet(_query_for_key(key[:-1])).union(
+            IntervalSet(charmap()[key[-1]])
+        )
+    assert isinstance(result, IntervalSet)
+    category_index_cache[key] = result.intervals
+    return result.intervals
 
 
 limited_category_index_cache: cache_type = {}
@@ -344,14 +228,14 @@ def query(
     if max_codepoint is None:
         max_codepoint = sys.maxunicode
     catkey = _category_key(exclude_categories, include_categories)
-    character_intervals = _intervals(include_characters or "")
-    exclude_intervals = _intervals(exclude_characters or "")
+    character_intervals = IntervalSet.from_string(include_characters or "")
+    exclude_intervals = IntervalSet.from_string(exclude_characters or "")
     qkey = (
         catkey,
         min_codepoint,
         max_codepoint,
-        character_intervals,
-        exclude_intervals,
+        character_intervals.intervals,
+        exclude_intervals.intervals,
     )
     try:
         return limited_category_index_cache[qkey]
@@ -362,8 +246,6 @@ def query(
     for u, v in base:
         if v >= min_codepoint and u <= max_codepoint:
             result.append((max(u, min_codepoint), min(v, max_codepoint)))
-    result = tuple(result)
-    result = _union_intervals(result, character_intervals)
-    result = _subtract_intervals(result, exclude_intervals)
+    result = (IntervalSet(result) | character_intervals) - exclude_intervals
     limited_category_index_cache[qkey] = result
     return result
diff --git a/hypothesis-python/src/hypothesis/internal/intervalsets.py b/hypothesis-python/src/hypothesis/internal/intervalsets.py
index 5bdd731d2d..33d02dd8a8 100644
--- a/hypothesis-python/src/hypothesis/internal/intervalsets.py
+++ b/hypothesis-python/src/hypothesis/internal/intervalsets.py
@@ -10,6 +10,16 @@
 
 
 class IntervalSet:
+    @classmethod
+    def from_string(cls, s):
+        """Return an IntervalSet covering the codepoints of characters in `s`.
+
+        >>> IntervalSet.from_string('abcdef0123456789')
+        IntervalSet(((48, 57), (97, 102)))
+        """
+        x = cls((ord(c), ord(c)) for c in sorted(s))
+        return x.union(x)
+
     def __init__(self, intervals):
         self.intervals = tuple(intervals)
         self.offsets = [0]
@@ -49,6 +59,13 @@ def __getitem__(self, i):
         assert r <= v
         return r
 
+    def __contains__(self, elem):
+        if isinstance(elem, str):
+            elem = ord(elem)
+        assert isinstance(elem, int)
+        assert 0 <= elem <= 0x10FFFF
+        return any(start <= elem <= end for start, end in self.intervals)
+
     def __repr__(self):
         return f"IntervalSet({self.intervals!r})"
 
@@ -69,3 +86,133 @@ def index_above(self, value):
             if value <= v:
                 return offset + (value - u)
         return self.size
+
+    def __or__(self, other):
+        return self.union(other)
+
+    def __sub__(self, other):
+        return self.difference(other)
+
+    def __and__(self, other):
+        return self.intersection(other)
+
+    def union(self, other):
+        """Return a new IntervalSet covering every value in `self` or `other`.
+
+        Any integer bounded by `self` or `other` is also bounded by the result.
+
+        >>> IntervalSet([(3, 10)]).union(IntervalSet([(1, 2), (5, 17)]))
+        IntervalSet(((1, 17),))
+        """
+        assert isinstance(other, type(self))
+        x = self.intervals
+        y = other.intervals
+        if not x:
+            return IntervalSet((u, v) for u, v in y)
+        if not y:
+            return IntervalSet((u, v) for u, v in x)
+        intervals = sorted(x + y, reverse=True)
+        result = [intervals.pop()]
+        while intervals:
+            # 1. intervals is in descending order
+            # 2. pop() takes from the RHS.
+            # 3. (a, b) was popped 1st, then (u, v) was popped 2nd
+            # 4. Therefore: a <= u
+            # 5. We assume that u <= v and a <= b
+            # 6. So we need to handle 2 cases of overlap, and one disjoint case
+            #    |   u--v     |   u----v   |       u--v  |
+            #    |   a----b   |   a--b     |  a--b       |
+            u, v = intervals.pop()
+            a, b = result[-1]
+            if u <= b + 1:
+                # Overlap cases
+                result[-1] = (a, max(v, b))
+            else:
+                # Disjoint case
+                result.append((u, v))
+        return IntervalSet(result)
+
+    def difference(self, other):
+        """Set difference for interval sets. That is, returns an IntervalSet
+        that bounds all values bounded by self that are not also bounded by
+        other. Both sets' intervals are expected to be in sorted order.
+
+        For example IntervalSet([(1, 10)]) - IntervalSet([(2, 3), (9, 15)])
+        is IntervalSet(((1, 1), (4, 8))), removing the values 2, 3, 9 and 10
+        from the interval.
+        """
+        assert isinstance(other, type(self))
+        x = self.intervals
+        y = other.intervals
+        if not y:
+            return IntervalSet(x)
+        x = list(map(list, x))
+        i = 0
+        j = 0
+        result = []
+        while i < len(x) and j < len(y):
+            # Iterate in parallel over x and y. j stays pointing at the smallest
+            # interval in the left hand side that could still overlap with some
+            # element of x at index >= i.
+            # Similarly, i is not incremented until we know that it does not
+            # overlap with any element of y at index >= j.
+
+            xl, xr = x[i]
+            assert xl <= xr
+            yl, yr = y[j]
+            assert yl <= yr
+
+            if yr < xl:
+                # The interval at y[j] is strictly to the left of the interval at
+                # x[i], so will not overlap with it or any later interval of x.
+                j += 1
+            elif yl > xr:
+                # The interval at y[j] is strictly to the right of the interval at
+                # x[i], so all of x[i] goes into the result as no further intervals
+                # in y will intersect it.
+                result.append(x[i])
+                i += 1
+            elif yl <= xl:
+                if yr >= xr:
+                    # x[i] is contained entirely in y[j], so we just skip over it
+                    # without adding it to the result.
+                    i += 1
+                else:
+                    # The beginning of x[i] is contained in y[j], so we update the
+                    # left endpoint of x[i] to remove this, and increment j as we
+                    # now have moved past it. Note that this is not added to the
+                    # result as is, as more intervals from y may intersect it so it
+                    # may need updating further.
+                    x[i][0] = yr + 1
+                    j += 1
+            else:
+                # yl > xl, so the left hand part of x[i] is not contained in y[j],
+                # so there are some values we should add to the result.
+                result.append((xl, yl - 1))
+
+                if yr + 1 <= xr:
+                    # If y[j] finishes before x[i] does, there may be some values
+                    # in x[i] left that should go in the result (or they may be
+                    # removed by a later interval in y), so we update x[i] to
+                    # reflect that and increment j because it no longer overlaps
+                    # with any remaining element of x.
+                    x[i][0] = yr + 1
+                    j += 1
+                else:
+                    # Every element of x[i] other than the initial part we have
+                    # already added is contained in y[j], so we move to the next
+                    # interval.
+                    i += 1
+        # Any remaining intervals in x do not overlap with any of y, as if they did
+        # we would not have incremented j to the end, so can be added to the result
+        # as they are.
+        result.extend(x[i:])
+        return IntervalSet(map(tuple, result))
+
+    def intersection(self, other):
+        """Set intersection for lists of intervals.
+
+        Conveniently, this is trivial to define in terms of difference.
+        """
+        assert isinstance(other, type(self)), other
+        return self.difference(other - self).difference(self - other)
diff --git a/hypothesis-python/tests/cover/test_charmap.py b/hypothesis-python/tests/cover/test_charmap.py
index fe45fefb5e..48ebd97c84 100644
--- a/hypothesis-python/tests/cover/test_charmap.py
+++ b/hypothesis-python/tests/cover/test_charmap.py
@@ -16,6 +16,7 @@
 
 from hypothesis import assume, given, strategies as st
 from hypothesis.internal import charmap as cm
+from hypothesis.internal.intervalsets import IntervalSet
 
 
 def test_charmap_contains_all_unicode():
@@ -47,7 +48,7 @@ def assert_valid_range_list(ls):
     st.sets(st.sampled_from(cm.categories())) | st.none(),
 )
 def test_query_matches_categories(exclude, include):
-    values = cm.query(exclude, include)
+    values = cm.query(exclude, include).intervals
     assert_valid_range_list(values)
     for u, v in values:
         for i in (u, v, (u + v) // 2):
@@ -65,7 +66,7 @@ def test_query_matches_categories(exclude, include):
 )
 def test_query_matches_categories_codepoints(exclude, include, m1, m2):
     m1, m2 = sorted((m1, m2))
-    values = cm.query(exclude, include, min_codepoint=m1, max_codepoint=m2)
+    values = cm.query(exclude, include, min_codepoint=m1, max_codepoint=m2).intervals
     assert_valid_range_list(values)
     for u, v in values:
         assert m1 <= u
@@ -76,7 +77,7 @@ def test_query_matches_categories_codepoints(exclude, include, m1, m2):
 def test_exclude_only_excludes_from_that_category(cat, i):
     c = chr(i)
     assume(unicodedata.category(c) != cat)
-    intervals = cm.query(exclude_categories=(cat,))
+    intervals = cm.query(exclude_categories=(cat,)).intervals
     assert any(a <= i <= b for a, b in intervals)
 
 
@@ -115,30 +116,34 @@ def test_uses_cached_charmap():
     assert statinfo.st_mtime == mtime
 
 
+def _union_intervals(x, y):
+    return IntervalSet(x).union(IntervalSet(y)).intervals
+
+
 def test_union_empty():
-    assert cm._union_intervals([], []) == ()
-    assert cm._union_intervals([], [[1, 2]]) == ((1, 2),)
-    assert cm._union_intervals([[1, 2]], []) == ((1, 2),)
+    assert _union_intervals([], []) == ()
+    assert _union_intervals([], [[1, 2]]) == ((1, 2),)
+    assert _union_intervals([[1, 2]], []) == ((1, 2),)
 
 
 def test_union_handles_totally_overlapped_gap():
     #   < xx  >  Imagine the intervals x and y as bit strings.
     # | <yy yy>  The bit at position n is set if n falls inside that interval.
     # = <zzzzz>  In this model _union_intervals() performs bit-wise or.
-    assert cm._union_intervals([[2, 3]], [[1, 2], [4, 5]]) == ((1, 5),)
+    assert _union_intervals([[2, 3]], [[1, 2], [4, 5]]) == ((1, 5),)
 
 
 def test_union_handles_partially_overlapped_gap():
     #   <  x  >  Imagine the intervals x and y as bit strings.
     # | <yy  y>  The bit at position n is set if n falls inside that interval.
     # = <zzz z>  In this model _union_intervals() performs bit-wise or.
-    assert cm._union_intervals([[3, 3]], [[1, 2], [5, 5]]) == ((1, 3), (5, 5))
+    assert _union_intervals([[3, 3]], [[1, 2], [5, 5]]) == ((1, 3), (5, 5))
 
 
 def test_successive_union():
     x = []
     for v in cm.charmap().values():
-        x = cm._union_intervals(x, v)
+        x = _union_intervals(x, v)
     assert x == ((0, sys.maxunicode),)
 
 
@@ -175,7 +180,7 @@ def test_regenerate_broken_charmap_file():
 
 
 def test_exclude_characters_are_included_in_key():
-    assert cm.query() != cm.query(exclude_characters="0")
+    assert cm.query().intervals != cm.query(exclude_characters="0").intervals
 
 
 def test_error_writing_charmap_file_is_suppressed(monkeypatch):
diff --git a/hypothesis-python/tests/cover/test_intervalset.py b/hypothesis-python/tests/cover/test_intervalset.py
index 9b2f2d3485..8714378522 100644
--- a/hypothesis-python/tests/cover/test_intervalset.py
+++ b/hypothesis-python/tests/cover/test_intervalset.py
@@ -11,7 +11,6 @@
 import pytest
 
 from hypothesis import HealthCheck, assume, example, given, settings, strategies as st
-from hypothesis.internal.charmap import _subtract_intervals
 from hypothesis.internal.intervalsets import IntervalSet
 
 
@@ -58,7 +57,7 @@ def test_intervals_match_indexes(intervals):
 
 @example(intervals=IntervalSet(((1, 1),)), v=0)
 @example(intervals=IntervalSet(()), v=0)
-@given(Intervals, st.integers())
+@given(Intervals, st.integers(0, 0x10FFFF))
 def test_error_for_index_of_not_present_value(intervals, v):
     assume(v not in intervals)
     with pytest.raises(ValueError):
@@ -98,8 +97,15 @@ def test_subtraction_of_intervals(x, y):
     xs = intervals_to_set(x)
     ys = intervals_to_set(y)
     assume(not xs.isdisjoint(ys))
-    z = _subtract_intervals(x, y)
+    z = IntervalSet(x).difference(IntervalSet(y)).intervals
     assert z == tuple(sorted(z))
     for a, b in z:
         assert a <= b
     assert intervals_to_set(z) == intervals_to_set(x) - intervals_to_set(y)
+
+
+@given(Intervals, Intervals)
+def test_interval_intersection(x, y):
+    print(f"{set(x)=} {set(y)=} {set(x)-(set(y)-set(x))=}")
+    assert set(x & y) == set(x) & set(y)
+    assert set(x.intersection(y)) == set(x).intersection(y)

From 0cd8ca9bd3190c6247bb7f67be36e5652a164026 Mon Sep 17 00:00:00 2001
From: Zac Hatfield-Dodds <zac.hatfield.dodds@gmail.com>
Date: Fri, 1 Sep 2023 17:17:09 -0700
Subject: [PATCH 6/6] from_regex(..., alphabet=characters())

---
 hypothesis-python/RELEASE.rst                 |  13 ++
 .../hypothesis/strategies/_internal/core.py   |  55 ++++++-
 .../hypothesis/strategies/_internal/regex.py  | 135 +++++++++++-------
 .../strategies/_internal/strings.py           |  26 ++--
 .../tests/cover/test_direct_strategies.py     |  18 +++
 hypothesis-python/tests/cover/test_regex.py   |  25 +++-
 hypothesis-python/tests/cover/test_text.py    |   2 +-
 hypothesis-python/tests/nocover/test_regex.py |   2 +-
 8 files changed, 203 insertions(+), 73 deletions(-)
 create mode 100644 hypothesis-python/RELEASE.rst

diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst
new file mode 100644
index 0000000000..c75d34d13d
--- /dev/null
+++ b/hypothesis-python/RELEASE.rst
@@ -0,0 +1,13 @@
+RELEASE_TYPE: minor
+
+The :func:`~hypothesis.strategies.from_regex` strategy now takes an optional
+``alphabet=characters(codec="utf-8")`` argument for unicode strings, like
+:func:`~hypothesis.strategies.text`.
+
+This offers more and more-consistent control over the generated strings,
+removing previously-hard-coded limitations.  With ``fullmatch=False`` and
+``alphabet=characters()``, surrogate characters are now possible in leading
+and trailing text as well as the body of the match.  Negated character classes
+such as ``[^A-Z]`` or ``\S`` had a hard-coded exclusion of control characters
+and surrogate characters; now they permit anything in ``alphabet=`` consistent
+with the class, and control characters are permitted by default.
diff --git a/hypothesis-python/src/hypothesis/strategies/_internal/core.py b/hypothesis-python/src/hypothesis/strategies/_internal/core.py
index f6638959e2..e36fc64ae6 100644
--- a/hypothesis-python/src/hypothesis/strategies/_internal/core.py
+++ b/hypothesis-python/src/hypothesis/strategies/_internal/core.py
@@ -661,7 +661,7 @@ def characters(
             #       caching that, and taking the intersection of their intervals.
             raise InvalidArgument(f"{codec=} must be one of 'ascii', 'utf-8', or None")
 
-    return OneCharStringStrategy(
+    return OneCharStringStrategy.from_characters_args(
         whitelist_categories=whitelist_categories,
         blacklist_categories=blacklist_categories,
         blacklist_characters=blacklist_characters,
@@ -742,10 +742,32 @@ def text(
     return TextStrategy(char_strategy, min_size=min_size, max_size=max_size)
 
 
+@overload
+def from_regex(
+    regex: Union[bytes, Pattern[bytes]],
+    *,
+    fullmatch: bool = False,
+) -> SearchStrategy[bytes]:  # pragma: no cover
+    ...
+
+
+@overload
+def from_regex(
+    regex: Union[str, Pattern[str]],
+    *,
+    fullmatch: bool = False,
+    alphabet: Union[str, SearchStrategy[str]] = characters(codec="utf-8"),
+) -> SearchStrategy[str]:  # pragma: no cover
+    ...
+
+
 @cacheable
 @defines_strategy()
 def from_regex(
-    regex: Union[AnyStr, Pattern[AnyStr]], *, fullmatch: bool = False
+    regex: Union[AnyStr, Pattern[AnyStr]],
+    *,
+    fullmatch: bool = False,
+    alphabet: Union[str, SearchStrategy[str], None] = None,
 ) -> SearchStrategy[AnyStr]:
     r"""Generates strings that contain a match for the given regex (i.e. ones
     for which :func:`python:re.search` will return a non-None result).
@@ -771,15 +793,42 @@ def from_regex(
     Alternatively, passing ``fullmatch=True`` will ensure that the whole
     string is a match, as if you had used the ``\A`` and ``\Z`` markers.
 
+    The ``alphabet=`` argument constrains the characters in the generated
+    string, as for :func:`text`, and is only supported for unicode strings.
+
     Examples from this strategy shrink towards shorter strings and lower
     character values, with exact behaviour that may depend on the pattern.
     """
+    check_type((str, bytes, re.Pattern), regex, "regex")
     check_type(bool, fullmatch, "fullmatch")
+    pattern = regex.pattern if isinstance(regex, re.Pattern) else regex
+    if alphabet is not None:
+        check_type((str, SearchStrategy), alphabet, "alphabet")
+        if not isinstance(pattern, str):
+            raise InvalidArgument("alphabet= is not supported for bytestrings")
+
+        if isinstance(alphabet, str):
+            alphabet = characters(
+                whitelist_categories=(), whitelist_characters=alphabet
+            )
+        char_strategy = unwrap_strategies(alphabet)
+        if isinstance(char_strategy, SampledFromStrategy):
+            alphabet = characters(
+                whitelist_categories=(),
+                whitelist_characters=alphabet.elements,  # type: ignore
+            )
+        elif not isinstance(char_strategy, OneCharStringStrategy):
+            raise InvalidArgument(
+                f"{alphabet=} must be a sampled_from() or characters() strategy"
+            )
+    elif isinstance(pattern, str):
+        alphabet = characters(codec="utf-8")
+
     # TODO: We would like to move this to the top level, but pending some major
     # refactoring it's hard to do without creating circular imports.
     from hypothesis.strategies._internal.regex import regex_strategy
 
-    return regex_strategy(regex, fullmatch)
+    return regex_strategy(regex, fullmatch, alphabet=alphabet)
 
 
 @cacheable
diff --git a/hypothesis-python/src/hypothesis/strategies/_internal/regex.py b/hypothesis-python/src/hypothesis/strategies/_internal/regex.py
index 978153581e..df3ab324bb 100644
--- a/hypothesis-python/src/hypothesis/strategies/_internal/regex.py
+++ b/hypothesis-python/src/hypothesis/strategies/_internal/regex.py
@@ -11,6 +11,11 @@
 import operator
 import re
 
+from hypothesis.errors import InvalidArgument
+from hypothesis.internal import charmap
+from hypothesis.strategies._internal.lazy import unwrap_strategies
+from hypothesis.strategies._internal.strings import OneCharStringStrategy
+
 try:  # pragma: no cover
     import re._constants as sre
     import re._parser as sre_parse
@@ -26,7 +31,7 @@
 
 from hypothesis import reject, strategies as st
 from hypothesis.internal.charmap import as_general_categories, categories
-from hypothesis.internal.compat import int_to_byte
+from hypothesis.internal.compat import add_note, int_to_byte
 
 UNICODE_CATEGORIES = set(categories())
 
@@ -90,6 +95,14 @@ def clear_cache_after_draw(draw, base_strategy):
     return result
 
 
+def chars_not_in_alphabet(alphabet, string):
+    """Return a tuple of the characters in string which are not in alphabet."""
+    if alphabet is None:  # no restriction, so no character can be invalid
+        return ()
+    allowed = unwrap_strategies(alphabet).intervals
+    return tuple(char for char in string if char not in allowed)
+
+
 class Context:
     __slots__ = ["flags"]
 
@@ -107,42 +120,38 @@ class CharactersBuilder:
     :param flags: Regex flags. They affect how and which characters are matched
     """
 
-    def __init__(self, negate=False, flags=0):
+    def __init__(self, negate=False, flags=0, *, alphabet):
         self._categories = set()
         self._whitelist_chars = set()
         self._blacklist_chars = set()
         self._negate = negate
         self._ignorecase = flags & re.IGNORECASE
-        self._unicode = not bool(flags & re.ASCII)
         self.code_to_char = chr
+        self._alphabet = unwrap_strategies(alphabet)
+        if flags & re.ASCII:
+            self._alphabet = OneCharStringStrategy(
+                self._alphabet.intervals & charmap.query(max_codepoint=127)
+            )
 
     @property
     def strategy(self):
         """Returns resulting strategy that generates configured char set."""
-        max_codepoint = None if self._unicode else 127
-        # Due to the .swapcase() issue described below (and in issue #2657),
-        # self._whitelist_chars may contain strings of len > 1.  We therefore
-        # have some extra logic to filter them out of st.characters() args,
-        # but still generate them if allowed to.
-        if self._negate:
-            black_chars = self._blacklist_chars - self._whitelist_chars
-            return st.characters(
-                blacklist_categories=self._categories | {"Cc", "Cs"},
-                blacklist_characters={c for c in self._whitelist_chars if len(c) == 1},
-                whitelist_characters=black_chars,
-                max_codepoint=max_codepoint,
-            )
+        # Start by getting the set of all characters allowed by the pattern
         white_chars = self._whitelist_chars - self._blacklist_chars
         multi_chars = {c for c in white_chars if len(c) > 1}
-        char_strategy = st.characters(
-            whitelist_categories=self._categories,
-            blacklist_characters=self._blacklist_chars,
-            whitelist_characters=white_chars - multi_chars,
-            max_codepoint=max_codepoint,
+        intervals = charmap.query(
+            include_categories=self._categories,
+            exclude_characters=self._blacklist_chars,
+            include_characters=white_chars - multi_chars,
+        )
+        # Then take the complement if this is from a negated character class
+        if self._negate:
+            intervals = charmap.query() - intervals
+            multi_chars.clear()
+        # and finally return the intersection with our alphabet
+        return OneCharStringStrategy(intervals & self._alphabet.intervals) | (
+            st.sampled_from(sorted(multi_chars)) if multi_chars else st.nothing()
         )
-        if multi_chars:
-            char_strategy |= st.sampled_from(sorted(multi_chars))
-        return char_strategy
 
     def add_category(self, category):
         """Update unicode state to match sre_parse object ``category``."""
@@ -152,14 +161,10 @@ def add_category(self, category):
             self._categories |= UNICODE_CATEGORIES - UNICODE_DIGIT_CATEGORIES
         elif category == sre.CATEGORY_SPACE:
             self._categories |= UNICODE_SPACE_CATEGORIES
-            self._whitelist_chars |= (
-                UNICODE_SPACE_CHARS if self._unicode else SPACE_CHARS
-            )
+            self._whitelist_chars |= UNICODE_SPACE_CHARS
         elif category == sre.CATEGORY_NOT_SPACE:
             self._categories |= UNICODE_CATEGORIES - UNICODE_SPACE_CATEGORIES
-            self._blacklist_chars |= (
-                UNICODE_SPACE_CHARS if self._unicode else SPACE_CHARS
-            )
+            self._blacklist_chars |= UNICODE_SPACE_CHARS
         elif category == sre.CATEGORY_WORD:
             self._categories |= UNICODE_WORD_CATEGORIES
             self._whitelist_chars.add("_")
@@ -169,9 +174,11 @@ def add_category(self, category):
         else:
             raise NotImplementedError(f"Unknown character category: {category}")
 
-    def add_char(self, char):
+    def add_char(self, char, *, check=True):
         """Add given char to the whitelist."""
         c = self.code_to_char(char)
+        if check and chars_not_in_alphabet(self._alphabet, c):
+            raise InvalidArgument(f"Literal {c!r} is not in the specified alphabet")
         self._whitelist_chars.add(c)
         if (
             self._ignorecase
@@ -186,6 +193,7 @@ def __init__(self, negate=False, flags=0):
         self._whitelist_chars = set()
         self._blacklist_chars = set()
         self._negate = negate
+        self._alphabet = None
         self._ignorecase = flags & re.IGNORECASE
         self.code_to_char = int_to_byte
 
@@ -216,15 +224,25 @@ def maybe_pad(draw, regex, strategy, left_pad_strategy, right_pad_strategy):
     return result
 
 
-def base_regex_strategy(regex, parsed=None):
+def base_regex_strategy(regex, parsed=None, alphabet=None):
     if parsed is None:
         parsed = sre_parse.parse(regex.pattern, flags=regex.flags)
-    return clear_cache_after_draw(
-        _strategy(parsed, Context(flags=regex.flags), isinstance(regex.pattern, str))
-    )
+    try:
+        s = _strategy(
+            parsed,
+            context=Context(flags=regex.flags),
+            is_unicode=isinstance(regex.pattern, str),
+            alphabet=alphabet,
+        )
+    except Exception as err:
+        add_note(err, f"{alphabet=} {regex=}")
+        raise
+    return clear_cache_after_draw(s)
 
 
-def regex_strategy(regex, fullmatch, *, _temp_jsonschema_hack_no_end_newline=False):
+def regex_strategy(
+    regex, fullmatch, *, alphabet, _temp_jsonschema_hack_no_end_newline=False
+):
     if not hasattr(regex, "pattern"):
         regex = re.compile(regex)
 
@@ -235,16 +253,16 @@ def regex_strategy(regex, fullmatch, *, _temp_jsonschema_hack_no_end_newline=Fal
     if fullmatch:
         if not parsed:
             return st.just("" if is_unicode else b"")
-        return base_regex_strategy(regex, parsed).filter(regex.fullmatch)
+        return base_regex_strategy(regex, parsed, alphabet).filter(regex.fullmatch)
 
     if not parsed:
         if is_unicode:
-            return st.text()
+            return st.text(alphabet=alphabet)
         else:
             return st.binary()
 
     if is_unicode:
-        base_padding_strategy = st.text()
+        base_padding_strategy = st.text(alphabet=alphabet)
         empty = st.just("")
         newline = st.just("\n")
     else:
@@ -283,12 +301,12 @@ def regex_strategy(regex, fullmatch, *, _temp_jsonschema_hack_no_end_newline=Fal
             else:
                 left_pad = empty
 
-    base = base_regex_strategy(regex, parsed).filter(regex.search)
+    base = base_regex_strategy(regex, parsed, alphabet).filter(regex.search)
 
     return maybe_pad(regex, base, left_pad, right_pad)
 
 
-def _strategy(codes, context, is_unicode):
+def _strategy(codes, context, is_unicode, *, alphabet):
     """Convert SRE regex parse tree to strategy that generates strings matching
     that regex represented by that parse tree.
 
@@ -317,7 +335,7 @@ def _strategy(codes, context, is_unicode):
     """
 
     def recurse(codes):
-        return _strategy(codes, context, is_unicode)
+        return _strategy(codes, context, is_unicode, alphabet=alphabet)
 
     if is_unicode:
         empty = ""
@@ -341,8 +359,13 @@ def recurse(codes):
                     j += 1
 
                 if i + 1 < j:
-                    chars = (to_char(charcode) for _, charcode in codes[i:j])
-                    strategies.append(st.just(empty.join(chars)))
+                    chars = empty.join(to_char(charcode) for _, charcode in codes[i:j])
+                    if invalid := chars_not_in_alphabet(alphabet, chars):
+                        raise InvalidArgument(
+                            f"Literal {chars!r} contains characters {invalid!r} "
+                            f"which are not in the specified alphabet"
+                        )
+                    strategies.append(st.just(chars))
                     i = j
                     continue
 
@@ -363,10 +386,13 @@ def recurse(codes):
         if code == sre.LITERAL:
             # Regex 'a' (single char)
             c = to_char(value)
+            if chars_not_in_alphabet(alphabet, c):
+                raise InvalidArgument(f"Literal {c!r} is not in the specified alphabet")
             if (
                 context.flags & re.IGNORECASE
                 and c != c.swapcase()
                 and re.match(re.escape(c), c.swapcase(), re.IGNORECASE) is not None
+                and not chars_not_in_alphabet(alphabet, c.swapcase())
             ):
                 # We do the explicit check for swapped-case matching because
                 # eg 'ß'.upper() == 'SS' and ignorecase doesn't match it.
@@ -399,7 +425,10 @@ def recurse(codes):
                         stack.extend(set(char.swapcase()) - blacklist)
 
             if is_unicode:
-                return st.characters(blacklist_characters=blacklist)
+                return OneCharStringStrategy(
+                    unwrap_strategies(alphabet).intervals
+                    & charmap.query(exclude_characters=blacklist)
+                )
             else:
                 return binary_char.filter(lambda c: c not in blacklist)
 
@@ -407,7 +436,7 @@ def recurse(codes):
             # Regex '[abc0-9]' (set of characters)
             negate = value[0][0] == sre.NEGATE
             if is_unicode:
-                builder = CharactersBuilder(negate, context.flags)
+                builder = CharactersBuilder(negate, context.flags, alphabet=alphabet)
             else:
                 builder = BytesBuilder(negate, context.flags)
 
@@ -423,7 +452,7 @@ def recurse(codes):
                     # Regex '[a-z]' (char range)
                     low, high = charset_value
                     for char_code in range(low, high + 1):
-                        builder.add_char(char_code)
+                        builder.add_char(char_code, check=char_code in (low, high))
                 elif charset_code == sre.CATEGORY:
                     # Regex '[\w]' (char category)
                     builder.add_category(charset_value)
@@ -436,9 +465,13 @@ def recurse(codes):
         elif code == sre.ANY:
             # Regex '.' (any char)
             if is_unicode:
+                assert alphabet is not None
                 if context.flags & re.DOTALL:
-                    return st.characters()
-                return st.characters(blacklist_characters="\n")
+                    return alphabet
+                return OneCharStringStrategy(
+                    unwrap_strategies(alphabet).intervals
+                    & charmap.query(exclude_characters="\n")
+                )
             else:
                 if context.flags & re.DOTALL:
                     return binary_char
@@ -455,7 +488,7 @@ def recurse(codes):
             old_flags = context.flags
             context.flags = (context.flags | value[1]) & ~value[2]
 
-            strat = _strategy(value[-1], context, is_unicode)
+            strat = _strategy(value[-1], context, is_unicode, alphabet=alphabet)
 
             context.flags = old_flags
 
@@ -501,7 +534,7 @@ def recurse(codes):
                 recurse(value[2]) if value[2] else st.just(empty),
             )
         elif code == ATOMIC_GROUP:  # pragma: no cover  # new in Python 3.11
-            return _strategy(value, context, is_unicode)
+            return _strategy(value, context, is_unicode, alphabet=alphabet)
 
         else:
             # Currently there are no known code points other than handled here.
diff --git a/hypothesis-python/src/hypothesis/strategies/_internal/strings.py b/hypothesis-python/src/hypothesis/strategies/_internal/strings.py
index f1eb143ed6..8507f6ddbf 100644
--- a/hypothesis-python/src/hypothesis/strategies/_internal/strings.py
+++ b/hypothesis-python/src/hypothesis/strategies/_internal/strings.py
@@ -22,8 +22,18 @@
 class OneCharStringStrategy(SearchStrategy):
     """A strategy which generates single character strings of text type."""
 
-    def __init__(
-        self,
+    def __init__(self, intervals, force_repr=None):
+        assert isinstance(intervals, IntervalSet)
+        self.intervals = intervals
+        self._force_repr = force_repr
+        self.zero_point = self.intervals.index_above(ord("0"))
+        self.Z_point = min(
+            self.intervals.index_above(ord("Z")), len(self.intervals) - 1
+        )
+
+    @classmethod
+    def from_characters_args(
+        cls,
         whitelist_categories=None,
         blacklist_categories=None,
         blacklist_characters=None,
@@ -41,7 +51,7 @@ def __init__(
             include_characters=whitelist_characters,
             exclude_characters=blacklist_characters,
         )
-        self._arg_repr = ", ".join(
+        _arg_repr = ", ".join(
             f"{k}={v!r}"
             for k, v in [
                 ("whitelist_categories", whitelist_categories),
@@ -56,16 +66,12 @@ def __init__(
         if not intervals:
             raise InvalidArgument(
                 "No characters are allowed to be generated by this "
-                f"combination of arguments: {self._arg_repr}"
+                f"combination of arguments: {_arg_repr}"
             )
-        self.intervals = IntervalSet(intervals)
-        self.zero_point = self.intervals.index_above(ord("0"))
-        self.Z_point = min(
-            self.intervals.index_above(ord("Z")), len(self.intervals) - 1
-        )
+        return cls(intervals, force_repr=f"characters({_arg_repr})")
 
     def __repr__(self):
-        return f"characters({self._arg_repr})"
+        return self._force_repr or f"OneCharStringStrategy({self.intervals!r})"
 
     def do_draw(self, data):
         if len(self.intervals) > 256:
diff --git a/hypothesis-python/tests/cover/test_direct_strategies.py b/hypothesis-python/tests/cover/test_direct_strategies.py
index cf8a0ea0f6..b584a250be 100644
--- a/hypothesis-python/tests/cover/test_direct_strategies.py
+++ b/hypothesis-python/tests/cover/test_direct_strategies.py
@@ -128,6 +128,18 @@ def fn_ktest(*fnkwargs):
     (ds.text, {"alphabet": ds.sampled_from([123, 456])}),
     (ds.text, {"alphabet": ds.builds(lambda: "abc")}),
     (ds.text, {"alphabet": ds.builds(lambda: 123)}),
+    (ds.from_regex, {"regex": 123}),
+    (ds.from_regex, {"regex": b"abc", "alphabet": "abc"}),
+    (ds.from_regex, {"regex": b"abc", "alphabet": b"def"}),
+    (ds.from_regex, {"regex": "abc", "alphabet": "def"}),
+    (ds.from_regex, {"regex": "[abc]", "alphabet": "def"}),
+    (ds.from_regex, {"regex": "[a-d]", "alphabet": "def"}),
+    (ds.from_regex, {"regex": "[f-z]", "alphabet": "def"}),
+    (ds.from_regex, {"regex": "[ab]x[de]", "alphabet": "abcdef"}),
+    (ds.from_regex, {"regex": "...", "alphabet": ds.builds(lambda: "a")}),
+    (ds.from_regex, {"regex": "abc", "alphabet": ds.sampled_from("def")}),
+    (ds.from_regex, {"regex": "abc", "alphabet": ds.characters(min_codepoint=128)}),
+    (ds.from_regex, {"regex": "abc", "alphabet": 123}),
     (ds.binary, {"min_size": 10, "max_size": 9}),
     (ds.floats, {"min_value": math.nan}),
     (ds.floats, {"min_value": "0"}),
@@ -272,6 +284,12 @@ def test_validates_keyword_arguments(fn, kwargs):
     (ds.text, {"alphabet": ds.builds(lambda: "a")}),
     (ds.characters, {"whitelist_categories": ["N"]}),
     (ds.characters, {"blacklist_categories": []}),
+    (ds.from_regex, {"regex": "abc", "alphabet": "abc"}),
+    (ds.from_regex, {"regex": "abc", "alphabet": "abcdef"}),
+    (ds.from_regex, {"regex": "[abc]", "alphabet": "abcdef"}),
+    (ds.from_regex, {"regex": "[a-f]", "alphabet": "abef"}),
+    (ds.from_regex, {"regex": "abc", "alphabet": ds.sampled_from("abc")}),
+    (ds.from_regex, {"regex": "abc", "alphabet": ds.characters(codec="ascii")}),
     (ds.ip_addresses, {}),
     (ds.ip_addresses, {"v": 4}),
     (ds.ip_addresses, {"v": 6}),
diff --git a/hypothesis-python/tests/cover/test_regex.py b/hypothesis-python/tests/cover/test_regex.py
index 373f8edc34..f11f302687 100644
--- a/hypothesis-python/tests/cover/test_regex.py
+++ b/hypothesis-python/tests/cover/test_regex.py
@@ -134,12 +134,16 @@ def pred(s):
         "[^\\S]",  # categories
     ],
 )
-@pytest.mark.parametrize("encode", [False, True])
+@pytest.mark.parametrize("encode", [None, False, True])
 def test_can_generate(pattern, encode):
+    alphabet = st.characters(max_codepoint=1000) if encode is None else None
     if encode:
         pattern = pattern.encode("ascii")
     with local_settings(settings(suppress_health_check=[HealthCheck.data_too_large])):
-        assert_all_examples(st.from_regex(pattern), re.compile(pattern).search)
+        assert_all_examples(
+            st.from_regex(pattern, alphabet=alphabet),
+            re.compile(pattern).search,
+        )
 
 
 @pytest.mark.parametrize(
@@ -268,8 +272,8 @@ def test_groupref_not_shared_between_regex():
 @given(st.data())
 def test_group_ref_is_not_shared_between_identical_regex(data):
     pattern = re.compile("^(.+)\\1\\Z", re.UNICODE)
-    x = data.draw(base_regex_strategy(pattern))
-    y = data.draw(base_regex_strategy(pattern))
+    x = data.draw(base_regex_strategy(pattern, alphabet=st.characters()))
+    y = data.draw(base_regex_strategy(pattern, alphabet=st.characters()))
     assume(x != y)
     assert pattern.match(x).end() == len(x)
     assert pattern.match(y).end() == len(y)
@@ -277,9 +281,11 @@ def test_group_ref_is_not_shared_between_identical_regex(data):
 
 @given(st.data())
 def test_does_not_leak_groups(data):
-    a = data.draw(base_regex_strategy(re.compile("^(a)\\Z")))
+    a = data.draw(base_regex_strategy(re.compile("^(a)\\Z"), alphabet=st.characters()))
     assert a == "a"
-    b = data.draw(base_regex_strategy(re.compile("^(?(1)a|b)(.)\\Z")))
+    b = data.draw(
+        base_regex_strategy(re.compile("^(?(1)a|b)(.)\\Z"), alphabet=st.characters())
+    )
     assert b[0] == "b"
 
 
@@ -469,6 +475,11 @@ def test_internals_can_disable_newline_from_dollar_for_jsonschema():
     pattern = "^abc$"
     find_any(st.from_regex(pattern), lambda s: s == "abc\n")
     assert_all_examples(
-        regex_strategy(pattern, False, _temp_jsonschema_hack_no_end_newline=True),
+        regex_strategy(
+            pattern,
+            False,
+            alphabet=st.characters(),
+            _temp_jsonschema_hack_no_end_newline=True,
+        ),
         lambda s: s == "abc",
     )
diff --git a/hypothesis-python/tests/cover/test_text.py b/hypothesis-python/tests/cover/test_text.py
index 964db142f3..c1cb7f03b2 100644
--- a/hypothesis-python/tests/cover/test_text.py
+++ b/hypothesis-python/tests/cover/test_text.py
@@ -12,7 +12,7 @@
 
 
 def test_rewriting_integers_covers_right_range():
-    strategy = OneCharStringStrategy()
+    strategy = OneCharStringStrategy.from_characters_args()
 
     rewritten = [strategy.rewrite_integer(i) for i in range(256)]
     assert sorted(rewritten) == sorted(range(256))
diff --git a/hypothesis-python/tests/nocover/test_regex.py b/hypothesis-python/tests/nocover/test_regex.py
index 11b1d4a31f..c16c4a360a 100644
--- a/hypothesis-python/tests/nocover/test_regex.py
+++ b/hypothesis-python/tests/nocover/test_regex.py
@@ -63,7 +63,7 @@ def conservative_regex(draw):
 @given(st.data())
 def test_conservative_regex_are_correct_by_construction(data):
     pattern = re.compile(data.draw(CONSERVATIVE_REGEX), flags=data.draw(FLAGS))
-    result = data.draw(base_regex_strategy(pattern))
+    result = data.draw(base_regex_strategy(pattern, alphabet=st.characters()))
     # We'll skip "capital I with dot above" due to awful casefolding behaviour
     # and "latin small letter dotless i" for the same reason.
     assume({"ı", "İ"}.isdisjoint(pattern.pattern + result))