|
26 | 26 |
|
27 | 27 | from hypothesis.internal.cache import LRUCache
|
28 | 28 | from hypothesis.internal.compat import int_from_bytes
|
| 29 | +from hypothesis.internal.conjecture.choice import ( |
| 30 | + StringKWargs, |
| 31 | + choice_kwargs_key, |
| 32 | + choice_permitted, |
| 33 | +) |
29 | 34 | from hypothesis.internal.conjecture.floats import float_to_lex, lex_to_float
|
30 | 35 | from hypothesis.internal.conjecture.junkdrawer import bits_to_bytes
|
31 | 36 | from hypothesis.internal.conjecture.utils import (
|
|
65 | 70 | "hypothesis": "hypothesis.internal.conjecture.providers.HypothesisProvider",
|
66 | 71 | }
|
67 | 72 | FLOAT_INIT_LOGIC_CACHE = LRUCache(4096)
|
| 73 | +STRING_SAMPLER_CACHE = LRUCache(64) |
68 | 74 |
|
69 | 75 | NASTY_FLOATS = sorted(
|
70 | 76 | [
|
|
96 | 102 | NASTY_FLOATS = list(map(float, NASTY_FLOATS))
|
97 | 103 | NASTY_FLOATS.extend([-x for x in NASTY_FLOATS])
|
98 | 104 |
|
# Strings that tend to expose bugs in text-handling code. The list is sorted
# by length (shortest first). Every entry must be followed by a comma: adjacent
# string literals are implicitly concatenated by Python, which would silently
# merge two entries into one.
NASTY_STRINGS = sorted(
    [
        # strings which can be interpreted as code / logic
        "undefined",
        "null",
        "NULL",
        "nil",
        "NIL",
        "true",
        "false",
        "True",
        "False",
        "TRUE",
        "FALSE",
        "None",
        "none",
        "if",
        "then",
        "else",
        # strings which can be interpreted as a number
        "0",
        "1e100",
        "0..0",
        "0/0",
        "1/0",
        "+0.0",
        "Infinity",
        "-Infinity",
        "Inf",
        "INF",
        "NaN",
        "9" * 30,
        # common ascii characters
        ",./;'[]\\-=<>?:\"{}|_+!@#$%^&*()`~",
        # common unicode characters
        "Ω≈ç√∫˜µ≤≥÷åß∂ƒ©˙∆˚¬…æœ∑´®†¥¨ˆøπ“‘¡™£¢∞§¶•ªº–≠¸˛Ç◊ı˜Â¯˘¿ÅÍÎÏ˝ÓÔÒÚÆ☃Œ„´‰ˇÁ¨ˆØ∏”’`⁄€‹›fifl‡°·‚—±",
        # characters which increase in length when lowercased
        "Ⱥ",
        "Ⱦ",
        # ligatures
        "æœÆŒffʤʨß",
        # emoticons
        "(╯°□°)╯︵ ┻━┻)",
        # emojis
        "😍",
        "🇺🇸",
        # emoji modifiers
        "🏻",  # U+1F3FB Light Skin Tone
        "👍🏻",  # 👍 followed by U+1F3FB
        # RTL text
        "الكل في المجمو عة",
        # Ogham text, which contains the only character in the Space Separators
        # unicode category (Zs) that isn't visually blank: . # noqa: RUF003
        "᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜",
        # readable variations on text (bold/italic/script)
        "𝐓𝐡𝐞 𝐪𝐮𝐢𝐜𝐤 𝐛𝐫𝐨𝐰𝐧 𝐟𝐨𝐱 𝐣𝐮𝐦𝐩𝐬 𝐨𝐯𝐞𝐫 𝐭𝐡𝐞 𝐥𝐚𝐳𝐲 𝐝𝐨𝐠",
        "𝕿𝖍𝖊 𝖖𝖚𝖎𝖈𝖐 𝖇𝖗𝖔𝖜𝖓 𝖋𝖔𝖝 𝖏𝖚𝖒𝖕𝖘 𝖔𝖛𝖊𝖗 𝖙𝖍𝖊 𝖑𝖆𝖟𝖞 𝖉𝖔𝖌",
        "𝑻𝒉𝒆 𝒒𝒖𝒊𝒄𝒌 𝒃𝒓𝒐𝒘𝒏 𝒇𝒐𝒙 𝒋𝒖𝒎𝒑𝒔 𝒐𝒗𝒆𝒓 𝒕𝒉𝒆 𝒍𝒂𝒛𝒚 𝒅𝒐𝒈",
        "𝓣𝓱𝓮 𝓺𝓾𝓲𝓬𝓴 𝓫𝓻𝓸𝔀𝓷 𝓯𝓸𝔁 𝓳𝓾𝓶𝓹𝓼 𝓸𝓿𝓮𝓻 𝓽𝓱𝓮 𝓵𝓪𝔃𝔂 𝓭𝓸𝓰",
        "𝕋𝕙𝕖 𝕢𝕦𝕚𝕔𝕜 𝕓𝕣𝕠𝕨𝕟 𝕗𝕠𝕩 𝕛𝕦𝕞𝕡𝕤 𝕠𝕧𝕖𝕣 𝕥𝕙𝕖 𝕝𝕒𝕫𝕪 𝕕𝕠𝕘",
        # upside-down text
        "ʇǝɯɐ ʇᴉs ɹolop ɯnsdᴉ ɯǝɹo˥",
        # reserved strings in windows
        "NUL",
        "COM1",
        "LPT1",
        # scunthorpe problem
        "Scunthorpe",
        # zalgo text
        "Ṱ̺̺̕o͞ ̷i̲̬͇̪͙n̝̗͕v̟̜̘̦͟o̶̙̰̠kè͚̮̺̪̹̱̤ ̖t̝͕̳̣̻̪͞h̼͓̲̦̳̘̲e͇̣̰̦̬͎ ̢̼̻̱̘h͚͎͙̜̣̲ͅi̦̲̣̰̤v̻͍e̺̭̳̪̰-m̢iͅn̖̺̞̲̯̰d̵̼̟͙̩̼̘̳ ̞̥̱̳̭r̛̗̘e͙p͠r̼̞̻̭̗e̺̠̣͟s̘͇̳͍̝͉e͉̥̯̞̲͚̬͜ǹ̬͎͎̟̖͇̤t͍̬̤͓̼̭͘ͅi̪̱n͠g̴͉ ͏͉ͅc̬̟h͡a̫̻̯͘o̫̟̖͍̙̝͉s̗̦̲.̨̹͈̣",
        #
        # examples from https://faultlore.com/blah/text-hates-you/
        "मनीष منش",
        "पन्ह पन्ह त्र र्च कृकृ ड्ड न्हृे إلا بسم الله",
        "lorem لا بسم الله ipsum 你好1234你好",
    ],
    key=len,
)
| 183 | + |
99 | 184 | # Masks for masking off the first byte of an n-bit buffer.
|
100 | 185 | # The appropriate mask is stored at position n % 8.
|
101 | 186 | BYTE_MASKS = [(1 << n) - 1 for n in range(8)]
|
@@ -391,6 +476,15 @@ def draw_string(
|
391 | 476 | if len(intervals) == 0:
|
392 | 477 | return ""
|
393 | 478 |
|
| 479 | + sampler, nasty_strings = self._draw_string_sampler( |
| 480 | + intervals=intervals, |
| 481 | + min_size=min_size, |
| 482 | + max_size=max_size, |
| 483 | + ) |
| 484 | + |
| 485 | + if sampler is not None and self.draw_boolean(p=0.05): |
| 486 | + return nasty_strings[sampler.sample(self._cd)] |
| 487 | + |
394 | 488 | average_size = min(
|
395 | 489 | max(min_size * 2, min_size + 5),
|
396 | 490 | 0.5 * (min_size + max_size),
|
@@ -575,6 +669,33 @@ def permitted(f: float) -> bool:
|
575 | 669 | )
|
576 | 670 | return (sampler, clamper, nasty_floats)
|
577 | 671 |
|
@classmethod
def _draw_string_sampler(
    cls,
    *,
    intervals: IntervalSet,
    min_size: int,
    max_size: int,
) -> tuple[Optional[Sampler], list[str]]:
    """Return a ``(sampler, nasty_strings)`` pair for the given constraints.

    ``nasty_strings`` holds the entries of ``NASTY_STRINGS`` accepted by
    ``choice_permitted`` for these constraints. ``sampler`` is a ``Sampler``
    built with one equal weight per permitted string, or ``None`` when no
    string is permitted.

    Results are memoised in ``STRING_SAMPLER_CACHE`` under the key produced
    by ``choice_kwargs_key("string", ...)``, so the filtering is only done
    on a cache miss.
    """
    constraints: StringKWargs = {
        "intervals": intervals,
        "min_size": min_size,
        "max_size": max_size,
    }
    cache_key = choice_kwargs_key("string", constraints)

    if key_missing := cache_key not in STRING_SAMPLER_CACHE:
        permitted = [
            candidate
            for candidate in NASTY_STRINGS
            if choice_permitted(candidate, constraints)
        ]
        if permitted:
            # Equal weight for every permitted string.
            count = len(permitted)
            picker = Sampler([1 / count] * count, observe=False)
        else:
            # Nothing to sample from for these constraints.
            picker = None
        entry = (picker, permitted)
        STRING_SAMPLER_CACHE[cache_key] = entry
        return entry

    return STRING_SAMPLER_CACHE[cache_key]
| 698 | + |
578 | 699 |
|
579 | 700 | class BytestringProvider(PrimitiveProvider):
|
580 | 701 | lifetime = "test_case"
|
|
0 commit comments