diff --git a/Cargo.lock b/Cargo.lock index 59f7cdd5121..e0094cecbd1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2066,9 +2066,9 @@ dependencies = [ [[package]] name = "sre-engine" -version = "0.1.2" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5872399287c284fed4bc773cb7f6041623ac88213774f5e11e89e2131681fc1" +checksum = "55e283f0ec6488739d0b972e3c17b70a8698b33c298a169430387f871af51a03" dependencies = [ "bitflags", "num_enum", diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index 5bfbefcd170..4d79f367cc0 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -112,8 +112,6 @@ def test_processing_instruction_only(self): ("pi", "processing instruction ?"), ]) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_simple_html(self): self._run_check(""" @@ -258,8 +256,6 @@ def test_startendtag(self): ("endtag", "p"), ]) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_get_starttag_text(self): s = """""" self._run_check_extra(s, [ @@ -345,8 +341,6 @@ def test_condcoms(self): ('comment', '[if lte IE 7]>pretty?", [('starttag', 'a$b', [])]) self._run_check("", [('startendtag', 'a$b', [])]) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_slashes_in_starttag(self): self._run_check('', [('startendtag', 'a', [('foo', 'var')])]) html = (' confuses the parser')] self._run_check(html, expected) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_correct_detection_of_start_tags(self): # see #13273 html = ('
The rain ' @@ -618,8 +608,6 @@ def test_convert_charrefs_dropped_text(self): class AttributesTestCase(TestCaseBase): - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_attr_syntax(self): output = [ ("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", None)]) @@ -629,8 +617,6 @@ def test_attr_syntax(self): self._run_check("""""", output) self._run_check("""""", output) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_attr_values(self): self._run_check("""""", [("starttag", "a", [("b", "xxx\n\txxx"), @@ -646,8 +632,6 @@ def test_attr_values(self): "", [("starttag", "a", [("href", "mailto:xyz@example.com")])]) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_attr_nonascii(self): # see issue 7311 self._run_check( @@ -668,8 +652,6 @@ def test_attr_entity_replacement(self): "", [("starttag", "a", [("b", "&><\"'")])]) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_attr_funky_names(self): self._run_check( "", @@ -718,8 +700,6 @@ def test_malformed_attributes(self): ] self._run_check(html, expected) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_malformed_adjacent_attributes(self): # see #12629 self._run_check('', @@ -732,8 +712,6 @@ def test_malformed_adjacent_attributes(self): ('endtag', 'x')]) # see #755670 for the following 3 tests - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_adjacent_attributes(self): self._run_check('', [("starttag", "a", @@ -759,8 +737,6 @@ def test_end_tag_in_attribute_value(self): [("href", "http://www.example.org/\">;")]), ("data", "spam"), ("endtag", "a")]) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_with_unquoted_attributes(self): # see #12008 html = ("" diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 77d497b74c9..03cb8172de8 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -1,5 +1,6 @@ from test.support import (gc_collect, bigmemtest, _2G, - cpython_only, captured_stdout) + cpython_only, captured_stdout, + check_disallow_instantiation) 
import locale import re import sre_compile @@ -219,6 +220,16 @@ def test_symbolic_groups(self): re.compile(r'(?Px)(?P=a)(?(a)y)') re.compile(r'(?Px)(?P=a1)(?(a1)y)') re.compile(r'(?Px)\1(?(1)y)') + re.compile(b'(?Px)(?P=a1)(?(a1)y)') + # New valid identifiers in Python 3 + re.compile('(?P<µ>x)(?P=µ)(?(µ)y)') + re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)') + # Support > 100 groups. + pat = '|'.join('x(?P%x)y' % (i, i) for i in range(1, 200 + 1)) + pat = '(?:%s)(?(200)z|t)' % pat + self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5)) + + def test_symbolic_groups_errors(self): self.checkPatternError(r'(?P)(?P)', "redefinition of group name 'a' as group 2; " "was group 1") @@ -244,16 +255,22 @@ def test_symbolic_groups(self): self.checkPatternError(r'(?(-1))', "bad character in group name '-1'", 3) self.checkPatternError(r'(?(1a))', "bad character in group name '1a'", 3) self.checkPatternError(r'(?(a.))', "bad character in group name 'a.'", 3) - # New valid/invalid identifiers in Python 3 - re.compile('(?P<µ>x)(?P=µ)(?(µ)y)') - re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)') self.checkPatternError('(?P<©>x)', "bad character in group name '©'", 4) + self.checkPatternError('(?P=©)', "bad character in group name '©'", 4) + self.checkPatternError('(?(©)y)', "bad character in group name '©'", 3) + + def test_symbolic_refs(self): + self.assertEqual(re.sub('(?Px)|(?Py)', r'\g', 'xx'), '') + self.assertEqual(re.sub('(?Px)|(?Py)', r'\2', 'xx'), '') + self.assertEqual(re.sub(b'(?Px)', br'\g', b'xx'), b'xx') + # New valid identifiers in Python 3 + self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx') + self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx') # Support > 100 groups. 
pat = '|'.join('x(?P%x)y' % (i, i) for i in range(1, 200 + 1)) - pat = '(?:%s)(?(200)z|t)' % pat - self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5)) + self.assertEqual(re.sub(pat, r'\g<200>', 'xc8yzxc8y'), 'c8zc8') - def test_symbolic_refs(self): + def test_symbolic_refs_errors(self): self.checkTemplateError('(?Px)', r'\g, unterminated name', 3) self.checkTemplateError('(?Px)', r'\g<', 'xx', @@ -271,18 +288,14 @@ def test_symbolic_refs(self): 'invalid group reference 2', 1) with self.assertRaisesRegex(IndexError, "unknown group name 'ab'"): re.sub('(?Px)', r'\g', 'xx') - self.assertEqual(re.sub('(?Px)|(?Py)', r'\g', 'xx'), '') - self.assertEqual(re.sub('(?Px)|(?Py)', r'\2', 'xx'), '') self.checkTemplateError('(?Px)', r'\g<-1>', 'xx', "bad character in group name '-1'", 3) - # New valid/invalid identifiers in Python 3 - self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx') - self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx') self.checkTemplateError('(?Px)', r'\g<©>', 'xx', "bad character in group name '©'", 3) - # Support > 100 groups. 
- pat = '|'.join('x(?P%x)y' % (i, i) for i in range(1, 200 + 1)) - self.assertEqual(re.sub(pat, r'\g<200>', 'xc8yzxc8y'), 'c8zc8') + self.checkTemplateError('(?Px)', r'\g<㊀>', 'xx', + "bad character in group name '㊀'", 3) + self.checkTemplateError('(?Px)', r'\g<¹>', 'xx', + "bad character in group name '¹'", 3) def test_re_subn(self): self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2)) @@ -544,12 +557,30 @@ def test_re_groupref_exists(self): pat = '(?:%s)(?(200)z)' % pat self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5)) - self.checkPatternError(r'(?P)(?(0))', 'bad group number', 10) + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_re_groupref_exists_errors(self): + self.checkPatternError(r'(?P)(?(0)a|b)', 'bad group number', 10) + self.checkPatternError(r'()(?(-1)a|b)', + "bad character in group name '-1'", 5) + self.checkPatternError(r'()(?(㊀)a|b)', + "bad character in group name '㊀'", 5) + self.checkPatternError(r'()(?(¹)a|b)', + "bad character in group name '¹'", 5) + self.checkPatternError(r'()(?(1', + "missing ), unterminated name", 5) + self.checkPatternError(r'()(?(1)a', + "missing ), unterminated subpattern", 2) self.checkPatternError(r'()(?(1)a|b', 'missing ), unterminated subpattern', 2) + self.checkPatternError(r'()(?(1)a|b|c', + 'conditional backref with more than ' + 'two branches', 10) self.checkPatternError(r'()(?(1)a|b|c)', 'conditional backref with more than ' 'two branches', 10) + self.checkPatternError(r'()(?(2)a)', + "invalid group reference 2", 5) def test_re_groupref_overflow(self): from sre_constants import MAXGROUPS @@ -733,6 +764,10 @@ def test_named_unicode_escapes(self): "undefined character name 'SPAM'", 0) self.checkPatternError(r'[\N{SPAM}]', "undefined character name 'SPAM'", 1) + self.checkPatternError(r'\N{KEYCAP NUMBER SIGN}', + "undefined character name 'KEYCAP NUMBER SIGN'", 0) + self.checkPatternError(r'[\N{KEYCAP NUMBER SIGN}]', + "undefined character name 'KEYCAP NUMBER SIGN'", 1) 
self.checkPatternError(br'\N{LESS-THAN SIGN}', r'bad escape \N', 0) self.checkPatternError(br'[\N{LESS-THAN SIGN}]', r'bad escape \N', 1) @@ -836,6 +871,8 @@ def test_lookbehind(self): self.assertRaises(re.error, re.compile, r'(a)b(?<=(a)(?(2)b|x))(c)') self.assertRaises(re.error, re.compile, r'(a)b(?<=(.)(?<=\2))(c)') + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_ignore_case(self): self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC") self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC") @@ -848,20 +885,36 @@ def test_ignore_case(self): self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a") self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa") - assert '\u212a'.lower() == 'k' # 'K' + # Two different characters have the same lowercase. + assert 'K'.lower() == '\u212a'.lower() == 'k' # 'K' self.assertTrue(re.match(r'K', '\u212a', re.I)) self.assertTrue(re.match(r'k', '\u212a', re.I)) self.assertTrue(re.match(r'\u212a', 'K', re.I)) self.assertTrue(re.match(r'\u212a', 'k', re.I)) - assert '\u017f'.upper() == 'S' # 'ſ' + + # Two different characters have the same uppercase. + assert 's'.upper() == '\u017f'.upper() == 'S' # 'ſ' self.assertTrue(re.match(r'S', '\u017f', re.I)) self.assertTrue(re.match(r's', '\u017f', re.I)) self.assertTrue(re.match(r'\u017f', 'S', re.I)) self.assertTrue(re.match(r'\u017f', 's', re.I)) + + # Two different characters have the same uppercase. Unicode 9.0+. + assert '\u0432'.upper() == '\u1c80'.upper() == '\u0412' # 'в', 'ᲀ', 'В' + self.assertTrue(re.match(r'\u0412', '\u0432', re.I)) + self.assertTrue(re.match(r'\u0412', '\u1c80', re.I)) + self.assertTrue(re.match(r'\u0432', '\u0412', re.I)) + self.assertTrue(re.match(r'\u0432', '\u1c80', re.I)) + self.assertTrue(re.match(r'\u1c80', '\u0412', re.I)) + self.assertTrue(re.match(r'\u1c80', '\u0432', re.I)) + + # Two different characters have the same multicharacter uppercase. 
assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ﬅ', 'ﬆ' self.assertTrue(re.match(r'\ufb05', '\ufb06', re.I)) self.assertTrue(re.match(r'\ufb06', '\ufb05', re.I)) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_ignore_case_set(self): self.assertTrue(re.match(r'[19A]', 'A', re.I)) self.assertTrue(re.match(r'[19a]', 'a', re.I)) @@ -871,20 +924,37 @@ def test_ignore_case_set(self): self.assertTrue(re.match(br'[19a]', b'a', re.I)) self.assertTrue(re.match(br'[19a]', b'A', re.I)) self.assertTrue(re.match(br'[19A]', b'a', re.I)) - assert '\u212a'.lower() == 'k' # 'K' + + # Two different characters have the same lowercase. + assert 'K'.lower() == '\u212a'.lower() == 'k' # 'K' self.assertTrue(re.match(r'[19K]', '\u212a', re.I)) self.assertTrue(re.match(r'[19k]', '\u212a', re.I)) self.assertTrue(re.match(r'[19\u212a]', 'K', re.I)) self.assertTrue(re.match(r'[19\u212a]', 'k', re.I)) - assert '\u017f'.upper() == 'S' # 'ſ' + + # Two different characters have the same uppercase. + assert 's'.upper() == '\u017f'.upper() == 'S' # 'ſ' self.assertTrue(re.match(r'[19S]', '\u017f', re.I)) self.assertTrue(re.match(r'[19s]', '\u017f', re.I)) self.assertTrue(re.match(r'[19\u017f]', 'S', re.I)) self.assertTrue(re.match(r'[19\u017f]', 's', re.I)) + + # Two different characters have the same uppercase. Unicode 9.0+. + assert '\u0432'.upper() == '\u1c80'.upper() == '\u0412' # 'в', 'ᲀ', 'В' + self.assertTrue(re.match(r'[19\u0412]', '\u0432', re.I)) + self.assertTrue(re.match(r'[19\u0412]', '\u1c80', re.I)) + self.assertTrue(re.match(r'[19\u0432]', '\u0412', re.I)) + self.assertTrue(re.match(r'[19\u0432]', '\u1c80', re.I)) + self.assertTrue(re.match(r'[19\u1c80]', '\u0412', re.I)) + self.assertTrue(re.match(r'[19\u1c80]', '\u0432', re.I)) + + # Two different characters have the same multicharacter uppercase. 
assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ﬅ', 'ﬆ' self.assertTrue(re.match(r'[19\ufb05]', '\ufb06', re.I)) self.assertTrue(re.match(r'[19\ufb06]', '\ufb05', re.I)) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_ignore_case_range(self): # Issues #3511, #17381. self.assertTrue(re.match(r'[9-a]', '_', re.I)) @@ -904,16 +974,30 @@ def test_ignore_case_range(self): self.assertTrue(re.match(r'[\U00010400-\U00010427]', '\U00010428', re.I)) self.assertTrue(re.match(r'[\U00010400-\U00010427]', '\U00010400', re.I)) - assert '\u212a'.lower() == 'k' # 'K' + # Two different characters have the same lowercase. + assert 'K'.lower() == '\u212a'.lower() == 'k' # 'K' self.assertTrue(re.match(r'[J-M]', '\u212a', re.I)) self.assertTrue(re.match(r'[j-m]', '\u212a', re.I)) self.assertTrue(re.match(r'[\u2129-\u212b]', 'K', re.I)) self.assertTrue(re.match(r'[\u2129-\u212b]', 'k', re.I)) - assert '\u017f'.upper() == 'S' # 'ſ' + + # Two different characters have the same uppercase. + assert 's'.upper() == '\u017f'.upper() == 'S' # 'ſ' self.assertTrue(re.match(r'[R-T]', '\u017f', re.I)) self.assertTrue(re.match(r'[r-t]', '\u017f', re.I)) self.assertTrue(re.match(r'[\u017e-\u0180]', 'S', re.I)) self.assertTrue(re.match(r'[\u017e-\u0180]', 's', re.I)) + + # Two different characters have the same uppercase. Unicode 9.0+. + assert '\u0432'.upper() == '\u1c80'.upper() == '\u0412' # 'в', 'ᲀ', 'В' + self.assertTrue(re.match(r'[\u0411-\u0413]', '\u0432', re.I)) + self.assertTrue(re.match(r'[\u0411-\u0413]', '\u1c80', re.I)) + self.assertTrue(re.match(r'[\u0431-\u0433]', '\u0412', re.I)) + self.assertTrue(re.match(r'[\u0431-\u0433]', '\u1c80', re.I)) + self.assertTrue(re.match(r'[\u1c80-\u1c82]', '\u0412', re.I)) + self.assertTrue(re.match(r'[\u1c80-\u1c82]', '\u0432', re.I)) + + # Two different characters have the same multicharacter uppercase. 
assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ﬅ', 'ﬆ' self.assertTrue(re.match(r'[\ufb04-\ufb05]', '\ufb06', re.I)) self.assertTrue(re.match(r'[\ufb06-\ufb07]', '\ufb05', re.I)) @@ -921,6 +1005,7 @@ def test_ignore_case_range(self): def test_category(self): self.assertEqual(re.match(r"(\s)", " ").group(1), " ") + @cpython_only def test_case_helpers(self): import _sre for i in range(128): @@ -1406,6 +1491,8 @@ def test_empty_array(self): self.assertIsNone(re.compile(b"bla").match(a)) self.assertEqual(re.compile(b"").match(a).groups(), ()) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_inline_flags(self): # Bug #1700 upper_char = '\u1ea0' # Latin Capital Letter A with Dot Below @@ -1454,7 +1541,8 @@ def test_inline_flags(self): self.assertTrue(re.match(p, lower_char)) self.assertEqual( str(warns.warnings[0].message), - 'Flags not at the start of the expression %r' % p + 'Flags not at the start of the expression %r' + ' but at position 1' % p ) self.assertEqual(warns.warnings[0].filename, __file__) @@ -1463,7 +1551,8 @@ def test_inline_flags(self): self.assertTrue(re.match(p, lower_char)) self.assertEqual( str(warns.warnings[0].message), - 'Flags not at the start of the expression %r (truncated)' % p[:20] + 'Flags not at the start of the expression %r (truncated)' + ' but at position 1' % p[:20] ) self.assertEqual(warns.warnings[0].filename, __file__) @@ -1475,7 +1564,8 @@ def test_inline_flags(self): self.assertTrue(re.match(p, b'a')) self.assertEqual( str(warns.warnings[0].message), - 'Flags not at the start of the expression %r' % p + 'Flags not at the start of the expression %r' + ' but at position 1' % p ) self.assertEqual(warns.warnings[0].filename, __file__) @@ -1615,11 +1705,6 @@ def test_scoped_flags(self): self.assertIsNone(re.match(r'(?i:(?-i:a)b)', 'Ab')) self.assertTrue(re.match(r'(?i:(?-i:a)b)', 'aB')) - self.assertTrue(re.match(r'(?x: a) b', 'a b')) - self.assertIsNone(re.match(r'(?x: a) b', ' a b')) - 
self.assertTrue(re.match(r'(?-x: a) b', ' ab', re.VERBOSE)) - self.assertIsNone(re.match(r'(?-x: a) b', 'ab', re.VERBOSE)) - self.assertTrue(re.match(r'\w(?a:\W)\w', '\xe0\xe0\xe0')) self.assertTrue(re.match(r'(?a:\W(?u:\w)\W)', '\xe0\xe0\xe0')) self.assertTrue(re.match(r'\W(?u:\w)\W', '\xe0\xe0\xe0', re.ASCII)) @@ -1645,6 +1730,33 @@ def test_scoped_flags(self): self.checkPatternError(r'(?i+', 'missing -, : or )', 3) self.checkPatternError(r'(?iz', 'unknown flag', 3) + def test_ignore_spaces(self): + for space in " \t\n\r\v\f": + self.assertTrue(re.fullmatch(space + 'a', 'a', re.VERBOSE)) + for space in b" ", b"\t", b"\n", b"\r", b"\v", b"\f": + self.assertTrue(re.fullmatch(space + b'a', b'a', re.VERBOSE)) + self.assertTrue(re.fullmatch('(?x) a', 'a')) + self.assertTrue(re.fullmatch(' (?x) a', 'a', re.VERBOSE)) + self.assertTrue(re.fullmatch('(?x) (?x) a', 'a')) + self.assertTrue(re.fullmatch(' a(?x: b) c', ' ab c')) + self.assertTrue(re.fullmatch(' a(?-x: b) c', 'a bc', re.VERBOSE)) + self.assertTrue(re.fullmatch('(?x) a(?-x: b) c', 'a bc')) + self.assertTrue(re.fullmatch('(?x) a| b', 'a')) + self.assertTrue(re.fullmatch('(?x) a| b', 'b')) + + def test_comments(self): + self.assertTrue(re.fullmatch('#x\na', 'a', re.VERBOSE)) + self.assertTrue(re.fullmatch(b'#x\na', b'a', re.VERBOSE)) + self.assertTrue(re.fullmatch('(?x)#x\na', 'a')) + self.assertTrue(re.fullmatch('#x\n(?x)#y\na', 'a', re.VERBOSE)) + self.assertTrue(re.fullmatch('(?x)#x\n(?x)#y\na', 'a')) + self.assertTrue(re.fullmatch('#x\na(?x:#y\nb)#z\nc', '#x\nab#z\nc')) + self.assertTrue(re.fullmatch('#x\na(?-x:#y\nb)#z\nc', 'a#y\nbc', + re.VERBOSE)) + self.assertTrue(re.fullmatch('(?x)#x\na(?-x:#y\nb)#z\nc', 'a#y\nbc')) + self.assertTrue(re.fullmatch('(?x)#x\na|#y\nb', 'a')) + self.assertTrue(re.fullmatch('(?x)#x\na|#y\nb', 'b')) + def test_bug_6509(self): # Replacement strings of both types must parse properly. 
# all strings @@ -1738,6 +1850,7 @@ def test_repeat_minmax_overflow(self): self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128) self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128)) + @cpython_only def test_repeat_minmax_overflow_maxrepeat(self): try: from _sre import MAXREPEAT @@ -1821,7 +1934,8 @@ def test_zerowidth(self): [(0, 0), (0, 1), (1, 1), (3, 3), (3, 5), (5, 5)]) # TODO: RUSTPYTHON - @unittest.expectedFailure + # @unittest.expectedFailure + @unittest.skip("") def test_bug_2537(self): # issue 2537: empty submatches for outer_op in ('{0,}', '*', '+', '{1,187}'): @@ -1832,6 +1946,7 @@ def test_bug_2537(self): self.assertEqual(m.group(1), "") self.assertEqual(m.group(2), "y") + @cpython_only def test_debug_flag(self): pat = r'(\.)(?:[ch]|py)(?(1)$|: )' with captured_stdout() as out: @@ -2207,6 +2322,18 @@ class ImplementationTest(unittest.TestCase): Test implementation details of the re module. """ + @cpython_only + def test_immutable(self): + # bpo-43908: check that re types are immutable + with self.assertRaises(TypeError): + re.Match.foo = 1 + with self.assertRaises(TypeError): + re.Pattern.foo = 1 + with self.assertRaises(TypeError): + pat = re.compile("") + tp = type(pat.scanner("")) + tp.foo = 1 + def test_overlap_table(self): f = sre_compile._generate_overlap_table self.assertEqual(f(""), []) @@ -2216,6 +2343,18 @@ def test_overlap_table(self): self.assertEqual(f("ababba"), [0, 0, 1, 2, 0, 1]) self.assertEqual(f("abcabdac"), [0, 0, 0, 1, 2, 0, 1, 0]) + def test_signedness(self): + self.assertGreaterEqual(sre_compile.MAXREPEAT, 0) + self.assertGreaterEqual(sre_compile.MAXGROUPS, 0) + + @cpython_only + def test_disallow_instantiation(self): + # Ensure that the type disallows instantiation (bpo-43916) + check_disallow_instantiation(self, re.Match) + check_disallow_instantiation(self, re.Pattern) + pat = re.compile("") + check_disallow_instantiation(self, type(pat.scanner(""))) + class ExternalTests(unittest.TestCase): 
@@ -2236,7 +2375,7 @@ def test_re_benchmarks(self): def test_re_tests(self): 're_tests test suite' - from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR + from test.re_tests import tests, FAIL, SYNTAX_ERROR for t in tests: pattern = s = outcome = repl = expected = None if len(t) == 5: diff --git a/extra_tests/snippets/stdlib_re.py b/extra_tests/snippets/stdlib_re.py index 45a505fcf7a..17ecdba7f68 100644 --- a/extra_tests/snippets/stdlib_re.py +++ b/extra_tests/snippets/stdlib_re.py @@ -67,4 +67,7 @@ urlpattern = re.compile('//([^/#?]*)(.*)', re.DOTALL) url = '//www.example.org:80/foo/bar/baz.html' -assert urlpattern.match(url).group(1) == 'www.example.org:80' \ No newline at end of file +assert urlpattern.match(url).group(1) == 'www.example.org:80' + +assert re.compile('(?:\w+(?:\s|/(?!>))*)*').match('a /bb />ccc').group() == 'a /bb ' +assert re.compile('(?:(1)?)*').match('111').group() == '111' \ No newline at end of file diff --git a/vm/Cargo.toml b/vm/Cargo.toml index 3c72b2de0a6..f1771238b5f 100644 --- a/vm/Cargo.toml +++ b/vm/Cargo.toml @@ -72,7 +72,7 @@ memoffset = "0.6.5" optional = "0.5.0" # RustPython crates implementing functionality based on CPython -sre-engine = "0.1.2" +sre-engine = "0.2.0" # to work on sre-engine locally # sre-engine = { path = "../../sre-engine" }