From b59b31860575a0ced9f263f9ccd5b8ff41ae0b7e Mon Sep 17 00:00:00 2001 From: CPython Developers <> Date: Mon, 16 Feb 2026 00:35:04 +0900 Subject: [PATCH 1/3] Update test_fstring from v3.14.3 --- Lib/test/test_fstring.py | 94 ++++++++++++++++++++++++---------------- 1 file changed, 56 insertions(+), 38 deletions(-) diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index 930e409fb2e..b258a866add 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -383,7 +383,7 @@ def test_ast_line_numbers_multiline_fstring(self): self.assertEqual(t.body[0].value.values[1].value.col_offset, 11) self.assertEqual(t.body[0].value.values[1].value.end_col_offset, 16) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: 4 != 5 def test_ast_line_numbers_with_parentheses(self): expr = """ x = ( @@ -587,7 +587,7 @@ def test_ast_compile_time_concat(self): exec(c) self.assertEqual(x[0], 'foo3') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_compile_time_concat_errors(self): self.assertAllRaise(SyntaxError, 'cannot mix bytes and nonbytes literals', @@ -600,7 +600,7 @@ def test_literal(self): self.assertEqual(f'a', 'a') self.assertEqual(f' ', ' ') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_unterminated_string(self): self.assertAllRaise(SyntaxError, 'unterminated string', [r"""f'{"x'""", @@ -609,7 +609,7 @@ def test_unterminated_string(self): r"""f'{("x}'""", ]) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON @unittest.skipIf(support.is_wasi, "exhausts limited stack on WASI") def test_mismatched_parens(self): self.assertAllRaise(SyntaxError, r"closing parenthesis '\}' " @@ -632,14 +632,24 @@ def test_mismatched_parens(self): r"does not match opening parenthesis '\('", ["f'{a(4}'", ]) - self.assertRaises(SyntaxError, eval, "f'{" + "("*500 + "}'") + 
self.assertRaises(SyntaxError, eval, "f'{" + "("*20 + "}'") - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: No exception raised @unittest.skipIf(support.is_wasi, "exhausts limited stack on WASI") def test_fstring_nested_too_deeply(self): - self.assertAllRaise(SyntaxError, - "f-string: expressions nested too deeply", - ['f"{1+2:{1+2:{1+1:{1}}}}"']) + def raises_syntax_or_memory_error(txt): + try: + eval(txt) + except SyntaxError: + pass + except MemoryError: + pass + except Exception as ex: + self.fail(f"Should raise SyntaxError or MemoryError, not {type(ex)}") + else: + self.fail("No exception raised") + + raises_syntax_or_memory_error('f"{1+2:{1+2:{1+1:{1}}}}"') def create_nested_fstring(n): if n == 0: @@ -647,9 +657,10 @@ def create_nested_fstring(n): prev = create_nested_fstring(n-1) return f'f"{{{prev}}}"' - self.assertAllRaise(SyntaxError, - "too many nested f-strings", - [create_nested_fstring(160)]) + raises_syntax_or_memory_error(create_nested_fstring(160)) + raises_syntax_or_memory_error("f'{" + "("*100 + "}'") + raises_syntax_or_memory_error("f'{" + "("*1000 + "}'") + raises_syntax_or_memory_error("f'{" + "("*10_000 + "}'") def test_syntax_error_in_nested_fstring(self): # See gh-104016 for more information on this crash @@ -692,7 +703,7 @@ def test_double_braces(self): ["f'{ {{}} }'", # dict in a set ]) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_compile_time_concat(self): x = 'def' self.assertEqual('abc' f'## {x}ghi', 'abc## defghi') @@ -730,7 +741,7 @@ def test_compile_time_concat(self): ['''f'{3' f"}"''', # can't concat to get a valid f-string ]) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_comments(self): # These aren't comments, since they're in strings. 
d = {'#': 'hash'} @@ -807,7 +818,7 @@ def build_fstr(n, extra=''): s = "f'{1}' 'x' 'y'" * 1024 self.assertEqual(eval(s), '1xy' * 1024) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_format_specifier_expressions(self): width = 10 precision = 4 @@ -841,7 +852,7 @@ def test_format_specifier_expressions(self): """f'{"s"!{"r"}}'""", ]) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: SyntaxWarning not triggered def test_custom_format_specifier(self): class CustomFormat: def __format__(self, format_spec): @@ -863,7 +874,7 @@ def __format__(self, spec): x = X() self.assertEqual(f'{x} {x}', '1 2') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_missing_expression(self): self.assertAllRaise(SyntaxError, "f-string: valid expression required before '}'", @@ -926,7 +937,7 @@ def test_missing_expression(self): "\xa0", ]) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_parens_in_expressions(self): self.assertEqual(f'{3,}', '(3,)') @@ -939,13 +950,13 @@ def test_parens_in_expressions(self): ["f'{3)+(4}'", ]) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_newlines_before_syntax_error(self): self.assertAllRaise(SyntaxError, "f-string: expecting a valid expression after '{'", ["f'{.}'", "\nf'{.}'", "\n\nf'{.}'"]) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: SyntaxWarning not triggered def test_backslashes_in_string_part(self): self.assertEqual(f'\t', '\t') self.assertEqual(r'\t', '\\t') @@ -1004,7 +1015,7 @@ def test_backslashes_in_string_part(self): self.assertEqual(fr'\N{AMPERSAND}', '\\Nspam') self.assertEqual(f'\\\N{AMPERSAND}', '\\&') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def 
test_misformed_unicode_character_name(self): # These test are needed because unicode names are parsed # differently inside f-strings. @@ -1024,7 +1035,7 @@ def test_misformed_unicode_character_name(self): r"'\N{GREEK CAPITAL LETTER DELTA'", ]) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_backslashes_in_expression_part(self): self.assertEqual(f"{( 1 + @@ -1040,7 +1051,7 @@ def test_backslashes_in_expression_part(self): ["f'{\n}'", ]) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_invalid_backslashes_inside_fstring_context(self): # All of these variations are invalid python syntax, # so they are also invalid in f-strings as well. @@ -1075,7 +1086,7 @@ def test_newlines_in_expressions(self): self.assertEqual(rf'''{3+ 4}''', '7') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: "f-string: expecting a valid expression after '{'" does not match "invalid syntax (<string>, line 1)" def test_lambda(self): x = 5 self.assertEqual(f'{(lambda y:x*y)("8")!r}', "'88888'") @@ -1118,7 +1129,7 @@ def test_roundtrip_raw_quotes(self): self.assertEqual(fr'\'\"\'', '\\\'\\"\\\'') self.assertEqual(fr'\"\'\"\'', '\\"\\\'\\"\\\'') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_fstring_backslash_before_double_bracket(self): deprecated_cases = [ (r"f'\{{\}}'", '\\{\\}'), @@ -1138,7 +1149,7 @@ def test_fstring_backslash_before_double_bracket(self): self.assertEqual(fr'\}}{1+1}', '\\}2') self.assertEqual(fr'{1+1}\}}', '2\\}') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: SyntaxWarning not triggered def test_fstring_backslash_before_double_bracket_warns_once(self): with self.assertWarns(SyntaxWarning) as w: eval(r"f'\{{'") @@ -1288,6 +1299,7 @@ def test_nested_fstrings(self): self.assertEqual(f'{f"{0}"*3}', 
'000') self.assertEqual(f'{f"{y}"*3}', '555') + @unittest.expectedFailure # TODO: RUSTPYTHON def test_invalid_string_prefixes(self): single_quote_cases = ["fu''", "uf''", @@ -1312,7 +1324,7 @@ def test_invalid_string_prefixes(self): "Bf''", "BF''",] double_quote_cases = [case.replace("'", '"') for case in single_quote_cases] - self.assertAllRaise(SyntaxError, 'invalid syntax', + self.assertAllRaise(SyntaxError, 'prefixes are incompatible', single_quote_cases + double_quote_cases) def test_leading_trailing_spaces(self): @@ -1342,7 +1354,7 @@ def test_equal_equal(self): self.assertEqual(f'{0==1}', 'False') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_conversions(self): self.assertEqual(f'{3.14:10.10}', ' 3.14') self.assertEqual(f'{1.25!s:10.10}', '1.25 ') @@ -1367,7 +1379,6 @@ def test_conversions(self): self.assertAllRaise(SyntaxError, "f-string: expecting '}'", ["f'{3!'", "f'{3!s'", - "f'{3!g'", ]) self.assertAllRaise(SyntaxError, 'f-string: missing conversion character', @@ -1408,14 +1419,14 @@ def test_assignment(self): "f'{x}' = x", ]) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_del(self): self.assertAllRaise(SyntaxError, 'invalid syntax', ["del f''", "del '' f''", ]) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_mismatched_braces(self): self.assertAllRaise(SyntaxError, "f-string: single '}' is not allowed", ["f'{{}'", @@ -1514,7 +1525,7 @@ def test_str_format_differences(self): self.assertEqual('{d[a]}'.format(d=d), 'string') self.assertEqual('{d[0]}'.format(d=d), 'integer') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_errors(self): # see issue 26287 self.assertAllRaise(TypeError, 'unsupported', @@ -1557,7 +1568,7 @@ def test_backslash_char(self): self.assertEqual(eval('f"\\\n"'), '') self.assertEqual(eval('f"\\\r"'), '') - 
@unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: '1+2 = # my comment\n 3' != '1+2 = \n 3' + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: '1+2 = # my comment\n 3' != '1+2 = \n 3' def test_debug_conversion(self): x = 'A string' self.assertEqual(f'{x=}', 'x=' + repr(x)) @@ -1705,7 +1716,7 @@ def test_walrus(self): self.assertEqual(f'{(x:=10)}', '10') self.assertEqual(x, 10) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: "f-string: expecting '=', or '!', or ':', or '}'" does not match "invalid syntax (?, line 1)" def test_invalid_syntax_error_message(self): with self.assertRaisesRegex(SyntaxError, "f-string: expecting '=', or '!', or ':', or '}'"): @@ -1731,7 +1742,7 @@ def test_with_an_underscore_and_a_comma_in_format_specifier(self): with self.assertRaisesRegex(ValueError, error_msg): f'{1:_,}' - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: "f-string: expecting a valid expression after '{'" does not match "invalid syntax (?, line 1)" def test_syntax_error_for_starred_expressions(self): with self.assertRaisesRegex(SyntaxError, "can't use starred expression here"): compile("f'{*a}'", "?", "exec") @@ -1740,7 +1751,7 @@ def test_syntax_error_for_starred_expressions(self): "f-string: expecting a valid expression after '{'"): compile("f'{**a}'", "?", "exec") - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; - def test_not_closing_quotes(self): self.assertAllRaise(SyntaxError, "unterminated f-string literal", ['f"', "f'"]) self.assertAllRaise(SyntaxError, "unterminated triple-quoted f-string literal", @@ -1760,7 +1771,7 @@ def test_not_closing_quotes(self): except SyntaxError as e: self.assertEqual(e.text, 'z = f"""') self.assertEqual(e.lineno, 3) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def 
test_syntax_error_after_debug(self): self.assertAllRaise(SyntaxError, "f-string: expecting a valid expression after '{'", [ @@ -1788,7 +1799,7 @@ def test_debug_in_file(self): self.assertEqual(stdout.decode('utf-8').strip().replace('\r\n', '\n').replace('\r', '\n'), "3\n=3") - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: 0 != 2 def test_syntax_warning_infinite_recursion_in_file(self): with temp_cwd(): script = 'script.py' @@ -1878,6 +1889,13 @@ def __format__(self, format): # Test multiple format specs in same raw f-string self.assertEqual(rf"{UnchangedFormat():\xFF} {UnchangedFormat():\n}", '\\xFF \\n') + def test_gh139516(self): + with temp_cwd(): + script = 'script.py' + with open(script, 'wb') as f: + f.write('''def f(a): pass\nf"{f(a=lambda: 'à'\n)}"'''.encode()) + assert_python_ok(script) + if __name__ == '__main__': unittest.main() From 6ae95913bed55fb2e8514c20aaf13893aa96018e Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Mon, 16 Feb 2026 02:42:11 +0900 Subject: [PATCH 2/3] Fix 6 test_fstring expectedFailure tests - Add Unknown(char) variant to FormatType for proper error messages on unrecognized format codes (test_errors) - Strip comments from f-string debug text in compile.rs (test_debug_conversion) - Map ruff SyntaxError messages to match CPython in vm_new.rs: InvalidDeleteTarget, LineContinuationError, UnclosedStringError, OtherError(bytes mixing), OtherError(keyword identifier), FStringError(UnterminatedString/UnterminatedTripleQuotedString), and backtick-to-quote replacement for FStringError messages --- Lib/test/test_fstring.py | 11 -- crates/codegen/src/compile.rs | 28 ++- crates/common/src/format.rs | 14 +- crates/vm/src/vm/compile.rs | 334 +++++++++++++++++++++++++++++++++- crates/vm/src/vm/vm_new.rs | 84 ++++++++- 5 files changed, 454 insertions(+), 17 deletions(-) diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index b258a866add..f4fca1caec7 100644 --- 
a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -587,7 +587,6 @@ def test_ast_compile_time_concat(self): exec(c) self.assertEqual(x[0], 'foo3') - @unittest.expectedFailure # TODO: RUSTPYTHON def test_compile_time_concat_errors(self): self.assertAllRaise(SyntaxError, 'cannot mix bytes and nonbytes literals', @@ -600,7 +599,6 @@ def test_literal(self): self.assertEqual(f'a', 'a') self.assertEqual(f' ', ' ') - @unittest.expectedFailure # TODO: RUSTPYTHON def test_unterminated_string(self): self.assertAllRaise(SyntaxError, 'unterminated string', [r"""f'{"x'""", @@ -852,7 +850,6 @@ def test_format_specifier_expressions(self): """f'{"s"!{"r"}}'""", ]) - @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: SyntaxWarning not triggered def test_custom_format_specifier(self): class CustomFormat: def __format__(self, format_spec): @@ -956,7 +953,6 @@ def test_newlines_before_syntax_error(self): "f-string: expecting a valid expression after '{'", ["f'{.}'", "\nf'{.}'", "\n\nf'{.}'"]) - @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: SyntaxWarning not triggered def test_backslashes_in_string_part(self): self.assertEqual(f'\t', '\t') self.assertEqual(r'\t', '\\t') @@ -1051,7 +1047,6 @@ def test_backslashes_in_expression_part(self): ["f'{\n}'", ]) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_invalid_backslashes_inside_fstring_context(self): # All of these variations are invalid python syntax, # so they are also invalid in f-strings as well. 
@@ -1129,7 +1124,6 @@ def test_roundtrip_raw_quotes(self): self.assertEqual(fr'\'\"\'', '\\\'\\"\\\'') self.assertEqual(fr'\"\'\"\'', '\\"\\\'\\"\\\'') - @unittest.expectedFailure # TODO: RUSTPYTHON def test_fstring_backslash_before_double_bracket(self): deprecated_cases = [ (r"f'\{{\}}'", '\\{\\}'), @@ -1149,7 +1143,6 @@ def test_fstring_backslash_before_double_bracket(self): self.assertEqual(fr'\}}{1+1}', '\\}2') self.assertEqual(fr'{1+1}\}}', '2\\}') - @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: SyntaxWarning not triggered def test_fstring_backslash_before_double_bracket_warns_once(self): with self.assertWarns(SyntaxWarning) as w: eval(r"f'\{{'") @@ -1419,7 +1412,6 @@ def test_assignment(self): "f'{x}' = x", ]) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_del(self): self.assertAllRaise(SyntaxError, 'invalid syntax', ["del f''", @@ -1525,7 +1517,6 @@ def test_str_format_differences(self): self.assertEqual('{d[a]}'.format(d=d), 'string') self.assertEqual('{d[0]}'.format(d=d), 'integer') - @unittest.expectedFailure # TODO: RUSTPYTHON def test_errors(self): # see issue 26287 self.assertAllRaise(TypeError, 'unsupported', @@ -1568,7 +1559,6 @@ def test_backslash_char(self): self.assertEqual(eval('f"\\\n"'), '') self.assertEqual(eval('f"\\\r"'), '') - @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: '1+2 = # my comment\n 3' != '1+2 = \n 3' def test_debug_conversion(self): x = 'A string' self.assertEqual(f'{x=}', 'x=' + repr(x)) @@ -1799,7 +1789,6 @@ def test_debug_in_file(self): self.assertEqual(stdout.decode('utf-8').strip().replace('\r\n', '\n').replace('\r', '\n'), "3\n=3") - @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: 0 != 2 def test_syntax_warning_infinite_recursion_in_file(self): with temp_cwd(): script = 'script.py' diff --git a/crates/codegen/src/compile.rs b/crates/codegen/src/compile.rs index c132c0f4b09..a7672e9a472 100644 --- a/crates/codegen/src/compile.rs +++ b/crates/codegen/src/compile.rs @@ 
-8450,7 +8450,12 @@ impl Compiler { if let Some(ast::DebugText { leading, trailing }) = &fstring_expr.debug_text { let range = fstring_expr.expression.range(); let source = self.source_file.slice(range); - let text = [leading, source, trailing].concat(); + let text = [ + strip_fstring_debug_comments(leading).as_str(), + source, + strip_fstring_debug_comments(trailing).as_str(), + ] + .concat(); self.emit_load_const(ConstantData::Str { value: text.into() }); element_count += 1; @@ -8786,6 +8791,27 @@ impl ToU32 for usize { } } +/// Strip Python comments from f-string debug text (leading/trailing around `=`). +/// A comment starts with `#` and extends to the end of the line. +/// The newline character itself is preserved. +fn strip_fstring_debug_comments(text: &str) -> String { + let mut result = String::with_capacity(text.len()); + let mut in_comment = false; + for ch in text.chars() { + if in_comment { + if ch == '\n' { + in_comment = false; + result.push(ch); + } + } else if ch == '#' { + in_comment = true; + } else { + result.push(ch); + } + } + result +} + #[cfg(test)] mod ruff_tests { use super::*; diff --git a/crates/common/src/format.rs b/crates/common/src/format.rs index 2842bd0a3d4..40bc9e53046 100644 --- a/crates/common/src/format.rs +++ b/crates/common/src/format.rs @@ -149,6 +149,7 @@ pub enum FormatType { GeneralFormat(Case), FixedPoint(Case), Percentage, + Unknown(char), } impl From<&FormatType> for char { @@ -170,6 +171,7 @@ impl From<&FormatType> for char { FormatType::FixedPoint(Case::Lower) => 'f', FormatType::FixedPoint(Case::Upper) => 'F', FormatType::Percentage => '%', + FormatType::Unknown(c) => *c, } } } @@ -194,6 +196,7 @@ impl FormatParse for FormatType { Some('g') => (Some(Self::GeneralFormat(Case::Lower)), chars.as_wtf8()), Some('G') => (Some(Self::GeneralFormat(Case::Upper)), chars.as_wtf8()), Some('%') => (Some(Self::Percentage), chars.as_wtf8()), + Some(c) => (Some(Self::Unknown(c)), chars.as_wtf8()), _ => (None, text), } } @@ -429,7 
+432,8 @@ impl FormatSpec { | FormatType::FixedPoint(_) | FormatType::GeneralFormat(_) | FormatType::Exponent(_) - | FormatType::Percentage, + | FormatType::Percentage + | FormatType::Number(_), ) => 3, None => 3, _ => panic!("Separators only valid for numbers!"), @@ -475,6 +479,7 @@ impl FormatSpec { let first_letter = (input.to_string().as_bytes()[0] as char).to_uppercase(); Ok(first_letter.collect::<String>() + &input.to_string()[1..]) } + Some(FormatType::Unknown(c)) => Err(FormatSpecError::UnknownFormatCode(*c, "int")), _ => Err(FormatSpecError::InvalidFormatSpecifier), } } @@ -496,7 +501,8 @@ impl FormatSpec { | Some(FormatType::Hex(_)) | Some(FormatType::String) | Some(FormatType::Character) - | Some(FormatType::Number(Case::Upper)) => { + | Some(FormatType::Number(Case::Upper)) + | Some(FormatType::Unknown(_)) => { let ch = char::from(self.format_type.as_ref().unwrap()); Err(FormatSpecError::UnknownFormatCode(ch, "float")) } @@ -609,6 +615,7 @@ impl FormatSpec { Some(float) => return self.format_float(float), _ => Err(FormatSpecError::UnableToConvert), }, + Some(FormatType::Unknown(c)) => Err(FormatSpecError::UnknownFormatCode(c, "int")), None => self.format_int_radix(magnitude, 10), }?; let format_sign = self.sign.unwrap_or(FormatSign::Minus); @@ -707,7 +714,8 @@ impl FormatSpec { | Some(FormatType::String) | Some(FormatType::Character) | Some(FormatType::Number(Case::Upper)) - | Some(FormatType::Percentage) => { + | Some(FormatType::Percentage) + | Some(FormatType::Unknown(_)) => { let ch = char::from(self.format_type.as_ref().unwrap()); Err(FormatSpecError::UnknownFormatCode(ch, "complex")) } diff --git a/crates/vm/src/vm/compile.rs b/crates/vm/src/vm/compile.rs index 97f0f9e97b8..0077cd3a1e8 100644 --- a/crates/vm/src/vm/compile.rs +++ b/crates/vm/src/vm/compile.rs @@ -25,6 +25,338 @@ impl VirtualMachine { source_path: String, opts: CompileOpts, ) -> Result<PyRef<PyCode>, CompileError> { - compiler::compile(source, mode, &source_path, opts).map(|code| 
self.ctx.new_code(code)) + let code = + compiler::compile(source, mode, &source_path, opts).map(|code| self.ctx.new_code(code)); + #[cfg(feature = "parser")] + if code.is_ok() { + self.emit_string_escape_warnings(source, &source_path); + } + code + } +} + +/// Scan source for invalid escape sequences in all string literals and emit +/// SyntaxWarning. +/// +/// Corresponds to: +/// - `warn_invalid_escape_sequence()` in `Parser/string_parser.c` +/// - `_PyTokenizer_warn_invalid_escape_sequence()` in `Parser/tokenizer/helpers.c` +#[cfg(feature = "parser")] +mod escape_warnings { + use super::*; + use crate::warn; + use ruff_python_ast::{self as ast, visitor::Visitor}; + use ruff_text_size::TextRange; + + /// Calculate 1-indexed line number at byte offset in source. + fn line_number_at(source: &str, offset: usize) -> usize { + source[..offset.min(source.len())] + .bytes() + .filter(|&b| b == b'\n') + .count() + + 1 + } + + /// Get content bounds (start, end byte offsets) of a quoted string literal, + /// excluding prefix characters and quote delimiters. + fn content_bounds(source: &str, range: TextRange) -> Option<(usize, usize)> { + let s = range.start().to_usize(); + let e = range.end().to_usize(); + if s >= e || e > source.len() { + return None; + } + let bytes = &source.as_bytes()[s..e]; + // Skip prefix (u, b, r, etc.) to find the first quote character. + let qi = bytes.iter().position(|&c| c == b'\'' || c == b'"')?; + let qc = bytes[qi]; + let ql = if bytes.get(qi + 1) == Some(&qc) && bytes.get(qi + 2) == Some(&qc) { + 3 + } else { + 1 + }; + let cs = s + qi + ql; + let ce = e.checked_sub(ql)?; + if cs <= ce { Some((cs, ce)) } else { None } + } + + /// Scan `source[start..end]` for the first invalid escape sequence. + /// Returns `Some((invalid_char, byte_offset_in_source))` for the first + /// invalid escape found, or `None` if all escapes are valid. 
+ /// + /// When `is_bytes` is true, `\u`, `\U`, and `\N` are treated as invalid + /// (bytes literals only support byte-oriented escapes). + /// + /// Only reports the **first** invalid escape per string literal, matching + /// `_PyUnicode_DecodeUnicodeEscapeInternal2` which stores only the first + /// `first_invalid_escape_char`. + fn first_invalid_escape( + source: &str, + start: usize, + end: usize, + is_bytes: bool, + ) -> Option<(char, usize)> { + let raw = &source[start..end]; + let mut chars = raw.char_indices().peekable(); + while let Some((i, ch)) = chars.next() { + if ch != '\\' { + continue; + } + let Some((_, next)) = chars.next() else { + break; + }; + let valid = match next { + '\\' | '\'' | '"' | 'a' | 'b' | 'f' | 'n' | 'r' | 't' | 'v' => true, + '\n' => true, + '\r' => { + if matches!(chars.peek(), Some(&(_, '\n'))) { + chars.next(); + } + true + } + '0'..='7' => { + for _ in 0..2 { + if matches!(chars.peek(), Some(&(_, '0'..='7'))) { + chars.next(); + } else { + break; + } + } + true + } + 'x' | 'u' | 'U' => { + // \u and \U are only valid in string literals, not bytes + if is_bytes && next != 'x' { + false + } else { + let count = match next { + 'x' => 2, + 'u' => 4, + 'U' => 8, + _ => unreachable!(), + }; + for _ in 0..count { + if chars.peek().is_some_and(|&(_, c)| c.is_ascii_hexdigit()) { + chars.next(); + } else { + break; + } + } + true + } + } + 'N' => { + // \N{name} is only valid in string literals, not bytes + if is_bytes { + false + } else { + if matches!(chars.peek(), Some(&(_, '{'))) { + chars.next(); + for (_, c) in chars.by_ref() { + if c == '}' { + break; + } + } + } + true + } + } + _ => false, + }; + if !valid { + return Some((next, start + i)); + } + } + None + } + + /// Emit `SyntaxWarning` for an invalid escape sequence. 
+ /// + /// `warn_invalid_escape_sequence()` in `Parser/string_parser.c` + fn warn_invalid_escape_sequence( + source: &str, + ch: char, + offset: usize, + filename: &str, + vm: &VirtualMachine, + ) { + let lineno = line_number_at(source, offset); + let message = vm.ctx.new_str(format!( + "\"\\{ch}\" is an invalid escape sequence. \ + Such sequences will not work in the future. \ + Did you mean \"\\\\{ch}\"? A raw string is also an option." + )); + let fname = vm.ctx.new_str(filename); + let _ = warn::warn_explicit( + Some(vm.ctx.exceptions.syntax_warning.to_owned()), + message.into(), + fname, + lineno, + None, + vm.ctx.none(), + None, + None, + vm, + ); + } + + struct EscapeWarningVisitor<'a> { + source: &'a str, + filename: &'a str, + vm: &'a VirtualMachine, + } + + impl<'a> EscapeWarningVisitor<'a> { + /// Check a quoted string/bytes literal for invalid escapes. + /// The range must include the prefix and quote delimiters. + fn check_quoted_literal(&self, range: TextRange, is_bytes: bool) { + if let Some((start, end)) = content_bounds(self.source, range) + && let Some((ch, offset)) = first_invalid_escape(self.source, start, end, is_bytes) + { + warn_invalid_escape_sequence(self.source, ch, offset, self.filename, self.vm); + } + } + + /// Check an f-string literal element for invalid escapes. + /// The range covers content only (no prefix/quotes). + /// + /// Also handles `\{` / `\}` at the literal–interpolation boundary, + /// equivalent to `_PyTokenizer_warn_invalid_escape_sequence` handling + /// `FSTRING_MIDDLE` / `FSTRING_END` tokens. 
+ fn check_fstring_literal(&self, range: TextRange) { + let start = range.start().to_usize(); + let end = range.end().to_usize(); + if start >= end || end > self.source.len() { + return; + } + if let Some((ch, offset)) = first_invalid_escape(self.source, start, end, false) { + warn_invalid_escape_sequence(self.source, ch, offset, self.filename, self.vm); + return; + } + // In CPython, _PyTokenizer_warn_invalid_escape_sequence handles + // `\{` and `\}` for FSTRING_MIDDLE/FSTRING_END tokens. Ruff + // splits the literal element before the interpolation delimiter, + // so the `\` sits at the end of the literal range and the `{`/`}` + // sits just after it. Only warn when the number of trailing + // backslashes is odd (an even count means they are all escaped). + let trailing_bs = self.source[start..end] + .as_bytes() + .iter() + .rev() + .take_while(|&&b| b == b'\\') + .count(); + if trailing_bs % 2 == 1 + && let Some(&after) = self.source.as_bytes().get(end) + && (after == b'{' || after == b'}') + { + warn_invalid_escape_sequence( + self.source, + after as char, + end - 1, + self.filename, + self.vm, + ); + } + } + + /// Visit f-string elements, checking literals and recursing into + /// interpolation expressions and format specs. 
+ fn visit_fstring_elements(&mut self, elements: &'a ast::InterpolatedStringElements) { + for element in elements { + match element { + ast::InterpolatedStringElement::Literal(lit) => { + self.check_fstring_literal(lit.range); + } + ast::InterpolatedStringElement::Interpolation(interp) => { + self.visit_expr(&interp.expression); + if let Some(spec) = &interp.format_spec { + self.visit_fstring_elements(&spec.elements); + } + } + } + } + } + } + + impl<'a> Visitor<'a> for EscapeWarningVisitor<'a> { + fn visit_expr(&mut self, expr: &'a ast::Expr) { + match expr { + // Regular string literals — decode_unicode_with_escapes path + ast::Expr::StringLiteral(string) => { + for part in string.value.as_slice() { + if !matches!( + part.flags.prefix(), + ast::str_prefix::StringLiteralPrefix::Raw { .. } + ) { + self.check_quoted_literal(part.range, false); + } + } + } + // Byte string literals — decode_bytes_with_escapes path + ast::Expr::BytesLiteral(bytes) => { + for part in bytes.value.as_slice() { + if !matches!( + part.flags.prefix(), + ast::str_prefix::ByteStringPrefix::Raw { .. } + ) { + self.check_quoted_literal(part.range, true); + } + } + } + // F-string literals — tokenizer + string_parser paths + ast::Expr::FString(fstring_expr) => { + for part in fstring_expr.value.as_slice() { + match part { + ast::FStringPart::Literal(string_lit) => { + // Plain string part in f-string concatenation + if !matches!( + string_lit.flags.prefix(), + ast::str_prefix::StringLiteralPrefix::Raw { .. } + ) { + self.check_quoted_literal(string_lit.range, false); + } + } + ast::FStringPart::FString(fstring) => { + if matches!( + fstring.flags.prefix(), + ast::str_prefix::FStringPrefix::Raw { .. } + ) { + continue; + } + self.visit_fstring_elements(&fstring.elements); + } + } + } + } + _ => ast::visitor::walk_expr(self, expr), + } + } + } + + impl VirtualMachine { + /// Walk all string literals in `source` and emit `SyntaxWarning` for + /// each that contains an invalid escape sequence. 
+ pub(super) fn emit_string_escape_warnings(&self, source: &str, filename: &str) { + let Ok(parsed) = + ruff_python_parser::parse(source, ruff_python_parser::Mode::Module.into()) + else { + return; + }; + let ast = parsed.into_syntax(); + let mut visitor = EscapeWarningVisitor { + source, + filename, + vm: self, + }; + match ast { + ast::Mod::Module(module) => { + for stmt in &module.body { + visitor.visit_stmt(stmt); + } + } + ast::Mod::Expression(expr) => { + visitor.visit_expr(&expr.body); + } + } + } } } diff --git a/crates/vm/src/vm/vm_new.rs b/crates/vm/src/vm/vm_new.rs index 7c6035b62d1..a67c0636614 100644 --- a/crates/vm/src/vm/vm_new.rs +++ b/crates/vm/src/vm/vm_new.rs @@ -503,11 +503,52 @@ impl VirtualMachine { } let mut narrow_caret = false; match error { + #[cfg(feature = "parser")] + crate::compiler::CompileError::Parse(rustpython_compiler::ParseError { + error: + ruff_python_parser::ParseErrorType::FStringError( + ruff_python_parser::InterpolatedStringErrorType::UnterminatedString, + ) + | ruff_python_parser::ParseErrorType::Lexical( + ruff_python_parser::LexicalErrorType::FStringError( + ruff_python_parser::InterpolatedStringErrorType::UnterminatedString, + ), + ), + .. + }) => { + msg = "unterminated f-string literal".to_owned(); + } + #[cfg(feature = "parser")] + crate::compiler::CompileError::Parse(rustpython_compiler::ParseError { + error: + ruff_python_parser::ParseErrorType::FStringError( + ruff_python_parser::InterpolatedStringErrorType::UnterminatedTripleQuotedString, + ) + | ruff_python_parser::ParseErrorType::Lexical( + ruff_python_parser::LexicalErrorType::FStringError( + ruff_python_parser::InterpolatedStringErrorType::UnterminatedTripleQuotedString, + ), + ), + .. 
+ }) => { + msg = "unterminated triple-quoted f-string literal".to_owned(); + } #[cfg(feature = "parser")] crate::compiler::CompileError::Parse(rustpython_compiler::ParseError { error: ruff_python_parser::ParseErrorType::FStringError(_) - | ruff_python_parser::ParseErrorType::UnexpectedExpressionToken, + | ruff_python_parser::ParseErrorType::Lexical( + ruff_python_parser::LexicalErrorType::FStringError(_), + ), + .. + }) => { + // Replace backticks with single quotes to match CPython's error messages + msg = msg.replace('`', "'"); + msg.insert_str(0, "invalid syntax: "); + } + #[cfg(feature = "parser")] + crate::compiler::CompileError::Parse(rustpython_compiler::ParseError { + error: ruff_python_parser::ParseErrorType::UnexpectedExpressionToken, .. }) => msg.insert_str(0, "invalid syntax: "), #[cfg(feature = "parser")] @@ -532,6 +573,47 @@ impl VirtualMachine { msg = "invalid syntax".to_owned(); narrow_caret = true; } + #[cfg(feature = "parser")] + crate::compiler::CompileError::Parse(rustpython_compiler::ParseError { + error: ruff_python_parser::ParseErrorType::InvalidDeleteTarget, + .. + }) => { + msg = "invalid syntax".to_owned(); + } + #[cfg(feature = "parser")] + crate::compiler::CompileError::Parse(rustpython_compiler::ParseError { + error: + ruff_python_parser::ParseErrorType::Lexical( + ruff_python_parser::LexicalErrorType::LineContinuationError, + ), + .. + }) => { + msg = "unexpected character after line continuation".to_owned(); + } + #[cfg(feature = "parser")] + crate::compiler::CompileError::Parse(rustpython_compiler::ParseError { + error: + ruff_python_parser::ParseErrorType::Lexical( + ruff_python_parser::LexicalErrorType::UnclosedStringError, + ), + .. + }) => { + msg = "unterminated string".to_owned(); + } + #[cfg(feature = "parser")] + crate::compiler::CompileError::Parse(rustpython_compiler::ParseError { + error: ruff_python_parser::ParseErrorType::OtherError(s), + .. 
+ }) if s.eq_ignore_ascii_case("bytes literal cannot be mixed with non-bytes literals") => { + msg = "cannot mix bytes and nonbytes literals".to_owned(); + } + #[cfg(feature = "parser")] + crate::compiler::CompileError::Parse(rustpython_compiler::ParseError { + error: ruff_python_parser::ParseErrorType::OtherError(s), + .. + }) if s.starts_with("Expected an identifier, but found a keyword") => { + msg = "invalid syntax".to_owned(); + } _ => {} } if syntax_error_type.is(self.ctx.exceptions.tab_error) { From d9e49ea07d93e83e9839dbb45d94d7550e35007c Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Tue, 17 Feb 2026 10:49:48 +0900 Subject: [PATCH 3/3] Fix clippy::sliced_string_as_bytes warning --- crates/vm/src/vm/compile.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/vm/src/vm/compile.rs b/crates/vm/src/vm/compile.rs index 0077cd3a1e8..7294dc8f897 100644 --- a/crates/vm/src/vm/compile.rs +++ b/crates/vm/src/vm/compile.rs @@ -238,8 +238,7 @@ mod escape_warnings { // so the `\` sits at the end of the literal range and the `{`/`}` // sits just after it. Only warn when the number of trailing // backslashes is odd (an even count means they are all escaped). - let trailing_bs = self.source[start..end] - .as_bytes() + let trailing_bs = self.source.as_bytes()[start..end] .iter() .rev() .take_while(|&&b| b == b'\\')