From 9ec690b8f8758e5fb958f8cc40e803eca315861f Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Fri, 15 Nov 2024 09:40:18 +0530 Subject: [PATCH] [red-knot] Add support for string annotations (#14151) ## Summary This PR adds support for parsing and inferring types within string annotations. ### Implementation (attempt 1) This is preserved in https://github.com/astral-sh/ruff/pull/14151/commits/6217f48924f8f3f8a3d28c4929fe5aaad4ad0a59. The implementation here would separate the inference of string annotations into the deferred query. This requires the following: * Two ways of evaluating the deferred definitions - lazily and eagerly. * An eager evaluation occurs right outside the definition query which in this case would be in `binding_ty` and `declaration_ty`. * A lazy evaluation occurs on demand like using the `definition_expression_ty` to determine the function return type and class bases. * The above point means that when trying to get the binding type for a variable in an annotated assignment, the definition query won't include the type. So, it'll require going through the deferred query to get the type. This has the following limitations: * Nested string annotations, although not necessarily a useful feature, are difficult to implement unless we convert the implementation into an infinite loop. * Partial string annotations require a complex layout because inferring the types for stringified and non-stringified parts of the annotation is done in separate queries. This means we need to maintain additional information. ### Implementation (attempt 2) This is the final diff in this PR. The implementation here does the complete inference of string annotations in the same definition query by maintaining certain state while trying to infer different parts of an expression and making decisions accordingly. These are: * Allow names that are part of a string annotation to not exist in the symbol table. 
For example, in `x: "Foo"`, if the "Foo" symbol is not defined then it won't exist in the symbol table even though it's being used. This is an exception to the invariant that the symbol table creates a symbol for every name node, and it is allowed only for symbols in a string annotation. * Similarly, `lookup_name` is updated to do the same: if the symbol doesn't exist, then it's not bound. * Store the final type of a string annotation on the string expression itself and not for any of the sub-expressions that are created after parsing. This is because those sub-expressions won't exist in the semantic index. Design document: https://www.notion.so/astral-sh/String-Annotations-12148797e1ca801197a9f146641e5b71?pvs=4 Closes: #13796 ## Test Plan * Add various test cases in our markdown framework * Run `red_knot` on LibCST (contains a lot of string annotations, specifically https://github.com/Instagram/LibCST/blob/main/libcst/matchers/_matcher_base.py), FastAPI (good amount of annotated code including `typing.Literal`) and compare against the `main` branch output --- crates/red_knot_python_semantic/Cargo.toml | 1 + .../resources/mdtest/annotations/string.md | 186 ++++++++- .../mdtest/assignment/annotations.md | 26 ++ crates/red_knot_python_semantic/src/types.rs | 3 +- .../src/types/infer.rs | 363 ++++++++++++++---- .../src/types/string_annotation.rs | 77 ++++ 6 files changed, 569 insertions(+), 87 deletions(-) create mode 100644 crates/red_knot_python_semantic/src/types/string_annotation.rs diff --git a/crates/red_knot_python_semantic/Cargo.toml b/crates/red_knot_python_semantic/Cargo.toml index c08323b1706cf..ef219bcaf18bd 100644 --- a/crates/red_knot_python_semantic/Cargo.toml +++ b/crates/red_knot_python_semantic/Cargo.toml @@ -14,6 +14,7 @@ license = { workspace = true } ruff_db = { workspace = true } ruff_index = { workspace = true } ruff_python_ast = { workspace = true, features = ["salsa"] } +ruff_python_parser = { workspace = true } ruff_python_stdlib = { workspace = true } ruff_source_file = { workspace = true } ruff_text_size 
= { workspace = true } diff --git a/crates/red_knot_python_semantic/resources/mdtest/annotations/string.md b/crates/red_knot_python_semantic/resources/mdtest/annotations/string.md index 0d6fe841bdb64..d5693f61bbba2 100644 --- a/crates/red_knot_python_semantic/resources/mdtest/annotations/string.md +++ b/crates/red_knot_python_semantic/resources/mdtest/annotations/string.md @@ -1,9 +1,191 @@ # String annotations +## Simple + ```py def f() -> "int": return 1 -# TODO: We do not support string annotations, but we should not panic if we encounter them -reveal_type(f()) # revealed: @Todo +reveal_type(f()) # revealed: int +``` + +## Nested + +```py +def f() -> "'int'": + return 1 + +reveal_type(f()) # revealed: int +``` + +## Type expression + +```py +def f1() -> "int | str": + return 1 + +def f2() -> "tuple[int, str]": + return 1 + +reveal_type(f1()) # revealed: int | str +reveal_type(f2()) # revealed: tuple[int, str] +``` + +## Partial + +```py +def f() -> tuple[int, "str"]: + return 1 + +reveal_type(f()) # revealed: tuple[int, str] +``` + +## Deferred + +```py +def f() -> "Foo": + return Foo() + +class Foo: + pass + +reveal_type(f()) # revealed: Foo +``` + +## Deferred (undefined) + +```py +# error: [unresolved-reference] +def f() -> "Foo": + pass + +reveal_type(f()) # revealed: Unknown +``` + +## Partial deferred + +```py +def f() -> int | "Foo": + return 1 + +class Foo: + pass + +reveal_type(f()) # revealed: int | Foo ``` + +## `typing.Literal` + +```py +from typing import Literal + +def f1() -> Literal["Foo", "Bar"]: + return "Foo" + +def f2() -> 'Literal["Foo", "Bar"]': + return "Foo" + +class Foo: + pass + +reveal_type(f1()) # revealed: Literal["Foo", "Bar"] +reveal_type(f2()) # revealed: Literal["Foo", "Bar"] +``` + +## Various string kinds + +```py +# error: [annotation-raw-string] "Type expressions cannot use raw string literal" +def f1() -> r"int": + return 1 + +# error: [annotation-f-string] "Type expressions cannot use f-strings" +def f2() -> f"int": + 
return 1 + +# error: [annotation-byte-string] "Type expressions cannot use bytes literal" +def f3() -> b"int": + return 1 + +def f4() -> "int": + return 1 + +# error: [annotation-implicit-concat] "Type expressions cannot span multiple string literals" +def f5() -> "in" "t": + return 1 + +# error: [annotation-escape-character] "Type expressions cannot contain escape characters" +def f6() -> "\N{LATIN SMALL LETTER I}nt": + return 1 + +# error: [annotation-escape-character] "Type expressions cannot contain escape characters" +def f7() -> "\x69nt": + return 1 + +def f8() -> """int""": + return 1 + +# error: [annotation-byte-string] "Type expressions cannot use bytes literal" +def f9() -> "b'int'": + return 1 + +reveal_type(f1()) # revealed: Unknown +reveal_type(f2()) # revealed: Unknown +reveal_type(f3()) # revealed: Unknown +reveal_type(f4()) # revealed: int +reveal_type(f5()) # revealed: Unknown +reveal_type(f6()) # revealed: Unknown +reveal_type(f7()) # revealed: Unknown +reveal_type(f8()) # revealed: int +reveal_type(f9()) # revealed: Unknown +``` + +## Various string kinds in `typing.Literal` + +```py +from typing import Literal + +def f() -> Literal["a", r"b", b"c", "d" "e", "\N{LATIN SMALL LETTER F}", "\x67", """h"""]: + return "normal" + +reveal_type(f()) # revealed: Literal["a", "b", "de", "f", "g", "h"] | Literal[b"c"] +``` + +## Class variables + +```py +MyType = int + +class Aliases: + MyType = str + + forward: "MyType" + not_forward: MyType + +reveal_type(Aliases.forward) # revealed: str +reveal_type(Aliases.not_forward) # revealed: str +``` + +## Annotated assignment + +```py +a: "int" = 1 +b: "'int'" = 1 +c: "Foo" +# error: [invalid-assignment] "Object of type `Literal[1]` is not assignable to `Foo`" +d: "Foo" = 1 + +class Foo: + pass + +c = Foo() + +reveal_type(a) # revealed: Literal[1] +reveal_type(b) # revealed: Literal[1] +reveal_type(c) # revealed: Foo +reveal_type(d) # revealed: Foo +``` + +## Parameter + +TODO: Add tests once parameter inference 
is supported diff --git a/crates/red_knot_python_semantic/resources/mdtest/assignment/annotations.md b/crates/red_knot_python_semantic/resources/mdtest/assignment/annotations.md index 349924f75b9b8..1dc8156d8bca4 100644 --- a/crates/red_knot_python_semantic/resources/mdtest/assignment/annotations.md +++ b/crates/red_knot_python_semantic/resources/mdtest/assignment/annotations.md @@ -110,3 +110,29 @@ c: builtins.tuple[builtins.tuple[builtins.int, builtins.int], builtins.int] = (( # error: [invalid-assignment] "Object of type `Literal["foo"]` is not assignable to `tuple[tuple[int, int], int]`" c: builtins.tuple[builtins.tuple[builtins.int, builtins.int], builtins.int] = "foo" ``` + +## Future annotations are deferred + +```py +from __future__ import annotations + +x: Foo + +class Foo: + pass + +x = Foo() +reveal_type(x) # revealed: Foo +``` + +## Annotations in stub files are deferred + +```pyi path=main.pyi +x: Foo + +class Foo: + pass + +x = Foo() +reveal_type(x) # revealed: Foo +``` diff --git a/crates/red_knot_python_semantic/src/types.rs b/crates/red_knot_python_semantic/src/types.rs index 7995d4e49040a..8b1206a27f2ff 100644 --- a/crates/red_knot_python_semantic/src/types.rs +++ b/crates/red_knot_python_semantic/src/types.rs @@ -37,6 +37,7 @@ mod infer; mod mro; mod narrow; mod signatures; +mod string_annotation; mod unpacker; #[salsa::tracked(return_ref)] @@ -58,7 +59,7 @@ pub fn check_types(db: &dyn Db, file: File) -> TypeCheckDiagnostics { /// Infer the public type of a symbol (its type as seen from outside its scope). 
fn symbol_by_id<'db>(db: &'db dyn Db, scope: ScopeId<'db>, symbol: ScopedSymbolId) -> Symbol<'db> { - let _span = tracing::trace_span!("symbol_ty_by_id", ?symbol).entered(); + let _span = tracing::trace_span!("symbol_by_id", ?symbol).entered(); let use_def = use_def_map(db, scope); diff --git a/crates/red_knot_python_semantic/src/types/infer.rs b/crates/red_knot_python_semantic/src/types/infer.rs index bff3e5fc4806f..cb20dbe406f36 100644 --- a/crates/red_knot_python_semantic/src/types/infer.rs +++ b/crates/red_knot_python_semantic/src/types/infer.rs @@ -32,7 +32,7 @@ use itertools::Itertools; use ruff_db::files::File; use ruff_db::parsed::parsed_module; use ruff_python_ast::{self as ast, AnyNodeRef, Expr, ExprContext, UnaryOp}; -use rustc_hash::FxHashMap; +use rustc_hash::{FxHashMap, FxHashSet}; use salsa; use salsa::plumbing::AsId; @@ -48,9 +48,7 @@ use crate::semantic_index::semantic_index; use crate::semantic_index::symbol::{NodeWithScopeKind, NodeWithScopeRef, ScopeId}; use crate::semantic_index::SemanticIndex; use crate::stdlib::builtins_module_scope; -use crate::types::diagnostic::{ - TypeCheckDiagnostic, TypeCheckDiagnostics, TypeCheckDiagnosticsBuilder, -}; +use crate::types::diagnostic::{TypeCheckDiagnostics, TypeCheckDiagnosticsBuilder}; use crate::types::mro::MroErrorKind; use crate::types::unpacker::{UnpackResult, Unpacker}; use crate::types::{ @@ -64,6 +62,8 @@ use crate::unpack::Unpack; use crate::util::subscript::{PyIndex, PySlice}; use crate::Db; +use super::string_annotation::parse_string_annotation; + /// Infer all types for a [`ScopeId`], including all definitions and expressions in that scope. /// Use when checking a scope, or needing to provide a type for an arbitrary expression in the /// scope. @@ -212,12 +212,12 @@ pub(crate) struct TypeInference<'db> { /// The types of every declaration in this region. declarations: FxHashMap, Type<'db>>, + /// The definitions that are deferred. + deferred: FxHashSet>, + /// The diagnostics for this region. 
diagnostics: TypeCheckDiagnostics, - /// Are there deferred type expressions in this region? - has_deferred: bool, - /// The scope belong to this region. scope: ScopeId<'db>, } @@ -228,8 +228,8 @@ impl<'db> TypeInference<'db> { expressions: FxHashMap::default(), bindings: FxHashMap::default(), declarations: FxHashMap::default(), + deferred: FxHashSet::default(), diagnostics: TypeCheckDiagnostics::default(), - has_deferred: false, scope, } } @@ -253,7 +253,7 @@ impl<'db> TypeInference<'db> { self.declarations[&definition] } - pub(crate) fn diagnostics(&self) -> &[std::sync::Arc] { + pub(crate) fn diagnostics(&self) -> &TypeCheckDiagnostics { &self.diagnostics } @@ -262,6 +262,7 @@ impl<'db> TypeInference<'db> { self.bindings.shrink_to_fit(); self.declarations.shrink_to_fit(); self.diagnostics.shrink_to_fit(); + self.deferred.shrink_to_fit(); } } @@ -329,6 +330,17 @@ pub(super) struct TypeInferenceBuilder<'db> { /// The type inference results types: TypeInference<'db>, + /// The deferred state of inferring types of certain expressions within the region. + /// + /// This is different from [`InferenceRegion::Deferred`] which works on the entire definition + /// while this is relevant for specific expressions within the region itself and is updated + /// during the inference process. + /// + /// For example, when inferring the types of an annotated assignment, the type of an annotation + /// expression could be deferred if the file has `from __future__ import annotations` import or + /// is a stub file but we're still in a non-deferred region. 
+ deferred_state: DeferredExpressionState, + diagnostics: TypeCheckDiagnosticsBuilder<'db>, } @@ -358,6 +370,7 @@ impl<'db> TypeInferenceBuilder<'db> { index, region, file, + deferred_state: DeferredExpressionState::None, types: TypeInference::empty(scope), diagnostics: TypeCheckDiagnosticsBuilder::new(db, file), } @@ -371,8 +384,8 @@ impl<'db> TypeInferenceBuilder<'db> { .declarations .extend(inference.declarations.iter()); self.types.expressions.extend(inference.expressions.iter()); + self.types.deferred.extend(inference.deferred.iter()); self.diagnostics.extend(&inference.diagnostics); - self.types.has_deferred |= inference.has_deferred; } fn scope(&self) -> ScopeId<'db> { @@ -387,7 +400,7 @@ impl<'db> TypeInferenceBuilder<'db> { /// Are we currently inferring deferred types? fn is_deferred(&self) -> bool { - matches!(self.region, InferenceRegion::Deferred(_)) + matches!(self.region, InferenceRegion::Deferred(_)) || self.deferred_state.is_deferred() } /// Get the already-inferred type of an expression node. @@ -439,17 +452,15 @@ impl<'db> TypeInferenceBuilder<'db> { } } - if self.types.has_deferred { - // invariant: only annotations and base classes are deferred, and both of these only - // occur within a declaration (annotated assignment, function or class definition) - for definition in self.types.declarations.keys() { - if infer_definition_types(self.db, *definition).has_deferred { - let deferred = infer_deferred_types(self.db, *definition); - self.types.expressions.extend(&deferred.expressions); - self.diagnostics.extend(&deferred.diagnostics); - } - } + // Infer the deferred types for the definitions here to consider the end-of-scope + // semantics. + for definition in std::mem::take(&mut self.types.deferred) { + self.extend(infer_deferred_types(self.db, definition)); } + assert!( + self.types.deferred.is_empty(), + "Inferring deferred types should not add more deferred definitions" + ); // TODO: Only call this function when diagnostics are enabled. 
self.check_class_definitions(); @@ -670,12 +681,18 @@ impl<'db> TypeInferenceBuilder<'db> { } fn infer_region_deferred(&mut self, definition: Definition<'db>) { + // N.B. We don't defer the types for an annotated assignment here because it is done in + // the same definition query. It utilizes the deferred expression state instead. + // + // This is because for partially stringified annotations like `a: tuple[int, "ForwardRef"]`, + // we need to defer the types of non-stringified expressions like `tuple` and `int` in the + // definition query while the stringified expression `"ForwardRef"` would need to deferred + // to use end-of-scope semantics. This would require custom and possibly a complex + // implementation to allow this "split" to happen. + match definition.kind(self.db) { DefinitionKind::Function(function) => self.infer_function_deferred(function.node()), DefinitionKind::Class(class) => self.infer_class_deferred(class.node()), - DefinitionKind::AnnotatedAssignment(_annotated_assignment) => { - // TODO self.infer_annotated_assignment_deferred(annotated_assignment.node()); - } _ => {} } } @@ -822,7 +839,10 @@ impl<'db> TypeInferenceBuilder<'db> { .as_deref() .expect("function type params scope without type params"); - self.infer_optional_annotation_expression(function.returns.as_deref()); + self.infer_optional_annotation_expression( + function.returns.as_deref(), + DeferredExpressionState::None, + ); self.infer_type_parameters(type_params); self.infer_parameters(&function.parameters); } @@ -915,9 +935,12 @@ impl<'db> TypeInferenceBuilder<'db> { // `infer_function_type_params`, rather than here. 
if type_params.is_none() { if self.are_all_types_deferred() { - self.types.has_deferred = true; + self.types.deferred.insert(definition); } else { - self.infer_optional_annotation_expression(returns.as_deref()); + self.infer_optional_annotation_expression( + returns.as_deref(), + DeferredExpressionState::None, + ); self.infer_parameters(parameters); } } @@ -968,7 +991,10 @@ impl<'db> TypeInferenceBuilder<'db> { default: _, } = parameter_with_default; - self.infer_optional_annotation_expression(parameter.annotation.as_deref()); + self.infer_optional_annotation_expression( + parameter.annotation.as_deref(), + DeferredExpressionState::None, + ); } fn infer_parameter(&mut self, parameter: &ast::Parameter) { @@ -978,7 +1004,10 @@ impl<'db> TypeInferenceBuilder<'db> { annotation, } = parameter; - self.infer_optional_annotation_expression(annotation.as_deref()); + self.infer_optional_annotation_expression( + annotation.as_deref(), + DeferredExpressionState::None, + ); } fn infer_parameter_with_default_definition( @@ -1055,7 +1084,7 @@ impl<'db> TypeInferenceBuilder<'db> { // Inference of bases deferred in stubs // TODO also defer stringified generic type parameters if self.are_all_types_deferred() { - self.types.has_deferred = true; + self.types.deferred.insert(definition); } else { for base in class_node.bases() { self.infer_expression(base); @@ -1065,15 +1094,16 @@ impl<'db> TypeInferenceBuilder<'db> { } fn infer_function_deferred(&mut self, function: &ast::StmtFunctionDef) { - self.infer_optional_annotation_expression(function.returns.as_deref()); + self.infer_optional_annotation_expression( + function.returns.as_deref(), + DeferredExpressionState::Deferred, + ); self.infer_parameters(function.parameters.as_ref()); } fn infer_class_deferred(&mut self, class: &ast::StmtClassDef) { - if self.are_all_types_deferred() { - for base in class.bases() { - self.infer_expression(base); - } + for base in class.bases() { + self.infer_expression(base); } } @@ -1609,12 +1639,13 @@ 
impl<'db> TypeInferenceBuilder<'db> { target, simple: _, } = assignment; - self.infer_annotation_expression(annotation); + self.infer_annotation_expression(annotation, DeferredExpressionState::None); self.infer_optional_expression(value.as_deref()); self.infer_expression(target); } } + /// Infer the types in an annotated assignment definition. fn infer_annotated_assignment_definition( &mut self, assignment: &ast::StmtAnnAssign, @@ -1628,7 +1659,10 @@ impl<'db> TypeInferenceBuilder<'db> { simple: _, } = assignment; - let mut annotation_ty = self.infer_annotation_expression(annotation); + let mut annotation_ty = self.infer_annotation_expression( + annotation, + DeferredExpressionState::from(self.are_all_types_deferred()), + ); // Handle various singletons. if let Type::Instance(InstanceType { class }) = annotation_ty { @@ -1646,7 +1680,7 @@ impl<'db> TypeInferenceBuilder<'db> { } } - if let Some(value) = value { + if let Some(value) = value.as_deref() { let value_ty = self.infer_expression(value); self.add_declaration_with_binding( assignment.into(), @@ -2182,6 +2216,12 @@ impl<'db> TypeInferenceBuilder<'db> { expression: &impl HasScopedAstId, ty: Type<'db>, ) { + if self.deferred_state.in_string_annotation() { + // Avoid storing the type of expressions that are part of a string annotation because + // the expression ids don't exists in the semantic index. Instead, we'll store the type + // on the string expression itself that represents the annotation. + return; + } let expr_id = expression.scoped_ast_id(self.db, self.scope()); let previous = self.types.expressions.insert(expr_id, ty); assert_eq!(previous, None); @@ -2666,12 +2706,16 @@ impl<'db> TypeInferenceBuilder<'db> { fn lookup_name(&mut self, name_node: &ast::ExprName) -> Symbol<'db> { let ast::ExprName { id: name, .. 
} = name_node; let file_scope_id = self.scope().file_scope_id(self.db); - let is_bound = self - .index - .symbol_table(file_scope_id) - .symbol_by_name(name) - .expect("Symbol table should create a symbol for every Name node") - .is_bound(); + let is_bound = + if let Some(symbol) = self.index.symbol_table(file_scope_id).symbol_by_name(name) { + symbol.is_bound() + } else { + assert!( + self.deferred_state.in_string_annotation(), + "Expected the symbol table to create a symbol for every Name node" + ); + false + }; // In function-like scopes, any local variable (symbol that is bound in this scope) can // only have a definition in this scope, or error; it never references another scope. @@ -2743,27 +2787,29 @@ impl<'db> TypeInferenceBuilder<'db> { let file_scope_id = self.scope().file_scope_id(self.db); let use_def = self.index.use_def_map(file_scope_id); - let symbol = self - .index - .symbol_table(file_scope_id) - .symbol_id_by_name(id) - .expect("Expected the symbol table to create a symbol for every Name node"); - // if we're inferring types of deferred expressions, always treat them as public symbols - let (definitions, boundness) = if self.is_deferred() { - ( - use_def.public_bindings(symbol), - use_def.public_boundness(symbol), - ) + + // If we're inferring types of deferred expressions, always treat them as public symbols + let (bindings_ty, boundness) = if self.is_deferred() { + if let Some(symbol) = self.index.symbol_table(file_scope_id).symbol_id_by_name(id) { + ( + bindings_ty(self.db, use_def.public_bindings(symbol)), + use_def.public_boundness(symbol), + ) + } else { + assert!( + self.deferred_state.in_string_annotation(), + "Expected the symbol table to create a symbol for every Name node" + ); + (None, Boundness::PossiblyUnbound) + } } else { let use_id = name.scoped_use_id(self.db, self.scope()); ( - use_def.bindings_at_use(use_id), + bindings_ty(self.db, use_def.bindings_at_use(use_id)), use_def.use_boundness(use_id), ) }; - let bindings_ty = 
bindings_ty(self.db, definitions); - if boundness == Boundness::PossiblyUnbound { match self.lookup_name(name) { Symbol::Type(looked_up_ty, looked_up_boundness) => { @@ -4062,39 +4108,125 @@ impl<'db> TypeInferenceBuilder<'db> { /// Annotation expressions. impl<'db> TypeInferenceBuilder<'db> { - fn infer_annotation_expression(&mut self, expression: &ast::Expr) -> Type<'db> { + /// Infer the type of an annotation expression with the given [`DeferredExpressionState`]. + fn infer_annotation_expression( + &mut self, + annotation: &ast::Expr, + deferred_state: DeferredExpressionState, + ) -> Type<'db> { + let previous_deferred_state = std::mem::replace(&mut self.deferred_state, deferred_state); + let annotation_ty = self.infer_annotation_expression_impl(annotation); + self.deferred_state = previous_deferred_state; + annotation_ty + } + + /// Similar to [`infer_annotation_expression`], but accepts an optional annotation expression + /// and returns [`None`] if the annotation is [`None`]. + /// + /// [`infer_annotation_expression`]: TypeInferenceBuilder::infer_annotation_expression + fn infer_optional_annotation_expression( + &mut self, + annotation: Option<&ast::Expr>, + deferred_state: DeferredExpressionState, + ) -> Option> { + annotation.map(|expr| self.infer_annotation_expression(expr, deferred_state)) + } + + /// Implementation of [`infer_annotation_expression`]. + /// + /// [`infer_annotation_expression`]: TypeInferenceBuilder::infer_annotation_expression + fn infer_annotation_expression_impl(&mut self, annotation: &ast::Expr) -> Type<'db> { // https://typing.readthedocs.io/en/latest/spec/annotations.html#grammar-token-expression-grammar-annotation_expression - let annotation_ty = match expression { - // TODO: parse the expression and check whether it is a string annotation, since they - // can be annotation expressions distinct from type expressions. 
- // https://typing.readthedocs.io/en/latest/spec/annotations.html#string-annotations - ast::Expr::StringLiteral(_literal) => Type::Todo, + let annotation_ty = match annotation { + // String annotations: https://typing.readthedocs.io/en/latest/spec/annotations.html#string-annotations + ast::Expr::StringLiteral(string) => self.infer_string_annotation_expression(string), // Annotation expressions also get special handling for `*args` and `**kwargs`. ast::Expr::Starred(starred) => self.infer_starred_expression(starred), + ast::Expr::BytesLiteral(bytes) => { + self.diagnostics.add( + bytes.into(), + "annotation-byte-string", + format_args!("Type expressions cannot use bytes literal"), + ); + Type::Unknown + } + + ast::Expr::FString(fstring) => { + self.diagnostics.add( + fstring.into(), + "annotation-f-string", + format_args!("Type expressions cannot use f-strings"), + ); + Type::Unknown + } + // All other annotation expressions are (possibly) valid type expressions, so handle // them there instead. type_expr => self.infer_type_expression_no_store(type_expr), }; - self.store_expression_type(expression, annotation_ty); + self.store_expression_type(annotation, annotation_ty); + annotation_ty } - fn infer_optional_annotation_expression( - &mut self, - expr: Option<&ast::Expr>, - ) -> Option> { - expr.map(|expr| self.infer_annotation_expression(expr)) + /// Infer the type of a string annotation expression. + fn infer_string_annotation_expression(&mut self, string: &ast::ExprStringLiteral) -> Type<'db> { + match parse_string_annotation(self.db, self.file, string) { + Ok(parsed) => { + // String annotations are always evaluated in the deferred context. + self.infer_annotation_expression( + parsed.expr(), + DeferredExpressionState::InStringAnnotation, + ) + } + Err(diagnostics) => { + self.diagnostics.extend(&diagnostics); + Type::Unknown + } + } } } /// Type expressions impl<'db> TypeInferenceBuilder<'db> { + /// Infer the type of a type expression. 
+ fn infer_type_expression(&mut self, expression: &ast::Expr) -> Type<'db> { + let ty = self.infer_type_expression_no_store(expression); + self.store_expression_type(expression, ty); + ty + } + + /// Similar to [`infer_type_expression`], but accepts an optional type expression and returns + /// [`None`] if the expression is [`None`]. + /// + /// [`infer_type_expression`]: TypeInferenceBuilder::infer_type_expression + fn infer_optional_type_expression( + &mut self, + expression: Option<&ast::Expr>, + ) -> Option> { + expression.map(|expr| self.infer_type_expression(expr)) + } + + /// Similar to [`infer_type_expression`], but accepts a [`DeferredExpressionState`]. + /// + /// [`infer_type_expression`]: TypeInferenceBuilder::infer_type_expression + fn infer_type_expression_with_state( + &mut self, + expression: &ast::Expr, + deferred_state: DeferredExpressionState, + ) -> Type<'db> { + let previous_deferred_state = std::mem::replace(&mut self.deferred_state, deferred_state); + let annotation_ty = self.infer_type_expression(expression); + self.deferred_state = previous_deferred_state; + annotation_ty + } + + /// Infer the type of a type expression without storing the result. fn infer_type_expression_no_store(&mut self, expression: &ast::Expr) -> Type<'db> { // https://typing.readthedocs.io/en/latest/spec/annotations.html#grammar-token-expression-grammar-type_expression - match expression { ast::Expr::Name(name) => match name.ctx { ast::ExprContext::Load => { @@ -4114,9 +4246,8 @@ impl<'db> TypeInferenceBuilder<'db> { ast::Expr::NoneLiteral(_literal) => Type::none(self.db), - // TODO: parse the expression and check whether it is a string annotation. 
// https://typing.readthedocs.io/en/latest/spec/annotations.html#string-annotations - ast::Expr::StringLiteral(_literal) => Type::Todo, + ast::Expr::StringLiteral(string) => self.infer_string_type_expression(string), // TODO: an Ellipsis literal *on its own* does not have any meaning in annotation // expressions, but is meaningful in the context of a number of special forms. @@ -4261,17 +4392,21 @@ impl<'db> TypeInferenceBuilder<'db> { } } - fn infer_type_expression(&mut self, expression: &ast::Expr) -> Type<'db> { - let ty = self.infer_type_expression_no_store(expression); - self.store_expression_type(expression, ty); - ty - } - - fn infer_optional_type_expression( - &mut self, - opt_expression: Option<&ast::Expr>, - ) -> Option> { - opt_expression.map(|expr| self.infer_type_expression(expr)) + /// Infer the type of a string type expression. + fn infer_string_type_expression(&mut self, string: &ast::ExprStringLiteral) -> Type<'db> { + match parse_string_annotation(self.db, self.file, string) { + Ok(parsed) => { + // String annotations are always evaluated in the deferred context. + self.infer_type_expression_with_state( + parsed.expr(), + DeferredExpressionState::InStringAnnotation, + ) + } + Err(diagnostics) => { + self.diagnostics.extend(&diagnostics); + Type::Unknown + } + } } /// Given the slice of a `tuple[]` annotation, return the type that the annotation represents @@ -4459,6 +4594,66 @@ impl<'db> TypeInferenceBuilder<'db> { } } +/// The deferred state of a specific expression in an inference region. +#[derive(Default, Debug, Clone, Copy)] +enum DeferredExpressionState { + /// The expression is not deferred. + #[default] + None, + + /// The expression is deferred. + /// + /// In the following example, + /// ```py + /// from __future__ import annotation + /// + /// a: tuple[int, "ForwardRef"] = ... + /// ``` + /// + /// The expression `tuple` and `int` are deferred but `ForwardRef` (after parsing) is both + /// deferred and in a string annotation context. 
+ Deferred, + + /// The expression is in a string annotation context. + /// + /// This is required to differentiate between a deferred annotation and a string annotation. + /// The former can occur when there's a `from __future__ import annotations` statement or we're + /// in a stub file. + /// + /// In the following example, + /// ```py + /// a: "List[int]" = ... + /// b: tuple[int, "ForwardRef"] = ... + /// ``` + /// + /// The annotation of `a` is completely inside a string while for `b`, it's only partially + /// stringified. + InStringAnnotation, +} + +impl DeferredExpressionState { + const fn is_deferred(self) -> bool { + matches!( + self, + DeferredExpressionState::Deferred | DeferredExpressionState::InStringAnnotation + ) + } + + const fn in_string_annotation(self) -> bool { + matches!(self, DeferredExpressionState::InStringAnnotation) + } +} + +impl From for DeferredExpressionState { + fn from(value: bool) -> Self { + if value { + DeferredExpressionState::Deferred + } else { + DeferredExpressionState::None + } + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum RichCompareOperator { Eq, diff --git a/crates/red_knot_python_semantic/src/types/string_annotation.rs b/crates/red_knot_python_semantic/src/types/string_annotation.rs new file mode 100644 index 0000000000000..b4801f4401225 --- /dev/null +++ b/crates/red_knot_python_semantic/src/types/string_annotation.rs @@ -0,0 +1,77 @@ +use ruff_db::files::File; +use ruff_db::source::source_text; +use ruff_python_ast::str::raw_contents; +use ruff_python_ast::{self as ast, ModExpression, StringFlags}; +use ruff_python_parser::{parse_expression_range, Parsed}; +use ruff_text_size::Ranged; + +use crate::types::diagnostic::{TypeCheckDiagnostics, TypeCheckDiagnosticsBuilder}; +use crate::Db; + +type AnnotationParseResult = Result, TypeCheckDiagnostics>; + +/// Parses the given expression as a string annotation. 
+pub(crate) fn parse_string_annotation( + db: &dyn Db, + file: File, + string_expr: &ast::ExprStringLiteral, +) -> AnnotationParseResult { + let _span = tracing::trace_span!("parse_string_annotation", string=?string_expr.range(), file=%file.path(db)).entered(); + + let source = source_text(db.upcast(), file); + let node_text = &source[string_expr.range()]; + let mut diagnostics = TypeCheckDiagnosticsBuilder::new(db, file); + + if let [string_literal] = string_expr.value.as_slice() { + let prefix = string_literal.flags.prefix(); + if prefix.is_raw() { + diagnostics.add( + string_literal.into(), + "annotation-raw-string", + format_args!("Type expressions cannot use raw string literal"), + ); + // Compare the raw contents (without quotes) of the expression with the parsed contents + // contained in the string literal. + } else if raw_contents(node_text) + .is_some_and(|raw_contents| raw_contents == string_literal.as_str()) + { + let range_excluding_quotes = string_literal + .range() + .add_start(string_literal.flags.opener_len()) + .sub_end(string_literal.flags.closer_len()); + + // TODO: Support multiline strings like: + // ```py + // x: """ + // int + // | float + // """ = 1 + // ``` + match parse_expression_range(source.as_str(), range_excluding_quotes) { + Ok(parsed) => return Ok(parsed), + Err(parse_error) => diagnostics.add( + string_literal.into(), + "forward-annotation-syntax-error", + format_args!("Syntax error in forward annotation: {}", parse_error.error), + ), + } + } else { + // The raw contents of the string doesn't match the parsed content. This could be the + // case for annotations that contain escape sequences. + diagnostics.add( + string_expr.into(), + "annotation-escape-character", + format_args!("Type expressions cannot contain escape characters"), + ); + } + } else { + // String is implicitly concatenated. 
+ diagnostics.add( + string_expr.into(), + "annotation-implicit-concat", + format_args!("Type expressions cannot span multiple string literals"), + ); + } + + Err(diagnostics.finish()) +}