diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000..b0c6bbb --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1 @@ +github: davidhewitt diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..8821fa7 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,16 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + - package-ecosystem: "cargo" # See documentation for possible values + directory: "/" # Location of package manifests + schedule: + interval: "weekly" + + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..a45b1b0 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,121 @@ +name: CI + +on: + push: + branches: + - main + pull_request: + +env: + CARGO_TERM_COLOR: always + +jobs: + resolve: + runs-on: ubuntu-latest + outputs: + MSRV: ${{ steps.resolve-msrv.outputs.MSRV }} + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: resolve MSRV + id: resolve-msrv + run: echo MSRV=`python -c 'import tomllib; print(tomllib.load(open("Cargo.toml", "rb"))["package"]["rust-version"])'` >> $GITHUB_OUTPUT + + fmt: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt + - name: Check rust formatting (rustfmt) + run: cargo fmt --all -- --check + + clippy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + with: + components: clippy + - run: cargo clippy --all + + build: + 
needs: [resolve, fmt] # don't wait for clippy, as it rarely fails and takes longer to run + name: python${{ matrix.python-version }} ${{ matrix.os }} rust-${{ matrix.rust}} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false # If one platform fails, allow the rest to keep testing. + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14", "3.14t"] + os: ["macos-latest", "ubuntu-latest", "windows-latest"] + rust: [stable] + include: + - python-version: "3.14" + os: "ubuntu-latest" + rust: ${{ needs.resolve.outputs.MSRV }} + - python-version: "3.14" + os: "macos-15-intel" + rust: "stable" + - python-version: "3.14" + os: "ubuntu-24.04-arm" + rust: "stable" + - python-version: "3.14" + os: "windows-11-arm" + rust: "stable" + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ matrix.rust }} + + - uses: Swatinem/rust-cache@v2 + continue-on-error: true + + - if: ${{ matrix.rust == needs.resolve.outputs.MSRV }} + name: Set dependencies on MSRV + run: cargo +stable update + env: + CARGO_RESOLVER_INCOMPATIBLE_RUST_VERSIONS: fallback + + - name: Test + run: cargo test --verbose + + - name: Test (abi3) + run: cargo test --verbose --features pyo3/abi3-py37 + + - name: Test (arbitrary_precision) + run: cargo test --verbose --features arbitrary_precision + + env: + RUST_BACKTRACE: 1 + + coverage: + needs: [fmt] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: Swatinem/rust-cache@v2 + continue-on-error: true + - name: Install cargo-llvm-cov + uses: taiki-e/install-action@cargo-llvm-cov + - uses: dtolnay/rust-toolchain@stable + with: + components: llvm-tools-preview + - run: | + cargo llvm-cov clean + cargo llvm-cov --codecov --output-path codecov.json + - uses: codecov/codecov-action@v4 + with: + file: 
codecov.json + token: ${{ secrets.CODECOV_TOKEN }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..f538d3d --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,32 @@ +name: Release Rust Crate + +on: + push: + tags: + - "v*" + workflow_dispatch: + inputs: + version: + description: The version to build + +jobs: + release: + permissions: + id-token: write + + runs-on: ubuntu-latest + environment: release + steps: + - uses: actions/checkout@v5 + with: + # The tag to build or the tag received by the tag event + ref: ${{ github.event.inputs.version || github.ref }} + persist-credentials: false + + - uses: rust-lang/crates-io-auth-action@v1 + id: auth + + - name: Publish to crates.io + run: cargo publish + env: + CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }} diff --git a/.gitignore b/.gitignore index 96ef6c0..b471067 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /target Cargo.lock +.idea diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..9fa1128 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,124 @@ +## 0.28.0 - 2026-02-18 + +- Bump MSRV to 1.83. +- Update `pyo3` to 0.28. +- Support deserializing `dataclass` instances to struct-like Rust types. +- Add `arbitrary_precision` feature + +## 0.27.0 - 2025-11-07 +- Update to PyO3 0.27 + +## 0.26.0 - 2025-08-30 + +### Packaging +- Bump MSRV to 1.74 +- Update to PyO3 0.26 + +### Changed +- `PythonizeTypes`, `PythonizeMappingType` and `PythonizeNamedMappingType` no longer have a lifetime on the trait, instead the `Builder` type is a GAT. 
+ +## 0.25.0 - 2025-05-23 + +### Packaging +- Update to PyO3 0.25 + +## 0.24.0 - 2025-03-26 + +### Packaging +- Update to PyO3 0.24 + +### Removed +- Remove deprecated `depythonize_bound()` + +## 0.23.0 - 2024-11-22 + +### Packaging +- Update to PyO3 0.23 + +## 0.22.0 - 2024-08-10 + +### Packaging +- Bump MSRV to 1.63 +- Update to PyO3 0.22 + +### Added +- Support `u128` / `i128` integers. +- Implement `PythonizeListType` for `PyTuple` +- Support deserializing enums from any `PyMapping` instead of just `PyDict` +- Support serializing struct-like types to named mappings using `PythonizeTypes::NamedMap` + +### Changed +- `pythonize()` now returns `Bound<'py, PyAny>` instead of `Py<PyAny>` +- `depythonize()` now takes `&'a Bound` and is no longer deprecated +- `depythonize_bound()` is now deprecated +- `Depythonizer::from_object()` now takes `&'a Bound` and is no longer deprecated +- `Depythonizer` now contains `&'a Bound` and so has an extra lifetime `'a` + +### Removed +- Remove support for PyO3's `gil-refs` feature + +### Fixed +- Fix overflow error attempting to depythonize `u64` values greater than `i64::MAX` to types like `serde_json::Value` +- Fix deserializing `set` and `frozenset` into Rust homogeneous containers + +## 0.21.1 - 2024-04-02 + +- Fix compile error when using PyO3 `abi3` feature targeting a minimum version below 3.10 + +## 0.21.0 - 2024-04-01 + +- Bump edition to 2021 +- Bump MSRV to 1.56 +- Update to PyO3 0.21 +- Export `PythonizeDefault` + +## 0.20.0 - 2023-10-15 + +- Update to PyO3 0.20 + +## 0.19.0 - 2023-06-11 + +- Update to PyO3 0.19 + +## 0.18.0 - 2023-01-22 + +- Add LICENSE file to the crate +- Update to PyO3 0.18 + +## 0.17.0 - 2022-08-24 + +- Update to PyO3 0.17 + +## 0.16.0 - 2022-03-06 + +- Update to PyO3 0.16 + +## 0.15.0 - 2021-11-12 + +- Update to PyO3 0.15 +- Add `pythonize_custom` for customizing the Python types to serialize to. +- Add support for `depythonize` to handle arbitrary Python sequence and mapping types. 
+ +## 0.14.0 - 2021-07-05 + +- Update to PyO3 0.14 + +## 0.13.0 - 2020-12-28 + +- Update to PyO3 0.13 + +## 0.12.1 - 2020-12-08 + +- Require `std` feature of `serde`. +- Reduce memory consumption when deserializing sequences. +- Fix deserializing untagged struct enum variants. +- Fix deserializing sequences from Python tuples. + +## 0.12.0 - 2020-11-22 + +- Change release versioning to match `pyo3` major/minor version. +- Implement `depythonizer` + +## 0.1.0 - 2020-08-12 + +- Initial release diff --git a/Cargo.toml b/Cargo.toml index 0d91260..90c1696 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,22 +1,28 @@ [package] name = "pythonize" -version = "0.1.0" +version = "0.28.0" authors = ["David Hewitt <1939362+davidhewitt@users.noreply.github.com>"] -edition = "2018" +edition = "2021" +rust-version = "1.83" license = "MIT" description = "Serde Serializer & Deserializer from Rust <--> Python, backed by PyO3." homepage = "https://github.com/davidhewitt/pythonize" repository = "https://github.com/davidhewitt/pythonize" documentation = "https://docs.rs/crate/pythonize/" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -serde = { version = "1.0" } -pyo3 = { version = "0.11.1", default-features = false } +serde = { version = "1.0", default-features = false, features = ["std"] } +serde_json = { version = "1.0", optional = true, default-features = false, features = ["std"] } +pyo3 = { version = "0.28", default-features = false } [dev-dependencies] -serde = { version = "1.0", features = ["derive"] } -paste = "1.0" -serde_json = "1.0" +serde = { version = "1.0", default-features = false, features = ["derive"] } +pyo3 = { version = "0.28", default-features = false, features = ["auto-initialize", "macros", "py-clone"] } +serde_json = { version = "1.0", default-features = false, features = ["std"] } +serde_bytes = "0.11" maplit = "1.0.2" +serde_path_to_error = "0.1.15" + +[features] +arbitrary_precision = 
["serde_json", "serde_json/arbitrary_precision"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..ace026e --- /dev/null +++ b/LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2022-present David Hewitt and Contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 1cdfbb2..8d1986e 100644 --- a/README.md +++ b/README.md @@ -1,37 +1,60 @@ # Pythonize -***WIP: Most functionality of this crate is still stubbed out. Please don't try to use this yet - unless you're interested in submitting PRs to help finish it off :)*** - -This is an experimental serializer for Rust's serde ecosystem, which can convert Rust objects to Python values and back. +This is a serializer for Rust's serde ecosystem, which can convert Rust objects to Python values and back. At the moment the Python structures it produces should be _very_ similar to those which are produced by `serde_json`; i.e. 
calling Python's `json.loads()` on a value encoded by `serde_json` should produce an identical structure to that which is produced directly by `pythonize`. ## Usage -Pythonize has two public APIs: `pythonize` and `depythonize`. +This crate converts Rust types which implement the [Serde] serialization +traits into Python objects using the [PyO3] library. -``` +Pythonize has two main public APIs: `pythonize` and `depythonize`. + + + +[Serde]: https://github.com/serde-rs/serde +[PyO3]: https://github.com/PyO3/pyo3 + +## Examples + +```rust use serde::{Serialize, Deserialize}; -use pyo3::{Python, py_run}; -use pythonize::pythonize; +use pyo3::prelude::*; +use pythonize::{depythonize, pythonize}; -#[derive(Serialize, Deserialize)] +#[derive(Debug, Serialize, Deserialize, PartialEq)] struct Sample { foo: String, bar: Option } -Python::with_gil(|py| -> PyResult<()> { - let sample = Sample { - foo: "foo".to_string(), - bar: None - }; +let sample = Sample { + foo: "Foo".to_string(), + bar: None +}; - let obj = pythonize(py, &sample)?; +Python::attach(|py| { + // Rust -> Python + let obj = pythonize(py, &sample).unwrap(); - println!("{}", obj.as_ref(py).repr()); + assert_eq!("{'foo': 'Foo', 'bar': None}", &format!("{}", obj.repr().unwrap())); + + // Python -> Rust + let new_sample: Sample = depythonize(&obj).unwrap(); + + assert_eq!(new_sample, sample); }) +``` + +## Features + +### `arbitrary_precision` + +Enable support for `serde_json`'s `arbitrary_precision` feature, which allows handling numbers that exceed the range of `i128`/`u128` when converting `serde_json::Value` to and from Python. -// XXX: depythonize is not yet implemented! 
+```toml +[dependencies] +pythonize = { version = "0.28", features = ["arbitrary_precision"] } ``` diff --git a/src/de.rs b/src/de.rs new file mode 100644 index 0000000..07728b8 --- /dev/null +++ b/src/de.rs @@ -0,0 +1,1147 @@ +use pyo3::exceptions::PyKeyError; +use pyo3::{intern, types::*, Bound}; +use serde::de::{self, IntoDeserializer}; +use serde::Deserialize; + +use crate::error::{ErrorImpl, PythonizeError, Result}; + +#[cfg(feature = "arbitrary_precision")] +const TOKEN: &str = "$serde_json::private::Number"; + +/// Attempt to convert a Python object to an instance of `T`. +/// +/// Generally this only supports Python types that match `serde`'s object model well: +/// - integers (including arbitrary precision integers if the `arbitrary_precision` feature is enabled) +/// - floats +/// - strings +/// - bytes +/// - `collections.abc.Sequence` instances (as serde sequences) +/// - `collections.abc.Mapping` instances (as serde maps) +/// - dataclasses (as serde maps) +pub fn depythonize<'a, 'py, T>(obj: &'a Bound<'py, PyAny>) -> Result +where + T: Deserialize<'a>, +{ + T::deserialize(&mut Depythonizer::from_object(obj)) +} + +/// A structure that deserializes Python objects into Rust values +pub struct Depythonizer<'a, 'py> { + input: &'a Bound<'py, PyAny>, +} + +impl<'a, 'py> Depythonizer<'a, 'py> { + /// Create a deserializer from a Python object + pub fn from_object(input: &'a Bound<'py, PyAny>) -> Self { + Depythonizer { input } + } + + fn sequence_access(&self, expected_len: Option) -> Result> { + let seq = self.input.cast::()?; + let len = self.input.len()?; + + match expected_len { + Some(expected) if expected != len => { + Err(PythonizeError::incorrect_sequence_length(expected, len)) + } + _ => Ok(PySequenceAccess::new(seq, len)), + } + } + + fn set_access(&self) -> Result> { + match self.input.cast::() { + Ok(set) => Ok(PySetAsSequence::from_set(set)), + Err(e) => { + if let Ok(f) = self.input.cast::() { + Ok(PySetAsSequence::from_frozenset(f)) + } else 
{ + Err(e.into()) + } + } + } + } + + fn dict_access(&self) -> Result> { + PyMappingAccess::new(self.input.cast()?) + } + + fn dataclass_access(&self) -> Result>> { + if let Some(dc) = DataclassCandidate::try_new(self.input) { + Some(PyDataclassAccess::new(dc)).transpose() + } else { + Ok(None) + } + } + + fn deserialize_any_int<'de, V>(&self, int: &Bound<'_, PyInt>, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + if let Ok(x) = int.extract::() { + if let Ok(x) = u8::try_from(x) { + visitor.visit_u8(x) + } else if let Ok(x) = u16::try_from(x) { + visitor.visit_u16(x) + } else if let Ok(x) = u32::try_from(x) { + visitor.visit_u32(x) + } else if let Ok(x) = u64::try_from(x) { + visitor.visit_u64(x) + } else { + visitor.visit_u128(x) + } + } else if let Ok(x) = int.extract::() { + if let Ok(x) = i8::try_from(x) { + visitor.visit_i8(x) + } else if let Ok(x) = i16::try_from(x) { + visitor.visit_i16(x) + } else if let Ok(x) = i32::try_from(x) { + visitor.visit_i32(x) + } else if let Ok(x) = i64::try_from(x) { + visitor.visit_i64(x) + } else { + visitor.visit_i128(x) + } + } else { + #[cfg(feature = "arbitrary_precision")] + { + visitor.visit_map(NumberDeserializer { + number: Some(int.to_string()), + }) + } + #[cfg(not(feature = "arbitrary_precision"))] + { + // Re-attempt to return the original error. + let _: i128 = int.extract()?; + unreachable!() + } + } + } +} + +macro_rules! deserialize_type { + ($method:ident => $visit:ident) => { + fn $method(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + visitor.$visit(self.input.extract()?) 
+ } + }; +} + +impl<'de> de::Deserializer<'de> for &'_ mut Depythonizer<'_, '_> { + type Error = PythonizeError; + + fn deserialize_any(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + let obj = self.input; + + // First check for cases which are cheap to check due to pointer + // comparison or bitflag checks + if obj.is_none() { + self.deserialize_unit(visitor) + } else if obj.is_instance_of::() { + self.deserialize_bool(visitor) + } else if let Ok(x) = obj.cast::() { + self.deserialize_any_int(x, visitor) + } else if obj.is_instance_of::() || obj.is_instance_of::() { + self.deserialize_tuple(obj.len()?, visitor) + } else if obj.is_instance_of::() { + self.deserialize_map(visitor) + } else if obj.is_instance_of::() { + self.deserialize_str(visitor) + } + // Continue with cases which are slower to check because they go + // through `isinstance` machinery + else if obj.is_instance_of::() || obj.is_instance_of::() { + self.deserialize_bytes(visitor) + } else if obj.is_instance_of::() { + self.deserialize_f64(visitor) + } else if obj.is_instance_of::() || obj.is_instance_of::() { + self.deserialize_seq(visitor) + } else if obj.cast::().is_ok() { + self.deserialize_tuple(obj.len()?, visitor) + } else if obj.cast::().is_ok() { + self.deserialize_map(visitor) + } else if let Some(dc) = DataclassCandidate::try_new(obj) { + visitor.visit_map(PyDataclassAccess::new(dc)?) + } else { + Err(obj.get_type().qualname().map_or_else( + |_| PythonizeError::unsupported_type("unknown"), + PythonizeError::unsupported_type, + )) + } + } + + fn deserialize_bool(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + visitor.visit_bool(self.input.is_truthy()?) 
+ } + + fn deserialize_char(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + let s = self.input.cast::()?.to_cow()?; + if s.chars().count() != 1 { + return Err(PythonizeError::invalid_length_char()); + } + visitor.visit_char(s.chars().next().unwrap()) + } + + deserialize_type!(deserialize_i8 => visit_i8); + deserialize_type!(deserialize_i16 => visit_i16); + deserialize_type!(deserialize_i32 => visit_i32); + deserialize_type!(deserialize_i64 => visit_i64); + deserialize_type!(deserialize_i128 => visit_i128); + deserialize_type!(deserialize_u8 => visit_u8); + deserialize_type!(deserialize_u16 => visit_u16); + deserialize_type!(deserialize_u32 => visit_u32); + deserialize_type!(deserialize_u64 => visit_u64); + deserialize_type!(deserialize_u128 => visit_u128); + deserialize_type!(deserialize_f32 => visit_f32); + deserialize_type!(deserialize_f64 => visit_f64); + + fn deserialize_str(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + let s = self.input.cast::()?; + visitor.visit_str(&s.to_cow()?) 
+ } + + fn deserialize_string(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + self.deserialize_str(visitor) + } + + fn deserialize_bytes(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + let b = self.input.cast::()?; + visitor.visit_bytes(b.as_bytes()) + } + + fn deserialize_byte_buf(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + self.deserialize_bytes(visitor) + } + + fn deserialize_option(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + if self.input.is_none() { + visitor.visit_none() + } else { + visitor.visit_some(self) + } + } + + fn deserialize_unit(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + if self.input.is_none() { + visitor.visit_unit() + } else { + Err(PythonizeError::msg("expected None")) + } + } + + fn deserialize_unit_struct(self, _name: &'static str, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + self.deserialize_unit(visitor) + } + + fn deserialize_newtype_struct(self, _name: &'static str, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + visitor.visit_newtype_struct(self) + } + + fn deserialize_seq(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + match self.sequence_access(None) { + Ok(seq) => visitor.visit_seq(seq), + Err(e) => { + // we allow sets to be deserialized as sequences, so try that + if matches!(*e.inner, ErrorImpl::UnexpectedType(_)) { + if let Ok(set) = self.set_access() { + return visitor.visit_seq(set); + } + } + Err(e) + } + } + } + + fn deserialize_tuple(self, len: usize, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + visitor.visit_seq(self.sequence_access(Some(len))?) + } + + fn deserialize_tuple_struct( + self, + _name: &'static str, + len: usize, + visitor: V, + ) -> Result + where + V: de::Visitor<'de>, + { + visitor.visit_seq(self.sequence_access(Some(len))?) 
+ } + + fn deserialize_map(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + if let Some(dc_access) = self.dataclass_access()? { + visitor.visit_map(dc_access) + } else { + visitor.visit_map(self.dict_access()?) + } + } + + fn deserialize_struct( + self, + _name: &'static str, + _fields: &'static [&'static str], + visitor: V, + ) -> Result + where + V: de::Visitor<'de>, + { + self.deserialize_map(visitor) + } + + fn deserialize_enum( + self, + _name: &'static str, + _variants: &'static [&'static str], + visitor: V, + ) -> Result + where + V: de::Visitor<'de>, + { + let item = &self.input; + if let Ok(s) = item.cast::() { + visitor.visit_enum(s.to_cow()?.into_deserializer()) + } else if let Ok(m) = item.cast::() { + // Get the enum variant from the mapping key + if m.len()? != 1 { + return Err(PythonizeError::invalid_length_enum()); + } + let variant: Bound = m + .keys()? + .get_item(0)? + .cast_into::() + .map_err(|_| PythonizeError::dict_key_not_string())?; + let value = m.get_item(&variant)?; + visitor.visit_enum(PyEnumAccess::new(&value, variant)) + } else { + Err(PythonizeError::invalid_enum_type()) + } + } + + fn deserialize_identifier(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + let s = self + .input + .cast::() + .map_err(|_| PythonizeError::dict_key_not_string())?; + visitor.visit_str(&s.to_cow()?) 
+ } + + fn deserialize_ignored_any(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + visitor.visit_unit() + } +} + +struct PySequenceAccess<'a, 'py> { + seq: &'a Bound<'py, PySequence>, + index: usize, + len: usize, +} + +impl<'a, 'py> PySequenceAccess<'a, 'py> { + fn new(seq: &'a Bound<'py, PySequence>, len: usize) -> Self { + Self { seq, index: 0, len } + } +} + +impl<'de> de::SeqAccess<'de> for PySequenceAccess<'_, '_> { + type Error = PythonizeError; + + fn next_element_seed(&mut self, seed: T) -> Result> + where + T: de::DeserializeSeed<'de>, + { + if self.index < self.len { + let item = self.seq.get_item(self.index)?; + self.index += 1; + seed.deserialize(&mut Depythonizer::from_object(&item)) + .map(Some) + } else { + Ok(None) + } + } +} + +struct PySetAsSequence<'py> { + iter: Bound<'py, PyIterator>, +} + +impl<'py> PySetAsSequence<'py> { + fn from_set(set: &Bound<'py, PySet>) -> Self { + Self { + iter: PyIterator::from_object(set).expect("set is always iterable"), + } + } + + fn from_frozenset(set: &Bound<'py, PyFrozenSet>) -> Self { + Self { + iter: PyIterator::from_object(set).expect("frozenset is always iterable"), + } + } +} + +impl<'de> de::SeqAccess<'de> for PySetAsSequence<'_> { + type Error = PythonizeError; + + fn next_element_seed(&mut self, seed: T) -> Result> + where + T: de::DeserializeSeed<'de>, + { + match self.iter.next() { + Some(item) => seed + .deserialize(&mut Depythonizer::from_object(&item?)) + .map(Some), + None => Ok(None), + } + } +} + +struct PyMappingAccess<'py> { + keys: Bound<'py, PyList>, + values: Bound<'py, PyList>, + key_idx: usize, + val_idx: usize, + len: usize, +} + +impl<'py> PyMappingAccess<'py> { + fn new(map: &Bound<'py, PyMapping>) -> Result { + let keys = map.keys()?; + let values = map.values()?; + let len = map.len()?; + Ok(Self { + keys, + values, + key_idx: 0, + val_idx: 0, + len, + }) + } +} + +impl<'de> de::MapAccess<'de> for PyMappingAccess<'_> { + type Error = PythonizeError; + + fn 
next_key_seed(&mut self, seed: K) -> Result> + where + K: de::DeserializeSeed<'de>, + { + if self.key_idx < self.len { + let item = self.keys.get_item(self.key_idx)?; + self.key_idx += 1; + seed.deserialize(&mut Depythonizer::from_object(&item)) + .map(Some) + } else { + Ok(None) + } + } + + fn next_value_seed(&mut self, seed: V) -> Result + where + V: de::DeserializeSeed<'de>, + { + let item = self.values.get_item(self.val_idx)?; + self.val_idx += 1; + seed.deserialize(&mut Depythonizer::from_object(&item)) + } +} + +/// Intermediate structure used to denote that `obj` is a dataclass with `fields`. +struct DataclassCandidate<'a, 'py> { + obj: &'a Bound<'py, PyAny>, + fields: Bound<'py, PyAny>, +} + +impl<'a, 'py> DataclassCandidate<'a, 'py> { + fn try_new(obj: &'a Bound<'py, PyAny>) -> Option { + let fields = obj + .getattr_opt(intern!(obj.py(), "__dataclass_fields__")) + .ok() + .flatten()?; + Some(Self { obj, fields }) + } +} + +struct PyDataclassAccess<'py> { + fields: Bound<'py, PyList>, + dict: Bound<'py, PyDict>, + field_idx: usize, + val_idx: usize, + len: usize, +} + +impl<'py> PyDataclassAccess<'py> { + fn new(dc: DataclassCandidate<'_, 'py>) -> Result { + let fields = dc.fields.cast::()?.keys(); + let dict = dc + .obj + .getattr(intern!(dc.obj.py(), "__dict__"))? 
+ .cast_into()?; + let len = fields.len(); + Ok(Self { + fields, + dict, + field_idx: 0, + val_idx: 0, + len, + }) + } +} + +impl<'de> de::MapAccess<'de> for PyDataclassAccess<'_> { + type Error = PythonizeError; + + fn next_key_seed(&mut self, seed: K) -> Result> + where + K: de::DeserializeSeed<'de>, + { + if self.field_idx < self.len { + let item = self.fields.get_item(self.field_idx)?; + self.field_idx += 1; + seed.deserialize(&mut Depythonizer::from_object(&item)) + .map(Some) + } else { + Ok(None) + } + } + + fn next_value_seed(&mut self, seed: V) -> Result + where + V: de::DeserializeSeed<'de>, + { + let key = self.fields.get_item(self.val_idx)?; + let value = self + .dict + .get_item(&key)? + .ok_or_else(|| PyKeyError::new_err(key.unbind()))?; + self.val_idx += 1; + seed.deserialize(&mut Depythonizer::from_object(&value)) + } +} + +struct PyEnumAccess<'a, 'py> { + de: Depythonizer<'a, 'py>, + variant: Bound<'py, PyString>, +} + +impl<'a, 'py> PyEnumAccess<'a, 'py> { + fn new(obj: &'a Bound<'py, PyAny>, variant: Bound<'py, PyString>) -> Self { + Self { + de: Depythonizer::from_object(obj), + variant, + } + } +} + +impl<'de> de::EnumAccess<'de> for PyEnumAccess<'_, '_> { + type Error = PythonizeError; + type Variant = Self; + + fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant)> + where + V: de::DeserializeSeed<'de>, + { + let cow = self.variant.to_cow()?; + let de: de::value::StrDeserializer<'_, PythonizeError> = cow.as_ref().into_deserializer(); + let val = seed.deserialize(de)?; + Ok((val, self)) + } +} + +impl<'de> de::VariantAccess<'de> for PyEnumAccess<'_, '_> { + type Error = PythonizeError; + + fn unit_variant(self) -> Result<()> { + Ok(()) + } + + fn newtype_variant_seed(self, seed: T) -> Result + where + T: de::DeserializeSeed<'de>, + { + seed.deserialize(&mut { self.de }) + } + + fn tuple_variant(self, len: usize, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + visitor.visit_seq(self.de.sequence_access(Some(len))?) 
+ } + + fn struct_variant(self, _fields: &'static [&'static str], visitor: V) -> Result + where + V: de::Visitor<'de>, + { + visitor.visit_map(self.de.dict_access()?) + } +} + +// See serde_json +#[cfg(feature = "arbitrary_precision")] +struct NumberDeserializer { + number: Option, +} + +#[cfg(feature = "arbitrary_precision")] +impl<'de> de::MapAccess<'de> for NumberDeserializer { + type Error = PythonizeError; + + fn next_key_seed(&mut self, seed: K) -> Result> + where + K: de::DeserializeSeed<'de>, + { + if self.number.is_none() { + return Ok(None); + } + seed.deserialize(TOKEN.into_deserializer()).map(Some) + } + + fn next_value_seed(&mut self, seed: V) -> Result + where + V: de::DeserializeSeed<'de>, + { + seed.deserialize(self.number.take().unwrap().into_deserializer()) + } +} + +#[cfg(test)] +mod test { + use std::{collections::HashMap, ffi::CStr}; + + use super::*; + use crate::error::ErrorImpl; + use maplit::hashmap; + use pyo3::{IntoPyObject, Python}; + use serde_json::{json, Value as JsonValue}; + + fn test_de(code: &CStr, expected: &T, expected_json: &JsonValue) + where + T: de::DeserializeOwned + PartialEq + std::fmt::Debug, + { + Python::attach(|py| { + let obj = py.eval(code, None, None).unwrap(); + test_de_with_obj(&obj, expected, expected_json); + }); + } + + fn test_de_with_obj(obj: &Bound<'_, PyAny>, expected: &T, expected_json: &JsonValue) + where + T: de::DeserializeOwned + PartialEq + std::fmt::Debug, + { + let actual: T = depythonize(obj).unwrap(); + assert_eq!(&actual, expected); + + let actual_json: JsonValue = depythonize(obj).unwrap(); + assert_eq!(&actual_json, expected_json); + } + + #[test] + fn test_empty_struct() { + #[derive(Debug, Deserialize, PartialEq)] + struct Empty; + + let expected = Empty; + let expected_json = json!(null); + let code = c"None"; + test_de(code, &expected, &expected_json); + } + + #[test] + fn test_struct() { + #[derive(Debug, Deserialize, PartialEq)] + struct Struct { + foo: String, + bar: usize, + baz: f32, 
+ qux: bool, + } + + let expected = Struct { + foo: "Foo".to_string(), + bar: 8usize, + baz: 45.23, + qux: true, + }; + let expected_json = json!({ + "foo": "Foo", + "bar": 8, + "baz": 45.23, + "qux": true + }); + let code = c"{'foo': 'Foo', 'bar': 8, 'baz': 45.23, 'qux': True}"; + test_de(code, &expected, &expected_json); + } + + #[test] + fn test_struct_missing_key() { + #[derive(Debug, Deserialize, PartialEq)] + struct Struct { + foo: String, + bar: usize, + } + + let code = c"{'foo': 'Foo'}"; + + Python::attach(|py| { + let locals = PyDict::new(py); + let obj = py.eval(code, None, Some(&locals)).unwrap(); + assert!(matches!( + *depythonize::(&obj).unwrap_err().inner, + ErrorImpl::Message(msg) if msg == "missing field `bar`" + )); + }) + } + + #[test] + fn test_tuple_struct() { + #[derive(Debug, Deserialize, PartialEq)] + struct TupleStruct(String, f64); + + let expected = TupleStruct("cat".to_string(), -10.05); + let expected_json = json!(["cat", -10.05]); + let code = c"('cat', -10.05)"; + test_de(code, &expected, &expected_json); + } + + #[test] + fn test_tuple_too_long() { + #[derive(Debug, Deserialize, PartialEq)] + struct TupleStruct(String, f64); + + let code = c"('cat', -10.05, 'foo')"; + + Python::attach(|py| { + let locals = PyDict::new(py); + let obj = py.eval(code, None, Some(&locals)).unwrap(); + assert!(matches!( + *depythonize::(&obj).unwrap_err().inner, + ErrorImpl::IncorrectSequenceLength { expected, got } if expected == 2 && got == 3 + )); + }) + } + + #[test] + fn test_tuple_struct_from_pylist() { + #[derive(Debug, Deserialize, PartialEq)] + struct TupleStruct(String, f64); + + let expected = TupleStruct("cat".to_string(), -10.05); + let expected_json = json!(["cat", -10.05]); + let code = c"['cat', -10.05]"; + test_de(code, &expected, &expected_json); + } + + #[test] + fn test_tuple() { + let expected = ("foo".to_string(), 5); + let expected_json = json!(["foo", 5]); + let code = c"('foo', 5)"; + test_de(code, &expected, &expected_json); + } 
+ + #[test] + fn test_tuple_from_pylist() { + let expected = ("foo".to_string(), 5); + let expected_json = json!(["foo", 5]); + let code = c"['foo', 5]"; + test_de(code, &expected, &expected_json); + } + + #[test] + fn test_vec_from_pyset() { + let expected = vec!["foo".to_string()]; + let expected_json = json!(["foo"]); + let code = c"{'foo'}"; + test_de(code, &expected, &expected_json); + } + + #[test] + fn test_vec_from_pyfrozenset() { + let expected = vec!["foo".to_string()]; + let expected_json = json!(["foo"]); + let code = c"frozenset({'foo'})"; + test_de(code, &expected, &expected_json); + } + + #[test] + fn test_vec() { + let expected = vec![3, 2, 1]; + let expected_json = json!([3, 2, 1]); + let code = c"[3, 2, 1]"; + test_de(code, &expected, &expected_json); + } + + #[test] + fn test_vec_from_tuple() { + let expected = vec![3, 2, 1]; + let expected_json = json!([3, 2, 1]); + let code = c"(3, 2, 1)"; + test_de(code, &expected, &expected_json); + } + + #[test] + fn test_hashmap() { + let expected = hashmap! 
{"foo".to_string() => 4}; + let expected_json = json!({"foo": 4 }); + let code = c"{'foo': 4}"; + test_de(code, &expected, &expected_json); + } + + #[test] + fn test_enum_variant() { + #[derive(Debug, Deserialize, PartialEq)] + enum Foo { + Variant, + } + + let expected = Foo::Variant; + let expected_json = json!("Variant"); + let code = c"'Variant'"; + test_de(code, &expected, &expected_json); + } + + #[test] + fn test_enum_tuple_variant() { + #[derive(Debug, Deserialize, PartialEq)] + enum Foo { + Tuple(i32, String), + } + + let expected = Foo::Tuple(12, "cat".to_string()); + let expected_json = json!({"Tuple": [12, "cat"]}); + let code = c"{'Tuple': [12, 'cat']}"; + test_de(code, &expected, &expected_json); + } + + #[test] + fn test_enum_newtype_variant() { + #[derive(Debug, Deserialize, PartialEq)] + enum Foo { + NewType(String), + } + + let expected = Foo::NewType("cat".to_string()); + let expected_json = json!({"NewType": "cat" }); + let code = c"{'NewType': 'cat'}"; + test_de(code, &expected, &expected_json); + } + + #[test] + fn test_enum_struct_variant() { + #[derive(Debug, Deserialize, PartialEq)] + enum Foo { + Struct { foo: String, bar: usize }, + } + + let expected = Foo::Struct { + foo: "cat".to_string(), + bar: 25, + }; + let expected_json = json!({"Struct": {"foo": "cat", "bar": 25 }}); + let code = c"{'Struct': {'foo': 'cat', 'bar': 25}}"; + test_de(code, &expected, &expected_json); + } + #[test] + fn test_enum_untagged_tuple_variant() { + #[derive(Debug, Deserialize, PartialEq)] + #[serde(untagged)] + enum Foo { + Tuple(f32, char), + } + + let expected = Foo::Tuple(12.0, 'c'); + let expected_json = json!([12.0, 'c']); + let code = c"[12.0, 'c']"; + test_de(code, &expected, &expected_json); + } + + #[test] + fn test_enum_untagged_newtype_variant() { + #[derive(Debug, Deserialize, PartialEq)] + #[serde(untagged)] + enum Foo { + NewType(String), + } + + let expected = Foo::NewType("cat".to_string()); + let expected_json = json!("cat"); + let code = 
c"'cat'"; + test_de(code, &expected, &expected_json); + } + + #[test] + fn test_enum_untagged_struct_variant() { + #[derive(Debug, Deserialize, PartialEq)] + #[serde(untagged)] + enum Foo { + Struct { foo: Vec, bar: [u8; 4] }, + } + + let expected = Foo::Struct { + foo: vec!['a', 'b', 'c'], + bar: [2, 5, 3, 1], + }; + let expected_json = json!({"foo": ["a", "b", "c"], "bar": [2, 5, 3, 1]}); + let code = c"{'foo': ['a', 'b', 'c'], 'bar': [2, 5, 3, 1]}"; + test_de(code, &expected, &expected_json); + } + + #[test] + fn test_nested_type() { + #[derive(Debug, Deserialize, PartialEq)] + struct Foo { + name: String, + bar: Bar, + } + + #[derive(Debug, Deserialize, PartialEq)] + struct Bar { + value: usize, + variant: Baz, + } + + #[derive(Debug, Deserialize, PartialEq)] + enum Baz { + Basic, + Tuple(f32, u32), + } + + let expected = Foo { + name: "SomeFoo".to_string(), + bar: Bar { + value: 13, + variant: Baz::Tuple(-1.5, 8), + }, + }; + let expected_json = + json!({"name": "SomeFoo", "bar": { "value": 13, "variant": { "Tuple": [-1.5, 8]}}}); + let code = c"{'name': 'SomeFoo', 'bar': {'value': 13, 'variant': {'Tuple': [-1.5, 8]}}}"; + test_de(code, &expected, &expected_json); + } + + #[test] + fn test_int_limits() { + Python::attach(|py| { + // serde_json::Value supports u64 and i64 as maximum sizes + let _: serde_json::Value = depythonize(&u8::MAX.into_pyobject(py).unwrap()).unwrap(); + let _: serde_json::Value = depythonize(&u8::MIN.into_pyobject(py).unwrap()).unwrap(); + let _: serde_json::Value = depythonize(&i8::MAX.into_pyobject(py).unwrap()).unwrap(); + let _: serde_json::Value = depythonize(&i8::MIN.into_pyobject(py).unwrap()).unwrap(); + + let _: serde_json::Value = depythonize(&u16::MAX.into_pyobject(py).unwrap()).unwrap(); + let _: serde_json::Value = depythonize(&u16::MIN.into_pyobject(py).unwrap()).unwrap(); + let _: serde_json::Value = depythonize(&i16::MAX.into_pyobject(py).unwrap()).unwrap(); + let _: serde_json::Value = 
depythonize(&i16::MIN.into_pyobject(py).unwrap()).unwrap(); + + let _: serde_json::Value = depythonize(&u32::MAX.into_pyobject(py).unwrap()).unwrap(); + let _: serde_json::Value = depythonize(&u32::MIN.into_pyobject(py).unwrap()).unwrap(); + let _: serde_json::Value = depythonize(&i32::MAX.into_pyobject(py).unwrap()).unwrap(); + let _: serde_json::Value = depythonize(&i32::MIN.into_pyobject(py).unwrap()).unwrap(); + + let _: serde_json::Value = depythonize(&u64::MAX.into_pyobject(py).unwrap()).unwrap(); + let _: serde_json::Value = depythonize(&u64::MIN.into_pyobject(py).unwrap()).unwrap(); + let _: serde_json::Value = depythonize(&i64::MAX.into_pyobject(py).unwrap()).unwrap(); + let _: serde_json::Value = depythonize(&i64::MIN.into_pyobject(py).unwrap()).unwrap(); + + let _: u128 = depythonize(&u128::MAX.into_pyobject(py).unwrap()).unwrap(); + let _: i128 = depythonize(&u128::MIN.into_pyobject(py).unwrap()).unwrap(); + + let _: i128 = depythonize(&i128::MAX.into_pyobject(py).unwrap()).unwrap(); + let _: i128 = depythonize(&i128::MIN.into_pyobject(py).unwrap()).unwrap(); + }); + } + + #[test] + fn test_deserialize_bytes() { + Python::attach(|py| { + let obj = PyBytes::new(py, "hello".as_bytes()); + let actual: Vec = depythonize(&obj).unwrap(); + assert_eq!(actual, b"hello"); + }) + } + + #[test] + fn test_char() { + let expected = 'a'; + let expected_json = json!("a"); + let code = c"'a'"; + test_de(code, &expected, &expected_json); + } + + #[test] + fn test_char_multibyte_codepoint() { + // 'ä' is U+00E4: one Unicode codepoint, two UTF-8 bytes. + // Previously, deserialize_char checked s.len() (byte length) != 1, + // which incorrectly rejected any non-ASCII char. The fix checks + // s.chars().count() (codepoint count) != 1 instead. 
+ Python::attach(|py| { + let py_str = pyo3::types::PyString::new(py, "ä"); + let result = depythonize::(py_str.as_any()); + assert_eq!(result.unwrap(), 'ä'); + }); + } + + #[test] + fn test_unknown_type() { + Python::attach(|py| { + let obj = py + .import("decimal") + .unwrap() + .getattr("Decimal") + .unwrap() + .call0() + .unwrap(); + let err = depythonize::(&obj).unwrap_err(); + assert!(matches!( + *err.inner, + ErrorImpl::UnsupportedType(name) if name == "Decimal" + )); + }); + } + + #[test] + fn test_dataclass() { + let code = c"\ +from dataclasses import dataclass + +@dataclass +class Point: + x: int + y: int + +point = Point(1, 2)"; + + #[derive(Debug, Deserialize, PartialEq)] + struct Point { + x: i32, + y: i32, + } + + let expected = Point { x: 1, y: 2 }; + let expected_json = json!({"x": 1, "y": 2}); + + Python::attach(|py| { + let locals = PyDict::new(py); + py.run(code, None, Some(&locals)).unwrap(); + let obj = locals.get_item("point").unwrap().unwrap(); + test_de_with_obj(&obj, &expected, &expected_json); + + let map: HashMap = depythonize(&obj).unwrap(); + assert_eq!(map.len(), 2); + assert_eq!(*map.get("x").unwrap(), 1); + assert_eq!(*map.get("y").unwrap(), 2); + }); + } + + #[test] + fn test_dataclass_missing_field() { + let code = c"\ +from dataclasses import dataclass + +@dataclass +class Point: + x: int + y: int + +point = Point(1, 2)"; + + #[derive(Debug, Deserialize, PartialEq)] + struct Point { + x: i32, + y: i32, + z: i32, + } + + Python::attach(|py| { + let locals = PyDict::new(py); + py.run(code, None, Some(&locals)).unwrap(); + let obj = locals.get_item("point").unwrap().unwrap(); + let err = depythonize::(&obj).unwrap_err(); + assert!(matches!( + *err.inner, + ErrorImpl::Message(msg) if msg == "missing field `z`" + )); + }); + } + + #[test] + fn test_dataclass_extra_field() { + let code = c"\ +from dataclasses import dataclass + +@dataclass +class Point: + x: int + y: int + z: int + +point = Point(1, 2, 3)"; + + #[derive(Debug, 
Deserialize, PartialEq)] + #[serde(deny_unknown_fields)] + struct Point { + x: i32, + y: i32, + } + + Python::attach(|py| { + let locals = PyDict::new(py); + py.run(code, None, Some(&locals)).unwrap(); + let obj = locals.get_item("point").unwrap().unwrap(); + let err = depythonize::(&obj).unwrap_err(); + assert!(matches!( + *err.inner, + ErrorImpl::Message(msg) if msg == "unknown field `z`, expected `x` or `y`" + )); + }); + } +} diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..b608106 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,189 @@ +use pyo3::PyErr; +use pyo3::{exceptions::*, CastError, CastIntoError}; +use serde::{de, ser}; +use std::convert::Infallible; +use std::error; +use std::fmt::{self, Debug, Display}; +use std::result; + +/// Alias for `std::result::Result` with error type `PythonizeError` +pub type Result = result::Result; + +/// Errors that can occur when serializing/deserializing Python objects +pub struct PythonizeError { + pub(crate) inner: Box, +} + +impl PythonizeError { + pub(crate) fn msg(text: T) -> Self + where + T: ToString, + { + Self { + inner: Box::new(ErrorImpl::Message(text.to_string())), + } + } + + pub(crate) fn unsupported_type(t: T) -> Self + where + T: ToString, + { + Self { + inner: Box::new(ErrorImpl::UnsupportedType(t.to_string())), + } + } + + pub(crate) fn dict_key_not_string() -> Self { + Self { + inner: Box::new(ErrorImpl::DictKeyNotString), + } + } + + pub(crate) fn incorrect_sequence_length(expected: usize, got: usize) -> Self { + Self { + inner: Box::new(ErrorImpl::IncorrectSequenceLength { expected, got }), + } + } + + pub(crate) fn invalid_enum_type() -> Self { + Self { + inner: Box::new(ErrorImpl::InvalidEnumType), + } + } + + pub(crate) fn invalid_length_enum() -> Self { + Self { + inner: Box::new(ErrorImpl::InvalidLengthEnum), + } + } + + pub(crate) fn invalid_length_char() -> Self { + Self { + inner: Box::new(ErrorImpl::InvalidLengthChar), + } + } +} + +/// Error codes for problems that 
can occur when serializing/deserializing Python objects +#[derive(Debug)] +pub enum ErrorImpl { + /// An error originating from the Python runtime + PyErr(PyErr), + /// Generic error message + Message(String), + /// A Python type not supported by the deserializer + UnsupportedType(String), + /// A `PyAny` object that failed to cast to an expected Python type + UnexpectedType(String), + /// Dict keys should be strings to deserialize to struct fields + DictKeyNotString, + /// Sequence length did not match expected tuple or tuple struct length. + IncorrectSequenceLength { expected: usize, got: usize }, + /// Enum variants should either be dict (tagged) or str (variant) + InvalidEnumType, + /// Tagged enum variants should be a dict with exactly 1 key + InvalidLengthEnum, + /// Expected a `char`, but got a Python str that was not length 1 + InvalidLengthChar, +} + +impl error::Error for PythonizeError {} + +impl Display for PythonizeError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.inner.as_ref() { + ErrorImpl::PyErr(e) => Display::fmt(e, f), + ErrorImpl::Message(s) => Display::fmt(s, f), + ErrorImpl::UnsupportedType(s) => write!(f, "unsupported type {}", s), + ErrorImpl::UnexpectedType(s) => write!(f, "unexpected type: {}", s), + ErrorImpl::DictKeyNotString => f.write_str("dict keys must have type str"), + ErrorImpl::IncorrectSequenceLength { expected, got } => { + write!(f, "expected sequence of length {}, got {}", expected, got) + } + ErrorImpl::InvalidEnumType => f.write_str("expected either a str or dict for enum"), + ErrorImpl::InvalidLengthEnum => { + f.write_str("expected tagged enum dict to have exactly 1 key") + } + ErrorImpl::InvalidLengthChar => f.write_str("expected a str of length 1 for char"), + } + } +} + +impl Debug for PythonizeError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.inner.as_ref().fmt(f) + } +} + +impl ser::Error for PythonizeError { + fn custom(msg: T) -> Self + where + T: Display, + { + 
Self { + inner: Box::new(ErrorImpl::Message(msg.to_string())), + } + } +} + +impl de::Error for PythonizeError { + fn custom(msg: T) -> Self + where + T: Display, + { + Self { + inner: Box::new(ErrorImpl::Message(msg.to_string())), + } + } +} + +/// Convert an exception raised in Python to a `PythonizeError` +impl From for PythonizeError { + fn from(other: Infallible) -> Self { + match other {} + } +} + +/// Convert an exception raised in Python to a `PythonizeError` +impl From for PythonizeError { + fn from(other: PyErr) -> Self { + Self { + inner: Box::new(ErrorImpl::PyErr(other)), + } + } +} + +/// Handle errors that occur when attempting to use `PyAny::cast` +impl<'a, 'py> From> for PythonizeError { + fn from(other: CastError<'a, 'py>) -> Self { + Self { + inner: Box::new(ErrorImpl::UnexpectedType(other.to_string())), + } + } +} + +/// Handle errors that occur when attempting to use `PyAny::cast` +impl<'py> From> for PythonizeError { + fn from(other: CastIntoError<'py>) -> Self { + Self { + inner: Box::new(ErrorImpl::UnexpectedType(other.to_string())), + } + } +} + +/// Convert a `PythonizeError` to a Python exception +impl From for PyErr { + fn from(other: PythonizeError) -> Self { + match *other.inner { + ErrorImpl::PyErr(e) => e, + ErrorImpl::Message(e) => PyException::new_err(e), + ErrorImpl::UnsupportedType(_) + | ErrorImpl::UnexpectedType(_) + | ErrorImpl::DictKeyNotString + | ErrorImpl::InvalidEnumType => PyTypeError::new_err(other.to_string()), + ErrorImpl::IncorrectSequenceLength { .. } + | ErrorImpl::InvalidLengthEnum + | ErrorImpl::InvalidLengthChar => PyValueError::new_err(other.to_string()), + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 73bb2b0..e625b6f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,548 +1,12 @@ -/// Pythonize has two public APIs: `pythonize` and `depythonize`. 
-/// -/// ``` -/// use serde::{Serialize, Deserialize}; -/// use pyo3::{Python, AsPyRef}; -/// use pythonize::pythonize; -/// -/// #[derive(Serialize, Deserialize)] -/// struct Sample { -/// foo: String, -/// bar: Option -/// } -/// -/// let gil = Python::acquire_gil(); -/// let py = gil.python(); -/// -/// let sample = Sample { -/// foo: "foo".to_string(), -/// bar: None -/// }; -/// -/// let obj = pythonize(py, &sample).expect("failed to pythonize sample"); -/// -/// println!("{}", obj.as_ref(py).repr().expect("failed to get repr")); -/// -/// // XXX: depythonize is not yet implemented! -/// ``` - -use pyo3::types::{PyDict, PyList, PyTuple}; -use pyo3::{IntoPy, PyErr, PyNativeType, PyObject, PyResult, Python}; -use serde::{ser, Serialize, Serializer}; - -pub fn pythonize(py: Python, value: T) -> PyResult { - Ok(value.serialize(Pythonizer { py })?) -} - -pub fn depythonize(_py: Python, _obj: PyObject) -> T { - todo!() -} - -#[derive(Debug)] -pub struct PythonizerError(PyErr); - -impl ser::Error for PythonizerError { - fn custom(_msg: T) -> Self - where - T: std::fmt::Display, - { - todo!() - } -} - -impl std::fmt::Display for PythonizerError { - fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - todo!() - } -} - -impl std::error::Error for PythonizerError {} - -impl From for PythonizerError { - fn from(other: PyErr) -> Self { - Self(other) - } -} - -impl From for PyErr { - fn from(other: PythonizerError) -> Self { - other.0 - } -} - -#[derive(Clone, Copy)] -pub struct Pythonizer<'py> { - py: Python<'py> -} - -#[doc(hidden)] -pub struct PythonDictSerializer<'py> { - dict: &'py PyDict, -} - -#[doc(hidden)] -pub struct PythonMapSerializer<'py> { - dict: &'py PyDict, - key: Option -} - - -#[doc(hidden)] -pub struct PythonListSerializer<'py> { - list: &'py PyList, -} - -#[doc(hidden)] -pub struct PythonTupleSerializer<'py> { - items: Vec, - py: Python<'py> -} - -#[doc(hidden)] -pub struct PythonTupleVariantSerializer<'py> { - variant: &'static str, 
- inner: PythonTupleSerializer<'py> -} - -#[doc(hidden)] -pub struct PythonStructVariantSerializer<'py> { - variant: &'static str, - inner: PythonDictSerializer<'py> -} - -impl<'py> Serializer for Pythonizer<'py> { - type Ok = PyObject; - type Error = PythonizerError; - type SerializeSeq = PythonListSerializer<'py>; - type SerializeTuple = PythonTupleSerializer<'py>; - type SerializeTupleStruct = PythonTupleSerializer<'py>; - type SerializeTupleVariant = PythonTupleVariantSerializer<'py>; - type SerializeMap = PythonMapSerializer<'py>; - type SerializeStruct = PythonDictSerializer<'py>; - type SerializeStructVariant = PythonStructVariantSerializer<'py>; - fn serialize_bool(self, v: bool) -> Result { - Ok(v.into_py(self.py)) - } - fn serialize_i8(self, v: i8) -> Result { - Ok(v.into_py(self.py)) - } - fn serialize_i16(self, v: i16) -> Result { - Ok(v.into_py(self.py)) - } - fn serialize_i32(self, v: i32) -> Result { - Ok(v.into_py(self.py)) - } - fn serialize_i64(self, v: i64) -> Result { - Ok(v.into_py(self.py)) - } - fn serialize_u8(self, v: u8) -> Result { - Ok(v.into_py(self.py)) - } - fn serialize_u16(self, v: u16) -> Result { - Ok(v.into_py(self.py)) - } - fn serialize_u32(self, v: u32) -> Result { - Ok(v.into_py(self.py)) - } - fn serialize_u64(self, v: u64) -> Result { - Ok(v.into_py(self.py)) - } - fn serialize_f32(self, v: f32) -> Result { - Ok(v.into_py(self.py)) - } - fn serialize_f64(self, v: f64) -> Result { - Ok(v.into_py(self.py)) - } - fn serialize_char(self, _v: char) -> Result { - todo!() - } - fn serialize_str(self, v: &str) -> Result { - Ok(v.into_py(self.py)) - } - fn serialize_bytes(self, v: &[u8]) -> Result { - Ok(v.into_py(self.py)) - } - fn serialize_none(self) -> Result { - Ok(self.py.None()) - } - fn serialize_some(self, value: &T) -> Result - where - T: Serialize, - { - value.serialize(self) - } - fn serialize_unit(self) -> Result { - Ok(self.py.None()) - } - fn serialize_unit_struct(self, _name: &'static str) -> Result { - 
Ok(self.py.None()) - } - fn serialize_unit_variant( - self, - _name: &'static str, - _variant_index: u32, - variant: &'static str, - ) -> Result { - Ok(variant.into_py(self.py)) - } - fn serialize_newtype_struct( - self, - _name: &'static str, - value: &T, - ) -> Result - where - T: Serialize, - { - value.serialize(self) - } - fn serialize_newtype_variant( - self, - _name: &'static str, - _variant_index: u32, - variant: &'static str, - value: &T, - ) -> Result - where - T: Serialize, - { - let d = PyDict::new(self.py); - d.set_item(variant, value.serialize(self)?)?; - Ok(d.into()) - } - fn serialize_seq(self, _len: Option) -> Result { - Ok(PythonListSerializer { list: PyList::empty(self.py) }) - } - fn serialize_tuple(self, _len: usize) -> Result { - Ok(PythonTupleSerializer { - items: Vec::new(), - py: self.py - }) - } - fn serialize_tuple_struct( - self, - _name: &'static str, - _len: usize, - ) -> Result { - Ok(PythonTupleSerializer { - items: Vec::new(), - py: self.py - }) - } - fn serialize_tuple_variant( - self, - _name: &'static str, - _variant_index: u32, - variant: &'static str, - _len: usize, - ) -> Result { - Ok(PythonTupleVariantSerializer { - variant, - inner: PythonTupleSerializer { - items: Vec::new(), - py: self.py - } - }) - } - fn serialize_map(self, _len: Option) -> Result { - Ok(PythonMapSerializer { - dict: PyDict::new(self.py), - key: None - }) - } - fn serialize_struct( - self, - _name: &'static str, - _len: usize, - ) -> Result { - Ok(PythonDictSerializer { - dict: PyDict::new(self.py), - }) - } - fn serialize_struct_variant( - self, - _name: &'static str, - _variant_index: u32, - variant: &'static str, - _len: usize, - ) -> Result { - Ok(PythonStructVariantSerializer { - variant, - inner: PythonDictSerializer { - dict: PyDict::new(self.py) - } - }) - } -} - -impl ser::SerializeTupleVariant for PythonTupleVariantSerializer<'_> { - type Ok = PyObject; - type Error = PythonizerError; - fn serialize_field(&mut self, value: &T) -> Result<(), 
Self::Error> - where - T: Serialize, - { - Ok(self.inner.items.push(pythonize(self.inner.py, value)?)) - } - fn end(self) -> Result { - let d = PyDict::new(self.inner.py); - d.set_item(self.variant, PyTuple::new(self.inner.py, self.inner.items))?; - Ok(d.into()) - } -} - -impl ser::SerializeTuple for PythonTupleSerializer<'_> { - type Ok = PyObject; - type Error = PythonizerError; - fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> - where - T: Serialize, - { - self.items.push(pythonize(self.py, value)?); - Ok(()) - } - fn end(self) -> Result { - Ok(PyTuple::new(self.py, self.items).into()) - } -} - -impl ser::SerializeTupleStruct for PythonTupleSerializer<'_> { - type Ok = PyObject; - type Error = PythonizerError; - fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> - where - T: Serialize, - { - self.items.push(pythonize(self.py, value)?); - Ok(()) - } - fn end(self) -> Result { - Ok(PyTuple::new(self.py, self.items).into()) - } -} - -impl ser::SerializeStruct for PythonDictSerializer<'_> { - type Ok = PyObject; - type Error = PythonizerError; - fn serialize_field( - &mut self, - key: &'static str, - value: &T, - ) -> Result<(), Self::Error> - where - T: Serialize, - { - Ok(self - .dict - .set_item(key, pythonize(self.dict.py(), value)?)?) 
- } - fn end(self) -> Result { - Ok(self.dict.into()) - } -} - -impl ser::SerializeStructVariant for PythonStructVariantSerializer<'_> { - type Ok = PyObject; - type Error = PythonizerError; - fn serialize_field( - &mut self, - key: &'static str, - value: &T, - ) -> Result<(), Self::Error> - where - T: Serialize, - { - self.inner.dict.set_item(key, pythonize(self.inner.dict.py(), value)?)?; - Ok(()) - } - fn end(self) -> Result { - let d = PyDict::new(self.inner.dict.py()); - d.set_item(self.variant, self.inner.dict)?; - Ok(d.into()) - } -} - -impl ser::SerializeMap for PythonMapSerializer<'_> { - type Ok = PyObject; - type Error = PythonizerError; - fn serialize_key(&mut self, key: &T) -> Result<(), Self::Error> - where - T: Serialize, - { - self.key = Some(pythonize(self.dict.py(), key)?); - Ok(()) - } - fn serialize_value(&mut self, value: &T) -> Result<(), Self::Error> - where - T: Serialize, - { - self.dict.set_item( - self.key.take().expect("serialize_value should always be called after serialize_key"), - pythonize(self.dict.py(), value)? - )?; - Ok(()) - } - fn end(self) -> Result { - Ok(self.dict.into()) - } -} - -impl ser::SerializeSeq for PythonListSerializer<'_> { - type Ok = PyObject; - type Error = PythonizerError; - fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> - where - T: Serialize, - { - Ok(self.list.append(pythonize(self.list.py(), value)?)?) - } - fn end(self) -> Result { - Ok(self.list.into()) - } -} - -pub struct Depythonizer; - -#[cfg(test)] -mod test { - use super::pythonize; - use maplit::hashmap; - use paste::paste; - use pyo3::types::PyDict; - use pyo3::{PyResult, Python}; - use serde::{Deserialize, Serialize}; - - macro_rules! 
test_sample { - ($name:ident, $sample:expr, $expected:literal) => { - paste!( - #[test] - fn [] () -> PyResult<()> { - let gil = Python::acquire_gil(); - let py = gil.python(); - - let sample = $sample; - let obj = pythonize(py, &sample)?; - - let locals = PyDict::new(py); - locals.set_item("obj", obj)?; - - py.run("import json; result = json.dumps(obj, separators=(',', ':'))", None, Some(locals))?; - let result = locals.get_item("result").unwrap().extract::<&str>()?; - - assert_eq!(result, $expected); - assert_eq!(serde_json::to_string(&sample).unwrap(), $expected); - - Ok(()) - } - ); - }; - } - - test_sample!( - empty_struct, - { - #[derive(Serialize, Deserialize)] - struct Empty; - - Empty - }, - r#"null"# - ); - - test_sample!( - struct, - { - #[derive(Serialize, Deserialize)] - struct Struct { - foo: String, - bar: usize, - } - - Struct { - foo: "foo".to_string(), - bar: 5 - } - }, - r#"{"foo":"foo","bar":5}"# - ); - - test_sample!( - tuple_struct, - { - #[derive(Serialize, Deserialize)] - struct TupleStruct(String, usize); - - TupleStruct("foo".to_string(), 5) - }, - r#"["foo",5]"# - ); - - test_sample!( - tuple, - ("foo", 5), - r#"["foo",5]"# - ); - - test_sample!( - vec, - vec![1, 2, 3], - r#"[1,2,3]"# - ); - - test_sample!( - map, - hashmap!{"foo" => "foo"}, - r#"{"foo":"foo"}"# - ); - - test_sample!( - enum_unit_variant, - { - #[derive(Serialize, Deserialize)] - enum E { - Empty - } - - E::Empty - }, - r#""Empty""# - ); - - test_sample!( - enum_tuple_variant, - { - #[derive(Serialize, Deserialize)] - enum E { - Tuple(i32, String) - } - - E::Tuple(5, "foo".to_string()) - }, - r#"{"Tuple":[5,"foo"]}"# - ); - - test_sample!( - enum_newtype_variant, - { - #[derive(Serialize, Deserialize)] - enum E { - NewType(String) - } - - E::NewType("foo".to_string()) - }, - r#"{"NewType":"foo"}"# - ); - - test_sample!( - enum_struct_variant, - { - #[derive(Serialize, Deserialize)] - enum E { - Struct { foo: String, bar: usize } - } - - E::Struct { foo:"foo".to_string(), 
bar: 5 } - }, - r#"{"Struct":{"foo":"foo","bar":5}}"# - ); -} +#![doc = include_str!("../README.md")] + +mod de; +mod error; +mod ser; + +pub use crate::de::{depythonize, Depythonizer}; +pub use crate::error::{PythonizeError, Result}; +pub use crate::ser::{ + pythonize, pythonize_custom, PythonizeDefault, PythonizeListType, PythonizeMappingType, + PythonizeNamedMappingType, PythonizeTypes, PythonizeUnnamedMappingAdapter, Pythonizer, +}; diff --git a/src/ser.rs b/src/ser.rs new file mode 100644 index 0000000..02f3caf --- /dev/null +++ b/src/ser.rs @@ -0,0 +1,954 @@ +use std::marker::PhantomData; + +#[cfg(feature = "arbitrary_precision")] +use pyo3::types::{PyAnyMethods, PyFloat, PyInt}; +use pyo3::types::{ + PyDict, PyDictMethods, PyList, PyListMethods, PyMapping, PySequence, PyString, PyTuple, + PyTupleMethods, +}; +use pyo3::{Bound, IntoPyObject, IntoPyObjectExt, PyAny, PyResult, Python}; +use serde::{ser, Serialize}; + +use crate::error::{PythonizeError, Result}; + +/// Trait for types which can represent a Python mapping +pub trait PythonizeMappingType { + /// Builder type for Python mappings + type Builder<'py>: 'py; + + /// Create a builder for a Python mapping + fn builder<'py>(py: Python<'py>, len: Option) -> PyResult>; + + /// Adds the key-value item to the mapping being built + fn push_item<'py>( + builder: &mut Self::Builder<'py>, + key: Bound<'py, PyAny>, + value: Bound<'py, PyAny>, + ) -> PyResult<()>; + + /// Build the Python mapping + fn finish<'py>(builder: Self::Builder<'py>) -> PyResult>; +} + +/// Trait for types which can represent a Python mapping and have a name +pub trait PythonizeNamedMappingType { + /// Builder type for Python mappings with a name + type Builder<'py>: 'py; + + /// Create a builder for a Python mapping with a name + fn builder<'py>( + py: Python<'py>, + len: usize, + name: &'static str, + ) -> PyResult>; + + /// Adds the field to the named mapping being built + fn push_field<'py>( + builder: &mut Self::Builder<'py>, + name: 
Bound<'py, PyString>, + value: Bound<'py, PyAny>, + ) -> PyResult<()>; + + /// Build the Python mapping + fn finish<'py>(builder: Self::Builder<'py>) -> PyResult>; +} + +/// Trait for types which can represent a Python sequence +pub trait PythonizeListType: Sized { + /// Constructor + fn create_sequence<'py, T, U>( + py: Python<'py>, + elements: impl IntoIterator, + ) -> PyResult> + where + T: IntoPyObject<'py>, + U: ExactSizeIterator; +} + +/// Custom types for serialization +pub trait PythonizeTypes { + /// Python map type (should be representable as python mapping) + type Map: PythonizeMappingType; + /// Python (struct-like) named map type (should be representable as python mapping) + type NamedMap: PythonizeNamedMappingType; + /// Python sequence type (should be representable as python sequence) + type List: PythonizeListType; +} + +impl PythonizeMappingType for PyDict { + type Builder<'py> = Bound<'py, Self>; + + fn builder<'py>(py: Python<'py>, _len: Option) -> PyResult> { + Ok(Self::new(py)) + } + + fn push_item<'py>( + builder: &mut Self::Builder<'py>, + key: Bound<'py, PyAny>, + value: Bound<'py, PyAny>, + ) -> PyResult<()> { + builder.set_item(key, value) + } + + fn finish<'py>(builder: Self::Builder<'py>) -> PyResult> { + Ok(builder.into_mapping()) + } +} + +/// Adapter type to use an unnamed mapping type, i.e. one that implements +/// [`PythonizeMappingType`], as a named mapping type, i.e. one that implements +/// [`PythonizeNamedMappingType`]. The adapter simply drops the provided name. +/// +/// This adapter is commonly applied to use the same unnamed mapping type for +/// both [`PythonizeTypes::Map`] and [`PythonizeTypes::NamedMap`] while only +/// implementing [`PythonizeMappingType`]. 
+pub struct PythonizeUnnamedMappingAdapter { + _unnamed: T, +} + +impl PythonizeNamedMappingType for PythonizeUnnamedMappingAdapter { + type Builder<'py> = T::Builder<'py>; + + fn builder<'py>( + py: Python<'py>, + len: usize, + _name: &'static str, + ) -> PyResult> { + T::builder(py, Some(len)) + } + + fn push_field<'py>( + builder: &mut Self::Builder<'py>, + name: Bound<'py, PyString>, + value: Bound<'py, PyAny>, + ) -> PyResult<()> { + T::push_item(builder, name.into_any(), value) + } + + fn finish<'py>(builder: Self::Builder<'py>) -> PyResult> { + T::finish(builder) + } +} + +impl PythonizeListType for PyList { + fn create_sequence<'py, T, U>( + py: Python<'py>, + elements: impl IntoIterator, + ) -> PyResult> + where + T: IntoPyObject<'py>, + U: ExactSizeIterator, + { + Ok(PyList::new(py, elements)?.into_sequence()) + } +} + +impl PythonizeListType for PyTuple { + fn create_sequence<'py, T, U>( + py: Python<'py>, + elements: impl IntoIterator, + ) -> PyResult> + where + T: IntoPyObject<'py>, + U: ExactSizeIterator, + { + Ok(PyTuple::new(py, elements)?.into_sequence()) + } +} + +pub struct PythonizeDefault; + +impl PythonizeTypes for PythonizeDefault { + type Map = PyDict; + type NamedMap = PythonizeUnnamedMappingAdapter; + type List = PyList; +} + +/// Attempt to convert the given data into a Python object +pub fn pythonize<'py, T>(py: Python<'py>, value: &T) -> Result> +where + T: ?Sized + Serialize, +{ + value.serialize(Pythonizer::new(py)) +} + +/// Attempt to convert the given data into a Python object. +/// Also uses custom mapping python class for serialization. +pub fn pythonize_custom<'py, P, T>(py: Python<'py>, value: &T) -> Result> +where + T: ?Sized + Serialize, + P: PythonizeTypes, +{ + value.serialize(Pythonizer::custom::

(py)) +} + +/// A structure that serializes Rust values into Python objects +#[derive(Clone, Copy)] +pub struct Pythonizer<'py, P> { + py: Python<'py>, + _types: PhantomData

, +} + +impl<'py, P> From> for Pythonizer<'py, P> { + fn from(py: Python<'py>) -> Self { + Self { + py, + _types: PhantomData, + } + } +} + +impl<'py> Pythonizer<'py, PythonizeDefault> { + /// Creates a serializer to convert data into a Python object using the default mapping class + pub fn new(py: Python<'py>) -> Self { + Self::from(py) + } + + /// Creates a serializer to convert data into a Python object using a custom mapping class + pub fn custom

(py: Python<'py>) -> Pythonizer<'py, P> { + Pythonizer::from(py) + } +} + +#[doc(hidden)] +pub struct PythonCollectionSerializer<'py, P> { + items: Vec>, + py: Python<'py>, + _types: PhantomData

, +} + +#[doc(hidden)] +pub struct PythonTupleVariantSerializer<'py, P> { + name: &'static str, + variant: &'static str, + inner: PythonCollectionSerializer<'py, P>, +} + +#[doc(hidden)] +pub struct PythonStructVariantSerializer<'py, P: PythonizeTypes> { + name: &'static str, + variant: &'static str, + inner: PythonStructDictSerializer<'py, P>, +} + +#[cfg(feature = "arbitrary_precision")] +#[doc(hidden)] +pub enum StructSerializer<'py, P: PythonizeTypes> { + Struct(PythonStructDictSerializer<'py, P>), + Number { + py: Python<'py>, + number_string: Option, + _types: PhantomData

, + }, +} + +#[cfg(not(feature = "arbitrary_precision"))] +#[doc(hidden)] +pub type StructSerializer<'py, P> = PythonStructDictSerializer<'py, P>; + +#[doc(hidden)] +pub struct PythonStructDictSerializer<'py, P: PythonizeTypes> { + py: Python<'py>, + builder: ::Builder<'py>, + _types: PhantomData

, +} + +#[doc(hidden)] +pub struct PythonMapSerializer<'py, P: PythonizeTypes> { + py: Python<'py>, + builder: ::Builder<'py>, + key: Option>, + _types: PhantomData

, +} + +impl<'py, P: PythonizeTypes> Pythonizer<'py, P> { + /// The default implementation for serialisation functions. + #[inline] + fn serialise_default(self, v: T) -> Result> + where + T: IntoPyObject<'py>, + { + v.into_bound_py_any(self.py).map_err(Into::into) + } +} + +impl<'py, P: PythonizeTypes> ser::Serializer for Pythonizer<'py, P> { + type Ok = Bound<'py, PyAny>; + type Error = PythonizeError; + type SerializeSeq = PythonCollectionSerializer<'py, P>; + type SerializeTuple = PythonCollectionSerializer<'py, P>; + type SerializeTupleStruct = PythonCollectionSerializer<'py, P>; + type SerializeTupleVariant = PythonTupleVariantSerializer<'py, P>; + type SerializeMap = PythonMapSerializer<'py, P>; + type SerializeStruct = StructSerializer<'py, P>; + type SerializeStructVariant = PythonStructVariantSerializer<'py, P>; + + fn serialize_bool(self, v: bool) -> Result> { + self.serialise_default(v) + } + + fn serialize_i8(self, v: i8) -> Result> { + self.serialise_default(v) + } + + fn serialize_i16(self, v: i16) -> Result> { + self.serialise_default(v) + } + + fn serialize_i32(self, v: i32) -> Result> { + self.serialise_default(v) + } + + fn serialize_i64(self, v: i64) -> Result> { + self.serialise_default(v) + } + + fn serialize_i128(self, v: i128) -> Result> { + self.serialise_default(v) + } + + fn serialize_u8(self, v: u8) -> Result> { + self.serialise_default(v) + } + + fn serialize_u16(self, v: u16) -> Result> { + self.serialise_default(v) + } + + fn serialize_u32(self, v: u32) -> Result> { + self.serialise_default(v) + } + + fn serialize_u64(self, v: u64) -> Result> { + self.serialise_default(v) + } + + fn serialize_u128(self, v: u128) -> Result> { + self.serialise_default(v) + } + + fn serialize_f32(self, v: f32) -> Result> { + self.serialise_default(v) + } + + fn serialize_f64(self, v: f64) -> Result> { + self.serialise_default(v) + } + + fn serialize_char(self, v: char) -> Result> { + self.serialize_str(&v.to_string()) + } + + fn serialize_str(self, v: 
&str) -> Result> { + Ok(PyString::new(self.py, v).into_any()) + } + + fn serialize_bytes(self, v: &[u8]) -> Result> { + self.serialise_default(v) + } + + fn serialize_none(self) -> Result> { + Ok(self.py.None().into_bound(self.py)) + } + + fn serialize_some(self, value: &T) -> Result> + where + T: ?Sized + Serialize, + { + value.serialize(self) + } + + fn serialize_unit(self) -> Result> { + self.serialize_none() + } + + fn serialize_unit_struct(self, _name: &'static str) -> Result> { + self.serialize_none() + } + + fn serialize_unit_variant( + self, + _name: &'static str, + _variant_index: u32, + variant: &'static str, + ) -> Result> { + self.serialize_str(variant) + } + + fn serialize_newtype_struct( + self, + _name: &'static str, + value: &T, + ) -> Result> + where + T: ?Sized + Serialize, + { + value.serialize(self) + } + + fn serialize_newtype_variant( + self, + name: &'static str, + _variant_index: u32, + variant: &'static str, + value: &T, + ) -> Result> + where + T: ?Sized + Serialize, + { + let mut m = P::NamedMap::builder(self.py, 1, name)?; + P::NamedMap::push_field( + &mut m, + PyString::new(self.py, variant), + value.serialize(self)?, + )?; + Ok(P::NamedMap::finish(m)?.into_any()) + } + + fn serialize_seq(self, len: Option) -> Result> { + let items = match len { + Some(len) => Vec::with_capacity(len), + None => Vec::new(), + }; + Ok(PythonCollectionSerializer { + items, + py: self.py, + _types: PhantomData, + }) + } + + fn serialize_tuple(self, len: usize) -> Result> { + Ok(PythonCollectionSerializer { + items: Vec::with_capacity(len), + py: self.py, + _types: PhantomData, + }) + } + + fn serialize_tuple_struct( + self, + _name: &'static str, + len: usize, + ) -> Result> { + self.serialize_tuple(len) + } + + fn serialize_tuple_variant( + self, + name: &'static str, + _variant_index: u32, + variant: &'static str, + len: usize, + ) -> Result> { + let inner = self.serialize_tuple(len)?; + Ok(PythonTupleVariantSerializer { + name, + variant, + inner, + }) + 
} + + fn serialize_map(self, len: Option) -> Result> { + Ok(PythonMapSerializer { + builder: P::Map::builder(self.py, len)?, + key: None, + py: self.py, + _types: PhantomData, + }) + } + + fn serialize_struct(self, name: &'static str, len: usize) -> Result> { + #[cfg(feature = "arbitrary_precision")] + { + // With arbitrary_precision enabled, a serde_json::Number serializes as a "$serde_json::private::Number" + // struct with a "$serde_json::private::Number" field, whose value is the String in Number::n. + if name == "$serde_json::private::Number" && len == 1 { + return Ok(StructSerializer::Number { + py: self.py, + number_string: None, + _types: PhantomData, + }); + } + + Ok(StructSerializer::Struct(PythonStructDictSerializer { + py: self.py, + builder: P::NamedMap::builder(self.py, len, name)?, + _types: PhantomData, + })) + } + + #[cfg(not(feature = "arbitrary_precision"))] + { + Ok(PythonStructDictSerializer { + py: self.py, + builder: P::NamedMap::builder(self.py, len, name)?, + _types: PhantomData, + }) + } + } + + fn serialize_struct_variant( + self, + name: &'static str, + _variant_index: u32, + variant: &'static str, + len: usize, + ) -> Result> { + Ok(PythonStructVariantSerializer { + name, + variant, + inner: PythonStructDictSerializer { + py: self.py, + builder: P::NamedMap::builder(self.py, len, variant)?, + _types: PhantomData, + }, + }) + } +} + +impl<'py, P: PythonizeTypes> ser::SerializeSeq for PythonCollectionSerializer<'py, P> { + type Ok = Bound<'py, PyAny>; + type Error = PythonizeError; + + fn serialize_element(&mut self, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + self.items.push(pythonize_custom::(self.py, value)?); + Ok(()) + } + + fn end(self) -> Result> { + let instance = P::List::create_sequence(self.py, self.items)?; + Ok(instance.into_pyobject(self.py)?.into_any()) + } +} + +impl<'py, P: PythonizeTypes> ser::SerializeTuple for PythonCollectionSerializer<'py, P> { + type Ok = Bound<'py, PyAny>; + type Error = 
PythonizeError; + + fn serialize_element(&mut self, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + ser::SerializeSeq::serialize_element(self, value) + } + + fn end(self) -> Result> { + Ok(PyTuple::new(self.py, self.items)?.into_any()) + } +} + +impl<'py, P: PythonizeTypes> ser::SerializeTupleStruct for PythonCollectionSerializer<'py, P> { + type Ok = Bound<'py, PyAny>; + type Error = PythonizeError; + + fn serialize_field(&mut self, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + ser::SerializeSeq::serialize_element(self, value) + } + + fn end(self) -> Result> { + ser::SerializeTuple::end(self) + } +} + +impl<'py, P: PythonizeTypes> ser::SerializeTupleVariant for PythonTupleVariantSerializer<'py, P> { + type Ok = Bound<'py, PyAny>; + type Error = PythonizeError; + + fn serialize_field(&mut self, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + ser::SerializeSeq::serialize_element(&mut self.inner, value) + } + + fn end(self) -> Result> { + let mut m = P::NamedMap::builder(self.inner.py, 1, self.name)?; + P::NamedMap::push_field( + &mut m, + PyString::new(self.inner.py, self.variant), + ser::SerializeTuple::end(self.inner)?, + )?; + Ok(P::NamedMap::finish(m)?.into_any()) + } +} + +impl<'py, P: PythonizeTypes> ser::SerializeMap for PythonMapSerializer<'py, P> { + type Ok = Bound<'py, PyAny>; + type Error = PythonizeError; + + fn serialize_key(&mut self, key: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + self.key = Some(pythonize_custom::(self.py, key)?); + Ok(()) + } + + fn serialize_value(&mut self, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + P::Map::push_item( + &mut self.builder, + self.key + .take() + .expect("serialize_value should always be called after serialize_key"), + pythonize_custom::(self.py, value)?, + )?; + Ok(()) + } + + fn end(self) -> Result> { + Ok(P::Map::finish(self.builder)?.into_any()) + } +} + +#[cfg(feature = "arbitrary_precision")] +impl<'py, P: PythonizeTypes> 
ser::SerializeStruct for StructSerializer<'py, P> { + type Ok = Bound<'py, PyAny>; + type Error = PythonizeError; + + fn serialize_field(&mut self, key: &'static str, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + match self { + StructSerializer::Struct(s) => s.serialize_field(key, value), + StructSerializer::Number { number_string, .. } => { + let serde_json::Value::String(s) = value + .serialize(serde_json::value::Serializer) + .map_err(|e| { + PythonizeError::msg(format!("Failed to serialize number: {}", e)) + })? + else { + return Err(PythonizeError::msg("Expected string in serde_json::Number")); + }; + + *number_string = Some(s); + Ok(()) + } + } + } + + fn end(self) -> Result> { + match self { + StructSerializer::Struct(s) => s.end(), + StructSerializer::Number { + py, + number_string: Some(s), + .. + } => { + if let Ok(i) = s.parse::() { + return Ok(PyInt::new(py, i).into_any()); + } + if let Ok(u) = s.parse::() { + return Ok(PyInt::new(py, u).into_any()); + } + if s.chars().any(|c| c == '.' || c == 'e' || c == 'E') { + if let Ok(f) = s.parse::() { + return Ok(PyFloat::new(py, f).into_any()); + } + } + // Fall back to Python's int() constructor, which supports arbitrary precision. + py.get_type::() + .call1((s.as_str(),)) + .map_err(|e| PythonizeError::msg(format!("Invalid number: {}", e))) + } + StructSerializer::Number { .. 
} => Err(PythonizeError::msg("Empty serde_json::Number")), + } + } +} + +impl<'py, P: PythonizeTypes> ser::SerializeStruct for PythonStructDictSerializer<'py, P> { + type Ok = Bound<'py, PyAny>; + type Error = PythonizeError; + + fn serialize_field(&mut self, key: &'static str, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + P::NamedMap::push_field( + &mut self.builder, + PyString::new(self.py, key), + pythonize_custom::(self.py, value)?, + )?; + Ok(()) + } + + fn end(self) -> Result> { + Ok(P::NamedMap::finish(self.builder)?.into_any()) + } +} + +impl<'py, P: PythonizeTypes> ser::SerializeStructVariant for PythonStructVariantSerializer<'py, P> { + type Ok = Bound<'py, PyAny>; + type Error = PythonizeError; + + fn serialize_field(&mut self, key: &'static str, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + P::NamedMap::push_field( + &mut self.inner.builder, + PyString::new(self.inner.py, key), + pythonize_custom::(self.inner.py, value)?, + )?; + Ok(()) + } + + fn end(self) -> Result> { + let v = P::NamedMap::finish(self.inner.builder)?; + let mut m = P::NamedMap::builder(self.inner.py, 1, self.name)?; + P::NamedMap::push_field( + &mut m, + PyString::new(self.inner.py, self.variant), + v.into_any(), + )?; + Ok(P::NamedMap::finish(m)?.into_any()) + } +} + +#[cfg(test)] +mod test { + use super::pythonize; + use maplit::hashmap; + use pyo3::prelude::*; + use pyo3::pybacked::PyBackedStr; + use pyo3::types::{PyBytes, PyDict}; + use serde::Serialize; + + fn test_ser(src: T, expected: &str) + where + T: Serialize, + { + Python::attach(|py| -> PyResult<()> { + let obj = pythonize(py, &src)?; + + let locals = PyDict::new(py); + locals.set_item("obj", obj)?; + + py.run( + c"import json; result = json.dumps(obj, separators=(',', ':'))", + None, + Some(&locals), + )?; + let result = locals.get_item("result")?.unwrap(); + let result = result.extract::()?; + + assert_eq!(result, expected); + assert_eq!(serde_json::to_string(&src).unwrap(), 
expected); + + Ok(()) + }) + .unwrap(); + } + + #[test] + fn test_empty_struct() { + #[derive(Serialize)] + struct Empty; + + test_ser(Empty, "null"); + } + + #[test] + fn test_struct() { + #[derive(Serialize)] + struct Struct { + foo: String, + bar: usize, + } + + test_ser( + Struct { + foo: "foo".to_string(), + bar: 5, + }, + r#"{"foo":"foo","bar":5}"#, + ); + } + + #[test] + fn test_nested_struct() { + #[derive(Serialize)] + struct Foo { + name: String, + bar: Bar, + } + + #[derive(Serialize)] + struct Bar { + name: String, + } + + test_ser( + Foo { + name: "foo".to_string(), + bar: Bar { + name: "bar".to_string(), + }, + }, + r#"{"name":"foo","bar":{"name":"bar"}}"#, + ) + } + + #[test] + fn test_tuple_struct() { + #[derive(Serialize)] + struct TupleStruct(String, usize); + + test_ser(TupleStruct("foo".to_string(), 5), r#"["foo",5]"#); + } + + #[test] + fn test_tuple() { + test_ser(("foo", 5), r#"["foo",5]"#); + } + + #[test] + fn test_vec() { + test_ser(vec![1, 2, 3], r#"[1,2,3]"#); + } + + #[test] + fn test_map() { + test_ser(hashmap! 
{"foo" => "foo"}, r#"{"foo":"foo"}"#); + } + + #[test] + fn test_enum_unit_variant() { + #[derive(Serialize)] + enum E { + Empty, + } + + test_ser(E::Empty, r#""Empty""#); + } + + #[test] + fn test_enum_tuple_variant() { + #[derive(Serialize)] + enum E { + Tuple(i32, String), + } + + test_ser(E::Tuple(5, "foo".to_string()), r#"{"Tuple":[5,"foo"]}"#); + } + + #[test] + fn test_enum_newtype_variant() { + #[derive(Serialize)] + enum E { + NewType(String), + } + + test_ser(E::NewType("foo".to_string()), r#"{"NewType":"foo"}"#); + } + + #[test] + fn test_enum_struct_variant() { + #[derive(Serialize)] + enum E { + Struct { foo: String, bar: usize }, + } + + test_ser( + E::Struct { + foo: "foo".to_string(), + bar: 5, + }, + r#"{"Struct":{"foo":"foo","bar":5}}"#, + ); + } + + #[test] + fn test_integers() { + #[derive(Serialize)] + struct Integers { + a: i8, + b: i16, + c: i32, + d: i64, + e: u8, + f: u16, + g: u32, + h: u64, + i: i128, + j: u128, + } + + test_ser( + Integers { + a: 1, + b: 2, + c: 3, + d: 4, + e: 5, + f: 6, + g: 7, + h: 8, + i: 9, + j: 10, + }, + r#"{"a":1,"b":2,"c":3,"d":4,"e":5,"f":6,"g":7,"h":8,"i":9,"j":10}"#, + ) + } + + #[test] + fn test_floats() { + #[derive(Serialize)] + struct Floats { + a: f32, + b: f64, + } + + test_ser(Floats { a: 1.0, b: 2.0 }, r#"{"a":1.0,"b":2.0}"#) + } + + #[test] + fn test_char() { + #[derive(Serialize)] + struct Char { + a: char, + } + + test_ser(Char { a: 'a' }, r#"{"a":"a"}"#) + } + + #[test] + fn test_bool() { + test_ser(true, "true"); + test_ser(false, "false"); + } + + #[test] + fn test_none() { + #[derive(Serialize)] + struct S; + + test_ser((), "null"); + test_ser(S, "null"); + + test_ser(Some(1), "1"); + test_ser(None::, "null"); + } + + #[test] + fn test_bytes() { + // serde treats &[u8] as a sequence of integers due to lack of specialization + test_ser(b"foo", "[102,111,111]"); + + Python::attach(|py| { + assert!(pythonize(py, serde_bytes::Bytes::new(b"foo")) + .expect("bytes will always serialize successfully") 
+ .eq(&PyBytes::new(py, b"foo")) + .expect("bytes will always compare successfully")); + }); + } +} diff --git a/tests/test_arbitrary_precision.rs b/tests/test_arbitrary_precision.rs new file mode 100644 index 0000000..dc5b38f --- /dev/null +++ b/tests/test_arbitrary_precision.rs @@ -0,0 +1,108 @@ +#![cfg(feature = "arbitrary_precision")] + +use pyo3::prelude::*; +use pythonize::{depythonize, pythonize}; +use serde_json::Value; + +#[test] +fn test_greater_than_u64_max() { + Python::attach(|py| { + let json_str = r#"18446744073709551616"#; + let value: Value = serde_json::from_str(json_str).unwrap(); + let result = pythonize(py, &value).unwrap(); + let number_str = result.str().unwrap().to_string(); + + assert!(result.is_instance_of::()); + assert_eq!(number_str, "18446744073709551616"); + }); +} + +#[test] +fn test_less_than_i64_min() { + Python::attach(|py| { + let json_str = r#"-9223372036854775809"#; + let value: Value = serde_json::from_str(json_str).unwrap(); + let result = pythonize(py, &value).unwrap(); + let number_str = result.str().unwrap().to_string(); + + assert!(result.is_instance_of::()); + assert_eq!(number_str, "-9223372036854775809"); + }); +} + +#[test] +fn test_float() { + Python::attach(|py| { + let json_str = r#"3.141592653589793238"#; + let value: Value = serde_json::from_str(json_str).unwrap(); + let result = pythonize(py, &value).unwrap(); + let num: f32 = result.extract().unwrap(); + + assert!(result.is_instance_of::()); + assert_eq!(num, 3.141592653589793238); // not {'$serde_json::private::Number': ...} + }); +} + +#[test] +fn test_int() { + Python::attach(|py| { + let json_str = r#"2"#; + let value: Value = serde_json::from_str(json_str).unwrap(); + let result = pythonize(py, &value).unwrap(); + let num: i32 = result.extract().unwrap(); + + assert!(result.is_instance_of::()); + assert_eq!(num, 2); // not {'$serde_json::private::Number': '2'} + }); +} + +#[test] +fn test_serde_error_if_token_empty() { + let json_str = 
r#"{"$serde_json::private::Number": ""}"#; + let result: Result = serde_json::from_str(json_str); + + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .contains("EOF while parsing a value")); +} + +#[test] +fn test_serde_error_if_token_invalid() { + let json_str = r#"{"$serde_json::private::Number": 2}"#; + let result: Result = serde_json::from_str(json_str); + + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .contains("invalid type: integer `2`, expected string containing a number")); +} + +#[test] +fn test_token_valid() { + Python::attach(|py| { + let json_str = r#"{"$serde_json::private::Number": "2"}"#; + let value: Value = serde_json::from_str(json_str).unwrap(); + let result = pythonize(py, &value).unwrap(); + let num: i32 = result.extract().unwrap(); + + assert!(result.is_instance_of::()); + assert_eq!(num, 2); + }); +} + +#[test] +fn test_depythonize_greater_than_u128_max() { + Python::attach(|py| { + // u128::MAX + 1 + let py_int = py + .eval(c"340282366920938463463374607431768211456", None, None) + .unwrap(); + let value: Value = depythonize(&py_int).unwrap(); + + assert!(value.is_number()); + assert_eq!(value.to_string(), "340282366920938463463374607431768211456"); + }); +} diff --git a/tests/test_collections.rs b/tests/test_collections.rs new file mode 100644 index 0000000..9cd7164 --- /dev/null +++ b/tests/test_collections.rs @@ -0,0 +1,203 @@ +use std::collections::{BTreeMap, HashMap}; + +use maplit::{btreemap, hashmap}; +use pyo3::prelude::*; +use pythonize::{depythonize, pythonize}; + +fn round_trip(py: Python<'_>, val: T) -> T +where + T: serde::Serialize + serde::de::DeserializeOwned + std::fmt::Debug + PartialEq, +{ + let py_val = pythonize(py, &val).expect("pythonize failed"); + depythonize(&py_val).expect("depythonize failed") +} + +// --- Vec --- + +#[test] +fn test_vec_i32_empty() { + Python::attach(|py| { + let result = round_trip(py, Vec::::new()); + assert_eq!(result, Vec::::new()); + 
}); +} + +#[test] +fn test_vec_i32_single() { + Python::attach(|py| { + assert_eq!(round_trip(py, vec![42i32]), vec![42i32]); + }); +} + +#[test] +fn test_vec_i32_three_elements() { + Python::attach(|py| { + assert_eq!(round_trip(py, vec![1i32, -7, 100]), vec![1i32, -7, 100]); + }); +} + +// --- Vec --- + +#[test] +fn test_vec_f64_three_elements() { + Python::attach(|py| { + let input = vec![0.0f64, -1.5, 3.14]; + let result = round_trip(py, input.clone()); + assert_eq!(result, input); + }); +} + +// --- Vec --- + +#[test] +fn test_vec_string_empty_vec() { + Python::attach(|py| { + let result = round_trip(py, Vec::::new()); + assert_eq!(result, Vec::::new()); + }); +} + +#[test] +fn test_vec_string_empty_strings() { + Python::attach(|py| { + let input = vec!["".to_string(), "".to_string()]; + assert_eq!(round_trip(py, input.clone()), input); + }); +} + +#[test] +fn test_vec_string_non_empty() { + Python::attach(|py| { + let input = vec!["hello".to_string(), "world".to_string()]; + assert_eq!(round_trip(py, input.clone()), input); + }); +} + +// --- Vec --- + +#[test] +fn test_vec_bool_mixed() { + Python::attach(|py| { + let input = vec![true, false, true, false]; + assert_eq!(round_trip(py, input.clone()), input); + }); +} + +// --- Vec> --- + +#[test] +fn test_vec_nested_two_inner_vecs() { + Python::attach(|py| { + let input = vec![vec![1i32, 2, 3], vec![-1i32, 0]]; + assert_eq!(round_trip(py, input.clone()), input); + }); +} + +// --- Tuple characterisation --- + +#[test] +fn test_tuple_i32_string_round_trip_ok() { + Python::attach(|py| { + let input = (7i32, "hello".to_string()); + let py_val = pythonize(py, &input).expect("pythonize failed"); + let result: (i32, String) = depythonize(&py_val).expect("depythonize failed"); + assert_eq!(result.0, 7i32); + assert_eq!(result.1, "hello"); + }); +} + +#[test] +fn test_tuple_bool_f64_i64_round_trip_ok() { + Python::attach(|py| { + let input = (true, 2.718f64, -42i64); + let py_val = pythonize(py, 
&input).expect("pythonize failed"); + let result: (bool, f64, i64) = depythonize(&py_val).expect("depythonize failed"); + assert_eq!(result.0, true); + assert_eq!(result.1, 2.718f64); + assert_eq!(result.2, -42i64); + }); +} + +// --- HashMap --- + +#[test] +fn test_hashmap_string_i64_empty() { + Python::attach(|py| { + let input: HashMap = hashmap! {}; + let result = round_trip(py, input); + assert_eq!(result, HashMap::new()); + }); +} + +#[test] +fn test_hashmap_string_i64_three_entries() { + Python::attach(|py| { + let input: HashMap = hashmap! { + "a".to_string() => 1, + "b".to_string() => -2, + "c".to_string() => 300, + }; + let result = round_trip(py, input.clone()); + assert_eq!(result, input); + }); +} + +#[test] +fn test_hashmap_string_vec_i32_two_entries() { + Python::attach(|py| { + let input: HashMap> = hashmap! { + "evens".to_string() => vec![2, 4, 6], + "odds".to_string() => vec![1, 3, 5], + }; + let result = round_trip(py, input.clone()); + assert_eq!(result, input); + }); +} + +#[test] +fn test_hashmap_string_bool_two_entries() { + Python::attach(|py| { + let input: HashMap = hashmap! { + "yes".to_string() => true, + "no".to_string() => false, + }; + let result = round_trip(py, input.clone()); + assert_eq!(result, input); + }); +} + +// --- BTreeMap --- + +#[test] +fn test_btreemap_string_i64_empty() { + Python::attach(|py| { + let input: BTreeMap = btreemap! {}; + let result = round_trip(py, input); + assert_eq!(result, BTreeMap::new()); + }); +} + +#[test] +fn test_btreemap_string_i64_three_entries() { + Python::attach(|py| { + let input: BTreeMap = btreemap! { + "alpha".to_string() => 10, + "beta".to_string() => -20, + "gamma".to_string() => 300, + }; + let result = round_trip(py, input.clone()); + assert_eq!(result, input); + }); +} + +#[test] +fn test_btreemap_string_string_two_entries() { + Python::attach(|py| { + let input: BTreeMap = btreemap! 
{ + "key1".to_string() => "value1".to_string(), + "key2".to_string() => "value2".to_string(), + }; + let result = round_trip(py, input.clone()); + assert_eq!(result, input); + }); +} diff --git a/tests/test_custom_types.rs b/tests/test_custom_types.rs new file mode 100644 index 0000000..27888d0 --- /dev/null +++ b/tests/test_custom_types.rs @@ -0,0 +1,285 @@ +use std::collections::HashMap; + +use pyo3::{ + exceptions::{PyIndexError, PyKeyError}, + prelude::*, + types::{PyDict, PyMapping, PySequence, PyTuple}, + IntoPyObjectExt, +}; +use pythonize::{ + depythonize, pythonize_custom, PythonizeListType, PythonizeMappingType, + PythonizeNamedMappingType, PythonizeTypes, PythonizeUnnamedMappingAdapter, Pythonizer, +}; +use serde::Serialize; +use serde_json::{json, Value}; + +#[pyclass(sequence)] +struct CustomList { + items: Vec>, +} + +#[pymethods] +impl CustomList { + fn __len__(&self) -> usize { + self.items.len() + } + + fn __getitem__(&self, idx: isize) -> PyResult> { + self.items + .get(idx as usize) + .cloned() + .ok_or_else(|| PyIndexError::new_err(idx)) + } +} + +impl PythonizeListType for CustomList { + fn create_sequence<'py, T, U>( + py: Python<'py>, + elements: impl IntoIterator, + ) -> PyResult> + where + T: IntoPyObject<'py>, + U: ExactSizeIterator, + { + let sequence = Bound::new( + py, + CustomList { + items: elements + .into_iter() + .map(|item| item.into_py_any(py)) + .collect::>()?, + }, + )?; + + Ok(unsafe { sequence.cast_into_unchecked() }) + } +} + +struct PythonizeCustomList; +impl<'py> PythonizeTypes for PythonizeCustomList { + type Map = PyDict; + type NamedMap = PythonizeUnnamedMappingAdapter; + type List = CustomList; +} + +#[test] +fn test_custom_list() { + Python::attach(|py| { + PySequence::register::(py).unwrap(); + let serialized = pythonize_custom::(py, &json!([1, 2, 3])).unwrap(); + assert!(serialized.is_instance_of::()); + + let deserialized: Value = depythonize(&serialized).unwrap(); + assert_eq!(deserialized, json!([1, 2, 3])); + }) 
+} + +#[pyclass(mapping)] +struct CustomDict { + items: HashMap>, +} + +#[pymethods] +impl CustomDict { + fn __len__(&self) -> usize { + self.items.len() + } + + fn __getitem__(&self, key: String) -> PyResult> { + self.items + .get(&key) + .cloned() + .ok_or_else(|| PyKeyError::new_err(key)) + } + + fn __setitem__(&mut self, key: String, value: Py) { + self.items.insert(key, value); + } + + fn keys(&self) -> Vec<&String> { + self.items.keys().collect() + } + + fn values(&self) -> Vec> { + self.items.values().cloned().collect() + } +} + +impl PythonizeMappingType for CustomDict { + type Builder<'py> = Bound<'py, CustomDict>; + + fn builder<'py>(py: Python<'py>, len: Option) -> PyResult> { + Bound::new( + py, + CustomDict { + items: HashMap::with_capacity(len.unwrap_or(0)), + }, + ) + } + + fn push_item<'py>( + builder: &mut Self::Builder<'py>, + key: Bound<'py, PyAny>, + value: Bound<'py, PyAny>, + ) -> PyResult<()> { + unsafe { builder.cast_unchecked::() }.set_item(key, value) + } + + fn finish<'py>(builder: Self::Builder<'py>) -> PyResult> { + Ok(unsafe { builder.cast_into_unchecked() }) + } +} + +struct PythonizeCustomDict; +impl<'py> PythonizeTypes for PythonizeCustomDict { + type Map = CustomDict; + type NamedMap = PythonizeUnnamedMappingAdapter; + type List = PyTuple; +} + +#[test] +fn test_custom_dict() { + Python::attach(|py| { + PyMapping::register::(py).unwrap(); + let serialized = + pythonize_custom::(py, &json!({ "hello": 1, "world": 2 })) + .unwrap(); + assert!(serialized.is_instance_of::()); + + let deserialized: Value = depythonize(&serialized).unwrap(); + assert_eq!(deserialized, json!({ "hello": 1, "world": 2 })); + }) +} + +#[test] +fn test_tuple() { + Python::attach(|py| { + PyMapping::register::(py).unwrap(); + let serialized = + pythonize_custom::(py, &json!([1, 2, 3, 4])).unwrap(); + assert!(serialized.is_instance_of::()); + + let deserialized: Value = depythonize(&serialized).unwrap(); + assert_eq!(deserialized, json!([1, 2, 3, 4])); + }) +} + 
+#[test] +fn test_pythonizer_can_be_created() { + // https://github.com/davidhewitt/pythonize/pull/56 + Python::attach(|py| { + let sample = json!({ "hello": 1, "world": 2 }); + assert!(sample + .serialize(Pythonizer::new(py)) + .unwrap() + .is_instance_of::()); + + assert!(sample + .serialize(Pythonizer::custom::(py)) + .unwrap() + .is_instance_of::()); + }) +} + +#[pyclass(mapping)] +struct NamedCustomDict { + name: String, + items: HashMap>, +} + +#[pymethods] +impl NamedCustomDict { + fn __len__(&self) -> usize { + self.items.len() + } + + fn __getitem__(&self, key: String) -> PyResult> { + self.items + .get(&key) + .cloned() + .ok_or_else(|| PyKeyError::new_err(key)) + } + + fn __setitem__(&mut self, key: String, value: Py) { + self.items.insert(key, value); + } + + fn keys(&self) -> Vec<&String> { + self.items.keys().collect() + } + + fn values(&self) -> Vec> { + self.items.values().cloned().collect() + } +} + +impl PythonizeNamedMappingType for NamedCustomDict { + type Builder<'py> = Bound<'py, NamedCustomDict>; + + fn builder<'py>( + py: Python<'py>, + len: usize, + name: &'static str, + ) -> PyResult> { + Bound::new( + py, + NamedCustomDict { + name: String::from(name), + items: HashMap::with_capacity(len), + }, + ) + } + + fn push_field<'py>( + builder: &mut Self::Builder<'py>, + name: Bound<'py, pyo3::types::PyString>, + value: Bound<'py, PyAny>, + ) -> PyResult<()> { + unsafe { builder.cast_unchecked::() }.set_item(name, value) + } + + fn finish<'py>(builder: Self::Builder<'py>) -> PyResult> { + Ok(unsafe { builder.cast_into_unchecked() }) + } +} + +struct PythonizeNamedCustomDict; +impl<'py> PythonizeTypes for PythonizeNamedCustomDict { + type Map = CustomDict; + type NamedMap = NamedCustomDict; + type List = PyTuple; +} + +#[derive(Serialize)] +struct Struct { + hello: u8, + world: i8, +} + +#[test] +fn test_custom_unnamed_dict() { + Python::attach(|py| { + PyMapping::register::(py).unwrap(); + let serialized = + pythonize_custom::(py, &Struct { 
hello: 1, world: 2 }).unwrap(); + assert!(serialized.is_instance_of::()); + + let deserialized: Value = depythonize(&serialized).unwrap(); + assert_eq!(deserialized, json!({ "hello": 1, "world": 2 })); + }) +} + +#[test] +fn test_custom_named_dict() { + Python::attach(|py| { + PyMapping::register::(py).unwrap(); + let serialized = + pythonize_custom::(py, &Struct { hello: 1, world: 2 }) + .unwrap(); + let named: Bound = serialized.extract().unwrap(); + assert_eq!(named.borrow().name, "Struct"); + + let deserialized: Value = depythonize(&serialized).unwrap(); + assert_eq!(deserialized, json!({ "hello": 1, "world": 2 })); + }) +} diff --git a/tests/test_structs.rs b/tests/test_structs.rs new file mode 100644 index 0000000..b00899b --- /dev/null +++ b/tests/test_structs.rs @@ -0,0 +1,222 @@ +use pyo3::prelude::*; +use pyo3::types::PyDict; +use pythonize::{depythonize, pythonize}; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, PartialEq, Serialize, Deserialize)] +struct Simple { + name: String, + value: i64, +} + +#[derive(Debug, PartialEq, Serialize, Deserialize)] +struct WithRename { + #[serde(rename = "firstName")] + first_name: String, + age: i32, +} + +#[derive(Debug, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +struct WithRenameAll { + first_name: String, + last_name: String, + year_of_birth: i32, +} + +#[derive(Debug, PartialEq, Serialize, Deserialize)] +struct WithOption { + label: String, + count: Option, +} + +#[derive(Debug, PartialEq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +struct Strict { + id: i32, + tag: String, +} + +// --- Simple struct --- + +#[test] +fn test_simple_round_trip() { + Python::attach(|py| { + let original = Simple { + name: "Alice".into(), + value: 42, + }; + let py_obj = pythonize(py, &original).unwrap(); + let result: Simple = depythonize(&py_obj).unwrap(); + assert_eq!(result, original); + }); +} + +#[test] +fn test_simple_empty_string_zero() { + Python::attach(|py| { + let 
original = Simple { + name: String::new(), + value: 0, + }; + let py_obj = pythonize(py, &original).unwrap(); + let result: Simple = depythonize(&py_obj).unwrap(); + assert_eq!(result, original); + }); +} + +#[test] +fn test_simple_boundary_i64_min() { + Python::attach(|py| { + let original = Simple { + name: "hello world".into(), + value: i64::MIN, + }; + let py_obj = pythonize(py, &original).unwrap(); + let result: Simple = depythonize(&py_obj).unwrap(); + assert_eq!(result, original); + }); +} + +// --- #[serde(rename)] --- + +#[test] +fn test_rename_key_present() { + Python::attach(|py| { + let original = WithRename { + first_name: "Bob".into(), + age: 30, + }; + let py_obj = pythonize(py, &original).unwrap(); + let dict = py_obj.cast::().unwrap(); + assert!(dict.get_item("firstName").unwrap().is_some()); + assert!(dict.get_item("first_name").unwrap().is_none()); + }); +} + +#[test] +fn test_rename_round_trip() { + Python::attach(|py| { + let original = WithRename { + first_name: "Bob".into(), + age: 30, + }; + let py_obj = pythonize(py, &original).unwrap(); + let result: WithRename = depythonize(&py_obj).unwrap(); + assert_eq!(result, original); + }); +} + +// --- #[serde(rename_all = "camelCase")] --- + +#[test] +fn test_rename_all_keys_present() { + Python::attach(|py| { + let original = WithRenameAll { + first_name: "Jane".into(), + last_name: "Doe".into(), + year_of_birth: 1990, + }; + let py_obj = pythonize(py, &original).unwrap(); + let dict = py_obj.cast::().unwrap(); + assert!(dict.get_item("firstName").unwrap().is_some()); + assert!(dict.get_item("lastName").unwrap().is_some()); + assert!(dict.get_item("yearOfBirth").unwrap().is_some()); + }); +} + +#[test] +fn test_rename_all_round_trip() { + Python::attach(|py| { + let original = WithRenameAll { + first_name: "Jane".into(), + last_name: "Doe".into(), + year_of_birth: 1990, + }; + let py_obj = pythonize(py, &original).unwrap(); + let result: WithRenameAll = depythonize(&py_obj).unwrap(); + 
assert_eq!(result, original); + }); +} + +// --- Unknown fields (default: ignore) --- + +#[test] +fn test_unknown_fields_ignored() { + Python::attach(|py| { + let dict = PyDict::new(py); + dict.set_item("name", "test").unwrap(); + dict.set_item("value", 1i64).unwrap(); + dict.set_item("extra", "ignored").unwrap(); + let result: Result = depythonize(dict.as_any()); + assert!(result.is_ok()); + assert_eq!( + result.unwrap(), + Simple { + name: "test".into(), + value: 1 + } + ); + }); +} + +// --- #[serde(deny_unknown_fields)] --- + +#[test] +fn test_deny_unknown_fields_fails() { + Python::attach(|py| { + let dict = PyDict::new(py); + dict.set_item("id", 1i32).unwrap(); + dict.set_item("tag", "hello").unwrap(); + dict.set_item("extra", "bad").unwrap(); + let result: Result = depythonize(dict.as_any()); + assert!(result.is_err()); + }); +} + +// --- Option None --- + +#[test] +fn test_option_none_round_trip() { + Python::attach(|py| { + let original = WithOption { + label: "x".into(), + count: None, + }; + let py_obj = pythonize(py, &original).unwrap(); + let dict = py_obj.cast::().unwrap(); + let count_opt = dict.get_item("count").unwrap(); + assert!(count_opt.is_some()); + assert!(count_opt.unwrap().is_none()); + let result: WithOption = depythonize(&py_obj).unwrap(); + assert_eq!(result, original); + }); +} + +// --- Option Some --- + +#[test] +fn test_option_some_round_trip() { + Python::attach(|py| { + let original = WithOption { + label: "y".into(), + count: Some(99), + }; + let py_obj = pythonize(py, &original).unwrap(); + let result: WithOption = depythonize(&py_obj).unwrap(); + assert_eq!(result, original); + }); +} + +#[test] +fn test_option_some_i64_max() { + Python::attach(|py| { + let original = WithOption { + label: "z".into(), + count: Some(i64::MAX), + }; + let py_obj = pythonize(py, &original).unwrap(); + let result: WithOption = depythonize(&py_obj).unwrap(); + assert_eq!(result, original); + }); +} diff --git a/tests/test_with_serde_path_to_err.rs 
b/tests/test_with_serde_path_to_err.rs new file mode 100644 index 0000000..504dae9 --- /dev/null +++ b/tests/test_with_serde_path_to_err.rs @@ -0,0 +1,211 @@ +use std::collections::BTreeMap; + +use pyo3::{ + prelude::*, + types::{PyDict, PyList}, +}; +use pythonize::{PythonizeTypes, PythonizeUnnamedMappingAdapter}; +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)] +struct Root { + root_key: String, + root_map: BTreeMap>, +} + +impl<'py, T> PythonizeTypes for Root { + type Map = PyDict; + type NamedMap = PythonizeUnnamedMappingAdapter; + type List = PyList; +} + +#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)] +struct Nested { + nested_key: T, +} + +#[derive(Deserialize, Debug, PartialEq, Eq)] +struct CannotSerialize {} + +impl Serialize for CannotSerialize { + fn serialize(&self, _serializer: S) -> Result + where + S: serde::Serializer, + { + Err(serde::ser::Error::custom( + "something went intentionally wrong", + )) + } +} + +#[test] +fn test_de_valid() { + Python::attach(|py| { + let pyroot = PyDict::new(py); + pyroot.set_item("root_key", "root_value").unwrap(); + + let nested = PyDict::new(py); + let nested_0 = PyDict::new(py); + nested_0.set_item("nested_key", "nested_value_0").unwrap(); + nested.set_item("nested_0", nested_0).unwrap(); + let nested_1 = PyDict::new(py); + nested_1.set_item("nested_key", "nested_value_1").unwrap(); + nested.set_item("nested_1", nested_1).unwrap(); + + pyroot.set_item("root_map", nested).unwrap(); + + let de = &mut pythonize::Depythonizer::from_object(&pyroot); + let root: Root = serde_path_to_error::deserialize(de).unwrap(); + + assert_eq!( + root, + Root { + root_key: String::from("root_value"), + root_map: BTreeMap::from([ + ( + String::from("nested_0"), + Nested { + nested_key: String::from("nested_value_0") + } + ), + ( + String::from("nested_1"), + Nested { + nested_key: String::from("nested_value_1") + } + ) + ]) + } + ); + }) +} + +#[test] +fn test_de_invalid() { + 
Python::attach(|py| { + let pyroot = PyDict::new(py); + pyroot.set_item("root_key", "root_value").unwrap(); + + let nested = PyDict::new(py); + let nested_0 = PyDict::new(py); + nested_0.set_item("nested_key", "nested_value_0").unwrap(); + nested.set_item("nested_0", nested_0).unwrap(); + let nested_1 = PyDict::new(py); + nested_1.set_item("nested_key", 1).unwrap(); + nested.set_item("nested_1", nested_1).unwrap(); + + pyroot.set_item("root_map", nested).unwrap(); + + let de = &mut pythonize::Depythonizer::from_object(&pyroot); + let err = serde_path_to_error::deserialize::<_, Root>(de).unwrap_err(); + + assert_eq!(err.path().to_string(), "root_map.nested_1.nested_key"); + assert_eq!( + err.to_string(), + "root_map.nested_1.nested_key: unexpected type: 'int' object is not an instance of 'str'" + ); + }) +} + +#[test] +fn test_ser_valid() { + Python::attach(|py| { + let root = Root { + root_key: String::from("root_value"), + root_map: BTreeMap::from([ + ( + String::from("nested_0"), + Nested { + nested_key: String::from("nested_value_0"), + }, + ), + ( + String::from("nested_1"), + Nested { + nested_key: String::from("nested_value_1"), + }, + ), + ]), + }; + + let ser = pythonize::Pythonizer::>::from(py); + let pyroot: Bound<'_, PyAny> = serde_path_to_error::serialize(&root, ser).unwrap(); + + let pyroot = pyroot.cast::().unwrap(); + assert_eq!(pyroot.len(), 2); + + let root_value: String = pyroot + .get_item("root_key") + .unwrap() + .unwrap() + .extract() + .unwrap(); + assert_eq!(root_value, "root_value"); + + let root_map = pyroot + .get_item("root_map") + .unwrap() + .unwrap() + .cast_into::() + .unwrap(); + assert_eq!(root_map.len(), 2); + + let nested_0 = root_map + .get_item("nested_0") + .unwrap() + .unwrap() + .cast_into::() + .unwrap(); + assert_eq!(nested_0.len(), 1); + let nested_key_0: String = nested_0 + .get_item("nested_key") + .unwrap() + .unwrap() + .extract() + .unwrap(); + assert_eq!(nested_key_0, "nested_value_0"); + + let nested_1 = root_map + 
.get_item("nested_1") + .unwrap() + .unwrap() + .cast_into::() + .unwrap(); + assert_eq!(nested_1.len(), 1); + let nested_key_1: String = nested_1 + .get_item("nested_key") + .unwrap() + .unwrap() + .extract() + .unwrap(); + assert_eq!(nested_key_1, "nested_value_1"); + }); +} + +#[test] +fn test_ser_invalid() { + Python::attach(|py| { + let root = Root { + root_key: String::from("root_value"), + root_map: BTreeMap::from([ + ( + String::from("nested_0"), + Nested { + nested_key: CannotSerialize {}, + }, + ), + ( + String::from("nested_1"), + Nested { + nested_key: CannotSerialize {}, + }, + ), + ]), + }; + + let ser = pythonize::Pythonizer::>::from(py); + let err = serde_path_to_error::serialize(&root, ser).unwrap_err(); + + assert_eq!(err.path().to_string(), "root_map.nested_0.nested_key"); + }); +}