forked from lance-format/lance
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdebug.rs
More file actions
148 lines (131 loc) · 4.57 KB
/
debug.rs
File metadata and controls
148 lines (131 loc) · 4.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The Lance Authors
use std::sync::Arc;
use lance::{datatypes::Schema, Error};
use lance_table::format::{DeletionFile, Fragment};
use pyo3::{exceptions::PyIOError, prelude::*};
use crate::{rt, utils::PyLance, Dataset};
/// Render the Lance schema of a dataset as a pretty-printed debug string.
///
/// Useful for inspecting the field ids and types recorded in the schema.
///
/// `dataset` is a Python object whose `_ds` attribute holds the native
/// [`Dataset`] wrapper. A Python error is returned if that attribute is
/// missing or cannot be extracted as a [`Dataset`].
#[pyfunction]
pub fn format_schema(dataset: &Bound<'_, PyAny>) -> PyResult<String> {
    let inner: Py<Dataset> = dataset.getattr("_ds")?.extract()?;
    // Keep the borrow guard in a named local so the schema reference stays
    // valid while it is being formatted.
    let guard = inner.bind(dataset.py()).borrow();
    Ok(format!("{:#?}", guard.ds.schema()))
}
/// Render the full Lance manifest of a dataset as a pretty-printed debug
/// string.
///
/// Nothing is written to stdout; the formatted text is returned to the caller.
///
/// `dataset` is a Python object whose `_ds` attribute holds the native
/// [`Dataset`] wrapper. A Python error is returned if that attribute is
/// missing or cannot be extracted as a [`Dataset`].
#[pyfunction]
pub fn format_manifest(dataset: &Bound<'_, PyAny>) -> PyResult<String> {
    let inner: Py<Dataset> = dataset.getattr("_ds")?.extract()?;
    // Keep the borrow guard in a named local so the manifest reference stays
    // valid while it is being formatted.
    let guard = inner.bind(dataset.py()).borrow();
    Ok(format!("{:#?}", guard.ds.manifest()))
}
/// Debug-printable snapshot of a [`Fragment`], produced by
/// `PrettyPrintableFragment::new` and rendered by `format_fragment`.
// The fields are never read directly; they exist only so that the derived
// `Debug` impl prints them, which is why `dead_code` is allowed here.
#[derive(Debug)]
#[allow(dead_code)]
struct PrettyPrintableFragment {
/// Copied from `Fragment::id`.
id: u64,
/// One entry per element of `Fragment::files`.
files: Vec<PrettyPrintableDataFile>,
/// Clone of `Fragment::deletion_file`.
deletion_file: Option<DeletionFile>,
/// Copied from `Fragment::physical_rows`.
physical_rows: Option<usize>,
}
/// Debug-printable snapshot of a single data file of a fragment, paired with
/// the projection of the dataset schema onto that file's field ids.
// As with `PrettyPrintableFragment`, the fields are only consumed by the
// derived `Debug` impl, hence `allow(dead_code)`.
#[derive(Debug)]
#[allow(dead_code)]
struct PrettyPrintableDataFile {
/// Clone of the data file's `path`.
path: String,
/// Clone of the data file's `fields`; also the ids used to build `schema`.
fields: Vec<i32>,
/// Clone of the data file's `column_indices`.
column_indices: Vec<i32>,
/// Result of `Schema::project_by_ids` on the dataset schema with `fields`.
schema: Schema,
/// Copied from the data file's `file_major_version`.
major_version: u32,
/// Copied from the data file's `file_minor_version`.
minor_version: u32,
}
impl PrettyPrintableFragment {
    /// Build the printable view of `fragment`.
    ///
    /// Each data file of the fragment is paired with the projection of
    /// `schema` (the dataset schema) onto the field ids listed for that file.
    fn new(fragment: &Fragment, schema: &Schema) -> Self {
        let mut files = Vec::with_capacity(fragment.files.len());
        for data_file in &fragment.files {
            // Narrow the dataset schema to the field ids recorded for this file.
            let projected = schema.project_by_ids(&data_file.fields, false);
            files.push(PrettyPrintableDataFile {
                path: data_file.path.clone(),
                fields: data_file.fields.clone(),
                column_indices: data_file.column_indices.clone(),
                schema: projected,
                major_version: data_file.file_major_version,
                minor_version: data_file.file_minor_version,
            });
        }

        Self {
            id: fragment.id,
            files,
            deletion_file: fragment.deletion_file.clone(),
            physical_rows: fragment.physical_rows,
        }
    }
}
/// Render a Lance fragment as a pretty-printed debug string.
///
/// The output lists the fragment's id, data files, deletion file and physical
/// row count. Each data file is shown together with the projection of the
/// dataset schema onto the field ids it contains.
///
/// `dataset` is a Python object whose `_ds` attribute holds the native
/// [`Dataset`] wrapper. A Python error is returned if that attribute is
/// missing or cannot be extracted as a [`Dataset`].
#[pyfunction]
pub fn format_fragment(
    fragment: PyLance<Fragment>,
    dataset: &Bound<'_, PyAny>,
) -> PyResult<String> {
    let inner: Py<Dataset> = dataset.getattr("_ds")?.extract()?;
    // Keep the borrow guard in a named local so the schema reference stays
    // valid while the printable view is built.
    let guard = inner.bind(dataset.py()).borrow();
    let printable = PrettyPrintableFragment::new(&fragment.0, guard.ds.schema());
    Ok(format!("{:#?}", printable))
}
/// Return a string representation of each transaction in the dataset, in
/// reverse chronological order.
///
/// Starting from the dataset's current version, the transaction of each
/// version is read and pretty-printed, then the previous version is checked
/// out, until `max_transactions` entries have been collected or no earlier
/// version can be found. A version for which no transaction is read
/// contributes a `None` entry.
///
/// If `max_transactions` is provided, only the most recent `max_transactions`
/// transactions will be returned. Defaults to 10. A value of 0 yields an empty
/// list.
///
/// # Errors
///
/// Returns a Python error if `dataset` has no `_ds` attribute holding a
/// [`Dataset`]. Raises `IOError` if a transaction cannot be read, or if
/// checking out an earlier version fails for any reason other than that
/// version not being found.
#[pyfunction]
#[pyo3(signature = (dataset, /, max_transactions = 10))]
pub fn list_transactions(
    dataset: &Bound<'_, PyAny>,
    max_transactions: usize,
) -> PyResult<Vec<Option<String>>> {
    let py = dataset.py();
    let dataset = dataset.getattr("_ds")?.extract::<Py<Dataset>>()?;
    let mut dataset = dataset.bind(py).borrow().ds.clone();

    // The loop below records one entry before it checks the limit, so a limit
    // of zero must be handled up front or it would still return one entry.
    if max_transactions == 0 {
        return Ok(Vec::new());
    }

    rt().block_on(Some(py), async move {
        let mut transactions = vec![];
        loop {
            let transaction = dataset.read_transaction().await.map_err(|err| {
                PyIOError::new_err(format!("Failed to read transaction file: {:?}", err))
            })?;
            transactions.push(transaction.map(|t| format!("{:#?}", t)));

            if transactions.len() >= max_transactions {
                break;
            }

            // Step back one version. `checked_sub` stops the walk instead of
            // underflowing if the current version number is already 0.
            let previous_version = match dataset.version().version.checked_sub(1) {
                Some(version) => version,
                None => break,
            };
            match dataset.checkout_version(previous_version).await {
                Ok(ds) => dataset = Arc::new(ds),
                // No earlier version available: the history ends here.
                Err(Error::DatasetNotFound { .. }) => break,
                Err(err) => {
                    return Err(PyIOError::new_err(format!(
                        "Failed to checkout version: {:?}",
                        err
                    )))
                }
            }
        }
        Ok(transactions)
    })?
}