forked from lance-format/lance
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_random_access.py
More file actions
37 lines (28 loc) · 1.21 KB
/
test_random_access.py
File metadata and controls
37 lines (28 loc) · 1.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright The Lance Authors
import time
from datetime import datetime

import lance
import pyarrow.parquet as pq
# This file compares the performance of two Lance data storage versions on the
# lineitem dataset ("2.0", reported as V1, and "2.1", reported as V2),
# specifically for random access scans
# Load the source table once; both Lance datasets are written from it.
tab = pq.read_table("~/lineitemsf1.snappy.parquet")

# Write the same table under the two storage versions being compared.
# mode="overwrite" lets the script be rerun: the default mode refuses to
# write when a dataset already exists at the target path.
lance.write_dataset(
    tab,
    "/tmp/lineitem.lancev1",
    data_storage_version="2.0",
    mode="overwrite",
)
lance.write_dataset(
    tab,
    "/tmp/lineitem.lancev2",
    data_storage_version="2.1",
    mode="overwrite",
)

# Open the datasets from disk; all queries below use these handles.
dsv1 = lance.dataset("/tmp/lineitem.lancev1")
dsv2 = lance.dataset("/tmp/lineitem.lancev2")

# Each entry runs one query against a dataset. Results are discarded: only
# the elapsed time is reported.
queries = (
    # Filtered scan, capped at 10000 rows.
    lambda ds: ds.to_table(filter="l_shipmode = 'FOB'", limit=10000),
    # Point lookups of a handful of row indices.
    lambda ds: ds.take([1, 40, 100, 130, 200]),
)

# Run every query against V1 and then V2, keeping the output order
# "filter V1, filter V2, take V1, take V2".
for run_query in queries:
    for label, ds in (("V1", dsv1), ("V2", dsv2)):
        # perf_counter is monotonic, so the measurement is not disturbed by
        # wall-clock adjustments the way datetime.now() differences are.
        start = time.perf_counter()
        run_query(ds)
        duration = time.perf_counter() - start
        print(f"{label} query time: {duration}s")