
Commit 61b40f1

committed Jul 19, 2024
feat: initial prototype for Apache Arrow implementation in Mojo
0 parents  commit 61b40f1

22 files changed · +2063 −0 lines changed
 

‎.gitignore

+162
@@ -0,0 +1,162 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
11+
build/
12+
develop-eggs/
13+
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
wheels/
23+
share/python-wheels/
24+
*.egg-info/
25+
.installed.cfg
26+
*.egg
27+
MANIFEST
28+
29+
# PyInstaller
30+
# Usually these files are written by a python script from a template
31+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
32+
*.manifest
33+
*.spec
34+
35+
# Installer logs
36+
pip-log.txt
37+
pip-delete-this-directory.txt
38+
39+
# Unit test / coverage reports
40+
htmlcov/
41+
.tox/
42+
.nox/
43+
.coverage
44+
.coverage.*
45+
.cache
46+
nosetests.xml
47+
coverage.xml
48+
*.cover
49+
*.py,cover
50+
.hypothesis/
51+
.pytest_cache/
52+
cover/
53+
54+
# Translations
55+
*.mo
56+
*.pot
57+
58+
# Django stuff:
59+
*.log
60+
local_settings.py
61+
db.sqlite3
62+
db.sqlite3-journal
63+
64+
# Flask stuff:
65+
instance/
66+
.webassets-cache
67+
68+
# Scrapy stuff:
69+
.scrapy
70+
71+
# Sphinx documentation
72+
docs/_build/
73+
74+
# PyBuilder
75+
.pybuilder/
76+
target/
77+
78+
# Jupyter Notebook
79+
.ipynb_checkpoints
80+
81+
# IPython
82+
profile_default/
83+
ipython_config.py
84+
85+
# pyenv
86+
# For a library or package, you might want to ignore these files since the code is
87+
# intended to run in multiple environments; otherwise, check them in:
88+
# .python-version
89+
90+
# pipenv
91+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
93+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
94+
# install all needed dependencies.
95+
#Pipfile.lock
96+
97+
# poetry
98+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99+
# This is especially recommended for binary packages to ensure reproducibility, and is more
100+
# commonly ignored for libraries.
101+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102+
#poetry.lock
103+
104+
# pdm
105+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106+
#pdm.lock
107+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108+
# in version control.
109+
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
110+
.pdm.toml
111+
.pdm-python
112+
.pdm-build/
113+
114+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115+
__pypackages__/
116+
117+
# Celery stuff
118+
celerybeat-schedule
119+
celerybeat.pid
120+
121+
# SageMath parsed files
122+
*.sage.py
123+
124+
# Environments
125+
.env
126+
.venv
127+
env/
128+
venv/
129+
ENV/
130+
env.bak/
131+
venv.bak/
132+
133+
# Spyder project settings
134+
.spyderproject
135+
.spyproject
136+
137+
# Rope project settings
138+
.ropeproject
139+
140+
# mkdocs documentation
141+
/site
142+
143+
# mypy
144+
.mypy_cache/
145+
.dmypy.json
146+
dmypy.json
147+
148+
# Pyre type checker
149+
.pyre/
150+
151+
# pytype static type analyzer
152+
.pytype/
153+
154+
# Cython debug symbols
155+
cython_debug/
156+
157+
# PyCharm
158+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
159+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
160+
# and can be added to the global gitignore or merged into this file. For a more nuclear
161+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
162+
#.idea/

‎LICENSE.txt

+202
@@ -0,0 +1,202 @@
1+
2+
Apache License
3+
Version 2.0, January 2004
4+
http://www.apache.org/licenses/
5+
6+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7+
8+
1. Definitions.
9+
10+
"License" shall mean the terms and conditions for use, reproduction,
11+
and distribution as defined by Sections 1 through 9 of this document.
12+
13+
"Licensor" shall mean the copyright owner or entity authorized by
14+
the copyright owner that is granting the License.
15+
16+
"Legal Entity" shall mean the union of the acting entity and all
17+
other entities that control, are controlled by, or are under common
18+
control with that entity. For the purposes of this definition,
19+
"control" means (i) the power, direct or indirect, to cause the
20+
direction or management of such entity, whether by contract or
21+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
22+
outstanding shares, or (iii) beneficial ownership of such entity.
23+
24+
"You" (or "Your") shall mean an individual or Legal Entity
25+
exercising permissions granted by this License.
26+
27+
"Source" form shall mean the preferred form for making modifications,
28+
including but not limited to software source code, documentation
29+
source, and configuration files.
30+
31+
"Object" form shall mean any form resulting from mechanical
32+
transformation or translation of a Source form, including but
33+
not limited to compiled object code, generated documentation,
34+
and conversions to other media types.
35+
36+
"Work" shall mean the work of authorship, whether in Source or
37+
Object form, made available under the License, as indicated by a
38+
copyright notice that is included in or attached to the work
39+
(an example is provided in the Appendix below).
40+
41+
"Derivative Works" shall mean any work, whether in Source or Object
42+
form, that is based on (or derived from) the Work and for which the
43+
editorial revisions, annotations, elaborations, or other modifications
44+
represent, as a whole, an original work of authorship. For the purposes
45+
of this License, Derivative Works shall not include works that remain
46+
separable from, or merely link (or bind by name) to the interfaces of,
47+
the Work and Derivative Works thereof.
48+
49+
"Contribution" shall mean any work of authorship, including
50+
the original version of the Work and any modifications or additions
51+
to that Work or Derivative Works thereof, that is intentionally
52+
submitted to Licensor for inclusion in the Work by the copyright owner
53+
or by an individual or Legal Entity authorized to submit on behalf of
54+
the copyright owner. For the purposes of this definition, "submitted"
55+
means any form of electronic, verbal, or written communication sent
56+
to the Licensor or its representatives, including but not limited to
57+
communication on electronic mailing lists, source code control systems,
58+
and issue tracking systems that are managed by, or on behalf of, the
59+
Licensor for the purpose of discussing and improving the Work, but
60+
excluding communication that is conspicuously marked or otherwise
61+
designated in writing by the copyright owner as "Not a Contribution."
62+
63+
"Contributor" shall mean Licensor and any individual or Legal Entity
64+
on behalf of whom a Contribution has been received by Licensor and
65+
subsequently incorporated within the Work.
66+
67+
2. Grant of Copyright License. Subject to the terms and conditions of
68+
this License, each Contributor hereby grants to You a perpetual,
69+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70+
copyright license to reproduce, prepare Derivative Works of,
71+
publicly display, publicly perform, sublicense, and distribute the
72+
Work and such Derivative Works in Source or Object form.
73+
74+
3. Grant of Patent License. Subject to the terms and conditions of
75+
this License, each Contributor hereby grants to You a perpetual,
76+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77+
(except as stated in this section) patent license to make, have made,
78+
use, offer to sell, sell, import, and otherwise transfer the Work,
79+
where such license applies only to those patent claims licensable
80+
by such Contributor that are necessarily infringed by their
81+
Contribution(s) alone or by combination of their Contribution(s)
82+
with the Work to which such Contribution(s) was submitted. If You
83+
institute patent litigation against any entity (including a
84+
cross-claim or counterclaim in a lawsuit) alleging that the Work
85+
or a Contribution incorporated within the Work constitutes direct
86+
or contributory patent infringement, then any patent licenses
87+
granted to You under this License for that Work shall terminate
88+
as of the date such litigation is filed.
89+
90+
4. Redistribution. You may reproduce and distribute copies of the
91+
Work or Derivative Works thereof in any medium, with or without
92+
modifications, and in Source or Object form, provided that You
93+
meet the following conditions:
94+
95+
(a) You must give any other recipients of the Work or
96+
Derivative Works a copy of this License; and
97+
98+
(b) You must cause any modified files to carry prominent notices
99+
stating that You changed the files; and
100+
101+
(c) You must retain, in the Source form of any Derivative Works
102+
that You distribute, all copyright, patent, trademark, and
103+
attribution notices from the Source form of the Work,
104+
excluding those notices that do not pertain to any part of
105+
the Derivative Works; and
106+
107+
(d) If the Work includes a "NOTICE" text file as part of its
108+
distribution, then any Derivative Works that You distribute must
109+
include a readable copy of the attribution notices contained
110+
within such NOTICE file, excluding those notices that do not
111+
pertain to any part of the Derivative Works, in at least one
112+
of the following places: within a NOTICE text file distributed
113+
as part of the Derivative Works; within the Source form or
114+
documentation, if provided along with the Derivative Works; or,
115+
within a display generated by the Derivative Works, if and
116+
wherever such third-party notices normally appear. The contents
117+
of the NOTICE file are for informational purposes only and
118+
do not modify the License. You may add Your own attribution
119+
notices within Derivative Works that You distribute, alongside
120+
or as an addendum to the NOTICE text from the Work, provided
121+
that such additional attribution notices cannot be construed
122+
as modifying the License.
123+
124+
You may add Your own copyright statement to Your modifications and
125+
may provide additional or different license terms and conditions
126+
for use, reproduction, or distribution of Your modifications, or
127+
for any such Derivative Works as a whole, provided Your use,
128+
reproduction, and distribution of the Work otherwise complies with
129+
the conditions stated in this License.
130+
131+
5. Submission of Contributions. Unless You explicitly state otherwise,
132+
any Contribution intentionally submitted for inclusion in the Work
133+
by You to the Licensor shall be under the terms and conditions of
134+
this License, without any additional terms or conditions.
135+
Notwithstanding the above, nothing herein shall supersede or modify
136+
the terms of any separate license agreement you may have executed
137+
with Licensor regarding such Contributions.
138+
139+
6. Trademarks. This License does not grant permission to use the trade
140+
names, trademarks, service marks, or product names of the Licensor,
141+
except as required for reasonable and customary use in describing the
142+
origin of the Work and reproducing the content of the NOTICE file.
143+
144+
7. Disclaimer of Warranty. Unless required by applicable law or
145+
agreed to in writing, Licensor provides the Work (and each
146+
Contributor provides its Contributions) on an "AS IS" BASIS,
147+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148+
implied, including, without limitation, any warranties or conditions
149+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150+
PARTICULAR PURPOSE. You are solely responsible for determining the
151+
appropriateness of using or redistributing the Work and assume any
152+
risks associated with Your exercise of permissions under this License.
153+
154+
8. Limitation of Liability. In no event and under no legal theory,
155+
whether in tort (including negligence), contract, or otherwise,
156+
unless required by applicable law (such as deliberate and grossly
157+
negligent acts) or agreed to in writing, shall any Contributor be
158+
liable to You for damages, including any direct, indirect, special,
159+
incidental, or consequential damages of any character arising as a
160+
result of this License or out of the use or inability to use the
161+
Work (including but not limited to damages for loss of goodwill,
162+
work stoppage, computer failure or malfunction, or any and all
163+
other commercial damages or losses), even if such Contributor
164+
has been advised of the possibility of such damages.
165+
166+
9. Accepting Warranty or Additional Liability. While redistributing
167+
the Work or Derivative Works thereof, You may choose to offer,
168+
and charge a fee for, acceptance of support, warranty, indemnity,
169+
or other liability obligations and/or rights consistent with this
170+
License. However, in accepting such obligations, You may act only
171+
on Your own behalf and on Your sole responsibility, not on behalf
172+
of any other Contributor, and only if You agree to indemnify,
173+
defend, and hold each Contributor harmless for any liability
174+
incurred by, or claims asserted against, such Contributor by reason
175+
of your accepting any such warranty or additional liability.
176+
177+
END OF TERMS AND CONDITIONS
178+
179+
APPENDIX: How to apply the Apache License to your work.
180+
181+
To apply the Apache License to your work, attach the following
182+
boilerplate notice, with the fields enclosed by brackets "[]"
183+
replaced with your own identifying information. (Don't include
184+
the brackets!) The text should be enclosed in the appropriate
185+
comment syntax for the file format. We also recommend that a
186+
file or class name and description of purpose be included on the
187+
same "printed page" as the copyright notice for easier
188+
identification within third-party archives.
189+
190+
Copyright 2024 Szűcs Krisztián
191+
192+
Licensed under the Apache License, Version 2.0 (the "License");
193+
you may not use this file except in compliance with the License.
194+
You may obtain a copy of the License at
195+
196+
http://www.apache.org/licenses/LICENSE-2.0
197+
198+
Unless required by applicable law or agreed to in writing, software
199+
distributed under the License is distributed on an "AS IS" BASIS,
200+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201+
See the License for the specific language governing permissions and
202+
limitations under the License.

‎README.md

+150
@@ -0,0 +1,150 @@
# In-progress implementation of Apache Arrow in Mojo

The initial motivation for this project was to learn the Mojo programming language, and the best way to learn is by doing. Since I've been involved in the Apache Arrow project for a while, I thought it would be a good idea to implement the Arrow specification in Mojo.

The implementation is far from complete or usable in practice, but I prefer to share it at this early stage so others can join the effort.

### What is Arrow?

[Apache Arrow](https://arrow.apache.org) is a cross-language development platform for in-memory data. It specifies a standardized language-independent columnar memory format for flat and hierarchical data, organized for efficient analytic operations on modern hardware like CPUs and GPUs.

### What is Mojo?

[Mojo](https://www.modular.com/mojo) is a promising new programming language built on top of MLIR, providing the expressiveness of Python with the performance of systems programming languages.

### Why Arrow in Mojo?

I find the Mojo language really promising, and Arrow should be a first-class citizen in Mojo's ecosystem. Since the language itself is still in its early stages and under heavy development, this Arrow implementation is also still in an experimental phase.

## Currently implemented abstractions

- `Buffer` providing the memory management for contiguous memory regions.
- `DataType` for defining the `Arrow` data types.
- `ArrayData` as the common layout for all `Arrow` arrays.
- Typed array views for primitive, string and nested arrow arrays providing more convenient and efficient access to the underlying `ArrayData`.
- [Arrow C Data Interface](https://arrow.apache.org/docs/format/CDataInterface.html) to exchange arrow data with other implementations in a zero-copy manner; only one direction (consuming) is implemented for now.

## Examples

### Creating a primitive array

```mojo
from firebolt.arrays import array, StringArray, ListArray, Int64Array
from firebolt.dtypes import int8, bool_, list_

var a = array[int8](1, 2, 3, 4)
var b = array[bool_](True, False, True)
```
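
The typed views also provide element access; a short sketch in the style of the tests shipped in this commit (`unsafe_get` performs no bounds checking, `assert_equal` comes from the `testing` module):

```mojo
assert_equal(len(a), 4)
assert_equal(a.dtype, int8)
assert_equal(a.unsafe_get(0), 1)
assert_equal(a.unsafe_get(3), 4)
```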

### Creating a string array

```mojo
var s = StringArray()
s.grow(2)  # unsafe_append does not resize the buffers, so reserve capacity first
s.unsafe_append("hello")
s.unsafe_append("world")
```
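
Strings can be read back with `unsafe_get`, mirroring the test suite in this commit:

```mojo
assert_equal(len(s), 2)
assert_equal(s.unsafe_get(0), "hello")
assert_equal(s.unsafe_get(1), "world")
```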

More convenient APIs are planned to be added in the future.

### Creating a list array

```mojo
var ints = Int64Array()
var lists = ListArray(ints)

ints.append(1)
ints.append(2)
ints.append(3)
lists.unsafe_append(True)
assert_equal(len(lists), 1)
assert_equal(lists.data.dtype, list_(int64))
```

### Zero-copy access of a PyArrow array in Mojo

For more details see the [Arrow C Data Interface](https://arrow.apache.org/docs/format/CDataInterface.html).

```mojo
var pa = Python.import_module("pyarrow")
var pyarr = pa.array(
    [1, 2, 3, 4, 5], mask=[False, False, False, False, True]
)

var c_array = CArrowArray.from_pyarrow(pyarr)
var c_schema = CArrowSchema.from_pyarrow(pyarr.type)

var dtype = c_schema.to_dtype()
assert_equal(dtype, int64)
assert_equal(c_array.length, 5)
assert_equal(c_array.null_count, 1)
assert_equal(c_array.offset, 0)
assert_equal(c_array.n_buffers, 2)
assert_equal(c_array.n_children, 0)

var data = c_array.to_array(dtype)
var array = data.as_int64()
assert_equal(array.bitmap[].size, 64)
assert_equal(array.is_valid(0), True)
assert_equal(array.is_valid(1), True)
assert_equal(array.is_valid(2), True)
assert_equal(array.is_valid(3), True)
assert_equal(array.is_valid(4), False)
assert_equal(array.unsafe_get(0), 1)
assert_equal(array.unsafe_get(1), 2)
assert_equal(array.unsafe_get(2), 3)
assert_equal(array.unsafe_get(3), 4)
assert_equal(array.unsafe_get(4), 0)

array.unsafe_set(0, 10)
assert_equal(array.unsafe_get(0), 10)
assert_equal(str(pyarr), "[\n 10,\n 2,\n 3,\n 4,\n null\n]")
```

## Rough edges and limitations

So far the implementation has been focused on providing a solid foundation for further development, not on memory efficiency, performance or completeness.

A couple of notable limitations:

1. The chosen abstractions may not be ideal, but several constraints shaped them:
   - Mojo lacks support for dynamic dispatch at the moment,
   - variant elements must be copyable,
   - references and lifetimes are not hardened yet,
   - expressing nested data types is not straightforward.

   For these reasons polymorphism is achieved by defining a common layout for type hierarchies and providing specialized views for each child type (see the sketch after this list). This approach seems to work well for nested `DataType` and `Array` types, and the implementation can be continued while `Mojo` gains the features necessary to rethink these abstractions.

2. The `C Data Interface` doesn't call the release callbacks yet, and only consuming arrow data is implemented for now because a `Mojo` callback cannot be passed to a `C` function yet. As Mojo matures, this limitation will certainly be addressed.

3. Conformance against the `Arrow` specification is tested by reading arrow data from the Python implementation, `PyArrow`, since `Mojo` can already call Python functions. If the project manages to evolve further, it should be wired into the Arrow integration testing suite, but that first requires a `JSON` library in `Mojo`.

4. Only boolean, numeric, string, list and struct data types are supported for now, since these cover most of the implementation complexity. Support for the rest of the arrow data types can be added incrementally.

5. A convenient API hasn't been designed yet; preferably that should be tackled once the implementation is more mature.

6. No `ChunkedArray`s, `RecordBatch`es or `Table`s are implemented yet, but they will be soon.

7. No CI has been set up yet, but it is going to be in focus really soon.

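A minimal sketch of the layout-plus-view pattern from item 1, using only APIs present in this commit (`as_data` hands out the generic `ArrayData` layout, `as_int64` re-wraps it as a typed view, and `assert_equal` comes from the `testing` module as in the examples above):

```mojo
from firebolt.arrays import array
from firebolt.dtypes import int64

var ints = array[int64](1, 2, 3)

# Every typed array exposes the generic layout it is a view over...
var data = ints.as_data()
assert_equal(data.dtype, int64)
assert_equal(data.length, 3)

# ...and the generic layout can be re-viewed as a typed array.
var typed = data.as_int64()
assert_equal(typed.unsafe_get(2), 3)
```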

## Development

I shared the implementation in its current state so others can join the effort.
If the project manages to evolve, ideally it should be donated to the upstream Apache Arrow project.

Given an existing Mojo installation, the tests can be run with:

```bash
cd firebolt
mojo test firebolt -I .
```
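
An individual test module can presumably be targeted the same way, for example (path taken from this commit's layout, flags assumed to match the invocation above):

```bash
mojo test firebolt/tests/test_buffers.mojo -I .
```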

Tested with nightly `Mojo`:

```bash
$ mojo --version
mojo 2024.7.1805 (0a697965)
```

## References

- [Another effort to implement Arrow in Mojo](https://github.com/mojo-data/arrow.mojo)

‎firebolt/__init__.mojo

Whitespace-only changes.

‎firebolt/arrays/__init__.mojo

+18
@@ -0,0 +1,18 @@
from .base import *
from .binary import *
from .nested import *
from .primitive import *


# fn array[T: DType](*values: Scalar[T]) -> PrimitiveArray[DataType(T)]:
#     var a = PrimitiveArray[DataType(T)](len(values))
#     for value in values:
#         a.unsafe_append(value)
#     return a^


fn array[T: DataType](*values: Scalar[T.native]) -> PrimitiveArray[T]:
    var a = PrimitiveArray[T](len(values))
    for value in values:
        a.unsafe_append(value)
    return a^

‎firebolt/arrays/base.mojo

+70
@@ -0,0 +1,70 @@
from .primitive import *


trait Array(Movable, Sized):
    fn as_data(self) -> ArrayData:
        ...


@value
struct ArrayData(Movable):
    var dtype: DataType
    var length: Int
    var bitmap: Arc[Buffer]
    var buffers: List[Arc[Buffer]]
    var children: List[Arc[ArrayData]]

    fn is_valid(self, index: Int) -> Bool:
        return self.bitmap[].unsafe_get[DType.bool](index)

    fn as_primitive[T: DataType](self) raises -> PrimitiveArray[T]:
        return PrimitiveArray[T](self)

    fn as_int8(self) raises -> Int8Array:
        return Int8Array(self)

    fn as_int16(self) raises -> Int16Array:
        return Int16Array(self)

    fn as_int32(self) raises -> Int32Array:
        return Int32Array(self)

    fn as_int64(self) raises -> Int64Array:
        return Int64Array(self)

    fn as_uint8(self) raises -> UInt8Array:
        return UInt8Array(self)

    fn as_uint16(self) raises -> UInt16Array:
        return UInt16Array(self)

    fn as_uint32(self) raises -> UInt32Array:
        return UInt32Array(self)

    fn as_uint64(self) raises -> UInt64Array:
        return UInt64Array(self)

    fn as_float32(self) raises -> Float32Array:
        return Float32Array(self)

    fn as_float64(self) raises -> Float64Array:
        return Float64Array(self)

    fn as_string(self) raises -> StringArray:
        return StringArray(self)

    fn as_list(self) raises -> ListArray:
        return ListArray(self)


struct ChunkedArray:
    var dtype: DataType
    var length: Int
    var chunks: List[ArrayData]

    fn __init__(inout self, dtype: DataType, chunks: List[ArrayData]):
        self.dtype = dtype
        self.chunks = chunks
        self.length = 0
        for chunk in chunks:
            # List iteration yields references, hence the dereference.
            self.length += chunk[].length
‎firebolt/arrays/binary.mojo

+99
@@ -0,0 +1,99 @@
from ..buffers import Buffer
from ..dtypes import *


struct StringArray(Array):
    var data: ArrayData
    var bitmap: Arc[Buffer]
    var offsets: Arc[Buffer]
    var values: Arc[Buffer]
    var capacity: Int

    fn __init__(inout self, data: ArrayData) raises:
        if data.dtype != string:
            raise Error("Unexpected dtype")
        elif len(data.buffers) != 2:
            raise Error("StringArray requires exactly two buffers")

        self.data = data
        self.bitmap = data.bitmap
        self.offsets = data.buffers[0]
        self.values = data.buffers[1]
        self.capacity = data.length

    fn __init__(inout self, capacity: Int = 0):
        var bitmap = Buffer.alloc[DType.bool](capacity)
        # TODO(kszucs): initial values capacity should be either 0 or some value received from the user
        var values = Buffer.alloc[DType.uint8](capacity)
        var offsets = Buffer.alloc[DType.uint32](capacity + 1)
        offsets.unsafe_set[DType.uint32](0, 0)

        self.capacity = capacity
        self.bitmap = bitmap^
        self.offsets = offsets^
        self.values = values^
        self.data = ArrayData(
            dtype=string,
            length=0,
            bitmap=self.bitmap,
            buffers=List(self.offsets, self.values),
            children=List[Arc[ArrayData]](),
        )

    fn __moveinit__(inout self, owned existing: Self):
        self.data = existing.data^
        self.bitmap = existing.bitmap^
        self.offsets = existing.offsets^
        self.values = existing.values^
        self.capacity = existing.capacity

    fn __len__(self) -> Int:
        return self.data.length

    fn as_data(self) -> ArrayData:
        return self.data

    fn grow(inout self, capacity: Int):
        self.bitmap[].grow[DType.bool](capacity)
        self.offsets[].grow[DType.uint32](capacity + 1)
        self.capacity = capacity

    # fn shrink_to_fit(inout self):

    fn is_valid(self, index: Int) -> Bool:
        return self.bitmap[].unsafe_get[DType.bool](index)

    fn unsafe_append(inout self, value: String):
        # todo(kszucs): use unsafe set
        var index = self.data.length
        var last_offset = self.offsets[].unsafe_get[DType.uint32](index)
        var next_offset = last_offset + len(value)
        self.data.length += 1
        self.bitmap[].unsafe_set[DType.bool](index, True)
        self.offsets[].unsafe_set[DType.uint32](index + 1, next_offset)
        self.values[].grow[DType.uint8](next_offset)
        var dst_address = self.values[].offset(int(last_offset))
        var src_address = value.unsafe_ptr()
        memcpy(dst_address, src_address, len(value))

    fn unsafe_get(self, index: Int) -> String:
        var start_offset = self.offsets[].unsafe_get[DType.int32](index)
        var end_offset = self.offsets[].unsafe_get[DType.int32](index + 1)
        var address = self.values[].offset(int(start_offset))
        var length = int(end_offset - start_offset)
        return StringRef(address, length)

    fn unsafe_set(inout self, index: Int, value: String) raises:
        var start_offset = self.offsets[].unsafe_get[DType.int32](index)
        var end_offset = self.offsets[].unsafe_get[DType.int32](index + 1)
        var length = int(end_offset - start_offset)

        if length != len(value):
            raise Error(
                "String length mismatch, inplace update must have the same"
                " length"
            )

        var dst_address = self.values[].offset(int(start_offset))
        var src_address = value.unsafe_ptr()
        memcpy(dst_address, src_address, length)

‎firebolt/arrays/nested.mojo

+108
@@ -0,0 +1,108 @@
from ..buffers import Buffer
from ..dtypes import *


struct ListArray(Array):
    var data: ArrayData
    var bitmap: Arc[Buffer]
    var offsets: Arc[Buffer]
    var values: Arc[ArrayData]
    var capacity: Int

    fn __init__(inout self, data: ArrayData) raises:
        if not data.dtype.is_list():
            raise Error("Unexpected dtype")
        elif len(data.buffers) != 1:
            raise Error("ListArray requires exactly one buffer")
        elif len(data.children) != 1:
            raise Error("ListArray requires exactly one child array")

        self.data = data
        self.bitmap = data.bitmap
        self.offsets = data.buffers[0]
        self.values = data.children[0]
        self.capacity = data.length

    fn __init__[T: Array](inout self, values: T, capacity: Int = 0):
        var bitmap = Buffer.alloc[DType.bool](capacity)
        var offsets = Buffer.alloc[DType.uint32](capacity + 1)
        offsets.unsafe_set[DType.uint32](0, 0)

        var values_data = values.as_data()
        var list_dtype = list_(values_data.dtype)

        self.capacity = capacity
        self.bitmap = bitmap^
        self.offsets = offsets^
        self.values = values_data^
        self.data = ArrayData(
            dtype=list_dtype,
            length=0,
            bitmap=self.bitmap,
            buffers=List(self.offsets),
            children=List(self.values),
        )

    fn __moveinit__(inout self, owned existing: Self):
        self.data = existing.data^
        self.bitmap = existing.bitmap^
        self.offsets = existing.offsets^
        self.values = existing.values^
        self.capacity = existing.capacity

    fn __len__(self) -> Int:
        return self.data.length

    fn as_data(self) -> ArrayData:
        return self.data

    fn is_valid(self, index: Int) -> Bool:
        return self.bitmap[].unsafe_get[DType.bool](index)

    fn unsafe_append(inout self, is_valid: Bool):
        self.bitmap[].unsafe_set[DType.bool](self.data.length, is_valid)
        self.offsets[].unsafe_set[DType.uint32](
            self.data.length + 1, self.values[].length
        )
        self.data.length += 1


struct StructArray(Array):
    var data: ArrayData
    var bitmap: Arc[Buffer]
    var fields: List[Arc[ArrayData]]
    var capacity: Int

    fn __init__(inout self, fields: List[Array], capacity: Int = 0):
        var field_datas = List[Arc[ArrayData]]()
        var field_dtypes = List[DataType]()
        for field in fields:
            var data = field.as_data()
            field_dtypes.append(data.dtype)
            field_datas.append(data^)

        var bitmap = Buffer.alloc[DType.bool](capacity)
        var struct_dtype = struct_(field_dtypes)

        self.capacity = capacity
        self.bitmap = bitmap^
        self.fields = field_datas^
        self.data = ArrayData(
            dtype=struct_dtype,
            length=0,
            bitmap=self.bitmap,
            buffers=List(),
            children=self.fields,
        )

    fn __moveinit__(inout self, owned existing: Self):
        self.data = existing.data^
        self.bitmap = existing.bitmap^
        self.fields = existing.fields^
        self.capacity = existing.capacity

    fn __len__(self) -> Int:
        return self.data.length

    fn as_data(self) -> ArrayData:
        return self.data

‎firebolt/arrays/primitive.mojo

+95
@@ -0,0 +1,95 @@
from ..buffers import Buffer
from ..dtypes import *


struct PrimitiveArray[T: DataType](Array):
    alias dtype = T
    alias scalar = Scalar[T.native]
    var data: ArrayData
    var bitmap: Arc[Buffer]
    var buffer: Arc[Buffer]
    var capacity: Int

    fn __init__(inout self, data: ArrayData) raises:
        # TODO(kszucs): put a dtype constraint here
        if data.dtype != T:
            raise Error("Unexpected dtype")
        elif len(data.buffers) != 1:
            raise Error("PrimitiveArray requires exactly one buffer")

        self.data = data
        self.bitmap = data.bitmap
        self.buffer = data.buffers[0]
        self.capacity = data.length

    fn __init__(inout self, capacity: Int = 0):
        self.capacity = capacity
        self.bitmap = Buffer.alloc[DType.bool](capacity)
        self.buffer = Buffer.alloc[T.native](capacity)
        self.data = ArrayData(
            dtype=T,
            length=0,
            bitmap=self.bitmap,
            buffers=List(self.buffer),
            children=List[Arc[ArrayData]](),
        )

    fn __moveinit__(inout self, owned existing: Self):
        self.data = existing.data^
        self.bitmap = existing.bitmap^
        self.buffer = existing.buffer^
        self.capacity = existing.capacity

    fn as_data(self) -> ArrayData:
        return self.data

    fn grow(inout self, capacity: Int):
        self.bitmap[].grow[DType.bool](capacity)
        self.buffer[].grow[T.native](capacity)
        self.capacity = capacity

    @always_inline
    fn __len__(self) -> Int:
        return self.data.length

    @always_inline
    fn is_valid(self, index: Int) -> Bool:
        return self.bitmap[].unsafe_get[DType.bool](index)

    @always_inline
    fn unsafe_get(self, index: Int) -> Self.scalar:
        return self.buffer[].unsafe_get[T.native](index)

    @always_inline
    fn unsafe_set(inout self, index: Int, value: Self.scalar):
        self.bitmap[].unsafe_set[DType.bool](index, True)
        self.buffer[].unsafe_set[T.native](index, value)

    @always_inline
    fn unsafe_append(inout self, value: Self.scalar):
        self.unsafe_set(self.data.length, value)
        self.data.length += 1

    fn append(inout self, value: Self.scalar):
        if self.data.length >= self.capacity:
            # Doubling a zero capacity would never grow, so start from one.
            self.grow(max(self.capacity * 2, 1))
        self.unsafe_append(value)

    # fn append(inout self, value: Optional[Self.scalar]):

    fn extend(inout self, values: List[Self.scalar]):
        if self.data.length + len(values) >= self.capacity:
            self.grow(self.capacity + len(values))
        for value in values:
            # List iteration yields references, hence the dereference.
            self.unsafe_append(value[])


alias BoolArray = PrimitiveArray[bool_]
alias Int8Array = PrimitiveArray[int8]
alias Int16Array = PrimitiveArray[int16]
alias Int32Array = PrimitiveArray[int32]
alias Int64Array = PrimitiveArray[int64]
alias UInt8Array = PrimitiveArray[uint8]
alias UInt16Array = PrimitiveArray[uint16]
alias UInt32Array = PrimitiveArray[uint32]
alias UInt64Array = PrimitiveArray[uint64]
alias Float32Array = PrimitiveArray[float32]
alias Float64Array = PrimitiveArray[float64]

‎firebolt/arrays/tests/test_base.mojo

Whitespace-only changes.
+23
@@ -0,0 +1,23 @@
1+
from testing import assert_equal, assert_true, assert_false
2+
3+
4+
from firebolt.arrays import *
5+
from firebolt.dtypes import *
6+
7+
8+
def test_string_builder():
9+
var a = StringArray()
10+
assert_equal(len(a), 0)
11+
assert_equal(a.capacity, 0)
12+
13+
a.grow(2)
14+
assert_equal(len(a), 0)
15+
assert_equal(a.capacity, 2)
16+
17+
a.unsafe_append("hello")
18+
a.unsafe_append("world")
19+
assert_equal(len(a), 2)
20+
assert_equal(a.capacity, 2)
21+
22+
var s = a.unsafe_get(0)
23+
assert_equal(s, "hello")
+34
@@ -0,0 +1,34 @@
1+
from testing import assert_equal, assert_true, assert_false
2+
3+
4+
from firebolt.arrays import *
5+
from firebolt.dtypes import *
6+
7+
8+
def test_list_int_array():
9+
var ints = Int64Array()
10+
var lists = ListArray(ints)
11+
assert_equal(lists.data.dtype, list_(int64))
12+
13+
ints.append(1)
14+
ints.append(2)
15+
ints.append(3)
16+
lists.unsafe_append(True)
17+
assert_equal(len(lists), 1)
18+
19+
var data = lists.as_data()
20+
assert_equal(data.length, 1)
21+
22+
var arr = data.as_list()
23+
assert_equal(len(arr), 1)
24+
25+
26+
def test_list_bool_array():
27+
var bools = BoolArray()
28+
var lists = ListArray(bools)
29+
30+
bools.append(True)
31+
bools.append(False)
32+
bools.append(True)
33+
lists.unsafe_append(True)
34+
assert_equal(len(lists), 1)
+60
@@ -0,0 +1,60 @@
1+
from testing import assert_equal, assert_true, assert_false
2+
3+
4+
from firebolt.arrays import *
5+
6+
7+
def test_boolean_array():
8+
var a = BoolArray()
9+
assert_equal(len(a), 0)
10+
assert_equal(a.capacity, 0)
11+
12+
a.grow(3)
13+
assert_equal(len(a), 0)
14+
assert_equal(a.capacity, 3)
15+
16+
a.append(True)
17+
a.append(False)
18+
a.append(True)
19+
assert_equal(len(a), 3)
20+
assert_equal(a.capacity, 3)
21+
22+
a.append(True)
23+
assert_equal(len(a), 4)
24+
assert_equal(a.capacity, 6)
25+
assert_true(a.is_valid(0))
26+
assert_true(a.is_valid(1))
27+
assert_true(a.is_valid(2))
28+
assert_true(a.is_valid(3))
29+
30+
var d = a.as_data()
31+
assert_equal(d.length, 4)
32+
33+
var b = d.as_primitive[bool_]()
34+
35+
36+
def test_e():
37+
var a = Int8Array()
38+
assert_equal(len(a), 0)
39+
assert_equal(a.capacity, 0)
40+
a.unsafe_append(1)
41+
a.unsafe_append(2)
42+
a.unsafe_append(3)
43+
assert_equal(len(a), 3)
44+
45+
46+
def test_array_from_bools():
47+
var a = array[bool_](True, False, True)
48+
assert_equal(len(a), 3)
49+
assert_equal(a.dtype, bool_)
50+
assert_true(a.unsafe_get(0))
51+
assert_false(a.unsafe_get(1))
52+
assert_true(a.unsafe_get(2))
53+
54+
55+
def test_array_from_ints():
56+
var g = array[int8](1, 2)
57+
assert_equal(len(g), 2)
58+
assert_equal(g.dtype, int8)
59+
assert_equal(g.unsafe_get(0), 1)
60+
assert_equal(g.unsafe_get(1), 2)

‎firebolt/buffers.mojo

+102
@@ -0,0 +1,102 @@
import math


fn _required_bytes(length: Int, T: DType) -> Int:
    var size: Int
    if T is DType.bool:
        size = math.ceildiv(length, 8)
    else:
        size = length * T.sizeof()
    return math.align_up(size, 64)


struct Buffer(Movable):
    var ptr: DTypePointer[DType.uint8]
    var size: Int
    var owns: Bool

    fn __init__(
        inout self, ptr: DTypePointer[DType.uint8], size: Int, owns: Bool = True
    ):
        self.ptr = ptr
        self.size = size
        self.owns = owns

    fn __moveinit__(inout self, owned existing: Self):
        self.ptr = existing.ptr
        self.size = existing.size
        self.owns = existing.owns

    @staticmethod
    fn alloc[I: Intable, //, T: DType = DType.uint8](length: I) -> Buffer:
        var size = _required_bytes(int(length), T)
        var ptr = DTypePointer[DType.uint8].alloc(size, alignment=64)
        memset_zero(ptr.bitcast[UInt8](), size)
        return Buffer(ptr, size)

    @staticmethod
    fn view[
        I: Intable, //
    ](
        ptr: UnsafePointer[NoneType], length: I, dtype: DType = DType.uint8
    ) -> Buffer:
        var size = _required_bytes(int(length), dtype)
        return Buffer(ptr.bitcast[UInt8](), size, owns=False)

    @always_inline
    fn offset(self, index: Int) -> UnsafePointer[UInt8]:
        return (self.ptr + index).bitcast[UInt8]()

    fn grow[
        I: Intable, //, T: DType = DType.uint8
    ](inout self, target_length: I):
        if self.length[T]() >= int(target_length):
            return

        var new = Buffer.alloc[T](target_length)
        memcpy(new.ptr.bitcast[UInt8](), self.ptr.bitcast[UInt8](), self.size)
        self.ptr.free()
        self.ptr = new.ptr
        self.size = new.size
        new.ptr = DTypePointer[DType.uint8]()

    fn __del__(owned self):
        if self.owns:
            self.ptr.free()

    @always_inline
    fn length[T: DType = DType.uint8](self) -> Int:
        @parameter
        if T is DType.bool:
            return self.size * 8
        else:
            return self.size // sizeof[T]()

    @always_inline
    fn unsafe_get[T: DType = DType.uint8](self, index: Int) -> Scalar[T]:
        @parameter
        if T is DType.bool:
            var byte_index = index // 8
            var bit_index = index % 8
            var byte = self.ptr[byte_index]
            return (byte & (1 << bit_index)) != 0
        else:
            return self.ptr.bitcast[T]()[index]

    @always_inline
    fn unsafe_set[
        T: DType = DType.uint8
    ](inout self, index: Int, value: Scalar[T]):
        @parameter
        if T is DType.bool:
            var byte_index = index // 8
            var bit_index = index % 8
            var byte = self.ptr[byte_index]
            if value:
                self.ptr[byte_index] = byte | (1 << bit_index)
            else:
                self.ptr[byte_index] = byte & ~(1 << bit_index)
        else:
            self.ptr.bitcast[T]()[index] = value

    # fn unsafe_ptr() -> UnsafePointer[UInt8]

‎firebolt/c_data.mojo

+241
@@ -0,0 +1,241 @@
1+
import math
2+
from python import Python
3+
from sys.ffi import C_char
4+
5+
from .dtypes import *
6+
from .arrays import *
7+
8+
alias ARROW_FLAG_NULLABLE = 2
9+
10+
11+
alias CSchemaReleaseFunction = fn (
12+
schema: UnsafePointer[CArrowSchema]
13+
) -> NoneType
14+
alias CArrayReleaseFunction = fn (
15+
schema: UnsafePointer[CArrowArray]
16+
) -> NoneType
17+
18+
19+
@value
20+
struct CArrowSchema:
21+
var format: UnsafePointer[C_char]
22+
var name: UnsafePointer[C_char]
23+
var metadata: UnsafePointer[C_char]
24+
var flags: Int64
25+
var n_children: Int64
26+
var children: UnsafePointer[UnsafePointer[CArrowSchema]]
27+
var dictionary: UnsafePointer[CArrowSchema]
28+
# TODO(kszucs): release callback must be called otherwise memory gets leaked
29+
var release: UnsafePointer[CSchemaReleaseFunction]
30+
var private_data: UnsafePointer[NoneType]
31+
32+
# fn __del__(owned self):
33+
# var this = UnsafePointer.address_of(self)
34+
# if self.release:
35+
# self.release[](this)
36+
37+
@staticmethod
38+
fn from_pyarrow(pyobj: PythonObject) raises -> CArrowSchema:
39+
var ptr = UnsafePointer[CArrowSchema].alloc(1)
40+
pyobj._export_to_c(int(ptr))
41+
return ptr.take_pointee()
42+
43+
fn to_pyarrow(self) raises -> PythonObject:
44+
var pa = Python.import_module("pyarrow")
45+
var ptr = UnsafePointer[CArrowSchema].address_of(self)
46+
return pa.Schema._import_from_c(int(ptr))
47+
48+
@staticmethod
49+
fn from_dtype(dtype: DataType) -> CArrowSchema:
50+
var fmt: String
51+
var n_children: Int64 = 0
52+
var children = UnsafePointer[UnsafePointer[CArrowSchema]]()
53+
54+
if dtype == null:
55+
fmt = "n"
56+
elif dtype == bool_:
57+
fmt = "b"
58+
elif dtype == int8:
59+
fmt = "c"
60+
elif dtype == uint8:
61+
fmt = "C"
62+
elif dtype == int16:
63+
fmt = "s"
64+
elif dtype == uint16:
65+
fmt = "S"
66+
elif dtype == int32:
67+
fmt = "i"
68+
elif dtype == uint32:
69+
fmt = "I"
70+
elif dtype == int64:
71+
fmt = "l"
72+
elif dtype == uint64:
73+
fmt = "L"
74+
elif dtype == float16:
75+
fmt = "e"
76+
elif dtype == float32:
77+
fmt = "f"
78+
elif dtype == float64:
79+
fmt = "g"
80+
elif dtype == binary:
81+
fmt = "z"
82+
elif dtype == string:
83+
fmt = "u"
84+
elif dtype.is_struct():
85+
print("EEE")
86+
87+
fmt = "+s"
88+
n_children = int(len(dtype.fields))
89+
children = UnsafePointer[UnsafePointer[CArrowSchema]].alloc(
90+
int(n_children)
91+
)
92+
93+
for i in range(n_children):
94+
var child = CArrowSchema.from_field(dtype.fields[i])
95+
children[i].init_pointee_move(child)
96+
else:
97+
fmt = ""
98+
# constrained[False, "Unknown dtype"]()
99+
100+
return CArrowSchema(
101+
format=fmt.unsafe_cstr_ptr(),
102+
name=UnsafePointer[C_char](),
103+
metadata=UnsafePointer[C_char](),
104+
flags=0,
105+
n_children=n_children,
106+
children=children,
107+
dictionary=UnsafePointer[CArrowSchema](),
108+
# TODO(kszucs): currently there is no way to pass a mojo callback to C
109+
release=UnsafePointer[CSchemaReleaseFunction](),
110+
private_data=UnsafePointer[NoneType](),
111+
)
112+
113+
@staticmethod
114+
fn from_field(field: Field) -> CArrowSchema:
115+
var flags: Int64 = 0 # TODO: nullable
116+
117+
return CArrowSchema(
118+
format="".unsafe_cstr_ptr(),
119+
name=field.name.unsafe_cstr_ptr(),
120+
metadata="".unsafe_cstr_ptr(),
121+
flags=flags,
122+
n_children=0,
123+
children=UnsafePointer[UnsafePointer[CArrowSchema]](),
124+
dictionary=UnsafePointer[CArrowSchema](),
125+
# TODO(kszucs): currently there is no way to pass a mojo callback to C
126+
release=UnsafePointer[CSchemaReleaseFunction](),
127+
private_data=UnsafePointer[NoneType](),
128+
)
129+
130+
fn to_dtype(self) raises -> DataType:
131+
var fmt = StringRef(self.format)
132+
# TODO(kszucs): not the nicest, but dictionary literals are not supported yet
133+
if fmt == "n":
134+
return null
135+
elif fmt == "b":
136+
return bool_
137+
elif fmt == "c":
138+
return int8
139+
elif fmt == "C":
140+
return uint8
141+
elif fmt == "s":
142+
return int16
143+
elif fmt == "S":
144+
return uint16
145+
elif fmt == "i":
146+
return int32
147+
elif fmt == "I":
148+
return uint32
149+
elif fmt == "l":
150+
return int64
151+
elif fmt == "L":
152+
return uint64
153+
elif fmt == "e":
154+
return float16
155+
elif fmt == "f":
156+
return float32
157+
elif fmt == "g":
158+
return float64
159+
elif fmt == "z":
160+
return binary
161+
elif fmt == "u":
162+
return string
163+
elif fmt == "+l":
164+
var field = self.children[0][].to_field()
165+
return list_(field.dtype)
166+
elif fmt == "+s":
167+
var fields = List[Field]()
168+
for i in range(self.n_children):
169+
fields.append(self.children[i][].to_field())
170+
return struct_(fields)
171+
else:
172+
raise Error("Unknown format")
173+
174+
fn to_field(self) raises -> Field:
175+
var name = StringRef(self.name)
176+
var dtype = self.to_dtype()
177+
var nullable = self.flags & ARROW_FLAG_NULLABLE
178+
return Field(name, dtype, nullable)
179+
180+
181+
@value
182+
struct CArrowArray:
183+
var length: Int64
184+
var null_count: Int64
185+
var offset: Int64
186+
var n_buffers: Int64
187+
var n_children: Int64
188+
var buffers: UnsafePointer[UnsafePointer[NoneType]]
189+
var children: UnsafePointer[UnsafePointer[CArrowArray]]
190+
var dictionary: UnsafePointer[CArrowArray]
191+
var release: UnsafePointer[CArrayReleaseFunction]
192+
var private_data: UnsafePointer[NoneType]
193+
194+
@staticmethod
195+
fn from_pyarrow(pyobj: PythonObject) raises -> CArrowArray:
196+
var ptr = UnsafePointer[CArrowArray].alloc(1)
197+
pyobj._export_to_c(int(ptr))
198+
return ptr.take_pointee()
199+
200+
fn to_array(self, dtype: DataType) raises -> ArrayData:
201+
var bitmap: Arc[Buffer]
202+
if self.buffers[0]:
203+
bitmap = Buffer.view(self.buffers[0], self.length, DType.bool)
204+
else:
205+
# bitmaps are allowed to be nullptrs by the specification, in this
206+
# case we allocate a new buffer to hold the null bitmap
207+
bitmap = Buffer.alloc[DType.uint8](self.length)
208+
209+
var buffers = List[Arc[Buffer]]()
210+
if dtype.is_numeric():
211+
var buffer = Buffer.view(self.buffers[1], self.length, dtype.native)
212+
buffers.append(buffer^)
213+
elif dtype == string:
214+
var offsets = Buffer.view(
215+
self.buffers[1], self.length + 1, DType.uint32
216+
)
217+
var values_size = int(offsets.unsafe_get(int(self.length)))
218+
var values = Buffer.view(self.buffers[2], values_size, DType.uint8)
219+
buffers.append(offsets^)
220+
buffers.append(values^)
221+
elif dtype.is_list():
222+
var offsets = Buffer.view(
223+
self.buffers[1], self.length + 1, DType.uint32
224+
)
225+
buffers.append(offsets^)
226+
else:
227+
raise Error("Unknown dtype")
228+
229+
var children = List[Arc[ArrayData]]()
230+
for i in range(self.n_children):
231+
var child_field = dtype.fields[i]
232+
var child_array = self.children[i][].to_array(child_field.dtype)
233+
children.append(child_array^)
234+
235+
return ArrayData(
236+
dtype=dtype,
237+
length=int(self.length),
238+
bitmap=bitmap,
239+
buffers=buffers,
240+
children=children,
241+
)

‎firebolt/dtypes.mojo

+367
@@ -0,0 +1,367 @@
1+
# The following enum codes are copied from the C++ implementation of Arrow
2+
3+
# A NULL type having no physical storage
4+
alias NA = 0
5+
6+
# Boolean as 1 bit, LSB bit-packed ordering
7+
alias BOOL = 1
8+
9+
# Unsigned 8-bit little-endian integer
10+
alias UINT8 = 2
11+
12+
# Signed 8-bit little-endian integer
13+
alias INT8 = 3
14+
15+
# Unsigned 16-bit little-endian integer
16+
alias UINT16 = 4
17+
18+
# Signed 16-bit little-endian integer
19+
alias INT16 = 5
20+
21+
# Unsigned 32-bit little-endian integer
22+
alias UINT32 = 6
23+
24+
# Signed 32-bit little-endian integer
25+
alias INT32 = 7
26+
27+
# Unsigned 64-bit little-endian integer
28+
alias UINT64 = 8
29+
30+
# Signed 64-bit little-endian integer
31+
alias INT64 = 9
32+
33+
# 2-byte floating point value
34+
alias FLOAT16 = 10
35+
36+
# 4-byte floating point value
37+
alias FLOAT32 = 11
38+
39+
# 8-byte floating point value
40+
alias FLOAT64 = 12
41+
42+
# UTF8 variable-length string as List<Char>
43+
alias STRING = 13
44+
45+
# Variable-length bytes (no guarantee of UTF8-ness)
46+
alias BINARY = 14
47+
48+
# Fixed-size binary. Each value occupies the same number of bytes
49+
alias FIXED_SIZE_BINARY = 15
50+
51+
# int32_t days since the UNIX epoch
52+
alias DATE32 = 16
53+
54+
# int64_t milliseconds since the UNIX epoch
55+
alias DATE64 = 17
56+
57+
# Exact timestamp encoded with int64 since UNIX epoch
58+
# Default unit millisecond
59+
alias TIMESTAMP = 18
60+
61+
# Time as signed 32-bit integer, representing either seconds or
62+
# milliseconds since midnight
63+
alias TIME32 = 19
64+
65+
# Time as signed 64-bit integer, representing either microseconds or
66+
# nanoseconds since midnight
67+
alias TIME64 = 20
68+
69+
# YEAR_MONTH interval in SQL style
70+
alias INTERVAL_MONTHS = 21
71+
72+
# DAY_TIME interval in SQL style
73+
alias INTERVAL_DAY_TIME = 22
74+
75+
# Precision- and scale-based decimal type with 128 bits.
76+
alias DECIMAL128 = 23
77+
78+
# Defined for backward-compatibility.
79+
alias DECIMAL = DECIMAL128
80+
81+
# Precision- and scale-based decimal type with 256 bits.
82+
alias DECIMAL256 = 24
83+
84+
# A list of some logical data type
85+
alias LIST = 25
86+
87+
# Struct of logical types
88+
alias STRUCT = 26
89+
90+
# Sparse unions of logical types
91+
alias SPARSE_UNION = 27
92+
93+
# Dense unions of logical types
94+
alias DENSE_UNION = 28
95+
96+
# Dictionary-encoded type, also called "categorical" or "factor"
97+
# in other programming languages. Holds the dictionary value
98+
# type but not the dictionary itself, which is part of the
99+
# ArrayData struct
100+
alias DICTIONARY = 29
101+
102+
# Map, a repeated struct logical type
103+
alias MAP = 30
104+
105+
# Custom data type, implemented by user
106+
alias EXTENSION = 31
107+
108+
# Fixed size list of some logical type
109+
alias FIXED_SIZE_LIST = 32
110+
111+
# Measure of elapsed time in either seconds, milliseconds, microseconds
112+
# or nanoseconds.
113+
alias DURATION = 33
114+
115+
# Like STRING, but with 64-bit offsets
116+
alias LARGE_STRING = 34
117+
118+
# Like BINARY, but with 64-bit offsets
119+
alias LARGE_BINARY = 35
120+
121+
# Like LIST, but with 64-bit offsets
122+
alias LARGE_LIST = 36
123+
124+
# Calendar interval type with three fields.
125+
alias INTERVAL_MONTH_DAY_NANO = 37
126+
127+
# Run-end encoded data.
128+
alias RUN_END_ENCODED = 38
129+
130+
# String (UTF8) view type with 4-byte prefix and inline small string
131+
# optimization
132+
alias STRING_VIEW = 39
133+
134+
# Bytes view type with 4-byte prefix and inline small string optimization
135+
alias BINARY_VIEW = 40
136+
137+
# A list of some logical data type represented by offset and size.
138+
alias LIST_VIEW = 41
139+
140+
# Like LIST_VIEW, but with 64-bit offsets and sizes
141+
alias LARGE_LIST_VIEW = 42
142+
143+
144+
@value
145+
struct Field(CollectionElement, EqualityComparable):
146+
var name: String
147+
var dtype: DataType
148+
var nullable: Bool
149+
150+
fn __init__(
151+
inout self, name: String, dtype: DataType, nullable: Bool = False
152+
):
153+
self.name = name
154+
self.dtype = dtype
155+
self.nullable = nullable
156+
157+
fn __eq__(self, other: Field) -> Bool:
158+
return (
159+
self.name == other.name
160+
and self.dtype == other.dtype
161+
and self.nullable == other.nullable
162+
)
163+
164+
fn __ne__(self, other: Field) -> Bool:
165+
return not self == other
166+
167+
168+
struct DataType(CollectionElement, EqualityComparable, Stringable):
169+
var code: UInt8
170+
var native: DType
171+
var fields: List[Field]
172+
173+
fn __init__(inout self, *, code: UInt8):
174+
self.code = code
175+
self.native = DType.invalid
176+
self.fields = List[Field]()
177+
178+
fn __init__(inout self, native: DType):
179+
if native is DType.bool:
180+
self.code = BOOL
181+
elif native is DType.int8:
182+
self.code = INT8
183+
elif native is DType.int16:
184+
self.code = INT16
185+
elif native is DType.int32:
186+
self.code = INT32
187+
elif native is DType.int64:
188+
self.code = INT64
189+
elif native is DType.uint8:
190+
self.code = UINT8
191+
elif native is DType.uint16:
192+
self.code = UINT16
193+
elif native is DType.uint32:
194+
self.code = UINT32
195+
elif native is DType.uint64:
196+
self.code = UINT64
197+
elif native is DType.float32:
198+
self.code = FLOAT32
199+
elif native is DType.float64:
200+
self.code = FLOAT64
201+
else:
202+
self.code = NA
203+
self.native = native
204+
self.fields = List[Field]()
205+
206+
fn __init__(inout self, *, code: UInt8, native: DType):
207+
self.code = code
208+
self.native = native
209+
self.fields = List[Field]()
210+
211+
fn __init__(inout self, *, code: UInt8, fields: List[Field]):
212+
self.code = code
213+
self.native = DType.invalid
214+
self.fields = fields
215+
216+
fn __copyinit__(inout self, value: Self):
217+
self.code = value.code
218+
self.native = value.native
219+
self.fields = value.fields
220+
221+
fn __moveinit__(inout self, owned value: Self):
222+
self.code = value.code
223+
self.native = value.native
224+
self.fields = value.fields^
225+
226+
fn __is__(self, other: DataType) -> Bool:
227+
return self == other
228+
229+
fn __isnot__(self, other: DataType) -> Bool:
230+
return self != other
231+
232+
fn __eq__(self, other: DataType) -> Bool:
233+
if self.code != other.code:
234+
return False
235+
if len(self.fields) != len(other.fields):
236+
return False
237+
for i in range(len(self.fields)):
238+
if self.fields[i] != other.fields[i]:
239+
return False
240+
return True
241+
242+
fn __ne__(self, other: DataType) -> Bool:
243+
return not self == other
244+
245+
fn __str__(self) -> String:
246+
if self.code == NA:
247+
return "null"
248+
elif self.code == BOOL:
249+
return "bool"
250+
elif self.code == INT8:
251+
return "int8"
252+
elif self.code == INT16:
253+
return "int16"
254+
else:
255+
return "unknown"
256+
257+
fn is_bool(self) -> Bool:
258+
return self.code == BOOL
259+
260+
fn bitwidth(self) -> UInt8:
261+
if self.code == BOOL:
262+
return 1
263+
elif self.code == INT8:
264+
return 8
265+
elif self.code == INT16:
266+
return 16
267+
elif self.code == INT32:
268+
return 32
269+
elif self.code == INT64:
270+
return 64
271+
elif self.code == UINT8:
272+
return 8
273+
elif self.code == UINT16:
274+
return 16
275+
elif self.code == UINT32:
276+
return 32
277+
elif self.code == UINT64:
278+
return 64
279+
elif self.code == FLOAT32:
280+
return 32
281+
elif self.code == FLOAT64:
282+
return 64
283+
else:
284+
return 0
285+
286+
@always_inline
287+
fn is_boolean(self) -> Bool:
288+
return self.code == BOOL
289+
290+
@always_inline
291+
fn is_fixed_size(self) -> Bool:
292+
return self.bitwidth() > 0
293+
294+
@always_inline
295+
fn is_integer(self) -> Bool:
296+
# TODO(kszucs): cannot use the following because ListLiteral.__contains__ is not implemented
297+
# return self.code in [INT8, INT16, INT32, INT64, UINT8, UINT16, UINT32, UINT64]
298+
# return self.is_signed_integer() or self.is_unsigned_integer()
299+
return self.is_signed_integer() or self.is_unsigned_integer()
300+
301+
@always_inline
302+
fn is_signed_integer(self) -> Bool:
303+
return (
304+
self.code == INT8
305+
or self.code == INT16
306+
or self.code == INT32
307+
or self.code == INT64
308+
)
309+
310+
@always_inline
311+
fn is_unsigned_integer(self) -> Bool:
312+
return (
313+
self.code == UINT8
314+
or self.code == UINT16
315+
or self.code == UINT32
316+
or self.code == UINT64
317+
)
318+
319+
@always_inline
320+
fn is_floating_point(self) -> Bool:
321+
return self.code == FLOAT32 or self.code == FLOAT64
322+
323+
@always_inline
324+
fn is_numeric(self) -> Bool:
325+
return self.is_integer() or self.is_floating_point()
326+
327+
@always_inline
328+
fn is_list(self) -> Bool:
329+
return self.code == LIST
330+
331+
@always_inline
332+
fn is_struct(self) -> Bool:
333+
return self.code == STRUCT
334+
335+
336+
fn list_(value_type: DataType) -> DataType:
337+
return DataType(code=LIST, fields=List(Field("value", value_type)))
338+
339+
340+
fn struct_(fields: List[Field]) -> DataType:
341+
return DataType(code=STRUCT, fields=fields)
342+
343+
344+
fn struct_(*fields: Field) -> DataType:
345+
# TODO(kszucs): it would be easier to just List(struct_fields)
346+
# but that doesn't seem to be supported
347+
var struct_fields = List[Field](capacity=len(fields))
348+
for field in fields:
349+
struct_fields.append(field[])
350+
return DataType(code=STRUCT, fields=struct_fields)
351+
352+
353+
alias null = DataType(code=NA)
354+
alias bool_ = DataType(code=BOOL, native=DType.bool)
355+
alias int8 = DataType(code=INT8, native=DType.int8)
356+
alias int16 = DataType(code=INT16, native=DType.int16)
357+
alias int32 = DataType(code=INT32, native=DType.int32)
358+
alias int64 = DataType(code=INT64, native=DType.int64)
359+
alias uint8 = DataType(code=UINT8, native=DType.uint8)
360+
alias uint16 = DataType(code=UINT16, native=DType.uint16)
361+
alias uint32 = DataType(code=UINT32, native=DType.uint32)
362+
alias uint64 = DataType(code=UINT64, native=DType.uint64)
363+
alias float16 = DataType(code=FLOAT16, native=DType.float16)
364+
alias float32 = DataType(code=FLOAT32, native=DType.float32)
365+
alias float64 = DataType(code=FLOAT64, native=DType.float64)
366+
alias string = DataType(code=STRING)
367+
alias binary = DataType(code=BINARY)
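For reference, a minimal usage sketch of the factories and predicates defined above. The `main` entry point and the printed expected values are illustrative only and not part of this commit; the calls themselves follow the API exactly as declared in this file.

import firebolt.dtypes as dt


def main():
    # Nested types are built from the factory functions above.
    var ints = dt.list_(dt.int64)
    var point = dt.struct_(dt.Field("x", dt.float64), dt.Field("y", dt.float64))

    print(dt.int16.bitwidth())               # 16
    print(dt.uint32.is_unsigned_integer())   # True
    print(point.is_struct())                 # True
    # __eq__ compares the type code and the nested fields recursively.
    print(ints == dt.list_(dt.int64))        # True
    print(ints == dt.list_(dt.int32))        # False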

firebolt/tabular.mojo

+10
@@ -0,0 +1,10 @@
1+
from .arrays import *
2+
3+
4+
struct RecordBatch:
5+
var schema: Schema
6+
var fields: List[Array]
7+
8+
fn __init__(inout self, schema: Schema, fields: List[Array]):
9+
self.schema = schema
10+
self.fields = fields

firebolt/tests/test_buffers.mojo

+58
@@ -0,0 +1,58 @@
1+
from testing import assert_equal, assert_true, assert_false
2+
3+
from firebolt.buffers import *
4+
5+
6+
def test_buffer_init():
7+
var b = Buffer.alloc(10)
8+
assert_equal(b.size, 64)
9+
assert_true(b.ptr.is_aligned[64]())
10+
11+
var b1 = Buffer.alloc[DType.bool](10)
12+
assert_equal(b1.size, 64)
13+
assert_true(b1.ptr.is_aligned[64]())
14+
15+
var b2 = Buffer.alloc[DType.bool](64 * 8 + 1)
16+
assert_equal(b2.size, 128)
17+
assert_true(b2.ptr.is_aligned[64]())
18+
19+
20+
def test_buffer_grow():
21+
var b = Buffer.alloc(10)
22+
assert_equal(b.size, 64)
23+
b.grow(20)
24+
assert_equal(b.size, 64)
25+
b.grow(80)
26+
assert_equal(b.size, 128)
27+
28+
29+
def test_buffer():
30+
var buf = Buffer.alloc(10)
31+
assert_equal(buf.size, 64)
32+
33+
buf.unsafe_set(0, 42)
34+
buf.unsafe_set(1, 43)
35+
buf.unsafe_set(2, 44)
36+
assert_equal(buf.unsafe_get(0), 42)
37+
assert_equal(buf.unsafe_get(1), 43)
38+
assert_equal(buf.unsafe_get(2), 44)
39+
40+
assert_equal(buf.size, 64)
41+
assert_equal(
42+
buf.length[DType.uint16](), 32
43+
) # 64 bytes / 2 bytes per element
44+
# reinterpreting the underlying bits as uint16
45+
assert_equal(buf.unsafe_get[DType.uint16](0), 42 + (43 << 8))
46+
assert_equal(buf.unsafe_get[DType.uint16](1), 44)
47+
48+
49+
def test_bitmap():
50+
var b = Buffer.alloc[DType.bool](10)
51+
assert_equal(b.size, 64)
52+
assert_equal(b.length[DType.bool](), 64 * 8)
53+
54+
b.unsafe_set(0, True)
55+
assert_true(b.unsafe_get(0))
56+
assert_false(b.unsafe_get(1))
57+
b.unsafe_set(1, True)
58+
assert_true(b.unsafe_get(1))
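The buffer sizes asserted above follow from 64-byte aligned, 64-byte padded allocations: 10 requested bytes round up to 64, and 64 * 8 + 1 bits (65 bytes once bit-packed) round up to 128. A minimal sketch of that rounding rule, written here only to make the expected values explicit; it is not the library's implementation:

fn padded_size(nbytes: Int) -> Int:
    # Round a byte count up to the next multiple of 64,
    # matching the sizes asserted in the tests above.
    return ((nbytes + 63) // 64) * 64


def main():
    print(padded_size(10))  # 64
    print(padded_size(65))  # 128
    print(padded_size(80))  # 128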

firebolt/tests/test_c_data.mojo

+148
@@ -0,0 +1,148 @@
1+
from testing import assert_equal, assert_true, assert_false
2+
from python import Python
3+
from firebolt.c_data import *
4+
5+
6+
def test_schema_from_pyarrow():
7+
var pa = Python.import_module("pyarrow")
8+
var pyint = pa.field("int_field", pa.int32())
9+
var pystring = pa.field("string_field", pa.string())
10+
var pyschema = pa.schema([])
11+
pyschema = pyschema.append(pyint)
12+
pyschema = pyschema.append(pystring)
13+
14+
var c_schema = CArrowSchema.from_pyarrow(pyschema)
15+
var schema = c_schema.to_dtype()
16+
17+
assert_equal(schema.fields[0].name, "int_field")
18+
assert_equal(schema.fields[0].dtype, int32)
19+
assert_equal(schema.fields[1].name, "string_field")
20+
assert_equal(schema.fields[1].dtype, string)
21+
22+
23+
def test_primitive_array_from_pyarrow():
24+
var pa = Python.import_module("pyarrow")
25+
var pyarr = pa.array(
26+
[1, 2, 3, 4, 5], mask=[False, False, False, False, True]
27+
)
28+
29+
var c_array = CArrowArray.from_pyarrow(pyarr)
30+
var c_schema = CArrowSchema.from_pyarrow(pyarr.type)
31+
32+
var dtype = c_schema.to_dtype()
33+
assert_equal(dtype, int64)
34+
assert_equal(c_array.length, 5)
35+
assert_equal(c_array.null_count, 1)
36+
assert_equal(c_array.offset, 0)
37+
assert_equal(c_array.n_buffers, 2)
38+
assert_equal(c_array.n_children, 0)
39+
40+
var data = c_array.to_array(dtype)
41+
var array = data.as_int64()
42+
assert_equal(array.bitmap[].size, 64)
43+
assert_equal(array.is_valid(0), True)
44+
assert_equal(array.is_valid(1), True)
45+
assert_equal(array.is_valid(2), True)
46+
assert_equal(array.is_valid(3), True)
47+
assert_equal(array.is_valid(4), False)
48+
assert_equal(array.unsafe_get(0), 1)
49+
assert_equal(array.unsafe_get(1), 2)
50+
assert_equal(array.unsafe_get(2), 3)
51+
assert_equal(array.unsafe_get(3), 4)
52+
assert_equal(array.unsafe_get(4), 0)
53+
54+
array.unsafe_set(0, 10)
55+
assert_equal(array.unsafe_get(0), 10)
56+
assert_equal(str(pyarr), "[\n 10,\n 2,\n 3,\n 4,\n null\n]")
57+
58+
59+
def test_binary_array_from_pyarrow():
60+
var pa = Python.import_module("pyarrow")
61+
62+
var pyarr = pa.array(["foo", "bar", "baz"], mask=[False, False, True])
63+
64+
var c_array = CArrowArray.from_pyarrow(pyarr)
65+
var c_schema = CArrowSchema.from_pyarrow(pyarr.type)
66+
67+
var dtype = c_schema.to_dtype()
68+
assert_equal(dtype, string)
69+
70+
assert_equal(c_array.length, 3)
71+
assert_equal(c_array.null_count, 1)
72+
assert_equal(c_array.offset, 0)
73+
assert_equal(c_array.n_buffers, 3)
74+
assert_equal(c_array.n_children, 0)
75+
76+
var data = c_array.to_array(dtype)
77+
var array = data.as_string()
78+
79+
assert_equal(array.bitmap[].size, 64)
80+
assert_equal(array.is_valid(0), True)
81+
assert_equal(array.is_valid(1), True)
82+
assert_equal(array.is_valid(2), False)
83+
84+
assert_equal(array.unsafe_get(0), "foo")
85+
assert_equal(array.unsafe_get(1), "bar")
86+
assert_equal(array.unsafe_get(2), "")
87+
88+
array.unsafe_set(0, "qux")
89+
assert_equal(array.unsafe_get(0), "qux")
90+
assert_equal(str(pyarr), '[\n "qux",\n "bar",\n null\n]')
91+
92+
93+
def test_list_array_from_pyarrow():
94+
var pa = Python.import_module("pyarrow")
95+
96+
var pylist1 = PythonObject([1, 2, 3])
97+
var pylist2 = PythonObject([4, 5])
98+
var pylist3 = PythonObject([6, 7])
99+
var pyarr = pa.array([pylist1, pylist2, pylist3], mask=[False, True, False])
100+
101+
var c_array = CArrowArray.from_pyarrow(pyarr)
102+
var c_schema = CArrowSchema.from_pyarrow(pyarr.type)
103+
104+
var dtype = c_schema.to_dtype()
105+
assert_equal(dtype, list_(int64))
106+
107+
assert_equal(c_array.length, 3)
108+
assert_equal(c_array.null_count, 1)
109+
assert_equal(c_array.offset, 0)
110+
assert_equal(c_array.n_buffers, 2)
111+
assert_equal(c_array.n_children, 1)
112+
113+
var data = c_array.to_array(dtype)
114+
var array = data.as_list()
115+
116+
assert_equal(array.bitmap[].size, 64)
117+
assert_equal(array.is_valid(0), True)
118+
assert_equal(array.is_valid(1), False)
119+
assert_equal(array.is_valid(2), True)
120+
121+
var values = array.values[].as_int64()
122+
assert_equal(values.unsafe_get(0), 1)
123+
assert_equal(values.unsafe_get(1), 2)
124+
values.unsafe_set(0, 10)
125+
values.unsafe_set(2, 30)
126+
127+
assert_equal(
128+
str(pyarr),
129+
(
130+
"[\n [\n 10,\n 2,\n 30\n ],\n null,\n [\n 6,\n "
131+
" 7\n ]\n]"
132+
),
133+
)
134+
135+
136+
# def test_schema_to_pyarrow():
137+
# var pa = Python.import_module("pyarrow")
138+
139+
# var struct_type = struct_(
140+
# Field("int_field", int32),
141+
# Field("string_field", string),
142+
# )
143+
144+
# try:
145+
# # mojo->python direction is not working yet
146+
# var c_schema = CArrowSchema.from_dtype(int32)
147+
# except Error:
148+
# pass

firebolt/tests/test_dtypes.mojo

+96
@@ -0,0 +1,96 @@
1+
from testing import assert_equal, assert_true, assert_false
2+
import firebolt.dtypes as dt
3+
4+
5+
def test_bool_type():
6+
assert_true(dt.bool_ == dt.bool_)
7+
assert_false(dt.bool_ == dt.int64)
8+
assert_true(dt.bool_ is dt.bool_)
9+
assert_false(dt.bool_ is dt.int64)
10+
11+
12+
def test_list_type():
13+
assert_true(dt.list_(dt.int64) == dt.list_(dt.int64))
14+
assert_false(dt.list_(dt.int64) == dt.list_(dt.int32))
15+
16+
17+
def test_struct_type():
18+
s1 = dt.struct_(dt.Field("a", dt.int64), dt.Field("b", dt.int32))
19+
s2 = dt.struct_(dt.Field("a", dt.int64), dt.Field("b", dt.int32))
20+
s3 = dt.struct_(
21+
dt.Field("a", dt.int64), dt.Field("b", dt.int32), dt.Field("c", dt.int8)
22+
)
23+
assert_true(s1 == s2)
24+
assert_false(s1 == s3)
25+
26+
27+
def test_is_integer():
28+
assert_true(dt.int8.is_integer())
29+
assert_true(dt.int16.is_integer())
30+
assert_true(dt.int32.is_integer())
31+
assert_true(dt.int64.is_integer())
32+
assert_true(dt.uint8.is_integer())
33+
assert_true(dt.uint16.is_integer())
34+
assert_true(dt.uint32.is_integer())
35+
assert_true(dt.uint64.is_integer())
36+
assert_false(dt.bool_.is_integer())
37+
assert_false(dt.float32.is_integer())
38+
assert_false(dt.float64.is_integer())
39+
assert_false(dt.list_(dt.int64).is_integer())
40+
41+
42+
def test_is_signed_integer():
43+
assert_true(dt.int8.is_signed_integer())
44+
assert_true(dt.int16.is_signed_integer())
45+
assert_true(dt.int32.is_signed_integer())
46+
assert_true(dt.int64.is_signed_integer())
47+
assert_false(dt.uint8.is_signed_integer())
48+
assert_false(dt.uint16.is_signed_integer())
49+
assert_false(dt.uint32.is_signed_integer())
50+
assert_false(dt.uint64.is_signed_integer())
51+
assert_false(dt.bool_.is_signed_integer())
52+
assert_false(dt.float32.is_signed_integer())
53+
assert_false(dt.float64.is_signed_integer())
54+
55+
56+
def test_is_unsigned_integer():
57+
assert_false(dt.int8.is_unsigned_integer())
58+
assert_false(dt.int16.is_unsigned_integer())
59+
assert_false(dt.int32.is_unsigned_integer())
60+
assert_false(dt.int64.is_unsigned_integer())
61+
assert_true(dt.uint8.is_unsigned_integer())
62+
assert_true(dt.uint16.is_unsigned_integer())
63+
assert_true(dt.uint32.is_unsigned_integer())
64+
assert_true(dt.uint64.is_unsigned_integer())
65+
assert_false(dt.bool_.is_unsigned_integer())
66+
assert_false(dt.float32.is_unsigned_integer())
67+
assert_false(dt.float64.is_unsigned_integer())
68+
69+
70+
def test_is_floating_point():
71+
assert_false(dt.int8.is_floating_point())
72+
assert_false(dt.int16.is_floating_point())
73+
assert_false(dt.int32.is_floating_point())
74+
assert_false(dt.int64.is_floating_point())
75+
assert_false(dt.uint8.is_floating_point())
76+
assert_false(dt.uint16.is_floating_point())
77+
assert_false(dt.uint32.is_floating_point())
78+
assert_false(dt.uint64.is_floating_point())
79+
assert_false(dt.bool_.is_floating_point())
80+
assert_true(dt.float32.is_floating_point())
81+
assert_true(dt.float64.is_floating_point())
82+
83+
84+
def test_bitwidth():
85+
assert_equal(dt.int8.bitwidth(), 8)
86+
assert_equal(dt.int16.bitwidth(), 16)
87+
assert_equal(dt.int32.bitwidth(), 32)
88+
assert_equal(dt.int64.bitwidth(), 64)
89+
assert_equal(dt.uint8.bitwidth(), 8)
90+
assert_equal(dt.uint16.bitwidth(), 16)
91+
assert_equal(dt.uint32.bitwidth(), 32)
92+
assert_equal(dt.uint64.bitwidth(), 64)
93+
assert_equal(dt.bool_.bitwidth(), 1)
94+
assert_equal(dt.float32.bitwidth(), 32)
95+
assert_equal(dt.float64.bitwidth(), 64)
96+
assert_equal(dt.list_(dt.int64).bitwidth(), 0)

firebolt/tests/test_utils.mojo

+4
@@ -0,0 +1,4 @@
1+
from testing import assert_true
2+
from firebolt.utils import Box
3+
4+

firebolt/utils.mojo

+16
@@ -0,0 +1,16 @@
1+
struct Box[T: CollectionElement](CollectionElement):
2+
var ptr: UnsafePointer[T]
3+
4+
fn __init__(inout self, owned value: T):
5+
self.ptr = UnsafePointer[T].alloc(1)
6+
self.ptr.init_pointee_move(value)
7+
8+
fn __copyinit__(inout self, value: Self):
9+
self.ptr = UnsafePointer[T].alloc(1)
10+
self.ptr.init_pointee_copy(value.ptr[])
11+
12+
fn __moveinit__(inout self, owned value: Self):
13+
self.ptr = value.ptr
14+
15+
fn __getitem__(ref [_]self: Self) -> ref [__lifetime_of(self)] T:
16+
return self.ptr[]
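A minimal usage sketch of Box. The `main` function and the Int payload are illustrative assumptions, not part of this commit; the calls themselves follow the constructor, __copyinit__, and __getitem__ defined above.

from firebolt.utils import Box


def main():
    var boxed = Box[Int](42)  # heap-allocates and moves the value in
    print(boxed[])            # __getitem__ returns a reference to the pointee: 42
    var copied = boxed        # __copyinit__ allocates a fresh pointee
    print(copied[])           # 42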
