diff --git a/src/Apache.Arrow/Arrays/Bool8Array.cs b/src/Apache.Arrow/Arrays/Bool8Array.cs
new file mode 100644
index 0000000..c250e4e
--- /dev/null
+++ b/src/Apache.Arrow/Arrays/Bool8Array.cs
@@ -0,0 +1,174 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using Apache.Arrow.Arrays;
+using Apache.Arrow.Types;
+
+namespace Apache.Arrow
+{
+    /// <summary>
+    /// Extension definition for the "arrow.bool8" extension type,
+    /// backed by the Int8 storage type.
+    /// </summary>
+    public class Bool8ExtensionDefinition : ExtensionDefinition
+    {
+        /// <summary>
+        /// The only instance of this definition; the constructor is private.
+        /// </summary>
+        public static readonly Bool8ExtensionDefinition Instance = new Bool8ExtensionDefinition();
+
+        public override string ExtensionName => "arrow.bool8";
+
+        private Bool8ExtensionDefinition() { }
+
+        /// <summary>
+        /// Creates a <see cref="Bool8Type"/> when <paramref name="storageType"/> is an
+        /// <see cref="Int8Type"/>. <paramref name="metadata"/> is not inspected.
+        /// </summary>
+        /// <returns>True when the storage type was accepted; otherwise false with a null <paramref name="type"/>.</returns>
+        public override bool TryCreateType(IArrowType storageType, string metadata, out ExtensionType type)
+        {
+            if (storageType is Int8Type i8Type)
+            {
+                type = new Bool8Type(i8Type);
+                return true;
+            }
+            type = null;
+            return false;
+        }
+    }
+
+    /// <summary>
+    /// Extension type representing 1-byte boolean values
+    /// </summary>
+    public class Bool8Type : ExtensionType
+    {
+        /// <summary>
+        /// Shared instance backed by <see cref="Int8Type.Default"/>.
+        /// </summary>
+        public static readonly Bool8Type Default = new Bool8Type();
+
+        public override string Name => "arrow.bool8";
+        public override string ExtensionMetadata => "";
+
+        public Bool8Type() : base(Int8Type.Default) { }
+
+        internal Bool8Type(Int8Type storageType) : base(storageType) { }
+
+        /// <summary>
+        /// Wraps <paramref name="storageArray"/> in a <see cref="Bool8Array"/> of this type.
+        /// </summary>
+        public override ExtensionArray CreateArray(IArrowArray storageArray)
+        {
+            return new Bool8Array(this, storageArray);
+        }
+    }
+
+    /// <summary>
+    /// Extension array for 1-byte boolean values, backed by an Int8Array.
+    /// </summary>
+    public class Bool8Array : ExtensionArray, IReadOnlyList<bool?>
+    {
+        /// <summary>
+        /// The underlying storage, cast to <see cref="Int8Array"/>.
+        /// </summary>
+        public Int8Array StorageArray => (Int8Array)Storage;
+
+        public Bool8Array(Bool8Type bool8Type, IArrowArray storage) : base(bool8Type, storage) { }
+
+        public Bool8Array(IArrowArray storage) : base(Bool8Type.Default, storage) { }
+
+        /// <summary>
+        /// Builds a <see cref="Bool8Array"/>, storing true as 1 and false as 0.
+        /// </summary>
+        public class Builder : PrimitiveArrayBuilder<sbyte, Bool8Array, Builder>
+        {
+            protected override Bool8Array Build(
+                ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer,
+                int length, int nullCount, int offset) =>
+                new Bool8Array(new Int8Array(valueBuffer, nullBitmapBuffer, length, nullCount, offset));
+
+            public Builder Append(bool value)
+            {
+                return Append(value ? (sbyte)1 : (sbyte)0);
+            }
+
+            public Builder Append(bool? value)
+            {
+                if (value == null)
+                {
+                    return AppendNull();
+                }
+                return Append(value.Value);
+            }
+
+            public Builder AppendRange(IEnumerable<bool> values)
+            {
+                if (values == null)
+                {
+                    throw new ArgumentNullException(nameof(values));
+                }
+
+                foreach (bool value in values)
+                {
+                    Append(value);
+                }
+
+                return Instance;
+            }
+
+            public Builder AppendRange(IEnumerable<bool?> values)
+            {
+                if (values == null)
+                {
+                    throw new ArgumentNullException(nameof(values));
+                }
+
+                foreach (bool? value in values)
+                {
+                    Append(value);
+                }
+
+                return Instance;
+            }
+
+            public Builder Set(int index, bool value)
+            {
+                return Set(index, value ? (sbyte)1 : (sbyte)0);
+            }
+        }
+
+        public int Count => Length;
+        public bool? this[int index] => GetValue(index);
+
+        /// <summary>
+        /// Returns null when <c>IsValid(index)</c> is false; otherwise true when the stored byte is non-zero.
+        /// </summary>
+        public bool? GetValue(int index) => IsValid(index) ? StorageArray.GetValue(index).Value != 0 : null;
+
+        public IEnumerator<bool?> GetEnumerator()
+        {
+            for (int i = 0; i < Length; i++)
+            {
+                yield return GetValue(i);
+            }
+        }
+
+        IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
+    }
+}
diff --git a/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs b/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
index 4276061..6a10255 100644
--- a/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
+++ b/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
@@ -1201,6 +1201,119 @@ public unsafe void ImportGuidArray()
             CArrowArray.Free(cArray);
         }
 
+        [SkippableFact]
+        public unsafe void ExportBool8Array()
+        {
+            // Export a C# Bool8Array via the C Data Interface and verify
+            // that Python/pyarrow sees it as an arrow.bool8 extension array
+            // with the correct values.
+
+            var builder = new Bool8Array.Builder();
+            builder.Append(true);
+            builder.Append(false);
+            builder.Append(null);
+            builder.Append(false);
+            builder.Append(true);
+            var bool8Array = builder.Build();
+
+            var field = new Field("bools", Bool8Type.Default, true);
+            var schema = new Schema(new[] { field }, null);
+            var batch = new RecordBatch(schema, new[] { bool8Array }, bool8Array.Length);
+
+            CArrowArray* cArray = CArrowArray.Create();
+            CArrowArrayExporter.ExportRecordBatch(batch, cArray);
+
+            CArrowSchema* cSchema = CArrowSchema.Create();
+            CArrowSchemaExporter.ExportSchema(batch.Schema, cSchema);
+
+            try
+            {
+                long arrayPtr = ((IntPtr)cArray).ToInt64();
+                long schemaPtr = ((IntPtr)cSchema).ToInt64();
+
+                using (Py.GIL())
+                {
+                    dynamic pa = Py.Import("pyarrow");
+
+                    dynamic pyBatch = pa.RecordBatch._import_from_c(arrayPtr, schemaPtr);
+                    dynamic pyArray = pyBatch.column(0);
+
+                    // Build the expected bool8 array in Python
+                    dynamic expectedArray = pa.array(
+                        new PyList(new PyObject[]
+                        {
+                            PyObject.FromManagedObject(true),
+                            PyObject.FromManagedObject(false),
+                            PyObject.None,
+                            PyObject.FromManagedObject(false),
+                            PyObject.FromManagedObject(true),
+                        }),
+                        type: pa.bool8());
+
+                    Assert.True((bool)pyArray.equals(expectedArray));
+                }
+            }
+            finally
+            {
+                CArrowArray.Free(cArray);
+                CArrowSchema.Free(cSchema);
+            }
+        }
+
+        [SkippableFact]
+        public unsafe void ImportBool8Array()
+        {
+            // Create a bool8 array in Python, export it via the C Data Interface,
+            // and verify that C# resolves it as a Bool8Array with correct values.
+
+            CArrowArray* cArray = CArrowArray.Create();
+            CArrowSchema* cSchema = CArrowSchema.Create();
+
+            try
+            {
+                using (Py.GIL())
+                {
+                    dynamic pa = Py.Import("pyarrow");
+
+                    dynamic pyArray = pa.array(
+                        new PyList(new PyObject[]
+                        {
+                            PyObject.None,
+                            PyObject.FromManagedObject(false),
+                            PyObject.FromManagedObject(true),
+                            PyObject.FromManagedObject(true),
+                        }),
+                        type: pa.bool8());
+
+                    long arrayPtr = ((IntPtr)cArray).ToInt64();
+                    long schemaPtr = ((IntPtr)cSchema).ToInt64();
+                    pyArray._export_to_c(arrayPtr, schemaPtr);
+                }
+
+                var registry = new ExtensionTypeRegistry();
+                registry.Register(Bool8ExtensionDefinition.Instance);
+
+                Field field = CArrowSchemaImporter.ImportField(cSchema, registry);
+                Assert.IsType<Bool8Type>(field.DataType);
+
+                IArrowArray importedArray = CArrowArrayImporter.ImportArray(cArray, field.DataType);
+                Assert.IsType<Bool8Array>(importedArray);
+
+                var bool8Array = (Bool8Array)importedArray;
+                Assert.Equal(4, bool8Array.Length);
+                Assert.Null(bool8Array.GetValue(0));
+                Assert.Equal(false, bool8Array.GetValue(1));
+                Assert.Equal(true, bool8Array.GetValue(2));
+                Assert.Equal(true, bool8Array.GetValue(3));
+            }
+            finally
+            {
+                // Runs even when an assertion fails, so neither struct leaks.
+                CArrowArray.Free(cArray);
+                CArrowSchema.Free(cSchema);
+            }
+        }
+
         private static PyObject List(params int?[] values)
         {
             return new PyList(values.Select(i => i == null ? PyObject.None : new PyInt(i.Value)).ToArray());