Skip to content

Commit c4e2013

Browse files
apacheGH-38214: [MATLAB] Add a common arrow.tabular.Tabular MATLAB interface (apache#47014)
### Rationale for this change

Currently, the properties and methods of `arrow.tabular.RecordBatch` and `arrow.tabular.Table` are very similar. To simplify implementation of these classes, reduce code duplication, and ensure design consistency, it might be helpful to factor out the common tabular functionality into an `arrow.tabular.Tabular` interface that both classes implement.

### What changes are included in this PR?

1. Defined an `arrow.tabular.Tabular` interface that contains the tabular functionality shared by `arrow.tabular.Table` and `arrow.tabular.RecordBatch`.
2. Updated `arrow.tabular.Table` and `arrow.tabular.RecordBatch` to implement the `arrow.tabular.Tabular` interface.

### Are these changes tested?

Yes. These changes are covered by the existing test cases defined in `tTable.m` and `tRecordBatch.m`.

### Are there any user-facing changes?

No.

* GitHub Issue: apache#38214

Lead-authored-by: Sarah Gilmore <[email protected]>
Co-authored-by: Sarah Gilmore <[email protected]>
Co-authored-by: Kevin Gurney <[email protected]>
Signed-off-by: Sarah Gilmore <[email protected]>
1 parent a0cc2d8 commit c4e2013

File tree

3 files changed

+147
-178
lines changed

3 files changed

+147
-178
lines changed
Lines changed: 12 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
%RECORDBATCH A tabular data structure representing a set of
22
%arrow.array.Array objects with a fixed schema.
33

4-
54
% Licensed to the Apache Software Foundation (ASF) under one or more
65
% contributor license agreements. See the NOTICE file distributed with
76
% this work for additional information regarding copyright ownership.
@@ -17,90 +16,16 @@
1716
% implied. See the License for the specific language governing
1817
% permissions and limitations under the License.
1918

20-
classdef RecordBatch < matlab.mixin.CustomDisplay & ...
21-
matlab.mixin.Scalar
22-
23-
properties (Dependent, SetAccess=private, GetAccess=public)
24-
NumRows
25-
NumColumns
26-
ColumnNames
27-
Schema
28-
end
29-
30-
properties (Hidden, SetAccess=private, GetAccess=public)
31-
Proxy
32-
end
19+
classdef RecordBatch < arrow.tabular.Tabular
3320

3421
methods
22+
3523
function obj = RecordBatch(proxy)
3624
arguments
3725
proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.tabular.proxy.RecordBatch")}
3826
end
3927
import arrow.internal.proxy.validate
40-
obj.Proxy = proxy;
41-
end
42-
43-
function numRows = get.NumRows(obj)
44-
numRows = obj.Proxy.getNumRows();
45-
end
46-
47-
function numColumns = get.NumColumns(obj)
48-
numColumns = obj.Proxy.getNumColumns();
49-
end
50-
51-
function columnNames = get.ColumnNames(obj)
52-
columnNames = obj.Proxy.getColumnNames();
53-
end
54-
55-
function schema = get.Schema(obj)
56-
proxyID = obj.Proxy.getSchema();
57-
proxy = libmexclass.proxy.Proxy(Name="arrow.tabular.proxy.Schema", ID=proxyID);
58-
schema = arrow.tabular.Schema(proxy);
59-
end
60-
61-
function arrowArray = column(obj, idx)
62-
import arrow.internal.validate.*
63-
64-
idx = index.numericOrString(idx, "int32", AllowNonScalar=false);
65-
66-
if isnumeric(idx)
67-
args = struct(Index=idx);
68-
proxyInfo = obj.Proxy.getColumnByIndex(args);
69-
else
70-
args = struct(Name=idx);
71-
proxyInfo = obj.Proxy.getColumnByName(args);
72-
end
73-
74-
traits = arrow.type.traits.traits(arrow.type.ID(proxyInfo.TypeID));
75-
proxy = libmexclass.proxy.Proxy(Name=traits.ArrayProxyClassName, ID=proxyInfo.ProxyID);
76-
arrowArray = traits.ArrayConstructor(proxy);
77-
end
78-
79-
function T = table(obj)
80-
import arrow.tabular.internal.*
81-
82-
numColumns = obj.NumColumns;
83-
matlabArrays = cell(1, numColumns);
84-
85-
for ii = 1:numColumns
86-
arrowArray = obj.column(ii);
87-
matlabArrays{ii} = toMATLAB(arrowArray);
88-
end
89-
90-
validVariableNames = makeValidVariableNames(obj.ColumnNames);
91-
validDimensionNames = makeValidDimensionNames(validVariableNames);
92-
93-
T = table(matlabArrays{:}, ...
94-
VariableNames=validVariableNames, ...
95-
DimensionNames=validDimensionNames);
96-
end
97-
98-
function T = toMATLAB(obj)
99-
T = obj.table();
100-
end
101-
102-
function tf = isequal(obj, varargin)
103-
tf = arrow.tabular.internal.isequal(obj, varargin{:});
28+
obj@arrow.tabular.Tabular(proxy);
10429
end
10530

10631
function export(obj, cArrowArrayAddress, cArrowSchemaAddress)
@@ -115,23 +40,21 @@ function export(obj, cArrowArrayAddress, cArrowSchemaAddress)
11540
);
11641
obj.Proxy.exportToC(args);
11742
end
118-
end
11943

120-
methods (Access = private)
121-
function str = toString(obj)
122-
str = obj.Proxy.toString();
123-
end
12444
end
12545

12646
methods (Access=protected)
127-
function displayScalarObject(obj)
128-
className = matlab.mixin.CustomDisplay.getClassNameForHeader(obj);
129-
tabularDisplay = arrow.tabular.internal.display.getTabularDisplay(obj, className);
130-
disp(tabularDisplay + newline);
47+
48+
function column = constructColumnFromProxy(~, proxyInfo)
    % Build the MATLAB array object for one column of the record batch.
    %
    % proxyInfo is read for two fields: TypeID, which selects the type
    % traits, and ProxyID, which identifies the existing proxy to wrap.

    % Look up the traits object that corresponds to the column's type.
    typeID = arrow.type.ID(proxyInfo.TypeID);
    typeTraits = arrow.type.traits.traits(typeID);

    % Wrap the existing proxy using the proxy class name the traits report.
    arrayProxy = libmexclass.proxy.Proxy( ...
        Name=typeTraits.ArrayProxyClassName, ...
        ID=proxyInfo.ProxyID);

    % NOTE(review): ArrayConstructor is presumably a function handle to the
    % matching arrow.array.* constructor - confirm against the traits classes.
    column = typeTraits.ArrayConstructor(arrayProxy);
end
53+
13254
end
13355

13456
methods (Static, Access=public)
57+
13558
function recordBatch = fromArrays(arrowArrays, opts)
13659
arguments(Repeating)
13760
arrowArrays(1, 1) arrow.array.Array
@@ -163,5 +86,7 @@ function displayScalarObject(obj)
16386
importer = arrow.c.internal.RecordBatchImporter();
16487
recordBatch = importer.import(cArray, cSchema);
16588
end
89+
16690
end
91+
16792
end

matlab/src/matlab/+arrow/+tabular/Table.m

Lines changed: 6 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
%TABLE A tabular data structure representing a set of
2-
% arrow.array.ChunkedArray objects with a fixed schema.
3-
41
% Licensed to the Apache Software Foundation (ASF) under one or more
52
% contributor license agreements. See the NOTICE file distributed with
63
% this work for additional information regarding copyright ownership.
@@ -16,18 +13,7 @@
1613
% implied. See the License for the specific language governing
1714
% permissions and limitations under the License.
1815

19-
classdef Table < matlab.mixin.CustomDisplay & matlab.mixin.Scalar
20-
21-
properties (Dependent, SetAccess=private, GetAccess=public)
22-
NumRows
23-
NumColumns
24-
ColumnNames
25-
Schema
26-
end
27-
28-
properties (Hidden, SetAccess=private, GetAccess=public)
29-
Proxy
30-
end
16+
classdef Table < arrow.tabular.Tabular
3117

3218
methods
3319

@@ -36,88 +22,17 @@
3622
proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.tabular.proxy.Table")}
3723
end
3824
import arrow.internal.proxy.validate
39-
obj.Proxy = proxy;
40-
end
41-
42-
function numColumns = get.NumColumns(obj)
43-
numColumns = obj.Proxy.getNumColumns();
44-
end
45-
46-
function numRows = get.NumRows(obj)
47-
numRows = obj.Proxy.getNumRows();
48-
end
49-
50-
function columnNames = get.ColumnNames(obj)
51-
columnNames = obj.Proxy.getColumnNames();
52-
end
53-
54-
function schema = get.Schema(obj)
55-
proxyID = obj.Proxy.getSchema();
56-
proxy = libmexclass.proxy.Proxy(Name="arrow.tabular.proxy.Schema", ID=proxyID);
57-
schema = arrow.tabular.Schema(proxy);
58-
end
59-
60-
function chunkedArray = column(obj, idx)
61-
import arrow.internal.validate.*
62-
63-
idx = index.numericOrString(idx, "int32", AllowNonScalar=false);
64-
65-
if isnumeric(idx)
66-
args = struct(Index=idx);
67-
proxyID = obj.Proxy.getColumnByIndex(args);
68-
else
69-
args = struct(Name=idx);
70-
proxyID = obj.Proxy.getColumnByName(args);
71-
end
72-
73-
proxy = libmexclass.proxy.Proxy(Name="arrow.array.proxy.ChunkedArray", ID=proxyID);
74-
chunkedArray = arrow.array.ChunkedArray(proxy);
75-
end
76-
77-
function T = table(obj)
78-
import arrow.tabular.internal.*
79-
80-
numColumns = obj.NumColumns;
81-
matlabArrays = cell(1, numColumns);
82-
83-
for ii = 1:numColumns
84-
chunkedArray = obj.column(ii);
85-
matlabArrays{ii} = toMATLAB(chunkedArray);
86-
end
87-
88-
validVariableNames = makeValidVariableNames(obj.ColumnNames);
89-
validDimensionNames = makeValidDimensionNames(validVariableNames);
90-
91-
T = table(matlabArrays{:}, ...
92-
VariableNames=validVariableNames, ...
93-
DimensionNames=validDimensionNames);
94-
end
95-
96-
function T = toMATLAB(obj)
97-
T = obj.table();
98-
end
99-
100-
function tf = isequal(obj, varargin)
101-
tf = arrow.tabular.internal.isequal(obj, varargin{:});
102-
end
103-
104-
end
105-
106-
methods (Access = private)
107-
108-
function str = toString(obj)
109-
str = obj.Proxy.toString();
25+
obj@arrow.tabular.Tabular(proxy);
11026
end
11127

11228
end
11329

11430
methods (Access=protected)
115-
function displayScalarObject(obj)
116-
className = matlab.mixin.CustomDisplay.getClassNameForHeader(obj);
117-
tabularDisplay = arrow.tabular.internal.display.getTabularDisplay(obj, className);
118-
disp(tabularDisplay + newline);
119-
end
12031

32+
function column = constructColumnFromProxy(~, proxyInfo)
    % Build the arrow.array.ChunkedArray object for one column of the table.
    %
    % Here proxyInfo is passed straight through as the ID of the existing
    % ChunkedArray proxy; no fields are read from it.
    proxyClassName = "arrow.array.proxy.ChunkedArray";
    chunkedArrayProxy = libmexclass.proxy.Proxy( ...
        Name=proxyClassName, ...
        ID=proxyInfo);
    column = arrow.array.ChunkedArray(chunkedArrayProxy);
end
12136
end
12237

12338
methods (Static, Access=public)
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
%TABULAR Interface that represents a tabular data structure.
2+
3+
% Licensed to the Apache Software Foundation (ASF) under one or more
4+
% contributor license agreements. See the NOTICE file distributed with
5+
% this work for additional information regarding copyright ownership.
6+
% The ASF licenses this file to you under the Apache License, Version
7+
% 2.0 (the "License"); you may not use this file except in compliance
8+
% with the License. You may obtain a copy of the License at
9+
%
10+
% http://www.apache.org/licenses/LICENSE-2.0
11+
%
12+
% Unless required by applicable law or agreed to in writing, software
13+
% distributed under the License is distributed on an "AS IS" BASIS,
14+
% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
15+
% implied. See the License for the specific language governing
16+
% permissions and limitations under the License.
17+
18+
classdef Tabular < matlab.mixin.CustomDisplay & matlab.mixin.Scalar
    % Common base class for tabular Arrow wrappers. It stores the
    % libmexclass proxy and implements, on top of that proxy, the shared
    % properties (NumRows, NumColumns, ColumnNames, Schema) and methods
    % (column, table, toMATLAB, isequal, display). Concrete subclasses
    % only need to implement constructColumnFromProxy.

    properties (Dependent, SetAccess=private, GetAccess=public)
        % These are Dependent: each access queries the proxy (see the
        % get.* methods below); nothing is cached on the object.
        NumRows
        NumColumns
        ColumnNames
        Schema
    end

    properties (Hidden, SetAccess=private, GetAccess=public)
        % The libmexclass.proxy.Proxy that every method forwards to.
        % It is only assigned in the Tabular constructor.
        Proxy
    end

    methods(Access=protected, Abstract)
        % constructColumnFromProxy must construct an instance of the
        % appropriate MATLAB class from the proxyInfo argument. The
        % template method arrow.tabular.Tabular/column() invokes this
        % method.
        column = constructColumnFromProxy(obj, proxyInfo)
    end

    methods

        function obj = Tabular(proxy)
            % Store the proxy that backs this tabular object.
            %
            % Only the size and class of proxy are checked here.
            arguments
                proxy(1, 1) libmexclass.proxy.Proxy
            end
            obj.Proxy = proxy;
        end

        function numColumns = get.NumColumns(obj)
            % Number of columns, as reported by the proxy.
            numColumns = obj.Proxy.getNumColumns();
        end

        function numRows = get.NumRows(obj)
            % Number of rows, as reported by the proxy.
            numRows = obj.Proxy.getNumRows();
        end

        function columnNames = get.ColumnNames(obj)
            % Column names, as reported by the proxy.
            columnNames = obj.Proxy.getColumnNames();
        end

        function schema = get.Schema(obj)
            % Wrap the proxy ID returned by getSchema in an
            % arrow.tabular.Schema object.
            proxyID = obj.Proxy.getSchema();
            proxy = libmexclass.proxy.Proxy(Name="arrow.tabular.proxy.Schema", ID=proxyID);
            schema = arrow.tabular.Schema(proxy);
        end

        function array = column(obj, idx)
            % Return the column selected by idx, which is either a
            % numeric index or a column name.
            %
            % The class of the returned object is decided by the
            % subclass's constructColumnFromProxy implementation.
            import arrow.internal.validate.*

            % NOTE(review): presumably validates that idx is a scalar
            % numeric index or string and converts numeric input to
            % int32 - confirm against index.numericOrString.
            idx = index.numericOrString(idx, "int32", AllowNonScalar=false);

            % Numeric input is looked up by position, anything else by name.
            if isnumeric(idx)
                args = struct(Index=idx);
                proxyInfo = obj.Proxy.getColumnByIndex(args);
            else
                args = struct(Name=idx);
                proxyInfo = obj.Proxy.getColumnByName(args);
            end

            array = obj.constructColumnFromProxy(proxyInfo);
        end

        function T = table(obj)
            % Convert this object to a MATLAB table with one variable
            % per column.
            import arrow.tabular.internal.*

            numColumns = obj.NumColumns;
            matlabArrays = cell(1, numColumns);

            % Convert the columns one at a time, in column order.
            for ii = 1:numColumns
                matlabArrays{ii} = toMATLAB(obj.column(ii));
            end

            % The table is built from names passed through these two
            % helpers rather than from the raw column names.
            validVariableNames = makeValidVariableNames(obj.ColumnNames);
            validDimensionNames = makeValidDimensionNames(validVariableNames);

            T = table(matlabArrays{:}, ...
                VariableNames=validVariableNames, ...
                DimensionNames=validDimensionNames);
        end

        function T = toMATLAB(obj)
            % Same result as table(obj).
            T = obj.table();
        end

        function tf = isequal(obj, varargin)
            % Delegate the comparison of obj with all remaining
            % arguments to arrow.tabular.internal.isequal.
            tf = arrow.tabular.internal.isequal(obj, varargin{:});
        end

    end

    methods (Access = private)

        function str = toString(obj)
            % Return the proxy's string representation.
            str = obj.Proxy.toString();
        end

    end

    methods (Access=protected)

        function displayScalarObject(obj)
            % matlab.mixin.CustomDisplay hook: print the text produced
            % by getTabularDisplay followed by a newline.
            className = matlab.mixin.CustomDisplay.getClassNameForHeader(obj);
            tabularDisplay = arrow.tabular.internal.display.getTabularDisplay(obj, className);
            disp(tabularDisplay + newline);
        end

    end

end

0 commit comments

Comments
 (0)