Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Discussion: Metadata; internal to external data type conversion #360

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions drivers/metadata/informationschema/metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ type InformationSchema struct {
systemSchemas []string
currentSchema string
dataTypeFormatter func(metadata.Column) string
externalDataType func(metadata.Column) string
}

var _ metadata.BasicReader = &InformationSchema{}
Expand All @@ -42,6 +43,7 @@ type ClauseName string

const (
ColumnsDataType = ClauseName("columns.data_type")
ColumnsInternalDataType = ClauseName("columns.internal_data_type")
ColumnsColumnSize = ClauseName("columns.column_size")
ColumnsNumericScale = ClauseName("columns.numeric_scale")
ColumnsNumericPrecRadix = ClauseName("columns.numeric_precision_radix")
Expand Down Expand Up @@ -77,6 +79,7 @@ func New(opts ...metadata.ReaderOption) func(drivers.DB, ...metadata.ReaderOptio
hasUsagePrivileges: true,
clauses: map[ClauseName]string{
ColumnsDataType: "data_type",
ColumnsInternalDataType: "int_data_type",
ColumnsColumnSize: "COALESCE(character_maximum_length, numeric_precision, datetime_precision, 0)",
ColumnsNumericScale: "COALESCE(numeric_scale, 0)",
ColumnsNumericPrecRadix: "COALESCE(numeric_precision_radix, 10)",
Expand Down Expand Up @@ -200,6 +203,13 @@ func WithDataTypeFormatter(f func(metadata.Column) string) metadata.ReaderOption
}
}

// WithExternalDataType returns a reader option that installs f as the
// callback used to build the external string representation of a column's
// data type.
//
// The option type-asserts its target to *InformationSchema, so applying it to
// any other metadata.Reader implementation panics.
func WithExternalDataType(f func(metadata.Column) string) metadata.ReaderOption {
	return func(r metadata.Reader) {
		is := r.(*InformationSchema)
		is.externalDataType = f
	}
}

// SetLimit stores l in the reader's limit field.
// NOTE(review): presumably this caps the number of rows returned by metadata
// queries — confirm against the query-building code.
func (s *InformationSchema) SetLimit(l int) {
s.limit = l
}
Expand All @@ -213,6 +223,7 @@ func (s InformationSchema) Columns(f metadata.Filter) (*metadata.ColumnSet, erro
"column_name",
"ordinal_position",
s.clauses[ColumnsDataType],
s.clauses[ColumnsInternalDataType],
"COALESCE(column_default, '')",
"COALESCE(is_nullable, '') AS is_nullable",
s.clauses[ColumnsColumnSize],
Expand Down Expand Up @@ -247,6 +258,7 @@ func (s InformationSchema) Columns(f metadata.Filter) (*metadata.ColumnSet, erro
&rec.Name,
&rec.OrdinalPosition,
&rec.DataType,
&rec.InternalDataType,
&rec.Default,
&rec.IsNullable,
&rec.ColumnSize,
Expand All @@ -257,6 +269,10 @@ func (s InformationSchema) Columns(f metadata.Filter) (*metadata.ColumnSet, erro
if err != nil {
return nil, err
}
// run the internal-to-external data type conversion if it has been set
if s.externalDataType != nil {
rec.ExternalDataType = s.externalDataType(rec)
}
rec.DataType = s.dataTypeFormatter(rec)
results = append(results, rec)
}
Expand Down
37 changes: 24 additions & 13 deletions drivers/metadata/metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -317,12 +317,14 @@ type Column struct {
OrdinalPosition int
DataType string
// ScanType reflect.Type
Default string
ColumnSize int
DecimalDigits int
NumPrecRadix int
CharOctetLength int
IsNullable Bool
InternalDataType string
ExternalDataType string
Default string
ColumnSize int
DecimalDigits int
NumPrecRadix int
CharOctetLength int
IsNullable Bool
}

type Bool string
Expand Down Expand Up @@ -500,13 +502,15 @@ func (c IndexColumnSet) Get() *IndexColumn {
}

type IndexColumn struct {
Catalog string
Schema string
Table string
IndexName string
Name string
DataType string
OrdinalPosition int
Catalog string
Schema string
Table string
IndexName string
Name string
DataType string
InternalDataType string
ExternalDataType string
OrdinalPosition int
}

func (c IndexColumn) values() []interface{} {
Expand Down Expand Up @@ -1143,3 +1147,10 @@ func NewTriggerSet(t []Trigger) *TriggerSet {
func (t TriggerSet) Get() *Trigger {
return t.results[t.current-1].(*Trigger)
}

// DataTypeMapping is meant to be one big map of all column data type
// mappings: since most data types are similar between databases, they are
// kept together in a single bucket.
//
// NOTE(review): the only entry is a placeholder ("aaa" -> ""); confirm whether
// this map is still needed or should be populated before merging.
var DataTypeMapping = map[string]string{
"aaa": "",
}
82 changes: 82 additions & 0 deletions drivers/metadata/metadataext_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
package metadata_test

import (
"database/sql"
"fmt"
"os"
"testing"

_ "github.com/mattn/go-sqlite3"
"github.com/stretchr/testify/assert"
"github.com/xo/usql/drivers/metadata"
"github.com/xo/usql/drivers/metadata/postgres"
_ "github.com/xo/usql/drivers/postgres"
"github.com/xo/usql/drivers/sqlite3/sqshared"
)

// error is a package-local error type carrying an error code string, a
// message, a SQL string, a numeric code and a trace.
//
// NOTE(review): declaring this at package scope shadows the builtin error
// type for the whole test package, and nothing in the visible test file
// references it. The "H2 SQL Exception" prefix suggests it was copied from
// another driver — confirm whether it can be removed.
type error struct {
	strError, msg, sql string
	codeError          int32
	trace              string
}

// GetTrace returns the trace stored on the error.
func (e *error) GetTrace() string {
	trace := e.trace
	return trace
}

// Error implements the error interface, formatting the error code string and
// the message.
func (e *error) Error() string {
	code, detail := e.strError, e.msg
	return fmt.Sprintf("H2 SQL Exception: [%s] %s", code, detail)
}

// TestGetPgExternalDataType checks that the postgres conversion of internal
// to external data types works: every column must carry a non-empty
// ExternalDataType that agrees with postgres.Mapping for its InternalDataType.
//
// To set up, import the northwind SQL dump under testdata/northwind/ into a
// database called northwind, and don't forget to add the user and password to
// the connection string below.
func TestGetPgExternalDataType(t *testing.T) {
	db, err := sql.Open("postgres", "postgres://localhost:5432/northwind")
	if !assert.NoError(t, err) || !assert.NotEmpty(t, db) {
		return
	}
	// Release the handle on every exit path, including the early returns below.
	defer func() {
		assert.NoError(t, db.Close())
	}()

	reader := postgres.NewReader()(db).(metadata.BasicReader)
	if !assert.NotEmpty(t, reader) {
		return
	}

	columnSet, err := reader.Columns(metadata.Filter{Catalog: "northwind", Schema: "public", Types: []string{"TABLE"}})
	// assert does not abort the test, and a failed Columns call may hand back
	// a nil set; stop here rather than iterate over it.
	if !assert.NoError(t, err) || !assert.NotEmpty(t, columnSet) {
		return
	}

	for columnSet.Next() {
		col := columnSet.Get()
		assert.NotEmpty(t, col.ExternalDataType, fmt.Sprintf("Should not be empty: %s", col.DataType))
		assert.Equal(t, postgres.Mapping[col.InternalDataType], col.ExternalDataType)
	}
}

// TestGetSqliteExternalDataType checks that the sqlite conversion of internal
// to external data types works, using the northwind.sqlite fixture under
// testdata/northwind.
//
// It does not use the information schema functionality since that seems to be
// bypassed in general for sqlite.
func TestGetSqliteExternalDataType(t *testing.T) {
	path, err := os.Getwd()
	if !assert.NoError(t, err) {
		return
	}
	url := fmt.Sprintf("file:%s/../../testdata/northwind/northwind.sqlite", path)

	db, err := sql.Open("sqlite3", url)
	if !assert.NoError(t, err) || !assert.NotEmpty(t, db) {
		return
	}
	// Release the handle on every exit path, including the early returns below.
	defer func() {
		assert.NoError(t, db.Close())
	}()

	reader := &sqshared.MetadataReader{LoggingReader: metadata.NewLoggingReader(db)}
	if !assert.NotEmpty(t, reader) {
		return
	}

	columnSet, err := reader.Columns(metadata.Filter{Parent: "Customer", Types: []string{"TABLE"}, WithSystem: false, OnlyVisible: true})
	// assert does not abort the test, and a failed Columns call may hand back
	// a nil set; stop here rather than iterate over it.
	if !assert.NoError(t, err) || !assert.NotEmpty(t, columnSet) {
		return
	}

	for columnSet.Next() {
		col := columnSet.Get()
		assert.NotEmpty(t, col.ExternalDataType, fmt.Sprintf("Should not be empty: %s", col.DataType))
		assert.Equal(t, sqshared.Mapping[col.InternalDataType], col.ExternalDataType)
	}
}
54 changes: 53 additions & 1 deletion drivers/metadata/postgres/metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,14 @@ func NewReader() func(drivers.DB, ...metadata.ReaderOption) metadata.Reader {
newIS := infos.New(
infos.WithIndexes(false),
infos.WithCustomClauses(map[infos.ClauseName]string{
infos.ColumnsInternalDataType: "udt_name",
infos.ColumnsColumnSize: "COALESCE(character_maximum_length, numeric_precision, datetime_precision, interval_precision, 0)",
infos.FunctionColumnsColumnSize: "COALESCE(character_maximum_length, numeric_precision, datetime_precision, interval_precision, 0)",
}),
infos.WithSystemSchemas([]string{"pg_catalog", "pg_toast", "information_schema"}),
infos.WithCurrentSchema("CURRENT_SCHEMA"),
infos.WithDataTypeFormatter(dataTypeFormatter))
infos.WithDataTypeFormatter(dataTypeFormatter),
infos.WithExternalDataType(externalDataType))
return metadata.NewPluginReader(
newIS(db, opts...),
&metaReader{
Expand Down Expand Up @@ -73,6 +75,11 @@ func dataTypeFormatter(col metadata.Column) string {
}
}

// externalDataType maps an internal postgres data type to its external
// counterpart by looking up col.InternalDataType in Mapping. A type with no
// entry yields the empty string.
func externalDataType(col metadata.Column) string {
	if external, ok := Mapping[col.InternalDataType]; ok {
		return external
	}
	return ""
}

// SetLimit stores l in the reader's limit field.
// NOTE(review): presumably this caps the number of rows returned by metadata
// queries — confirm against the query helper.
func (r *metaReader) SetLimit(l int) {
r.limit = l
}
Expand Down Expand Up @@ -430,3 +437,48 @@ func (r metaReader) query(qstr string, conds []string, order string, vals ...int
}
return r.Query(qstr, vals...)
}

// Mapping maps postgres data type names to external data type names.
// Keys include both short names (e.g. "int4", "bpchar") and SQL spellings
// (e.g. "integer", "character varying").
// see https://www.instaclustr.com/blog/postgresql-data-types-mappings-to-sql-jdbc-and-java-data-types/
//
// NOTE(review): some postgres aliases of the same type map to different
// values here ("bool" -> "BIT" but "boolean" -> "BOOL"; "real" -> "FLOAT" but
// "float4" -> "REAL") — confirm whether that is intended.
var Mapping = map[string]string{
"bool": "BIT",
"boolean": "BOOL",
"bit": "BIT",
"int8": "BIGINT",
"bigserial": "BIGINT",
"bigint": "BIGINT",
"oid": "BIGINT",
"bytea": "BINARY",
"char": "CHAR",
"real": "FLOAT",
"character": "CHAR",
"bpchar": "CHAR",
"numeric": "NUMERIC",
"int4": "INTEGER",
"integer": "INTEGER",
"serial": "INTEGER",
"int2": "SMALLINT",
"smallserial": "SMALLINT",
"smallint": "SMALLINT",
"float4": "REAL",
"float8": "DOUBLE",
"money": "DOUBLE",
"name": "VARCHAR",
"text": "VARCHAR",
"varchar": "VARCHAR",
"character varying": "VARCHAR",
"date": "DATE",
"time": "TIME",
"timetz": "TIME",
"timestamp": "TIMESTAMP",
"timestamp with time zone": "TIMESTAMP",
"timestamp without time zone": "TIMESTAMP",
"timestamptz": "TIMESTAMP",
"cardinal_number": "DISTINCT",
"character_data": "DISTINCT",
"sql_identifier": "DISTINCT",
"time_stamp": "DISTINCT",
"yes_or_no": "DISTINCT",
"xml": "SQLXML",
"refcursor": "REF_CURSOR",
}
42 changes: 42 additions & 0 deletions drivers/sqlite3/sqshared/reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package sqshared
import (
"database/sql"
"fmt"
"regexp"
"strings"

"github.com/xo/usql/drivers"
Expand Down Expand Up @@ -71,6 +72,7 @@ FROM pragma_table_info(?)`
if err != nil {
return nil, err
}
rec = ExternalDataType(rec)
results = append(results, rec)
}
if rows.Err() != nil {
Expand Down Expand Up @@ -329,3 +331,43 @@ func (r MetadataReader) query(qstr string, conds []string, order string, vals ..
}
return r.Query(qstr, vals...)
}

// sqliteTypeNameRE matches the first run of word characters in a declared
// column type, e.g. "VARCHAR" in "VARCHAR(20)". It is compiled once at package
// initialisation instead of on every call.
var sqliteTypeNameRE = regexp.MustCompile(`\w+`)

// ExternalDataType maps the internal sqlite data type of col to its external
// counterpart.
//
// The first word of col.DataType is stored in InternalDataType, and its entry
// in Mapping (the empty string when there is none) is stored in
// ExternalDataType. If col.DataType contains no word characters, col is
// returned unchanged. The updated copy is returned; the caller's value is not
// modified.
//
// NOTE(review): only the first word is used and the lookup is case-sensitive,
// so multi-word or lower-case declared types do not hit Mapping — confirm
// whether that is intended.
func ExternalDataType(col metadata.Column) metadata.Column {
	intType := sqliteTypeNameRE.FindString(col.DataType)
	if intType == "" {
		return col
	}
	col.InternalDataType = intType
	col.ExternalDataType = Mapping[intType]
	return col
}

// Mapping maps sqlite type names (upper-case keys) to external data type
// names. ExternalDataType looks up the first word of a column's declared type
// in this map.
//
// NOTE(review): ExternalDataType extracts its key with the pattern `\w+`,
// which stops at the first space, so the "NATIVE CHARACTER" entry looks
// unreachable through that path — confirm.
var Mapping = map[string]string{
"BOOLEAN": "BOOLEAN",
"TINYINT": "TINYINT",
"SMALLINT": "SMALLINT",
"BIGINT": "BIGINT",
"DATE": "DATE",
"TIMESTAMP": "TIMESTAMP",
"INT": "INTEGER",
"INTEGER": "INTEGER",
"MEDIUMINT": "INTEGER",
"DECIMAL": "DECIMAL",
"DOUBLE": "DOUBLE",
"NUMERIC": "NUMERIC",
"REAL": "REAL",
"FLOAT": "FLOAT",
"CHARACTER": "CHAR",
"NCHAR": "CHAR",
"NATIVE CHARACTER": "CHAR",
"CHAR": "CHAR",
"BINARY": "BINARY",
"BLOB": "BLOB",
"VARCHAR": "VARCHAR",
"TEXT": "TEXT",
}
7 changes: 7 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ require (
sqlflow.org/gomaxcompute v0.0.0-20210805062559-c14ae028b44c
)

require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

require (
cloud.google.com/go v0.103.0 // indirect
cloud.google.com/go/bigquery v1.38.0 // indirect
Expand Down Expand Up @@ -202,6 +208,7 @@ require (
github.com/segmentio/asm v1.2.0 // indirect
github.com/shopspring/decimal v1.3.1 // indirect
github.com/spaolacci/murmur3 v1.1.0 // indirect
github.com/stretchr/testify v1.8.0
github.com/twmb/murmur3 v1.1.6 // indirect
github.com/uber-go/tally v3.5.0+incompatible // indirect
github.com/unchartedsoftware/witch v0.0.0-20200617171400-4f405404126f // indirect
Expand Down
Loading