Skip to content

Commit 6644557

Browse files
committedOct 15, 2022
Add configuration parameters for JSON maximum line size as well as file reader buffer size.
1 parent cc43f25 commit 6644557

File tree

9 files changed

+62
-30
lines changed

9 files changed

+62
-30
lines changed
 

‎cmd/root.go

+10-9
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,12 @@ octosql "SELECT * FROM plugins.plugins"`,
7878
defer profile.Start(profile.TraceProfile, profile.ProfilePath(".")).Stop()
7979
}
8080
ctx := cmd.Context()
81+
cfg, err := config.Read()
82+
if err != nil {
83+
return fmt.Errorf("couldn't read config: %w", err)
84+
}
85+
ctx = config.ContextWithConfig(ctx, cfg)
86+
8187
debug.SetGCPercent(1000)
8288

8389
logs.InitializeFileLogger()
@@ -94,11 +100,6 @@ octosql "SELECT * FROM plugins.plugins"`,
94100
}
95101
}()
96102

97-
cfg, err := config.Read()
98-
if err != nil {
99-
return fmt.Errorf("couldn't read config: %w", err)
100-
}
101-
102103
installedPlugins, err := pluginManager.ListInstalledPlugins()
103104
if err != nil {
104105
return fmt.Errorf("couldn't list installed plugins: %w", err)
@@ -188,15 +189,15 @@ octosql "SELECT * FROM plugins.plugins"`,
188189
if err != nil {
189190
return fmt.Errorf("couldn't get file extension handlers: %w", err)
190191
}
191-
fileHandlers := map[string]func(name string, options map[string]string) (physical.DatasourceImplementation, physical.Schema, error){
192+
fileHandlers := map[string]func(ctx context.Context, name string, options map[string]string) (physical.DatasourceImplementation, physical.Schema, error){
192193
"csv": csv.Creator(','),
193194
"json": json.Creator,
194195
"lines": lines.Creator,
195196
"parquet": parquet.Creator,
196197
"tsv": csv.Creator('\t'),
197198
}
198199
for ext, pluginName := range fileExtensionHandlers {
199-
fileHandlers[ext] = func(name string, options map[string]string) (physical.DatasourceImplementation, physical.Schema, error) {
200+
fileHandlers[ext] = func(ctx context.Context, name string, options map[string]string) (physical.DatasourceImplementation, physical.Schema, error) {
200201
db, err := databases[pluginName]()
201202
if err != nil {
202203
return nil, physical.Schema{}, fmt.Errorf("couldn't get plugin %s database for plugin extensions %s: %w", pluginName, ext, err)
@@ -513,13 +514,13 @@ func typecheckExpr(ctx context.Context, expr logical.Expression, env physical.En
513514
}
514515

515516
type fileTypeDatabaseCreator struct {
516-
creator func(name string, options map[string]string) (physical.DatasourceImplementation, physical.Schema, error)
517+
creator func(ctx context.Context, name string, options map[string]string) (physical.DatasourceImplementation, physical.Schema, error)
517518
}
518519

519520
func (f *fileTypeDatabaseCreator) ListTables(ctx context.Context) ([]string, error) {
520521
return nil, nil
521522
}
522523

523524
func (f *fileTypeDatabaseCreator) GetTable(ctx context.Context, name string, options map[string]string) (physical.DatasourceImplementation, physical.Schema, error) {
524-
return f.creator(name, options)
525+
return f.creator(ctx, name, options)
525526
}

‎config/config.go

+37-9
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package config
22

33
import (
4+
"context"
45
"fmt"
56
"io/ioutil"
67
"log"
@@ -23,6 +24,7 @@ var octosqlHomeDir = func() string {
2324

2425
type Config struct {
2526
Databases []DatabaseConfig `yaml:"databases"`
27+
Files FilesConfig `yaml:"files"`
2628
}
2729

2830
type DatabaseConfig struct {
@@ -32,20 +34,34 @@ type DatabaseConfig struct {
3234
Config yaml.Node `yaml:"config"`
3335
}
3436

37+
type FilesConfig struct {
38+
JSON JSONConfig `yaml:"json"`
39+
BufferSizeBytes int `yaml:"buffer_size_bytes"`
40+
}
41+
42+
type JSONConfig struct {
43+
MaxLineSizeBytes int `yaml:"max_line_size_bytes"`
44+
}
45+
3546
func Read() (*Config, error) {
47+
var config Config
3648
data, err := ioutil.ReadFile(filepath.Join(octosqlHomeDir, "octosql.yml"))
37-
if err != nil {
38-
if os.IsNotExist(err) {
39-
return &Config{
40-
Databases: nil,
41-
}, nil
42-
}
49+
if err != nil && os.IsNotExist(err) {
50+
config = Config{}
51+
} else if err != nil {
4352
return nil, fmt.Errorf("couldn't read config file: %w", err)
53+
} else {
54+
if err := yaml.Unmarshal(data, &config); err != nil {
55+
return nil, fmt.Errorf("couldn't unmarshal yaml configuration: %w", err)
56+
}
4457
}
4558

46-
var config Config
47-
if err := yaml.Unmarshal(data, &config); err != nil {
48-
return nil, fmt.Errorf("couldn't unmarshal yaml configuration: %w", err)
59+
// TODO: Refactor to a sensibly structured default value system.
60+
if config.Files.BufferSizeBytes == 0 {
61+
config.Files.BufferSizeBytes = 4096 * 1024
62+
}
63+
if config.Files.JSON.MaxLineSizeBytes == 0 {
64+
config.Files.JSON.MaxLineSizeBytes = 1024 * 1024
4965
}
5066

5167
return &config, nil
@@ -89,3 +105,15 @@ func (r *PluginReference) UnmarshalText(text []byte) error {
89105
func (r *PluginReference) String() string {
90106
return fmt.Sprintf("%s/%s", r.Repository, r.Name)
91107
}
108+
109+
// TODO: Using a custom context everywhere would be better.
110+
111+
type contextKey struct{}
112+
113+
func ContextWithConfig(ctx context.Context, config *Config) context.Context {
114+
return context.WithValue(ctx, contextKey{}, config)
115+
}
116+
117+
func FromContext(ctx context.Context) *Config {
118+
return ctx.Value(contextKey{}).(*Config)
119+
}

‎datasources/csv/impl.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@ import (
1414
"github.com/cube2222/octosql/physical"
1515
)
1616

17-
func Creator(separator rune) func(name string, options map[string]string) (physical.DatasourceImplementation, physical.Schema, error) {
18-
return func(name string, options map[string]string) (physical.DatasourceImplementation, physical.Schema, error) {
19-
f, err := files.OpenLocalFile(context.Background(), name, files.WithPreview())
17+
func Creator(separator rune) func(ctx context.Context, name string, options map[string]string) (physical.DatasourceImplementation, physical.Schema, error) {
18+
return func(ctx context.Context, name string, options map[string]string) (physical.DatasourceImplementation, physical.Schema, error) {
19+
f, err := files.OpenLocalFile(ctx, name, files.WithPreview())
2020
if err != nil {
2121
return nil, physical.Schema{}, fmt.Errorf("couldn't open local file: %w", err)
2222
}

‎datasources/json/execution.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88

99
"github.com/valyala/fastjson"
1010

11+
"github.com/cube2222/octosql/config"
1112
. "github.com/cube2222/octosql/execution"
1213
"github.com/cube2222/octosql/execution/files"
1314
"github.com/cube2222/octosql/octosql"
@@ -28,7 +29,7 @@ func (d *DatasourceExecuting) Run(ctx ExecutionContext, produce ProduceFn, metaS
2829
defer f.Close()
2930

3031
sc := bufio.NewScanner(f)
31-
sc.Buffer(nil, 1024*1024)
32+
sc.Buffer(nil, config.FromContext(ctx).Files.JSON.MaxLineSizeBytes)
3233

3334
// All async processing in this function and all jobs created by it use this context.
3435
// This means that returning from this function will properly clean up all async processors.

‎datasources/json/impl.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ import (
1515
"github.com/cube2222/octosql/physical"
1616
)
1717

18-
func Creator(name string, options map[string]string) (physical.DatasourceImplementation, physical.Schema, error) {
19-
f, err := files.OpenLocalFile(context.Background(), name, files.WithPreview())
18+
func Creator(ctx context.Context, name string, options map[string]string) (physical.DatasourceImplementation, physical.Schema, error) {
19+
f, err := files.OpenLocalFile(ctx, name, files.WithPreview())
2020
if err != nil {
2121
return nil, physical.Schema{}, fmt.Errorf("couldn't open local file: %w", err)
2222
}

‎datasources/lines/impl.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ import (
1010
"github.com/cube2222/octosql/physical"
1111
)
1212

13-
func Creator(name string, options map[string]string) (physical.DatasourceImplementation, physical.Schema, error) {
14-
f, err := files.OpenLocalFile(context.Background(), name, files.WithPreview())
13+
func Creator(ctx context.Context, name string, options map[string]string) (physical.DatasourceImplementation, physical.Schema, error) {
14+
f, err := files.OpenLocalFile(ctx, name, files.WithPreview())
1515
if err != nil {
1616
return nil, physical.Schema{}, fmt.Errorf("couldn't open local file: %w", err)
1717
}

‎datasources/parquet/impl.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import (
1212
"github.com/cube2222/octosql/physical"
1313
)
1414

15-
func Creator(name string, options map[string]string) (physical.DatasourceImplementation, physical.Schema, error) {
15+
func Creator(ctx context.Context, name string, options map[string]string) (physical.DatasourceImplementation, physical.Schema, error) {
1616
f, err := os.Open(name)
1717
if err != nil {
1818
return nil, physical.Schema{}, fmt.Errorf("couldn't open file: %w", err)

‎execution/files/files.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ import (
1010
"sync"
1111

1212
"github.com/nxadm/tail"
13+
14+
"github.com/cube2222/octosql/config"
1315
)
1416

1517
type customCloser struct {
@@ -113,7 +115,7 @@ func OpenLocalFile(ctx context.Context, path string, opts ...OpenFileOption) (io
113115
return nil, fmt.Errorf("couldn't open file: %w", err)
114116
}
115117
return &customCloser{
116-
Reader: bufio.NewReaderSize(f, 4096*1024),
118+
Reader: bufio.NewReaderSize(f, config.FromContext(ctx).Files.BufferSizeBytes),
117119
close: f.Close,
118120
}, nil
119121
} else {

‎physical/physical.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ type DatasourceRepository struct {
5454
// Bo inaczej będzie na start strasznie dużo rzeczy ładować niepotrzebnych dla wszystkich
5555
// skonfigurowanych baz danych.
5656
Databases map[string]func() (Database, error)
57-
FileHandlers map[string]func(name string, options map[string]string) (DatasourceImplementation, Schema, error)
57+
FileHandlers map[string]func(ctx context.Context, name string, options map[string]string) (DatasourceImplementation, Schema, error)
5858
}
5959

6060
type Database interface {
@@ -78,7 +78,7 @@ func (dr *DatasourceRepository) GetDatasource(ctx context.Context, name string,
7878
if index := strings.LastIndex(name, "."); index != -1 {
7979
extension := name[index+1:]
8080
if handler, ok := dr.FileHandlers[extension]; ok {
81-
return handler(name, options)
81+
return handler(ctx, name, options)
8282
}
8383
}
8484

0 commit comments

Comments
 (0)
Please sign in to comment.