Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Query cache interface implementation for timeseries & metrics views #1446

Merged
merged 30 commits into from
Dec 19, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
7f1700b
queries-caching-impl
Dec 6, 2022
c133a22
queries-caching-impl
Dec 6, 2022
3126c35
caching for time grain query
Dec 6, 2022
eda43c1
caching: column numeric histogram
Dec 6, 2022
c680489
caching: column numeric histogram
Dec 6, 2022
235356f
Merge remote-tracking branch 'origin/main' into query-cache-interface…
Dec 6, 2022
7d095d3
caching: table cardinality fix
Dec 7, 2022
4d6a455
caching: rug histogram
Dec 7, 2022
3895bf1
caching: time range
Dec 7, 2022
c9b94ac
caching: time range
Dec 7, 2022
5d93116
caching: column cardinality
Dec 8, 2022
b4d6095
caching: rollup interval
Dec 8, 2022
0670dec
caching: column with all nulls
Dec 8, 2022
d34946b
caching: code style
Dec 9, 2022
7822a6a
caching: rug
Dec 9, 2022
5f45640
caching: code style
Dec 13, 2022
8923aab
caching: ts fix
Dec 13, 2022
0d8f659
Merge remote-tracking branch 'origin/main' into query-cache-interface…
Dec 13, 2022
5c94aa9
caching: ts fix
Dec 13, 2022
fd3d586
caching: ts fix
Dec 13, 2022
3750567
caching: code style
Dec 14, 2022
c69f7fd
Merge remote-tracking branch 'origin/main' into query-cache-interface…
Dec 14, 2022
1f0f02f
caching: timeseries
Dec 14, 2022
954d9ad
Merge remote-tracking branch 'origin/main' into query-cache-interface…
Dec 14, 2022
7c33d50
caching: table columns
Dec 15, 2022
3387238
caching: metricsview totals
Dec 15, 2022
b8c3bad
caching: metricsview toplist
Dec 15, 2022
0e89a64
caching: metricsview toplist
Dec 15, 2022
40c7c27
caching: metricsview timeseries
Dec 15, 2022
8330ae1
caching: metricsview timeseries
Dec 19, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
caching: column cardinality
  • Loading branch information
egor-ryashin committed Dec 8, 2022
commit 5d931169a4aae12d80c40561f502d394b7a92277
14 changes: 14 additions & 0 deletions runtime/queries/column_api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,17 @@ func BenchmarkColumnTimeRange(b *testing.B) {
require.NotEmpty(b, q.Result)
}
}

// BenchmarkColumnCardinality measures resolving a ColumnCardinality query for
// the "publisher" column of the "ad_bids" table. Instance setup happens before
// the timer is reset, so only the resolve loop is timed.
func BenchmarkColumnCardinality(b *testing.B) {
	rt, instanceID := testruntime.NewInstanceForProject(b, "ad_bids")
	ctx := context.Background()
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		// A fresh query value is built for every iteration.
		query := &ColumnCardinality{TableName: "ad_bids", ColumnName: "publisher"}
		resolveErr := query.Resolve(ctx, rt, instanceID, 0)
		require.NoError(b, resolveErr)
		require.NotEmpty(b, query.Result)
	}
}
59 changes: 59 additions & 0 deletions runtime/queries/column_cardinality.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package queries

import (
"context"
"fmt"

"github.com/rilldata/rill/runtime"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)

// ColumnCardinality is a query whose Resolve method runs
// approx_count_distinct over ColumnName in TableName and stores the
// outcome in Result.
type ColumnCardinality struct {
// TableName is the table that is queried; Deps reports it as the only dependency.
TableName string
// ColumnName is the column whose distinct values are counted.
ColumnName string
// Result is set by Resolve (from the query) or by UnmarshalResult.
Result float64
}

// Compile-time assertion that *ColumnCardinality satisfies runtime.Query.
var _ runtime.Query = &ColumnCardinality{}

// Key returns a string built from the query kind, the table name and the
// column name, in the form "ColumnCardinality:<table>:<column>".
func (q *ColumnCardinality) Key() string {
	table, column := q.TableName, q.ColumnName
	return fmt.Sprintf("ColumnCardinality:%s:%s", table, column)
}

// Deps returns a one-element slice holding the name of the queried table.
func (q *ColumnCardinality) Deps() []string {
	deps := make([]string, 1)
	deps[0] = q.TableName
	return deps
}

// MarshalResult returns the current Result (a float64) wrapped in any.
func (q *ColumnCardinality) MarshalResult() any {
	var boxed any = q.Result
	return boxed
}

// UnmarshalResult stores v in Result when v holds a float64. Any other
// dynamic type leaves Result untouched and yields an error.
func (q *ColumnCardinality) UnmarshalResult(v any) error {
	if cardinality, isFloat := v.(float64); isFloat {
		q.Result = cardinality
		return nil
	}
	return fmt.Errorf("ColumnCardinality: mismatched unmarshal input")
}

// Resolve executes an approx_count_distinct query over ColumnName in
// TableName and stores the scanned value in q.Result.
//
// It returns the error from rt.Execute, from row iteration, or from Scan
// unchanged. If the query completes without producing a row, it returns a
// gRPC Internal status error ("no rows returned").
func (q *ColumnCardinality) Resolve(ctx context.Context, rt *runtime.Runtime, instanceID string, priority int) error {
	requestSQL := fmt.Sprintf(
		"SELECT approx_count_distinct(%s) as count from %s",
		quoteName(q.ColumnName),
		quoteName(q.TableName),
	)
	rows, err := rt.Execute(ctx, instanceID, priority, requestSQL)
	if err != nil {
		return err
	}
	defer rows.Close()

	// Exactly one row is expected, so a single Next call replaces the
	// former loop that could never run a second iteration.
	if !rows.Next() {
		// Next returns false both when the result set is empty and when
		// iteration failed; surface the real error rather than masking it.
		// NOTE(review): assumes the rows value exposes Err() like
		// database/sql.Rows — confirm against rt.Execute's return type.
		if err := rows.Err(); err != nil {
			return err
		}
		return status.Error(codes.Internal, "no rows returned")
	}

	var count float64
	if err := rows.Scan(&count); err != nil {
		return err
	}
	q.Result = count
	return nil
}
20 changes: 19 additions & 1 deletion runtime/queries/column_time_range.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"time"

"github.com/marcboeker/go-duckdb"
runtimev1 "github.com/rilldata/rill/proto/gen/rill/runtime/v1"
"github.com/rilldata/rill/runtime"
"google.golang.org/grpc/codes"
Expand Down Expand Up @@ -64,7 +65,7 @@ func (q *ColumnTimeRange) Resolve(ctx context.Context, rt *runtime.Runtime, inst
if v := rowMap["min"]; v != nil {
summary.Min = timestamppb.New(v.(time.Time))
summary.Max = timestamppb.New(rowMap["max"].(time.Time))
summary.Interval, err = server.handleInterval(rowMap["interval"])
summary.Interval, err = handleInterval(rowMap["interval"])
if err != nil {
return err
}
Expand All @@ -74,3 +75,20 @@ func (q *ColumnTimeRange) Resolve(ctx context.Context, rt *runtime.Runtime, inst
}
return status.Error(codes.Internal, "no rows returned")
}

// handleInterval converts a dynamically typed interval value into a
// TimeRangeSummary_Interval. A duckdb.Interval has its Days, Months and
// Micros copied over; an int64 is stored in Days only. Any other dynamic
// type produces an error that names the type.
func handleInterval(interval any) (*runtimev1.TimeRangeSummary_Interval, error) {
	switch v := interval.(type) {
	case duckdb.Interval:
		return &runtimev1.TimeRangeSummary_Interval{
			Days:   v.Days,
			Months: v.Months,
			Micros: v.Micros,
		}, nil
	case int64:
		// for date type column interval is difference in num days for two dates
		return &runtimev1.TimeRangeSummary_Interval{Days: int32(v)}, nil
	default:
		return nil, fmt.Errorf("cannot handle interval type %T", interval)
	}
}
56 changes: 11 additions & 45 deletions runtime/server/queries_columns.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,8 @@ package server

import (
"context"
"fmt"

"github.com/marcboeker/go-duckdb"

runtimev1 "github.com/rilldata/rill/proto/gen/rill/runtime/v1"
"github.com/rilldata/rill/runtime/drivers"
"github.com/rilldata/rill/runtime/queries"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
Expand Down Expand Up @@ -176,50 +172,20 @@ func (s *Server) GetTimeRangeSummary(ctx context.Context, request *runtimev1.Get
}, nil
}

// handleInterval maps an interval value of dynamic type duckdb.Interval or
// int64 onto a TimeRangeSummary_Interval; every other type is rejected with
// an error naming the type.
func handleInterval(interval any) (*runtimev1.TimeRangeSummary_Interval, error) {
	if iv, ok := interval.(duckdb.Interval); ok {
		out := &runtimev1.TimeRangeSummary_Interval{}
		out.Days, out.Months, out.Micros = iv.Days, iv.Months, iv.Micros
		return out, nil
	}
	if numDays, ok := interval.(int64); ok {
		// for date type column interval is difference in num days for two dates
		out := &runtimev1.TimeRangeSummary_Interval{}
		out.Days = int32(numDays)
		return out, nil
	}
	return nil, fmt.Errorf("cannot handle interval type %T", interval)
}

func (s *Server) GetCardinalityOfColumn(ctx context.Context, request *runtimev1.GetCardinalityOfColumnRequest) (*runtimev1.GetCardinalityOfColumnResponse, error) {
sanitizedColumnName := quoteName(request.ColumnName)
rows, err := s.query(ctx, request.InstanceId, &drivers.Statement{
Query: fmt.Sprintf("SELECT approx_count_distinct(%s) as count from %s", sanitizedColumnName, request.TableName),
Priority: int(request.Priority),
})
q := &queries.ColumnCardinality{
TableName: request.TableName,
ColumnName: request.ColumnName,
}
err := s.runtime.Query(ctx, request.InstanceId, q, int(request.Priority))
if err != nil {
return nil, err
}
defer rows.Close()
var count float64
for rows.Next() {
err = rows.Scan(&count)
if err != nil {
return nil, err
}
return &runtimev1.GetCardinalityOfColumnResponse{
CategoricalSummary: &runtimev1.CategoricalSummary{
Case: &runtimev1.CategoricalSummary_Cardinality{
Cardinality: count,
},
return &runtimev1.GetCardinalityOfColumnResponse{
CategoricalSummary: &runtimev1.CategoricalSummary{
Case: &runtimev1.CategoricalSummary_Cardinality{
Cardinality: q.Result,
},
}, nil
}
return nil, status.Error(codes.Internal, "no rows returned")
}

func quoteName(columnName string) string {
return fmt.Sprintf("\"%s\"", columnName)
},
}, nil
}