Refactor FuturesWebsocket: implement batch subscription handling, enhance connection management, and improve logging
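
The diff below replaces the per-stream StartStream/StopStream provider calls with batched StartStreams/StopStreams that answer a whole batch on a single acknowledgement channel. As a rough, self-contained sketch of that calling pattern, assuming only the signatures shown in the provider interface hunk further down (batchProvider mirrors that interface; fakeProvider, waitAck, and the example subjects are illustrative assumptions, not code from this repository):

package main

import (
    "errors"
    "fmt"
    "time"
)

// batchProvider mirrors the batched subscription surface from the diff:
// one call covers a whole slice of subjects and returns a channel that
// delivers a single acknowledgement error for the batch.
type batchProvider interface {
    StartStreams(keys []string) <-chan error
    StopStreams(keys []string) <-chan error
}

// fakeProvider is a stand-in implementation used only for this sketch.
type fakeProvider struct{}

func (fakeProvider) StartStreams(keys []string) <-chan error {
    ack := make(chan error, 1)
    go func() {
        // Pretend to send one combined SUBSCRIBE frame for all keys.
        time.Sleep(10 * time.Millisecond)
        ack <- nil
    }()
    return ack
}

func (fakeProvider) StopStreams(keys []string) <-chan error {
    ack := make(chan error, 1)
    ack <- nil
    return ack
}

// waitAck waits for a batch acknowledgement or gives up after the timeout,
// the same select-with-timeout shape the manager uses around StartStreams.
func waitAck(ack <-chan error, timeout time.Duration) error {
    select {
    case err := <-ack:
        return err
    case <-time.After(timeout):
        return errors.New("timeout")
    }
}

func main() {
    var p batchProvider = fakeProvider{}
    subjects := []string{"btcusdt@aggTrade", "ethusdt@bookTicker"}
    if err := waitAck(p.StartStreams(subjects), 10*time.Second); err != nil {
        fmt.Println("start batch failed:", err)
        return
    }
    fmt.Println("started", len(subjects), "streams in one batch")
}

The manager code in this commit applies the same shape per provider: it fires one StartStreams or StopStreams call per batch and collects the acknowledgements on a shared done channel, each guarded by statusWaitTotal.
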
go.mod
@@ -4,12 +4,13 @@ go 1.25.1

require (
    github.com/google/uuid v1.6.0
    github.com/gorilla/websocket v1.5.3
    google.golang.org/grpc v1.75.0
    google.golang.org/protobuf v1.36.8
)

require (
    github.com/coder/websocket v1.8.14 // indirect
    github.com/lmittmann/tint v1.1.2 // indirect
    golang.org/x/net v0.43.0 // indirect
    golang.org/x/sys v0.35.0 // indirect
    golang.org/x/text v0.28.0 // indirect
go.sum
@@ -1,3 +1,5 @@
github.com/coder/websocket v1.8.14 h1:9L0p0iKiNOibykf283eHkKUHHrpG7f65OE3BhhO7v9g=
github.com/coder/websocket v1.8.14/go.mod h1:NX3SzP+inril6yawo5CQXx8+fk145lPDC6pumgx0mVg=
github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
@@ -10,6 +12,8 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/lmittmann/tint v1.1.2 h1:2CQzrL6rslrsyjqLDwD11bZ5OpLBPU+g3G/r5LSfS8w=
github.com/lmittmann/tint v1.1.2/go.mod h1:HIS3gSy7qNwGCj+5oRjAutErFBl4BzdQP6cJZ0NfMwE=
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
go.opentelemetry.io/otel v1.36.0 h1:UumtzIklRBY6cI/lllNZlALOF5nNIzJVb16APdvgTXg=
@@ -1,10 +1,12 @@
package main

import (
    "fmt"
    "log"
    "log/slog"
    "net"
    "os"
    "time"

    "github.com/lmittmann/tint"
    pb "gitlab.michelsen.id/phillmichelsen/tessera/pkg/pb/data_service"
    "gitlab.michelsen.id/phillmichelsen/tessera/services/data_service/internal/manager"
    "gitlab.michelsen.id/phillmichelsen/tessera/services/data_service/internal/provider/binance"
@@ -14,26 +16,67 @@ import (
    "google.golang.org/grpc/reflection"
)

func initLogger() *slog.Logger {
    level := parseLevel(env("LOG_LEVEL", "debug"))
    if env("LOG_FORMAT", "pretty") == "json" {
        return slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{
            Level: level,
        }))
    }
    return slog.New(tint.NewHandler(os.Stdout, &tint.Options{
        Level:      level,
        TimeFormat: time.RFC3339Nano,
        NoColor:    os.Getenv("NO_COLOR") != "",
    }))
}

func parseLevel(s string) slog.Level {
    switch s {
    case "debug":
        return slog.LevelDebug
    case "warn":
        return slog.LevelWarn
    case "error":
        return slog.LevelError
    default:
        return slog.LevelInfo
    }
}

func env(k, def string) string {
    if v := os.Getenv(k); v != "" {
        return v
    }
    return def
}

func main() {
    fmt.Println("Starting Data Service...")
    slog.SetDefault(initLogger())
    slog.Info("starting", "svc", "data-service")

    // Setup
    r := router.NewRouter(2048)
    m := manager.NewManager(r)
    binanceFutures := binance.NewFuturesWebsocket()
    _ = m.AddProvider("binance_futures_websocket", binanceFutures)
    binanceFutures := binance.NewFuturesWebsocket(r.IncomingChannel())
    if err := m.AddProvider("binance_futures_websocket", binanceFutures); err != nil {
        slog.Error("add provider failed", "err", err)
        os.Exit(1)
    }

    // gRPC Control Server
    grpcControlServer := grpc.NewServer()
    go func() {
        pb.RegisterDataServiceControlServer(grpcControlServer, server.NewGRPCControlServer(m))
        reflection.Register(grpcControlServer)
        grpcLis, err := net.Listen("tcp", ":50051")
        lis, err := net.Listen("tcp", ":50051")
        if err != nil {
            log.Fatalf("Failed to listen for gRPC control: %v", err)
            slog.Error("listen failed", "cmp", "grpc-control", "addr", ":50051", "err", err)
            os.Exit(1)
        }
        log.Println("gRPC control server listening on :50051")
        if err := grpcControlServer.Serve(grpcLis); err != nil {
            log.Fatalf("Failed to serve gRPC control: %v", err)
        slog.Info("listening", "cmp", "grpc-control", "addr", ":50051")
        if err := grpcControlServer.Serve(lis); err != nil {
            slog.Error("serve failed", "cmp", "grpc-control", "err", err)
            os.Exit(1)
        }
    }()

@@ -42,31 +85,17 @@ func main() {
    go func() {
        pb.RegisterDataServiceStreamingServer(grpcStreamingServer, server.NewGRPCStreamingServer(m))
        reflection.Register(grpcStreamingServer)
        grpcLis, err := net.Listen("tcp", ":50052")
        lis, err := net.Listen("tcp", ":50052")
        if err != nil {
            log.Fatalf("Failed to listen for gRPC: %v", err)
            slog.Error("listen failed", "cmp", "grpc-streaming", "addr", ":50052", "err", err)
            os.Exit(1)
        }
        log.Println("gRPC streaming server listening on :50052")
        if err := grpcStreamingServer.Serve(grpcLis); err != nil {
            log.Fatalf("Failed to serve gRPC: %v", err)
        slog.Info("listening", "cmp", "grpc-streaming", "addr", ":50052")
        if err := grpcStreamingServer.Serve(lis); err != nil {
            slog.Error("serve failed", "cmp", "grpc-streaming", "err", err)
            os.Exit(1)
        }
    }()

    // Socket Streaming Server
    /*
        socketStreamingServer := server.NewSocketStreamingServer(m)
        go func() {
            socketLis, err := net.Listen("tcp", ":6000")
            if err != nil {
                log.Fatalf("Failed to listen for socket: %v", err)
            }
            log.Println("Socket server listening on :6000")
            if err := socketStreamingServer.Serve(socketLis); err != nil {
                log.Fatalf("Socket server error: %v", err)
            }
        }()
    */

    // Block main forever
    select {}
}
@@ -2,12 +2,13 @@ package main

import (
    "context"
    "encoding/json"
    "flag"
    "fmt"
    "math"
    "os"
    "os/signal"
    "strings"
    "sync/atomic"
    "syscall"
    "time"

@@ -47,21 +48,6 @@ func toIdentifierKey(input string) (string, error) {
    return "raw::" + strings.ToLower(prov) + "." + subj, nil
}

func prettyOrRaw(b []byte, pretty bool) string {
    if !pretty || len(b) == 0 {
        return string(b)
    }
    var tmp any
    if err := json.Unmarshal(b, &tmp); err != nil {
        return string(b)
    }
    out, err := json.MarshalIndent(tmp, "", " ")
    if err != nil {
        return string(b)
    }
    return string(out)
}

func waitReady(ctx context.Context, conn *grpc.ClientConn) error {
    for {
        s := conn.GetState()
@@ -77,18 +63,31 @@ func waitReady(ctx context.Context, conn *grpc.ClientConn) error {
    }
}

type streamStats struct {
    TotalMsgs  int64
    TotalBytes int64
    TickMsgs   int64
    TickBytes  int64
}

type stats struct {
    TotalMsgs  int64
    TotalBytes int64
    ByStream   map[string]*streamStats
}

func main() {
    var ids idsFlag
    var ctlAddr string
    var strAddr string
    var pretty bool
    var timeout time.Duration
    var refresh time.Duration

    flag.Var(&ids, "id", "identifier (provider:subject or canonical key); repeatable")
    flag.StringVar(&ctlAddr, "ctl", "127.0.0.1:50051", "gRPC control address")
    flag.StringVar(&strAddr, "str", "127.0.0.1:50052", "gRPC streaming address")
    flag.BoolVar(&pretty, "pretty", true, "pretty-print JSON payloads when possible")
    flag.DurationVar(&timeout, "timeout", 10*time.Second, "start/config/connect timeout")
    flag.DurationVar(&refresh, "refresh", 1*time.Second, "dashboard refresh interval")
    flag.Parse()

    if len(ids) == 0 {
@@ -99,6 +98,7 @@ func main() {
    ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
    defer cancel()

    // Control channel
    ccCtl, err := grpc.NewClient(
        ctlAddr,
        grpc.WithTransportCredentials(insecure.NewCredentials()),
@@ -107,15 +107,7 @@ func main() {
        _, _ = fmt.Fprintf(os.Stderr, "new control client: %v\n", err)
        os.Exit(1)
    }
    defer func(ccCtl *grpc.ClientConn) {
        err := ccCtl.Close()
        if err != nil {
            _, _ = fmt.Fprintf(os.Stderr, "close control client: %v\n", err)
            os.Exit(1)
        } else {
            fmt.Println("closed control client")
        }
    }(ccCtl)
    defer ccCtl.Close()
    ccCtl.Connect()

    ctlConnCtx, cancelCtlConn := context.WithTimeout(ctx, timeout)
@@ -128,17 +120,20 @@ func main() {

    ctl := pb.NewDataServiceControlClient(ccCtl)

    // Start stream
    ctxStart, cancelStart := context.WithTimeout(ctx, timeout)
    startResp, err := ctl.StartStream(ctxStart, &pb.StartStreamRequest{})
    cancelStart()
    if err != nil {
        _, _ = fmt.Fprintf(os.Stderr, "StartClientStream: %v\n", err)
        _, _ = fmt.Fprintf(os.Stderr, "StartStream: %v\n", err)
        os.Exit(1)
    }
    streamUUID := startResp.GetStreamUuid()
    fmt.Printf("stream: %s\n", streamUUID)

    // Configure identifiers
    var pbIDs []*pb.Identifier
    orderedIDs := make([]string, 0, len(ids))
    for _, s := range ids {
        key, err := toIdentifierKey(s)
        if err != nil {
@@ -146,6 +141,7 @@
            os.Exit(2)
        }
        pbIDs = append(pbIDs, &pb.Identifier{Key: key})
        orderedIDs = append(orderedIDs, key) // preserve CLI order
    }

    ctxCfg, cancelCfg := context.WithTimeout(ctx, timeout)
@@ -155,11 +151,12 @@ func main() {
    })
    cancelCfg()
    if err != nil {
        _, _ = fmt.Fprintf(os.Stderr, "ConfigureClientStream: %v\n", err)
        _, _ = fmt.Fprintf(os.Stderr, "ConfigureStream: %v\n", err)
        os.Exit(1)
    }
    fmt.Printf("configured %d identifiers\n", len(pbIDs))

    // Streaming connection
    ccStr, err := grpc.NewClient(
        strAddr,
        grpc.WithTransportCredentials(insecure.NewCredentials()),
@@ -168,15 +165,7 @@ func main() {
        _, _ = fmt.Fprintf(os.Stderr, "new streaming client: %v\n", err)
        os.Exit(1)
    }
    defer func(ccStr *grpc.ClientConn) {
        err := ccStr.Close()
        if err != nil {
            _, _ = fmt.Fprintf(os.Stderr, "close streaming client: %v\n", err)
            os.Exit(1)
        } else {
            fmt.Println("closed streaming client")
        }
    }(ccStr)
    defer ccStr.Close()
    ccStr.Connect()

    strConnCtx, cancelStrConn := context.WithTimeout(ctx, timeout)
@@ -192,34 +181,128 @@ func main() {
    streamCtx, streamCancel := context.WithCancel(ctx)
    defer streamCancel()

    stream, err := str.ConnectStream(streamCtx, &pb.ConnectStreamRequest{StreamUuid: streamUUID})
    srv, err := str.ConnectStream(streamCtx, &pb.ConnectStreamRequest{StreamUuid: streamUUID})
    if err != nil {
        _, _ = fmt.Fprintf(os.Stderr, "ConnectClientStream: %v\n", err)
        _, _ = fmt.Fprintf(os.Stderr, "ConnectStream: %v\n", err)
        os.Exit(1)
    }
    fmt.Println("connected; streaming… (Ctrl-C to quit)")

    // Receiver goroutine → channel
    type msgWrap struct {
        idKey string
        size  int
        err   error
    }
    msgCh := make(chan msgWrap, 1024)
    go func() {
        for {
            m, err := srv.Recv()
            if err != nil {
                msgCh <- msgWrap{err: err}
                close(msgCh)
                return
            }
            id := m.GetIdentifier().GetKey()
            msgCh <- msgWrap{idKey: id, size: len(m.GetPayload())}
        }
    }()

    // Stats and dashboard
    st := &stats{ByStream: make(map[string]*streamStats)}
    seen := make(map[string]bool, len(orderedIDs))
    for _, id := range orderedIDs {
        seen[id] = true
    }
    tick := time.NewTicker(refresh)
    defer tick.Stop()

    clear := func() { fmt.Print("\033[H\033[2J") }
    header := func() {
        fmt.Printf("stream: %s now: %s refresh: %s\n",
            streamUUID, time.Now().Format(time.RFC3339), refresh)
        fmt.Println("--------------------------------------------------------------------------------------")
        fmt.Printf("%-56s %10s %14s %12s %16s\n", "identifier", "msgs/s", "bytes/s", "total", "total_bytes")
        fmt.Println("--------------------------------------------------------------------------------------")
    }

    printAndReset := func() {
        clear()
        header()

        var totMsgsPS, totBytesPS float64
        for _, id := range orderedIDs {
            s, ok := st.ByStream[id]
            var msgsPS, bytesPS float64
            var totMsgs, totBytes int64
            if ok {
                // Convert window counts into per-second rates.
                msgsPS = float64(atomic.SwapInt64(&s.TickMsgs, 0)) / refresh.Seconds()
                bytesPS = float64(atomic.SwapInt64(&s.TickBytes, 0)) / refresh.Seconds()
                totMsgs = atomic.LoadInt64(&s.TotalMsgs)
                totBytes = atomic.LoadInt64(&s.TotalBytes)
            }
            totMsgsPS += msgsPS
            totBytesPS += bytesPS
            fmt.Printf("%-56s %10d %14d %12d %16d\n",
                id,
                int64(math.Round(msgsPS)),
                int64(math.Round(bytesPS)),
                totMsgs,
                totBytes,
            )
        }

        fmt.Println("--------------------------------------------------------------------------------------")
        fmt.Printf("%-56s %10d %14d %12d %16d\n",
            "TOTAL",
            int64(math.Round(totMsgsPS)),
            int64(math.Round(totBytesPS)),
            atomic.LoadInt64(&st.TotalMsgs),
            atomic.LoadInt64(&st.TotalBytes),
        )
    }

    for {
        select {
        case <-ctx.Done():
            fmt.Println("\nshutting down")
            return
        default:
            msg, err := stream.Recv()
            if err != nil {

        case <-tick.C:
            printAndReset()

        case mw, ok := <-msgCh:
            if !ok {
                return
            }
            if mw.err != nil {
                if ctx.Err() != nil {
                    return
                }
                _, _ = fmt.Fprintf(os.Stderr, "recv: %v\n", err)
                _, _ = fmt.Fprintf(os.Stderr, "recv: %v\n", mw.err)
                os.Exit(1)
            }
            id := msg.GetIdentifier()
            fmt.Printf("[%s] bytes=%d enc=%s t=%s\n",
                id.GetKey(), len(msg.GetPayload()), msg.GetEncoding(),
                time.Now().Format(time.RFC3339Nano),
            )
            fmt.Println(prettyOrRaw(msg.GetPayload(), pretty))
            fmt.Println("---")

            // Maintain stable order: append new identifiers at first sight.
            if !seen[mw.idKey] {
                seen[mw.idKey] = true
                orderedIDs = append(orderedIDs, mw.idKey)
            }

            // Account
            atomic.AddInt64(&st.TotalMsgs, 1)
            atomic.AddInt64(&st.TotalBytes, int64(mw.size))

            ss := st.ByStream[mw.idKey]
            if ss == nil {
                ss = &streamStats{}
                st.ByStream[mw.idKey] = ss
            }
            atomic.AddInt64(&ss.TotalMsgs, 1)
            atomic.AddInt64(&ss.TotalBytes, int64(mw.size))
            atomic.AddInt64(&ss.TickMsgs, 1)
            atomic.AddInt64(&ss.TickBytes, int64(mw.size))
        }
    }
}
@@ -1,14 +1,6 @@
package domain

type Encoding string

const (
    EncodingJSON     Encoding = "json"
    EncodingProtobuf Encoding = "protobuf"
)

type Message struct {
    Identifier Identifier
    Payload    []byte
    Encoding   Encoding
}

@@ -39,31 +39,6 @@ func identifierSetDifferences(old map[domain.Identifier]struct{}, next []domain.
    return
}

// joinErrors aggregates multiple errors.
type joined struct{ es []error }

func (j joined) Error() string {
    switch n := len(j.es); {
    case n == 0:
        return ""
    case n == 1:
        return j.es[0].Error()
    default:
        s := j.es[0].Error()
        for i := 1; i < n; i++ {
            s += "; " + j.es[i].Error()
        }
        return s
    }
}

func join(es []error) error {
    if len(es) == 0 {
        return nil
    }
    return joined{es}
}

// resolveProvider parses a raw identifier and looks up the provider.
func (m *Manager) resolveProvider(id domain.Identifier) (provider.Provider, string, error) {
    provName, subj, ok := id.ProviderSubject()
@@ -1,7 +1,9 @@
package manager

import (
    "errors"
    "fmt"
    "log/slog"
    "time"

    "github.com/google/uuid"
@@ -10,6 +12,8 @@ import (
    "gitlab.michelsen.id/phillmichelsen/tessera/services/data_service/internal/router"
)

func lg() *slog.Logger { return slog.Default().With("cmp", "manager") }

// Manager is a single-goroutine actor that owns all state.
type Manager struct {
    // Command channel
@@ -24,8 +28,8 @@ type Manager struct {
    router *router.Router
}

// New creates a manager and starts its run loop.
func New(r *router.Router) *Manager {
// NewManager creates a manager and starts its run loop.
func NewManager(r *router.Router) *Manager {
    m := &Manager{
        cmdCh:     make(chan any, 256),
        providers: make(map[string]provider.Provider),
@@ -35,6 +39,9 @@ func New(r *router.Router) *Manager {
    }
    go r.Run()
    go m.run()

    lg().Info("manager started")

    return m
}

@@ -42,6 +49,7 @@ func New(r *router.Router) *Manager {

// AddProvider adds and starts a new provider.
func (m *Manager) AddProvider(name string, p provider.Provider) error {
    lg().Debug("add provider request", slog.String("name", name))
    resp := make(chan error, 1)
    m.cmdCh <- addProviderCmd{name: name, p: p, resp: resp}
    return <-resp
@@ -49,6 +57,7 @@ func (m *Manager) AddProvider(name string, p provider.Provider) error {

// RemoveProvider stops and removes a provider, cleaning up all sessions.
func (m *Manager) RemoveProvider(name string) error {
    lg().Debug("remove provider request", slog.String("name", name))
    resp := make(chan error, 1)
    m.cmdCh <- removeProviderCmd{name: name, resp: resp}
    return <-resp
@@ -56,6 +65,7 @@ func (m *Manager) RemoveProvider(name string) error {

// NewSession creates a new session with the given idle timeout.
func (m *Manager) NewSession(idleAfter time.Duration) (uuid.UUID, error) {
    lg().Debug("new session request", slog.Duration("idle_after", idleAfter))
    resp := make(chan struct {
        id  uuid.UUID
        err error
@@ -67,6 +77,7 @@ func (m *Manager) NewSession(idleAfter time.Duration) (uuid.UUID, error) {

// AttachClient attaches a client to a session, creates and returns client channels for the session.
func (m *Manager) AttachClient(id uuid.UUID, inBuf, outBuf int) (chan<- domain.Message, <-chan domain.Message, error) {
    lg().Debug("attach client request", slog.String("session", id.String()), slog.Int("in_buf", inBuf), slog.Int("out_buf", outBuf))
    resp := make(chan struct {
        cin  chan<- domain.Message
        cout <-chan domain.Message
@@ -79,6 +90,7 @@ func (m *Manager) AttachClient(id uuid.UUID, inBuf, outBuf int) (chan<- domain.M

// DetachClient detaches the client from the session, closes client channels and arms timeout.
func (m *Manager) DetachClient(id uuid.UUID) error {
    lg().Debug("detach client request", slog.String("session", id.String()))
    resp := make(chan error, 1)
    m.cmdCh <- detachCmd{sid: id, resp: resp}
    return <-resp
@@ -86,6 +98,7 @@ func (m *Manager) DetachClient(id uuid.UUID) error {

// ConfigureSession sets the next set of identifiers for the session, starting and stopping streams as needed.
func (m *Manager) ConfigureSession(id uuid.UUID, next []domain.Identifier) error {
    lg().Debug("configure session request", slog.String("session", id.String()), slog.Int("idents", len(next)))
    resp := make(chan error, 1)
    m.cmdCh <- configureCmd{sid: id, next: next, resp: resp}
    return <-resp
@@ -93,6 +106,7 @@ func (m *Manager) ConfigureSession(id uuid.UUID, next []domain.Identifier) error

// CloseSession closes and removes the session, cleaning up all bindings.
func (m *Manager) CloseSession(id uuid.UUID) error {
    lg().Debug("close session request", slog.String("session", id.String()))
    resp := make(chan error, 1)
    m.cmdCh <- closeSessionCmd{sid: id, resp: resp}
    return <-resp
@@ -125,10 +139,12 @@ func (m *Manager) run() {

func (m *Manager) handleAddProvider(cmd addProviderCmd) {
    if _, ok := m.providers[cmd.name]; ok {
        lg().Warn("provider already exists", slog.String("name", cmd.name))
        cmd.resp <- fmt.Errorf("provider exists: %s", cmd.name)
        return
    }
    if err := cmd.p.Start(); err != nil {
        lg().Warn("failed to start provider", slog.String("name", cmd.name), slog.String("err", err.Error()))
        cmd.resp <- fmt.Errorf("start provider %s: %w", cmd.name, err)
        return
    }
@@ -139,6 +155,7 @@ func (m *Manager) handleAddProvider(cmd addProviderCmd) {
func (m *Manager) handleRemoveProvider(cmd removeProviderCmd) {
    p, ok := m.providers[cmd.name]
    if !ok {
        lg().Warn("provider not found", slog.String("name", cmd.name))
        cmd.resp <- fmt.Errorf("provider not found: %s", cmd.name)
        return
    }
@@ -149,6 +166,7 @@ func (m *Manager) handleRemoveProvider(cmd removeProviderCmd) {
            provName, subj, ok := ident.ProviderSubject()
            if !ok || provName != cmd.name {
                // TODO: add log warning, but basically should never ever happen
                lg().Warn("identifier with mismatched provider found in session during provider removal", slog.String("session", s.id.String()), slog.String("ident", ident.Key()), slog.String("expected_provider", cmd.name), slog.String("found_provider", provName))
                continue
            }
            if s.attached && s.clientOut != nil {
@@ -158,19 +176,19 @@ func (m *Manager) handleRemoveProvider(cmd removeProviderCmd) {

            // decrementStreamRefCount returns true if this was the last ref. In which case we want to stop the stream.
            if ident.IsRaw() && m.decrementStreamRefCount(ident) && subj != "" {
                _ = p.StopStream(subj) // best-effort as we will remove the provider anyway
                _ = p.StopStreams([]string{subj}) // best-effort as we will remove the provider anyway
            }
        }
    }

    // first iteration above is sound, but as a precaution we also clean up any dangling streamRef entries here
    // Defensive sweep: log and clear any dangling streamRef entries for this provider.
    for id := range m.streamRef {
        provName, _, ok := id.ProviderSubject()
        if !ok || provName != cmd.name {
            continue
        }
        fmt.Printf("manager: warning — dangling streamRef for %s after removing provider %s\n", id.Key(), cmd.name)
        delete(m.streamRef, id)
        lg().Warn("dangling streamRef entry found during provider removal", slog.String("ident", id.Key()), slog.String("provider", cmd.name))
    }

    p.Stop()
@@ -196,6 +214,8 @@ func (m *Manager) handleNewSession(cmd newSessionCmd) {
        id  uuid.UUID
        err error
    }{id: s.id, err: nil}

    lg().Info("new session created", slog.String("session", s.id.String()), slog.Duration("idle_after", cmd.idleAfter))
}

func (m *Manager) handleAttach(cmd attachCmd) {
@@ -232,6 +252,8 @@ func (m *Manager) handleAttach(cmd attachCmd) {
        cout <-chan domain.Message
        err  error
    }{cin, cout, err}

    lg().Info("client attached to session", slog.String("session", s.id.String()))
}

func (m *Manager) handleDetach(cmd detachCmd) {
@@ -252,126 +274,163 @@ func (m *Manager) handleDetach(cmd detachCmd) {
    _ = m.detachSession(cmd.sid, s)

    cmd.resp <- nil

    lg().Info("client detached from session", slog.String("session", s.id.String()))
}

func (m *Manager) handleConfigure(c configureCmd) {
    s, ok := m.sessions[c.sid]
func (m *Manager) handleConfigure(cmd configureCmd) {
    s, ok := m.sessions[cmd.sid]
    if !ok {
        c.resp <- ErrSessionNotFound
        cmd.resp <- ErrSessionNotFound
        return
    }
    if s.closed {
        c.resp <- ErrSessionClosed
        cmd.resp <- ErrSessionClosed
        return
    }

    old := copySet(s.bound)
    toAdd, toDel := identifierSetDifferences(old, c.next)
    toAdd, toDel := identifierSetDifferences(old, cmd.next)

    // 1) Handle removals first.
    var aggErrs error

    // 1) Build batches: provider → starts(starters) and stops(subjects)
    type starter struct {
        id   domain.Identifier
        subj string
    }
    startsByProv := make(map[provider.Provider][]starter)
    stopsByProv := make(map[provider.Provider][]string)

    // Removals
    for _, ident := range toDel {
        if s.attached && s.clientOut != nil {
            m.router.DeregisterRoute(ident, s.clientOut)
        }
        delete(s.bound, ident)

        if ident.IsRaw() {
            if m.decrementStreamRefCount(ident) {
                if p, subj, err := m.resolveProvider(ident); err == nil {
                    _ = p.StopStream(subj) // fire-and-forget
                }
            }
        }
    }

    // 2) Handle additions. Collect starts to await.
    type startItem struct {
        id domain.Identifier
        ch <-chan error
    }
    var starts []startItem
    var initErrs []error

    for _, ident := range toAdd {
        // Bind intent now.
        s.bound[ident] = struct{}{}

        if !ident.IsRaw() {
            if s.attached && s.clientOut != nil {
                m.router.RegisterRoute(ident, s.clientOut)
            }
            continue
        }

        p, subj, err := m.resolveProvider(ident)
        if err != nil {
            delete(s.bound, ident)
            initErrs = append(initErrs, err)
            aggErrs = errors.Join(aggErrs, fmt.Errorf("stop %s: %w", ident.Key(), err))
            continue
        }
        if subj == "" {
            continue
        }

        if m.decrementStreamRefCount(ident) { // only when last ref
            stopsByProv[p] = append(stopsByProv[p], subj)
        }
    }

    // Additions
    for _, ident := range toAdd {
        if !ident.IsRaw() {
            if s.attached && s.clientOut != nil {
                m.router.RegisterRoute(ident, s.clientOut)
            }
            s.bound[ident] = struct{}{}
            continue
        }

        p, subj, err := m.resolveProvider(ident)
        if err != nil {
            aggErrs = errors.Join(aggErrs, err)
            continue
        }
        if !p.IsValidSubject(subj, false) {
            delete(s.bound, ident)
            initErrs = append(initErrs, fmt.Errorf("invalid subject %q for provider", subj))
            aggErrs = errors.Join(aggErrs, fmt.Errorf("invalid subject %q", subj))
            continue
        }

        first := m.incrementStreamRefCount(ident)

        if first || !p.IsStreamActive(subj) {
            ch := p.StartStream(subj, m.router.IncomingChannel())
            starts = append(starts, startItem{id: ident, ch: ch})
        } else if s.attached && s.clientOut != nil {
            // Already active, just register for this session.
        if m.incrementStreamRefCount(ident) { // first ref → start later
            startsByProv[p] = append(startsByProv[p], starter{id: ident, subj: subj})
        } else {
            // already active → bind+route now
            if s.attached && s.clientOut != nil {
                m.router.RegisterRoute(ident, s.clientOut)
            }
            s.bound[ident] = struct{}{}
        }
    }

    // 3) Wait for starts initiated by this call, each with its own timeout.
    if len(starts) == 0 {
        c.resp <- join(initErrs)
        return
    }

    type result struct {
        id domain.Identifier
    // 2) Fire provider calls
    type batchRes struct {
        prov provider.Provider
        err  error
        op   string // "start"/"stop"
    }
    done := make(chan result, len(starts))
    done := make(chan batchRes, len(startsByProv)+len(stopsByProv))

    for _, si := range starts {
        // Per-start waiter.
        go func(id domain.Identifier, ch <-chan error) {
    // Start batches
    for p, items := range startsByProv {
        subjs := make([]string, 0, len(items))
        for _, it := range items {
            subjs = append(subjs, it.subj)
        }
        ack := p.StartStreams(subjs)
        go func(p provider.Provider, ack <-chan error) {
            var err error
            select {
            case err := <-ch:
                done <- result{id: id, err: err}
            case err = <-ack:
            case <-time.After(statusWaitTotal):
                done <- result{id: id, err: fmt.Errorf("timeout")}
                err = fmt.Errorf("timeout")
            }
        }(si.id, si.ch)
            done <- batchRes{prov: p, err: err, op: "start"}
        }(p, ack)
    }

    // Collect results and apply.
    for i := 0; i < len(starts); i++ {
    // Stop batches
    for p, subjs := range stopsByProv {
        ack := p.StopStreams(subjs)
        go func(p provider.Provider, ack <-chan error) {
            var err error
            select {
            case err = <-ack:
            case <-time.After(statusWaitTotal):
                err = fmt.Errorf("timeout")
            }
            done <- batchRes{prov: p, err: err, op: "stop"}
        }(p, ack)
    }

    // 3) Collect results
    for i := 0; i < len(startsByProv)+len(stopsByProv); i++ {
        r := <-done
        switch r.op {
        case "start":
            items := startsByProv[r.prov]
            if r.err != nil {
                // Roll back this session's bind and drop ref.
                delete(s.bound, r.id)
                _ = m.decrementStreamRefCount(r.id)
                initErrs = append(initErrs, fmt.Errorf("start %v: %w", r.id, r.err))
                // Roll back refcounts for each ident in this provider batch
                for _, it := range items {
                    _ = m.decrementStreamRefCount(it.id)
                    aggErrs = errors.Join(aggErrs, fmt.Errorf("start %s: %w", it.id.Key(), r.err))
                }
                continue
            }
            // Success: register for any attached sessions that are bound.
            for _, sess := range m.sessions {
                if !sess.attached || sess.clientOut == nil {
                    continue
            // Success → bind and route
            for _, it := range items {
                if s.attached && s.clientOut != nil {
                    m.router.RegisterRoute(it.id, s.clientOut)
                }
                s.bound[it.id] = struct{}{}
            }
        case "stop":
            if r.err != nil {
                for _, subj := range stopsByProv[r.prov] {
                    aggErrs = errors.Join(aggErrs, fmt.Errorf("stop %s/%s: %w", "raw", subj, r.err))
                }
                if _, bound := sess.bound[r.id]; bound {
                    m.router.RegisterRoute(r.id, sess.clientOut)
                }
            }
        }
    }

    c.resp <- join(initErrs)
    cmd.resp <- aggErrs

    lg().Info("session configured", slog.String("session", s.id.String()), slog.Int("bound", len(s.bound)), slog.Int("to_add", len(toAdd)), slog.Int("to_del", len(toDel)))
}

func (m *Manager) handleCloseSession(c closeSessionCmd) {
@@ -382,4 +441,6 @@ func (m *Manager) handleCloseSession(c closeSessionCmd) {
    }
    m.closeSession(c.sid, s)
    c.resp <- nil

    lg().Info("session closed", slog.String("session", s.id.String()))
}
@@ -7,6 +7,21 @@ import (
    "gitlab.michelsen.id/phillmichelsen/tessera/services/data_service/internal/domain"
)

// Session holds per-session state. Owned by the manager loop.
type session struct {
    id uuid.UUID

    clientIn  chan domain.Message // caller writes
    clientOut chan domain.Message // caller reads

    bound map[domain.Identifier]struct{}

    closed    bool
    attached  bool
    idleAfter time.Duration
    idleTimer *time.Timer
}

// attachSession wires channels, stops idle timer, and registers ready routes.
// Precondition: session exists and is not attached/closed. Runs in loop.
func (m *Manager) attachSession(s *session, inBuf, outBuf int) (chan<- domain.Message, <-chan domain.Message, error) {
@@ -32,7 +47,7 @@ func (m *Manager) attachSession(s *session, inBuf, outBuf int) (chan<- domain.Me
            select {
            case dst <- msg:
            default:
                // drop
                lg().Warn("drop message on clientIn backpressure", "identifier", msg.Identifier.Key())
            }
        }
    }(cin, m.router.IncomingChannel())
@@ -105,7 +120,7 @@ func (m *Manager) closeSession(sid uuid.UUID, s *session) {
        }
        if last := m.decrementStreamRefCount(ident); last {
            if p, subj, err := m.resolveProvider(ident); err == nil {
                _ = p.StopStream(subj) // do not wait
                _ = p.StopStreams([]string{subj}) // do not wait
            }
        }
    }
@@ -11,7 +11,7 @@ import (
// Shared constants.
const (
    defaultClientBuf = 256
    statusWaitTotal  = 8 * time.Second
    statusWaitTotal  = 10 * time.Second
)

// Manager-level errors.
@@ -24,21 +24,6 @@ var (
    ErrUnknownProvider = errorf("unknown provider")
)

// Session holds per-session state. Owned by the manager loop.
type session struct {
    id uuid.UUID

    clientIn  chan domain.Message // caller writes
    clientOut chan domain.Message // caller reads

    bound map[domain.Identifier]struct{}

    closed    bool
    attached  bool
    idleAfter time.Duration
    idleTimer *time.Timer
}

// Commands posted into the manager loop. One struct per action.
type addProviderCmd struct {
    name string
File diff suppressed because it is too large
@@ -6,8 +6,8 @@ type Provider interface {
    Start() error
    Stop()

    StartStream(key string, destination chan<- domain.Message) <-chan error
    StopStream(key string) <-chan error
    StartStreams(keys []string) <-chan error
    StopStreams(key []string) <-chan error

    Fetch(key string) (domain.Message, error)
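
Since the concrete FuturesWebsocket diff is suppressed above, here is one plausible provider-side shape for the batched contract: attempt every subject, aggregate the failures, and answer the whole batch once on the returned ack channel. This is a hedged sketch under that assumption, not the repository's implementation; the subscribe callback stands in for whatever the real provider does per subject.

package main

import (
    "errors"
    "fmt"
)

// startStreamsSketch shows one way to satisfy StartStreams: try each key,
// join the per-key failures, and deliver a single result on the ack channel
// (nil when every subject subscribed cleanly).
func startStreamsSketch(keys []string, subscribe func(string) error) <-chan error {
    ack := make(chan error, 1)
    go func() {
        var errs []error
        for _, k := range keys {
            if err := subscribe(k); err != nil {
                errs = append(errs, fmt.Errorf("subscribe %s: %w", k, err))
            }
        }
        ack <- errors.Join(errs...)
    }()
    return ack
}

func main() {
    ack := startStreamsSketch([]string{"btcusdt@aggTrade"}, func(k string) error { return nil })
    fmt.Println("batch ack:", <-ack)
}
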
@@ -1,7 +1,7 @@
package router

import (
    "fmt"
    "log/slog"
    "sync"

    "gitlab.michelsen.id/phillmichelsen/tessera/services/data_service/internal/domain"
@@ -25,6 +25,7 @@ func (r *Router) IncomingChannel() chan<- domain.Message {
}

func (r *Router) Run() {
    slog.Default().Info("router started", "cmp", "router")
    for msg := range r.incoming {
        r.mu.RLock()
        channels := r.routes[msg.Identifier]
@@ -33,7 +34,7 @@ func (r *Router) Run() {
            select {
            case ch <- msg:
            default:
                fmt.Println("Router could not push message to a full buffer...") // TODO: Handle full buffer case more gracefully
                slog.Default().Warn("dropping message due to backpressure", "cmp", "router", "identifier", msg.Identifier.Key())
            }
        }
        r.mu.RUnlock()
@@ -44,6 +45,8 @@ func (r *Router) RegisterRoute(id domain.Identifier, ch chan<- domain.Message) {
    r.mu.Lock()
    r.routes[id] = append(r.routes[id], ch)
    r.mu.Unlock()

    slog.Default().Debug("registered route", "cmp", "router", "identifier", id.Key(), "channel", ch)
}

func (r *Router) DeregisterRoute(id domain.Identifier, ch chan<- domain.Message) {
@@ -62,4 +65,6 @@ func (r *Router) DeregisterRoute(id domain.Identifier, ch chan<- domain.Message)
        r.routes[id] = slice
    }
    r.mu.Unlock()

    slog.Default().Debug("deregistered route", "cmp", "router", "identifier", id.Key(), "channel", ch)
}
@@ -46,7 +46,6 @@ func (s *GRPCStreamingServer) ConnectStream(req *pb.ConnectStreamRequest, stream
        if err := stream.Send(&pb.Message{
            Identifier: &pb.Identifier{Key: msg.Identifier.Key()},
            Payload:    msg.Payload,
            Encoding:   string(msg.Encoding),
        }); err != nil {
            return err
        }