Refactor Binance Futures Websocket: enhance configuration management, implement shard handling, and improve subscription logic

This commit is contained in:
2025-09-17 10:57:21 +00:00
parent 78c7632394
commit 7109acc207
8 changed files with 1255 additions and 104 deletions

View File

@@ -1,52 +1,252 @@
package ws
import (
"context"
"fmt"
"log/slog"
"sync"
"sync/atomic"
"time"
"github.com/google/uuid"
"gitlab.michelsen.id/phillmichelsen/tessera/services/data_service/internal/domain"
)
type BinanceFutures struct {
cfg config
shards map[uuid.UUID]*shard
streamAssignments map[string]*shard
}
const providerName = "binance_futures"
type config struct {
type Config struct {
Endpoint string
MaxStreamsPerShard uint8
BatchInterval time.Duration
MaxStreamsPerShard uint16
RateLimitPerSec uint16
}
func NewBinanceFuturesWebsocket(cfg config) *BinanceFutures {
type BinanceFutures struct {
cfg Config
bus chan<- domain.Message
mu sync.RWMutex
shards map[uuid.UUID]*shard
assignOrder []uuid.UUID
streamAssignments map[string]*shard
pendingGlobal map[string][]chan error
ctx context.Context
cancel context.CancelFunc
idSeq atomic.Uint64
}
func NewBinanceFuturesWebsocket(cfg Config, bus chan<- domain.Message) *BinanceFutures {
if cfg.Endpoint == "" {
cfg.Endpoint = "wss://fstream.binance.com/stream"
}
if cfg.RateLimitPerSec <= 0 {
cfg.RateLimitPerSec = 5
}
if cfg.MaxStreamsPerShard == 0 {
cfg.MaxStreamsPerShard = 15
}
return &BinanceFutures{
cfg: cfg,
shards: make(map[uuid.UUID]*shard),
cfg: cfg,
bus: bus,
shards: make(map[uuid.UUID]*shard),
streamAssignments: make(map[string]*shard),
pendingGlobal: make(map[string][]chan error),
}
}
// Start creates the provider context, connects the first shard and launches
// the idle-shard collector. Calling Start on an already started provider is a
// no-op. If the first shard cannot be created the provider is left unstarted,
// so Start may be retried.
func (b *BinanceFutures) Start() error {
	b.mu.Lock()
	defer b.mu.Unlock()

	if b.ctx != nil {
		return nil
	}

	ctx, cancel := context.WithCancel(context.Background())
	sh, err := newShard(ctx, b.cfg, b.bus, b.nextReqID)
	if err != nil {
		// Release the context and keep b.ctx nil; otherwise every later
		// Start would return nil without having started anything.
		cancel()
		return fmt.Errorf("start %s: %w", providerName, err)
	}

	b.ctx, b.cancel = ctx, cancel
	b.shards[sh.ID] = sh
	b.assignOrder = []uuid.UUID{sh.ID}
	slog.Default().Info("started", slog.String("cmp", providerName))

	// idle shard GC
	go b.gcIdleShards()
	return nil
}
// Stop cancels the provider context, forgets all shards and assignments,
// notifies global waiters with context.Canceled and then stops every shard.
func (b *BinanceFutures) Stop() {
	b.mu.Lock()
	if b.cancel != nil {
		b.cancel()
	}

	// Snapshot the shards so they can be stopped after the lock is released;
	// shard.stop waits for the shard's goroutines.
	shs := make([]*shard, 0, len(b.shards))
	for _, sh := range b.shards {
		shs = append(shs, sh)
	}
	b.shards = map[uuid.UUID]*shard{}
	b.assignOrder = nil
	b.streamAssignments = map[string]*shard{}

	for subj, waiters := range b.pendingGlobal {
		for _, ch := range waiters {
			// Non-blocking: a waiter that already holds a result is skipped.
			select {
			case ch <- context.Canceled:
			default:
			}
		}
		delete(b.pendingGlobal, subj)
	}
	slog.Default().Info("stopped", slog.String("cmp", providerName))
	b.mu.Unlock()

	for _, sh := range shs {
		sh.stop()
	}
}
// Subscribe requests the given stream and returns a channel (buffer 1) that
// receives the outcome: nil once the stream is active, or an error.
//
// The error is delivered immediately when the subject is invalid, the
// provider has not been started, or no shard can be obtained. Otherwise the
// request is queued on a shard and resolved by that shard.
func (b *BinanceFutures) Subscribe(subject string) <-chan error {
	ch := make(chan error, 1)
	if !IsValidSubject(subject) {
		ch <- fmt.Errorf("invalid subject: %s", subject)
		return ch
	}

	b.mu.Lock()
	defer b.mu.Unlock()

	// Creating a shard needs the provider context, which only exists after Start.
	if b.ctx == nil {
		ch <- fmt.Errorf("subscribe %s: provider not started", subject)
		return ch
	}
	if sh, ok := b.streamAssignments[subject]; ok && sh.isActive(subject) {
		ch <- nil
		return ch
	}

	sh := b.pickShardLocked()
	if sh == nil {
		// pickShardLocked returns nil when no shard exists and a new one
		// cannot be created.
		ch <- fmt.Errorf("subscribe %s: no shard available", subject)
		return ch
	}
	b.streamAssignments[subject] = sh
	sh.enqueueSubscribe(subject, ch)
	return ch
}
// Unsubscribe drops the assignment for subject and queues an unsubscribe on
// the shard that owned it. The returned channel (buffer 1) receives nil at
// once when the subject was not assigned; otherwise the shard resolves it.
func (b *BinanceFutures) Unsubscribe(subject string) <-chan error {
	ch := make(chan error, 1)

	b.mu.Lock()
	sh, ok := b.streamAssignments[subject]
	if ok {
		delete(b.streamAssignments, subject) // allow reassignment later
	}
	b.mu.Unlock()

	if !ok {
		ch <- nil
		return ch
	}
	sh.enqueueUnsubscribe(subject, ch)
	return ch
}
func (b *BinanceFutures) Fetch(subject string) (domain.Message, error) {
func (b *BinanceFutures) Fetch(_ string) (domain.Message, error) {
return domain.Message{}, fmt.Errorf("fetch not supported by provider")
}
// GetActiveStreams returns the active subjects of every shard. The result is
// never nil; its order is unspecified because shards are kept in a map.
func (b *BinanceFutures) GetActiveStreams() []string {
	b.mu.RLock()
	defer b.mu.RUnlock()

	out := make([]string, 0)
	for _, sh := range b.shards {
		out = append(out, sh.activeList()...)
	}
	return out
}

// IsStreamActive reports whether key is assigned to a shard and that shard
// lists it as active.
func (b *BinanceFutures) IsStreamActive(key string) bool {
	b.mu.RLock()
	sh := b.streamAssignments[key]
	b.mu.RUnlock()

	if sh == nil {
		return false
	}
	return sh.isActive(key)
}

// IsValidSubject reports whether key is a supported stream name. The second
// argument is ignored.
func (b *BinanceFutures) IsValidSubject(key string, _ bool) bool { return IsValidSubject(key) }
// pickShardLocked chooses the shard for a new subscription. The caller must
// hold b.mu.
//
// Among the shards in assignOrder it prefers the one with the smallest load
// estimate that is still below MaxStreamsPerShard. When none qualifies a new
// shard is created and registered. If that fails, the first shard in
// assignOrder is returned regardless of its load; with no shards at all the
// result is nil.
func (b *BinanceFutures) pickShardLocked() *shard {
	limit := int(b.cfg.MaxStreamsPerShard)
	bestLoad := int(^uint(0) >> 1) // max int

	var best *shard
	for _, id := range b.assignOrder {
		candidate := b.shards[id]
		if candidate == nil {
			continue
		}
		load := candidate.loadEstimate()
		if load >= limit || load >= bestLoad {
			continue
		}
		best, bestLoad = candidate, load
	}
	if best != nil {
		return best
	}

	// Every known shard is full (or there is none): try to open another one.
	fresh, err := newShard(b.ctx, b.cfg, b.bus, b.nextReqID)
	if err == nil {
		b.shards[fresh.ID] = fresh
		b.assignOrder = append(b.assignOrder, fresh.ID)
		return fresh
	}
	if len(b.assignOrder) > 0 {
		return b.shards[b.assignOrder[0]]
	}
	return fresh
}
// nextReqID returns the next request id from the provider-wide atomic
// counter; the first id handed out is 1.
func (b *BinanceFutures) nextReqID() uint64 {
	return b.idSeq.Add(1)
}
// gcIdleShards closes idle shards every 30 seconds until the provider context
// is cancelled. At least one shard is always kept.
func (b *BinanceFutures) gcIdleShards() {
	t := time.NewTicker(30 * time.Second)
	defer t.Stop()

	for {
		select {
		case <-b.ctx.Done():
			return
		case <-t.C:
		}

		var toStop []*shard
		b.mu.Lock()
		for id, sh := range b.shards {
			// Removed shards are already gone from b.shards, so its length
			// alone is the number of survivors.
			if len(b.shards) <= 1 {
				break // keep one
			}
			if !sh.isIdle() {
				continue
			}
			toStop = append(toStop, sh)
			delete(b.shards, id)

			// prune order list
			for i, v := range b.assignOrder {
				if v == id {
					b.assignOrder = append(b.assignOrder[:i], b.assignOrder[i+1:]...)
					break
				}
			}
			// Drop assignments that still point at the shard being closed so
			// they do not keep it reachable.
			for subj, owner := range b.streamAssignments {
				if owner == sh {
					delete(b.streamAssignments, subj)
				}
			}
		}
		b.mu.Unlock()

		// Stop outside the lock: shard.stop waits for the shard's goroutines.
		for _, sh := range toStop {
			slog.Default().Info("close idle shard", "cmp", providerName, "shard", sh.ID)
			sh.stop()
		}
	}
}

View File

@@ -1,12 +1,465 @@
package ws
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"log/slog"
"sync"
"time"
"github.com/coder/websocket"
"github.com/google/uuid"
"gitlab.michelsen.id/phillmichelsen/tessera/services/data_service/internal/domain"
)
type shard struct {
ID uuid.UUID
conn websocket.Conn
activeStreams []string
type opType uint8
const (
opSubscribe opType = iota + 1
opUnsubscribe
)
type pendingBatch struct {
Op opType
Subjects []string
Waiters map[string][]chan error
}
type shard struct {
ID uuid.UUID
url string
cfg Config
ctx context.Context
cancel context.CancelFunc
conn *websocket.Conn
mu sync.RWMutex
active map[string]struct{}
subBatch map[string][]chan error
unsubBatch map[string][]chan error
sendQ chan []byte
rateTicker *time.Ticker
pingTicker *time.Ticker
pendingMu sync.Mutex
pendingByID map[uint64]*pendingBatch
nextReqID func() uint64
wg sync.WaitGroup
bus chan<- domain.Message
}
// newShard creates a shard whose lifetime is bound to pctx, dials cfg.Endpoint
// and starts the shard's loops. next supplies request ids. When the dial
// fails it returns (nil, err) and releases everything it allocated.
func newShard(pctx context.Context, cfg Config, bus chan<- domain.Message, next func() uint64) (*shard, error) {
	ctx, cancel := context.WithCancel(pctx)
	sh := &shard{
		ID:          uuid.New(),
		url:         cfg.Endpoint,
		cfg:         cfg,
		ctx:         ctx,
		cancel:      cancel,
		active:      make(map[string]struct{}),
		subBatch:    make(map[string][]chan error),
		unsubBatch:  make(map[string][]chan error),
		sendQ:       make(chan []byte, 256),
		pendingByID: make(map[uint64]*pendingBatch),
		nextReqID:   next,
		bus:         bus,
	}
	slog.Default().Info("shard created", "cmp", providerName, "shard", sh.ID.String())

	if err := sh.connect(); err != nil {
		// Nobody will ever call stop on this shard, so release its context here.
		cancel()
		slog.Default().Error("shard connection failed", "cmp", providerName, "shard", sh.ID.String(), "error", err)
		return nil, err
	}

	// per-shard rate limiter; also drives batch flushing.
	// The tickers are created only after the dial succeeded so a failed
	// shard leaves none behind.
	rate := cfg.RateLimitPerSec
	if rate == 0 { // unsigned: guard against division by zero
		rate = 1
	}
	sh.rateTicker = time.NewTicker(time.Second / time.Duration(rate))
	sh.pingTicker = time.NewTicker(30 * time.Second)

	sh.startLoops()
	return sh, nil
}
// connect dials s.url with a 10 second timeout derived from the shard
// context and stores the resulting connection in s.conn. The outcome is
// logged either way.
func (s *shard) connect() error {
	dialCtx, stopDial := context.WithTimeout(s.ctx, 10*time.Second)
	defer stopDial()

	conn, _, dialErr := websocket.Dial(dialCtx, s.url, &websocket.DialOptions{})
	if dialErr == nil {
		s.conn = conn
		slog.Default().Info("shard connected", "cmp", providerName, "shard", s.ID.String())
		return nil
	}

	slog.Default().Error("shard connection error", "cmp", providerName, "shard", s.ID.String(), "error", dialErr)
	return dialErr
}
// startLoops launches the write, read and ping goroutines and registers all
// three with the shard's WaitGroup.
func (s *shard) startLoops() {
	loops := [...]func(){s.writeLoop, s.readLoop, s.pingLoop}
	s.wg.Add(len(loops))
	for _, run := range loops {
		go run()
	}
}
// stop shuts the shard down: it cancels the context, closes the connection,
// stops both tickers, waits for the loops and finally notifies every waiter
// still attached to a pending or staged request with context.Canceled.
func (s *shard) stop() {
	s.cancel()
	if s.conn != nil {
		_ = s.conn.Close(websocket.StatusNormalClosure, "shutdown")
	}
	for _, tk := range []*time.Ticker{s.rateTicker, s.pingTicker} {
		if tk != nil {
			tk.Stop()
		}
	}
	s.wg.Wait()

	// abort delivers context.Canceled without blocking; a waiter whose
	// channel cannot take the value is skipped.
	abort := func(waiters []chan error) {
		for _, ch := range waiters {
			select {
			case ch <- context.Canceled:
			default:
			}
		}
	}

	s.pendingMu.Lock()
	for _, batch := range s.pendingByID {
		for _, waiters := range batch.Waiters {
			abort(waiters)
		}
	}
	s.pendingByID = make(map[uint64]*pendingBatch)
	s.pendingMu.Unlock()

	s.mu.Lock()
	for _, waiters := range s.subBatch {
		abort(waiters)
	}
	for _, waiters := range s.unsubBatch {
		abort(waiters)
	}
	s.subBatch = make(map[string][]chan error)
	s.unsubBatch = make(map[string][]chan error)
	s.mu.Unlock()

	slog.Default().Info("shard stopped", "cmp", providerName, "shard", s.ID.String())
}
// enqueueSubscribe stages subject for the next subscribe batch and registers
// ch as a waiter for it.
func (s *shard) enqueueSubscribe(subject string, ch chan error) {
	s.mu.Lock()
	waiters := append(s.subBatch[subject], ch)
	s.subBatch[subject] = waiters
	s.mu.Unlock()

	slog.Default().Debug("shard enqueue subscribe", "cmp", providerName, "shard", s.ID, "subject", subject)
}
// enqueueUnsubscribe stages subject for the next unsubscribe batch and
// registers ch as a waiter for it.
func (s *shard) enqueueUnsubscribe(subject string, ch chan error) {
	s.mu.Lock()
	waiters := append(s.unsubBatch[subject], ch)
	s.unsubBatch[subject] = waiters
	s.mu.Unlock()

	slog.Default().Debug("shard enqueue unsubscribe", "cmp", providerName, "shard", s.ID, "subject", subject)
}
// isActive reports whether subj is in the shard's active set.
func (s *shard) isActive(subj string) bool {
	s.mu.RLock()
	defer s.mu.RUnlock()

	_, present := s.active[subj]
	return present
}
// activeCount returns the number of subjects in the active set.
func (s *shard) activeCount() int {
	s.mu.RLock()
	defer s.mu.RUnlock()

	return len(s.active)
}
// loadEstimate returns the active subject count plus the number of subjects
// staged for subscription.
func (s *shard) loadEstimate() int {
	s.mu.RLock()
	defer s.mu.RUnlock()

	return len(s.active) + len(s.subBatch)
}
// isIdle reports whether the shard has no active subjects and nothing staged
// for subscribe or unsubscribe.
//
// NOTE(review): requests already sent and awaiting an acknowledgement
// (pendingByID) are not considered here; confirm that is intended.
func (s *shard) isIdle() bool {
	s.mu.RLock()
	defer s.mu.RUnlock()

	if len(s.active) != 0 {
		return false
	}
	if len(s.subBatch) != 0 {
		return false
	}
	return len(s.unsubBatch) == 0
}
// activeList returns a freshly allocated slice of the active subjects, in
// map-iteration (unspecified) order.
func (s *shard) activeList() []string {
	s.mu.RLock()
	defer s.mu.RUnlock()

	subjects := make([]string, 0, len(s.active))
	for subj := range s.active {
		subjects = append(subjects, subj)
	}
	return subjects
}
// writeLoop runs until the shard context is cancelled or a write fails. On
// every rate-limiter tick it flushes the staged subscribe batch, then the
// staged unsubscribe batch, then at most one ad-hoc frame from sendQ. After a
// failed write it hands over to reconnect and exits.
func (s *shard) writeLoop() {
	defer s.wg.Done()
	for {
		select {
		case <-s.ctx.Done():
			return
		case <-s.rateTicker.C:
		}

		// snapshot and clear pending batch operations
		var subs, unsubs map[string][]chan error
		s.mu.Lock()
		if len(s.subBatch) > 0 {
			subs = s.subBatch
			s.subBatch = make(map[string][]chan error)
		}
		if len(s.unsubBatch) > 0 {
			unsubs = s.unsubBatch
			s.unsubBatch = make(map[string][]chan error)
		}
		s.mu.Unlock()

		// Both batches are attempted even if the first write fails, so the
		// waiters of the second batch are notified too and not dropped with
		// the snapshot.
		subErr := s.sendBatch("SUBSCRIBE", opSubscribe, subs)
		unsubErr := s.sendBatch("UNSUBSCRIBE", opUnsubscribe, unsubs)
		if subErr != nil || unsubErr != nil {
			s.reconnect()
			return
		}

		// optional: one queued ad-hoc frame per tick
		select {
		case msg := <-s.sendQ:
			if err := s.writeFrame(msg); err != nil {
				s.reconnect()
				return
			}
		default:
		}
	}
}

// sendBatch sends one control frame for all subjects in batch and records it
// as pending under a fresh request id. An empty batch is a no-op. When the
// frame cannot be written the batch's waiters receive the error and it is
// returned so the caller can reconnect.
func (s *shard) sendBatch(method string, op opType, batch map[string][]chan error) error {
	if len(batch) == 0 {
		return nil
	}
	params := make([]string, 0, len(batch))
	for subj := range batch {
		params = append(params, subj)
	}

	id := s.nextReqID()
	s.recordPending(id, op, params, batch)

	payload, err := json.Marshal(map[string]any{"method": method, "params": params, "id": id})
	if err != nil {
		// An encoding failure is not a connection problem: fail the waiters
		// but do not trigger a reconnect.
		s.resolvePending(id, err)
		return nil
	}
	if err := s.writeFrame(payload); err != nil {
		// No acknowledgement will arrive for a frame that was never sent.
		s.resolvePending(id, err)
		return err
	}
	return nil
}
// writeFrame writes msg as a text frame with a 5 second timeout derived from
// the shard context. A failure is logged and returned.
func (s *shard) writeFrame(msg []byte) error {
	writeCtx, release := context.WithTimeout(s.ctx, 5*time.Second)
	defer release()

	if writeErr := s.conn.Write(writeCtx, websocket.MessageText, msg); writeErr != nil {
		slog.Default().Warn("shard write error", "cmp", providerName, "shard", s.ID, "error", writeErr)
		return writeErr
	}
	return nil
}
// readLoop reads frames until the shard context is cancelled or a read fails
// with anything other than a deadline. Acknowledgements resolve the matching
// pending batch; stream frames are forwarded to the bus; anything else is
// logged at debug level. After a read error it hands over to reconnect and
// exits.
func (s *shard) readLoop() {
	defer s.wg.Done()
	for {
		select {
		case <-s.ctx.Done():
			return
		default:
			// longer idle timeout when no active subscriptions
			timeout := 60 * time.Second
			if s.activeCount() == 0 {
				timeout = 5 * time.Minute
			}
			rctx, cancel := context.WithTimeout(s.ctx, timeout)
			_, data, err := s.conn.Read(rctx)
			cancel()
			if err != nil {
				// NOTE(review): the websocket library may close the connection
				// when a read context expires; confirm that retrying on the
				// same connection after a deadline is intended.
				if errors.Is(err, context.DeadlineExceeded) {
					slog.Default().Debug("shard read idle timeout", "cmp", providerName, "shard", s.ID)
					continue
				}
				slog.Default().Warn("shard read error", "cmp", providerName, "shard", s.ID, "error", err)
				s.reconnect()
				return
			}
			// Cheap pre-filter: only frames containing an "id" key are decoded
			// as acknowledgements.
			if bytes.Contains(data, []byte("\"id\"")) {
				var ack struct {
					ID     uint64           `json:"id"`
					Result *json.RawMessage `json:"result"`
					Error  *struct {
						Code int    `json:"code"`
						Msg  string `json:"msg"`
					} `json:"error"`
				}
				// A frame that fails to decode or carries id 0 falls through
				// to the stream-frame handling below.
				if json.Unmarshal(data, &ack) == nil && ack.ID != 0 {
					if ack.Error != nil {
						slog.Default().Warn("shard ack error", "cmp", providerName, "shard", s.ID, "id", ack.ID, "code", ack.Error.Code, "msg", ack.Error.Msg)
						s.resolvePending(ack.ID, fmt.Errorf("binance error %d: %s", ack.Error.Code, ack.Error.Msg))
					} else {
						slog.Default().Debug("shard ack ok", "cmp", providerName, "shard", s.ID, "id", ack.ID)
						s.resolvePending(ack.ID, nil)
					}
					continue
				}
			}
			var frame struct {
				Stream string          `json:"stream"`
				Data   json.RawMessage `json:"data"`
			}
			if json.Unmarshal(data, &frame) == nil && frame.Stream != "" {
				id, err := domain.RawID(providerName, frame.Stream)
				if err == nil {
					// Non-blocking publish: the message is dropped when the
					// bus cannot accept it right now.
					select {
					case s.bus <- domain.Message{Identifier: id, Payload: frame.Data}:
					default:
					}
				}
				continue
			}
			slog.Default().Debug("shard unknown message", "cmp", providerName, "shard", s.ID, "data", string(data))
		}
	}
}
// pingLoop pings the peer on every pingTicker tick with a 5 second timeout.
// It exits when the shard context is cancelled, or after handing over to
// reconnect when a ping fails.
func (s *shard) pingLoop() {
	defer s.wg.Done()
	for {
		select {
		case <-s.ctx.Done():
			return
		case <-s.pingTicker.C:
		}

		pingCtx, release := context.WithTimeout(s.ctx, 5*time.Second)
		pingErr := s.conn.Ping(pingCtx)
		release()
		if pingErr == nil {
			continue
		}

		slog.Default().Warn("shard ping failed", "cmp", providerName, "shard", s.ID, "error", pingErr)
		s.reconnect()
		return
	}
}
// recordPending stores a batch under its request id so resolvePending can
// find it later.
func (s *shard) recordPending(id uint64, op opType, subjects []string, waiters map[string][]chan error) {
	batch := &pendingBatch{Op: op, Subjects: subjects, Waiters: waiters}

	s.pendingMu.Lock()
	defer s.pendingMu.Unlock()
	s.pendingByID[id] = batch
}
// resolvePending completes the batch recorded under id. Unknown ids are
// ignored. With a nil err the batch's subjects are added to the active set
// (subscribe) or removed from it (any other op); with a non-nil err the
// active set is left untouched. Every waiter then receives err without
// blocking.
func (s *shard) resolvePending(id uint64, err error) {
	s.pendingMu.Lock()
	batch := s.pendingByID[id]
	delete(s.pendingByID, id)
	s.pendingMu.Unlock()

	if batch == nil {
		return
	}

	switch {
	case err != nil:
		slog.Default().Warn("shard pending error", "cmp", providerName, "shard", s.ID, "error", err)
	case batch.Op == opSubscribe:
		s.mu.Lock()
		for _, subj := range batch.Subjects {
			s.active[subj] = struct{}{}
		}
		slog.Default().Debug("shard subscribed", "cmp", providerName, "shard", s.ID, "subjects", batch.Subjects)
		s.mu.Unlock()
	default:
		s.mu.Lock()
		for _, subj := range batch.Subjects {
			delete(s.active, subj)
		}
		slog.Default().Debug("shard unsubscribed", "cmp", providerName, "shard", s.ID, "subjects", batch.Subjects)
		s.mu.Unlock()
	}

	for _, waiters := range batch.Waiters {
		for _, ch := range waiters {
			select {
			case ch <- err:
			default:
			}
		}
	}
}
// queue enqueues an ad-hoc frame for the write loop without ever blocking the
// caller. When sendQ is full the oldest queued frame is dropped to make room.
//
// It relies on sendQ being buffered, which is how newShard creates it.
func (s *shard) queue(payload []byte) {
	for {
		select {
		case s.sendQ <- payload:
			return
		default:
		}

		// Queue is full. Drop one frame, but only if one is still there: the
		// write loop or another producer may have changed the queue since the
		// failed send, and a blocking receive/send here could hang forever.
		select {
		case <-s.sendQ:
			slog.Default().Warn("shard sendQ full, dropping one message", "cmp", providerName, "shard", s.ID)
		default:
		}
	}
}
// reconnect closes the current connection and redials every 200ms until it
// succeeds or the shard context is cancelled. After a successful dial it
// re-stages every active subject for subscription on the next tick and
// restarts the shard's loops.
//
// NOTE(review): each of the three loops calls reconnect on its own failure
// and startLoops launches a full new set; nothing here prevents two loops
// from reconnecting at the same time. Confirm whether callers need to be
// serialized.
func (s *shard) reconnect() {
	reconnectStartTime := time.Now()
	if s.conn != nil {
		_ = s.conn.Close(websocket.StatusGoingAway, "reconnect")
	}

	for {
		if s.ctx.Err() != nil {
			return
		}
		if err := s.connect(); err != nil {
			// Back off, but wake up immediately on shutdown.
			select {
			case <-s.ctx.Done():
				return
			case <-time.After(200 * time.Millisecond):
			}
			continue
		}

		// re-stage current actives for batch subscribe on next tick.
		// This writes to subBatch, so the write lock is required.
		s.mu.Lock()
		for k := range s.active {
			if _, staged := s.subBatch[k]; !staged {
				// Nobody is waiting for a re-subscribe; the key alone is
				// enough for the write loop to include the subject.
				s.subBatch[k] = nil
			}
		}
		s.mu.Unlock()

		// restart loops
		s.startLoops()
		slog.Default().Info("shard reconnected", "cmp", providerName, "shard", s.ID, "downtime", time.Since(reconnectStartTime).String())
		return
	}
}

View File

@@ -0,0 +1,21 @@
package ws
import "regexp"
// Accepted stream-name shapes: a lower-case alphanumeric symbol followed by
// "@" and a stream type. Compiled once at package initialisation.
var (
	reAggTrade   = regexp.MustCompile(`^[a-z0-9]+@aggTrade$`)
	reTrade      = regexp.MustCompile(`^[a-z0-9]+@trade$`)
	reMarkPrice  = regexp.MustCompile(`^[a-z0-9]+@markPrice(@1s)?$`)
	reKline      = regexp.MustCompile(`^[a-z0-9]+@kline_(1s|1m|3m|5m|15m|30m|1h|2h|4h|6h|8h|12h|1d|3d|1w|1M)$`)
	reBookTicker = regexp.MustCompile(`^[a-z0-9]+@bookTicker$`)
	reDepth      = regexp.MustCompile(`^[a-z0-9]+@depth(@100ms)?$`)
)

// IsValidSubject reports whether s matches one of the supported stream-name
// patterns (aggTrade, trade, markPrice, kline, bookTicker, depth).
func IsValidSubject(s string) bool {
	switch {
	case reAggTrade.MatchString(s),
		reTrade.MatchString(s),
		reMarkPrice.MatchString(s),
		reKline.MatchString(s),
		reBookTicker.MatchString(s),
		reDepth.MatchString(s):
		return true
	}
	return false
}