|
- package connectivity
-
- import (
- "context"
- "math/rand"
- "sync"
- "time"
-
- "gitlink.org.cn/cloudream/common/pkgs/async"
- "gitlink.org.cn/cloudream/common/pkgs/logger"
- stgglb "gitlink.org.cn/cloudream/jcs-pub/common/globals"
- corrpc "gitlink.org.cn/cloudream/jcs-pub/common/pkgs/rpc/coordinator"
- hubrpc "gitlink.org.cn/cloudream/jcs-pub/common/pkgs/rpc/hub"
- jcstypes "gitlink.org.cn/cloudream/jcs-pub/common/types"
- )
-
// CollectorEvent is the common interface of the events that a Collector
// publishes on the channel returned by Start.
type CollectorEvent interface {
	// IsCollectorEvent identifies the implementing type as a collector event.
	IsCollectorEvent() bool
}
-
- type ExitEvent struct {
- CollectorEvent
- Err error
- }
-
- type CollectedEvent struct {
- CollectorEvent
- }
-
- type Connectivity struct {
- ToHubID jcstypes.HubID
- Latency *time.Duration
- TestTime time.Time
- }
-
- type Collector struct {
- cfg Config
- enabled bool
- collectNow chan any
- done chan any
- connectivities map[jcstypes.HubID]Connectivity
- lock *sync.RWMutex
- }
-
- func NewEnabled(cfg Config) *Collector {
- rpt := Collector{
- cfg: cfg,
- enabled: true,
- collectNow: make(chan any, 1),
- done: make(chan any, 1),
- connectivities: make(map[jcstypes.HubID]Connectivity),
- lock: &sync.RWMutex{},
- }
- return &rpt
- }
- func NewDisabled() *Collector {
- return &Collector{
- enabled: false,
- collectNow: make(chan any, 1),
- done: make(chan any, 1),
- connectivities: make(map[jcstypes.HubID]Connectivity),
- lock: &sync.RWMutex{},
- }
- }
-
- func (r *Collector) GetAll() map[jcstypes.HubID]Connectivity {
- r.lock.RLock()
- defer r.lock.RUnlock()
-
- ret := make(map[jcstypes.HubID]Connectivity)
- for k, v := range r.connectivities {
- ret[k] = v
- }
-
- return ret
- }
-
- // 启动一次收集
- func (r *Collector) CollecNow() {
- select {
- case r.collectNow <- nil:
- default:
- }
- }
-
- // 就地进行收集,会阻塞当前线程。如果模块未启用,则不会有任何效果
- func (r *Collector) CollectInPlace() {
- if !r.enabled {
- return
- }
-
- r.testing()
- }
-
- func (r *Collector) Start() *async.UnboundChannel[CollectorEvent] {
- log := logger.WithField("Mod", "Collector")
-
- ch := async.NewUnboundChannel[CollectorEvent]()
- go func() {
- if !r.enabled {
- return
- }
-
- // 为了防止同时启动的节点会集中进行Ping,所以第一次上报间隔为0-TestInterval秒之间随机
- startup := true
- firstReportLatency := time.Duration(float64(r.cfg.TestInterval) * float64(time.Second) * rand.Float64())
- ticker := time.NewTicker(firstReportLatency)
-
- loop:
- for {
- select {
- case <-ticker.C:
- log.Infof("collecting...")
- if r.testing() {
- ch.Send(CollectedEvent{})
- }
- if startup {
- startup = false
- ticker.Reset(time.Duration(r.cfg.TestInterval) * time.Second)
- }
-
- case <-r.collectNow:
- log.Infof("collecting...")
- if r.testing() {
- ch.Send(CollectedEvent{})
- }
-
- case <-r.done:
- ticker.Stop()
- break loop
- }
- }
-
- ch.Send(ExitEvent{})
- }()
-
- return ch
- }
-
- func (r *Collector) Stop() {
- select {
- case r.done <- nil:
- default:
- }
- }
-
- func (r *Collector) testing() bool {
- coorCli := stgglb.CoordinatorRPCPool.Get()
- defer coorCli.Release()
-
- getHubResp, cerr := coorCli.GetHubs(context.Background(), corrpc.NewGetHubs(nil))
- if cerr != nil {
- return false
- }
-
- wg := sync.WaitGroup{}
- cons := make([]Connectivity, len(getHubResp.Hubs))
- for i, hub := range getHubResp.Hubs {
- tmpIdx := i
- tmpHub := hub
-
- wg.Add(1)
- go func() {
- defer wg.Done()
- cons[tmpIdx] = r.ping(*tmpHub)
- }()
- }
-
- wg.Wait()
-
- r.lock.Lock()
- // 删除所有hub的记录,然后重建,避免hub数量变化时导致残余数据
- r.connectivities = make(map[jcstypes.HubID]Connectivity)
- for _, con := range cons {
- r.connectivities[con.ToHubID] = con
- }
- r.lock.Unlock()
-
- return true
- }
-
- func (r *Collector) ping(hub jcstypes.Hub) Connectivity {
- log := logger.WithType[Collector]("").WithField("HubID", hub.HubID)
-
- var ip string
- var port int
- switch addr := hub.Address.(type) {
- case *jcstypes.GRPCAddressInfo:
- // TODO 重新设计选择LocalIP的策略
- ip = addr.ExternalIP
- port = addr.ExternalGRPCPort
- default:
- // TODO 增加对HTTP模式的hub的支持
-
- log.Warnf("unsupported address type: %v", addr)
-
- return Connectivity{
- ToHubID: hub.HubID,
- Latency: nil,
- TestTime: time.Now(),
- }
- }
-
- agtCli := stgglb.HubRPCPool.Get(ip, port)
- defer agtCli.Release()
-
- // 第一次ping保证网络连接建立成功
- _, err := agtCli.Ping(context.Background(), &hubrpc.Ping{})
- if err != nil {
- log.Warnf("pre ping: %v", err)
- return Connectivity{
- ToHubID: hub.HubID,
- Latency: nil,
- TestTime: time.Now(),
- }
- }
-
- // 后几次ping计算延迟
- var avgLatency time.Duration
- for i := 0; i < 3; i++ {
- start := time.Now()
- _, err := agtCli.Ping(context.Background(), &hubrpc.Ping{})
- if err != nil {
- log.Warnf("ping: %v", err)
- return Connectivity{
- ToHubID: hub.HubID,
- Latency: nil,
- TestTime: time.Now(),
- }
- }
-
- latency := time.Since(start)
- avgLatency += latency
-
- // 每次ping之间间隔1秒
- <-time.After(time.Second)
- }
- latency := avgLatency / 3
-
- return Connectivity{
- ToHubID: hub.HubID,
- Latency: &latency,
- TestTime: time.Now(),
- }
- }
|