You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

collector.go 4.9 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231
  1. package connectivity
  2. import (
  3. "math/rand"
  4. "sync"
  5. "time"
  6. "gitlink.org.cn/cloudream/common/pkgs/logger"
  7. cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
  8. stgglb "gitlink.org.cn/cloudream/storage/common/globals"
  9. coormq "gitlink.org.cn/cloudream/storage/common/pkgs/mq/coordinator"
  10. )
  11. type Connectivity struct {
  12. ToNodeID cdssdk.NodeID
  13. Delay *time.Duration
  14. TestTime time.Time
  15. }
  16. type Collector struct {
  17. cfg *Config
  18. onCollected func(collector *Collector)
  19. collectNow chan any
  20. close chan any
  21. connectivities map[cdssdk.NodeID]Connectivity
  22. lock *sync.RWMutex
  23. }
  24. func NewCollector(cfg *Config, onCollected func(collector *Collector)) Collector {
  25. rpt := Collector{
  26. cfg: cfg,
  27. collectNow: make(chan any),
  28. close: make(chan any),
  29. connectivities: make(map[cdssdk.NodeID]Connectivity),
  30. lock: &sync.RWMutex{},
  31. onCollected: onCollected,
  32. }
  33. go rpt.serve()
  34. return rpt
  35. }
  36. func NewCollectorWithInitData(cfg *Config, onCollected func(collector *Collector), initData map[cdssdk.NodeID]Connectivity) Collector {
  37. rpt := Collector{
  38. cfg: cfg,
  39. collectNow: make(chan any),
  40. close: make(chan any),
  41. connectivities: initData,
  42. lock: &sync.RWMutex{},
  43. onCollected: onCollected,
  44. }
  45. go rpt.serve()
  46. return rpt
  47. }
  48. func (r *Collector) Get(nodeID cdssdk.NodeID) *Connectivity {
  49. r.lock.RLock()
  50. defer r.lock.RUnlock()
  51. con, ok := r.connectivities[nodeID]
  52. if ok {
  53. return &con
  54. }
  55. return nil
  56. }
  57. func (r *Collector) GetAll() map[cdssdk.NodeID]Connectivity {
  58. r.lock.RLock()
  59. defer r.lock.RUnlock()
  60. ret := make(map[cdssdk.NodeID]Connectivity)
  61. for k, v := range r.connectivities {
  62. ret[k] = v
  63. }
  64. return ret
  65. }
  66. // 启动一次收集
  67. func (r *Collector) CollecNow() {
  68. select {
  69. case r.collectNow <- nil:
  70. default:
  71. }
  72. }
  73. // 就地进行收集,会阻塞当前线程
  74. func (r *Collector) CollectInPlace() {
  75. r.testing()
  76. }
  77. func (r *Collector) Close() {
  78. select {
  79. case r.close <- nil:
  80. default:
  81. }
  82. }
  83. func (r *Collector) serve() {
  84. log := logger.WithType[Collector]("")
  85. log.Info("start connectivity reporter")
  86. // 为了防止同时启动的节点会集中进行Ping,所以第一次上报间隔为0-TestInterval秒之间随机
  87. startup := true
  88. firstReportDelay := time.Duration(float64(r.cfg.TestInterval) * float64(time.Second) * rand.Float64())
  89. ticker := time.NewTicker(firstReportDelay)
  90. loop:
  91. for {
  92. select {
  93. case <-ticker.C:
  94. r.testing()
  95. if startup {
  96. startup = false
  97. ticker.Reset(time.Duration(r.cfg.TestInterval) * time.Second)
  98. }
  99. case <-r.collectNow:
  100. r.testing()
  101. case <-r.close:
  102. ticker.Stop()
  103. break loop
  104. }
  105. }
  106. log.Info("stop connectivity reporter")
  107. }
  108. func (r *Collector) testing() {
  109. log := logger.WithType[Collector]("")
  110. log.Debug("do testing")
  111. coorCli, err := stgglb.CoordinatorMQPool.Acquire()
  112. if err != nil {
  113. return
  114. }
  115. defer stgglb.CoordinatorMQPool.Release(coorCli)
  116. getNodeResp, err := coorCli.GetNodes(coormq.NewGetNodes(nil))
  117. if err != nil {
  118. return
  119. }
  120. wg := sync.WaitGroup{}
  121. cons := make([]Connectivity, len(getNodeResp.Nodes))
  122. for i, node := range getNodeResp.Nodes {
  123. tmpIdx := i
  124. tmpNode := node
  125. wg.Add(1)
  126. go func() {
  127. defer wg.Done()
  128. cons[tmpIdx] = r.ping(tmpNode)
  129. }()
  130. }
  131. wg.Wait()
  132. r.lock.Lock()
  133. // 删除所有node的记录,然后重建,避免node数量变化时导致残余数据
  134. r.connectivities = make(map[cdssdk.NodeID]Connectivity)
  135. for _, con := range cons {
  136. r.connectivities[con.ToNodeID] = con
  137. }
  138. r.lock.Unlock()
  139. if r.onCollected != nil {
  140. r.onCollected(r)
  141. }
  142. }
  143. func (r *Collector) ping(node cdssdk.Node) Connectivity {
  144. log := logger.WithType[Collector]("").WithField("NodeID", node.NodeID)
  145. ip := node.ExternalIP
  146. port := node.ExternalGRPCPort
  147. if node.LocationID == stgglb.Local.LocationID {
  148. ip = node.LocalIP
  149. port = node.LocalGRPCPort
  150. }
  151. agtCli, err := stgglb.AgentRPCPool.Acquire(ip, port)
  152. if err != nil {
  153. log.Warnf("new agent %v:%v rpc client: %w", ip, port, err)
  154. return Connectivity{
  155. ToNodeID: node.NodeID,
  156. Delay: nil,
  157. TestTime: time.Now(),
  158. }
  159. }
  160. defer stgglb.AgentRPCPool.Release(agtCli)
  161. // 第一次ping保证网络连接建立成功
  162. err = agtCli.Ping()
  163. if err != nil {
  164. log.Warnf("pre ping: %v", err)
  165. return Connectivity{
  166. ToNodeID: node.NodeID,
  167. Delay: nil,
  168. TestTime: time.Now(),
  169. }
  170. }
  171. // 后几次ping计算延迟
  172. var avgDelay time.Duration
  173. for i := 0; i < 3; i++ {
  174. start := time.Now()
  175. err = agtCli.Ping()
  176. if err != nil {
  177. log.Warnf("ping: %v", err)
  178. return Connectivity{
  179. ToNodeID: node.NodeID,
  180. Delay: nil,
  181. TestTime: time.Now(),
  182. }
  183. }
  184. delay := time.Since(start)
  185. avgDelay += delay
  186. // 每次ping之间间隔1秒
  187. <-time.After(time.Second)
  188. }
  189. delay := avgDelay / 3
  190. return Connectivity{
  191. ToNodeID: node.NodeID,
  192. Delay: &delay,
  193. TestTime: time.Now(),
  194. }
  195. }

本项目旨在将云际存储公共基础设施化,使个人及企业可低门槛使用高效的云际存储服务(安装开箱即用云际存储客户端即可,无需关注其他组件的部署),同时支持用户灵活便捷定制云际存储的功能细节。