You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

iterator.go 11 kB

2 years ago
2 years ago
1 year ago
2 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403
  1. package downloader
  2. import (
  3. "context"
  4. "fmt"
  5. "io"
  6. "math"
  7. "reflect"
  8. "time"
  9. "github.com/samber/lo"
  10. "gitlink.org.cn/cloudream/common/pkgs/bitmap"
  11. "gitlink.org.cn/cloudream/common/pkgs/ioswitch/exec"
  12. "gitlink.org.cn/cloudream/common/pkgs/logger"
  13. cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
  14. "gitlink.org.cn/cloudream/common/utils/io2"
  15. "gitlink.org.cn/cloudream/common/utils/math2"
  16. "gitlink.org.cn/cloudream/common/utils/sort2"
  17. "gitlink.org.cn/cloudream/storage/common/consts"
  18. stgglb "gitlink.org.cn/cloudream/storage/common/globals"
  19. stgmod "gitlink.org.cn/cloudream/storage/common/models"
  20. "gitlink.org.cn/cloudream/storage/common/pkgs/distlock"
  21. "gitlink.org.cn/cloudream/storage/common/pkgs/ioswitch2"
  22. "gitlink.org.cn/cloudream/storage/common/pkgs/ioswitch2/parser"
  23. "gitlink.org.cn/cloudream/storage/common/pkgs/iterator"
  24. coormq "gitlink.org.cn/cloudream/storage/common/pkgs/mq/coordinator"
  25. )
  26. type DownloadNodeInfo struct {
  27. Node cdssdk.Node
  28. ObjectPinned bool
  29. Blocks []stgmod.ObjectBlock
  30. Distance float64
  31. }
  32. type DownloadContext struct {
  33. Distlock *distlock.Service
  34. }
  35. type DownloadObjectIterator struct {
  36. OnClosing func()
  37. downloader *Downloader
  38. reqs []downloadReqeust2
  39. currentIndex int
  40. inited bool
  41. coorCli *coormq.Client
  42. allNodes map[cdssdk.NodeID]cdssdk.Node
  43. }
  44. func NewDownloadObjectIterator(downloader *Downloader, downloadObjs []downloadReqeust2) *DownloadObjectIterator {
  45. return &DownloadObjectIterator{
  46. downloader: downloader,
  47. reqs: downloadObjs,
  48. }
  49. }
  50. func (i *DownloadObjectIterator) MoveNext() (*Downloading, error) {
  51. if !i.inited {
  52. if err := i.init(); err != nil {
  53. return nil, err
  54. }
  55. i.inited = true
  56. }
  57. if i.currentIndex >= len(i.reqs) {
  58. return nil, iterator.ErrNoMoreItem
  59. }
  60. item, err := i.doMove()
  61. i.currentIndex++
  62. return item, err
  63. }
  64. func (i *DownloadObjectIterator) init() error {
  65. coorCli, err := stgglb.CoordinatorMQPool.Acquire()
  66. if err != nil {
  67. return fmt.Errorf("new coordinator client: %w", err)
  68. }
  69. i.coorCli = coorCli
  70. allNodeIDs := make(map[cdssdk.NodeID]bool)
  71. for _, obj := range i.reqs {
  72. if obj.Detail == nil {
  73. continue
  74. }
  75. for _, p := range obj.Detail.PinnedAt {
  76. allNodeIDs[p] = true
  77. }
  78. for _, b := range obj.Detail.Blocks {
  79. allNodeIDs[b.NodeID] = true
  80. }
  81. }
  82. getNodes, err := coorCli.GetNodes(coormq.NewGetNodes(lo.Keys(allNodeIDs)))
  83. if err != nil {
  84. return fmt.Errorf("getting nodes: %w", err)
  85. }
  86. i.allNodes = make(map[cdssdk.NodeID]cdssdk.Node)
  87. for _, n := range getNodes.Nodes {
  88. i.allNodes[n.NodeID] = n
  89. }
  90. return nil
  91. }
  92. func (iter *DownloadObjectIterator) doMove() (*Downloading, error) {
  93. req := iter.reqs[iter.currentIndex]
  94. if req.Detail == nil {
  95. return &Downloading{
  96. Object: nil,
  97. File: nil,
  98. Request: req.Raw,
  99. }, nil
  100. }
  101. switch red := req.Detail.Object.Redundancy.(type) {
  102. case *cdssdk.NoneRedundancy:
  103. reader, err := iter.downloadNoneOrRepObject(req)
  104. if err != nil {
  105. return nil, fmt.Errorf("downloading object: %w", err)
  106. }
  107. return &Downloading{
  108. Object: &req.Detail.Object,
  109. File: reader,
  110. Request: req.Raw,
  111. }, nil
  112. case *cdssdk.RepRedundancy:
  113. reader, err := iter.downloadNoneOrRepObject(req)
  114. if err != nil {
  115. return nil, fmt.Errorf("downloading rep object: %w", err)
  116. }
  117. return &Downloading{
  118. Object: &req.Detail.Object,
  119. File: reader,
  120. Request: req.Raw,
  121. }, nil
  122. case *cdssdk.ECRedundancy:
  123. reader, err := iter.downloadECObject(req, red)
  124. if err != nil {
  125. return nil, fmt.Errorf("downloading ec object: %w", err)
  126. }
  127. return &Downloading{
  128. Object: &req.Detail.Object,
  129. File: reader,
  130. Request: req.Raw,
  131. }, nil
  132. case *cdssdk.LRCRedundancy:
  133. reader, err := iter.downloadLRCObject(req, red)
  134. if err != nil {
  135. return nil, fmt.Errorf("downloading lrc object: %w", err)
  136. }
  137. return &Downloading{
  138. Object: &req.Detail.Object,
  139. File: reader,
  140. Request: req.Raw,
  141. }, nil
  142. }
  143. return nil, fmt.Errorf("unsupported redundancy type: %v", reflect.TypeOf(req.Detail.Object.Redundancy))
  144. }
  145. func (i *DownloadObjectIterator) Close() {
  146. if i.OnClosing != nil {
  147. i.OnClosing()
  148. }
  149. }
  150. func (iter *DownloadObjectIterator) downloadNoneOrRepObject(obj downloadReqeust2) (io.ReadCloser, error) {
  151. allNodes, err := iter.sortDownloadNodes(obj)
  152. if err != nil {
  153. return nil, err
  154. }
  155. bsc, blocks := iter.getMinReadingBlockSolution(allNodes, 1)
  156. osc, node := iter.getMinReadingObjectSolution(allNodes, 1)
  157. if bsc < osc {
  158. logger.Debugf("downloading object from node %v(%v)", blocks[0].Node.Name, blocks[0].Node.NodeID)
  159. return iter.downloadFromNode(&blocks[0].Node, obj)
  160. }
  161. if osc == math.MaxFloat64 {
  162. // bsc >= osc,如果osc是MaxFloat64,那么bsc也一定是,也就意味着没有足够块来恢复文件
  163. return nil, fmt.Errorf("no node has this object")
  164. }
  165. logger.Debugf("downloading object from node %v(%v)", node.Name, node.NodeID)
  166. return iter.downloadFromNode(node, obj)
  167. }
  168. func (iter *DownloadObjectIterator) downloadECObject(req downloadReqeust2, ecRed *cdssdk.ECRedundancy) (io.ReadCloser, error) {
  169. allNodes, err := iter.sortDownloadNodes(req)
  170. if err != nil {
  171. return nil, err
  172. }
  173. bsc, blocks := iter.getMinReadingBlockSolution(allNodes, ecRed.K)
  174. osc, node := iter.getMinReadingObjectSolution(allNodes, ecRed.K)
  175. if bsc < osc {
  176. var logStrs []any = []any{"downloading ec object from blocks: "}
  177. for i, b := range blocks {
  178. if i > 0 {
  179. logStrs = append(logStrs, ", ")
  180. }
  181. logStrs = append(logStrs, fmt.Sprintf("%v@%v(%v)", b.Block.Index, b.Node.Name, b.Node.NodeID))
  182. }
  183. logger.Debug(logStrs...)
  184. pr, pw := io.Pipe()
  185. go func() {
  186. readPos := req.Raw.Offset
  187. totalReadLen := req.Detail.Object.Size - req.Raw.Offset
  188. if req.Raw.Length >= 0 {
  189. totalReadLen = math2.Min(req.Raw.Length, totalReadLen)
  190. }
  191. firstStripIndex := readPos / int64(ecRed.K) / int64(ecRed.ChunkSize)
  192. stripIter := NewStripIterator(req.Detail.Object, blocks, ecRed, firstStripIndex, iter.downloader.strips, iter.downloader.cfg.ECStripPrefetchCount)
  193. defer stripIter.Close()
  194. for totalReadLen > 0 {
  195. strip, err := stripIter.MoveNext()
  196. if err == iterator.ErrNoMoreItem {
  197. pw.CloseWithError(io.ErrUnexpectedEOF)
  198. return
  199. }
  200. if err != nil {
  201. pw.CloseWithError(err)
  202. return
  203. }
  204. readRelativePos := readPos - strip.Position
  205. nextStripPos := strip.Position + int64(ecRed.K)*int64(ecRed.ChunkSize)
  206. curReadLen := math2.Min(totalReadLen, nextStripPos-readPos)
  207. err = io2.WriteAll(pw, strip.Data[readRelativePos:readRelativePos+curReadLen])
  208. if err != nil {
  209. pw.CloseWithError(err)
  210. return
  211. }
  212. totalReadLen -= curReadLen
  213. readPos += curReadLen
  214. }
  215. pw.Close()
  216. }()
  217. return pr, nil
  218. }
  219. // bsc >= osc,如果osc是MaxFloat64,那么bsc也一定是,也就意味着没有足够块来恢复文件
  220. if osc == math.MaxFloat64 {
  221. return nil, fmt.Errorf("no enough blocks to reconstruct the file, want %d, get only %d", ecRed.K, len(blocks))
  222. }
  223. logger.Debugf("downloading ec object from node %v(%v)", node.Name, node.NodeID)
  224. return iter.downloadFromNode(node, req)
  225. }
  226. func (iter *DownloadObjectIterator) sortDownloadNodes(req downloadReqeust2) ([]*DownloadNodeInfo, error) {
  227. var nodeIDs []cdssdk.NodeID
  228. for _, id := range req.Detail.PinnedAt {
  229. if !lo.Contains(nodeIDs, id) {
  230. nodeIDs = append(nodeIDs, id)
  231. }
  232. }
  233. for _, b := range req.Detail.Blocks {
  234. if !lo.Contains(nodeIDs, b.NodeID) {
  235. nodeIDs = append(nodeIDs, b.NodeID)
  236. }
  237. }
  238. downloadNodeMap := make(map[cdssdk.NodeID]*DownloadNodeInfo)
  239. for _, id := range req.Detail.PinnedAt {
  240. node, ok := downloadNodeMap[id]
  241. if !ok {
  242. mod := iter.allNodes[id]
  243. node = &DownloadNodeInfo{
  244. Node: mod,
  245. ObjectPinned: true,
  246. Distance: iter.getNodeDistance(mod),
  247. }
  248. downloadNodeMap[id] = node
  249. }
  250. node.ObjectPinned = true
  251. }
  252. for _, b := range req.Detail.Blocks {
  253. node, ok := downloadNodeMap[b.NodeID]
  254. if !ok {
  255. mod := iter.allNodes[b.NodeID]
  256. node = &DownloadNodeInfo{
  257. Node: mod,
  258. Distance: iter.getNodeDistance(mod),
  259. }
  260. downloadNodeMap[b.NodeID] = node
  261. }
  262. node.Blocks = append(node.Blocks, b)
  263. }
  264. return sort2.Sort(lo.Values(downloadNodeMap), func(left, right *DownloadNodeInfo) int {
  265. return sort2.Cmp(left.Distance, right.Distance)
  266. }), nil
  267. }
  268. func (iter *DownloadObjectIterator) getMinReadingBlockSolution(sortedNodes []*DownloadNodeInfo, k int) (float64, []downloadBlock) {
  269. gotBlocksMap := bitmap.Bitmap64(0)
  270. var gotBlocks []downloadBlock
  271. dist := float64(0.0)
  272. for _, n := range sortedNodes {
  273. for _, b := range n.Blocks {
  274. if !gotBlocksMap.Get(b.Index) {
  275. gotBlocks = append(gotBlocks, downloadBlock{
  276. Node: n.Node,
  277. Block: b,
  278. })
  279. gotBlocksMap.Set(b.Index, true)
  280. dist += n.Distance
  281. }
  282. if len(gotBlocks) >= k {
  283. return dist, gotBlocks
  284. }
  285. }
  286. }
  287. return math.MaxFloat64, gotBlocks
  288. }
  289. func (iter *DownloadObjectIterator) getMinReadingObjectSolution(sortedNodes []*DownloadNodeInfo, k int) (float64, *cdssdk.Node) {
  290. dist := math.MaxFloat64
  291. var downloadNode *cdssdk.Node
  292. for _, n := range sortedNodes {
  293. if n.ObjectPinned && float64(k)*n.Distance < dist {
  294. dist = float64(k) * n.Distance
  295. downloadNode = &n.Node
  296. }
  297. }
  298. return dist, downloadNode
  299. }
  300. func (iter *DownloadObjectIterator) getNodeDistance(node cdssdk.Node) float64 {
  301. if stgglb.Local.NodeID != nil {
  302. if node.NodeID == *stgglb.Local.NodeID {
  303. return consts.NodeDistanceSameNode
  304. }
  305. }
  306. if node.LocationID == stgglb.Local.LocationID {
  307. return consts.NodeDistanceSameLocation
  308. }
  309. c := iter.downloader.conn.Get(node.NodeID)
  310. if c == nil || c.Delay == nil || *c.Delay > time.Duration(float64(time.Millisecond)*iter.downloader.cfg.HighLatencyNodeMs) {
  311. return consts.NodeDistanceHighLatencyNode
  312. }
  313. return consts.NodeDistanceOther
  314. }
  315. func (iter *DownloadObjectIterator) downloadFromNode(node *cdssdk.Node, req downloadReqeust2) (io.ReadCloser, error) {
  316. var strHandle *exec.DriverReadStream
  317. ft := ioswitch2.NewFromTo()
  318. toExec, handle := ioswitch2.NewToDriver(-1)
  319. toExec.Range = exec.Range{
  320. Offset: req.Raw.Offset,
  321. }
  322. if req.Raw.Length != -1 {
  323. len := req.Raw.Length
  324. toExec.Range.Length = &len
  325. }
  326. ft.AddFrom(ioswitch2.NewFromNode(req.Detail.Object.FileHash, node, -1)).AddTo(toExec)
  327. strHandle = handle
  328. parser := parser.NewParser(cdssdk.DefaultECRedundancy)
  329. plans := exec.NewPlanBuilder()
  330. if err := parser.Parse(ft, plans); err != nil {
  331. return nil, fmt.Errorf("parsing plan: %w", err)
  332. }
  333. exec := plans.Execute()
  334. go exec.Wait(context.TODO())
  335. return exec.BeginRead(strHandle)
  336. }

本项目旨在将云际存储公共基础设施化,使个人及企业可低门槛使用高效的云际存储服务(安装开箱即用云际存储客户端即可,无需关注其他组件的部署),同时支持用户灵活便捷定制云际存储的功能细节。