You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

download_object_iterator.go 10 kB

2 years ago
2 years ago
2 years ago
2 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388
  1. package iterator
  2. import (
  3. "fmt"
  4. "io"
  5. "math"
  6. "reflect"
  7. "github.com/samber/lo"
  8. "gitlink.org.cn/cloudream/common/pkgs/bitmap"
  9. "gitlink.org.cn/cloudream/common/pkgs/logger"
  10. cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
  11. myio "gitlink.org.cn/cloudream/common/utils/io"
  12. "gitlink.org.cn/cloudream/common/utils/sort2"
  13. "gitlink.org.cn/cloudream/storage/common/consts"
  14. stgglb "gitlink.org.cn/cloudream/storage/common/globals"
  15. stgmod "gitlink.org.cn/cloudream/storage/common/models"
  16. stgmodels "gitlink.org.cn/cloudream/storage/common/models"
  17. "gitlink.org.cn/cloudream/storage/common/pkgs/db/model"
  18. "gitlink.org.cn/cloudream/storage/common/pkgs/distlock"
  19. "gitlink.org.cn/cloudream/storage/common/pkgs/ec"
  20. coormq "gitlink.org.cn/cloudream/storage/common/pkgs/mq/coordinator"
  21. )
  22. type DownloadingObjectIterator = Iterator[*IterDownloadingObject]
  23. type IterDownloadingObject struct {
  24. Object model.Object
  25. File io.ReadCloser
  26. }
  27. type DownloadNodeInfo struct {
  28. Node cdssdk.Node
  29. ObjectPinned bool
  30. Blocks []stgmod.ObjectBlock
  31. Distance float64
  32. }
  33. type DownloadContext struct {
  34. Distlock *distlock.Service
  35. }
  36. type DownloadObjectIterator struct {
  37. OnClosing func()
  38. objectDetails []stgmodels.ObjectDetail
  39. currentIndex int
  40. inited bool
  41. downloadCtx *DownloadContext
  42. coorCli *coormq.Client
  43. allNodes map[cdssdk.NodeID]cdssdk.Node
  44. }
  45. func NewDownloadObjectIterator(objectDetails []stgmodels.ObjectDetail, downloadCtx *DownloadContext) *DownloadObjectIterator {
  46. return &DownloadObjectIterator{
  47. objectDetails: objectDetails,
  48. downloadCtx: downloadCtx,
  49. }
  50. }
  51. func (i *DownloadObjectIterator) MoveNext() (*IterDownloadingObject, error) {
  52. if !i.inited {
  53. if err := i.init(); err != nil {
  54. return nil, err
  55. }
  56. i.inited = true
  57. }
  58. if i.currentIndex >= len(i.objectDetails) {
  59. return nil, ErrNoMoreItem
  60. }
  61. item, err := i.doMove()
  62. i.currentIndex++
  63. return item, err
  64. }
  65. func (i *DownloadObjectIterator) init() error {
  66. coorCli, err := stgglb.CoordinatorMQPool.Acquire()
  67. if err != nil {
  68. return fmt.Errorf("new coordinator client: %w", err)
  69. }
  70. i.coorCli = coorCli
  71. allNodeIDs := make(map[cdssdk.NodeID]bool)
  72. for _, obj := range i.objectDetails {
  73. for _, p := range obj.PinnedAt {
  74. allNodeIDs[p] = true
  75. }
  76. for _, b := range obj.Blocks {
  77. allNodeIDs[b.NodeID] = true
  78. }
  79. }
  80. getNodes, err := coorCli.GetNodes(coormq.NewGetNodes(lo.Keys(allNodeIDs)))
  81. if err != nil {
  82. return fmt.Errorf("getting nodes: %w", err)
  83. }
  84. i.allNodes = make(map[cdssdk.NodeID]cdssdk.Node)
  85. for _, n := range getNodes.Nodes {
  86. i.allNodes[n.NodeID] = n
  87. }
  88. return nil
  89. }
  90. func (iter *DownloadObjectIterator) doMove() (*IterDownloadingObject, error) {
  91. obj := iter.objectDetails[iter.currentIndex]
  92. switch red := obj.Object.Redundancy.(type) {
  93. case *cdssdk.NoneRedundancy:
  94. reader, err := iter.downloadNoneOrRepObject(obj)
  95. if err != nil {
  96. return nil, fmt.Errorf("downloading object: %w", err)
  97. }
  98. return &IterDownloadingObject{
  99. Object: obj.Object,
  100. File: reader,
  101. }, nil
  102. case *cdssdk.RepRedundancy:
  103. reader, err := iter.downloadNoneOrRepObject(obj)
  104. if err != nil {
  105. return nil, fmt.Errorf("downloading rep object: %w", err)
  106. }
  107. return &IterDownloadingObject{
  108. Object: obj.Object,
  109. File: reader,
  110. }, nil
  111. case *cdssdk.ECRedundancy:
  112. reader, err := iter.downloadECObject(obj, red)
  113. if err != nil {
  114. return nil, fmt.Errorf("downloading ec object: %w", err)
  115. }
  116. return &IterDownloadingObject{
  117. Object: obj.Object,
  118. File: reader,
  119. }, nil
  120. }
  121. return nil, fmt.Errorf("unsupported redundancy type: %v", reflect.TypeOf(obj.Object.Redundancy))
  122. }
  123. func (i *DownloadObjectIterator) Close() {
  124. if i.OnClosing != nil {
  125. i.OnClosing()
  126. }
  127. }
  128. func (iter *DownloadObjectIterator) downloadNoneOrRepObject(obj stgmodels.ObjectDetail) (io.ReadCloser, error) {
  129. allNodes, err := iter.sortDownloadNodes(obj)
  130. if err != nil {
  131. return nil, err
  132. }
  133. bsc, blocks := iter.getMinReadingBlockSolution(allNodes, 1)
  134. osc, node := iter.getMinReadingObjectSolution(allNodes, 1)
  135. if bsc < osc {
  136. return downloadFile(iter.downloadCtx, blocks[0].Node, blocks[0].Block.FileHash)
  137. }
  138. // bsc >= osc,如果osc是MaxFloat64,那么bsc也一定是,也就意味着没有足够块来恢复文件
  139. if osc == math.MaxFloat64 {
  140. return nil, fmt.Errorf("no node has this object")
  141. }
  142. return downloadFile(iter.downloadCtx, *node, obj.Object.FileHash)
  143. }
  144. func (iter *DownloadObjectIterator) downloadECObject(obj stgmodels.ObjectDetail, ecRed *cdssdk.ECRedundancy) (io.ReadCloser, error) {
  145. allNodes, err := iter.sortDownloadNodes(obj)
  146. if err != nil {
  147. return nil, err
  148. }
  149. bsc, blocks := iter.getMinReadingBlockSolution(allNodes, ecRed.K)
  150. osc, node := iter.getMinReadingObjectSolution(allNodes, ecRed.K)
  151. if bsc < osc {
  152. var fileStrs []io.ReadCloser
  153. rs, err := ec.NewRs(ecRed.K, ecRed.N, ecRed.ChunkSize)
  154. if err != nil {
  155. return nil, fmt.Errorf("new rs: %w", err)
  156. }
  157. for i, b := range blocks {
  158. str, err := downloadFile(iter.downloadCtx, b.Node, b.Block.FileHash)
  159. if err != nil {
  160. for i -= 1; i >= 0; i-- {
  161. fileStrs[i].Close()
  162. }
  163. return nil, fmt.Errorf("donwloading file: %w", err)
  164. }
  165. fileStrs = append(fileStrs, str)
  166. }
  167. fileReaders, filesCloser := myio.ToReaders(fileStrs)
  168. var indexes []int
  169. for _, b := range blocks {
  170. indexes = append(indexes, b.Block.Index)
  171. }
  172. outputs, outputsCloser := myio.ToReaders(rs.ReconstructData(fileReaders, indexes))
  173. return myio.AfterReadClosed(myio.Length(myio.ChunkedJoin(outputs, int(ecRed.ChunkSize)), obj.Object.Size), func(c io.ReadCloser) {
  174. filesCloser()
  175. outputsCloser()
  176. }), nil
  177. }
  178. // bsc >= osc,如果osc是MaxFloat64,那么bsc也一定是,也就意味着没有足够块来恢复文件
  179. if osc == math.MaxFloat64 {
  180. return nil, fmt.Errorf("no enough blocks to reconstruct the file, want %d, get only %d", ecRed.K, len(blocks))
  181. }
  182. return downloadFile(iter.downloadCtx, *node, obj.Object.FileHash)
  183. }
  184. func (iter *DownloadObjectIterator) sortDownloadNodes(obj stgmodels.ObjectDetail) ([]*DownloadNodeInfo, error) {
  185. var nodeIDs []cdssdk.NodeID
  186. for _, id := range obj.PinnedAt {
  187. if !lo.Contains(nodeIDs, id) {
  188. nodeIDs = append(nodeIDs, id)
  189. }
  190. }
  191. for _, b := range obj.Blocks {
  192. if !lo.Contains(nodeIDs, b.NodeID) {
  193. nodeIDs = append(nodeIDs, b.NodeID)
  194. }
  195. }
  196. downloadNodeMap := make(map[cdssdk.NodeID]*DownloadNodeInfo)
  197. for _, id := range obj.PinnedAt {
  198. node, ok := downloadNodeMap[id]
  199. if !ok {
  200. mod := iter.allNodes[id]
  201. node = &DownloadNodeInfo{
  202. Node: mod,
  203. ObjectPinned: true,
  204. Distance: iter.getNodeDistance(mod),
  205. }
  206. downloadNodeMap[id] = node
  207. }
  208. node.ObjectPinned = true
  209. }
  210. for _, b := range obj.Blocks {
  211. node, ok := downloadNodeMap[b.NodeID]
  212. if !ok {
  213. mod := iter.allNodes[b.NodeID]
  214. node = &DownloadNodeInfo{
  215. Node: mod,
  216. Distance: iter.getNodeDistance(mod),
  217. }
  218. downloadNodeMap[b.NodeID] = node
  219. }
  220. node.Blocks = append(node.Blocks, b)
  221. }
  222. return sort2.Sort(lo.Values(downloadNodeMap), func(left, right *DownloadNodeInfo) int {
  223. return sort2.Cmp(left.Distance, right.Distance)
  224. }), nil
  225. }
  226. type downloadBlock struct {
  227. Node cdssdk.Node
  228. Block stgmod.ObjectBlock
  229. }
  230. func (iter *DownloadObjectIterator) getMinReadingBlockSolution(sortedNodes []*DownloadNodeInfo, k int) (float64, []downloadBlock) {
  231. gotBlocksMap := bitmap.Bitmap64(0)
  232. var gotBlocks []downloadBlock
  233. dist := float64(0.0)
  234. for _, n := range sortedNodes {
  235. for _, b := range n.Blocks {
  236. if !gotBlocksMap.Get(b.Index) {
  237. gotBlocks = append(gotBlocks, downloadBlock{
  238. Node: n.Node,
  239. Block: b,
  240. })
  241. gotBlocksMap.Set(b.Index, true)
  242. dist += n.Distance
  243. }
  244. if len(gotBlocks) >= k {
  245. return dist, gotBlocks
  246. }
  247. }
  248. }
  249. return math.MaxFloat64, gotBlocks
  250. }
  251. func (iter *DownloadObjectIterator) getMinReadingObjectSolution(sortedNodes []*DownloadNodeInfo, k int) (float64, *cdssdk.Node) {
  252. dist := math.MaxFloat64
  253. var downloadNode *cdssdk.Node
  254. for _, n := range sortedNodes {
  255. if n.ObjectPinned && float64(k)*n.Distance < dist {
  256. dist = float64(k) * n.Distance
  257. downloadNode = &n.Node
  258. }
  259. }
  260. return dist, downloadNode
  261. }
  262. func (iter *DownloadObjectIterator) getNodeDistance(node cdssdk.Node) float64 {
  263. if stgglb.Local.NodeID != nil {
  264. if node.NodeID == *stgglb.Local.NodeID {
  265. return consts.NodeDistanceSameNode
  266. }
  267. }
  268. if node.LocationID == stgglb.Local.LocationID {
  269. return consts.NodeDistanceSameLocation
  270. }
  271. return consts.NodeDistanceOther
  272. }
  273. func downloadFile(ctx *DownloadContext, node cdssdk.Node, fileHash string) (io.ReadCloser, error) {
  274. // 如果客户端与节点在同一个地域,则使用内网地址连接节点
  275. nodeIP := node.ExternalIP
  276. grpcPort := node.ExternalGRPCPort
  277. if node.LocationID == stgglb.Local.LocationID {
  278. nodeIP = node.LocalIP
  279. grpcPort = node.LocalGRPCPort
  280. logger.Infof("client and node %d are at the same location, use local ip", node.NodeID)
  281. }
  282. if stgglb.IPFSPool != nil {
  283. logger.Infof("try to use local IPFS to download file")
  284. reader, err := downloadFromLocalIPFS(ctx, fileHash)
  285. if err == nil {
  286. return reader, nil
  287. }
  288. logger.Warnf("download from local IPFS failed, so try to download from node %s, err: %s", nodeIP, err.Error())
  289. }
  290. return downloadFromNode(ctx, node.NodeID, nodeIP, grpcPort, fileHash)
  291. }
  292. func downloadFromNode(ctx *DownloadContext, nodeID cdssdk.NodeID, nodeIP string, grpcPort int, fileHash string) (io.ReadCloser, error) {
  293. agtCli, err := stgglb.AgentRPCPool.Acquire(nodeIP, grpcPort)
  294. if err != nil {
  295. return nil, fmt.Errorf("new agent grpc client: %w", err)
  296. }
  297. reader, err := agtCli.GetIPFSFile(fileHash)
  298. if err != nil {
  299. return nil, fmt.Errorf("getting ipfs file: %w", err)
  300. }
  301. reader = myio.AfterReadClosed(reader, func(io.ReadCloser) {
  302. agtCli.Close()
  303. })
  304. return reader, nil
  305. }
  306. func downloadFromLocalIPFS(ctx *DownloadContext, fileHash string) (io.ReadCloser, error) {
  307. ipfsCli, err := stgglb.IPFSPool.Acquire()
  308. if err != nil {
  309. return nil, fmt.Errorf("new ipfs client: %w", err)
  310. }
  311. reader, err := ipfsCli.OpenRead(fileHash)
  312. if err != nil {
  313. return nil, fmt.Errorf("read ipfs file failed, err: %w", err)
  314. }
  315. reader = myio.AfterReadClosed(reader, func(io.ReadCloser) {
  316. ipfsCli.Close()
  317. })
  318. return reader, nil
  319. }

本项目旨在将云际存储公共基础设施化,使个人及企业可低门槛使用高效的云际存储服务(安装开箱即用云际存储客户端即可,无需关注其他组件的部署),同时支持用户灵活便捷定制云际存储的功能细节。