You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

storage_load_package.go 10 kB

1 year ago
1 year ago
1 year ago
1 year ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355
  1. package task
  2. import (
  3. "fmt"
  4. "io"
  5. "math"
  6. "os"
  7. "path/filepath"
  8. "time"
  9. "github.com/samber/lo"
  10. "gitlink.org.cn/cloudream/common/pkgs/bitmap"
  11. "gitlink.org.cn/cloudream/common/pkgs/ipfs"
  12. "gitlink.org.cn/cloudream/common/pkgs/task"
  13. cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
  14. "gitlink.org.cn/cloudream/common/utils/io2"
  15. "gitlink.org.cn/cloudream/common/utils/reflect2"
  16. "gitlink.org.cn/cloudream/common/utils/sort2"
  17. "gitlink.org.cn/cloudream/storage/common/consts"
  18. stgglb "gitlink.org.cn/cloudream/storage/common/globals"
  19. stgmod "gitlink.org.cn/cloudream/storage/common/models"
  20. "gitlink.org.cn/cloudream/storage/common/pkgs/distlock/reqbuilder"
  21. "gitlink.org.cn/cloudream/storage/common/pkgs/ec"
  22. coormq "gitlink.org.cn/cloudream/storage/common/pkgs/mq/coordinator"
  23. "gitlink.org.cn/cloudream/storage/common/utils"
  24. )
  25. type StorageLoadPackage struct {
  26. PackagePath string
  27. LocalBase string
  28. RemoteBase string
  29. userID cdssdk.UserID
  30. packageID cdssdk.PackageID
  31. storageID cdssdk.StorageID
  32. pinnedBlocks []stgmod.ObjectBlock
  33. }
  34. func NewStorageLoadPackage(userID cdssdk.UserID, packageID cdssdk.PackageID, storageID cdssdk.StorageID) *StorageLoadPackage {
  35. return &StorageLoadPackage{
  36. userID: userID,
  37. packageID: packageID,
  38. storageID: storageID,
  39. }
  40. }
  41. func (t *StorageLoadPackage) Execute(task *task.Task[TaskContext], ctx TaskContext, complete CompleteFn) {
  42. err := t.do(task, ctx)
  43. complete(err, CompleteOption{
  44. RemovingDelay: time.Minute,
  45. })
  46. }
  47. func (t *StorageLoadPackage) do(task *task.Task[TaskContext], ctx TaskContext) error {
  48. coorCli, err := stgglb.CoordinatorMQPool.Acquire()
  49. if err != nil {
  50. return fmt.Errorf("new coordinator client: %w", err)
  51. }
  52. defer stgglb.CoordinatorMQPool.Release(coorCli)
  53. ipfsCli, err := stgglb.IPFSPool.Acquire()
  54. if err != nil {
  55. return fmt.Errorf("new IPFS client: %w", err)
  56. }
  57. defer stgglb.IPFSPool.Release(ipfsCli)
  58. getStgResp, err := coorCli.GetStorage(coormq.ReqGetStorage(t.userID, t.storageID))
  59. if err != nil {
  60. return fmt.Errorf("request to coordinator: %w", err)
  61. }
  62. t.PackagePath = utils.MakeLoadedPackagePath(t.userID, t.packageID)
  63. fullLocalPath := filepath.Join(getStgResp.Storage.LocalBase, t.PackagePath)
  64. if err = os.MkdirAll(fullLocalPath, 0755); err != nil {
  65. return fmt.Errorf("creating output directory: %w", err)
  66. }
  67. getObjectDetails, err := coorCli.GetPackageObjectDetails(coormq.ReqGetPackageObjectDetails(t.packageID))
  68. if err != nil {
  69. return fmt.Errorf("getting package object details: %w", err)
  70. }
  71. mutex, err := reqbuilder.NewBuilder().
  72. // 提前占位
  73. Metadata().StoragePackage().CreateOne(t.userID, t.storageID, t.packageID).
  74. // 保护在storage目录中下载的文件
  75. Storage().Buzy(t.storageID).
  76. // 保护下载文件时同时保存到IPFS的文件
  77. IPFS().Buzy(getStgResp.Storage.NodeID).
  78. MutexLock(ctx.distlock)
  79. if err != nil {
  80. return fmt.Errorf("acquire locks failed, err: %w", err)
  81. }
  82. defer mutex.Unlock()
  83. for _, obj := range getObjectDetails.Objects {
  84. err := t.downloadOne(coorCli, ipfsCli, fullLocalPath, obj)
  85. if err != nil {
  86. return err
  87. }
  88. }
  89. _, err = coorCli.StoragePackageLoaded(coormq.NewStoragePackageLoaded(t.userID, t.storageID, t.packageID, t.pinnedBlocks))
  90. if err != nil {
  91. return fmt.Errorf("loading package to storage: %w", err)
  92. }
  93. // TODO 要防止下载的临时文件被删除
  94. return err
  95. }
  96. func (t *StorageLoadPackage) downloadOne(coorCli *coormq.Client, ipfsCli *ipfs.PoolClient, dir string, obj stgmod.ObjectDetail) error {
  97. var file io.ReadCloser
  98. switch red := obj.Object.Redundancy.(type) {
  99. case *cdssdk.NoneRedundancy:
  100. reader, err := t.downloadNoneOrRepObject(ipfsCli, obj)
  101. if err != nil {
  102. return fmt.Errorf("downloading object: %w", err)
  103. }
  104. file = reader
  105. case *cdssdk.RepRedundancy:
  106. reader, err := t.downloadNoneOrRepObject(ipfsCli, obj)
  107. if err != nil {
  108. return fmt.Errorf("downloading rep object: %w", err)
  109. }
  110. file = reader
  111. case *cdssdk.ECRedundancy:
  112. reader, pinnedBlocks, err := t.downloadECObject(coorCli, ipfsCli, obj, red)
  113. if err != nil {
  114. return fmt.Errorf("downloading ec object: %w", err)
  115. }
  116. file = reader
  117. t.pinnedBlocks = append(t.pinnedBlocks, pinnedBlocks...)
  118. default:
  119. return fmt.Errorf("unknow redundancy type: %v", reflect2.TypeOfValue(obj.Object.Redundancy))
  120. }
  121. defer file.Close()
  122. fullPath := filepath.Join(dir, obj.Object.Path)
  123. lastDirPath := filepath.Dir(fullPath)
  124. if err := os.MkdirAll(lastDirPath, 0755); err != nil {
  125. return fmt.Errorf("creating object last dir: %w", err)
  126. }
  127. outputFile, err := os.Create(fullPath)
  128. if err != nil {
  129. return fmt.Errorf("creating object file: %w", err)
  130. }
  131. defer outputFile.Close()
  132. if _, err := io.Copy(outputFile, file); err != nil {
  133. return fmt.Errorf("writting object to file: %w", err)
  134. }
  135. return nil
  136. }
  137. func (t *StorageLoadPackage) downloadNoneOrRepObject(ipfsCli *ipfs.PoolClient, obj stgmod.ObjectDetail) (io.ReadCloser, error) {
  138. if len(obj.Blocks) == 0 && len(obj.PinnedAt) == 0 {
  139. return nil, fmt.Errorf("no node has this object")
  140. }
  141. // 不管实际有没有成功
  142. ipfsCli.Pin(obj.Object.FileHash)
  143. file, err := ipfsCli.OpenRead(obj.Object.FileHash)
  144. if err != nil {
  145. return nil, err
  146. }
  147. return file, nil
  148. }
  149. func (t *StorageLoadPackage) downloadECObject(coorCli *coormq.Client, ipfsCli *ipfs.PoolClient, obj stgmod.ObjectDetail, ecRed *cdssdk.ECRedundancy) (io.ReadCloser, []stgmod.ObjectBlock, error) {
  150. allNodes, err := t.sortDownloadNodes(coorCli, obj)
  151. if err != nil {
  152. return nil, nil, err
  153. }
  154. bsc, blocks := t.getMinReadingBlockSolution(allNodes, ecRed.K)
  155. osc, _ := t.getMinReadingObjectSolution(allNodes, ecRed.K)
  156. if bsc < osc {
  157. var fileStrs []io.ReadCloser
  158. rs, err := ec.NewStreamRs(ecRed.K, ecRed.N, ecRed.ChunkSize)
  159. if err != nil {
  160. return nil, nil, fmt.Errorf("new rs: %w", err)
  161. }
  162. for i := range blocks {
  163. // 不管实际有没有成功
  164. ipfsCli.Pin(blocks[i].Block.FileHash)
  165. str, err := ipfsCli.OpenRead(blocks[i].Block.FileHash)
  166. if err != nil {
  167. for i -= 1; i >= 0; i-- {
  168. fileStrs[i].Close()
  169. }
  170. return nil, nil, fmt.Errorf("donwloading file: %w", err)
  171. }
  172. fileStrs = append(fileStrs, str)
  173. }
  174. fileReaders, filesCloser := io2.ToReaders(fileStrs)
  175. var indexes []int
  176. var pinnedBlocks []stgmod.ObjectBlock
  177. for _, b := range blocks {
  178. indexes = append(indexes, b.Block.Index)
  179. pinnedBlocks = append(pinnedBlocks, stgmod.ObjectBlock{
  180. ObjectID: b.Block.ObjectID,
  181. Index: b.Block.Index,
  182. NodeID: *stgglb.Local.NodeID,
  183. FileHash: b.Block.FileHash,
  184. })
  185. }
  186. outputs, outputsCloser := io2.ToReaders(rs.ReconstructData(fileReaders, indexes))
  187. return io2.AfterReadClosed(io2.Length(io2.ChunkedJoin(outputs, int(ecRed.ChunkSize)), obj.Object.Size), func(c io.ReadCloser) {
  188. filesCloser()
  189. outputsCloser()
  190. }), pinnedBlocks, nil
  191. }
  192. // bsc >= osc,如果osc是MaxFloat64,那么bsc也一定是,也就意味着没有足够块来恢复文件
  193. if osc == math.MaxFloat64 {
  194. return nil, nil, fmt.Errorf("no enough blocks to reconstruct the file, want %d, get only %d", ecRed.K, len(blocks))
  195. }
  196. // 如果是直接读取的文件,那么就不需要Pin文件块
  197. str, err := ipfsCli.OpenRead(obj.Object.FileHash)
  198. return str, nil, err
  199. }
  200. type downloadNodeInfo struct {
  201. Node cdssdk.Node
  202. ObjectPinned bool
  203. Blocks []stgmod.ObjectBlock
  204. Distance float64
  205. }
  206. func (t *StorageLoadPackage) sortDownloadNodes(coorCli *coormq.Client, obj stgmod.ObjectDetail) ([]*downloadNodeInfo, error) {
  207. var nodeIDs []cdssdk.NodeID
  208. for _, id := range obj.PinnedAt {
  209. if !lo.Contains(nodeIDs, id) {
  210. nodeIDs = append(nodeIDs, id)
  211. }
  212. }
  213. for _, b := range obj.Blocks {
  214. if !lo.Contains(nodeIDs, b.NodeID) {
  215. nodeIDs = append(nodeIDs, b.NodeID)
  216. }
  217. }
  218. getNodes, err := coorCli.GetNodes(coormq.NewGetNodes(nodeIDs))
  219. if err != nil {
  220. return nil, fmt.Errorf("getting nodes: %w", err)
  221. }
  222. downloadNodeMap := make(map[cdssdk.NodeID]*downloadNodeInfo)
  223. for _, id := range obj.PinnedAt {
  224. node, ok := downloadNodeMap[id]
  225. if !ok {
  226. mod := *getNodes.GetNode(id)
  227. node = &downloadNodeInfo{
  228. Node: mod,
  229. ObjectPinned: true,
  230. Distance: t.getNodeDistance(mod),
  231. }
  232. downloadNodeMap[id] = node
  233. }
  234. node.ObjectPinned = true
  235. }
  236. for _, b := range obj.Blocks {
  237. node, ok := downloadNodeMap[b.NodeID]
  238. if !ok {
  239. mod := *getNodes.GetNode(b.NodeID)
  240. node = &downloadNodeInfo{
  241. Node: mod,
  242. Distance: t.getNodeDistance(mod),
  243. }
  244. downloadNodeMap[b.NodeID] = node
  245. }
  246. node.Blocks = append(node.Blocks, b)
  247. }
  248. return sort2.Sort(lo.Values(downloadNodeMap), func(left, right *downloadNodeInfo) int {
  249. return sort2.Cmp(left.Distance, right.Distance)
  250. }), nil
  251. }
  252. type downloadBlock struct {
  253. Node cdssdk.Node
  254. Block stgmod.ObjectBlock
  255. }
  256. func (t *StorageLoadPackage) getMinReadingBlockSolution(sortedNodes []*downloadNodeInfo, k int) (float64, []downloadBlock) {
  257. gotBlocksMap := bitmap.Bitmap64(0)
  258. var gotBlocks []downloadBlock
  259. dist := float64(0.0)
  260. for _, n := range sortedNodes {
  261. for _, b := range n.Blocks {
  262. if !gotBlocksMap.Get(b.Index) {
  263. gotBlocks = append(gotBlocks, downloadBlock{
  264. Node: n.Node,
  265. Block: b,
  266. })
  267. gotBlocksMap.Set(b.Index, true)
  268. dist += n.Distance
  269. }
  270. if len(gotBlocks) >= k {
  271. return dist, gotBlocks
  272. }
  273. }
  274. }
  275. return math.MaxFloat64, gotBlocks
  276. }
  277. func (t *StorageLoadPackage) getMinReadingObjectSolution(sortedNodes []*downloadNodeInfo, k int) (float64, *cdssdk.Node) {
  278. dist := math.MaxFloat64
  279. var downloadNode *cdssdk.Node
  280. for _, n := range sortedNodes {
  281. if n.ObjectPinned && float64(k)*n.Distance < dist {
  282. dist = float64(k) * n.Distance
  283. downloadNode = &n.Node
  284. }
  285. }
  286. return dist, downloadNode
  287. }
  288. func (t *StorageLoadPackage) getNodeDistance(node cdssdk.Node) float64 {
  289. if stgglb.Local.NodeID != nil {
  290. if node.NodeID == *stgglb.Local.NodeID {
  291. return consts.NodeDistanceSameNode
  292. }
  293. }
  294. if node.LocationID == stgglb.Local.LocationID {
  295. return consts.NodeDistanceSameLocation
  296. }
  297. return consts.NodeDistanceOther
  298. }

本项目旨在将云际存储公共基础设施化,使个人及企业可低门槛使用高效的云际存储服务(安装开箱即用云际存储客户端即可,无需关注其他组件的部署),同时支持用户灵活便捷定制云际存储的功能细节。