
parser.go 31 kB

package parser

import (
	"fmt"
	"math"

	"gitlink.org.cn/cloudream/common/pkgs/ioswitch/dag"
	"gitlink.org.cn/cloudream/common/pkgs/ioswitch/exec"
	"gitlink.org.cn/cloudream/common/pkgs/ioswitch/plan"
	cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
	"gitlink.org.cn/cloudream/common/utils/lo2"
	"gitlink.org.cn/cloudream/common/utils/math2"
	"gitlink.org.cn/cloudream/storage/common/pkgs/ioswitch2"
	"gitlink.org.cn/cloudream/storage/common/pkgs/ioswitch2/ops2"
	"gitlink.org.cn/cloudream/storage/common/pkgs/storage/factory"
	"gitlink.org.cn/cloudream/storage/common/pkgs/storage/types"
)

type IndexedStream struct {
	Stream      *dag.StreamVar
	StreamIndex ioswitch2.StreamIndex
}

type ParseContext struct {
	Ft  ioswitch2.FromTo
	DAG *ops2.GraphNodeBuilder
	// The range the Froms must open in order to produce the data required by all Tos.
	// It is relative to the whole file, and both bounds are rounded to a multiple of the
	// stripe size, so the upper bound may exceed the file size.
	ToNodes        map[ioswitch2.To]ops2.ToNode
	IndexedStreams []IndexedStream
	StreamRange    math2.Range
	UseEC          bool // whether erasure coding is used
	UseSegment     bool // whether segmentation is used
}

func Parse(ft ioswitch2.FromTo, blder *exec.PlanBuilder) error {
	ctx := ParseContext{
		Ft:      ft,
		DAG:     ops2.NewGraphNodeBuilder(),
		ToNodes: make(map[ioswitch2.To]ops2.ToNode),
	}

	// Parsing runs in two phases:
	// 1. Generate instructions from the Froms and Tos, roughly matching each To's needs.
	err := checkEncodingParams(&ctx)
	if err != nil {
		return err
	}

	// Calculate the range of the streams to open.
	calcStreamRange(&ctx)

	err = extend(&ctx)
	if err != nil {
		return err
	}

	// 2. Optimize the instructions generated in the previous phase.
	err = fixSegmentJoin(&ctx)
	if err != nil {
		return err
	}

	err = fixSegmentSplit(&ctx)
	if err != nil {
		return err
	}

	// The instruction-removal optimizations must be repeated until nothing changes.
	// With the current implementation this cannot loop forever.
	for {
		opted := false
		if removeUnusedJoin(&ctx) {
			opted = true
		}
		if removeUnusedMultiplyOutput(&ctx) {
			opted = true
		}
		if removeUnusedSplit(&ctx) {
			opted = true
		}
		if omitSplitJoin(&ctx) {
			opted = true
		}
		if removeUnusedSegmentJoin(&ctx) {
			opted = true
		}
		if removeUnusedSegmentSplit(&ctx) {
			opted = true
		}
		if omitSegmentSplitJoin(&ctx) {
			opted = true
		}
		if !opted {
			break
		}
	}

	// Determining where each instruction executes is also repeated until nothing changes.
	for pin(&ctx) {
	}

	// The following passes run only once, but must run in this order.
	removeUnusedFromNode(&ctx)
	useMultipartUploadToShardStore(&ctx)
	dropUnused(&ctx)
	storeShardWriteResult(&ctx)
	generateRange(&ctx)
	generateClone(&ctx)

	return plan.Generate(ctx.DAG.Graph, blder)
}

func findOutputStream(ctx *ParseContext, streamIndex ioswitch2.StreamIndex) *dag.StreamVar {
	var ret *dag.StreamVar
	for _, s := range ctx.IndexedStreams {
		if s.StreamIndex == streamIndex {
			ret = s.Stream
			break
		}
	}
	return ret
}

// Checks that the parameters required by each encoding are set.
func checkEncodingParams(ctx *ParseContext) error {
	for _, f := range ctx.Ft.Froms {
		if f.GetStreamIndex().IsEC() {
			ctx.UseEC = true
			if ctx.Ft.ECParam == nil {
				return fmt.Errorf("EC encoding parameters not set")
			}
		}

		if f.GetStreamIndex().IsSegment() {
			ctx.UseSegment = true
			if ctx.Ft.SegmentParam == nil {
				return fmt.Errorf("segment parameters not set")
			}
		}
	}

	for _, t := range ctx.Ft.Toes {
		if t.GetStreamIndex().IsEC() {
			ctx.UseEC = true
			if ctx.Ft.ECParam == nil {
				return fmt.Errorf("EC encoding parameters not set")
			}
		}

		if t.GetStreamIndex().IsSegment() {
			ctx.UseSegment = true
			if ctx.Ft.SegmentParam == nil {
				return fmt.Errorf("segment parameters not set")
			}
		}
	}

	return nil
}

// Calculates the open range of the input streams. If any From or To involves an EC stream,
// the open range is widened to a multiple of the stripe size.
func calcStreamRange(ctx *ParseContext) {
	rng := math2.NewRange(math.MaxInt64, 0)

	for _, to := range ctx.Ft.Toes {
		strIdx := to.GetStreamIndex()
		if strIdx.IsRaw() {
			toRng := to.GetRange()
			rng.ExtendStart(toRng.Offset)
			if toRng.Length != nil {
				rng.ExtendEnd(toRng.Offset + *toRng.Length)
			} else {
				rng.Length = nil
			}
		} else if strIdx.IsEC() {
			toRng := to.GetRange()
			stripSize := ctx.Ft.ECParam.StripSize()
			blkStartIndex := math2.FloorDiv(toRng.Offset, int64(ctx.Ft.ECParam.ChunkSize))
			rng.ExtendStart(blkStartIndex * stripSize)
			if toRng.Length != nil {
				blkEndIndex := math2.CeilDiv(toRng.Offset+*toRng.Length, int64(ctx.Ft.ECParam.ChunkSize))
				rng.ExtendEnd(blkEndIndex * stripSize)
			} else {
				rng.Length = nil
			}
		} else if strIdx.IsSegment() {
			// A segment To's range is relative to the segment itself,
			// so the segment's start position must be added.
			toRng := to.GetRange()
			segStart := ctx.Ft.SegmentParam.CalcSegmentStart(strIdx.Index)

			offset := toRng.Offset + segStart
			rng.ExtendStart(offset)
			if toRng.Length != nil {
				rng.ExtendEnd(offset + *toRng.Length)
			} else {
				rng.Length = nil
			}
		}
	}

	if ctx.UseEC {
		stripSize := ctx.Ft.ECParam.StripSize()
		rng.ExtendStart(math2.Floor(rng.Offset, stripSize))
		if rng.Length != nil {
			rng.ExtendEnd(math2.Ceil(rng.Offset+*rng.Length, stripSize))
		}
	}

	ctx.StreamRange = rng
}

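// Worked example with illustrative values (not from the repository): with ChunkSize = 1024 and
// K = 2, and assuming StripSize() is ChunkSize*K = 2048 and FloorDiv/CeilDiv are plain integer
// floor/ceiling division, a To reading an EC stream with Offset = 1500 and Length = 600 touches
// chunk indexes FloorDiv(1500, 1024) = 1 through CeilDiv(2100, 1024) = 3 (exclusive), so the raw
// stream is opened over [1*2048, 3*2048) = [2048, 6144).
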
func extend(ctx *ParseContext) error {
	for _, fr := range ctx.Ft.Froms {
		frNode, err := buildFromNode(ctx, fr)
		if err != nil {
			return err
		}

		ctx.IndexedStreams = append(ctx.IndexedStreams, IndexedStream{
			Stream:      frNode.Output().Var(),
			StreamIndex: fr.GetStreamIndex(),
		})

		// For a From that provides the complete file, generate a Split instruction.
		if fr.GetStreamIndex().IsRaw() {
			// Only generate EC-related instructions when the inputs or outputs require EC-encoded blocks.
			if ctx.UseEC {
				splitNode := ctx.DAG.NewChunkedSplit(ctx.Ft.ECParam.ChunkSize, ctx.Ft.ECParam.K)
				splitNode.Split(frNode.Output().Var())
				for i := 0; i < ctx.Ft.ECParam.K; i++ {
					ctx.IndexedStreams = append(ctx.IndexedStreams, IndexedStream{
						Stream:      splitNode.SubStream(i),
						StreamIndex: ioswitch2.ECStream(i),
					})
				}
			}

			// Likewise for segmented streams.
			if ctx.UseSegment {
				splitNode := ctx.DAG.NewSegmentSplit(ctx.Ft.SegmentParam.Segments)
				frNode.Output().Var().ToSlot(splitNode.InputSlot())
				for i := 0; i < len(ctx.Ft.SegmentParam.Segments); i++ {
					ctx.IndexedStreams = append(ctx.IndexedStreams, IndexedStream{
						Stream:      splitNode.Segment(i),
						StreamIndex: ioswitch2.SegmentStream(i),
					})
				}
			}
		}
	}

	if ctx.UseEC {
		// If K distinct block streams exist, generate a Multiply instruction, and also generate
		// a Join instruction for the streams involved.
		ecInputStrs := make(map[int]*dag.StreamVar)
		for _, s := range ctx.IndexedStreams {
			if s.StreamIndex.IsEC() && ecInputStrs[s.StreamIndex.Index] == nil {
				ecInputStrs[s.StreamIndex.Index] = s.Stream
				if len(ecInputStrs) == ctx.Ft.ECParam.K {
					break
				}
			}
		}

		if len(ecInputStrs) == ctx.Ft.ECParam.K {
			mulNode := ctx.DAG.NewECMultiply(*ctx.Ft.ECParam)

			for i, s := range ecInputStrs {
				mulNode.AddInput(s, i)
			}
			for i := 0; i < ctx.Ft.ECParam.N; i++ {
				ctx.IndexedStreams = append(ctx.IndexedStreams, IndexedStream{
					Stream:      mulNode.NewOutput(i),
					StreamIndex: ioswitch2.ECStream(i),
				})
			}

			joinNode := ctx.DAG.NewChunkedJoin(ctx.Ft.ECParam.ChunkSize)
			for i := 0; i < ctx.Ft.ECParam.K; i++ {
				// The stream is guaranteed to exist here.
				joinNode.AddInput(findOutputStream(ctx, ioswitch2.ECStream(i)))
			}
			ctx.IndexedStreams = append(ctx.IndexedStreams, IndexedStream{
				Stream:      joinNode.Joined(),
				StreamIndex: ioswitch2.RawStream(),
			})
		}
	}

	if ctx.UseSegment {
		// First assume all segments are present in order and generate the Join instruction;
		// whether any stream is actually missing is determined later from the Range.
		joinNode := ctx.DAG.NewSegmentJoin(ctx.Ft.SegmentParam.Segments)
		for i := 0; i < ctx.Ft.SegmentParam.SegmentCount(); i++ {
			str := findOutputStream(ctx, ioswitch2.SegmentStream(i))
			if str != nil {
				str.ToSlot(joinNode.InputSlot(i))
			}
		}
		ctx.IndexedStreams = append(ctx.IndexedStreams, IndexedStream{
			Stream:      joinNode.Joined(),
			StreamIndex: ioswitch2.RawStream(),
		})

		// The stream joined by SegmentJoin can be used to generate EC blocks.
		if ctx.UseEC {
			splitNode := ctx.DAG.NewChunkedSplit(ctx.Ft.ECParam.ChunkSize, ctx.Ft.ECParam.K)
			splitNode.Split(joinNode.Joined())

			mulNode := ctx.DAG.NewECMultiply(*ctx.Ft.ECParam)

			for i := 0; i < ctx.Ft.ECParam.K; i++ {
				mulNode.AddInput(splitNode.SubStream(i), i)
				ctx.IndexedStreams = append(ctx.IndexedStreams, IndexedStream{
					Stream:      splitNode.SubStream(i),
					StreamIndex: ioswitch2.ECStream(i),
				})
			}

			for i := 0; i < ctx.Ft.ECParam.N; i++ {
				ctx.IndexedStreams = append(ctx.IndexedStreams, IndexedStream{
					Stream:      mulNode.NewOutput(i),
					StreamIndex: ioswitch2.ECStream(i),
				})
			}
		}
	}

	// Find an input stream for every To.
	for _, to := range ctx.Ft.Toes {
		toNode, err := buildToNode(ctx, to)
		if err != nil {
			return err
		}
		ctx.ToNodes[to] = toNode

		str := findOutputStream(ctx, to.GetStreamIndex())
		if str == nil {
			return fmt.Errorf("no output stream found for data index %v", to.GetStreamIndex())
		}
		toNode.SetInput(str)
	}

	return nil
}

func buildFromNode(ctx *ParseContext, f ioswitch2.From) (ops2.FromNode, error) {
	var repRange math2.Range
	repRange.Offset = ctx.StreamRange.Offset
	if ctx.StreamRange.Length != nil {
		repRngLen := *ctx.StreamRange.Length
		repRange.Length = &repRngLen
	}

	var blkRange math2.Range
	if ctx.UseEC {
		blkRange.Offset = ctx.StreamRange.Offset / int64(ctx.Ft.ECParam.ChunkSize*ctx.Ft.ECParam.K) * int64(ctx.Ft.ECParam.ChunkSize)
		if ctx.StreamRange.Length != nil {
			blkRngLen := *ctx.StreamRange.Length / int64(ctx.Ft.ECParam.ChunkSize*ctx.Ft.ECParam.K) * int64(ctx.Ft.ECParam.ChunkSize)
			blkRange.Length = &blkRngLen
		}
	}

	switch f := f.(type) {
	case *ioswitch2.FromShardstore:
		t := ctx.DAG.NewShardRead(f, f.Storage.StorageID, types.NewOpen(f.FileHash))

		if f.StreamIndex.IsRaw() {
			t.Open.WithNullableLength(repRange.Offset, repRange.Length)
		} else if f.StreamIndex.IsEC() {
			t.Open.WithNullableLength(blkRange.Offset, blkRange.Length)
		} else if f.StreamIndex.IsSegment() {
			segStart := ctx.Ft.SegmentParam.CalcSegmentStart(f.StreamIndex.Index)
			segLen := ctx.Ft.SegmentParam.Segments[f.StreamIndex.Index]
			segEnd := segStart + segLen

			// The opened range must not exceed this segment's own range.
			openOff := ctx.StreamRange.Offset - segStart
			openOff = math2.Clamp(openOff, 0, segLen)

			openLen := segLen

			if ctx.StreamRange.Length != nil {
				strEnd := ctx.StreamRange.Offset + *ctx.StreamRange.Length
				openEnd := math2.Min(strEnd, segEnd)
				openLen = openEnd - segStart - openOff
			}

			t.Open.WithNullableLength(openOff, &openLen)
		}

		switch addr := f.Hub.Address.(type) {
		case *cdssdk.HttpAddressInfo:
			t.Env().ToEnvWorker(&ioswitch2.HttpHubWorker{Hub: f.Hub})
			t.Env().Pinned = true

		case *cdssdk.GRPCAddressInfo:
			t.Env().ToEnvWorker(&ioswitch2.AgentWorker{Hub: f.Hub, Address: *addr})
			t.Env().Pinned = true

		default:
			return nil, fmt.Errorf("unsupported node address type %T", addr)
		}

		return t, nil

	case *ioswitch2.FromDriver:
		n := ctx.DAG.NewFromDriver(f, f.Handle)
		n.Env().ToEnvDriver()
		n.Env().Pinned = true

		if f.StreamIndex.IsRaw() {
			f.Handle.RangeHint.Offset = repRange.Offset
			f.Handle.RangeHint.Length = repRange.Length
		} else if f.StreamIndex.IsEC() {
			f.Handle.RangeHint.Offset = blkRange.Offset
			f.Handle.RangeHint.Length = blkRange.Length
		} else if f.StreamIndex.IsSegment() {
			segStart := ctx.Ft.SegmentParam.CalcSegmentStart(f.StreamIndex.Index)
			segLen := ctx.Ft.SegmentParam.Segments[f.StreamIndex.Index]
			segEnd := segStart + segLen

			// The opened range must not exceed this segment's own range.
			openOff := repRange.Offset - segStart
			openOff = math2.Clamp(openOff, 0, segLen)

			openLen := segLen

			if repRange.Length != nil {
				repEnd := repRange.Offset + *repRange.Length
				openEnd := math2.Min(repEnd, segEnd)
				openLen = openEnd - openOff
			}

			f.Handle.RangeHint.Offset = openOff
			f.Handle.RangeHint.Length = &openLen
		}

		return n, nil

	default:
		return nil, fmt.Errorf("unsupported from type %T", f)
	}
}

func buildToNode(ctx *ParseContext, t ioswitch2.To) (ops2.ToNode, error) {
	switch t := t.(type) {
	case *ioswitch2.ToShardStore:
		n := ctx.DAG.NewShardWrite(t, t.Storage, t.FileHashStoreKey)

		if err := setEnvByAddress(n, t.Hub, t.Hub.Address); err != nil {
			return nil, err
		}

		n.Env().Pinned = true

		return n, nil

	case *ioswitch2.ToDriver:
		n := ctx.DAG.NewToDriver(t, t.Handle)
		n.Env().ToEnvDriver()
		n.Env().Pinned = true

		return n, nil

	case *ioswitch2.LoadToShared:
		n := ctx.DAG.NewSharedLoad(t, t.Storage.StorageID, t.ObjectPath)

		if err := setEnvByAddress(n, t.Hub, t.Hub.Address); err != nil {
			return nil, err
		}

		n.Env().Pinned = true

		return n, nil

	default:
		return nil, fmt.Errorf("unsupported to type %T", t)
	}
}

func setEnvByAddress(n dag.Node, hub cdssdk.Hub, addr cdssdk.HubAddressInfo) error {
	switch addr := addr.(type) {
	case *cdssdk.HttpAddressInfo:
		n.Env().ToEnvWorker(&ioswitch2.HttpHubWorker{Hub: hub})

	case *cdssdk.GRPCAddressInfo:
		n.Env().ToEnvWorker(&ioswitch2.AgentWorker{Hub: hub, Address: *addr})

	default:
		return fmt.Errorf("unsupported node address type %T", addr)
	}

	return nil
}

// Adjusts the number of segments in each SegmentSplit, and the size of each segment, according to the StreamRange.
func fixSegmentSplit(ctx *ParseContext) error {
	var err error
	dag.WalkOnlyType[*ops2.SegmentSplitNode](ctx.DAG.Graph, func(node *ops2.SegmentSplitNode) bool {
		var strEnd *int64
		if ctx.StreamRange.Length != nil {
			e := ctx.StreamRange.Offset + *ctx.StreamRange.Length
			strEnd = &e
		}

		startSeg, endSeg := ctx.Ft.SegmentParam.CalcSegmentRange(ctx.StreamRange.Offset, strEnd)

		// Detach the segments that fall outside the range.
		for i := endSeg; i < len(node.Segments); i++ {
			node.OutputStreams().Get(i).ClearAllDst()
		}
		node.OutputStreams().Slots.RemoveRange(endSeg, ctx.Ft.SegmentParam.SegmentCount()-endSeg)
		node.Segments = lo2.RemoveRange(node.Segments, endSeg, ctx.Ft.SegmentParam.SegmentCount()-endSeg)

		for i := 0; i < startSeg; i++ {
			node.OutputStreams().Get(i).ClearAllDst()
		}
		node.OutputStreams().Slots.RemoveRange(0, startSeg)
		node.Segments = lo2.RemoveRange(node.Segments, 0, startSeg)

		// The StreamRange may start in the middle of a segment; that segment's size then equals
		// the distance from the stream start to the end of the segment.
		startSegStart := ctx.Ft.SegmentParam.CalcSegmentStart(startSeg)
		node.Segments[0] -= ctx.StreamRange.Offset - startSegStart

		// The StreamRange may end in the middle of a segment; that segment's size then equals
		// the distance from the start of the segment to the stream end.
		if strEnd != nil {
			endSegStart := ctx.Ft.SegmentParam.CalcSegmentStart(endSeg - 1)
			node.Segments[len(node.Segments)-1] = *strEnd - endSegStart
		}

		return true
	})
	return err
}

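// Worked example with illustrative values (not from the repository), assuming CalcSegmentRange
// returns the half-open index range of segments that overlap the stream range: with segment sizes
// [4, 4, 4] (starting at offsets 0, 4 and 8) and a StreamRange of [5, 10), startSeg = 1 and
// endSeg = 3. No trailing segment is removed, the leading segment is removed, and the remaining
// sizes [4, 4] become [3, 2]: the first loses the 5 - 4 = 1 byte before the stream start, and the
// last is cut to *strEnd - CalcSegmentStart(2) = 10 - 8 = 2.
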
// Removes unused segments from SegmentJoin instructions.
func fixSegmentJoin(ctx *ParseContext) error {
	var err error
	dag.WalkOnlyType[*ops2.SegmentJoinNode](ctx.DAG.Graph, func(node *ops2.SegmentJoinNode) bool {
		start := ctx.StreamRange.Offset
		var end *int64
		if ctx.StreamRange.Length != nil {
			e := ctx.StreamRange.Offset + *ctx.StreamRange.Length
			end = &e
		}

		startSeg, endSeg := ctx.Ft.SegmentParam.CalcSegmentRange(start, end)

		// Detach the segments that fall outside the range.
		for i := endSeg; i < len(node.Segments); i++ {
			node.InputStreams().Get(i).NotTo(node)
		}
		node.InputStreams().Slots.RemoveRange(endSeg, ctx.Ft.SegmentParam.SegmentCount()-endSeg)
		node.Segments = lo2.RemoveRange(node.Segments, endSeg, ctx.Ft.SegmentParam.SegmentCount()-endSeg)

		for i := 0; i < startSeg; i++ {
			node.InputStreams().Get(i).NotTo(node)
		}
		node.InputStreams().Slots.RemoveRange(0, startSeg)
		node.Segments = lo2.RemoveRange(node.Segments, 0, startSeg)

		// The StreamRange may start in the middle of a segment; that segment's size then equals
		// the distance from the stream start to the end of the segment.
		startSegStart := ctx.Ft.SegmentParam.CalcSegmentStart(startSeg)
		node.Segments[0] -= ctx.StreamRange.Offset - startSegStart

		// Check that every required segment has been added to the Join.
		for i := 0; i < node.InputStreams().Len(); i++ {
			if node.InputStreams().Get(i) == nil {
				err = fmt.Errorf("segment %v is missing from the joined raw stream", i+startSeg)
				return false
			}
		}

		return true
	})
	return err
}

// Removes SegmentJoin instructions whose output is unused.
func removeUnusedSegmentJoin(ctx *ParseContext) bool {
	changed := false
	dag.WalkOnlyType[*ops2.SegmentJoinNode](ctx.DAG.Graph, func(node *ops2.SegmentJoinNode) bool {
		if node.Joined().Dst.Len() > 0 {
			return true
		}

		node.RemoveAllInputs()
		ctx.DAG.RemoveNode(node)
		changed = true
		return true
	})
	return changed
}

// Removes SegmentSplit instructions whose outputs are all unused.
func removeUnusedSegmentSplit(ctx *ParseContext) bool {
	changed := false
	dag.WalkOnlyType[*ops2.SegmentSplitNode](ctx.DAG.Graph, func(typ *ops2.SegmentSplitNode) bool {
		// The instruction can be removed only if none of the split streams is used.
		for _, out := range typ.OutputStreams().Slots.RawArray() {
			if out.Dst.Len() > 0 {
				return true
			}
		}

		typ.RemoveAllStream()
		ctx.DAG.RemoveNode(typ)
		changed = true
		return true
	})
	return changed
}

// If the results of a SegmentSplit are used only by a SegmentJoin, both instructions can be omitted.
func omitSegmentSplitJoin(ctx *ParseContext) bool {
	changed := false

	dag.WalkOnlyType[*ops2.SegmentSplitNode](ctx.DAG.Graph, func(splitNode *ops2.SegmentSplitNode) bool {
		// Pick any output stream and look at its destination.
		splitOut := splitNode.OutputStreams().Get(0)
		if splitOut.Dst.Len() != 1 {
			return true
		}
		dstNode := splitOut.Dst.Get(0)

		// That destination must be a Join instruction.
		joinNode, ok := dstNode.(*ops2.SegmentJoinNode)
		if !ok {
			return true
		}

		if splitNode.OutputStreams().Len() != joinNode.Joined().Dst.Len() {
			return true
		}

		// Every input of the Join must come from an output of the Split, at the same position.
		for i := 0; i < splitNode.OutputStreams().Len(); i++ {
			splitOut := splitNode.OutputStreams().Get(i)
			joinIn := joinNode.InputStreams().Get(i)
			if splitOut != joinIn {
				return true
			}

			if splitOut != nil && splitOut.Dst.Len() != 1 {
				return true
			}
		}

		// All conditions hold, so the omission can be applied: the input stream of each destination
		// of the Join is replaced with the input stream of the Split:
		// F->Split->Join->T becomes F->T.
		splitInput := splitNode.InputStreams().Get(0)
		for _, to := range joinNode.Joined().Dst.RawArray() {
			splitInput.To(to, to.InputStreams().IndexOf(joinNode.Joined()))
		}
		splitInput.NotTo(splitNode)

		// Then delete both instructions.
		ctx.DAG.RemoveNode(joinNode)
		ctx.DAG.RemoveNode(splitNode)
		changed = true
		return true
	})

	return changed
}

// Removes Join instructions whose output stream is unused.
func removeUnusedJoin(ctx *ParseContext) bool {
	changed := false

	dag.WalkOnlyType[*ops2.ChunkedJoinNode](ctx.DAG.Graph, func(node *ops2.ChunkedJoinNode) bool {
		if node.Joined().Dst.Len() > 0 {
			return true
		}

		node.RemoveAllInputs()
		ctx.DAG.RemoveNode(node)
		changed = true
		return true
	})

	return changed
}

// Removes unused output streams from Multiply instructions. If none remain, the instruction itself is removed.
func removeUnusedMultiplyOutput(ctx *ParseContext) bool {
	changed := false
	dag.WalkOnlyType[*ops2.ECMultiplyNode](ctx.DAG.Graph, func(node *ops2.ECMultiplyNode) bool {
		outArr := node.OutputStreams().Slots.RawArray()
		for i2, out := range outArr {
			if out.Dst.Len() > 0 {
				continue
			}

			outArr[i2] = nil
			node.OutputIndexes[i2] = -2
			changed = true
		}

		node.OutputStreams().Slots.SetRawArray(lo2.RemoveAllDefault(outArr))
		node.OutputIndexes = lo2.RemoveAll(node.OutputIndexes, -2)

		// If every output stream has been removed, remove the instruction as well.
		if node.OutputStreams().Len() == 0 {
			node.RemoveAllInputs()
			ctx.DAG.RemoveNode(node)
			changed = true
		}

		return true
	})
	return changed
}

// Removes Split instructions whose outputs are all unused.
func removeUnusedSplit(ctx *ParseContext) bool {
	changed := false
	dag.WalkOnlyType[*ops2.ChunkedSplitNode](ctx.DAG.Graph, func(typ *ops2.ChunkedSplitNode) bool {
		// The instruction can be removed only if none of the split streams is used.
		for _, out := range typ.OutputStreams().Slots.RawArray() {
			if out.Dst.Len() > 0 {
				return true
			}
		}

		typ.RemoveAllStream()
		ctx.DAG.RemoveNode(typ)
		changed = true
		return true
	})

	return changed
}

// If the results of a Split are used only by a Join, both the Split and the Join can be omitted.
func omitSplitJoin(ctx *ParseContext) bool {
	changed := false

	dag.WalkOnlyType[*ops2.ChunkedSplitNode](ctx.DAG.Graph, func(splitNode *ops2.ChunkedSplitNode) bool {
		// Every output of the Split instruction must have exactly one destination.
		var dstNode dag.Node
		for _, out := range splitNode.OutputStreams().Slots.RawArray() {
			if out.Dst.Len() != 1 {
				return true
			}

			if dstNode == nil {
				dstNode = out.Dst.Get(0)
			} else if dstNode != out.Dst.Get(0) {
				return true
			}
		}

		if dstNode == nil {
			return true
		}

		// And that destination must be a Join instruction.
		joinNode, ok := dstNode.(*ops2.ChunkedJoinNode)
		if !ok {
			return true
		}

		// The inputs of the Join must also all come from the outputs of the Split.
		// Since all outputs of the Split were shown to share one destination, it suffices to check
		// that the Join has as many inputs as the Split has outputs.
		if joinNode.InputStreams().Len() != splitNode.OutputStreams().Len() {
			return true
		}

		// All conditions hold, so the omission can be applied: the input stream of each destination
		// of the Join is replaced with the input stream of the Split:
		// F->Split->Join->T becomes F->T.
		splitInput := splitNode.InputStreams().Get(0)
		for _, to := range joinNode.Joined().Dst.RawArray() {
			splitInput.To(to, to.InputStreams().IndexOf(joinNode.Joined()))
		}
		splitInput.NotTo(splitNode)

		// Then delete both instructions.
		ctx.DAG.RemoveNode(joinNode)
		ctx.DAG.RemoveNode(splitNode)

		changed = true
		return true
	})

	return changed
}

// Determines where an instruction executes from the locations of its input and output streams.
// To-style instructions all have a fixed execution location, and these locations gradually spread
// across the whole DAG as pinning is repeated, so in theory no instruction should be left with an
// undetermined location.
func pin(ctx *ParseContext) bool {
	changed := false
	ctx.DAG.Walk(func(node dag.Node) bool {
		if node.Env().Pinned {
			return true
		}

		var toEnv *dag.NodeEnv
		for _, out := range node.OutputStreams().Slots.RawArray() {
			for _, to := range out.Dst.RawArray() {
				if to.Env().Type == dag.EnvUnknown {
					continue
				}

				if toEnv == nil {
					toEnv = to.Env()
				} else if !toEnv.Equals(to.Env()) {
					toEnv = nil
					break
				}
			}
		}

		if toEnv != nil {
			if !node.Env().Equals(toEnv) {
				changed = true
			}

			*node.Env() = *toEnv
			return true
		}

		// Otherwise pin the node according to where its input streams originate.
		var fromEnv *dag.NodeEnv
		for _, in := range node.InputStreams().Slots.RawArray() {
			if in.Src.Env().Type == dag.EnvUnknown {
				continue
			}

			if fromEnv == nil {
				fromEnv = in.Src.Env()
			} else if !fromEnv.Equals(in.Src.Env()) {
				fromEnv = nil
				break
			}
		}

		if fromEnv != nil {
			if !node.Env().Equals(fromEnv) {
				changed = true
			}

			*node.Env() = *fromEnv
		}
		return true
	})

	return changed
}

// Removes From nodes whose streams are unused. FromDriver nodes are never removed.
func removeUnusedFromNode(ctx *ParseContext) {
	dag.WalkOnlyType[ops2.FromNode](ctx.DAG.Graph, func(node ops2.FromNode) bool {
		if _, ok := node.(*ops2.FromDriverNode); ok {
			return true
		}

		if node.Output().Var().Dst.Len() == 0 {
			ctx.DAG.RemoveNode(node)
		}
		return true
	})
}

// Adds a Drop instruction for every stream that is not consumed by anything.
func dropUnused(ctx *ParseContext) {
	ctx.DAG.Walk(func(node dag.Node) bool {
		for _, out := range node.OutputStreams().Slots.RawArray() {
			if out.Dst.Len() == 0 {
				n := ctx.DAG.NewDropStream()
				*n.Env() = *node.Env()
				n.SetInput(out)
			}
		}
		return true
	})
}

// Replaces SegmentJoin instructions with multipart-upload instructions.
func useMultipartUploadToShardStore(ctx *ParseContext) {
	dag.WalkOnlyType[*ops2.SegmentJoinNode](ctx.DAG.Graph, func(joinNode *ops2.SegmentJoinNode) bool {
		if joinNode.Joined().Dst.Len() != 1 {
			return true
		}

		joinDst := joinNode.Joined().Dst.Get(0)
		shardNode, ok := joinDst.(*ops2.ShardWriteNode)
		if !ok {
			return true
		}

		// The range of SegmentJoin's output stream must equal the range of ToShardStore's input stream.
		// The range could be adjusted by tweaking SegmentJoin's inputs, but that is too complicated
		// and not supported for now.
		toStrIdx := shardNode.GetTo().GetStreamIndex()
		toStrRng := shardNode.GetTo().GetRange()
		if toStrIdx.IsRaw() {
			if !toStrRng.Equals(ctx.StreamRange) {
				return true
			}
		} else {
			return true
		}

		// The Join's destination must support MultipartUpload for the replacement to apply.
		multiUpload, err := factory.GetBuilder(shardNode.Storage).CreateMultiparter()
		if err != nil {
			return true
		}

		// Every segment of the Join must be no smaller than the minimum part size.
		// Currently only splitting a stream larger than the maximum part size is supported;
		// merging several small segments to reach the minimum part size is not.
		for _, size := range joinNode.Segments {
			if size < multiUpload.MinPartSize() {
				return true
			}
		}

		initNode := ctx.DAG.NewMultipartInitiator(shardNode.Storage)
		initNode.Env().CopyFrom(shardNode.Env())

		partNumber := 1
		for i, size := range joinNode.Segments {
			joinInput := joinNode.InputSlot(i)

			if size > multiUpload.MaxPartSize() {
				// If a segment is larger than the maximum part size, it must be split into
				// multiple smaller parts for upload. The split and upload instructions run
				// directly on the node that produces the stream.
				splits := math2.SplitLessThan(size, multiUpload.MaxPartSize())

				splitNode := ctx.DAG.NewSegmentSplit(splits)
				splitNode.Env().CopyFrom(joinInput.Var().Src.Env())

				joinInput.Var().ToSlot(splitNode.InputSlot())

				for i2 := 0; i2 < len(splits); i2++ {
					uploadNode := ctx.DAG.NewMultipartUpload(shardNode.Storage, partNumber, splits[i2])
					uploadNode.Env().CopyFrom(joinInput.Var().Src.Env())

					initNode.UploadArgsVar().ToSlot(uploadNode.UploadArgsSlot())
					splitNode.SegmentVar(i2).ToSlot(uploadNode.PartStreamSlot())
					uploadNode.UploadResultVar().ToSlot(initNode.AppendPartInfoSlot())

					partNumber++
				}
			} else {
				// Otherwise upload the whole segment directly.
				uploadNode := ctx.DAG.NewMultipartUpload(shardNode.Storage, partNumber, size)
				// The upload instruction runs directly on the node that produces the stream.
				uploadNode.Env().CopyFrom(joinInput.Var().Src.Env())

				initNode.UploadArgsVar().ToSlot(uploadNode.UploadArgsSlot())
				joinInput.Var().ToSlot(uploadNode.PartStreamSlot())
				uploadNode.UploadResultVar().ToSlot(initNode.AppendPartInfoSlot())

				partNumber++
			}

			joinInput.Var().NotTo(joinNode)
		}

		bypassNode := ctx.DAG.NewBypassToShardStore(shardNode.Storage.Storage.StorageID, shardNode.FileHashStoreKey)
		bypassNode.Env().CopyFrom(shardNode.Env())

		// The result produced by the multipart-upload nodes is sent to bypassNode,
		// and bypassNode sends the processing result back to the multipart initiator.
		initNode.BypassFileInfoVar().ToSlot(bypassNode.BypassFileInfoSlot())
		bypassNode.BypassCallbackVar().ToSlot(initNode.BypassCallbackSlot())

		// Finally, remove the Join instruction and the ToShardStore instruction.
		ctx.DAG.RemoveNode(joinNode)
		ctx.DAG.RemoveNode(shardNode)

		// Since the ToShardStore has been replaced, its corresponding To must be removed as well.
		// This skips the later Range pass, but thanks to the range check above, no Range is needed.
		ctx.Ft.Toes = lo2.Remove(ctx.Ft.Toes, shardNode.GetTo())
		return true
	})
}

// Stores the result of shard (IPFS) write instructions.
func storeShardWriteResult(ctx *ParseContext) {
	dag.WalkOnlyType[*ops2.ShardWriteNode](ctx.DAG.Graph, func(n *ops2.ShardWriteNode) bool {
		if n.FileHashStoreKey == "" {
			return true
		}

		storeNode := ctx.DAG.NewStore()
		storeNode.Env().ToEnvDriver()

		storeNode.Store(n.FileHashStoreKey, n.FileHashVar())
		return true
	})

	dag.WalkOnlyType[*ops2.BypassToShardStoreNode](ctx.DAG.Graph, func(n *ops2.BypassToShardStoreNode) bool {
		if n.FileHashStoreKey == "" {
			return true
		}

		storeNode := ctx.DAG.NewStore()
		storeNode.Env().ToEnvDriver()

		storeNode.Store(n.FileHashStoreKey, n.FileHashVar())
		return true
	})
}

// Generates Range instructions. The StreamRange may exceed the total file size, but a Range
// instruction does not fail when there is not enough data; it simply returns what is available.
func generateRange(ctx *ParseContext) {
	for i := 0; i < len(ctx.Ft.Toes); i++ {
		to := ctx.Ft.Toes[i]
		toNode := ctx.ToNodes[to]

		toStrIdx := to.GetStreamIndex()
		toRng := to.GetRange()

		if toStrIdx.IsRaw() {
			n := ctx.DAG.NewRange()
			toInput := toNode.Input()
			*n.Env() = *toInput.Var().Src.Env()
			rnged := n.RangeStream(toInput.Var(), math2.Range{
				Offset: toRng.Offset - ctx.StreamRange.Offset,
				Length: toRng.Length,
			})
			toInput.Var().NotTo(toNode)
			toNode.SetInput(rnged)

		} else if toStrIdx.IsEC() {
			stripSize := int64(ctx.Ft.ECParam.ChunkSize * ctx.Ft.ECParam.K)
			blkStartIdx := ctx.StreamRange.Offset / stripSize

			blkStart := blkStartIdx * int64(ctx.Ft.ECParam.ChunkSize)

			n := ctx.DAG.NewRange()
			toInput := toNode.Input()
			*n.Env() = *toInput.Var().Src.Env()
			rnged := n.RangeStream(toInput.Var(), math2.Range{
				Offset: toRng.Offset - blkStart,
				Length: toRng.Length,
			})
			toInput.Var().NotTo(toNode)
			toNode.SetInput(rnged)

		} else if toStrIdx.IsSegment() {
			// if frNode, ok := toNode.Input().Var().From().Node.(ops2.FromNode); ok {
			// 	// Currently only a segment To can be fed by a From that provides segments,
			// 	// in which case no Range instruction is needed.
			// 	if frNode.GetFrom().GetStreamIndex().IsSegment() {
			// 		continue
			// 	}
			// }

			// segStart := ctx.Ft.SegmentParam.CalcSegmentStart(toStrIdx.Index)
			// strStart := segStart + toRng.Offset

			// n := ctx.DAG.NewRange()
			// toInput := toNode.Input()
			// *n.Env() = *toInput.Var().From().Node.Env()
			// rnged := n.RangeStream(toInput.Var(), exec.Range{
			// 	Offset: strStart - ctx.StreamRange.Offset,
			// 	Length: toRng.Length,
			// })
			// toInput.Var().NotTo(toNode, toInput.Index)
			// toNode.SetInput(rnged)
		}
	}
}

// Generates Clone instructions.
func generateClone(ctx *ParseContext) {
	ctx.DAG.Walk(func(node dag.Node) bool {
		for _, outVar := range node.OutputStreams().Slots.RawArray() {
			if outVar.Dst.Len() <= 1 {
				continue
			}

			c := ctx.DAG.NewCloneStream()
			*c.Env() = *node.Env()
			for _, dst := range outVar.Dst.RawArray() {
				c.NewOutput().To(dst, dst.InputStreams().IndexOf(outVar))
			}
			outVar.Dst.Resize(0)
			c.SetInput(outVar)
		}

		for _, outVar := range node.OutputValues().Slots.RawArray() {
			if outVar.Dst.Len() <= 1 {
				continue
			}

			t := ctx.DAG.NewCloneValue()
			*t.Env() = *node.Env()
			for _, dst := range outVar.Dst.RawArray() {
				t.NewOutput().To(dst, dst.InputValues().IndexOf(outVar))
			}
			outVar.Dst.Resize(0)
			t.SetInput(outVar)
		}

		return true
	})
}
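
useMultipartUploadToShardStore splits any segment larger than multiUpload.MaxPartSize() into several parts via math2.SplitLessThan, whose implementation is not shown in this file. The standalone sketch below illustrates one plausible way such a split could work, dividing a segment into the smallest number of near-equal parts that each stay at or below the maximum part size. The function name splitSizes and the even-split policy are illustrative assumptions, not the repository's actual implementation.

package main

import "fmt"

// splitSizes divides total into the smallest number of parts that each stay at or
// below maxPart, keeping the parts as even as possible. (Illustrative assumption;
// math2.SplitLessThan in the repository may behave differently.)
func splitSizes(total, maxPart int64) []int64 {
	n := (total + maxPart - 1) / maxPart // number of parts, rounded up
	base := total / n
	rem := total % n
	parts := make([]int64, n)
	for i := range parts {
		parts[i] = base
		if int64(i) < rem {
			parts[i]++
		}
	}
	return parts
}

func main() {
	// A 130 MiB segment with a 64 MiB maximum part size becomes three nearly equal parts.
	fmt.Println(splitSizes(130<<20, 64<<20))
}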

This project aims to turn inter-cloud (JointCloud) storage into a public infrastructure, so that individuals and enterprises can use efficient inter-cloud storage services with a low barrier to entry (installing the out-of-the-box inter-cloud storage client is enough; no other components need to be deployed), while also allowing users to flexibly and conveniently customize the functional details of inter-cloud storage.