You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

parser.go 15 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599
  1. package parser
  2. import (
  3. "fmt"
  4. "math"
  5. "gitlink.org.cn/cloudream/common/pkgs/ioswitch/dag"
  6. "gitlink.org.cn/cloudream/common/pkgs/ioswitch/exec"
  7. "gitlink.org.cn/cloudream/common/pkgs/ioswitch/plan"
  8. "gitlink.org.cn/cloudream/common/pkgs/ioswitch/plan/ops"
  9. "gitlink.org.cn/cloudream/common/pkgs/ipfs"
  10. cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
  11. "gitlink.org.cn/cloudream/common/utils/lo2"
  12. "gitlink.org.cn/cloudream/common/utils/math2"
  13. "gitlink.org.cn/cloudream/storage/common/pkgs/ioswitch2"
  14. "gitlink.org.cn/cloudream/storage/common/pkgs/ioswitch2/ops2"
  15. )
// DefaultParser parses a FromTo description into an executable plan,
// applying erasure-coding (EC) aware optimizations along the way.
type DefaultParser struct {
	// EC describes the erasure-coding redundancy scheme (K data blocks,
	// N total blocks, ChunkSize bytes per chunk) used by this parser.
	EC cdssdk.ECRedundancy
}
  19. func NewParser(ec cdssdk.ECRedundancy) *DefaultParser {
  20. return &DefaultParser{
  21. EC: ec,
  22. }
  23. }
// ParseContext carries the mutable state shared by all parsing phases.
type ParseContext struct {
	Ft  ioswitch2.FromTo
	DAG *dag.Graph
	// StreamRange is the range the From sources must open so that every To
	// receives the data range it asked for.
	// It is expressed over the whole file, and both bounds are rounded to a
	// multiple of the stripe size, so the upper bound may exceed the file size.
	StreamRange exec.Range
}
  31. func (p *DefaultParser) Parse(ft ioswitch2.FromTo, blder *exec.PlanBuilder) error {
  32. ctx := ParseContext{Ft: ft, DAG: dag.NewGraph()}
  33. // 分成两个阶段:
  34. // 1. 基于From和To生成更多指令,初步匹配to的需求
  35. // 计算一下打开流的范围
  36. p.calcStreamRange(&ctx)
  37. err := p.extend(&ctx, ft)
  38. if err != nil {
  39. return err
  40. }
  41. // 2. 优化上一步生成的指令
  42. // 对于删除指令的优化,需要反复进行,直到没有变化为止。
  43. // 从目前实现上来说不会死循环
  44. for {
  45. opted := false
  46. if p.removeUnusedJoin(&ctx) {
  47. opted = true
  48. }
  49. if p.removeUnusedMultiplyOutput(&ctx) {
  50. opted = true
  51. }
  52. if p.removeUnusedSplit(&ctx) {
  53. opted = true
  54. }
  55. if p.omitSplitJoin(&ctx) {
  56. opted = true
  57. }
  58. if !opted {
  59. break
  60. }
  61. }
  62. // 确定指令执行位置的过程,也需要反复进行,直到没有变化为止。
  63. for p.pin(&ctx) {
  64. }
  65. // 下面这些只需要执行一次,但需要按顺序
  66. p.dropUnused(&ctx)
  67. p.storeIPFSWriteResult(&ctx)
  68. p.generateClone(&ctx)
  69. p.generateRange(&ctx)
  70. return plan.Generate(ctx.DAG, blder)
  71. }
  72. func (p *DefaultParser) findOutputStream(ctx *ParseContext, streamIndex int) *dag.StreamVar {
  73. var ret *dag.StreamVar
  74. ctx.DAG.Walk(func(n *dag.Node) bool {
  75. for _, o := range n.OutputStreams {
  76. if o != nil && ioswitch2.SProps(o).StreamIndex == streamIndex {
  77. ret = o
  78. return false
  79. }
  80. }
  81. return true
  82. })
  83. return ret
  84. }
  85. // 计算输入流的打开范围。会把流的范围按条带大小取整
  86. func (p *DefaultParser) calcStreamRange(ctx *ParseContext) {
  87. stripSize := int64(p.EC.ChunkSize * p.EC.K)
  88. rng := exec.Range{
  89. Offset: math.MaxInt64,
  90. }
  91. for _, to := range ctx.Ft.Toes {
  92. if to.GetDataIndex() == -1 {
  93. toRng := to.GetRange()
  94. rng.ExtendStart(math2.Floor(toRng.Offset, stripSize))
  95. if toRng.Length != nil {
  96. rng.ExtendEnd(math2.Ceil(toRng.Offset+*toRng.Length, stripSize))
  97. } else {
  98. rng.Length = nil
  99. }
  100. } else {
  101. toRng := to.GetRange()
  102. blkStartIndex := math2.FloorDiv(toRng.Offset, int64(p.EC.ChunkSize))
  103. rng.ExtendStart(blkStartIndex * stripSize)
  104. if toRng.Length != nil {
  105. blkEndIndex := math2.CeilDiv(toRng.Offset+*toRng.Length, int64(p.EC.ChunkSize))
  106. rng.ExtendEnd(blkEndIndex * stripSize)
  107. } else {
  108. rng.Length = nil
  109. }
  110. }
  111. }
  112. ctx.StreamRange = rng
  113. }
// extend expands the DAG from the From/To endpoints:
//   - builds a source node for every From, splitting complete-file sources
//     into K chunk streams;
//   - when K distinct block streams are available, adds a Multiply
//     (EC reconstruction) node plus a Join node that reassembles the file;
//   - finally wires one input stream into every To.
func (p *DefaultParser) extend(ctx *ParseContext, ft ioswitch2.FromTo) error {
	for _, fr := range ft.Froms {
		frNode, err := p.buildFromNode(ctx, &ft, fr)
		if err != nil {
			return err
		}
		// For a From that provides the complete file, generate a Split instruction.
		if fr.GetDataIndex() == -1 {
			node, _ := dag.NewNode(ctx.DAG, &ops2.ChunkedSplitType{ChunkSize: p.EC.ChunkSize, OutputCount: p.EC.K}, &ioswitch2.NodeProps{})
			frNode.OutputStreams[0].To(node, 0)
		}
	}
	// If there are K distinct block streams, generate a Multiply instruction,
	// plus a Join instruction over the streams it produces.
	ecInputStrs := make(map[int]*dag.StreamVar)
loop:
	for _, o := range ctx.DAG.Nodes {
		for _, s := range o.OutputStreams {
			prop := ioswitch2.SProps(s)
			// Only block streams (index >= 0) participate; keep the first
			// stream found for each index.
			if prop.StreamIndex >= 0 && ecInputStrs[prop.StreamIndex] == nil {
				ecInputStrs[prop.StreamIndex] = s
				if len(ecInputStrs) == p.EC.K {
					break loop
				}
			}
		}
	}
	if len(ecInputStrs) == p.EC.K {
		mulNode, mulType := dag.NewNode(ctx.DAG, &ops2.MultiplyType{
			EC: p.EC,
		}, &ioswitch2.NodeProps{})
		for _, s := range ecInputStrs {
			mulType.AddInput(mulNode, s, ioswitch2.SProps(s).StreamIndex)
		}
		// Produce all N block streams; unused ones are pruned later by
		// removeUnusedMultiplyOutput.
		for i := 0; i < p.EC.N; i++ {
			mulType.NewOutput(mulNode, i)
		}
		joinNode, _ := dag.NewNode(ctx.DAG, &ops2.ChunkedJoinType{
			InputCount: p.EC.K,
			ChunkSize:  p.EC.ChunkSize,
		}, &ioswitch2.NodeProps{})
		for i := 0; i < p.EC.K; i++ {
			// A stream with this index is guaranteed to exist at this point.
			p.findOutputStream(ctx, i).To(joinNode, i)
		}
		// The joined stream represents the complete file (index -1).
		ioswitch2.SProps(joinNode.OutputStreams[0]).StreamIndex = -1
	}
	// Find one input stream for every To.
	for _, to := range ft.Toes {
		n, err := p.buildToNode(ctx, &ft, to)
		if err != nil {
			return err
		}
		str := p.findOutputStream(ctx, to.GetDataIndex())
		if str == nil {
			return fmt.Errorf("no output stream found for data index %d", to.GetDataIndex())
		}
		str.To(n, 0)
	}
	return nil
}
// buildFromNode creates the source node for a single From endpoint.
//
// repRange is ctx.StreamRange expressed in whole-file offsets (used when the
// From provides the complete file, DataIndex == -1); blkRange is the same
// range translated into single-block offsets (used when the From provides one
// EC block). For an IPFS source the range is pushed into the read option; for
// a driver source it is recorded as a hint on the handle.
func (p *DefaultParser) buildFromNode(ctx *ParseContext, ft *ioswitch2.FromTo, f ioswitch2.From) (*dag.Node, error) {
	var repRange exec.Range
	var blkRange exec.Range
	repRange.Offset = ctx.StreamRange.Offset
	// Whole-file offset -> offset within one block: stripe index * chunk size.
	blkRange.Offset = ctx.StreamRange.Offset / int64(p.EC.ChunkSize*p.EC.K) * int64(p.EC.ChunkSize)
	if ctx.StreamRange.Length != nil {
		repRngLen := *ctx.StreamRange.Length
		repRange.Length = &repRngLen
		blkRngLen := *ctx.StreamRange.Length / int64(p.EC.ChunkSize*p.EC.K) * int64(p.EC.ChunkSize)
		blkRange.Length = &blkRngLen
	}
	switch f := f.(type) {
	case *ioswitch2.FromNode:
		// Read from IPFS; the placeholder option (whole file) is narrowed below.
		n, t := dag.NewNode(ctx.DAG, &ops2.IPFSReadType{
			FileHash: f.FileHash,
			Option: ipfs.ReadOption{
				Offset: 0,
				Length: -1,
			},
		}, &ioswitch2.NodeProps{
			From: f,
		})
		ioswitch2.SProps(n.OutputStreams[0]).StreamIndex = f.DataIndex
		if f.DataIndex == -1 {
			// Complete file: open the whole-file range.
			t.Option.Offset = repRange.Offset
			if repRange.Length != nil {
				t.Option.Length = *repRange.Length
			}
		} else {
			// Single block: open the block-local range.
			t.Option.Offset = blkRange.Offset
			if blkRange.Length != nil {
				t.Option.Length = *blkRange.Length
			}
		}
		if f.Node != nil {
			// The read is fixed to run on the designated agent.
			n.Env.ToEnvWorker(&ioswitch2.AgentWorker{Node: *f.Node})
			n.Env.Pinned = true
		}
		return n, nil
	case *ioswitch2.FromDriver:
		// Data is pushed in by the driver; record the wanted range as a hint
		// on the handle (note: this mutates the caller-supplied handle).
		n, _ := dag.NewNode(ctx.DAG, &ops.FromDriverType{Handle: f.Handle}, &ioswitch2.NodeProps{From: f})
		n.Env.ToEnvDriver()
		n.Env.Pinned = true
		ioswitch2.SProps(n.OutputStreams[0]).StreamIndex = f.DataIndex
		if f.DataIndex == -1 {
			f.Handle.RangeHint.Offset = repRange.Offset
			f.Handle.RangeHint.Length = repRange.Length
		} else {
			f.Handle.RangeHint.Offset = blkRange.Offset
			f.Handle.RangeHint.Length = blkRange.Length
		}
		return n, nil
	default:
		return nil, fmt.Errorf("unsupported from type %T", f)
	}
}
  230. func (p *DefaultParser) buildToNode(ctx *ParseContext, ft *ioswitch2.FromTo, t ioswitch2.To) (*dag.Node, error) {
  231. switch t := t.(type) {
  232. case *ioswitch2.ToNode:
  233. n, _ := dag.NewNode(ctx.DAG, &ops2.IPFSWriteType{
  234. FileHashStoreKey: t.FileHashStoreKey,
  235. Range: t.Range,
  236. }, &ioswitch2.NodeProps{
  237. To: t,
  238. })
  239. n.Env.ToEnvWorker(&ioswitch2.AgentWorker{Node: t.Node})
  240. n.Env.Pinned = true
  241. return n, nil
  242. case *ioswitch2.ToDriver:
  243. n, _ := dag.NewNode(ctx.DAG, &ops.ToDriverType{Handle: t.Handle, Range: t.Range}, &ioswitch2.NodeProps{To: t})
  244. n.Env.ToEnvDriver()
  245. n.Env.Pinned = true
  246. return n, nil
  247. default:
  248. return nil, fmt.Errorf("unsupported to type %T", t)
  249. }
  250. }
  251. // 删除输出流未被使用的Join指令
  252. func (p *DefaultParser) removeUnusedJoin(ctx *ParseContext) bool {
  253. changed := false
  254. dag.WalkOnlyType[*ops2.ChunkedJoinType](ctx.DAG, func(node *dag.Node, typ *ops2.ChunkedJoinType) bool {
  255. if len(node.OutputStreams[0].Toes) > 0 {
  256. return true
  257. }
  258. for _, in := range node.InputStreams {
  259. in.NotTo(node)
  260. }
  261. ctx.DAG.RemoveNode(node)
  262. return true
  263. })
  264. return changed
  265. }
// removeUnusedMultiplyOutput removes the unused output streams of Multiply
// instructions; when no output remains the whole instruction is removed.
// It reports whether the DAG was modified.
func (p *DefaultParser) removeUnusedMultiplyOutput(ctx *ParseContext) bool {
	changed := false
	dag.WalkOnlyType[*ops2.MultiplyType](ctx.DAG, func(node *dag.Node, typ *ops2.MultiplyType) bool {
		for i2, out := range node.OutputStreams {
			if len(out.Toes) > 0 {
				continue
			}
			// Mark for removal: nil in the stream slice, and -2 as a sentinel
			// in the index slice (valid stream indexes are >= -1).
			node.OutputStreams[i2] = nil
			typ.OutputIndexes[i2] = -2
			changed = true
		}
		// Compact both slices in lockstep so positions stay aligned.
		node.OutputStreams = lo2.RemoveAllDefault(node.OutputStreams)
		typ.OutputIndexes = lo2.RemoveAll(typ.OutputIndexes, -2)
		// If every output stream was removed, delete the instruction itself.
		if len(node.OutputStreams) == 0 {
			for _, in := range node.InputStreams {
				in.NotTo(node)
			}
			ctx.DAG.RemoveNode(node)
			changed = true
		}
		return true
	})
	return changed
}
  292. // 删除未使用的Split指令
  293. func (p *DefaultParser) removeUnusedSplit(ctx *ParseContext) bool {
  294. changed := false
  295. dag.WalkOnlyType[*ops2.ChunkedSplitType](ctx.DAG, func(node *dag.Node, typ *ops2.ChunkedSplitType) bool {
  296. // Split出来的每一个流都没有被使用,才能删除这个指令
  297. for _, out := range node.OutputStreams {
  298. if len(out.Toes) > 0 {
  299. return true
  300. }
  301. }
  302. node.InputStreams[0].NotTo(node)
  303. ctx.DAG.RemoveNode(node)
  304. changed = true
  305. return true
  306. })
  307. return changed
  308. }
// omitSplitJoin elides a Split whose outputs are immediately and completely
// reassembled by a single Join: F->Split->Join->T becomes F->T.
// It reports whether the DAG was modified.
func (p *DefaultParser) omitSplitJoin(ctx *ParseContext) bool {
	changed := false
	dag.WalkOnlyType[*ops2.ChunkedSplitType](ctx.DAG, func(splitNode *dag.Node, typ *ops2.ChunkedSplitType) bool {
		// Every output of the Split must have exactly one destination, and
		// all destinations must be the same node.
		// NOTE(review): outputs whose consumer count != 1 are skipped rather
		// than rejected here; the input/output count check below appears to
		// compensate — confirm this is intentional.
		var joinNode *dag.Node
		for _, out := range splitNode.OutputStreams {
			if len(out.Toes) != 1 {
				continue
			}
			if joinNode == nil {
				joinNode = out.Toes[0].Node
			} else if joinNode != out.Toes[0].Node {
				return true
			}
		}
		if joinNode == nil {
			return true
		}
		// That destination must be a Join instruction.
		_, ok := joinNode.Type.(*ops2.ChunkedJoinType)
		if !ok {
			return true
		}
		// All of the Join's inputs must come from this Split's outputs.
		// Since the Split outputs were shown to target the same node above,
		// comparing the Join's input count with the Split's output count suffices.
		if len(joinNode.InputStreams) != len(splitNode.OutputStreams) {
			return true
		}
		// All conditions hold: reconnect the Join's consumers directly to the
		// Split's input stream (F->Split->Join->T becomes F->T)...
		splitNode.InputStreams[0].NotTo(splitNode)
		for _, out := range joinNode.OutputStreams[0].Toes {
			splitNode.InputStreams[0].To(out.Node, out.SlotIndex)
		}
		// ...and delete both instructions.
		ctx.DAG.RemoveNode(joinNode)
		ctx.DAG.RemoveNode(splitNode)
		changed = true
		return true
	})
	return changed
}
// pin fixes the execution location of instructions from the locations of the
// streams flowing in and out of them.
// To-type instructions always have a fixed location; pinning spreads those
// locations step by step across the whole DAG, so in theory no instruction
// ends up with a location that can never be determined.
// It reports whether any node's environment changed, so the caller iterates
// until a fixed point is reached.
func (p *DefaultParser) pin(ctx *ParseContext) bool {
	changed := false
	ctx.DAG.Walk(func(node *dag.Node) bool {
		if node.Env.Pinned {
			return true
		}
		// Prefer the destinations: if every consumer with a known environment
		// agrees, run this node in that environment too.
		var toEnv *dag.NodeEnv
		for _, out := range node.OutputStreams {
			for _, to := range out.Toes {
				if to.Node.Env.Type == dag.EnvUnknown {
					continue
				}
				if toEnv == nil {
					toEnv = &to.Node.Env
				} else if !toEnv.Equals(to.Node.Env) {
					// Consumers disagree; fall back to the producers below.
					toEnv = nil
					break
				}
			}
		}
		if toEnv != nil {
			if !node.Env.Equals(*toEnv) {
				changed = true
			}
			node.Env = *toEnv
			return true
		}
		// Otherwise fix the location from where the input streams originate.
		var fromEnv *dag.NodeEnv
		for _, in := range node.InputStreams {
			if in.From.Node.Env.Type == dag.EnvUnknown {
				continue
			}
			if fromEnv == nil {
				fromEnv = &in.From.Node.Env
			} else if !fromEnv.Equals(in.From.Node.Env) {
				fromEnv = nil
				break
			}
		}
		if fromEnv != nil {
			if !node.Env.Equals(*fromEnv) {
				changed = true
			}
			node.Env = *fromEnv
		}
		return true
	})
	return changed
}
  405. // 对于所有未使用的流,增加Drop指令
  406. func (p *DefaultParser) dropUnused(ctx *ParseContext) {
  407. ctx.DAG.Walk(func(node *dag.Node) bool {
  408. for _, out := range node.OutputStreams {
  409. if len(out.Toes) == 0 {
  410. n := ctx.DAG.NewNode(&ops.DropType{}, &ioswitch2.NodeProps{})
  411. n.Env = node.Env
  412. out.To(n, 0)
  413. }
  414. }
  415. return true
  416. })
  417. }
  418. // 为IPFS写入指令存储结果
  419. func (p *DefaultParser) storeIPFSWriteResult(ctx *ParseContext) {
  420. dag.WalkOnlyType[*ops2.IPFSWriteType](ctx.DAG, func(node *dag.Node, typ *ops2.IPFSWriteType) bool {
  421. if typ.FileHashStoreKey == "" {
  422. return true
  423. }
  424. n, t := dag.NewNode(ctx.DAG, &ops.StoreType{
  425. StoreKey: typ.FileHashStoreKey,
  426. }, &ioswitch2.NodeProps{})
  427. n.Env.ToEnvDriver()
  428. t.Store(n, node.OutputValues[0])
  429. return true
  430. })
  431. }
  432. // 生成Range指令。StreamRange可能超过文件总大小,但Range指令会在数据量不够时不报错而是正常返回
  433. func (p *DefaultParser) generateRange(ctx *ParseContext) {
  434. ctx.DAG.Walk(func(node *dag.Node) bool {
  435. props := ioswitch2.NProps(node)
  436. if props.To == nil {
  437. return true
  438. }
  439. toDataIdx := props.To.GetDataIndex()
  440. toRng := props.To.GetRange()
  441. if toDataIdx == -1 {
  442. n := ctx.DAG.NewNode(&ops2.RangeType{
  443. Range: exec.Range{
  444. Offset: toRng.Offset - ctx.StreamRange.Offset,
  445. Length: toRng.Length,
  446. },
  447. }, &ioswitch2.NodeProps{})
  448. n.Env = node.InputStreams[0].From.Node.Env
  449. node.InputStreams[0].To(n, 0)
  450. node.InputStreams[0].NotTo(node)
  451. n.OutputStreams[0].To(node, 0)
  452. } else {
  453. stripSize := int64(p.EC.ChunkSize * p.EC.K)
  454. blkStartIdx := ctx.StreamRange.Offset / stripSize
  455. blkStart := blkStartIdx * int64(p.EC.ChunkSize)
  456. n := ctx.DAG.NewNode(&ops2.RangeType{
  457. Range: exec.Range{
  458. Offset: toRng.Offset - blkStart,
  459. Length: toRng.Length,
  460. },
  461. }, &ioswitch2.NodeProps{})
  462. n.Env = node.InputStreams[0].From.Node.Env
  463. node.InputStreams[0].To(n, 0)
  464. node.InputStreams[0].NotTo(node)
  465. n.OutputStreams[0].To(node, 0)
  466. }
  467. return true
  468. })
  469. }
// generateClone inserts Clone instructions behind every output (stream or
// value) that has more than one consumer, so each consumer receives its own
// copy.
func (p *DefaultParser) generateClone(ctx *ParseContext) {
	ctx.DAG.Walk(func(node *dag.Node) bool {
		for _, out := range node.OutputStreams {
			if len(out.Toes) <= 1 {
				continue
			}
			// One CloneStream node fans the stream out to all consumers,
			// running in the producer's environment.
			n, t := dag.NewNode(ctx.DAG, &ops2.CloneStreamType{}, &ioswitch2.NodeProps{})
			n.Env = node.Env
			for _, to := range out.Toes {
				str := t.NewOutput(n)
				// Each clone keeps the stream index of the original stream.
				str.Props = &ioswitch2.VarProps{StreamIndex: ioswitch2.SProps(out).StreamIndex}
				str.To(to.Node, to.SlotIndex)
			}
			// Rewire: the original output now feeds only the clone node.
			out.Toes = nil
			out.To(n, 0)
		}
		for _, out := range node.OutputValues {
			if len(out.Toes) <= 1 {
				continue
			}
			// Same treatment for value outputs, via CloneVar.
			n, t := dag.NewNode(ctx.DAG, &ops2.CloneVarType{}, &ioswitch2.NodeProps{})
			n.Env = node.Env
			for _, to := range out.Toes {
				t.NewOutput(n).To(to.Node, to.SlotIndex)
			}
			out.Toes = nil
			out.To(n, 0)
		}
		return true
	})
}

本项目旨在将云际存储公共基础设施化,使个人及企业可低门槛使用高效的云际存储服务(安装开箱即用云际存储客户端即可,无需关注其他组件的部署),同时支持用户灵活便捷定制云际存储的功能细节。