You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

markdown.go 12 kB

12 years ago
12 years ago
12 years ago
10 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
12 years ago
12 years ago
12 years ago
12 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389
  1. // Copyright 2014 The Gogs Authors. All rights reserved.
  2. // Use of this source code is governed by a MIT-style
  3. // license that can be found in the LICENSE file.
  4. package markdown
  5. import (
  6. "bytes"
  7. "fmt"
  8. "io"
  9. "path"
  10. "path/filepath"
  11. "regexp"
  12. "strings"
  13. "github.com/Unknwon/com"
  14. "github.com/microcosm-cc/bluemonday"
  15. "github.com/russross/blackfriday"
  16. "golang.org/x/net/html"
  17. "github.com/gogits/gogs/modules/base"
  18. "github.com/gogits/gogs/modules/setting"
  19. )
  // ISSUE_NAME_STYLE_NUMERIC names the issue reference style made of a hash
  // sign followed by digits, e.g. #1287.
  const ISSUE_NAME_STYLE_NUMERIC = "numeric"

  // ISSUE_NAME_STYLE_ALPHANUMERIC names the issue reference style made of an
  // upper-case key, a dash and a number, e.g. ABC-1234. It is compared against
  // the "style" entry of the metas map when issue references are rendered.
  const ISSUE_NAME_STYLE_ALPHANUMERIC = "alphanumeric"
  24. var Sanitizer = bluemonday.UGCPolicy()
  25. // BuildSanitizer initializes sanitizer with allowed attributes based on settings.
  26. // This function should only be called once during entire application lifecycle.
  27. func BuildSanitizer() {
  28. // Normal markdown-stuff
  29. Sanitizer.AllowAttrs("class").Matching(regexp.MustCompile(`[\p{L}\p{N}\s\-_',:\[\]!\./\\\(\)&]*`)).OnElements("code")
  30. // Checkboxes
  31. Sanitizer.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input")
  32. Sanitizer.AllowAttrs("checked", "disabled").OnElements("input")
  33. // Custom URL-Schemes
  34. Sanitizer.AllowURLSchemes(setting.Markdown.CustomURLSchemes...)
  35. }
  36. var validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`)
  37. // isLink reports whether link fits valid format.
  38. func isLink(link []byte) bool {
  39. return validLinksPattern.Match(link)
  40. }
  // IsMarkdownFile reports whether name has one of the known Markdown file
  // extensions (.md, .markdown, .mdown, .mkd). The comparison ignores case.
  func IsMarkdownFile(name string) bool {
  	ext := filepath.Ext(strings.ToLower(name))
  	return ext == ".md" || ext == ".markdown" || ext == ".mdown" || ext == ".mkd"
  }
  // IsReadmeFile reports whether name looks like a README file: either exactly
  // "readme" or "readme." followed by anything. The comparison ignores case.
  func IsReadmeFile(name string) bool {
  	lowered := strings.ToLower(name)
  	return lowered == "readme" || strings.HasPrefix(lowered, "readme.")
  }
  // MentionPattern matches a mention of a user, e.g. @Unknwon. The match
  // includes the single whitespace character in front of the "@", if any.
  var MentionPattern = regexp.MustCompile(`(\s|^)@[0-9a-zA-Z_\.]+`)

  // CommitPattern matches a link to a commit with or without a trailing hash,
  // e.g. https://try.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2
  var CommitPattern = regexp.MustCompile(`(\s|^)https?.*commit/[0-9a-zA-Z]+(#+[0-9a-zA-Z-]*)?`)

  // IssueFullPattern matches a link to an issue with or without a trailing hash,
  // e.g. https://try.gogs.io/gogs/gogs/issues/4#issue-685
  var IssueFullPattern = regexp.MustCompile(`(\s|^)https?.*issues/[0-9]+(#+[0-9a-zA-Z-]*)?`)

  // IssueNumericPattern matches a reference to a numeric issue, e.g. #1287.
  // The match includes the leading space or opening parenthesis, if any.
  var IssueNumericPattern = regexp.MustCompile(`( |^|\()#[0-9]+\b`)

  // IssueAlphanumericPattern matches a reference to an alphanumeric issue,
  // e.g. ABC-1234. The match includes the leading space or opening
  // parenthesis, if any.
  var IssueAlphanumericPattern = regexp.MustCompile(`( |^|\()[A-Z]{1,10}-[1-9][0-9]*\b`)

  // Sha1CurrentPattern matches a full 40-character lower-case hexadecimal
  // commit SHA, e.g. d8a994ef243349f321568f9e36d5c3f444b99cae
  var Sha1CurrentPattern = regexp.MustCompile(`\b[0-9a-f]{40}\b`)
  78. // Renderer is a extended version of underlying render object.
  79. type Renderer struct {
  80. blackfriday.Renderer
  81. urlPrefix string
  82. }
  83. // Link defines how formal links should be processed to produce corresponding HTML elements.
  84. func (r *Renderer) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
  85. if len(link) > 0 && !isLink(link) {
  86. if link[0] != '#' {
  87. link = []byte(path.Join(r.urlPrefix, string(link)))
  88. }
  89. }
  90. r.Renderer.Link(out, link, title, content)
  91. }
  92. // AutoLink defines how auto-detected links should be processed to produce corresponding HTML elements.
  93. // Reference for kind: https://github.com/russross/blackfriday/blob/master/markdown.go#L69-L76
  94. func (r *Renderer) AutoLink(out *bytes.Buffer, link []byte, kind int) {
  95. if kind != blackfriday.LINK_TYPE_NORMAL {
  96. r.Renderer.AutoLink(out, link, kind)
  97. return
  98. }
  99. // Since this method could only possibly serve one link at a time,
  100. // we do not need to find all.
  101. if bytes.HasPrefix(link, []byte(setting.AppUrl)) {
  102. m := CommitPattern.Find(link)
  103. if m != nil {
  104. m = bytes.TrimSpace(m)
  105. i := strings.Index(string(m), "commit/")
  106. j := strings.Index(string(m), "#")
  107. if j == -1 {
  108. j = len(m)
  109. }
  110. out.WriteString(fmt.Sprintf(` <code><a href="%s">%s</a></code>`, m, base.ShortSha(string(m[i+7:j]))))
  111. return
  112. }
  113. m = IssueFullPattern.Find(link)
  114. if m != nil {
  115. m = bytes.TrimSpace(m)
  116. i := strings.Index(string(m), "issues/")
  117. j := strings.Index(string(m), "#")
  118. if j == -1 {
  119. j = len(m)
  120. }
  121. out.WriteString(fmt.Sprintf(`<a href="%s">#%s</a>`, m, base.ShortSha(string(m[i+7:j]))))
  122. return
  123. }
  124. }
  125. r.Renderer.AutoLink(out, link, kind)
  126. }
  127. // ListItem defines how list items should be processed to produce corresponding HTML elements.
  128. func (options *Renderer) ListItem(out *bytes.Buffer, text []byte, flags int) {
  129. // Detect procedures to draw checkboxes.
  130. switch {
  131. case bytes.HasPrefix(text, []byte("[ ] ")):
  132. text = append([]byte(`<input type="checkbox" disabled="" />`), text[3:]...)
  133. case bytes.HasPrefix(text, []byte("[x] ")):
  134. text = append([]byte(`<input type="checkbox" disabled="" checked="" />`), text[3:]...)
  135. }
  136. options.Renderer.ListItem(out, text, flags)
  137. }
  // Constant literals kept at package level so they are not re-allocated on
  // every call.

  // Byte-slice forms, used when working on link bytes.
  var (
  	svgSuffix         = []byte(".svg")
  	svgSuffixWithMark = []byte(".svg?")
  	spaceBytes        = []byte(" ")
  	spaceEncodedBytes = []byte("%20")
  )

  // String forms, used when working on URL prefixes.
  var (
  	space        = " "
  	spaceEncoded = "%20"
  )
  148. // Image defines how images should be processed to produce corresponding HTML elements.
  149. func (r *Renderer) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
  150. prefix := strings.Replace(r.urlPrefix, "/src/", "/raw/", 1)
  151. if len(link) > 0 {
  152. if isLink(link) {
  153. // External link with .svg suffix usually means CI status.
  154. // TODO: define a keyword to allow non-svg images render as external link.
  155. if bytes.HasSuffix(link, svgSuffix) || bytes.Contains(link, svgSuffixWithMark) {
  156. r.Renderer.Image(out, link, title, alt)
  157. return
  158. }
  159. } else {
  160. if link[0] != '/' {
  161. prefix += "/"
  162. }
  163. link = bytes.Replace([]byte((prefix + string(link))), spaceBytes, spaceEncodedBytes, -1)
  164. fmt.Println(333, string(link))
  165. }
  166. }
  167. out.WriteString(`<a href="`)
  168. out.Write(link)
  169. out.WriteString(`">`)
  170. r.Renderer.Image(out, link, title, alt)
  171. out.WriteString("</a>")
  172. }
  173. // cutoutVerbosePrefix cutouts URL prefix including sub-path to
  174. // return a clean unified string of request URL path.
  175. func cutoutVerbosePrefix(prefix string) string {
  176. count := 0
  177. for i := 0; i < len(prefix); i++ {
  178. if prefix[i] == '/' {
  179. count++
  180. }
  181. if count >= 3+setting.AppSubUrlDepth {
  182. return prefix[:i]
  183. }
  184. }
  185. return prefix
  186. }
  187. // RenderIssueIndexPattern renders issue indexes to corresponding links.
  188. func RenderIssueIndexPattern(rawBytes []byte, urlPrefix string, metas map[string]string) []byte {
  189. urlPrefix = cutoutVerbosePrefix(urlPrefix)
  190. pattern := IssueNumericPattern
  191. if metas["style"] == ISSUE_NAME_STYLE_ALPHANUMERIC {
  192. pattern = IssueAlphanumericPattern
  193. }
  194. ms := pattern.FindAll(rawBytes, -1)
  195. for _, m := range ms {
  196. if m[0] == ' ' || m[0] == '(' {
  197. m = m[1:] // ignore leading space or opening parentheses
  198. }
  199. var link string
  200. if metas == nil {
  201. link = fmt.Sprintf(`<a href="%s/issues/%s">%s</a>`, urlPrefix, m[1:], m)
  202. } else {
  203. // Support for external issue tracker
  204. if metas["style"] == ISSUE_NAME_STYLE_ALPHANUMERIC {
  205. metas["index"] = string(m)
  206. } else {
  207. metas["index"] = string(m[1:])
  208. }
  209. link = fmt.Sprintf(`<a href="%s">%s</a>`, com.Expand(metas["format"], metas), m)
  210. }
  211. rawBytes = bytes.Replace(rawBytes, m, []byte(link), 1)
  212. }
  213. return rawBytes
  214. }
  215. // RenderSha1CurrentPattern renders SHA1 strings to corresponding links that assumes in the same repository.
  216. func RenderSha1CurrentPattern(rawBytes []byte, urlPrefix string) []byte {
  217. ms := Sha1CurrentPattern.FindAll(rawBytes, -1)
  218. for _, m := range ms {
  219. rawBytes = bytes.Replace(rawBytes, m, []byte(fmt.Sprintf(
  220. `<a href="%s/commit/%s"><code>%s</code></a>`, urlPrefix, m, base.ShortSha(string(m)))), -1)
  221. }
  222. return rawBytes
  223. }
  224. // RenderSpecialLink renders mentions, indexes and SHA1 strings to corresponding links.
  225. func RenderSpecialLink(rawBytes []byte, urlPrefix string, metas map[string]string) []byte {
  226. ms := MentionPattern.FindAll(rawBytes, -1)
  227. for _, m := range ms {
  228. m = bytes.TrimSpace(m)
  229. rawBytes = bytes.Replace(rawBytes, m,
  230. []byte(fmt.Sprintf(`<a href="%s/%s">%s</a>`, setting.AppSubUrl, m[1:], m)), -1)
  231. }
  232. rawBytes = RenderIssueIndexPattern(rawBytes, urlPrefix, metas)
  233. rawBytes = RenderSha1CurrentPattern(rawBytes, urlPrefix)
  234. return rawBytes
  235. }
  236. // RenderRaw renders Markdown to HTML without handling special links.
  237. func RenderRaw(body []byte, urlPrefix string) []byte {
  238. htmlFlags := 0
  239. htmlFlags |= blackfriday.HTML_SKIP_STYLE
  240. htmlFlags |= blackfriday.HTML_OMIT_CONTENTS
  241. renderer := &Renderer{
  242. Renderer: blackfriday.HtmlRenderer(htmlFlags, "", ""),
  243. urlPrefix: urlPrefix,
  244. }
  245. // set up the parser
  246. extensions := 0
  247. extensions |= blackfriday.EXTENSION_NO_INTRA_EMPHASIS
  248. extensions |= blackfriday.EXTENSION_TABLES
  249. extensions |= blackfriday.EXTENSION_FENCED_CODE
  250. extensions |= blackfriday.EXTENSION_AUTOLINK
  251. extensions |= blackfriday.EXTENSION_STRIKETHROUGH
  252. extensions |= blackfriday.EXTENSION_SPACE_HEADERS
  253. extensions |= blackfriday.EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK
  254. if setting.Markdown.EnableHardLineBreak {
  255. extensions |= blackfriday.EXTENSION_HARD_LINE_BREAK
  256. }
  257. body = blackfriday.Markdown(body, renderer, extensions)
  258. return body
  259. }
  // leftAngleBracket and rightAngleBracket are the opening ("</") and closing
  // (">") pieces written around a tag name when an end tag is emitted.
  var (
  	leftAngleBracket  = []byte("</")
  	rightAngleBracket = []byte(">")
  )

  // noEndTags lists the elements that are written without a matching end tag.
  var noEndTags = []string{
  	"img",
  	"input",
  	"br",
  	"hr",
  }
  265. // PostProcess treats different types of HTML differently,
  266. // and only renders special links for plain text blocks.
  267. func PostProcess(rawHtml []byte, urlPrefix string, metas map[string]string) []byte {
  268. startTags := make([]string, 0, 5)
  269. var buf bytes.Buffer
  270. tokenizer := html.NewTokenizer(bytes.NewReader(rawHtml))
  271. OUTER_LOOP:
  272. for html.ErrorToken != tokenizer.Next() {
  273. token := tokenizer.Token()
  274. switch token.Type {
  275. case html.TextToken:
  276. buf.Write(RenderSpecialLink([]byte(token.String()), urlPrefix, metas))
  277. case html.StartTagToken:
  278. buf.WriteString(token.String())
  279. tagName := token.Data
  280. // If this is an excluded tag, we skip processing all output until a close tag is encountered.
  281. if strings.EqualFold("a", tagName) || strings.EqualFold("code", tagName) || strings.EqualFold("pre", tagName) {
  282. stackNum := 1
  283. for html.ErrorToken != tokenizer.Next() {
  284. token = tokenizer.Token()
  285. // Copy the token to the output verbatim
  286. buf.WriteString(token.String())
  287. if token.Type == html.StartTagToken {
  288. stackNum++
  289. }
  290. // If this is the close tag to the outer-most, we are done
  291. if token.Type == html.EndTagToken {
  292. stackNum--
  293. if stackNum <= 0 && strings.EqualFold(tagName, token.Data) {
  294. break
  295. }
  296. }
  297. }
  298. continue OUTER_LOOP
  299. }
  300. if !com.IsSliceContainsStr(noEndTags, token.Data) {
  301. startTags = append(startTags, token.Data)
  302. }
  303. case html.EndTagToken:
  304. if len(startTags) == 0 {
  305. buf.WriteString(token.String())
  306. break
  307. }
  308. buf.Write(leftAngleBracket)
  309. buf.WriteString(startTags[len(startTags)-1])
  310. buf.Write(rightAngleBracket)
  311. startTags = startTags[:len(startTags)-1]
  312. default:
  313. buf.WriteString(token.String())
  314. }
  315. }
  316. if io.EOF == tokenizer.Err() {
  317. return buf.Bytes()
  318. }
  319. // If we are not at the end of the input, then some other parsing error has occurred,
  320. // so return the input verbatim.
  321. return rawHtml
  322. }
  323. // Render renders Markdown to HTML with special links.
  324. func Render(rawBytes []byte, urlPrefix string, metas map[string]string) []byte {
  325. urlPrefix = strings.Replace(urlPrefix, space, spaceEncoded, -1)
  326. result := RenderRaw(rawBytes, urlPrefix)
  327. result = PostProcess(result, urlPrefix, metas)
  328. result = Sanitizer.SanitizeBytes(result)
  329. return result
  330. }
  331. // RenderString renders Markdown to HTML with special links and returns string type.
  332. func RenderString(raw, urlPrefix string, metas map[string]string) string {
  333. return string(Render([]byte(raw), urlPrefix, metas))
  334. }