Browse Source

Migrated H2 id collection for ToC to MarkdownDeep

tags/0.9.0
Frans Bouma 10 years ago
parent
commit
40402eac34
3 changed files with 297 additions and 269 deletions
  1. +11
    -3
      src/DocNet/Utils.cs
  2. +18
    -4
      src/MarkdownDeep/Block.cs
  3. +268
    -262
      src/MarkdownDeep/MardownDeep.cs

+ 11
- 3
src/DocNet/Utils.cs View File

@@ -40,10 +40,18 @@ namespace Docnet
// createdAnchorCollector.AddRange(parser.CollectedH2AnchorNameTuples);
//}

var parser = new MarkdownDeep.Markdown();
parser.ExtraMode = true;
parser.GitHubCodeBlocks = true;
var parser = new MarkdownDeep.Markdown
{
ExtraMode = true,
GitHubCodeBlocks = true,
AutoHeadingIDs = true,
NewWindowForExternalLinks = true
};

#warning SET DocumentRoot and DocumentLocation for image features.

var toReturn = parser.Transform(toConvert);
createdAnchorCollector.AddRange(parser.CreatedH2IdCollector);
return toReturn;
}



+ 18
- 4
src/MarkdownDeep/Block.cs View File

@@ -16,6 +16,7 @@ using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Web.UI;

namespace MarkdownDeep
{
@@ -162,10 +163,11 @@ namespace MarkdownDeep
case BlockType.h4:
case BlockType.h5:
case BlockType.h6:
string id = string.Empty;
if (m.ExtraMode && !m.SafeMode)
{
b.Append("<" + blockType.ToString());
string id = ResolveHeaderID(m);
id = ResolveHeaderID(m);
if (!String.IsNullOrEmpty(id))
{
b.Append(" id=\"");
@@ -181,7 +183,19 @@ namespace MarkdownDeep
{
b.Append("<" + blockType.ToString() + ">");
}
m.SpanFormatter.Format(b, buf, contentStart, contentLen);
if(blockType == BlockType.h2 && !string.IsNullOrWhiteSpace(id))
{
// collect h2 id + text in collector
var h2ContentSb = new StringBuilder();
m.SpanFormatter.Format(h2ContentSb, buf, contentStart, contentLen);
var h2ContentAsString = h2ContentSb.ToString();
b.Append(h2ContentAsString);
m.CreatedH2IdCollector.Add(new Tuple<string, string>(id, h2ContentAsString));
}
else
{
m.SpanFormatter.Format(b, buf, contentStart, contentLen);
}
b.Append("</" + blockType.ToString() + ">\n");
break;

@@ -246,7 +260,7 @@ namespace MarkdownDeep
return;

case BlockType.codeblock:
if(m.FormatCodeBlock == null)
if(m.FormatCodeBlockFunc == null)
{
var dataArgument = this.data as string ?? string.Empty;
if(m.GitHubCodeBlocks && !string.IsNullOrWhiteSpace(dataArgument))
@@ -272,7 +286,7 @@ namespace MarkdownDeep
m.HtmlEncodeAndConvertTabsToSpaces(sb, line.buf, line.contentStart, line.contentLen);
sb.Append("\n");
}
b.Append(m.FormatCodeBlock(m, sb.ToString()));
b.Append(m.FormatCodeBlockFunc(m, sb.ToString()));
}
return;



+ 268
- 262
src/MarkdownDeep/MardownDeep.cs View File

@@ -31,6 +31,19 @@ namespace MarkdownDeep

public class Markdown
{
#region Members
// Private parser state for this Markdown instance.
// Scratch string builders.
private StringBuilder m_StringBuilder;
private StringBuilder m_StringBuilderFinal;
private StringScanner m_StringScanner;
// Exposed to the rest of the assembly through the internal SpanFormatter property.
private SpanFormatter m_SpanFormatter;
private Dictionary<string, LinkDefinition> m_LinkDefinitions;
// Footnote bookkeeping; both collections are instantiated in the constructor.
private Dictionary<string, Block> m_Footnotes;
private List<Block> m_UsedFootnotes;
// Instantiated in the constructor. NOTE(review): presumably tracks header ids that were
// already handed out so duplicates can be avoided — confirm against the id generation code.
private Dictionary<string, bool> m_UsedHeaderIDs;
private Dictionary<string, Abbreviation> m_AbbreviationMap;
private List<Abbreviation> m_AbbreviationList;
#endregion

// Constructor
public Markdown()
{
@@ -43,6 +56,7 @@ namespace MarkdownDeep
m_Footnotes = new Dictionary<string, Block>();
m_UsedFootnotes = new List<Block>();
m_UsedHeaderIDs = new Dictionary<string, bool>();
this.CreatedH2IdCollector = new List<Tuple<string, string>>();
}

internal List<Block> ProcessBlocks(string str)
@@ -58,6 +72,8 @@ namespace MarkdownDeep
// Process blocks
return new BlockProcessor(this, MarkdownInHtml).Process(str);
}


public string Transform(string str)
{
Dictionary<string, LinkDefinition> defs;
@@ -201,150 +217,13 @@ namespace MarkdownDeep
// Done
return sb.ToString();
}

public int SummaryLength
{
get;
set;
}


/// <summary>
/// Set to true to enable GitHub style fenced codeblocks:
/// ```cs
/// code
/// ```
/// The name specified after the first ``` is used as the class of the code element, which can then be used with highlight.js.
/// </summary>
public bool GitHubCodeBlocks { get; set; }

// Set to true to only allow whitelisted safe html tags
public bool SafeMode
{
get;
set;
}

// Set to true to enable ExtraMode, which enables the same set of
// features as implemented by PHP Markdown Extra.
// - Markdown in html (eg: <div markdown="1"> or <div markdown="deep"> )
// - Header ID attributes
// - Fenced code blocks
// - Definition lists
// - Footnotes
// - Abbreviations
// - Simple tables
public bool ExtraMode
{
get;
set;
}

// When set, all html block level elements automatically support
// markdown syntax within them.
// (Similar to Pandoc's handling of markdown in html)
public bool MarkdownInHtml
{
get;
set;
}

// When set, all headings will have an auto generated ID attribute
// based on the heading text (uses the same algorithm as Pandoc)
public bool AutoHeadingIDs
{
get;
set;
}

// When set, all non-qualified urls (links and images) will
// be qualified using this location as the base.
// Useful when rendering RSS feeds that require fully qualified urls.
public string UrlBaseLocation
{
get;
set;
}

// When set, all non-qualified urls (links and images) beginning with a slash
// will be qualified by prefixing them with this string.
// Useful when rendering RSS feeds that require fully qualified urls.
public string UrlRootLocation
{
get;
set;
}

// When true, all fully qualified urls will be given a `target="_blank"` attribute,
// causing them to open in a separate browser window/tab
// ie: relative links open in the same window, qualified links open externally
public bool NewWindowForExternalLinks
{
get;
set;
}

// When true, all urls (qualified or not) will get target="_blank" attribute
// (useful for preview mode on posts)
public bool NewWindowForLocalLinks
{
get;
set;
}

// When set, will try to determine the width/height for local images by searching
// for an appropriately named file relative to the specified location
// Local file system location of the document root. Used to locate image
// files that start with slash.
// Typical value: c:\inetpub\www\wwwroot
public string DocumentRoot
{
get;
set;
}

// Local file system location of the current document. Used to locate relative
// path images for image size.
// Typical value: c:\inetpub\www\wwwroot\subfolder
public string DocumentLocation
{
get;
set;
}

// Limit the width of images (0 for no limit)
public int MaxImageWidth
{
get;
set;
}

// Set rel="nofollow" on all links
public bool NoFollowLinks
{
get;
set;
}

/// <summary>
/// Add the NoFollow attribute to all external links.
/// </summary>
public bool NoFollowExternalLinks
{
get;
set;
}



public Func<string, string> QualifyUrl;

// Override to qualify non-local image and link urls
public virtual string OnQualifyUrl(string url)
{
if (QualifyUrl != null)
if (QualifyUrlFunc != null)
{
var q = QualifyUrl(url);
var q = QualifyUrlFunc(url);
if (q != null)
return q;
}
@@ -392,20 +271,17 @@ namespace MarkdownDeep

if (!UrlBaseLocation.EndsWith("/"))
return UrlBaseLocation + "/" + url;
else
return UrlBaseLocation + url;
return UrlBaseLocation + url;
}
}

public Func<ImageInfo, bool> GetImageSize;

// Override to supply the size of an image
public virtual bool OnGetImageSize(string url, bool TitledImage, out int width, out int height)
{
if (GetImageSize != null)
if (GetImageSizeFunc != null)
{
var info = new ImageInfo() { url = url, titled_image=TitledImage };
if (GetImageSize(info))
if (GetImageSizeFunc(info))
{
width = info.width;
height = info.height;
@@ -461,15 +337,13 @@ namespace MarkdownDeep
}
}


public Func<HtmlTag, bool> PrepareLink;
// Override to modify the attributes of a link
public virtual void OnPrepareLink(HtmlTag tag)
{
if (PrepareLink != null)
if (PrepareLinkFunc != null)
{
if (PrepareLink(tag))
if (PrepareLinkFunc(tag))
return;
}

@@ -488,8 +362,6 @@ namespace MarkdownDeep
tag.attributes["rel"] = "nofollow";
}


// New window?
if ( (NewWindowForExternalLinks && Utils.IsUrlFullyQualified(url)) ||
(NewWindowForLocalLinks && !Utils.IsUrlFullyQualified(url)) )
@@ -501,16 +373,12 @@ namespace MarkdownDeep
tag.attributes["href"] = OnQualifyUrl(url);
}

public Func<HtmlTag, bool, bool> PrepareImage;

internal bool RenderingTitledImage = false;

// Override to modify the attributes of an image
public virtual void OnPrepareImage(HtmlTag tag, bool TitledImage)
{
if (PrepareImage != null)
if (PrepareImageFunc != null)
{
if (PrepareImage(tag, TitledImage))
if (PrepareImageFunc(tag, TitledImage))
return;
}

@@ -526,86 +394,6 @@ namespace MarkdownDeep
tag.attributes["src"] = OnQualifyUrl(tag.attributes["src"]);
}

// Set the html class for the footnotes div
// (defaults to "footnotes")
// btw fyi: you can use css to disable the footnotes horizontal rule. eg:
// div.footnotes hr { display:none }
public string HtmlClassFootnotes
{
get;
set;
}

// Callback to format a code block (ie: apply syntax highlighting)
// string FormatCodeBlock(code)
// Code = code block content (ie: the code to format)
// Return the formatted code, including <pre> and <code> tags
public Func<Markdown, string, string> FormatCodeBlock;

// when set to true, will remove head blocks and make content available
// as HeadBlockContent
public bool ExtractHeadBlocks
{
get;
set;
}

// Retrieve extracted head block content
public string HeadBlockContent
{
get;
internal set;
}

// Treats "===" as a user section break
public bool UserBreaks
{
get;
set;
}

// Set the classname for titled images
// A titled image is defined as a paragraph that contains an image and nothing else.
// If not set (the default), this feature is disabled, otherwise the output is:
//
// <div class="<%=this.HtmlClassTitledImages%>">
// <img src="image.png" />
// <p>Alt text goes here</p>
// </div>
//
// Use CSS to style the figure and the caption
public string HtmlClassTitledImages
{
// TODO:
get;
set;
}

// Set a format string to be rendered before headings
// {0} = section number
// (useful for rendering links that can lead to a page that edits that section)
// (eg: "<a href=/edit/page?section={0}>"
public string SectionHeader
{
get;
set;
}

// Set a format string to be rendered after each section heading
public string SectionHeadingSuffix
{
get;
set;
}

// Set a format string to be rendered after the section content (ie: before
// the next section heading, or at the end of the document).
public string SectionFooter
{
get;
set;
}

public virtual void OnSectionHeader(StringBuilder dest, int Index)
{
if (SectionHeader != null)
@@ -630,13 +418,12 @@ namespace MarkdownDeep
}
}

bool IsSectionHeader(Block b)
// Reports whether the given block is a heading that starts a section, i.e. its
// block type lies between BlockType.h1 and BlockType.h3 (inclusive, by enum value).
private bool IsSectionHeader(Block b)
{
    var headingType = b.blockType;
    if (headingType < BlockType.h1)
    {
        return false;
    }
    return headingType <= BlockType.h3;
}



// Split the markdown into sections, one section for each
// top level heading
public static List<string> SplitUserSections(string markdown)
@@ -967,14 +754,6 @@ namespace MarkdownDeep
}


internal SpanFormatter SpanFormatter
{
get
{
return m_SpanFormatter;
}
}

#region Block Pooling

// We cache and re-use blocks for performance
@@ -996,19 +775,246 @@ namespace MarkdownDeep

#endregion

// Attributes
StringBuilder m_StringBuilder;
StringBuilder m_StringBuilderFinal;
StringScanner m_StringScanner;
SpanFormatter m_SpanFormatter;
Dictionary<string, LinkDefinition> m_LinkDefinitions;
Dictionary<string, Block> m_Footnotes;
List<Block> m_UsedFootnotes;
Dictionary<string, bool> m_UsedHeaderIDs;
Dictionary<string, Abbreviation> m_AbbreviationMap;
List<Abbreviation> m_AbbreviationList;


#region Properties
/// <summary>
/// Internal flag signalling that a titled image is currently being rendered.
/// </summary>
internal bool RenderingTitledImage { get; set; }

/// <summary>
/// Gets the span formatter owned by this instance.
/// </summary>
internal SpanFormatter SpanFormatter
{
    get { return m_SpanFormatter; }
}

public int SummaryLength { get; set; }

/// <summary>
/// Set to true to enable GitHub style fenced codeblocks:
/// ```cs
/// code
/// ```
/// The name specified after the first ``` is used as the class of the code element,
/// which can then be used with highlight.js.
/// </summary>
public bool GitHubCodeBlocks { get; set; }

/// <summary>
/// Set to true to only allow whitelisted safe html tags.
/// </summary>
public bool SafeMode { get; set; }

/// <summary>
/// Set to true to enable ExtraMode, which enables the same set of features as
/// implemented by PHP Markdown Extra:
/// - Markdown in html (eg: &lt;div markdown="1"&gt; or &lt;div markdown="deep"&gt;)
/// - Header ID attributes
/// - Fenced code blocks
/// - Definition lists
/// - Footnotes
/// - Abbreviations
/// - Simple tables
/// </summary>
public bool ExtraMode { get; set; }

/// <summary>
/// When set, all html block level elements automatically support markdown syntax
/// within them (similar to Pandoc's handling of markdown in html).
/// </summary>
public bool MarkdownInHtml { get; set; }

/// <summary>
/// When set, all headings will have an auto generated ID attribute based on the
/// heading text (uses the same algorithm as Pandoc).
/// </summary>
public bool AutoHeadingIDs { get; set; }

/// <summary>
/// When set, all non-qualified urls (links and images) will be qualified using this
/// location as the base. Useful when rendering RSS feeds that require fully qualified urls.
/// </summary>
public string UrlBaseLocation { get; set; }

/// <summary>
/// When set, all non-qualified urls (links and images) beginning with a slash will be
/// qualified by prefixing them with this string. Useful when rendering RSS feeds that
/// require fully qualified urls.
/// </summary>
public string UrlRootLocation { get; set; }

/// <summary>
/// When true, all fully qualified urls will be given a target="_blank" attribute, causing
/// them to open in a separate browser window/tab. I.e. relative links open in the same
/// window, qualified links open externally.
/// </summary>
public bool NewWindowForExternalLinks { get; set; }

/// <summary>
/// When true, all urls (qualified or not) will get a target="_blank" attribute
/// (useful for preview mode on posts).
/// </summary>
public bool NewWindowForLocalLinks { get; set; }

/// <summary>
/// Local file system location of the document root. When set, the width/height of local
/// images is determined by searching for an appropriately named file relative to this
/// location. Used to locate image files that start with a slash.
/// Typical value: c:\inetpub\www\wwwroot
/// </summary>
public string DocumentRoot { get; set; }

/// <summary>
/// Local file system location of the current document. Used to locate relative path
/// images for image size.
/// Typical value: c:\inetpub\www\wwwroot\subfolder
/// </summary>
public string DocumentLocation { get; set; }

/// <summary>
/// Limit the width of images (0 for no limit).
/// </summary>
public int MaxImageWidth { get; set; }

/// <summary>
/// Set rel="nofollow" on all links.
/// </summary>
public bool NoFollowLinks { get; set; }

/// <summary>
/// Add the NoFollow attribute to all external links.
/// </summary>
public bool NoFollowExternalLinks { get; set; }

/// <summary>
/// Collector for the ids created for H2 headers. The first element of each tuple is the id,
/// the second is the name for the ToC (the text of the H2). Ids are generated by the parser
/// using the pandoc algorithm, as AutoHeadingIDs is switched on.
/// </summary>
public List<Tuple<string, string>> CreatedH2IdCollector { get; private set; }

/// <summary>
/// Set the html class for the footnotes div (defaults to "footnotes").
/// Note: css can be used to disable the footnotes horizontal rule,
/// eg: div.footnotes hr { display:none }
/// </summary>
public string HtmlClassFootnotes { get; set; }

/// <summary>
/// Optional callback consulted by OnQualifyUrl; a non-null result is used as the qualified url.
/// </summary>
public Func<string, string> QualifyUrlFunc { get; set; }

/// <summary>
/// Optional callback consulted by OnGetImageSize; when it returns true, the width and height
/// stored in the passed ImageInfo are used.
/// </summary>
public Func<ImageInfo, bool> GetImageSizeFunc { get; set; }

/// <summary>
/// Optional callback consulted by OnPrepareLink; when it returns true, the default link
/// preparation is skipped.
/// </summary>
public Func<HtmlTag, bool> PrepareLinkFunc { get; set; }

/// <summary>
/// Optional callback consulted by OnPrepareImage; when it returns true, the default image
/// preparation is skipped.
/// </summary>
public Func<HtmlTag, bool, bool> PrepareImageFunc { get; set; }

/// <summary>
/// Callback to format a code block (ie: apply syntax highlighting). Receives this Markdown
/// instance and the code block content (ie: the code to format) and returns the formatted
/// code, including &lt;pre&gt; and &lt;code&gt; tags.
/// </summary>
public Func<Markdown, string, string> FormatCodeBlockFunc { get; set; }

/// <summary>
/// When set to true, head blocks are removed and their content is made available as
/// HeadBlockContent.
/// </summary>
public bool ExtractHeadBlocks { get; set; }

/// <summary>
/// Retrieve the extracted head block content.
/// </summary>
public string HeadBlockContent { get; internal set; }

/// <summary>
/// Treats "===" as a user section break.
/// </summary>
public bool UserBreaks { get; set; }

/// <summary>
/// Set the classname for titled images. A titled image is defined as a paragraph that
/// contains an image and nothing else. If not set (the default), this feature is disabled,
/// otherwise the output is:
///
/// &lt;div class="&lt;%=this.HtmlClassTitledImages%&gt;"&gt;
/// &lt;img src="image.png" /&gt;
/// &lt;p&gt;Alt text goes here&lt;/p&gt;
/// &lt;/div&gt;
///
/// Use CSS to style the figure and the caption.
/// </summary>
// TODO:
public string HtmlClassTitledImages { get; set; }

/// <summary>
/// Set a format string to be rendered before headings; {0} = section number.
/// Useful for rendering links that can lead to a page that edits that section,
/// eg: "&lt;a href=/edit/page?section={0}&gt;"
/// </summary>
public string SectionHeader { get; set; }

/// <summary>
/// Set a format string to be rendered after each section heading.
/// </summary>
public string SectionHeadingSuffix { get; set; }

/// <summary>
/// Set a format string to be rendered after the section content (ie: before the next
/// section heading, or at the end of the document).
/// </summary>
public string SectionFooter { get; set; }

#endregion

}

}

Loading…
Cancel
Save