From 9a6b4add71cf3fe59c70ea8e084bedf29bcd52c4 Mon Sep 17 00:00:00 2001
From: Robin Stocker a This is Sparta
+ * Example: See the following packages for details:
+ * Start with the {@link #builder} method to configure the renderer. Example:
+ *
+ * Set it to {@code "
+ * Set it to {@code " "} to ignore line wrapping in the source.
+ *
+ * @param softbreak HTML for softbreak
+ * @return {@code this}
+ */
public Builder softbreak(String softbreak) {
this.softbreak = softbreak;
return this;
}
/**
- * Whether {@link HtmlTag} and {@link HtmlBlock} should be escaped.
+ * Whether {@link HtmlTag} and {@link HtmlBlock} should be escaped, defaults to {@value #ESCAPE_HTML_DEFAULT}.
*
* Note that {@link HtmlTag} is only a tag itself, not the text between an opening tag and a closing tag. So markup
* in the text will be parsed as normal and is not affected by this option.
@@ -84,7 +122,9 @@ public Builder escapeHtml(boolean escapeHtml) {
}
/**
- * Whether URLs of link or images should be percent-encoded. If enabled, the following is done:
+ * Whether URLs of link or images should be percent-encoded, defaults to {@value #PERCENT_ENCODE_URLS_DEFAULT}.
+ *
+ * If enabled, the following is done:
*
+ * Start with the {@link #builder} method, configure the parser and build it. Example:
+ *
* Note that this method is thread-safe (a new parser state is used for each invocation).
*
@@ -47,7 +61,16 @@ public Node parse(String input) {
Node document = documentParser.parse(input);
return postProcess(document);
}
-
+
+ /**
+ * Parse the specified reader into a tree of nodes. The caller is responsible for closing the reader.
+ *
+ * Note that this method is thread-safe (a new parser state is used for each invocation).
+ *
+ * @param input the reader to parse
+ * @return the root node
+ * @throws IOException when reading throws an exception
+ */
public Node parseReader(Reader input) throws IOException {
InlineParserImpl inlineParser = new InlineParserImpl(specialCharacters, delimiterCharacters, delimiterProcessors);
DocumentParser documentParser = new DocumentParser(blockParserFactories, inlineParser);
@@ -61,19 +84,25 @@ private Node postProcess(Node document) {
}
return document;
}
-
+
+ /**
+ * Builder for configuring a {@link Parser}.
+ */
public static class Builder {
private final List
* Note that {@link HtmlTag} is only a tag itself, not the text between an opening tag and a closing tag. So markup
* in the text will be parsed as normal and is not affected by this option.
@@ -122,7 +119,7 @@ public Builder escapeHtml(boolean escapeHtml) {
}
/**
- * Whether URLs of link or images should be percent-encoded, defaults to {@value #PERCENT_ENCODE_URLS_DEFAULT}.
+ * Whether URLs of link or images should be percent-encoded, defaults to {@code false}.
*
* If enabled, the following is done:
* Example:
+ * Create it with {@link #create()} and then configure it on the builders
+ * ({@link org.commonmark.parser.Parser.Builder#extensions(Iterable)},
+ * {@link org.commonmark.html.HtmlRenderer.Builder#extensions(Iterable)}).
+ * See {@link org.commonmark.ext.autolink.AutolinkExtension}
+ * Create it with {@link #create()} and then configure it on the builders
+ * ({@link org.commonmark.parser.Parser.Builder#extensions(Iterable)},
+ * {@link org.commonmark.html.HtmlRenderer.Builder#extensions(Iterable)}).
+ * See {@link org.commonmark.ext.gfm.strikethrough.StrikethroughExtension}
+ * Create it with {@link #create()} and then configure it on the builders
+ * ({@link org.commonmark.parser.Parser.Builder#extensions(Iterable)},
+ * {@link org.commonmark.html.HtmlRenderer.Builder#extensions(Iterable)}).
+ * See {@link org.commonmark.ext.gfm.tables.TablesExtension}
+ * The parsed links are turned into normal {@link org.commonmark.node.Link} nodes.
+ * \n
\n");
}
+ @Test
+ public void orderedListMarkerOnly() {
+ assertRendering("2.", "\n\n
\n\n\n
\n");
+ }
+
}
From 0fde8ae3187f62078436ed885532d6cff2083189 Mon Sep 17 00:00:00 2001
From: Robin Stocker
+ *
+ */
+package org.commonmark;
diff --git a/commonmark/src/main/java/org/commonmark/parser/block/package-info.java b/commonmark/src/main/java/org/commonmark/parser/block/package-info.java
new file mode 100644
index 000000000..095d4d565
--- /dev/null
+++ b/commonmark/src/main/java/org/commonmark/parser/block/package-info.java
@@ -0,0 +1,4 @@
+/**
+ * Types for extending block parsing
+ */
+package org.commonmark.parser.block;
diff --git a/commonmark/src/main/java/org/commonmark/parser/package-info.java b/commonmark/src/main/java/org/commonmark/parser/package-info.java
new file mode 100644
index 000000000..2afb3b96d
--- /dev/null
+++ b/commonmark/src/main/java/org/commonmark/parser/package-info.java
@@ -0,0 +1,4 @@
+/**
+ * Parsing input text to AST nodes (see {@link org.commonmark.parser.Parser})
+ */
+package org.commonmark.parser;
diff --git a/commonmark/src/main/javadoc/overview.html b/commonmark/src/main/javadoc/overview.html
new file mode 100644
index 000000000..77c5944bf
--- /dev/null
+++ b/commonmark/src/main/javadoc/overview.html
@@ -0,0 +1,22 @@
+
+
+Java implementation of CommonMark for parsing markdown and rendering to HTML (core library)
+
+
+ import org.commonmark.html.HtmlRenderer;
+ import org.commonmark.node.*;
+ import org.commonmark.parser.Parser;
+
+ Parser parser = Parser.builder().build();
+ Node document = parser.parse("This is *Sparta*");
+ HtmlRenderer renderer = HtmlRenderer.builder().escapeHtml(true).build();
+ renderer.render(document); // "<p>This is <em>Sparta</em></p>\n"
+
+
+
+
From 3dfc3c5bf2a47baa30aa3fb06cbb237f306f05a5 Mon Sep 17 00:00:00 2001
From: Robin Stocker
+ */
public class HtmlRenderer {
private static final Map
+ * HtmlRenderer renderer = HtmlRenderer.builder().escapeHtml(true).build();
+ * renderer.render(node);
+ *
" to make them hard breaks
- // set to " " if you want to ignore line wrapping in source
+ /**
+ * Builder for configuring an {@link HtmlRenderer}. See methods for default configuration.
+ */
public static class Builder {
+ private static final boolean ESCAPE_HTML_DEFAULT = false;
+ private static final boolean PERCENT_ENCODE_URLS_DEFAULT = false;
+
private String softbreak = "\n";
- private boolean escapeHtml = false;
- private boolean percentEncodeUrls = false;
+ private boolean escapeHtml = ESCAPE_HTML_DEFAULT;
+ private boolean percentEncodeUrls = PERCENT_ENCODE_URLS_DEFAULT;
private List
"} (or {@code "
"} to make them hard breaks.
+ *
*
*
- * @param percentEncodeUrls true to percent-encode, false for leaving as-is; default is false
+ * @param percentEncodeUrls true to percent-encode, false for leaving as-is
* @return {@code this}
*/
public Builder percentEncodeUrls(boolean percentEncodeUrls) {
@@ -100,6 +140,12 @@ public Builder percentEncodeUrls(boolean percentEncodeUrls) {
return this;
}
+ /**
+ * Add an attribute provider for adding/changing HTML attributes to the rendered tags.
+ *
+ * @param attributeProvider the attribute provider to add
+ * @return {@code this}
+ */
public Builder attributeProvider(AttributeProvider attributeProvider) {
this.attributeProviders.add(attributeProvider);
return this;
@@ -112,7 +158,7 @@ public Builder customHtmlRenderer(CustomHtmlRenderer customHtmlRenderer) {
/**
* @param extensions extensions to use on this HTML renderer
- * @return this
+ * @return {@code this}
*/
public Builder extensions(Iterable extends Extension> extensions) {
for (Extension extension : extensions) {
@@ -123,14 +169,10 @@ public Builder extensions(Iterable extends Extension> extensions) {
}
return this;
}
-
- public HtmlRenderer build() {
- return new HtmlRenderer(this);
- }
}
/**
- * Extension for HTML renderer.
+ * Extension for {@link HtmlRenderer}.
*/
public interface HtmlRendererExtension extends Extension {
void extend(Builder rendererBuilder);
diff --git a/commonmark/src/main/java/org/commonmark/parser/Parser.java b/commonmark/src/main/java/org/commonmark/parser/Parser.java
index 7d46dfb0d..3fe25d5b6 100644
--- a/commonmark/src/main/java/org/commonmark/parser/Parser.java
+++ b/commonmark/src/main/java/org/commonmark/parser/Parser.java
@@ -13,6 +13,15 @@
import java.util.List;
import java.util.Map;
+/**
+ * Parses input text to a tree of nodes.
+ *
+ */
public class Parser {
private final List
+ * Parser parser = Parser.builder().build();
+ * Node document = parser.parse("input text");
+ *
From 2196a22a7ca26a2eaae9c6587ccb0e4e4a97bb40 Mon Sep 17 00:00:00 2001
From: Robin Stocker
- import org.commonmark.html.HtmlRenderer;
+ import org.commonmark.html.HtmlRenderer;
import org.commonmark.node.*;
import org.commonmark.parser.Parser;
From 542a93c096640f74aa5865cd1fdaab24996c71e7 Mon Sep 17 00:00:00 2001
From: Robin Stocker
+ * The parsed strikethrough text regions are turned into {@link Strikethrough} nodes. + *
*/ public class StrikethroughExtension implements Parser.ParserExtension, HtmlRenderer.HtmlRendererExtension { diff --git a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/StrikethroughDelimiterProcessor.java b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughDelimiterProcessor.java similarity index 93% rename from commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/StrikethroughDelimiterProcessor.java rename to commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughDelimiterProcessor.java index 40019a55a..14a847e2d 100644 --- a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/StrikethroughDelimiterProcessor.java +++ b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughDelimiterProcessor.java @@ -1,5 +1,6 @@ -package org.commonmark.ext.gfm.strikethrough; +package org.commonmark.ext.gfm.strikethrough.internal; +import org.commonmark.ext.gfm.strikethrough.Strikethrough; import org.commonmark.node.Node; import org.commonmark.node.Text; import org.commonmark.parser.DelimiterProcessor; diff --git a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/StrikethroughHtmlRenderer.java b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughHtmlRenderer.java similarity index 87% rename from commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/StrikethroughHtmlRenderer.java rename to commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughHtmlRenderer.java index 650a075ec..b89efc13a 100644 --- a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/StrikethroughHtmlRenderer.java +++ 
b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughHtmlRenderer.java @@ -1,5 +1,6 @@ -package org.commonmark.ext.gfm.strikethrough; +package org.commonmark.ext.gfm.strikethrough.internal; +import org.commonmark.ext.gfm.strikethrough.Strikethrough; import org.commonmark.html.CustomHtmlRenderer; import org.commonmark.html.HtmlWriter; import org.commonmark.node.Node; diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableBlock.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableBlock.java index 0e060b8b3..c46fc27ef 100644 --- a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableBlock.java +++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableBlock.java @@ -2,5 +2,8 @@ import org.commonmark.node.CustomBlock; +/** + * Table block containing a {@link TableHead} and optionally a {@link TableBody}. + */ public class TableBlock extends CustomBlock { } diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableBody.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableBody.java index f0dd9b227..ddc80deb3 100644 --- a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableBody.java +++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableBody.java @@ -2,5 +2,8 @@ import org.commonmark.node.CustomNode; +/** + * Body part of a {@link TableBlock} containing {@link TableRow TableRows}. 
+ */ public class TableBody extends CustomNode { } diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableCell.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableCell.java index cb2ea66a3..61880c6c3 100644 --- a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableCell.java +++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableCell.java @@ -2,11 +2,17 @@ import org.commonmark.node.CustomNode; +/** + * Table cell of a {@link TableRow} containing inline nodes. + */ public class TableCell extends CustomNode { private boolean header; private Alignment alignment; + /** + * @return whether the cell is a header or not + */ public boolean isHeader() { return header; } @@ -15,6 +21,9 @@ public void setHeader(boolean header) { this.header = header; } + /** + * @return the cell alignment + */ public Alignment getAlignment() { return alignment; } @@ -23,7 +32,11 @@ public void setAlignment(Alignment alignment) { this.alignment = alignment; } + /** + * How the cell is aligned horizontally. + */ public enum Alignment { LEFT, CENTER, RIGHT } + } diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableHead.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableHead.java index 4de7ff9b1..96a95e620 100644 --- a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableHead.java +++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableHead.java @@ -2,5 +2,8 @@ import org.commonmark.node.CustomNode; +/** + * Head part of a {@link TableBlock} containing {@link TableRow TableRows}. 
+ */ public class TableHead extends CustomNode { } diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableRow.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableRow.java index a3e32ce17..1325875d0 100644 --- a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableRow.java +++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableRow.java @@ -2,5 +2,8 @@ import org.commonmark.node.CustomNode; +/** + * Table row of a {@link TableHead} or {@link TableBody} containing {@link TableCell TableCells}. + */ public class TableRow extends CustomNode { } diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TablesExtension.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TablesExtension.java index 1dec67aea..84e58f391 100644 --- a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TablesExtension.java +++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TablesExtension.java @@ -1,6 +1,8 @@ package org.commonmark.ext.gfm.tables; import org.commonmark.Extension; +import org.commonmark.ext.gfm.tables.internal.TableBlockParser; +import org.commonmark.ext.gfm.tables.internal.TableHtmlRenderer; import org.commonmark.parser.Parser; import org.commonmark.html.HtmlRenderer; @@ -11,6 +13,9 @@ * ({@link org.commonmark.parser.Parser.Builder#extensions(Iterable)}, * {@link org.commonmark.html.HtmlRenderer.Builder#extensions(Iterable)}). * + *+ * The parsed tables are turned into {@link TableBlock} blocks. + *
*/ public class TablesExtension implements Parser.ParserExtension, HtmlRenderer.HtmlRendererExtension { diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableBlockParser.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableBlockParser.java similarity index 98% rename from commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableBlockParser.java rename to commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableBlockParser.java index 5d366f346..7d4fe2110 100644 --- a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableBlockParser.java +++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableBlockParser.java @@ -1,5 +1,6 @@ -package org.commonmark.ext.gfm.tables; +package org.commonmark.ext.gfm.tables.internal; +import org.commonmark.ext.gfm.tables.*; import org.commonmark.node.Block; import org.commonmark.node.Node; import org.commonmark.parser.InlineParser; diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableHtmlRenderer.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableHtmlRenderer.java similarity index 97% rename from commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableHtmlRenderer.java rename to commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableHtmlRenderer.java index aacd45689..dbe6eb61a 100644 --- a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableHtmlRenderer.java +++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableHtmlRenderer.java @@ -1,5 +1,6 @@ -package org.commonmark.ext.gfm.tables; +package org.commonmark.ext.gfm.tables.internal; +import org.commonmark.ext.gfm.tables.*; import org.commonmark.html.CustomHtmlRenderer; import org.commonmark.html.HtmlWriter; import org.commonmark.node.Node; 
From 0700a122a8ed9c0ff139e3f90d9bc2a30b30e7c8 Mon Sep 17 00:00:00 2001 From: Robin Stocker~foo~
\n"); @@ -57,7 +64,7 @@ public void insideBlockQuote() { } @Override - protected Iterable extends Extension> getExtensions() { - return Collections.singleton(StrikethroughExtension.create()); + protected String render(String source) { + return RENDERER.render(PARSER.parse(source)); } } diff --git a/commonmark-ext-gfm-tables/pom.xml b/commonmark-ext-gfm-tables/pom.xml index 935a5533e..6708de797 100644 --- a/commonmark-ext-gfm-tables/pom.xml +++ b/commonmark-ext-gfm-tables/pom.xml @@ -17,15 +17,9 @@Abc|Def
\n"); @@ -289,8 +296,7 @@ public void tableEndWithoutEmptyLine() { } @Override - protected Iterable extends Extension> getExtensions() { - return Collections.singleton(TablesExtension.create()); + protected String render(String source) { + return RENDERER.render(PARSER.parse(source)); } - } diff --git a/commonmark-integration-test/pom.xml b/commonmark-integration-test/pom.xml index 0b69e1d41..804107e42 100644 --- a/commonmark-integration-test/pom.xml +++ b/commonmark-integration-test/pom.xml @@ -40,8 +40,8 @@This is Sparta
\n" ``` -This uses the parser and renderer with default options, except for escaping raw -HTML tags and blocks. For all the available options, see other methods on the -builder objects. +This uses the parser and renderer with default options. Both builders have +methods for configuring their behavior, e.g. calling `escapeHtml(true)` on +`HtmlRenderer` will escape raw HTML tags and blocks. For all available +options, see methods on the builders. -Note that this library doesn't try to sanitize HTML; that is the responsibility -of the caller. +Note that this library doesn't try to sanitize the resulting HTML; that is +the responsibility of the caller. #### Use a visitor to process parsed nodes From d7cd926840d6edbc3c4b35ebb78938fb009a8144 Mon Sep 17 00:00:00 2001 From: Robin Stocker
- * Note that {@link HtmlTag} is only a tag itself, not the text between an opening tag and a closing tag. So markup
- * in the text will be parsed as normal and is not affected by this option.
+ * Note that {@link HtmlInline} is only a tag itself, not the text between an opening tag and a closing tag. So
+ * markup in the text will be parsed as normal and is not affected by this option.
*
* @param escapeHtml true for escaping, false for preserving raw HTML
* @return {@code this}
@@ -347,11 +347,11 @@ public void visit(Code code) {
}
@Override
- public void visit(HtmlTag htmlTag) {
+ public void visit(HtmlInline htmlInline) {
if (escapeHtml) {
- html.raw(escape(htmlTag.getLiteral(), false));
+ html.raw(escape(htmlInline.getLiteral(), false));
} else {
- html.raw(htmlTag.getLiteral());
+ html.raw(htmlInline.getLiteral());
}
}
diff --git a/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java b/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java
index 0cc9dbdac..33a110c4f 100644
--- a/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java
+++ b/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java
@@ -299,7 +299,7 @@ private boolean parseInline() {
res = parseCloseBracket();
break;
case '<':
- res = parseAutolink() || parseHtmlTag();
+ res = parseAutolink() || parseHtmlInline();
break;
case '&':
res = parseEntity();
@@ -709,12 +709,12 @@ private boolean parseAutolink() {
}
/**
- * Attempt to parse a raw HTML tag.
+ * Attempt to parse inline HTML.
*/
- private boolean parseHtmlTag() {
+ private boolean parseHtmlInline() {
String m = match(HTML_TAG);
if (m != null) {
- HtmlTag node = new HtmlTag();
+ HtmlInline node = new HtmlInline();
node.setLiteral(m);
appendNode(node);
return true;
diff --git a/commonmark/src/main/java/org/commonmark/node/AbstractVisitor.java b/commonmark/src/main/java/org/commonmark/node/AbstractVisitor.java
index c7ecbe150..ab43793c4 100644
--- a/commonmark/src/main/java/org/commonmark/node/AbstractVisitor.java
+++ b/commonmark/src/main/java/org/commonmark/node/AbstractVisitor.java
@@ -54,8 +54,8 @@ public void visit(HorizontalRule horizontalRule) {
}
@Override
- public void visit(HtmlTag htmlTag) {
- visitChildren(htmlTag);
+ public void visit(HtmlInline htmlInline) {
+ visitChildren(htmlInline);
}
@Override
diff --git a/commonmark/src/main/java/org/commonmark/node/HtmlTag.java b/commonmark/src/main/java/org/commonmark/node/HtmlInline.java
similarity index 77%
rename from commonmark/src/main/java/org/commonmark/node/HtmlTag.java
rename to commonmark/src/main/java/org/commonmark/node/HtmlInline.java
index 4271e743c..291fcde3c 100644
--- a/commonmark/src/main/java/org/commonmark/node/HtmlTag.java
+++ b/commonmark/src/main/java/org/commonmark/node/HtmlInline.java
@@ -3,9 +3,9 @@
/**
* Inline HTML element.
*
- * @see CommonMark Spec
+ * @see CommonMark Spec
*/
-public class HtmlTag extends Node {
+public class HtmlInline extends Node {
private String literal;
diff --git a/commonmark/src/main/java/org/commonmark/node/Visitor.java b/commonmark/src/main/java/org/commonmark/node/Visitor.java
index 3f0b4fb6c..5dc4d0dcc 100644
--- a/commonmark/src/main/java/org/commonmark/node/Visitor.java
+++ b/commonmark/src/main/java/org/commonmark/node/Visitor.java
@@ -25,7 +25,7 @@ public interface Visitor {
void visit(HorizontalRule horizontalRule);
- void visit(HtmlTag htmlTag);
+ void visit(HtmlInline htmlInline);
void visit(HtmlBlock htmlBlock);
From ed7c628ac9af876014e3fbcce700451b32b9bb41 Mon Sep 17 00:00:00 2001
From: Robin Stocker - bar #5 bolt #foobar #hashtag #→foo Bar foofoo
.
-More than six `#` characters is not a header:
+More than six `#` characters is not a heading:
.
####### foo
@@ -613,23 +613,31 @@ More than six `#` characters is not a header:
.
At least one space is required between the `#` characters and the
-header's contents, unless the header is empty. Note that many
+heading's contents, unless the heading is empty. Note that many
implementations currently do not require the space. However, the
space was required by the
[original ATX implementation](http://www.aaronsw.com/2002/atx/atx.py),
and it helps prevent things like the following from being parsed as
-headers:
+headings:
.
#5 bolt
-#foobar
+#hashtag
.
foo #
.
-ATX headers need not be separated from surrounding content by blank
+ATX headings need not be separated from surrounding content by blank
lines, and they can interrupt paragraphs:
.
@@ -764,7 +772,7 @@ Bar foo
Foo
.
-The header content can be indented up to three spaces, and need
+The heading content can be indented up to three spaces, and need
not line up with the underlining:
.
@@ -866,7 +874,7 @@ Foo
.
-The setext header underline can be indented up to three spaces, and
+The setext heading underline can be indented up to three spaces, and
may have trailing spaces:
.
@@ -886,7 +894,7 @@ Foo
---
of dashes"/>
. -The setext header underline cannot be a [lazy continuation +The setext heading underline cannot be a [lazy continuation line] in a list item or block quote: . @@ -960,7 +968,7 @@ line] in a list item or block quote:Baz
. -Setext headers cannot be empty: +Setext headings cannot be empty: . @@ -1004,9 +1012,9 @@ Setext headers cannot be empty:====
. -Setext header text lines must not be interpretable as block +Setext heading text lines must not be interpretable as block constructs other than paragraphs. So, the line of dashes -in these examples gets interpreted as a horizontal rule: +in these examples gets interpreted as a thematic break: . --- @@ -1045,7 +1053,7 @@ in these examples gets interpreted as a horizontal rule:foo
-foo
foo
-. +```````````````````````````````` -. + +```````````````````````````````` example *bar* *baz* . *bar*baz
-. +```````````````````````````````` + Note that anything on the last line after the end tag will be included in the [HTML block]: -. +```````````````````````````````` example 1. *bar* @@ -2064,11 +2302,12 @@ foo 1. *bar* -. +```````````````````````````````` + A comment (type 2): -. +```````````````````````````````` example -. +```````````````````````````````` + A processing instruction (type 3): -. +```````````````````````````````` example '; @@ -2095,19 +2335,21 @@ A processing instruction (type 3): echo '>'; ?> -. +```````````````````````````````` + A declaration (type 4): -. +```````````````````````````````` example . -. +```````````````````````````````` + CDATA (type 5): -. +```````````````````````````````` example -. +```````````````````````````````` + The opening tag can be indented 1-3 spaces, but not 4: -. +```````````````````````````````` example @@ -2145,9 +2388,10 @@ The opening tag can be indented 1-3 spaces, but not 4:<!-- foo -->
-.
+````````````````````````````````
-.
+
+```````````````````````````````` example
<div>
-.
+````````````````````````````````
+
An HTML block of types 1--6 can interrupt a paragraph, and need not be
preceded by a blank line.
-.
+```````````````````````````````` example
Foo
Foo baz
-. +```````````````````````````````` + This rule differs from John Gruber's original Markdown syntax specification, which says: @@ -2229,7 +2477,7 @@ simply separate the Markdown from the HTML using blank lines: Compare: -. +```````````````````````````````` exampleEmphasized text.
` tags, but as described @@ -2318,7 +2570,7 @@ lines. ## Link reference definitions -A [link reference definition](@link-reference-definition) +A [link reference definition](@) consists of a [link label], indented up to three spaces, followed by a colon (`:`), optional [whitespace] (including up to one [line ending]), a [link destination], @@ -2326,24 +2578,25 @@ optional [whitespace] (including up to one [line ending]), and an optional [link title], which if it is present must be separated from the [link destination] by [whitespace]. -No further [non-whitespace character]s may occur on the line. +No further [non-whitespace characters] may occur on the line. A [link reference definition] does not correspond to a structural element of a document. Instead, it -defines a label which can be used in [reference link]s +defines a label which can be used in [reference links] and reference-style [images] elsewhere in the document. [Link reference definitions] can come either before or after the links that use them. -. +```````````````````````````````` example [foo]: /url "title" [foo] . -. +```````````````````````````````` -. + +```````````````````````````````` example [foo]: /url 'the title' @@ -2351,29 +2604,32 @@ them. [foo] . -. +```````````````````````````````` -. + +```````````````````````````````` example [Foo*bar\]]:my_(url) 'title (with parens)' [Foo*bar\]] . -. +```````````````````````````````` -. + +```````````````````````````````` example [Foo bar]: --. +```````````````````````````````` + The Laziness clause allows us to omit the `>` before a paragraph continuation line: -. +```````````````````````````````` example > # Foo > bar baz @@ -2844,12 +3132,13 @@ baz+ 'title' [Foo bar] . -. +```````````````````````````````` + The title may extend over multiple lines: -. +```````````````````````````````` example [foo]: /url ' title line1 @@ -2387,11 +2643,12 @@ title line1 line2 ">foo -. +```````````````````````````````` + However, it may not contain a [blank line]: -. 
+```````````````````````````````` example [foo]: /url 'title with blank line' @@ -2401,123 +2658,135 @@ with blank line' [foo]: /url 'title
with blank line'
[foo]
-. +```````````````````````````````` + The title may be omitted: -. +```````````````````````````````` example [foo]: /url [foo] . -. +```````````````````````````````` + The link destination may not be omitted: -. +```````````````````````````````` example [foo]: [foo] .[foo]:
[foo]
-. +```````````````````````````````` + Both title and destination can contain backslash escapes and literal backslashes: -. +```````````````````````````````` example [foo]: /url\bar\*baz "foo\"bar\baz" [foo] . -. +```````````````````````````````` + A link can come before its corresponding definition: -. +```````````````````````````````` example [foo] [foo]: url . -. +```````````````````````````````` + If there are several matching definitions, the first one takes precedence: -. +```````````````````````````````` example [foo] [foo]: first [foo]: second . -. +```````````````````````````````` + As noted in the section on [Links], matching of labels is case-insensitive (see [matches]). -. +```````````````````````````````` example [FOO]: /url [Foo] . -. +```````````````````````````````` -. + +```````````````````````````````` example [ΑΓΩ]: /φου [αγω] . -. +```````````````````````````````` + Here is a link reference definition with no corresponding link. It contributes nothing to the document. -. +```````````````````````````````` example [foo]: /url . -. +```````````````````````````````` + Here is another one: -. +```````````````````````````````` example [ foo ]: /url bar .bar
-. +```````````````````````````````` + This is not a link reference definition, because there are -[non-whitespace character]s after the title: +[non-whitespace characters] after the title: -. +```````````````````````````````` example [foo]: /url "title" ok .[foo]: /url "title" ok
-. +```````````````````````````````` + This is a link reference definition, but it has no title: -. +```````````````````````````````` example [foo]: /url "title" ok ."title" ok
-. +```````````````````````````````` + This is not a link reference definition, because it is indented four spaces: -. +```````````````````````````````` example [foo]: /url "title" [foo] @@ -2525,12 +2794,13 @@ four spaces:[foo]: /url "title"[foo]
-. +```````````````````````````````` + This is not a link reference definition, because it occurs inside a code block: -. +```````````````````````````````` example ``` [foo]: /url ``` @@ -2540,11 +2810,12 @@ a code block:[foo]: /url[foo]
-. +```````````````````````````````` + A [link reference definition] cannot interrupt a paragraph. -. +```````````````````````````````` example Foo [bar]: /baz @@ -2553,12 +2824,13 @@ FooFoo [bar]: /baz
[bar]
-. +```````````````````````````````` + However, it can directly follow other block elements, such as headings and thematic breaks, and it need not be followed by a blank line. -. +```````````````````````````````` example # [Foo] [foo]: /url > bar @@ -2567,12 +2839,13 @@ and thematic breaks, and it need not be followed by a blank line.-. +```````````````````````````````` + -Several [link reference definition]s +Several [link reference definitions] can occur one after another, without intervening blank lines. -. +```````````````````````````````` example [foo]: /foo-url "foo" [bar]: /bar-url "bar" @@ -2585,14 +2858,15 @@ can occur one after another, without intervening blank lines. -. +```````````````````````````````` -[Link reference definition]s can occur + +[Link reference definitions] can occur inside block containers, like lists and block quotations. They affect the entire document, not just the container in which they are defined: -. +```````````````````````````````` example [foo] > [foo]: /url @@ -2600,13 +2874,14 @@ are defined:bar
-. +```````````````````````````````` + ## Paragraphs A sequence of non-blank lines that cannot be interpreted as other -kinds of blocks forms a [paragraph](@paragraph). +kinds of blocks forms a [paragraph](@). The contents of the paragraph are the result of parsing the paragraph's raw content as inlines. The paragraph's raw content is formed by concatenating the lines and removing initial and final @@ -2614,18 +2889,19 @@ is formed by concatenating the lines and removing initial and final A simple example with two paragraphs: -. +```````````````````````````````` example aaa bbb .aaa
bbb
-. +```````````````````````````````` + Paragraphs can contain multiple lines, but no blank lines: -. +```````````````````````````````` example aaa bbb @@ -2636,11 +2912,12 @@ ddd bbbccc ddd
-. +```````````````````````````````` + Multiple blank lines between paragraph have no effect: -. +```````````````````````````````` example aaa @@ -2648,22 +2925,24 @@ bbb .aaa
bbb
-. +```````````````````````````````` + Leading spaces are skipped: -. +```````````````````````````````` example aaa bbb .aaa bbb
-. +```````````````````````````````` + Lines after the first may be indented any amount, since indented code blocks cannot interrupt paragraphs. -. +```````````````````````````````` example aaa bbb ccc @@ -2671,49 +2950,53 @@ aaaaaa bbb ccc
-. +```````````````````````````````` + However, the first line may be indented at most three spaces, or an indented code block will be triggered: -. +```````````````````````````````` example aaa bbb .aaa bbb
-. +```````````````````````````````` -. + +```````````````````````````````` example aaa bbb .aaabbb
-. +```````````````````````````````` + Final spaces are stripped before inline parsing, so a paragraph that ends with two or more spaces will not end with a [hard line break]: -. +```````````````````````````````` example aaa bbb .aaa
-. +```````````````````````````````` + ## Blank lines -[Blank line]s between block-level elements are ignored, +[Blank lines] between block-level elements are ignored, except for the role they play in determining whether a [list] is [tight] or [loose]. Blank lines at the beginning and end of the document are also ignored. -. +```````````````````````````````` example aaa @@ -2725,7 +3008,8 @@ aaa .
bbbaaa
aaa
-. +```````````````````````````````` + # Container blocks @@ -2750,7 +3034,7 @@ these constructions. (A recipe is provided below in the section entitled ## Block quotes -A [block quote marker](@block-quote-marker) +A [block quote marker](@) consists of 0-3 spaces of initial indent, plus (a) the character `>` together with a following space, or (b) a single character `>` not followed by a space. @@ -2767,7 +3051,7 @@ The following rules define [block quotes]: more lines in which the next [non-whitespace character] after the [block quote marker] is [paragraph continuation text] is a block quote with *Bs* as its content. - [Paragraph continuation text](@paragraph-continuation-text) is text + [Paragraph continuation text](@) is text that will be parsed as part of the content of a paragraph, but does not occur at the beginning of the paragraph. @@ -2778,7 +3062,7 @@ Nothing else counts as a [block quote](#block-quotes). Here is a simple example: -. +```````````````````````````````` example > # Foo > bar > baz @@ -2788,11 +3072,12 @@ Here is a simple example:bar baz
-. +```````````````````````````````` + The spaces after the `>` characters can be omitted: -. +```````````````````````````````` example ># Foo >bar > baz @@ -2802,11 +3087,12 @@ The spaces after the `>` characters can be omitted:bar baz
-. +```````````````````````````````` + The `>` characters can be indented 1-3 spaces: -. +```````````````````````````````` example > # Foo > bar > baz @@ -2816,11 +3102,12 @@ The `>` characters can be indented 1-3 spaces:bar baz
-. +```````````````````````````````` + Four spaces gives us a code block: -. +```````````````````````````````` example > # Foo > bar > baz @@ -2829,12 +3116,13 @@ Four spaces gives us a code block: > bar > baz
bar baz
-. +```````````````````````````````` + A block quote can contain some lazy and some non-lazy continuation lines: -. +```````````````````````````````` example > bar baz > foo @@ -2859,10 +3148,11 @@ baz baz foo -. +```````````````````````````````` + Laziness only applies to lines that would have been continuations of -paragraphs had they been prepended with [block quote marker]s. +paragraphs had they been prepended with [block quote markers]. For example, the `> ` cannot be omitted in the second line of ``` markdown @@ -2872,7 +3162,7 @@ For example, the `> ` cannot be omitted in the second line of without changing the meaning: -. +```````````````````````````````` example > foo --- . @@ -2880,7 +3170,8 @@ without changing the meaning:foo
bar
-.
+````````````````````````````````
-.
+
+```````````````````````````````` example
> ```
foo
```
@@ -2930,12 +3223,13 @@ foo
foo
-.
+````````````````````````````````
+
Note that in the following case, we have a paragraph
continuation line:
-.
+```````````````````````````````` example
> foo
- bar
.
@@ -2943,7 +3237,8 @@ continuation line:
foo - bar
-. +```````````````````````````````` + To see why, note that in @@ -2958,25 +3253,27 @@ interrupt paragraphs, so it is a [paragraph continuation line]. A block quote can be empty: -. +```````````````````````````````` example > .-. +```````````````````````````````` -. + +```````````````````````````````` example > > > .
-. +```````````````````````````````` + A block quote can have initial or final blank lines: -. +```````````````````````````````` example > > foo > @@ -2984,11 +3281,12 @@ A block quote can have initial or final blank lines:
-. +```````````````````````````````` + A blank line always separates block quotes: -. +```````````````````````````````` example > foo > bar @@ -2999,7 +3297,8 @@ A blank line always separates block quotes:foo
-. +```````````````````````````````` + (Most current Markdown implementations, including John Gruber's original `Markdown.pl`, will parse this example as a single block quote @@ -3009,7 +3308,7 @@ whether two block quotes or one are wanted.) Consecutiveness means that if we put these block quotes together, we get a single block quote: -. +```````````````````````````````` example > foo > bar . @@ -3017,11 +3316,12 @@ we get a single block quote:bar
foo bar
-. +```````````````````````````````` + To get a block quote with two paragraphs, use: -. +```````````````````````````````` example > foo > > bar @@ -3030,11 +3330,12 @@ To get a block quote with two paragraphs, use:foo
bar
-. +```````````````````````````````` + Block quotes can interrupt paragraphs: -. +```````````````````````````````` example foo > bar . @@ -3042,12 +3343,13 @@ foo-. +```````````````````````````````` + In general, blank lines are not needed before or after block quotes: -. +```````````````````````````````` example > aaa *** > bbb @@ -3059,12 +3361,13 @@ quotes:bar
-. +```````````````````````````````` + However, because of laziness, a blank line is needed between a block quote and a following paragraph: -. +```````````````````````````````` example > bar baz . @@ -3072,9 +3375,10 @@ bazbbb
bar baz
-. +```````````````````````````````` -. + +```````````````````````````````` example > bar baz @@ -3083,9 +3387,10 @@ bazbar
baz
-. +```````````````````````````````` -. + +```````````````````````````````` example > bar > baz @@ -3094,13 +3399,14 @@ bazbar
baz
-. +```````````````````````````````` + It is a consequence of the Laziness rule that any number of initial `>`s may be omitted on a continuation line of a nested block quote: -. +```````````````````````````````` example > > > foo bar . @@ -3112,9 +3418,10 @@ bar -. +```````````````````````````````` -. + +```````````````````````````````` example >>> foo > bar >>baz @@ -3128,14 +3435,15 @@ baz -. +```````````````````````````````` + When including an indented code block in a block quote, remember that the [block quote marker] includes both the `>` and a following space. So *five spaces* are needed after the `>`: -. +```````````````````````````````` example > code > not code @@ -3147,18 +3455,19 @@ the `>`:-. +```````````````````````````````` + ## List items -A [list marker](@list-marker) is a +A [list marker](@) is a [bullet list marker] or an [ordered list marker]. -A [bullet list marker](@bullet-list-marker) +A [bullet list marker](@) is a `-`, `+`, or `*` character. -An [ordered list marker](@ordered-list-marker) +An [ordered list marker](@) is a sequence of 1--9 arabic digits (`0-9`), followed by either a `.` character or a `)` character. (The reason for the length limit is that with 10 digits we start seeing integer overflows @@ -3179,7 +3488,7 @@ The following rules define [list items]: For example, let *Ls* be the lines -. +```````````````````````````````` example A paragraph with two lines. @@ -3194,13 +3503,14 @@ with two lines.not code
-. +```````````````````````````````` + And let *M* be the marker `1.`, and *N* = 2. Then rule #1 says that the following is an ordered list item with start number 1, and the same contents as *Ls*: -. +```````````````````````````````` example 1. A paragraph with two lines. @@ -3219,7 +3529,8 @@ with two lines. -. +```````````````````````````````` + The most important thing to notice is that the position of the text after the list marker determines how much indentation @@ -3232,7 +3543,7 @@ item. Here are some examples showing how far content must be indented to be put under the list item: -. +```````````````````````````````` example - one two @@ -3241,9 +3552,10 @@ put under the list item:A block quote.
two
-. +```````````````````````````````` -. + +```````````````````````````````` example - one two @@ -3254,9 +3566,10 @@ put under the list item:two
-. +```````````````````````````````` -. + +```````````````````````````````` example - one two @@ -3266,9 +3579,10 @@ put under the list item: two
-.
+````````````````````````````````
-.
+
+```````````````````````````````` example
- one
two
@@ -3279,7 +3593,8 @@ put under the list item:
two
-. +```````````````````````````````` + It is tempting to think of this in terms of columns: the continuation blocks must be indented at least to the column of the first @@ -3289,7 +3604,7 @@ is needed. Which column this indentation reaches will depend on how the list item is embedded in other constructions, as shown by this example: -. +```````````````````````````````` example > > 1. one >> >> two @@ -3304,7 +3619,8 @@ this example: -. +```````````````````````````````` + Here `two` occurs in the same column as the list marker `1.`, but is actually contained in the list item, because there is @@ -3315,7 +3631,7 @@ occurs far to the right of the initial text of the list item, `one`, but it is not considered part of the list item, because it is not indented far enough past the blockquote marker: -. +```````````````````````````````` example >>- one >> > > two @@ -3328,25 +3644,27 @@ far enough past the blockquote marker:two
-. +```````````````````````````````` + Note that at least one space is needed between the list marker and any following content, so these are not list items: -. +```````````````````````````````` example -one 2.two .-one
2.two
-. +```````````````````````````````` + A list item may not contain blocks that are separated by more than one blank line. Thus, two blank lines will end a list, unless the two blanks are contained in a [fenced code block]. -. +```````````````````````````````` example - foo bar @@ -3403,11 +3721,12 @@ bar -. +```````````````````````````````` + A list item may contain any kind of block: -. +```````````````````````````````` example 1. foo ``` @@ -3429,14 +3748,15 @@ A list item may contain any kind of block: -. +```````````````````````````````` + A list item that contains an indented code block will preserve empty lines within the code block verbatim, unless there are two or more empty lines in a row (since as described above, two blank lines end the list): -. +```````````````````````````````` example - Foo bar @@ -3452,9 +3772,10 @@ baz -. +```````````````````````````````` -. + +```````````````````````````````` example - Foo bar @@ -3471,49 +3792,55 @@ baz baz
-.
+````````````````````````````````
+
Note that ordered list start numbers must be nine digits or less:
-.
+```````````````````````````````` example
123456789. ok
.
1234567890. not ok
-. +```````````````````````````````` + A start number may begin with 0s: -. +```````````````````````````````` example 0. ok .-1. not ok
-. +```````````````````````````````` + 2. **Item starting with indented code.** If a sequence of lines *Ls* @@ -3532,7 +3859,7 @@ An indented code block will have to be indented four spaces beyond the edge of the region where text will be included in the list item. In the following case that is 6 spaces: -. +```````````````````````````````` example - foo bar @@ -3544,11 +3871,12 @@ In the following case that is 6 spaces: -. +```````````````````````````````` + And in this case it is 11 spaces: -. +```````````````````````````````` example 10. foo bar @@ -3560,13 +3888,14 @@ And in this case it is 11 spaces: -. +```````````````````````````````` + If the *first* block in the list item is an indented code block, then by rule #2, the contents must be indented *one* space after the list marker: -. +```````````````````````````````` example indented code paragraph @@ -3578,9 +3907,10 @@ paragraphparagraph
more code
-.
+````````````````````````````````
-.
+
+```````````````````````````````` example
1. indented code
paragraph
@@ -3596,12 +3926,13 @@ paragraph
-.
+````````````````````````````````
+
Note that an additional space indent is interpreted as space
inside the code block:
-.
+```````````````````````````````` example
1. indented code
paragraph
@@ -3617,7 +3948,8 @@ inside the code block:
-.
+````````````````````````````````
+
Note that rules #1 and #2 only apply to two cases: (a) cases
in which the lines to be included in a list item begin with a
@@ -3627,16 +3959,17 @@ block. In a case like the following, where the first block begins with
a three-space indent, the rules do not allow us to form a list item by
indenting the whole thing and prepending a list marker:
-.
+```````````````````````````````` example
foo
bar
.
foo
bar
-. +```````````````````````````````` -. + +```````````````````````````````` example - foo bar @@ -3645,14 +3978,15 @@ barbar
-. +```````````````````````````````` + This is not a significant restriction, because when a block begins with 1-3 spaces indent, the indentation can always be removed without a change in interpretation, allowing rule #1 to be applied. So, in the above case: -. +```````````````````````````````` example - foo bar @@ -3663,7 +3997,8 @@ the above case:bar
-. +```````````````````````````````` + 3. **Item starting with a blank line.** If a sequence of lines *Ls* starting with a single [blank line] constitute a (possibly empty) @@ -3679,7 +4014,7 @@ the above case: Here are some list items that start with a blank line but are not empty: -. +```````````````````````````````` example - foo - @@ -3700,13 +4035,14 @@ Here are some list items that start with a blank line but are not empty: -. +```````````````````````````````` + A list item can begin with at most one blank line. In the following example, `foo` is not part of the list item: -. +```````````````````````````````` example - foo @@ -3715,11 +4051,12 @@ item:foo
-. +```````````````````````````````` + Here is an empty bullet list item: -. +```````````````````````````````` example - foo - - bar @@ -3729,11 +4066,12 @@ Here is an empty bullet list item:` tags, while paragraphs in a tight list are not.) Changing the bullet or ordered list delimiter starts a new list: -. +```````````````````````````````` example - foo - bar + baz @@ -4294,9 +4650,10 @@ Changing the bullet or ordered list delimiter starts a new list:
bim
-.
+````````````````````````````````
+
Thus, two blank lines can be used to separate consecutive lists of
the same type, or to separate a list from an indented code block
that would otherwise be parsed as a subparagraph of the final list
item:
-.
+```````````````````````````````` example
- foo
- bar
@@ -4481,9 +4844,10 @@ item:
code
-.
+````````````````````````````````
+
List items need not be indented to the same level. The following
list items will be treated as items at the same list level,
since none is indented enough to belong to the previous list
item:
-.
+```````````````````````````````` example
- a
- b
- c
@@ -4533,9 +4898,10 @@ item:
c
-. +```````````````````````````````` + This is a loose list, because there is a blank line between two of the list items: -. +```````````````````````````````` example - a - b @@ -4575,11 +4942,12 @@ two of the list items:c
-. +```````````````````````````````` + So is this, with a empty second item: -. +```````````````````````````````` example * a * @@ -4594,13 +4962,14 @@ So is this, with a empty second item:c
-. +```````````````````````````````` + These are loose lists, even though there is no space between the items, because one of the items directly contains two block-level elements with a blank line between them: -. +```````````````````````````````` example - a - b @@ -4619,9 +4988,10 @@ with a blank line between them:d
-. +```````````````````````````````` -. + +```````````````````````````````` example - a - b @@ -4639,11 +5009,12 @@ with a blank line between them:d
-. +```````````````````````````````` + This is a tight list, because the blank lines are in a code block: -. +```````````````````````````````` example - a - ``` b @@ -4662,13 +5033,14 @@ This is a tight list, because the blank lines are in a code block:bar
-. +```````````````````````````````` + Here the outer list is loose, the inner list tight: -. +```````````````````````````````` example * foo * bar @@ -4789,9 +5167,10 @@ Here the outer list is loose, the inner list tight:baz
-. +```````````````````````````````` -. + +```````````````````````````````` example - a - b - c @@ -4816,7 +5195,8 @@ Here the outer list is loose, the inner list tight: -. +```````````````````````````````` + # Inlines @@ -4824,11 +5204,12 @@ Inlines are parsed sequentially from the beginning of the character stream to the end (left to right, in left-to-right languages). Thus, for example, in -. +```````````````````````````````` example `hi`lo` .hilo`
!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
-. +```````````````````````````````` + Backslashes before other characters are treated as literal backslashes: -. +```````````````````````````````` example \→\A\a\ \3\φ\« .\→\A\a\ \3\φ\«
-. +```````````````````````````````` + Escaped characters are treated as regular characters and do not have their usual Markdown meanings: -. +```````````````````````````````` example \*not emphasized* \\emphasis
-. +```````````````````````````````` + A backslash at the end of the line is a [hard line break]: -. +```````````````````````````````` example foo\ bar .foo
bar
\[\`
\[\]
-.
+````````````````````````````````
-.
+
+```````````````````````````````` example
~~~
\[\]
~~~
.
\[\]
-.
+````````````````````````````````
-.
+
+```````````````````````````````` example
foo
-.
+````````````````````````````````
+
## Entity and numeric character references
All valid HTML entity references and numeric character
-references, except those occuring in code blocks, code spans,
-and raw HTML, are recognized as such and treated as equivalent to the
+references, except those occurring in code blocks and code spans,
+are recognized as such and treated as equivalent to the
corresponding Unicode characters. Conforming CommonMark parsers
need not store information about whether a particular character
was represented in the source using a Unicode character or
an entity reference.
-[Entity references](@entity-references) consist of `&` + any of the valid
+[Entity references](@) consist of `&` + any of the valid
HTML5 entity names + `;`. The
document & © Æ Ď ¾ ℋ ⅆ ∲ ≧̸
-. +```````````````````````````````` + [Decimal numeric character -references](@decimal-numeric-character-references) +references](@) consist of `` + a string of 1--8 arabic digits + `;`. A numeric character reference is parsed as the corresponding Unicode character. Invalid Unicode code points will be replaced by -the "unknown code point" character (`U+FFFD`). For security reasons, +the REPLACEMENT CHARACTER (`U+FFFD`). For security reasons, the code point `U+0000` will also be replaced by `U+FFFD`. -. +```````````````````````````````` example # Ӓ Ϡ .# Ӓ Ϡ � �
-. +```````````````````````````````` + [Hexadecimal numeric character -references](@hexadecimal-numeric-character-references) consist of `` + +references](@) consist of `` + either `X` or `x` + a string of 1-8 hexadecimal digits + `;`. They too are parsed as the corresponding Unicode character (this time specified with a hexadecimal numeral instead of decimal). -. +```````````````````````````````` example " ആ ಫ ." ആ ಫ
-. +```````````````````````````````` + Here are some nonentities: -. +```````````````````````````````` example   &x; -&ThisIsWayTooLongToBeAnEntityIsntIt; &hi?; +&ThisIsNotDefined; &hi?; .  &x; &#; &#x; -&ThisIsWayTooLongToBeAnEntityIsntIt; &hi?;
-. +&ThisIsNotDefined; &hi?; +```````````````````````````````` + Although HTML5 does accept some entity references without a trailing semicolon (such as `©`), these are not recognized here, because it makes the grammar too ambiguous: -. +```````````````````````````````` example © .©
-. +```````````````````````````````` + Strings that are not on the list of HTML5 named entities are not recognized as entity references either: -. +```````````````````````````````` example &MadeUpEntity; .&MadeUpEntity;
-. +```````````````````````````````` + Entity and numeric character references are recognized in any -context besides code spans or code blocks or raw HTML, including -URLs, [link title]s, and [fenced code block][] [info string]s: +context besides code spans or code blocks, including +URLs, [link titles], and [fenced code block][] [info strings]: -. +```````````````````````````````` example . -. +```````````````````````````````` -. + +```````````````````````````````` example [foo](/föö "föö") . -. +```````````````````````````````` -. + +```````````````````````````````` example [foo] [foo]: /föö "föö" . -. +```````````````````````````````` -. + +```````````````````````````````` example ``` föö foo ``` .foo
-.
+````````````````````````````````
+
Entity and numeric character references are treated as literal
-text in code spans and code blocks, and in raw HTML:
+text in code spans and code blocks:
-.
+```````````````````````````````` example
`föö`
.
föö
föfö
-.
+````````````````````````````````
-.
-
-.
-
-.
## Code spans
-A [backtick string](@backtick-string)
+A [backtick string](@)
is a string of one or more backtick characters (`` ` ``) that is neither
preceded nor followed by a backtick.
-A [code span](@code-span) begins with a backtick string and ends with
+A [code span](@) begins with a backtick string and ends with
a backtick string of equal length. The contents of the code span are
the characters between the two backtick strings, with leading and
-trailing spaces and [line ending]s removed, and
+trailing spaces and [line endings] removed, and
[whitespace] collapsed to single spaces.
This is a simple code span:
-.
+```````````````````````````````` example
`foo`
.
foo
foo ` bar
``
foo
foo bar baz
foo `` bar
foo\bar`
*foo*
[not a link](/foo)
<a href="">`
<http://foo.bar.baz>`
```foo``
-. +```````````````````````````````` -. + +```````````````````````````````` example `foo .`foo
-. +```````````````````````````````` + ## Emphasis and strong emphasis @@ -5296,19 +5711,19 @@ no emphasis: foo_bar_baz The rules given below capture all of these patterns, while allowing for efficient parsing strategies that do not backtrack. -First, some definitions. A [delimiter run](@delimiter-run) is either +First, some definitions. A [delimiter run](@) is either a sequence of one or more `*` characters that is not preceded or followed by a `*` character, or a sequence of one or more `_` characters that is not preceded or followed by a `_` character. -A [left-flanking delimiter run](@left-flanking-delimiter-run) is +A [left-flanking delimiter run](@) is a [delimiter run] that is (a) not followed by [Unicode whitespace], and (b) either not followed by a [punctuation character], or preceded by [Unicode whitespace] or a [punctuation character]. For purposes of this definition, the beginning and the end of the line count as Unicode whitespace. -A [right-flanking delimiter run](@right-flanking-delimiter-run) is +A [right-flanking delimiter run](@) is a [delimiter run] that is (a) not preceded by [Unicode whitespace], and (b) either not preceded by a [punctuation character], or followed by [Unicode whitespace] or a [punctuation character]. @@ -5359,7 +5774,7 @@ are a bit more complex than the ones given here.) The following rules define emphasis and strong emphasis: -1. A single `*` character [can open emphasis](@can-open-emphasis) +1. A single `*` character [can open emphasis](@) iff (if and only if) it is part of a [left-flanking delimiter run]. 2. A single `_` character [can open emphasis] iff @@ -5368,7 +5783,7 @@ The following rules define emphasis and strong emphasis: or (b) part of a [right-flanking delimiter run] preceded by punctuation. -3. A single `*` character [can close emphasis](@can-close-emphasis) +3. A single `*` character [can close emphasis](@) iff it is part of a [right-flanking delimiter run]. 4. 
A single `_` character [can close emphasis] iff @@ -5377,7 +5792,7 @@ The following rules define emphasis and strong emphasis: or (b) part of a [left-flanking delimiter run] followed by punctuation. -5. A double `**` [can open strong emphasis](@can-open-strong-emphasis) +5. A double `**` [can open strong emphasis](@) iff it is part of a [left-flanking delimiter run]. 6. A double `__` [can open strong emphasis] iff @@ -5386,7 +5801,7 @@ The following rules define emphasis and strong emphasis: or (b) part of a [right-flanking delimiter run] preceded by punctuation. -7. A double `**` [can close strong emphasis](@can-close-strong-emphasis) +7. A double `**` [can close strong emphasis](@) iff it is part of a [right-flanking delimiter run]. 8. A double `__` [can close strong emphasis] @@ -5453,141 +5868,157 @@ These rules can be illustrated through a series of examples. Rule 1: -. +```````````````````````````````` example *foo bar* .foo bar
-. +```````````````````````````````` + This is not emphasis, because the opening `*` is followed by whitespace, and hence not part of a [left-flanking delimiter run]: -. +```````````````````````````````` example a * foo bar* .a * foo bar*
-. +```````````````````````````````` + This is not emphasis, because the opening `*` is preceded by an alphanumeric and followed by punctuation, and hence not part of a [left-flanking delimiter run]: -. +```````````````````````````````` example a*"foo"* .a*"foo"*
-. +```````````````````````````````` + Unicode nonbreaking spaces count as whitespace, too: -. +```````````````````````````````` example * a * .* a *
-. +```````````````````````````````` + Intraword emphasis with `*` is permitted: -. +```````````````````````````````` example foo*bar* .foobar
-. +```````````````````````````````` -. + +```````````````````````````````` example 5*6*78 .5678
-. +```````````````````````````````` + Rule 2: -. +```````````````````````````````` example _foo bar_ .foo bar
-. +```````````````````````````````` + This is not emphasis, because the opening `_` is followed by whitespace: -. +```````````````````````````````` example _ foo bar_ ._ foo bar_
-. +```````````````````````````````` + This is not emphasis, because the opening `_` is preceded by an alphanumeric and followed by punctuation: -. +```````````````````````````````` example a_"foo"_ .a_"foo"_
-. +```````````````````````````````` + Emphasis with `_` is not allowed inside words: -. +```````````````````````````````` example foo_bar_ .foo_bar_
-. +```````````````````````````````` -. + +```````````````````````````````` example 5_6_78 .5_6_78
-. +```````````````````````````````` -. + +```````````````````````````````` example пристаням_стремятся_ .пристаням_стремятся_
-. +```````````````````````````````` + Here `_` does not generate emphasis, because the first delimiter run is right-flanking and the second left-flanking: -. +```````````````````````````````` example aa_"bb"_cc .aa_"bb"_cc
-. +```````````````````````````````` + This is emphasis, even though the opening delimiter is both left- and right-flanking, because it is preceded by punctuation: -. +```````````````````````````````` example foo-_(bar)_ .foo-(bar)
-. +```````````````````````````````` + Rule 3: This is not emphasis, because the closing delimiter does not match the opening delimiter: -. +```````````````````````````````` example _foo* ._foo*
-. +```````````````````````````````` + This is not emphasis, because the closing `*` is preceded by whitespace: -. +```````````````````````````````` example *foo bar * .*foo bar *
-. +```````````````````````````````` + A newline also counts as whitespace: -. +```````````````````````````````` example *foo bar * . @@ -5595,34 +6026,38 @@ A newline also counts as whitespace:*(*foo)
-. +```````````````````````````````` + The point of this restriction is more easily appreciated with this example: -. +```````````````````````````````` example *(*foo*)* .(foo)
-. +```````````````````````````````` + Intraword emphasis with `*` is allowed: -. +```````````````````````````````` example *foo*bar .foobar
-. +```````````````````````````````` + Rule 4: @@ -5630,164 +6065,184 @@ Rule 4: This is not emphasis, because the closing `_` is preceded by whitespace: -. +```````````````````````````````` example _foo bar _ ._foo bar _
-. +```````````````````````````````` + This is not emphasis, because the second `_` is preceded by punctuation and followed by an alphanumeric: -. +```````````````````````````````` example _(_foo) ._(_foo)
-. +```````````````````````````````` + This is emphasis within emphasis: -. +```````````````````````````````` example _(_foo_)_ .(foo)
-. +```````````````````````````````` + Intraword emphasis is disallowed for `_`: -. +```````````````````````````````` example _foo_bar ._foo_bar
-. +```````````````````````````````` -. + +```````````````````````````````` example _пристаням_стремятся ._пристаням_стремятся
-. +```````````````````````````````` -. + +```````````````````````````````` example _foo_bar_baz_ .foo_bar_baz
-. +```````````````````````````````` + This is emphasis, even though the closing delimiter is both left- and right-flanking, because it is followed by punctuation: -. +```````````````````````````````` example _(bar)_. .(bar).
-. +```````````````````````````````` + Rule 5: -. +```````````````````````````````` example **foo bar** .foo bar
-. +```````````````````````````````` + This is not strong emphasis, because the opening delimiter is followed by whitespace: -. +```````````````````````````````` example ** foo bar** .** foo bar**
-. +```````````````````````````````` + This is not strong emphasis, because the opening `**` is preceded by an alphanumeric and followed by punctuation, and hence not part of a [left-flanking delimiter run]: -. +```````````````````````````````` example a**"foo"** .a**"foo"**
-. +```````````````````````````````` + Intraword strong emphasis with `**` is permitted: -. +```````````````````````````````` example foo**bar** .foobar
-. +```````````````````````````````` + Rule 6: -. +```````````````````````````````` example __foo bar__ .foo bar
-. +```````````````````````````````` + This is not strong emphasis, because the opening delimiter is followed by whitespace: -. +```````````````````````````````` example __ foo bar__ .__ foo bar__
-. +```````````````````````````````` + A newline counts as whitespace: -. +```````````````````````````````` example __ foo bar__ .__ foo bar__
-. +```````````````````````````````` + This is not strong emphasis, because the opening `__` is preceded by an alphanumeric and followed by punctuation: -. +```````````````````````````````` example a__"foo"__ .a__"foo"__
-. +```````````````````````````````` + Intraword strong emphasis is forbidden with `__`: -. +```````````````````````````````` example foo__bar__ .foo__bar__
-. +```````````````````````````````` -. + +```````````````````````````````` example 5__6__78 .5__6__78
-. +```````````````````````````````` -. + +```````````````````````````````` example пристаням__стремятся__ .пристаням__стремятся__
-. +```````````````````````````````` -. + +```````````````````````````````` example __foo, __bar__, baz__ .foo, bar, baz
-. +```````````````````````````````` + This is strong emphasis, even though the opening delimiter is both left- and right-flanking, because it is preceded by punctuation: -. +```````````````````````````````` example foo-__(bar)__ .foo-(bar)
-. +```````````````````````````````` + Rule 7: @@ -5795,11 +6250,12 @@ Rule 7: This is not strong emphasis, because the closing delimiter is preceded by whitespace: -. +```````````````````````````````` example **foo bar ** .**foo bar **
-. +```````````````````````````````` + (Nor can it be interpreted as an emphasized `*foo bar *`, because of Rule 11.) @@ -5807,215 +6263,242 @@ Rule 11.) This is not strong emphasis, because the second `**` is preceded by punctuation and followed by an alphanumeric: -. +```````````````````````````````` example **(**foo) .**(**foo)
-. +```````````````````````````````` + The point of this restriction is more easily appreciated with these examples: -. +```````````````````````````````` example *(**foo**)* .(foo)
-. +```````````````````````````````` -. + +```````````````````````````````` example **Gomphocarpus (*Gomphocarpus physocarpus*, syn. *Asclepias physocarpa*)** .Gomphocarpus (Gomphocarpus physocarpus, syn. Asclepias physocarpa)
-. +```````````````````````````````` -. + +```````````````````````````````` example **foo "*bar*" foo** .foo "bar" foo
-. +```````````````````````````````` + Intraword emphasis: -. +```````````````````````````````` example **foo**bar .foobar
-. +```````````````````````````````` + Rule 8: This is not strong emphasis, because the closing delimiter is preceded by whitespace: -. +```````````````````````````````` example __foo bar __ .__foo bar __
-. +```````````````````````````````` + This is not strong emphasis, because the second `__` is preceded by punctuation and followed by an alphanumeric: -. +```````````````````````````````` example __(__foo) .__(__foo)
-. +```````````````````````````````` + The point of this restriction is more easily appreciated with this example: -. +```````````````````````````````` example _(__foo__)_ .(foo)
-. +```````````````````````````````` + Intraword strong emphasis is forbidden with `__`: -. +```````````````````````````````` example __foo__bar .__foo__bar
-. +```````````````````````````````` -. + +```````````````````````````````` example __пристаням__стремятся .__пристаням__стремятся
-. +```````````````````````````````` -. + +```````````````````````````````` example __foo__bar__baz__ .foo__bar__baz
-. +```````````````````````````````` + This is strong emphasis, even though the closing delimiter is both left- and right-flanking, because it is followed by punctuation: -. +```````````````````````````````` example __(bar)__. .(bar).
-. +```````````````````````````````` + Rule 9: Any nonempty sequence of inline elements can be the contents of an emphasized span. -. +```````````````````````````````` example *foo [bar](/url)* .foo bar
-. +```````````````````````````````` -. + +```````````````````````````````` example *foo bar* .foo bar
-. +```````````````````````````````` + In particular, emphasis and strong emphasis can be nested inside emphasis: -. +```````````````````````````````` example _foo __bar__ baz_ .foo bar baz
-. +```````````````````````````````` -. + +```````````````````````````````` example _foo _bar_ baz_ .foo bar baz
-. +```````````````````````````````` -. + +```````````````````````````````` example __foo_ bar_ .foo bar
-. +```````````````````````````````` -. + +```````````````````````````````` example *foo *bar** .foo bar
-. +```````````````````````````````` -. + +```````````````````````````````` example *foo **bar** baz* .foo bar baz
-. +```````````````````````````````` + But note: -. +```````````````````````````````` example *foo**bar**baz* .foobarbaz
-. +```````````````````````````````` + The difference is that in the preceding case, the internal delimiters [can close emphasis], while in the cases with spaces, they cannot. -. +```````````````````````````````` example ***foo** bar* .foo bar
-. +```````````````````````````````` -. + +```````````````````````````````` example *foo **bar*** .foo bar
-. +```````````````````````````````` + Note, however, that in the following case we get no strong emphasis, because the opening delimiter is closed by the first `*` before `bar`: -. +```````````````````````````````` example *foo**bar*** .foobar**
-. +```````````````````````````````` + Indefinite levels of nesting are possible: -. +```````````````````````````````` example *foo **bar *baz* bim** bop* .foo bar baz bim bop
-. +```````````````````````````````` -. + +```````````````````````````````` example *foo [*bar*](/url)* .foo bar
-. +```````````````````````````````` + There can be no empty emphasis or strong emphasis: -. +```````````````````````````````` example ** is not an empty emphasis .** is not an empty emphasis
-. +```````````````````````````````` -. + +```````````````````````````````` example **** is not an empty strong emphasis .**** is not an empty strong emphasis
-. +```````````````````````````````` + Rule 10: @@ -6023,431 +6506,492 @@ Rule 10: Any nonempty sequence of inline elements can be the contents of an strongly emphasized span. -. +```````````````````````````````` example **foo [bar](/url)** .foo bar
-. +```````````````````````````````` -. + +```````````````````````````````` example **foo bar** .foo bar
-. +```````````````````````````````` + In particular, emphasis and strong emphasis can be nested inside strong emphasis: -. +```````````````````````````````` example __foo _bar_ baz__ .foo bar baz
-. +```````````````````````````````` -. + +```````````````````````````````` example __foo __bar__ baz__ .foo bar baz
-. +```````````````````````````````` -. + +```````````````````````````````` example ____foo__ bar__ .foo bar
-. +```````````````````````````````` -. + +```````````````````````````````` example **foo **bar**** .foo bar
-. +```````````````````````````````` -. + +```````````````````````````````` example **foo *bar* baz** .foo bar baz
-. +```````````````````````````````` + But note: -. +```````````````````````````````` example **foo*bar*baz** .foobarbaz**
-. +```````````````````````````````` + The difference is that in the preceding case, the internal delimiters [can close emphasis], while in the cases with spaces, they cannot. -. +```````````````````````````````` example ***foo* bar** .foo bar
-. +```````````````````````````````` -. + +```````````````````````````````` example **foo *bar*** .foo bar
-. +```````````````````````````````` + Indefinite levels of nesting are possible: -. +```````````````````````````````` example **foo *bar **baz** bim* bop** .foo bar baz bim bop
-. +```````````````````````````````` -. + +```````````````````````````````` example **foo [*bar*](/url)** .foo bar
-. +```````````````````````````````` + There can be no empty emphasis or strong emphasis: -. +```````````````````````````````` example __ is not an empty emphasis .__ is not an empty emphasis
-. +```````````````````````````````` -. + +```````````````````````````````` example ____ is not an empty strong emphasis .____ is not an empty strong emphasis
-. +```````````````````````````````` + Rule 11: -. +```````````````````````````````` example foo *** .foo ***
-. +```````````````````````````````` -. + +```````````````````````````````` example foo *\** .foo *
-. +```````````````````````````````` -. + +```````````````````````````````` example foo *_* .foo _
-. +```````````````````````````````` -. + +```````````````````````````````` example foo ***** .foo *****
-. +```````````````````````````````` -. + +```````````````````````````````` example foo **\*** .foo *
-. +```````````````````````````````` -. + +```````````````````````````````` example foo **_** .foo _
-. +```````````````````````````````` + Note that when delimiters do not match evenly, Rule 11 determines that the excess literal `*` characters will appear outside of the emphasis, rather than inside it: -. +```````````````````````````````` example **foo* .*foo
-. +```````````````````````````````` -. + +```````````````````````````````` example *foo** .foo*
-. +```````````````````````````````` -. + +```````````````````````````````` example ***foo** .*foo
-. +```````````````````````````````` -. + +```````````````````````````````` example ****foo* .***foo
-. +```````````````````````````````` -. + +```````````````````````````````` example **foo*** .foo*
-. +```````````````````````````````` -. + +```````````````````````````````` example *foo**** .foo***
-. +```````````````````````````````` + Rule 12: -. +```````````````````````````````` example foo ___ .foo ___
-. +```````````````````````````````` -. + +```````````````````````````````` example foo _\__ .foo _
-. +```````````````````````````````` -. + +```````````````````````````````` example foo _*_ .foo *
-. +```````````````````````````````` -. + +```````````````````````````````` example foo _____ .foo _____
-. +```````````````````````````````` -. + +```````````````````````````````` example foo __\___ .foo _
-. +```````````````````````````````` -. + +```````````````````````````````` example foo __*__ .foo *
-. +```````````````````````````````` -. + +```````````````````````````````` example __foo_ ._foo
-. +```````````````````````````````` + Note that when delimiters do not match evenly, Rule 12 determines that the excess literal `_` characters will appear outside of the emphasis, rather than inside it: -. +```````````````````````````````` example _foo__ .foo_
-. +```````````````````````````````` -. + +```````````````````````````````` example ___foo__ ._foo
-. +```````````````````````````````` -. + +```````````````````````````````` example ____foo_ .___foo
-. +```````````````````````````````` -. + +```````````````````````````````` example __foo___ .foo_
-. +```````````````````````````````` -. + +```````````````````````````````` example _foo____ .foo___
-. +```````````````````````````````` + Rule 13 implies that if you want emphasis nested directly inside emphasis, you must use different delimiters: -. +```````````````````````````````` example **foo** .foo
-. +```````````````````````````````` -. + +```````````````````````````````` example *_foo_* .foo
-. +```````````````````````````````` -. + +```````````````````````````````` example __foo__ .foo
-. +```````````````````````````````` -. + +```````````````````````````````` example _*foo*_ .foo
-. +```````````````````````````````` + However, strong emphasis within strong emphasis is possible without switching delimiters: -. +```````````````````````````````` example ****foo**** .foo
-. +```````````````````````````````` -. + +```````````````````````````````` example ____foo____ .foo
-. +```````````````````````````````` + Rule 13 can be applied to arbitrarily long sequences of delimiters: -. +```````````````````````````````` example ******foo****** .foo
-. +```````````````````````````````` + Rule 14: -. +```````````````````````````````` example ***foo*** .foo
-. +```````````````````````````````` -. + +```````````````````````````````` example _____foo_____ .foo
-. +```````````````````````````````` + Rule 15: -. +```````````````````````````````` example *foo _bar* baz_ .foo _bar baz_
-. +```````````````````````````````` -. + +```````````````````````````````` example **foo*bar** .foobar*
-. +```````````````````````````````` -. + +```````````````````````````````` example *foo __bar *baz bim__ bam* .foo bar *baz bim bam
-. +```````````````````````````````` + Rule 16: -. +```````````````````````````````` example **foo **bar baz** .**foo bar baz
-. +```````````````````````````````` -. + +```````````````````````````````` example *foo *bar baz* .*foo bar baz
-. +```````````````````````````````` + Rule 17: -. +```````````````````````````````` example *[bar*](/url) .*bar*
-. +```````````````````````````````` -. + +```````````````````````````````` example _foo [bar_](/url) ._foo bar_
-. +```````````````````````````````` -. + +```````````````````````````````` example **
a *
a _
[link](/my uri)
-. +```````````````````````````````` -. + +```````````````````````````````` example [link]() . - -. +[link](</my uri>)
+```````````````````````````````` -The destination cannot contain line breaks, even with pointy braces: -. +```````````````````````````````` example [link](foo bar) .[link](foo bar)
-. +```````````````````````````````` -. + +```````````````````````````````` example [link]([link](
[link](foo(and(bar)))
-. +```````````````````````````````` -. + +```````````````````````````````` example [link](foo(and\(bar\))) . -. +```````````````````````````````` -. + +```````````````````````````````` example [link]([link](/url "title "and" title")
-. +```````````````````````````````` + But it is easy to work around this by using a different quote type: -. +```````````````````````````````` example [link](/url 'title "and" title') . -. +```````````````````````````````` + (Note: `Markdown.pl` did allow double quotes inside a double-quoted title, and its test suite included a test demonstrating this. @@ -6711,144 +7280,161 @@ the same way in inline links and link reference definitions.) [Whitespace] is allowed around the destination and title: -. +```````````````````````````````` example [link]( /uri "title" ) . -. +```````````````````````````````` + But it is not allowed between the link text and the following parenthesis: -. +```````````````````````````````` example [link] (/uri) .[link] (/uri)
-. +```````````````````````````````` + The link text may contain balanced brackets, but not unbalanced ones, unless they are escaped: -. +```````````````````````````````` example [link [foo [bar]]](/uri) . -. +```````````````````````````````` -. + +```````````````````````````````` example [link] bar](/uri) .[link] bar](/uri)
-. +```````````````````````````````` -. + +```````````````````````````````` example [link [bar](/uri) .[link bar
-. +```````````````````````````````` -. + +```````````````````````````````` example [link \[bar](/uri) . -. +```````````````````````````````` + The link text may contain inline content: -. +```````````````````````````````` example [link *foo **bar** `#`*](/uri) . -. +```````````````````````````````` -. + +```````````````````````````````` example [](/uri) . -. +```````````````````````````````` + However, links may not contain other links, at any level of nesting. -. +```````````````````````````````` example [foo [bar](/uri)](/uri) .[foo bar](/uri)
-. +```````````````````````````````` -. + +```````````````````````````````` example [foo *[bar [baz](/uri)](/uri)*](/uri) .[foo [bar baz](/uri)](/uri)
-. +```````````````````````````````` -. + +```````````````````````````````` example ](uri2)](uri3) .*foo*
-. +```````````````````````````````` -. + +```````````````````````````````` example [foo *bar](baz*) . -. +```````````````````````````````` + Note that brackets that *aren't* part of links do not take precedence: -. +```````````````````````````````` example *foo [bar* baz] .foo [bar baz]
-. +```````````````````````````````` + These cases illustrate the precedence of HTML tags, code spans, and autolinks over link grouping: -. +```````````````````````````````` example [foo[foo
[foo](/uri)
[foohttp://example.com/?search=](uri)
-. +```````````````````````````````` -There are three kinds of [reference link](@reference-link)s: + +There are three kinds of [reference link](@)s: [full](#full-reference-link), [collapsed](#collapsed-reference-link), and [shortcut](#shortcut-reference-link). -A [full reference link](@full-reference-link) +A [full reference link](@) consists of a [link text] immediately followed by a [link label] that [matches] a [link reference definition] elsewhere in the document. -A [link label](@link-label) begins with a left bracket (`[`) and ends +A [link label](@) begins with a left bracket (`[`) and ends with the first right bracket (`]`) that is not backslash-escaped. Between these brackets there must be at least one [non-whitespace character]. Unescaped square bracket characters are not allowed in -[link label]s. A link label can have at most 999 +[link labels]. A link label can have at most 999 characters inside the square brackets. -One label [matches](@matches) +One label [matches](@) another just in case their normalized forms are equal. To normalize a label, perform the *Unicode case fold* and collapse consecutive internal [whitespace] to a single space. If there are multiple @@ -6861,165 +7447,181 @@ matching [link reference definition]. Here is a simple example: -. +```````````````````````````````` example [foo][bar] [bar]: /url "title" . -. +```````````````````````````````` + The rules for the [link text] are the same as with -[inline link]s. Thus: +[inline links]. Thus: The link text may contain balanced brackets, but not unbalanced ones, unless they are escaped: -. +```````````````````````````````` example [link [foo [bar]]][ref] [ref]: /uri . -. +```````````````````````````````` -. + +```````````````````````````````` example [link \[bar][ref] [ref]: /uri . -. +```````````````````````````````` + The link text may contain inline content: -. +```````````````````````````````` example [link *foo **bar** `#`*][ref] [ref]: /uri . -. 
+```````````````````````````````` -. + +```````````````````````````````` example [][ref] [ref]: /uri . -. +```````````````````````````````` + However, links may not contain other links, at any level of nesting. -. +```````````````````````````````` example [foo [bar](/uri)][ref] [ref]: /uri . -. +```````````````````````````````` -. + +```````````````````````````````` example [foo *bar [baz][ref]*][ref] [ref]: /uri . -. +```````````````````````````````` + -(In the examples above, we have two [shortcut reference link]s +(In the examples above, we have two [shortcut reference links] instead of one [full reference link].) The following cases illustrate the precedence of link text grouping over emphasis grouping: -. +```````````````````````````````` example *[foo*][ref] [ref]: /uri .*foo*
-. +```````````````````````````````` -. + +```````````````````````````````` example [foo *bar][ref] [ref]: /uri . -. +```````````````````````````````` + These cases illustrate the precedence of HTML tags, code spans, and autolinks over link grouping: -. +```````````````````````````````` example [foo[foo
[foo][ref]
[foohttp://example.com/?search=][ref]
-. +```````````````````````````````` + Matching is case-insensitive: -. +```````````````````````````````` example [foo][BaR] [bar]: /url "title" . -. +```````````````````````````````` + Unicode case fold is used: -. +```````````````````````````````` example [Толпой][Толпой] is a Russian word. [ТОЛПОЙ]: /url .Толпой is a Russian word.
-. +```````````````````````````````` + Consecutive internal [whitespace] is treated as one space for purposes of determining matching: -. +```````````````````````````````` example [Foo bar]: /url [Baz][Foo bar] . -. +```````````````````````````````` + No [whitespace] is allowed between the [link text] and the [link label]: -. +```````````````````````````````` example [foo] [bar] [bar]: /url "title" .[foo] bar
-. +```````````````````````````````` -. + +```````````````````````````````` example [foo] [bar] @@ -7027,15 +7629,16 @@ No [whitespace] is allowed between the [link text] and the .[foo] bar
-. +```````````````````````````````` + This is a departure from John Gruber's original Markdown syntax description, which explicitly allows whitespace between the link text and the link label. It brings reference links in line with -[inline link]s, which (according to both original Markdown and +[inline links], which (according to both original Markdown and this spec) cannot have whitespace after the link text. More importantly, it prevents inadvertent capture of consecutive -[shortcut reference link]s. If whitespace is allowed between the +[shortcut reference links]. If whitespace is allowed between the link text and the link label, then in the following we will have a single reference link, not two shortcut reference links, as intended: @@ -7048,7 +7651,7 @@ intended: [bar]: /url2 ``` -(Note that [shortcut reference link]s were introduced by Gruber +(Note that [shortcut reference links] were introduced by Gruber himself in a beta version of `Markdown.pl`, but never included in the official syntax description. Without shortcut reference links, it is harmless to allow space between the link text and @@ -7056,10 +7659,10 @@ link label; but once shortcut references are introduced, it is too dangerous to allow this, as it frequently leads to unintended results.) -When there are multiple matching [link reference definition]s, +When there are multiple matching [link reference definitions], the first is used: -. +```````````````````````````````` example [foo]: /url1 [foo]: /url2 @@ -7067,80 +7670,88 @@ the first is used: [bar][foo] . -. +```````````````````````````````` + Note that matching is performed on normalized strings, not parsed inline content. So the following does not match, even though the labels define equivalent inline content: -. +```````````````````````````````` example [bar][foo\!] [foo!]: /url .[bar][foo!]
-. +```````````````````````````````` -[Link label]s cannot contain brackets, unless they are + +[Link labels] cannot contain brackets, unless they are backslash-escaped: -. +```````````````````````````````` example [foo][ref[] [ref[]: /uri .[foo][ref[]
[ref[]: /uri
-. +```````````````````````````````` -. + +```````````````````````````````` example [foo][ref[bar]] [ref[bar]]: /uri .[foo][ref[bar]]
[ref[bar]]: /uri
-. +```````````````````````````````` -. + +```````````````````````````````` example [[[foo]]] [[[foo]]]: /url .[[[foo]]]
[[[foo]]]: /url
-. +```````````````````````````````` -. + +```````````````````````````````` example [foo][ref\[] [ref\[]: /uri . -. +```````````````````````````````` + Note that in this example `]` is not backslash-escaped: -. +```````````````````````````````` example [bar\\]: /uri [bar\\] . -. +```````````````````````````````` + A [link label] must contain at least one [non-whitespace character]: -. +```````````````````````````````` example [] []: /uri .[]
[]: /uri
-. +```````````````````````````````` -. + +```````````````````````````````` example [ ] @@ -7151,9 +7762,10 @@ A [link label] must contain at least one [non-whitespace character]: ][ ]: /uri
-. +```````````````````````````````` -A [collapsed reference link](@collapsed-reference-link) + +A [collapsed reference link](@) consists of a [link label] that [matches] a [link reference definition] elsewhere in the document, followed by the string `[]`. @@ -7162,37 +7774,40 @@ which are used as the link's text. The link's URI and title are provided by the matching reference link definition. Thus, `[foo][]` is equivalent to `[foo][foo]`. -. +```````````````````````````````` example [foo][] [foo]: /url "title" . -. +```````````````````````````````` -. + +```````````````````````````````` example [*foo* bar][] [*foo* bar]: /url "title" . -. +```````````````````````````````` + The link labels are case-insensitive: -. +```````````````````````````````` example [Foo][] [foo]: /url "title" . -. +```````````````````````````````` + As with full reference links, [whitespace] is not allowed between the two sets of brackets: -. +```````````````````````````````` example [foo] [] @@ -7200,9 +7815,10 @@ allowed between the two sets of brackets: .foo []
-. +```````````````````````````````` + -A [shortcut reference link](@shortcut-reference-link) +A [shortcut reference link](@) consists of a [link label] that [matches] a [link reference definition] elsewhere in the document and is not followed by `[]` or a link label. @@ -7211,132 +7827,144 @@ which are used as the link's text. the link's URI and title are provided by the matching link reference definition. Thus, `[foo]` is equivalent to `[foo][]`. -. +```````````````````````````````` example [foo] [foo]: /url "title" . -. +```````````````````````````````` -. + +```````````````````````````````` example [*foo* bar] [*foo* bar]: /url "title" . -. +```````````````````````````````` -. + +```````````````````````````````` example [[*foo* bar]] [*foo* bar]: /url "title" .[foo bar]
-. +```````````````````````````````` -. + +```````````````````````````````` example [[bar [foo] [foo]: /url .[[bar foo
-. +```````````````````````````````` + The link labels are case-insensitive: -. +```````````````````````````````` example [Foo] [foo]: /url "title" . -. +```````````````````````````````` + A space after the link text should be preserved: -. +```````````````````````````````` example [foo] bar [foo]: /url .foo bar
-. +```````````````````````````````` + If you just want bracketed text, you can backslash-escape the opening bracket to avoid links: -. +```````````````````````````````` example \[foo] [foo]: /url "title" .[foo]
-. +```````````````````````````````` + Note that this is a link, because a link label ends with the first following closing bracket: -. +```````````````````````````````` example [foo*]: /url *[foo*] .*foo*
-. +```````````````````````````````` + Full references take precedence over shortcut references: -. +```````````````````````````````` example [foo][bar] [foo]: /url1 [bar]: /url2 . -. +```````````````````````````````` + In the following case `[bar][baz]` is parsed as a reference, `[foo]` as normal text: -. +```````````````````````````````` example [foo][bar][baz] [baz]: /url .[foo]bar
-. +```````````````````````````````` + Here, though, `[foo][bar]` is parsed as a reference, since `[bar]` is defined: -. +```````````````````````````````` example [foo][bar][baz] [baz]: /url1 [bar]: /url2 . -. +```````````````````````````````` + Here `[foo]` is not parsed as a shortcut reference, because it is followed by a link label (even though `[bar]` is not defined): -. +```````````````````````````````` example [foo][bar][baz] [baz]: /url1 [foo]: /url2 .[foo]bar
-. +```````````````````````````````` + ## Images Syntax for images is like the syntax for links, with one difference. Instead of [link text], we have an -[image description](@image-description). The rules for this are the +[image description](@). The rules for this are the same as for [link text], except that (a) an image description starts with ` .



My 
[]
![[foo]]
[[foo]]: /url "title"
-. +```````````````````````````````` + The link labels are case-insensitive: -. +```````````````````````````````` example ![Foo] [foo]: /url "title" .![foo]
-. +```````````````````````````````` + If you want a link after a literal `!`, backslash-escape the `!`: -. +```````````````````````````````` example \![foo] [foo]: /url "title" .!foo
-. +```````````````````````````````` + ## Autolinks -[Autolink](@autolink)s are absolute URIs and email addresses inside +[Autolink](@)s are absolute URIs and email addresses inside `<` and `>`. They are parsed as links, with the URL or email address as the link label. -A [URI autolink](@uri-autolink) consists of `<`, followed by an +A [URI autolink](@) consists of `<`, followed by an [absolute URI] not containing `<`, followed by `>`. It is parsed as a link to the URI, with the URI as the link's label. -An [absolute URI](@absolute-uri), +An [absolute URI](@), for these purposes, consists of a [scheme] followed by a colon (`:`) followed by zero or more characters other than ASCII [whitespace] and control characters, `<`, and `>`. If -the URI includes these characters, you must use percent-encoding +the URI includes these characters, they must be percent-encoded (e.g. `%20` for a space). -The following [schemes](@scheme) -are recognized (case-insensitive): -`coap`, `doi`, `javascript`, `aaa`, `aaas`, `about`, `acap`, `cap`, -`cid`, `crid`, `data`, `dav`, `dict`, `dns`, `file`, `ftp`, `geo`, `go`, -`gopher`, `h323`, `http`, `https`, `iax`, `icap`, `im`, `imap`, `info`, -`ipp`, `iris`, `iris.beep`, `iris.xpc`, `iris.xpcs`, `iris.lwz`, `ldap`, -`mailto`, `mid`, `msrp`, `msrps`, `mtqp`, `mupdate`, `news`, `nfs`, -`ni`, `nih`, `nntp`, `opaquelocktoken`, `pop`, `pres`, `rtsp`, -`service`, `session`, `shttp`, `sieve`, `sip`, `sips`, `sms`, `snmp`,` -soap.beep`, `soap.beeps`, `tag`, `tel`, `telnet`, `tftp`, `thismessage`, -`tn3270`, `tip`, `tv`, `urn`, `vemmi`, `ws`, `wss`, `xcon`, -`xcon-userid`, `xmlrpc.beep`, `xmlrpc.beeps`, `xmpp`, `z39.50r`, -`z39.50s`, `adiumxtra`, `afp`, `afs`, `aim`, `apt`,` attachment`, `aw`, -`beshare`, `bitcoin`, `bolo`, `callto`, `chrome`,` chrome-extension`, -`com-eventbrite-attendee`, `content`, `cvs`,` dlna-playsingle`, -`dlna-playcontainer`, `dtn`, `dvb`, `ed2k`, `facetime`, `feed`, -`finger`, `fish`, `gg`, `git`, `gizmoproject`, `gtalk`, 
`hcp`, `icon`, -`ipn`, `irc`, `irc6`, `ircs`, `itms`, `jar`, `jms`, `keyparc`, `lastfm`, -`ldaps`, `magnet`, `maps`, `market`,` message`, `mms`, `ms-help`, -`msnim`, `mumble`, `mvn`, `notes`, `oid`, `palm`, `paparazzi`, -`platform`, `proxy`, `psyc`, `query`, `res`, `resource`, `rmi`, `rsync`, -`rtmp`, `secondlife`, `sftp`, `sgn`, `skype`, `smb`, `soldat`, -`spotify`, `ssh`, `steam`, `svn`, `teamspeak`, `things`, `udp`, -`unreal`, `ut2004`, `ventrilo`, `view-source`, `webcal`, `wtai`, -`wyciwyg`, `xfire`, `xri`, `ymsgr`. +For purposes of this spec, a [scheme](@) is any sequence +of 2--32 characters beginning with an ASCII letter and followed +by any combination of ASCII letters, digits, or the symbols plus +("+"), period ("."), or hyphen ("-"). Here are some valid autolinks: -. +```````````````````````````````` examplehttp://foo.bar.baz/test?q=hello&id=22&boolean
-. +```````````````````````````````` -. + +```````````````````````````````` example<http://foo.bar/baz bim>
-. +```````````````````````````````` + Backslash-escapes do not work inside autolinks: -. +```````````````````````````````` example<foo+@bar.example.com>
-. +```````````````````````````````` + These are not autolinks: -. +```````````````````````````````` example <> .<>
-. +```````````````````````````````` -. -<heck://bing.bong>
-. -. +```````````````````````````````` example < http://foo.bar > .< http://foo.bar >
-. +```````````````````````````````` + +```````````````````````````````` example +<m:abc>
+```````````````````````````````` + + +```````````````````````````````` example<foo.bar.baz>
-. +```````````````````````````````` -. -<localhost:5001/foo>
-. -. +```````````````````````````````` example http://example.com .http://example.com
-. +```````````````````````````````` -. + +```````````````````````````````` example foo@bar.example.com .foo@bar.example.com
-. +```````````````````````````````` + ## Raw HTML @@ -7712,380 +8383,416 @@ so custom tags (and even, say, DocBook tags) may be used. Here is the grammar for tags: -A [tag name](@tag-name) consists of an ASCII letter +A [tag name](@) consists of an ASCII letter followed by zero or more ASCII letters, digits, or hyphens (`-`). -An [attribute](@attribute) consists of [whitespace], +An [attribute](@) consists of [whitespace], an [attribute name], and an optional [attribute value specification]. -An [attribute name](@attribute-name) +An [attribute name](@) consists of an ASCII letter, `_`, or `:`, followed by zero or more ASCII letters, digits, `_`, `.`, `:`, or `-`. (Note: This is the XML specification restricted to ASCII. HTML5 is laxer.) -An [attribute value specification](@attribute-value-specification) +An [attribute value specification](@) consists of optional [whitespace], a `=` character, optional [whitespace], and an [attribute value]. -An [attribute value](@attribute-value) +An [attribute value](@) consists of an [unquoted attribute value], a [single-quoted attribute value], or a [double-quoted attribute value]. -An [unquoted attribute value](@unquoted-attribute-value) +An [unquoted attribute value](@) is a nonempty string of characters not including spaces, `"`, `'`, `=`, `<`, `>`, or `` ` ``. -A [single-quoted attribute value](@single-quoted-attribute-value) +A [single-quoted attribute value](@) consists of `'`, zero or more characters not including `'`, and a final `'`. -A [double-quoted attribute value](@double-quoted-attribute-value) +A [double-quoted attribute value](@) consists of `"`, zero or more characters not including `"`, and a final `"`. -An [open tag](@open-tag) consists of a `<` character, a [tag name], -zero or more [attribute]s, optional [whitespace], an optional `/` +An [open tag](@) consists of a `<` character, a [tag name], +zero or more [attributes], optional [whitespace], an optional `/` character, and a `>` character. 
-A [closing tag](@closing-tag) consists of the string `</`, a +A [closing tag](@) consists of the string `</`, a [tag name], optional [whitespace], and the character `>`. -An [HTML comment](@html-comment) consists of `<!--` + *text* + `-->`, +An [HTML comment](@) consists of `<!--` + *text* + `-->`, where *text* does not start with `>` or `->`, does not end with `-`, and does not contain `--`. (See the [HTML5 spec](http://www.w3.org/TR/html5/syntax.html#comments).) -A [processing instruction](@processing-instruction) +A [processing instruction](@) consists of the string `<?`, a string of characters not including the string `?>`, and the string `?>`. -A [declaration](@declaration) consists of the +A [declaration](@) consists of the string `<!`, a name consisting of one or more uppercase ASCII letters, [whitespace], a string of characters not including the character `>`, and the character `>`. -A [CDATA section](@cdata-section) consists of +A [CDATA section](@) consists of the string `<![CDATA[`, a string of characters not including the string `]]>`, and the string `]]>`. -An [HTML tag](@html-tag) consists of an [open tag], a [closing tag], +An [HTML tag](@) consists of an [open tag], a [closing tag], an [HTML comment], a [processing instruction], a [declaration], or a [CDATA section]. Here are some simple open tags: -. +```````````````````````````````` exampleFoo
<33> <__>
-. +```````````````````````````````` + Illegal attribute names: -. +```````````````````````````````` example .<a h*#ref="hi">
-. +```````````````````````````````` + Illegal attribute values: -. +```````````````````````````````` example .</a href="foo">
-. +```````````````````````````````` + Comments: -. +```````````````````````````````` example foo .foo
-. +```````````````````````````````` -. + +```````````````````````````````` example foo .foo <!-- not a comment -- two hyphens -->
-. +```````````````````````````````` + Not comments: -. +```````````````````````````````` example foo foo --> foo .foo <!--> foo -->
foo <!-- foo--->
-. +```````````````````````````````` + Processing instructions: -. +```````````````````````````````` example foo .foo
-. +```````````````````````````````` + Declarations: -. +```````````````````````````````` example foo .foo
-. +```````````````````````````````` + CDATA sections: -. +```````````````````````````````` example foo &<]]> .foo &<]]>
-. +```````````````````````````````` + Entity and numeric character references are preserved in HTML attributes: -. +```````````````````````````````` example foo . -. +```````````````````````````````` + Backslash escapes do not work in HTML attributes: -. +```````````````````````````````` example foo . -. +```````````````````````````````` -. + +```````````````````````````````` example .<a href=""">
-. +```````````````````````````````` + ## Hard line breaks A line break (not in a code span or HTML tag) that is preceded by two or more spaces and does not occur at the end of a block -is parsed as a [hard line break](@hard-line-break) (rendered +is parsed as a [hard line break](@) (rendered in HTML as a `foo
baz
foo
baz
foo
baz
foo
bar
foo
bar
foo
bar
foo
bar
code span
code\ span
foo\
-. +```````````````````````````````` -. + +```````````````````````````````` example foo .foo
-. +```````````````````````````````` -. + +```````````````````````````````` example ### foo\ .foo baz
-. +```````````````````````````````` + Spaces at the end of the line and beginning of the next line are removed: -. +```````````````````````````````` example foo baz .foo baz
-. +```````````````````````````````` + A conforming parser may render a soft line break in HTML either as a line break or as a space. @@ -8125,34 +8834,37 @@ as hard line breaks. Any characters not given an interpretation by the above rules will be parsed as plain textual content. -. +```````````````````````````````` example hello $.;'there .hello $.;'there
-. +```````````````````````````````` -. + +```````````````````````````````` example Foo χρῆν .Foo χρῆν
-. +```````````````````````````````` + Internal spaces are preserved verbatim: -. +```````````````````````````````` example Multiple spaces .Multiple spaces
-. +```````````````````````````````` + -# Appendix: A parsing strategy {-} +# Appendix: A parsing strategy In this appendix we describe some features of the parsing strategy used in the CommonMark reference implementations. -## Overview {-} +## Overview Parsing has two phases: @@ -8190,7 +8902,7 @@ marked by arrows: "aliquando id" ``` -## Phase 1: block structure {-} +## Phase 1: block structure Each line that is processed has an effect on this tree. The line is analyzed and, depending on its contents, the document may be altered @@ -8227,8 +8939,8 @@ markers like `>`, list markers, and indentation have been consumed). This is text that can be incorporated into the last open block (a paragraph, code block, heading, or raw HTML). -Setext headings are formed when we detect that the second line of -a paragraph is a setext heading line. +Setext headings are formed when we see a line of a paragraph +that is a setext heading line. Reference link definitions are detected when a paragraph is closed; the accumulated text lines are parsed to see if they begin with @@ -8332,7 +9044,7 @@ We thus obtain the final tree: "aliquando id" ``` -## Phase 2: inline structure {-} +## Phase 2: inline structure Once all of the input has been parsed, all open blocks are closed. @@ -8363,7 +9075,7 @@ Notice how the [line ending] in the first paragraph has been parsed as a `softbreak`, and the asterisks in the first list item have become an `emph`. -### An algorithm for parsing nested emphasis and links {-} +### An algorithm for parsing nested emphasis and links By far the trickiest part of inline parsing is handling emphasis, strong emphasis, links, and images. This is done using the following @@ -8375,7 +9087,7 @@ When we're parsing inlines and we hit either - a `[` or `. +add a pointer to this text node to the [delimiter stack](@). The [delimiter stack] is a doubly linked list. 
Each element contains a pointer to a text node, plus information about @@ -8393,7 +9105,7 @@ procedure (see below). When we hit the end of the input, we call the *process emphasis* procedure (see below), with `stack_bottom` = NULL. -#### *look for link or image* {-} +#### *look for link or image* Starting at the top of the delimiter stack, we look backwards through the stack for an opening `[` or `![` delimiter. @@ -8424,7 +9136,7 @@ through the stack for an opening `[` or `![` delimiter. `[` delimiters before the opening delimiter to *inactive*. (This will prevent us from getting links within links.) -#### *process emphasis* {-} +#### *process emphasis* Parameter `stack_bottom` sets a lower bound to how far we descend in the [delimiter stack]. If it is NULL, we can From 265b4ac9457a4dd7d35881c67f758c65f4b5306b Mon Sep 17 00:00:00 2001 From: Robin Stocker\n\n"); } + @Test + public void delimited() { + Node document = PARSER.parse("~~foo~~"); + Strikethrough strikethrough = (Strikethrough) document.getFirstChild().getFirstChild(); + assertEquals("~~", strikethrough.getOpeningDelimiter()); + assertEquals("~~", strikethrough.getClosingDelimiter()); + } + @Override protected String render(String source) { return RENDERER.render(PARSER.parse(source)); diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/EmphasisDelimiterProcessor.java b/commonmark/src/main/java/org/commonmark/internal/inline/EmphasisDelimiterProcessor.java index 5532c816a..1b9ebc371 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/EmphasisDelimiterProcessor.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/EmphasisDelimiterProcessor.java @@ -1,11 +1,10 @@ package org.commonmark.internal.inline; -import org.commonmark.parser.DelimiterProcessor; -import org.commonmark.node.Delimited; import org.commonmark.node.Emphasis; import org.commonmark.node.Node; import org.commonmark.node.StrongEmphasis; import org.commonmark.node.Text; +import 
org.commonmark.parser.DelimiterProcessor; public abstract class EmphasisDelimiterProcessor implements DelimiterProcessor { @@ -27,9 +26,10 @@ public int getDelimiterUse(int openerCount, int closerCount) { @Override public void process(Text opener, Text closer, int delimiterUse) { + String singleDelimiter = String.valueOf(getDelimiterChar()); Node emphasis = delimiterUse == 1 - ? new Emphasis(getDelimiterChar(), delimiterUse) - : new StrongEmphasis(getDelimiterChar(), delimiterUse); + ? new Emphasis(singleDelimiter) + : new StrongEmphasis(singleDelimiter + singleDelimiter); Node tmp = opener.getNext(); while (tmp != null && tmp != closer) { diff --git a/commonmark/src/main/java/org/commonmark/node/Delimited.java b/commonmark/src/main/java/org/commonmark/node/Delimited.java index dc65a772a..ef02c84ad 100644 --- a/commonmark/src/main/java/org/commonmark/node/Delimited.java +++ b/commonmark/src/main/java/org/commonmark/node/Delimited.java @@ -1,9 +1,17 @@ package org.commonmark.node; +/** + * A node that uses delimiters in the source form (e.g.strike
\nthat
*bold*).
+ */
public interface Delimited {
- char getDelimiterChar();
-
- int getDelimiterCount();
+ /**
+ * @return the opening (beginning) delimiter as a string, e.g. {@code *}
+ */
+ String getOpeningDelimiter();
+ /**
+ * @return the closing (ending) delimiter as a string, e.g. {@code *}
+ */
+ String getClosingDelimiter();
}
diff --git a/commonmark/src/main/java/org/commonmark/node/Emphasis.java b/commonmark/src/main/java/org/commonmark/node/Emphasis.java
index 39069de04..9877e7b63 100644
--- a/commonmark/src/main/java/org/commonmark/node/Emphasis.java
+++ b/commonmark/src/main/java/org/commonmark/node/Emphasis.java
@@ -2,22 +2,27 @@
public class Emphasis extends Node implements Delimited {
- private final char delimiterChar;
- private final int delimiterCount;
+ private String delimiter;
- public Emphasis(char delimiterChar, int delimiterCount) {
- this.delimiterChar = delimiterChar;
- this.delimiterCount = delimiterCount;
+ public Emphasis() {
+ }
+
+ public Emphasis(String delimiter) {
+ this.delimiter = delimiter;
+ }
+
+ public void setDelimiter(String delimiter) {
+ this.delimiter = delimiter;
}
@Override
- public char getDelimiterChar() {
- return delimiterChar;
+ public String getOpeningDelimiter() {
+ return delimiter;
}
@Override
- public int getDelimiterCount() {
- return delimiterCount;
+ public String getClosingDelimiter() {
+ return delimiter;
}
@Override
diff --git a/commonmark/src/main/java/org/commonmark/node/StrongEmphasis.java b/commonmark/src/main/java/org/commonmark/node/StrongEmphasis.java
index eeb9e9343..dbff571cd 100644
--- a/commonmark/src/main/java/org/commonmark/node/StrongEmphasis.java
+++ b/commonmark/src/main/java/org/commonmark/node/StrongEmphasis.java
@@ -2,27 +2,31 @@
public class StrongEmphasis extends Node implements Delimited {
- private char delimiterChar;
- private int delimiterCount;
+ private String delimiter; // single field returned by both getOpeningDelimiter() and getClosingDelimiter()
- public StrongEmphasis(char delimiterChar, int delimiterCount) {
- this.delimiterChar = delimiterChar;
- this.delimiterCount = delimiterCount;
+ public StrongEmphasis() { // no-arg constructor: delimiter stays null until setDelimiter is called
+ }
+
+ public StrongEmphasis(String delimiter) { // stores the given delimiter string as-is, no validation
+ this.delimiter = delimiter;
+ }
+
+ public void setDelimiter(String delimiter) { // replaces the stored delimiter; affects both getters below
+ this.delimiter = delimiter;
}
@Override
- public char getDelimiterChar() {
- return delimiterChar;
+ public String getOpeningDelimiter() {
+ return delimiter; // opening delimiter is the stored delimiter string
}
@Override
- public int getDelimiterCount() {
- return delimiterCount;
+ public String getClosingDelimiter() {
+ return delimiter; // closing delimiter is the same stored string as the opening one
}
@Override
public void accept(Visitor visitor) {
visitor.visit(this);
}
-
}
diff --git a/commonmark/src/test/java/org/commonmark/test/DelimitedTest.java b/commonmark/src/test/java/org/commonmark/test/DelimitedTest.java
index 813192bdc..a34a32c44 100644
--- a/commonmark/src/test/java/org/commonmark/test/DelimitedTest.java
+++ b/commonmark/src/test/java/org/commonmark/test/DelimitedTest.java
@@ -1,41 +1,38 @@
package org.commonmark.test;
-import org.commonmark.node.Node;
-import org.commonmark.node.Delimited;
-import org.commonmark.node.Emphasis;
-import org.commonmark.node.StrongEmphasis;
-import org.commonmark.node.Visitor;
-import org.commonmark.node.AbstractVisitor;
+import org.commonmark.node.*;
import org.commonmark.parser.Parser;
import org.junit.Test;
+import java.util.ArrayList;
+import java.util.List;
+
import static org.junit.Assert.assertEquals;
-import static org.commonmark.internal.util.Debugging.log;
-import static org.commonmark.internal.util.Debugging.toStringTree;
public class DelimitedTest {
@Test
- public void one() {
-
- final Parser.Builder builder = Parser.builder();
- final Parser parser = builder.build();
- final Node document = parser.parse(getText());
- final java.util.ListFOO bar
\n"); + assertRendering("f{oo ba}r", "fOO BAr
\n"); + assertRendering("{{foo} bar", "{FOO bar
\n"); + assertRendering("{foo}} bar", "FOO} bar
\n"); + assertRendering("{{foo} bar}", "FOO BAR
\n"); + assertRendering("{foo bar", "{foo bar
\n"); + assertRendering("foo} bar", "foo} bar
\n"); + assertRendering("}foo} bar", "}foo} bar
\n"); + assertRendering("{foo{ bar", "{foo{ bar
\n"); + assertRendering("}foo{ bar", "}foo{ bar
\n"); + } + + @Override + protected String render(String source) { + Node node = PARSER.parse(source); + return RENDERER.render(node); + } + + private static class AsymmetricDelimiterProcessor implements DelimiterProcessor { + + @Override + public char getOpeningDelimiterChar() { + return '{'; + } + + @Override + public char getClosingDelimiterChar() { + return '}'; + } + + @Override + public int getMinDelimiterCount() { + return 1; + } + + @Override + public int getDelimiterUse(int openerCount, int closerCount) { + return 1; + } + + @Override + public void process(Text opener, Text closer, int delimiterUse) { + UpperCaseNode content = new UpperCaseNode(); + Node tmp = opener.getNext(); + while (tmp != null && tmp != closer) { + Node next = tmp.getNext(); + content.appendChild(tmp); + tmp = next; + } + opener.insertAfter(content); + } + } + + private static class UpperCaseNode extends CustomNode { + } + + private static class UpperCaseNodeRenderer implements CustomHtmlRenderer { + @Override + public boolean render(Node node, HtmlWriter htmlWriter, Visitor visitor) { + if (node instanceof UpperCaseNode) { + UpperCaseNode upperCaseNode = (UpperCaseNode) node; + for (Node child = upperCaseNode.getFirstChild(); child != null; child = child.getNext()) { + if (child instanceof Text) { + Text text = (Text) child; + text.setLiteral(text.getLiteral().toUpperCase(Locale.ENGLISH)); + } + child.accept(visitor); + } + return true; + } + return false; + } + } +} From 45f0ebe9e6ff6c0def1fd4ce9e52470cbac1ec4d Mon Sep 17 00:00:00 2001 From: JinneeJ