diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 000000000..b32794271 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,61 @@ +# See https://docs.github.com/en/actions/language-and-framework-guides/building-and-testing-java-with-maven + +name: ci + +on: [push, pull_request] + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + java: [11, 17, 21, 25] + steps: + - name: Checkout sources + uses: actions/checkout@v4 + + - name: Set up JDK + uses: actions/setup-java@v4 + with: + java-version: ${{ matrix.java }} + distribution: 'zulu' + + - name: Build + run: mvn -B package javadoc:javadoc + + coverage: + runs-on: ubuntu-latest + if: ${{ github.event_name == 'push' }} + steps: + - name: Checkout sources + uses: actions/checkout@v4 + + - name: Set up JDK + uses: actions/setup-java@v4 + with: + java-version: 11 + distribution: 'zulu' + + - name: Build with coverage + run: mvn -B -Pcoverage clean test jacoco:report-aggregate + + - name: Publish coverage + uses: codecov/codecov-action@v4 + with: + fail_ci_if_error: true + token: ${{ secrets.CODECOV_TOKEN }} + + android-compatibility: + runs-on: ubuntu-latest + steps: + - name: Checkout sources + uses: actions/checkout@v4 + + - name: Set up JDK + uses: actions/setup-java@v4 + with: + java-version: 11 + distribution: 'zulu' + + - name: Android Lint checks + run: cd commonmark-android-test && ./gradlew :app:lint diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 000000000..c0531ca55 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,43 @@ +# See: +# https://docs.github.com/en/free-pro-team@latest/actions/guides/publishing-java-packages-with-maven +# https://central.sonatype.org/pages/apache-maven.html +# https://github.com/actions/setup-java + +name: release + +on: + workflow_dispatch: + +jobs: + release: + environment: maven_central + runs-on: ubuntu-latest + steps: + - name: Checkout sources + uses: 
actions/checkout@v4 + + - name: Set up Maven Central repository + uses: actions/setup-java@v4 + with: + java-version: 24 + distribution: 'zulu' + # See https://central.sonatype.org/publish/publish-portal-maven/ + server-id: central + server-username: CENTRAL_USERNAME # env variable to use for username in release + server-password: CENTRAL_PASSWORD # env variable to use for password in release + gpg-private-key: ${{ secrets.MAVEN_GPG_PRIVATE_KEY }} + gpg-passphrase: MAVEN_GPG_PASSPHRASE # env variable to use for passphrase in release + + - name: Set up Git user + run: | + git config --global user.name "${{ secrets.GIT_USER_NAME }}" + git config --global user.email "${{ secrets.GIT_USER_EMAIL }}" + + - name: Release + run: | + mvn -B -Dusername=${{ secrets.GH_USERNAME }} -Dpassword=${{ secrets.GH_ACCESS_TOKEN }} release:prepare + mvn -B release:perform + env: + CENTRAL_USERNAME: ${{ secrets.CENTRAL_USERNAME }} + CENTRAL_PASSWORD: ${{ secrets.CENTRAL_PASSWORD }} + MAVEN_GPG_PASSPHRASE: ${{ secrets.MAVEN_GPG_PASSPHRASE }} diff --git a/.gitignore b/.gitignore index a156931f0..d998d8890 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,6 @@ # Maven target/ + +# macOS +.DS_Store diff --git a/.mvn/wrapper/maven-wrapper.properties b/.mvn/wrapper/maven-wrapper.properties new file mode 100644 index 000000000..4d245050f --- /dev/null +++ b/.mvn/wrapper/maven-wrapper.properties @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +wrapperVersion=3.3.2 +distributionType=only-script +distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.6.3/apache-maven-3.6.3-bin.zip diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index f73430272..000000000 --- a/.travis.yml +++ /dev/null @@ -1,31 +0,0 @@ -language: android - -matrix: - include: - - jdk: oraclejdk8 - env: TEST=java - - jdk: openjdk11 - env: TEST=java - - jdk: oraclejdk8 - env: TEST=android - dist: precise - android: - components: - - android-16 - - build-tools-21.1.1 - - extra-android-m2repository - - sys-img-armeabi-v7a-android-16 - - allow_failures: - - env: TEST=android - -script: - - 'if [ $TEST = java ]; then mvn test -Dsurefire.useFile=false; fi' - - 'if [ $TEST = android ]; then mvn install -DskipTests && cd commonmark-android-test && travis_retry ./.travis.sh; fi' - -after_success: | - if [ $TRAVIS_JDK_VERSION = oraclejdk8 ] && [ $TEST = java ]; then - # Calculate test coverage - mvn clean test jacoco:report-aggregate -Pcoverage - bash <(curl -s https://codecov.io/bash) - fi diff --git a/CHANGELOG.md b/CHANGELOG.md index acd7c044a..9c5c67268 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,275 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). This project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html), with the exception that 0.x versions can break between minor versions. +## [Unreleased] +### Added +- Allow customizing HTML attributes for alert title `<p>` tag via `AttributeProvider` + +## [0.28.0] - 2026-03-31 +### Added +- New extension for alerts (aka callouts/admonitions) + - Syntax: + ``` + > [!NOTE] + > The text of the note. + ``` + - As types you can use NOTE, TIP, IMPORTANT, WARNING, CAUTION; or configure the + extension to add additional ones. + - Use class `AlertsExtension` in artifact `commonmark-ext-gfm-alerts` (#420) +- New option `maxOpenBlockParsers` for `Parser.Builder` to set an overall limit + for the depth of block parsing. If set, any nesting beyond the limit will be + parsed as paragraph text instead. The default remains unlimited. + +## [0.27.1] - 2026-01-14 +### Fixed +- Line(s) after a hard line break would sometimes also get an unwanted hard + line break, e.g. if they ended in emphasis or other non-text inlines (#415) +- `TextContentRenderer` (for plain text): Fix nested lists on the same line (#413) +- Fix minor performance regression with pathological input (deeply nested + brackets) that was introduced in version 0.23.0. + +## [0.27.0] - 2025-10-12 +### Added +- Autolink extension: Now supports configuration of different link types that + should be recognized and converted to links. See `AutolinkExtension#builder` + + | Type | Default? | Description | + |---------|----------|--------------------------------------------------------| + | `URL` | Yes | URL with a protocol such as `https://example.com` | + | `EMAIL` | Yes | Email address such as `foo@example.com` | + | `WWW` | Yes | Address beginning with `www` such as `www.example.com` | + + Note that this changes the behavior of `AutolinkExtension.create()` to now also + include `WWW` links by default. To re-enable the previous behavior, use: + + ```java + AutolinkExtension.builder().linkTypes(AutolinkType.URL, AutolinkType.EMAIL).build(); + ``` + +## [0.26.0] - 2025-09-13 +### Changed +- A `LinkProcessor` using `replaceWith` now also stops outer links from being + parsed as links, same as with `wrapTextIn`.
This prevents nested links, see + footnotes change below. +### Fixed +- Fix rendering of image alt text to include contents of code spans (`` `code` ``). (#398) +- footnotes: Fix footnotes nested within links. Before, both the link and the + footnote reference would be parsed and lead to nested `<a>` elements, which + is disallowed. Now, only the footnote is parsed and the outer link becomes + plain text; this matches the behavior of links. (#400) + +## [0.25.1] - 2025-08-01 +### Fixed +- footnotes: Fix parsing of footnote definitions containing multiple paragraphs + separated by blank lines. Before it only worked if paragraphs were separated + by lines of 4 spaces. (#388) + +## [0.25.0] - 2025-06-20 +### Added +- Include OSGi metadata in jars (`META-INF/MANIFEST.MF` files) (#378) +- More documentation with examples for `Node` classes (#370) +### Changed +- GitHub tables: Tables are now parsed even if there's no blank line before the + table heading, matching GitHub's behavior. (#381) +### Fixed +- `MarkdownRenderer`: Fix precedence for `nodeRendererFactory`: Factories passed + to the builder can now override rendering for core node types. (#368) +- `MarkdownRenderer`: Fix exception with ordered lists with a long first number + followed by a shorter one (#382) +- Fix warning in Eclipse about "missing 'requires transitive'" (#358) +- Fix Android incompatibility with `requireNonNullElseGet` (#369) + +## [0.24.0] - 2024-10-21 +### Added +- `SourceSpan` on nodes now have a `getInputIndex` to get the index within the + original input string (in addition to the existing line/column indexes). + This is useful when looking up the input source: It can now be done using + `substring` instead of having to split the input into lines first (#348) +- Configurable line break rendering for `TextContentRenderer` via `lineBreakRendering` + on the builder; e.g.
`LineBreakRendering.SEPARATE_BLOCKS` will render an empty + line between blocks (#344) +### Changed +- Adopted small changes from OpenJDK vendoring to make updates easier for them (#343) +### Fixed +- Enable overriding of built-in node rendering for `TextContentRenderer` (#346) + +## [0.23.0] - 2024-09-16 +### Added +- New extension for footnotes! + - Syntax: + ``` + Main text[^1] + + [^1]: Additional text in a footnote + ``` + - Inline footnotes like `^[inline footnote]` are also supported when enabled + via an option in `FootnotesExtension.Builder` + - Use class `FootnotesExtension` in artifact `commonmark-ext-footnotes` (#332) +- New option `omitSingleParagraphP` in `HtmlRenderer.Builder` for not using `<p>` + tags for when a document only has one paragraph (#150) +- Support for custom link processing during inline parsing (e.g. `[foo]`), + see `Parser.Builder#linkProcessor` +- Support for extending inline parsing with custom inline content parsers. See + `Parser.Builder#customInlineContentParserFactory`. This allows users/extensions + to hook into inline parsing on a deeper level than before (e.g. with delimiter + processors). It can be used to add support for math/latex formulas or other inline + syntax. (#321) +### Changed +- The default `DefaultUrlSanitizer` now also allows `data` as a protocol. Use the + constructor with a list to customize this. (#329) +- `LinkReferenceDefinition` now extends `Block` (it was extending `Node` + directly before) +- `MarkdownRenderer`: Don't escape `=` text if it's the first node in a block (#335) +### Fixed +- Fix parsing of link reference definitions with incorrect title syntax (followed + by characters other than space/tab). In that case, the title was set to the + partially-parsed title and the source spans were wrong. (#315) +- Fix source spans of blocks with lazy continuation lines (#337) +- `MarkdownRenderer`: Preserve thematic break literals (#331) + +## [0.22.0] - 2024-03-15 +### Added +- New `MarkdownRenderer` for rendering nodes to Markdown (CommonMark) (#306)!
+ Note that while care is taken to produce equivalent Markdown, some differences + in the original Markdown (if parsed) are not preserved, such as: + - The type of heading used + - The type of link used (reference links will be rendered as inline links) + - Whether special characters are escaped or not + - Leading and trailing whitespace +- Modular JAR (JPMS): All artifacts now include module descriptors (module-info) + so jlink can be used; the old `Automatic-Module-Name` manifest entries were removed +- New package `org.commonmark.parser.beta` containing classes that are not part of + the stable API but are exported from the module because they might be useful for + extension parsers +- New package `org.commonmark.text` for text related utilities that are useful for + both parsing and rendering +- `TableCell` now has `getWidth` returning the number of dash and colon characters + in the delimiter row, useful for rendering proportional width tables (#296) +- `ThematicBreak` now has `getLiteral` containing the string that was used in the + source when parsing (#309) +- `ListItem` now has `getMarkerIndent` and `getContentIndent` for retrieving the + space between the start of the line and the marker/content +- Deprecated some properties of `BulletList`, `OrderedList`, `FencedCodeBlock` + and replaced with nullable ones because they might not be set when constructing + these nodes manually instead of via parsing +### Changed +- Java 11 or later is now required (dropping support for Java 8) +- Update to CommonMark spec 0.31.2 +### Fixed +- Fix `LinkReferenceDefinition` having null `SourceSpan` when title is present + and parsing with source spans option enabled (#310) + +## [0.21.0] - 2022-11-17 +### Added +- GitHub strikethrough: With the previous version we adjusted the + extension to also accept the single tilde syntax. But if you use + another extension that uses the single tilde syntax, you will get a + conflict.
To avoid that, `StrikethroughExtension` can now be + configured to require two tildes like before, see Javadoc. + +## [0.20.0] - 2022-10-20 +### Fixed +- GitHub tables: A single pipe (optional whitespace) now ends a table + instead of crashing or being treated as an empty row, for consistency + with GitHub (#255). +- GitHub strikethrough: A single tilde now also works, and more than two + tildes are not accepted anymore. This brings us in line with what + GitHub actually does, which is a bit underspecified (#267) +- The autolink extension now handles source spans correctly (#209) + +## [0.19.0] - 2022-06-02 +### Added +- YAML front matter extension: Limited support for single and double + quoted string values (#260) +### Changed +- Check argument of `enabledBlockTypes` when building parser instead of NPEing later + +## [0.18.2] - 2022-02-24 +### Changed +- Test against Java 17 +- Bundle LICENSE.txt with artifacts (in addition to Maven metadata) + +## [0.18.1] - 2021-11-29 +### Fixed +- Fix tables with leading/trailing header pipes and trailing spaces (#244). + This was a regression in 0.16.1 which is now fixed. + +## [0.18.0] - 2021-06-30 +### Changed +- Update to CommonMark spec 0.30: + - Add `textarea` to list of literal HTML block tags. + Like `script`, `style`, and `pre`, `textarea` blocks can contain + blank lines without the contents being interpreted as commonmark. + - Fix case folding for link reference labels in some cases + (e.g. `ẞ` and `SS` should match) + - Allow lowercase ASCII in HTML declaration + - Don't let type 7 HTML blocks interrupt lazy paragraphs either +- Preserve the original case for the label of `LinkReferenceDefinition`. + Before, we used to store the normalized version (lowercase, collapsed whitespace). + +## [0.17.2] - 2021-05-14 +### Changed +- Pass original instead of normalized label to `InlineParserContext` for lookup (#204). + This allows custom contexts to change the lookup logic and have access to the original + label content. 
+ In case you have a custom implementation of `InlineParserContext`, you might need to adjust + it to do normalization. + +## [0.17.1] - 2021-02-03 +### Fixed +- Fix emphasis surrounded by non-BMP punctuation/whitespace characters + (characters that are longer than one UTF-16 "char"). Note that this is + an edge case with rarely used Unicode characters, which a lot of other + implementations don't handle correctly. +- Fix tables where the row starts with spaces and then the first `|` - + rows that didn't have spaces before were not affected (#199). This bug + is present in 0.16.1 and 0.17.0. + +## [0.17.0] - 2021-01-15 +### Changed +- **ACTION REQUIRED**: Maven groupId has changed from `com.atlassian.commonmark` to `org.commonmark` + - To continue getting new versions of commonmark-java, change the Maven coordinates in your dependencies: + - Old: `com.atlassian.commonmark` + - New: `org.commonmark` + +## [0.16.1] - 2020-12-11 +### Added +- Support for including source spans on block and inline nodes (#1): + - Answer for "Where in the source input (line/column position and length) does this node come from?" + - Useful for things like editors that want to keep the input and rendered output scrolled to the same lines, + or start editing on the node that was selected. + - Use `includeSourceSpans` on `Parser.Builder` to enable, + either with `IncludeSourceSpans.BLOCKS` or `IncludeSourceSpans.BLOCKS_AND_INLINES` + - Read data with `Node.getSourceSpans` + - Note that enabling this has a small performance impact on parsing (about 10%) +### Changed +- In order to support source spans (see above), a few of the extension + APIs changed. It should only affect users implementing their own + extensions. See the Javadoc to see what changed. +- YAML front matter extension: Support dots in key names + +## [0.15.2] - 2020-07-20 +### Fixed +- image-attributes extension: Fix unexpected altering of text in case + parsing of attributes fails, e.g. 
`{NN} text` -> `{NN text}`, thanks @jk1 + +## [0.15.1] - 2020-05-29 +### Added +- Add text content rendering support for `InsExtension` + +## [0.15.0] - 2020-05-21 +### Added +- Extension for width/height attributes for images, thanks @dohertyfjatl + - Syntax: `![text](/url.png){width=640 height=480}` + - Use class `ImageAttributesExtension` in artifact `commonmark-ext-image-attributes` +- Extension for task lists (GitHub-style), thanks @dohertyfjatl + - Syntax: + ``` + - [x] task #1 + - [ ] task #2 + ``` + - Use class `TaskListItemsExtension` in artifact `commonmark-ext-task-list-items` + ## [0.14.0] - 2020-01-22 ### Added - Add `sanitizeUrls` to `HtmlRenderer.Builder` to enable sanitizing URLs @@ -238,7 +507,7 @@ API breaking changes (caused by changes in spec): - Rename `HorizontalRule` to `ThematicBreak` - Rename `HtmlTag` to `HtmlInline` - Replace `MatchedBlockParser#getParagraphStartLine` with `#getParagraphContent` - that returns the current content if the the matched block is a paragraph + that returns the current content if the matched block is a paragraph ## [0.3.2] - 2016-01-07 ### Fixed @@ -268,24 +537,46 @@ API breaking changes (caused by changes in spec): Initial release of commonmark-java, a port of commonmark.js with extensions for autolinking URLs, GitHub flavored strikethrough and tables. 
- -[0.14.0]: https://github.com/atlassian/commonmark-java/compare/commonmark-parent-0.13.1...commonmark-parent-0.14.0 -[0.13.1]: https://github.com/atlassian/commonmark-java/compare/commonmark-parent-0.13.0...commonmark-parent-0.13.1 -[0.13.0]: https://github.com/atlassian/commonmark-java/compare/commonmark-parent-0.12.1...commonmark-parent-0.13.0 -[0.12.1]: https://github.com/atlassian/commonmark-java/compare/commonmark-parent-0.11.0...commonmark-parent-0.12.1 -[0.11.0]: https://github.com/atlassian/commonmark-java/compare/commonmark-parent-0.10.0...commonmark-parent-0.11.0 -[0.10.0]: https://github.com/atlassian/commonmark-java/compare/commonmark-parent-0.9.0...commonmark-parent-0.10.0 -[0.9.0]: https://github.com/atlassian/commonmark-java/compare/commonmark-parent-0.8.0...commonmark-parent-0.9.0 -[0.8.0]: https://github.com/atlassian/commonmark-java/compare/commonmark-parent-0.7.1...commonmark-parent-0.8.0 -[0.7.1]: https://github.com/atlassian/commonmark-java/compare/commonmark-parent-0.7.0...commonmark-parent-0.7.1 -[0.7.0]: https://github.com/atlassian/commonmark-java/compare/commonmark-parent-0.6.0...commonmark-parent-0.7.0 -[0.6.0]: https://github.com/atlassian/commonmark-java/compare/commonmark-parent-0.5.1...commonmark-parent-0.6.0 -[0.5.1]: https://github.com/atlassian/commonmark-java/compare/commonmark-parent-0.5.0...commonmark-parent-0.5.1 -[0.5.0]: https://github.com/atlassian/commonmark-java/compare/commonmark-parent-0.4.1...commonmark-parent-0.5.0 -[0.4.1]: https://github.com/atlassian/commonmark-java/compare/commonmark-parent-0.4.0...commonmark-parent-0.4.1 -[0.4.0]: https://github.com/atlassian/commonmark-java/compare/commonmark-parent-0.3.2...commonmark-parent-0.4.0 -[0.3.2]: https://github.com/atlassian/commonmark-java/compare/commonmark-parent-0.3.1...commonmark-parent-0.3.2 -[0.3.1]: https://github.com/atlassian/commonmark-java/compare/commonmark-parent-0.3.0...commonmark-parent-0.3.1 -[0.3.0]: 
https://github.com/atlassian/commonmark-java/compare/commonmark-parent-0.2.0...commonmark-parent-0.3.0 -[0.2.0]: https://github.com/atlassian/commonmark-java/compare/commonmark-parent-0.1.0...commonmark-parent-0.2.0 -[0.1.0]: https://github.com/atlassian/commonmark-java/commits/commonmark-parent-0.1.0 +[Unreleased]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.28.0...main +[0.28.0]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.27.1...commonmark-parent-0.28.0 +[0.27.1]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.27.0...commonmark-parent-0.27.1 +[0.27.0]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.26.0...commonmark-parent-0.27.0 +[0.26.0]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.25.1...commonmark-parent-0.26.0 +[0.25.1]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.25.0...commonmark-parent-0.25.1 +[0.25.0]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.24.0...commonmark-parent-0.25.0 +[0.24.0]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.23.0...commonmark-parent-0.24.0 +[0.23.0]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.22.0...commonmark-parent-0.23.0 +[0.22.0]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.21.0...commonmark-parent-0.22.0 +[0.21.0]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.20.0...commonmark-parent-0.21.0 +[0.20.0]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.19.0...commonmark-parent-0.20.0 +[0.19.0]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.18.2...commonmark-parent-0.19.0 +[0.18.2]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.18.1...commonmark-parent-0.18.2 +[0.18.1]: 
https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.18.0...commonmark-parent-0.18.1 +[0.18.0]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.17.2...commonmark-parent-0.18.0 +[0.17.2]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.17.1...commonmark-parent-0.17.2 +[0.17.1]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.17.0...commonmark-parent-0.17.1 +[0.17.0]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.16.1...commonmark-parent-0.17.0 +[0.16.1]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.15.2...commonmark-parent-0.16.1 +[0.15.2]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.15.1...commonmark-parent-0.15.2 +[0.15.1]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.15.0...commonmark-parent-0.15.1 +[0.15.0]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.14.0...commonmark-parent-0.15.0 +[0.14.0]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.13.1...commonmark-parent-0.14.0 +[0.13.1]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.13.0...commonmark-parent-0.13.1 +[0.13.0]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.12.1...commonmark-parent-0.13.0 +[0.12.1]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.11.0...commonmark-parent-0.12.1 +[0.11.0]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.10.0...commonmark-parent-0.11.0 +[0.10.0]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.9.0...commonmark-parent-0.10.0 +[0.9.0]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.8.0...commonmark-parent-0.9.0 +[0.8.0]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.7.1...commonmark-parent-0.8.0 +[0.7.1]: 
https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.7.0...commonmark-parent-0.7.1 +[0.7.0]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.6.0...commonmark-parent-0.7.0 +[0.6.0]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.5.1...commonmark-parent-0.6.0 +[0.5.1]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.5.0...commonmark-parent-0.5.1 +[0.5.0]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.4.1...commonmark-parent-0.5.0 +[0.4.1]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.4.0...commonmark-parent-0.4.1 +[0.4.0]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.3.2...commonmark-parent-0.4.0 +[0.3.2]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.3.1...commonmark-parent-0.3.2 +[0.3.1]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.3.0...commonmark-parent-0.3.1 +[0.3.0]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.2.0...commonmark-parent-0.3.0 +[0.2.0]: https://github.com/commonmark/commonmark-java/compare/commonmark-parent-0.1.0...commonmark-parent-0.2.0 +[0.1.0]: https://github.com/commonmark/commonmark-java/commits/commonmark-parent-0.1.0 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a187cea4b..6bb2f1640 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -16,25 +16,15 @@ existing issues with label "help wanted". For bigger changes, make sure you start a discussion first by creating an issue and explaining the intended change. -CLA ---- - -Atlassian requires contributors to sign a Contributor License Agreement, -known as a CLA. This serves as a record stating that the contributor is -entitled to contribute the code/documentation/translation to the project -and is willing to have it used in distributions and derivative works -(or is willing to transfer ownership). 
- -Prior to accepting your first contribution we ask that you please follow the -appropriate link below to digitally sign the CLA. The Corporate CLA is for those -who are contributing as a member of an organization and the individual CLA is -for those contributing as an individual. - -* [CLA for corporate contributors](https://na2.docusign.net/Member/PowerFormSigning.aspx?PowerFormId=e1c17c66-ca4d-4aab-a953-2c231af4a20b) -* [CLA for individuals](https://na2.docusign.net/Member/PowerFormSigning.aspx?PowerFormId=3f94fbdc-2fbe-46ac-b14c-5d152700ae5d) +The [sourcespy dashboard](https://sourcespy.com/github/commonmarkcommonmarkjava/) +provides a high level overview of the repository including +[class diagram](https://sourcespy.com/github/commonmarkcommonmarkjava/xx-omodel-.html), +[module dependencies](https://sourcespy.com/github/commonmarkcommonmarkjava/xx-omodulesc-.html), +[module hierarchy](https://sourcespy.com/github/commonmarkcommonmarkjava/xx-omodules-.html), +[external libraries](https://sourcespy.com/github/commonmarkcommonmarkjava/xx-ojavalibs-.html), +and other components of the system. Releasing --------- -Releases are done from an Atlassian internal build server: -https://engservices-bamboo.internal.atlassian.com/browse/CM +Releases are done from the "release" workflow on GitHub Actions. diff --git a/LICENSE.txt b/LICENSE.txt index 1e011418e..604b777d3 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright (c) 2015-2016, Atlassian Pty Ltd +Copyright (c) 2015, Robin Stocker All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/README.md b/README.md index 1ebe736a5..845226729 100644 --- a/README.md +++ b/README.md @@ -4,36 +4,39 @@ commonmark-java Java library for parsing and rendering [Markdown] text according to the [CommonMark] specification (and some extensions). 
-[![Maven Central status](https://img.shields.io/maven-central/v/com.atlassian.commonmark/commonmark.svg)](https://search.maven.org/#search%7Cga%7C1%7Cg%3A%22com.atlassian.commonmark%22) -[![javadoc](https://www.javadoc.io/badge/com.atlassian.commonmark/commonmark.svg?color=blue)](https://www.javadoc.io/doc/com.atlassian.commonmark/commonmark) -[![Build status](https://travis-ci.org/atlassian/commonmark-java.svg?branch=master)](https://travis-ci.org/atlassian/commonmark-java) -[![codecov](https://codecov.io/gh/atlassian/commonmark-java/branch/master/graph/badge.svg)](https://codecov.io/gh/atlassian/commonmark-java) +[![Maven Central status](https://img.shields.io/maven-central/v/org.commonmark/commonmark.svg)](https://search.maven.org/#search%7Cga%7C1%7Cg%3A%22org.commonmark%22) +[![javadoc](https://www.javadoc.io/badge/org.commonmark/commonmark.svg?color=blue)](https://www.javadoc.io/doc/org.commonmark/commonmark) +[![ci](https://github.com/commonmark/commonmark-java/workflows/ci/badge.svg)](https://github.com/commonmark/commonmark-java/actions?query=workflow%3Aci) +[![codecov](https://codecov.io/gh/commonmark/commonmark-java/branch/main/graph/badge.svg)](https://codecov.io/gh/commonmark/commonmark-java) +[![SourceSpy Dashboard](https://sourcespy.com/shield.svg)](https://sourcespy.com/github/commonmarkcommonmarkjava/) Introduction ------------ -Provides classes for parsing input to an abstract syntax tree of nodes -(AST), visiting and manipulating nodes, and rendering to HTML. It -started out as a port of [commonmark.js], but has since evolved into a -full library with a nice API and the following features: +Provides classes for parsing input to an abstract syntax tree (AST), +visiting and manipulating nodes, and rendering to HTML or back to Markdown. 
+It started out as a port of [commonmark.js], but has since evolved into an +extensible library with the following features: * Small (core has no dependencies, extensions in separate artifacts) -* Fast (10-20 times faster than pegdown, see benchmarks in repo) +* Fast (10-20 times faster than [pegdown] which used to be a popular Markdown + library, see benchmarks in repo) * Flexible (manipulate the AST after parsing, customize HTML rendering) * Extensible (tables, strikethrough, autolinking and more, see below) -The library is supported on Java 8 and Java 9. It should work on Java 7 -and Android too, but that is on a best-effort basis, please report -problems. For Android the minimum API level is 15, see the -[commonmark-android-test](commonmark-android-test) directory. +The library is supported on Java 11 and later. It works on Android too, +but that is on a best-effort basis, please report problems. For Android the +minimum API level is 19, see the +[commonmark-android-test](commonmark-android-test) +directory. Coordinates for core library (see all on [Maven Central]): ```xml <dependency> - <groupId>com.atlassian.commonmark</groupId> + <groupId>org.commonmark</groupId> <artifactId>commonmark</artifactId> - <version>0.14.0</version> + <version>0.28.0</version> </dependency> ``` @@ -42,12 +45,14 @@ The module names to use in Java 9 are `org.commonmark`, Note that for 0.x releases of this library, the API is not considered stable yet and may break between minor releases. After 1.0, [Semantic Versioning] will -be followed. +be followed. A package containing `beta` means it's not subject to stable API +guarantees yet; but for normal usage it should not be necessary to use. See the [spec.txt](commonmark-test-util/src/main/resources/spec.txt) file if you're wondering which version of the spec is currently implemented. Also check out the [CommonMark dingus] for getting familiar -with the syntax or trying out edge cases. +with the syntax or trying out edge cases. If you clone the repository, +you can also use the `DingusApp` class to try out things interactively.
Usage @@ -61,9 +66,9 @@ import org.commonmark.parser.Parser; import org.commonmark.renderer.html.HtmlRenderer; Parser parser = Parser.builder().build(); -Node document = parser.parse("This is *Sparta*"); +Node document = parser.parse("This is *Markdown*"); HtmlRenderer renderer = HtmlRenderer.builder().build(); -renderer.render(document); // "

<p>This is <em>Sparta</em></p>

\n" +renderer.render(document); // "

<p>This is <em>Markdown</em></p>

\n" ``` This uses the parser and renderer with default options. Both builders have @@ -79,8 +84,23 @@ to which tags are allowed, etc. That is the responsibility of the caller, and if you expose the resulting HTML, you probably want to run a sanitizer on it after this. -For rendering to plain text, there's also a `TextContentRenderer` with -a very similar API. +#### Render to Markdown + +```java +import org.commonmark.node.*; +import org.commonmark.renderer.markdown.MarkdownRenderer; + +MarkdownRenderer renderer = MarkdownRenderer.builder().build(); +Node document = new Document(); +Heading heading = new Heading(); +heading.setLevel(2); +heading.appendChild(new Text("My title")); +document.appendChild(heading); + +renderer.render(document); // "## My title\n" +``` + +For rendering to plain text with minimal markup, there's also `TextContentRenderer`. #### Use a visitor to process parsed nodes @@ -110,6 +130,31 @@ class WordCountVisitor extends AbstractVisitor { } ``` +#### Source positions + +If you want to know where a parsed `Node` appeared in the input source text, +you can request the parser to return source positions like this: + +```java +var parser = Parser.builder().includeSourceSpans(IncludeSourceSpans.BLOCKS_AND_INLINES).build(); +``` + +Then parse nodes and inspect source positions: + +```java +var source = "foo\n\nbar *baz*"; +var doc = parser.parse(source); +var emphasis = doc.getLastChild().getLastChild(); +var s = emphasis.getSourceSpans().get(0); +s.getLineIndex(); // 2 (third line) +s.getColumnIndex(); // 4 (fifth column) +s.getInputIndex(); // 9 (string index 9) +s.getLength(); // 5 +source.substring(s.getInputIndex(), s.getInputIndex() + s.getLength()); // "*baz*" +``` + +If you're only interested in blocks and not inlines, use `IncludeSourceSpans.BLOCKS`. + #### Add or change attributes of HTML elements Sometimes you might want to customize how HTML is rendered. 
If all you @@ -176,7 +221,7 @@ class IndentedCodeBlockNodeRenderer implements NodeRenderer { @Override public Set> getNodeTypes() { // Return the node types we want to use this renderer for. - return Collections.>singleton(IndentedCodeBlock.class); + return Set.of(IndentedCodeBlock.class); } @Override @@ -201,6 +246,20 @@ elements in the resulting HTML, you can create your own subclass of To define the HTML rendering for them, you can use a `NodeRenderer` as explained above. +#### Customize parsing + +There are a few ways to extend parsing or even override built-in parsing, +all of them via methods on `Parser.Builder` +(see [Blocks and inlines](https://spec.commonmark.org/0.31.2/#blocks-and-inlines) in the spec for an overview of blocks/inlines): + +- Parsing of specific block types (e.g. headings, code blocks, etc) can be + enabled/disabled with `enabledBlockTypes` +- Parsing of blocks can be extended/overridden with `customBlockParserFactory` +- Parsing of inline content can be extended/overridden with `customInlineContentParserFactory` +- Parsing of [delimiters](https://spec.commonmark.org/0.31.2/#emphasis-and-strong-emphasis) in inline content can be + extended with `customDelimiterProcessor` +- Processing of links can be customized with `linkProcessor` and `linkMarker` + #### Thread-safety Both the `Parser` and `HtmlRenderer` are designed so that you can @@ -214,7 +273,7 @@ report an issue. ### API documentation Javadocs are available online on -[javadoc.io](https://www.javadoc.io/doc/com.atlassian.commonmark/commonmark). +[javadoc.io](https://www.javadoc.io/doc/org.commonmark/commonmark). 
Extensions @@ -230,9 +289,9 @@ First, add an additional dependency (see [Maven Central] for others): ```xml - com.atlassian.commonmark + org.commonmark commonmark-ext-gfm-tables - 0.14.0 + 0.28.0 ``` @@ -241,7 +300,7 @@ Then, configure the extension on the builders: ```java import org.commonmark.ext.gfm.tables.TablesExtension; -List extensions = Arrays.asList(TablesExtension.create()); +List extensions = List.of(TablesExtension.create()); Parser parser = Parser.builder() .extensions(extensions) .build(); @@ -274,6 +333,34 @@ Enables tables using pipes as in [GitHub Flavored Markdown][gfm-tables]. Use class `TablesExtension` in artifact `commonmark-ext-gfm-tables`. +### Alerts + +Adds support for GitHub-style alerts (also known as callouts or admonitions) as described [here](https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#alerts), e.g.: + +``` +> [!NOTE] +> The text of the note. +``` + +As types you can use NOTE, TIP, IMPORTANT, WARNING, CAUTION; or configure the extension to add additional ones. + +Use class `AlertsExtension` in artifact `commonmark-ext-gfm-alerts`. + +### Footnotes + +Enables footnotes like in [GitHub](https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#footnotes) +or [Pandoc](https://pandoc.org/MANUAL.html#footnotes): + +``` +Main text[^1] + +[^1]: Additional text in a footnote +``` + +Inline footnotes like `^[inline footnote]` are also supported when enabled via `FootnotesExtension.Builder#inlineFootnotes`. + +Use class `FootnotesExtension` in artifact `commonmark-ext-footnotes`. + ### Heading anchor Enables adding auto generated "id" attributes to heading tags. The "id" @@ -319,6 +406,69 @@ document start here Use class `YamlFrontMatterExtension` in artifact `commonmark-ext-yaml-front-matter`. To fetch metadata, use `YamlFrontMatterVisitor`. 
+### Image Attributes + +Adds support for specifying attributes (specifically height and width) for images. + +The attribute elements are given as `key=value` pairs inside curly braces `{ }` after the image node to which they apply, +for example: +``` +![text](/url.png){width=640 height=480} +``` +will be rendered as: +``` +text +``` + +Use class `ImageAttributesExtension` in artifact `commonmark-ext-image-attributes`. + +Note: since this extension uses curly braces `{` `}` as its delimiters (in `StylesDelimiterProcessor`), this means that +other delimiter processors *cannot* use curly braces for delimiting. + +### Task List Items + +Adds support for tasks as list items. + +A task can be represented as a list item where the first non-whitespace character is a left bracket `[`, then a single +whitespace character or the letter `x` in lowercase or uppercase, then a right bracket `]` followed by at least one +whitespace before any other content. + +For example: +``` +- [ ] task #1 +- [x] task #2 +``` +will be rendered as: +``` +
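<ul>
+<li><input type="checkbox" disabled=""> task #1</li>
+<li><input type="checkbox" disabled="" checked=""> task #2</li>
+</ul>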
+``` + +Use class `TaskListItemsExtension` in artifact `commonmark-ext-task-list-items`. + +### Third-party extensions + +You can also find other extensions in the wild: + +* [commonmark-ext-notifications](https://github.com/McFoggy/commonmark-ext-notifications): this extension allows to easily create notifications/admonitions paragraphs like `INFO`, `SUCCESS`, `WARNING` or `ERROR` + +Used by +------- + +Some users of this library (feel free to raise a PR if you want to be added): +* [Atlassian](https://www.atlassian.com/) (where the library was initially developed) +* Java (OpenJDK) ([link](https://github.com/openjdk/jdk/blob/3895b8fc0b2c6d187080dba6fe08297adad4a480/src/jdk.internal.md/share/classes/module-info.java)) +* [Gerrit](https://www.gerritcodereview.com/) code review/Gitiles ([link](https://gerrit-review.googlesource.com/c/gitiles/+/353794)) +* [Clerk](https://clerk.vision/) moldable live programming for Clojure +* [Znai](https://github.com/testingisdocumenting/znai) +* [Open Note](https://github.com/YangDai2003/OpenNote-Compose) a markdown editor and note-taking app for Android +* [Quarkus Roq](https://github.com/quarkiverse/quarkus-roq/) The Roq Static Site Generator allows to easily create a static website or blog using Quarkus super-powers. +* [Lucee](https://github.com/lucee/lucee) +* [Previewer](https://github.com/sebthom/previewer-eclipse-plugin) an extensible Eclipse plugin that previews Markdown and other text based formats. +* [Xeres](https://xeres.io) a Peer-to-Peer application where all user generated content is done with markdown + See also -------- @@ -328,20 +478,21 @@ See also Contributing ------------ -See CONTRIBUTING.md file. +See [CONTRIBUTING.md](CONTRIBUTING.md) file. License ------- -Copyright (c) 2015-2019 Atlassian and others. +Copyright (c) 2015, Robin Stocker BSD (2-clause) licensed, see LICENSE.txt file. 
-[CommonMark]: http://commonmark.org/ +[CommonMark]: https://commonmark.org/ [Markdown]: https://daringfireball.net/projects/markdown/ -[commonmark.js]: https://github.com/jgm/commonmark.js -[CommonMark Dingus]: http://spec.commonmark.org/dingus/ -[Maven Central]: https://search.maven.org/#search|ga|1|g%3A%22com.atlassian.commonmark%22 -[Semantic Versioning]: http://semver.org/ +[commonmark.js]: https://github.com/commonmark/commonmark.js +[pegdown]: https://github.com/sirthias/pegdown +[CommonMark Dingus]: https://spec.commonmark.org/dingus/ +[Maven Central]: https://search.maven.org/#search|ga|1|g%3A%22org.commonmark%22 +[Semantic Versioning]: https://semver.org/ [autolink-java]: https://github.com/robinst/autolink-java [gfm-tables]: https://help.github.com/articles/organizing-information-with-tables/ diff --git a/commonmark-android-test/.travis.sh b/commonmark-android-test/.travis.sh deleted file mode 100755 index 781568ce1..000000000 --- a/commonmark-android-test/.travis.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/sh - -set -e -set -x - -version=$(cd .. 
&& mvn help:evaluate -Dexpression=project.version | grep -v '^\[' | tail -1) -autolink_version=$(cd ../commonmark-ext-autolink && mvn help:evaluate -Dexpression=autolink.version | grep -v '^\[' | tail -1) - -touch test.properties -echo "path.report=../report" >> test.properties -echo "version.maven=0.14.0" >> test.properties -echo "version.maven_autolink=0.10.0" >> test.properties -echo "version.snapshot=$version" >> test.properties -echo "version.snapshot_autolink=$autolink_version" >> test.properties - -echo no | android create avd --force -n test -t "android-16" -emulator -avd test -no-audio -no-window & -android-wait-for-emulator - -TERM=dumb ./gradlew --stacktrace :app:connectedSnapshotDebugAndroidTest diff --git a/commonmark-android-test/README.md b/commonmark-android-test/README.md index 452556a63..0fb792ae3 100644 --- a/commonmark-android-test/README.md +++ b/commonmark-android-test/README.md @@ -1,19 +1,19 @@ commonmark-android-test ======================= -This module ensures that commonmark-java is supported on Android +This module ensures that commonmark-java is supported on Android by running `lint` checks on library sources. +Current `minSdk` is 19 Requirements: -* Java 7 or above -* Android SDK 15 -* Running emulator or connected android device +* Java 11 or above +* Android SDK 30 Configuration ----- 1. Download Android SDK -2. Be sure that SDK Platform 15 and emulator for this platform (system image) are installed. It's recommended to use x86 +2. Be sure that SDK Platform 30 is installed. It's recommended to use x86 3. Export to PATH: `path_to_android_sdk/platform-tools` and `path_to_android_sdk/tools` 4. Create 2 properties files in commonmark-android-test @@ -22,76 +22,21 @@ Configuration sdk.dir=/path_to_android_sdk ``` -/test.properties -```properties -# Absolute or relative (./ == /app) path to test reports. -path.report=../report - -# Version number of commonmark and extensions in maven central. 
-version.maven=0.14.0 -# Version number of autolink in maven central (not bundled with extension jar). -version.maven_autolink=0.10.0 - -# Version number of commonmark and extensions in project. -version.snapshot=0.14.1-SNAPSHOT -# Version number of autolink for snapshots (not bundled in extension jar). -version.snapshot_autolink=0.10.0 -``` - -If you're going to test on device with Android 15 then you can skip downloading emulator. - Usage ----- -#### Run test with MAVEN version +#### Run lint checked on Mac/Linux: ```shell -./gradlew :app:connectedMavenDebugAndroidTest +./gradlew :app:lint ``` on Windows: ```bat -.\gradlew :app:connectedMavenDebugAndroidTest -``` - -#### Run test with SNAPSHOT version - -Before running tests you need to run `mvn clean install` in the root of -this repository. - -on Mac/Linux: -```shell -./gradlew :app:connectedSnapshotDebugAndroidTest +.\gradlew :app:lint ``` -on Windows: -```bat -.\gradlew :app:connectedSnapshotDebugAndroidTest -``` - - -#### Testing in CI - -on Mac/Linux: -```shell -echo no | android create avd --force -n test -t "android-15" -emulator -avd test & -adb wait-for-device -./gradlew :app:clean :app:connectedSnapshotDebugAndroidTest -adb emu kill -``` - -on Windows: -```bat -echo no | android create avd --force -n test -t "android-15" -start emulator -avd test -adb wait-for-device -gradlew :app:clean :app:connectedSnapshotDebugAndroidTest & adb emu kill -``` - -There could be problems with command `adb wait-for-device` which could be resolved by adding additional pause before running test. 
- Links ----- [Gradle Documentations](https://docs.gradle.org/current/userguide/userguide.html) diff --git a/commonmark-android-test/app/build.gradle b/commonmark-android-test/app/build.gradle index 3ca56fbe9..fd8ae34cb 100644 --- a/commonmark-android-test/app/build.gradle +++ b/commonmark-android-test/app/build.gradle @@ -1,36 +1,20 @@ apply plugin: 'com.android.application' -def testProperties -def testPropertiesFile = file('../test.properties') -if (testPropertiesFile.canRead()) { - testProperties = new Properties() - testPropertiesFile.withInputStream { - stream -> testProperties.load(stream) - } -} - android { - compileSdkVersion 16 - buildToolsVersion "21.1.1" + namespace "org.commonmark.android.test" + compileSdk 30 defaultConfig { - applicationId "com.atlassian.commonmark.android.test" - minSdkVersion 16 - targetSdkVersion 16 + applicationId "org.commonmark.android.test" + minSdk 19 + targetSdk 30 versionCode 1 versionName "1.0" - - testInstrumentationRunner "android.support.test.runner.AndroidJUnitRunner" - } - - productFlavors { - maven - snapshot } compileOptions { - sourceCompatibility JavaVersion.VERSION_1_7 - targetCompatibility JavaVersion.VERSION_1_7 + sourceCompatibility JavaVersion.VERSION_11 + targetCompatibility JavaVersion.VERSION_11 } packagingOptions { @@ -40,45 +24,29 @@ android { exclude 'META-INF/NOTICE.txt' } - testOptions { - resultsDir = testProperties['path.report'] - } -} - -repositories { - flatDir { - dirs '../../commonmark/target', - '../../commonmark-ext-autolink/target', - '../../commonmark-ext-gfm-strikethrough/target', - '../../commonmark-ext-gfm-tables/target', - '../../commonmark-ext-heading-anchor/target', - '../../commonmark-ext-ins/target', - '../../commonmark-ext-yaml-front-matter/target', - '../../commonmark-test-util/target' + // we add other modules sources in order for lint to process them (lint operates on sources) + sourceSets { + main { + java { + [ + '../../commonmark', + '../../commonmark-ext-autolink', + 
'../../commonmark-ext-gfm-strikethrough', + '../../commonmark-ext-gfm-tables', + '../../commonmark-ext-heading-anchor', + '../../commonmark-ext-ins', + '../../commonmark-ext-yaml-front-matter' + ].forEach { d -> + // don't include module-info files, otherwise we get + // "too many module declarations found" + PatternSet patternSet = new PatternSet().exclude('**/module-info.java') + srcDirs += fileTree("$d/src/main/java").matching(patternSet) + } + } + } } } dependencies { - androidTestCompile 'com.android.support.test:runner:0.4.1' - androidTestCompile 'com.android.support.test:rules:0.4.1' - - androidTestCompile ':commonmark-test-util-' + testProperties['version.snapshot'] - - androidTestMavenCompile 'org.nibor.autolink:autolink:' + testProperties['version.maven_autolink'] - androidTestMavenCompile 'com.atlassian.commonmark:commonmark:' + testProperties['version.maven'] - androidTestMavenCompile 'com.atlassian.commonmark:commonmark-ext-autolink:' + testProperties['version.maven'] - androidTestMavenCompile 'com.atlassian.commonmark:commonmark-ext-gfm-strikethrough:' + testProperties['version.maven'] - androidTestMavenCompile 'com.atlassian.commonmark:commonmark-ext-gfm-tables:' + testProperties['version.maven'] - androidTestMavenCompile 'com.atlassian.commonmark:commonmark-ext-heading-anchor:' + testProperties['version.maven'] - androidTestMavenCompile 'com.atlassian.commonmark:commonmark-ext-ins:' + testProperties['version.maven'] - androidTestMavenCompile 'com.atlassian.commonmark:commonmark-ext-yaml-front-matter:' + testProperties['version.maven'] - - androidTestSnapshotCompile 'org.nibor.autolink:autolink:' + testProperties['version.snapshot_autolink'] - androidTestSnapshotCompile ':commonmark-' + testProperties['version.snapshot'] - androidTestSnapshotCompile ':commonmark-ext-autolink-' + testProperties['version.snapshot'] - androidTestSnapshotCompile ':commonmark-ext-gfm-strikethrough-' + testProperties['version.snapshot'] - androidTestSnapshotCompile 
':commonmark-ext-gfm-tables-' + testProperties['version.snapshot'] - androidTestSnapshotCompile ':commonmark-ext-heading-anchor-' + testProperties['version.snapshot'] - androidTestSnapshotCompile ':commonmark-ext-ins-' + testProperties['version.snapshot'] - androidTestSnapshotCompile ':commonmark-ext-yaml-front-matter-' + testProperties['version.snapshot'] + implementation('org.nibor.autolink:autolink:0.11.0') } diff --git a/commonmark-android-test/app/lint.xml b/commonmark-android-test/app/lint.xml new file mode 100644 index 000000000..3507f11d5 --- /dev/null +++ b/commonmark-android-test/app/lint.xml @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/commonmark-android-test/app/src/androidTest/java/com/atlassian/commonmark/android/test/AndroidSupportTest.java b/commonmark-android-test/app/src/androidTest/java/com/atlassian/commonmark/android/test/AndroidSupportTest.java deleted file mode 100644 index 26fd2c360..000000000 --- a/commonmark-android-test/app/src/androidTest/java/com/atlassian/commonmark/android/test/AndroidSupportTest.java +++ /dev/null @@ -1,100 +0,0 @@ -package com.atlassian.commonmark.android.test; - -import org.commonmark.Extension; -import org.commonmark.ext.autolink.AutolinkExtension; -import org.commonmark.ext.front.matter.YamlFrontMatterExtension; -import org.commonmark.ext.gfm.strikethrough.StrikethroughExtension; -import org.commonmark.ext.gfm.tables.TablesExtension; -import org.commonmark.ext.heading.anchor.HeadingAnchorExtension; -import org.commonmark.ext.ins.InsExtension; -import org.commonmark.node.Node; -import org.commonmark.parser.Parser; -import org.commonmark.renderer.html.HtmlRenderer; -import org.commonmark.testutil.TestResources; -import org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; - -import android.support.test.runner.AndroidJUnit4; -import android.test.suitebuilder.annotation.SmallTest; - -import java.util.Collections; - -import static org.junit.Assert.assertNotNull; 
- -@RunWith(AndroidJUnit4.class) -@SmallTest -public class AndroidSupportTest { - - private String spec; - - @Before - public void setUp() throws Exception { - spec = TestResources.readAsString(TestResources.getSpec()); - } - - @Test - public void parseTest() throws Exception { - Parser parser = new Parser.Builder().build(); - - Node document = parser.parse(spec); - - assertNotNull(document); - } - - @Test - public void autolinkExtensionTest() throws Exception { - parseWithExtensionsTest(AutolinkExtension.create()); - } - - @Test - public void strikethroughExtensionTest() throws Exception { - parseWithExtensionsTest(StrikethroughExtension.create()); - } - - @Test - public void tablesExtensionTest() throws Exception { - parseWithExtensionsTest(TablesExtension.create()); - } - - @Test - public void headingAnchorExtensionTest() throws Exception { - parseWithExtensionsTest(HeadingAnchorExtension.create()); - } - - @Test - public void insExtensionTest() throws Exception { - parseWithExtensionsTest(InsExtension.create()); - } - - @Test - public void yamlFrontMatterExtensionTest() throws Exception { - parseWithExtensionsTest(YamlFrontMatterExtension.create()); - } - - @Test - public void htmlRendererTest() throws Exception { - Parser parser = Parser.builder().build(); - HtmlRenderer renderer = HtmlRenderer.builder().build(); - - String renderedString = renderer.render(parser.parse(spec)); - - assertNotNull(renderedString); - } - - private void parseWithExtensionsTest(Extension extension) throws Exception { - Parser parser = Parser.builder() - .extensions(Collections.singletonList(extension)) - .build(); - - Node document = parser.parse(spec); - assertNotNull(document); - - HtmlRenderer renderer = HtmlRenderer.builder() - .extensions(Collections.singletonList(extension)) - .build(); - - String renderedString = renderer.render(document); - assertNotNull(renderedString); - } -} diff --git a/commonmark-android-test/app/src/main/AndroidManifest.xml 
b/commonmark-android-test/app/src/main/AndroidManifest.xml index de2cd5725..486520569 100644 --- a/commonmark-android-test/app/src/main/AndroidManifest.xml +++ b/commonmark-android-test/app/src/main/AndroidManifest.xml @@ -1,3 +1,3 @@ - + diff --git a/commonmark-android-test/build.gradle b/commonmark-android-test/build.gradle index 027fee4d9..f359e8154 100644 --- a/commonmark-android-test/build.gradle +++ b/commonmark-android-test/build.gradle @@ -1,18 +1,22 @@ buildscript { repositories { - jcenter() + mavenCentral() + google() } dependencies { - classpath 'com.android.tools.build:gradle:1.5.0' + classpath 'com.android.tools.build:gradle:7.4.2' } } allprojects { repositories { - jcenter() + mavenCentral() + google() } } task clean(type: Delete) { delete rootProject.buildDir } + + diff --git a/commonmark-android-test/gradle/wrapper/gradle-wrapper.jar b/commonmark-android-test/gradle/wrapper/gradle-wrapper.jar index 8c0fb64a8..d64cd4917 100644 Binary files a/commonmark-android-test/gradle/wrapper/gradle-wrapper.jar and b/commonmark-android-test/gradle/wrapper/gradle-wrapper.jar differ diff --git a/commonmark-android-test/gradle/wrapper/gradle-wrapper.properties b/commonmark-android-test/gradle/wrapper/gradle-wrapper.properties index 122a0dca2..a80b22ce5 100644 --- a/commonmark-android-test/gradle/wrapper/gradle-wrapper.properties +++ b/commonmark-android-test/gradle/wrapper/gradle-wrapper.properties @@ -1,6 +1,7 @@ -#Mon Dec 28 10:00:20 PST 2015 distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-8.6-bin.zip +networkTimeout=10000 +validateDistributionUrl=true zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-2.10-all.zip diff --git a/commonmark-android-test/gradlew b/commonmark-android-test/gradlew index 9d82f7891..1aa94a426 100755 --- a/commonmark-android-test/gradlew +++ b/commonmark-android-test/gradlew @@ -1,74 
+1,127 @@ -#!/usr/bin/env bash +#!/bin/sh + +# +# Copyright © 2015-2021 the original authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ############################################################################## -## -## Gradle start up script for UN*X -## +# +# Gradle start up script for POSIX generated by Gradle. +# +# Important for running: +# +# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is +# noncompliant, but you have some other compliant shell such as ksh or +# bash, then to run this script, type that shell name before the whole +# command line, like: +# +# ksh Gradle +# +# Busybox and similar reduced shells will NOT work, because this script +# requires all of these POSIX shell features: +# * functions; +# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», +# «${var#prefix}», «${var%suffix}», and «$( cmd )»; +# * compound commands having a testable exit status, especially «case»; +# * various built-in commands including «command», «set», and «ulimit». +# +# Important for patching: +# +# (2) This script targets any POSIX shell, so it avoids extensions provided +# by Bash, Ksh, etc; in particular arrays are avoided. +# +# The "traditional" practice of packing multiple parameters into a +# space-separated string is a well documented source of bugs and security +# problems, so this is (mostly) avoided, by progressively accumulating +# options in "$@", and eventually passing that to Java. 
+# +# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, +# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; +# see the in-line comments for details. +# +# There are tweaks for specific operating systems such as AIX, CygWin, +# Darwin, MinGW, and NonStop. +# +# (3) This script is generated from the Groovy template +# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# within the Gradle project. +# +# You can find Gradle at https://github.com/gradle/gradle/. +# ############################################################################## -# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -DEFAULT_JVM_OPTS="" +# Attempt to set APP_HOME + +# Resolve links: $0 may be a link +app_path=$0 + +# Need this for daisy-chained symlinks. +while + APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path + [ -h "$app_path" ] +do + ls=$( ls -ld "$app_path" ) + link=${ls#*' -> '} + case $link in #( + /*) app_path=$link ;; #( + *) app_path=$APP_HOME$link ;; + esac +done -APP_NAME="Gradle" -APP_BASE_NAME=`basename "$0"` +# This is normally unused +# shellcheck disable=SC2034 +APP_BASE_NAME=${0##*/} +# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) +APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit # Use the maximum available, or set MAX_FD != -1 to use that value. -MAX_FD="maximum" +MAX_FD=maximum -warn ( ) { +warn () { echo "$*" -} +} >&2 -die ( ) { +die () { echo echo "$*" echo exit 1 -} +} >&2 # OS specific support (must be 'true' or 'false'). 
cygwin=false msys=false darwin=false -case "`uname`" in - CYGWIN* ) - cygwin=true - ;; - Darwin* ) - darwin=true - ;; - MINGW* ) - msys=true - ;; +nonstop=false +case "$( uname )" in #( + CYGWIN* ) cygwin=true ;; #( + Darwin* ) darwin=true ;; #( + MSYS* | MINGW* ) msys=true ;; #( + NONSTOP* ) nonstop=true ;; esac -# Attempt to set APP_HOME -# Resolve links: $0 may be a link -PRG="$0" -# Need this for relative symlinks. -while [ -h "$PRG" ] ; do - ls=`ls -ld "$PRG"` - link=`expr "$ls" : '.*-> \(.*\)$'` - if expr "$link" : '/.*' > /dev/null; then - PRG="$link" - else - PRG=`dirname "$PRG"`"/$link" - fi -done -SAVED="`pwd`" -cd "`dirname \"$PRG\"`/" >/dev/null -APP_HOME="`pwd -P`" -cd "$SAVED" >/dev/null - CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + # Determine the Java command to use to start the JVM. if [ -n "$JAVA_HOME" ] ; then if [ -x "$JAVA_HOME/jre/sh/java" ] ; then # IBM's JDK on AIX uses strange locations for the executables - JAVACMD="$JAVA_HOME/jre/sh/java" + JAVACMD=$JAVA_HOME/jre/sh/java else - JAVACMD="$JAVA_HOME/bin/java" + JAVACMD=$JAVA_HOME/bin/java fi if [ ! -x "$JAVACMD" ] ; then die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME @@ -77,84 +130,120 @@ Please set the JAVA_HOME variable in your environment to match the location of your Java installation." fi else - JAVACMD="java" - which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + JAVACMD=java + if ! command -v java >/dev/null 2>&1 + then + die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. Please set the JAVA_HOME variable in your environment to match the location of your Java installation." + fi fi # Increase the maximum file descriptors if we can. -if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then - MAX_FD_LIMIT=`ulimit -H -n` - if [ $? -eq 0 ] ; then - if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then - MAX_FD="$MAX_FD_LIMIT" - fi - ulimit -n $MAX_FD - if [ $? 
-ne 0 ] ; then - warn "Could not set maximum file descriptor limit: $MAX_FD" - fi - else - warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" - fi +if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then + case $MAX_FD in #( + max*) + # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC2039,SC3045 + MAX_FD=$( ulimit -H -n ) || + warn "Could not query maximum file descriptor limit" + esac + case $MAX_FD in #( + '' | soft) :;; #( + *) + # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC2039,SC3045 + ulimit -n "$MAX_FD" || + warn "Could not set maximum file descriptor limit to $MAX_FD" + esac fi -# For Darwin, add options to specify how the application appears in the dock -if $darwin; then - GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" -fi +# Collect all arguments for the java command, stacking in reverse order: +# * args from the command line +# * the main class name +# * -classpath +# * -D...appname settings +# * --module-path (only if needed) +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. 
+ +# For Cygwin or MSYS, switch paths to Windows format before running java +if "$cygwin" || "$msys" ; then + APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) + CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) + + JAVACMD=$( cygpath --unix "$JAVACMD" ) -# For Cygwin, switch paths to Windows format before running java -if $cygwin ; then - APP_HOME=`cygpath --path --mixed "$APP_HOME"` - CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` - JAVACMD=`cygpath --unix "$JAVACMD"` - - # We build the pattern for arguments to be converted via cygpath - ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` - SEP="" - for dir in $ROOTDIRSRAW ; do - ROOTDIRS="$ROOTDIRS$SEP$dir" - SEP="|" - done - OURCYGPATTERN="(^($ROOTDIRS))" - # Add a user-defined pattern to the cygpath arguments - if [ "$GRADLE_CYGPATTERN" != "" ] ; then - OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" - fi # Now convert the arguments - kludge to limit ourselves to /bin/sh - i=0 - for arg in "$@" ; do - CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` - CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option - - if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition - eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` - else - eval `echo args$i`="\"$arg\"" + for arg do + if + case $arg in #( + -*) false ;; # don't mess with options #( + /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath + [ -e "$t" ] ;; #( + *) false ;; + esac + then + arg=$( cygpath --path --ignore --mixed "$arg" ) fi - i=$((i+1)) + # Roll the args list around exactly as many times as the number of + # args, so each arg winds up back in the position where it started, but + # possibly modified. + # + # NB: a `for` loop captures its iteration list before it begins, so + # changing the positional parameters here affects neither the number of + # iterations, nor the values presented in `arg`. 
+ shift # remove old arg + set -- "$@" "$arg" # push replacement arg done - case $i in - (0) set -- ;; - (1) set -- "$args0" ;; - (2) set -- "$args0" "$args1" ;; - (3) set -- "$args0" "$args1" "$args2" ;; - (4) set -- "$args0" "$args1" "$args2" "$args3" ;; - (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; - (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; - (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; - (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; - (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; - esac fi -# Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules -function splitJvmOpts() { - JVM_OPTS=("$@") -} -eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS -JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME" -exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@" +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Collect all arguments for the java command: +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS are not allowed to contain shell fragments, +# and any embedded shellness will be escaped. +# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be +# treated as '${Hostname}' itself on the command line. + +set -- \ + "-Dorg.gradle.appname=$APP_BASE_NAME" \ + -classpath "$CLASSPATH" \ + org.gradle.wrapper.GradleWrapperMain \ + "$@" + +# Stop when "xargs" is not available. +if ! command -v xargs >/dev/null 2>&1 +then + die "xargs is not available" +fi + +# Use "xargs" to parse quoted args. +# +# With -n1 it outputs one arg per line, with the quotes and backslashes removed. 
+# +# In Bash we could simply go: +# +# readarray ARGS < <( xargs -n1 <<<"$var" ) && +# set -- "${ARGS[@]}" "$@" +# +# but POSIX shell has neither arrays nor process substitution, so instead we +# post-process each arg (as a line of input to sed) to backslash-escape any +# character that might be a shell metacharacter, then use eval to reverse +# that process (while maintaining the separation between arguments), and wrap +# the whole thing up as a single "set" statement. +# +# This will of course break if any of these variables contains a newline or +# an unmatched quote. +# + +eval "set -- $( + printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | + xargs -n1 | + sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | + tr '\n' ' ' + )" '"$@"' + +exec "$JAVACMD" "$@" diff --git a/commonmark-android-test/gradlew.bat b/commonmark-android-test/gradlew.bat index aec99730b..7101f8e46 100644 --- a/commonmark-android-test/gradlew.bat +++ b/commonmark-android-test/gradlew.bat @@ -1,4 +1,20 @@ -@if "%DEBUG%" == "" @echo off +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. +@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem + +@if "%DEBUG%"=="" @echo off @rem ########################################################################## @rem @rem Gradle startup script for Windows @@ -8,26 +24,30 @@ @rem Set local scope for the variables with windows NT shell if "%OS%"=="Windows_NT" setlocal -@rem Add default JVM options here. 
You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -set DEFAULT_JVM_OPTS= - set DIRNAME=%~dp0 -if "%DIRNAME%" == "" set DIRNAME=. +if "%DIRNAME%"=="" set DIRNAME=. +@rem This is normally unused set APP_BASE_NAME=%~n0 set APP_HOME=%DIRNAME% +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + @rem Find java.exe if defined JAVA_HOME goto findJavaFromJavaHome set JAVA_EXE=java.exe %JAVA_EXE% -version >NUL 2>&1 -if "%ERRORLEVEL%" == "0" goto init +if %ERRORLEVEL% equ 0 goto execute -echo. -echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. -echo. -echo Please set the JAVA_HOME variable in your environment to match the -echo location of your Java installation. +echo. 1>&2 +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2 +echo. 1>&2 +echo Please set the JAVA_HOME variable in your environment to match the 1>&2 +echo location of your Java installation. 1>&2 goto fail @@ -35,54 +55,36 @@ goto fail set JAVA_HOME=%JAVA_HOME:"=% set JAVA_EXE=%JAVA_HOME%/bin/java.exe -if exist "%JAVA_EXE%" goto init +if exist "%JAVA_EXE%" goto execute -echo. -echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% -echo. -echo Please set the JAVA_HOME variable in your environment to match the -echo location of your Java installation. +echo. 1>&2 +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2 +echo. 1>&2 +echo Please set the JAVA_HOME variable in your environment to match the 1>&2 +echo location of your Java installation. 
1>&2 goto fail -:init -@rem Get command-line arguments, handling Windowz variants - -if not "%OS%" == "Windows_NT" goto win9xME_args -if "%@eval[2+2]" == "4" goto 4NT_args - -:win9xME_args -@rem Slurp the command line arguments. -set CMD_LINE_ARGS= -set _SKIP=2 - -:win9xME_args_slurp -if "x%~1" == "x" goto execute - -set CMD_LINE_ARGS=%* -goto execute - -:4NT_args -@rem Get arguments from the 4NT Shell from JP Software -set CMD_LINE_ARGS=%$ - :execute @rem Setup the command line set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + @rem Execute Gradle -"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* :end @rem End local scope for the variables with windows NT shell -if "%ERRORLEVEL%"=="0" goto mainEnd +if %ERRORLEVEL% equ 0 goto mainEnd :fail rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of rem the _cmd.exe /c_ return code! 
-if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 -exit /b 1 +set EXIT_CODE=%ERRORLEVEL% +if %EXIT_CODE% equ 0 set EXIT_CODE=1 +if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% +exit /b %EXIT_CODE% :mainEnd if "%OS%"=="Windows_NT" endlocal diff --git a/commonmark-ext-autolink/.settings/org.eclipse.core.runtime.prefs b/commonmark-ext-autolink/.settings/org.eclipse.core.runtime.prefs deleted file mode 100644 index 5a0ad22d2..000000000 --- a/commonmark-ext-autolink/.settings/org.eclipse.core.runtime.prefs +++ /dev/null @@ -1,2 +0,0 @@ -eclipse.preferences.version=1 -line.separator=\n diff --git a/commonmark-ext-autolink/.settings/org.eclipse.jdt.core.prefs b/commonmark-ext-autolink/.settings/org.eclipse.jdt.core.prefs deleted file mode 100644 index 3c0d27c8f..000000000 --- a/commonmark-ext-autolink/.settings/org.eclipse.jdt.core.prefs +++ /dev/null @@ -1,290 +0,0 @@ -eclipse.preferences.version=1 -org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7 -org.eclipse.jdt.core.compiler.compliance=1.7 -org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning -org.eclipse.jdt.core.compiler.source=1.7 -org.eclipse.jdt.core.formatter.align_type_members_on_columns=false -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_annotation=0 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=16 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call=16 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation=16 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression=16 -org.eclipse.jdt.core.formatter.alignment_for_assignment=0 -org.eclipse.jdt.core.formatter.alignment_for_binary_expression=16 -org.eclipse.jdt.core.formatter.alignment_for_compact_if=16 -org.eclipse.jdt.core.formatter.alignment_for_conditional_expression=80 -org.eclipse.jdt.core.formatter.alignment_for_enum_constants=0 
-org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer=16 -org.eclipse.jdt.core.formatter.alignment_for_method_declaration=0 -org.eclipse.jdt.core.formatter.alignment_for_multiple_fields=16 -org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_resources_in_try=80 -org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation=16 -org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_union_type_in_multicatch=16 -org.eclipse.jdt.core.formatter.blank_lines_after_imports=1 -org.eclipse.jdt.core.formatter.blank_lines_after_package=1 -org.eclipse.jdt.core.formatter.blank_lines_before_field=0 -org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration=0 -org.eclipse.jdt.core.formatter.blank_lines_before_imports=1 -org.eclipse.jdt.core.formatter.blank_lines_before_member_type=1 -org.eclipse.jdt.core.formatter.blank_lines_before_method=1 -org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk=1 -org.eclipse.jdt.core.formatter.blank_lines_before_package=0 -org.eclipse.jdt.core.formatter.blank_lines_between_import_groups=1 -org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations=1 -org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_array_initializer=end_of_line 
-org.eclipse.jdt.core.formatter.brace_position_for_block=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_block_in_case=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_enum_constant=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_lambda_body=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_method_declaration=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_switch=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_type_declaration=end_of_line -org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment=false -org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment=false -org.eclipse.jdt.core.formatter.comment.format_block_comments=true -org.eclipse.jdt.core.formatter.comment.format_header=false -org.eclipse.jdt.core.formatter.comment.format_html=true -org.eclipse.jdt.core.formatter.comment.format_javadoc_comments=true -org.eclipse.jdt.core.formatter.comment.format_line_comments=true -org.eclipse.jdt.core.formatter.comment.format_source_code=true -org.eclipse.jdt.core.formatter.comment.indent_parameter_description=true -org.eclipse.jdt.core.formatter.comment.indent_root_tags=true -org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags=insert -org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter=do not insert -org.eclipse.jdt.core.formatter.comment.line_length=120 -org.eclipse.jdt.core.formatter.comment.new_lines_at_block_boundaries=true -org.eclipse.jdt.core.formatter.comment.new_lines_at_javadoc_boundaries=true -org.eclipse.jdt.core.formatter.comment.preserve_white_space_between_code_and_line_comments=false -org.eclipse.jdt.core.formatter.compact_else_if=true -org.eclipse.jdt.core.formatter.continuation_indentation=2 
-org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer=2 -org.eclipse.jdt.core.formatter.disabling_tag=@formatter\:off -org.eclipse.jdt.core.formatter.enabling_tag=@formatter\:on -org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line=false -org.eclipse.jdt.core.formatter.format_line_comment_starting_on_first_column=true -org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header=true -org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header=true -org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header=true -org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header=true -org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases=true -org.eclipse.jdt.core.formatter.indent_empty_lines=false -org.eclipse.jdt.core.formatter.indent_statements_compare_to_block=true -org.eclipse.jdt.core.formatter.indent_statements_compare_to_body=true -org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases=true -org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch=true -org.eclipse.jdt.core.formatter.indentation.size=4 -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_field=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_method=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_package=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_type=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_label=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_after_type_annotation=do not insert 
-org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter=insert -org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator=insert -org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_binary_operator=insert -org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters=insert -org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block=insert -org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast=insert -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert=insert 
-org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case=insert -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional=insert -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for=insert -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments=insert 
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters=insert -org.eclipse.jdt.core.formatter.insert_space_after_ellipsis=insert -org.eclipse.jdt.core.formatter.insert_space_after_lambda_arrow=insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer=insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized=do not insert 
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_try=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional=insert -org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for=insert -org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_try_resources=insert -org.eclipse.jdt.core.formatter.insert_space_after_unary_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter=insert -org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator=insert -org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_binary_operator=insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer=insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch=do not insert 
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_try=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert=insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional=insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for=insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters=do not insert 
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_ellipsis=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_lambda_arrow=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters=do not insert 
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if=insert 
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_try=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while=insert -org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return=insert -org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw=insert -org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional=insert -org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_semicolon=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_try_resources=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_unary_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration=do not insert 
-org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation=do not insert -org.eclipse.jdt.core.formatter.join_lines_in_comments=true -org.eclipse.jdt.core.formatter.join_wrapped_lines=false -org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line=false -org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line=false -org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line=false -org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line=false -org.eclipse.jdt.core.formatter.lineSplit=120 -org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column=false -org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column=false -org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body=0 -org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve=1 -org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line=true -org.eclipse.jdt.core.formatter.tabulation.char=space -org.eclipse.jdt.core.formatter.tabulation.size=4 -org.eclipse.jdt.core.formatter.use_on_off_tags=false -org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations=false -org.eclipse.jdt.core.formatter.wrap_before_binary_operator=true -org.eclipse.jdt.core.formatter.wrap_before_or_operator_multicatch=true -org.eclipse.jdt.core.formatter.wrap_outer_expressions_when_nested=true -org.eclipse.jdt.core.javaFormatter=org.eclipse.jdt.core.defaultJavaFormatter diff --git a/commonmark-ext-autolink/pom.xml b/commonmark-ext-autolink/pom.xml index 80dd347f1..2cc4d53ca 100644 --- a/commonmark-ext-autolink/pom.xml +++ b/commonmark-ext-autolink/pom.xml @@ -2,9 +2,9 @@ 4.0.0 - com.atlassian.commonmark 
+ org.commonmark commonmark-parent - 0.14.1-SNAPSHOT + 0.28.1-SNAPSHOT commonmark-ext-autolink @@ -12,12 +12,12 @@ commonmark-java extension for turning plain URLs and email addresses into links - 0.10.0 + 0.12.0 - com.atlassian.commonmark + org.commonmark commonmark @@ -27,26 +27,10 @@ - com.atlassian.commonmark + org.commonmark commonmark-test-util test - - - - org.apache.maven.plugins - maven-jar-plugin - - - - org.commonmark.ext.autolink - - - - - - - diff --git a/commonmark-ext-autolink/src/main/java/module-info.java b/commonmark-ext-autolink/src/main/java/module-info.java new file mode 100644 index 000000000..561934b85 --- /dev/null +++ b/commonmark-ext-autolink/src/main/java/module-info.java @@ -0,0 +1,6 @@ +module org.commonmark.ext.autolink { + exports org.commonmark.ext.autolink; + + requires transitive org.commonmark; + requires org.nibor.autolink; +} diff --git a/commonmark-ext-autolink/src/main/java/org/commonmark/ext/autolink/AutolinkExtension.java b/commonmark-ext-autolink/src/main/java/org/commonmark/ext/autolink/AutolinkExtension.java index e5926c7bb..7d5a74f30 100644 --- a/commonmark-ext-autolink/src/main/java/org/commonmark/ext/autolink/AutolinkExtension.java +++ b/commonmark-ext-autolink/src/main/java/org/commonmark/ext/autolink/AutolinkExtension.java @@ -1,5 +1,8 @@ package org.commonmark.ext.autolink; +import java.util.EnumSet; +import java.util.Set; + import org.commonmark.Extension; import org.commonmark.ext.autolink.internal.AutolinkPostProcessor; import org.commonmark.parser.Parser; @@ -18,16 +21,71 @@ */ public class AutolinkExtension implements Parser.ParserExtension { - private AutolinkExtension() { + private final Set linkTypes; + + private AutolinkExtension(Builder builder) { + this.linkTypes = builder.linkTypes; } + /** + * @return the extension with default options + */ public static Extension create() { - return new AutolinkExtension(); + return builder().build(); + } + + /** + * @return a builder to configure the behavior of the 
extension. + */ + public static Builder builder() { + return new Builder(); } @Override public void extend(Parser.Builder parserBuilder) { - parserBuilder.postProcessor(new AutolinkPostProcessor()); + parserBuilder.postProcessor(new AutolinkPostProcessor(linkTypes)); } + public static class Builder { + + private Set linkTypes = EnumSet.allOf(AutolinkType.class); + + /** + * @param linkTypes the link types that should be converted. By default, + * all {@link AutolinkType}s are converted. + * @return {@code this} + */ + public Builder linkTypes(AutolinkType... linkTypes) { + if (linkTypes == null) { + throw new NullPointerException("linkTypes must not be null"); + } + + return this.linkTypes(Set.of(linkTypes)); + } + + /** + * @param linkTypes the link types that should be converted. By default, + * all {@link AutolinkType}s are converted. + * @return {@code this} + */ + public Builder linkTypes(Set linkTypes) { + if (linkTypes == null) { + throw new NullPointerException("linkTypes must not be null"); + } + + if (linkTypes.isEmpty()) { + throw new IllegalArgumentException("linkTypes must not be empty"); + } + + this.linkTypes = EnumSet.copyOf(linkTypes); + return this; + } + + /** + * @return a configured extension + */ + public Extension build() { + return new AutolinkExtension(this); + } + } } diff --git a/commonmark-ext-autolink/src/main/java/org/commonmark/ext/autolink/AutolinkType.java b/commonmark-ext-autolink/src/main/java/org/commonmark/ext/autolink/AutolinkType.java new file mode 100644 index 000000000..2c8c6574f --- /dev/null +++ b/commonmark-ext-autolink/src/main/java/org/commonmark/ext/autolink/AutolinkType.java @@ -0,0 +1,19 @@ +package org.commonmark.ext.autolink; + +/** + * The types of strings that can be automatically turned into links. 
+ */ +public enum AutolinkType { + /** + * URL such as {@code http://example.com} + */ + URL, + /** + * Email address such as {@code foo@example.com} + */ + EMAIL, + /** + * URL such as {@code www.example.com} + */ + WWW +} diff --git a/commonmark-ext-autolink/src/main/java/org/commonmark/ext/autolink/internal/AutolinkPostProcessor.java b/commonmark-ext-autolink/src/main/java/org/commonmark/ext/autolink/internal/AutolinkPostProcessor.java index 0f94d5902..a381c2f19 100644 --- a/commonmark-ext-autolink/src/main/java/org/commonmark/ext/autolink/internal/AutolinkPostProcessor.java +++ b/commonmark-ext-autolink/src/main/java/org/commonmark/ext/autolink/internal/AutolinkPostProcessor.java @@ -1,22 +1,47 @@ package org.commonmark.ext.autolink.internal; -import org.commonmark.node.AbstractVisitor; -import org.commonmark.node.Link; -import org.commonmark.node.Node; -import org.commonmark.node.Text; +import org.commonmark.ext.autolink.AutolinkType; +import org.commonmark.node.*; import org.commonmark.parser.PostProcessor; import org.nibor.autolink.LinkExtractor; import org.nibor.autolink.LinkSpan; import org.nibor.autolink.LinkType; import org.nibor.autolink.Span; -import java.util.EnumSet; +import java.util.*; public class AutolinkPostProcessor implements PostProcessor { - private LinkExtractor linkExtractor = LinkExtractor.builder() - .linkTypes(EnumSet.of(LinkType.URL, LinkType.EMAIL)) - .build(); + private final LinkExtractor linkExtractor; + + public AutolinkPostProcessor(Set linkTypes) { + if (linkTypes == null) { + throw new NullPointerException("linkTypes must not be null"); + } + + if (linkTypes.isEmpty()) { + throw new IllegalArgumentException("linkTypes must not be empty"); + } + + var types = EnumSet.noneOf(LinkType.class); + for (AutolinkType linkType : linkTypes) { + switch (linkType) { + case URL: + types.add(LinkType.URL); + break; + case EMAIL: + types.add(LinkType.EMAIL); + break; + case WWW: + types.add(LinkType.WWW); + break; + } + } + + 
this.linkExtractor = LinkExtractor.builder() + .linkTypes(types) + .build(); + } @Override public Node process(Node node) { @@ -25,31 +50,58 @@ public Node process(Node node) { return node; } - private void linkify(Text textNode) { - String literal = textNode.getLiteral(); + private void linkify(Text originalTextNode) { + String literal = originalTextNode.getLiteral(); - Node lastNode = textNode; + Node lastNode = originalTextNode; + List sourceSpans = originalTextNode.getSourceSpans(); + SourceSpan sourceSpan = sourceSpans.size() == 1 ? sourceSpans.get(0) : null; - for (Span span : linkExtractor.extractSpans(literal)) { - String text = literal.substring(span.getBeginIndex(), span.getEndIndex()); + Iterator spans = linkExtractor.extractSpans(literal).iterator(); + while (spans.hasNext()) { + Span span = spans.next(); + + if (lastNode == originalTextNode && !spans.hasNext() && !(span instanceof LinkSpan)) { + // Didn't find any links, don't bother changing existing node. + return; + } + + Text textNode = createTextNode(literal, span, sourceSpan); if (span instanceof LinkSpan) { - String destination = getDestination((LinkSpan) span, text); - Text contentNode = new Text(text); + String destination = getDestination((LinkSpan) span, textNode.getLiteral()); + Link linkNode = new Link(destination, null); - linkNode.appendChild(contentNode); + linkNode.appendChild(textNode); + linkNode.setSourceSpans(textNode.getSourceSpans()); lastNode = insertNode(linkNode, lastNode); } else { - lastNode = insertNode(new Text(text), lastNode); + lastNode = insertNode(textNode, lastNode); } } // Original node no longer needed - textNode.unlink(); + originalTextNode.unlink(); + } + + private static Text createTextNode(String literal, Span span, SourceSpan sourceSpan) { + int beginIndex = span.getBeginIndex(); + int endIndex = span.getEndIndex(); + String text = literal.substring(beginIndex, endIndex); + Text textNode = new Text(text); + if (sourceSpan != null) { + 
textNode.addSourceSpan(sourceSpan.subSpan(beginIndex, endIndex)); + } + return textNode; } private static String getDestination(LinkSpan linkSpan, String linkText) { - if (linkSpan.getType() == LinkType.EMAIL) { + var type = linkSpan.getType(); + + if (type == LinkType.EMAIL) { return "mailto:" + linkText; + } else if (type == LinkType.WWW) { + // Use http instead of https (see https://github.github.com/gfm/#extended-www-autolink) + return "http://" + linkText; } else { return linkText; } diff --git a/commonmark-ext-autolink/src/main/resources/META-INF/LICENSE.txt b/commonmark-ext-autolink/src/main/resources/META-INF/LICENSE.txt new file mode 100644 index 000000000..b09e367ce --- /dev/null +++ b/commonmark-ext-autolink/src/main/resources/META-INF/LICENSE.txt @@ -0,0 +1,23 @@ +Copyright (c) 2015, Atlassian Pty Ltd +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/commonmark-ext-autolink/src/test/java/org/commonmark/ext/autolink/AutolinkTest.java b/commonmark-ext-autolink/src/test/java/org/commonmark/ext/autolink/AutolinkTest.java index ae586b6f0..82c3899fc 100644 --- a/commonmark-ext-autolink/src/test/java/org/commonmark/ext/autolink/AutolinkTest.java +++ b/commonmark-ext-autolink/src/test/java/org/commonmark/ext/autolink/AutolinkTest.java @@ -1,20 +1,30 @@ package org.commonmark.ext.autolink; import org.commonmark.Extension; +import org.commonmark.node.*; +import org.commonmark.parser.IncludeSourceSpans; import org.commonmark.parser.Parser; import org.commonmark.renderer.html.HtmlRenderer; import org.commonmark.testutil.RenderingTestCase; -import org.junit.Test; +import org.junit.jupiter.api.Test; -import java.util.Collections; +import java.util.List; import java.util.Set; +import static org.assertj.core.api.Assertions.assertThat; + public class AutolinkTest extends RenderingTestCase { - private static final Set EXTENSIONS = Collections.singleton(AutolinkExtension.create()); + private static final Set EXTENSIONS = Set.of(AutolinkExtension.create()); private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build(); private static final HtmlRenderer RENDERER = HtmlRenderer.builder().extensions(EXTENSIONS).build(); + private static final Set NO_WWW_EXTENSIONS = Set.of(AutolinkExtension.builder() + .linkTypes(AutolinkType.URL, AutolinkType.EMAIL) + .build()); + private static final 
Parser NO_WWW_PARSER = Parser.builder().extensions(NO_WWW_EXTENSIONS).build(); + private static final HtmlRenderer NO_WWW_RENDERER = HtmlRenderer.builder().extensions(NO_WWW_EXTENSIONS).build(); + @Test public void oneTextNode() { assertRendering("foo http://one.org/ bar http://two.org/", @@ -53,6 +63,64 @@ public void dontLinkTextWithinLinks() { "

http://example.com

\n"); } + @Test + public void wwwLinks() { + assertRendering("www.example.com", + "

www.example.com

\n"); + } + + @Test + public void noWwwLinks() { + String html = NO_WWW_RENDERER.render(NO_WWW_PARSER.parse("www.example.com")); + assertThat(html).isEqualTo("

www.example.com

\n"); + } + + @Test + public void sourceSpans() { + Parser parser = Parser.builder() + .extensions(EXTENSIONS) + .includeSourceSpans(IncludeSourceSpans.BLOCKS_AND_INLINES) + .build(); + Node document = parser.parse("abc\n" + + "http://example.com/one\n" + + "def http://example.com/two\n" + + "ghi http://example.com/three jkl"); + + Paragraph paragraph = (Paragraph) document.getFirstChild(); + Text abc = (Text) paragraph.getFirstChild(); + assertThat(abc.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 0, 0, 3))); + + assertThat(abc.getNext()).isInstanceOf(SoftLineBreak.class); + + Link one = (Link) abc.getNext().getNext(); + assertThat(one.getDestination()).isEqualTo("http://example.com/one"); + assertThat(one.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(1, 0, 4, 22))); + + assertThat(one.getNext()).isInstanceOf(SoftLineBreak.class); + + Text def = (Text) one.getNext().getNext(); + assertThat(def.getLiteral()).isEqualTo("def "); + assertThat(def.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(2, 0, 27, 4))); + + Link two = (Link) def.getNext(); + assertThat(two.getDestination()).isEqualTo("http://example.com/two"); + assertThat(two.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(2, 4, 31, 22))); + + assertThat(two.getNext()).isInstanceOf(SoftLineBreak.class); + + Text ghi = (Text) two.getNext().getNext(); + assertThat(ghi.getLiteral()).isEqualTo("ghi "); + assertThat(ghi.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(3, 0, 54, 4))); + + Link three = (Link) ghi.getNext(); + assertThat(three.getDestination()).isEqualTo("http://example.com/three"); + assertThat(three.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(3, 4, 58, 24))); + + Text jkl = (Text) three.getNext(); + assertThat(jkl.getLiteral()).isEqualTo(" jkl"); + assertThat(jkl.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(3, 28, 82, 4))); + } + @Override protected String render(String source) { return RENDERER.render(PARSER.parse(source)); diff --git a/commonmark-ext-footnotes/pom.xml 
b/commonmark-ext-footnotes/pom.xml new file mode 100644 index 000000000..0d9e2f30c --- /dev/null +++ b/commonmark-ext-footnotes/pom.xml @@ -0,0 +1,27 @@ + + + 4.0.0 + + org.commonmark + commonmark-parent + 0.28.1-SNAPSHOT + + + commonmark-ext-footnotes + commonmark-java extension for footnotes + commonmark-java extension for footnotes using [^1] syntax + + + + org.commonmark + commonmark + + + + org.commonmark + commonmark-test-util + test + + + + diff --git a/commonmark-ext-footnotes/src/main/java/module-info.java b/commonmark-ext-footnotes/src/main/java/module-info.java new file mode 100644 index 000000000..0667b2801 --- /dev/null +++ b/commonmark-ext-footnotes/src/main/java/module-info.java @@ -0,0 +1,5 @@ +module org.commonmark.ext.footnotes { + exports org.commonmark.ext.footnotes; + + requires transitive org.commonmark; +} diff --git a/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/FootnoteDefinition.java b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/FootnoteDefinition.java new file mode 100644 index 000000000..4a560dc9e --- /dev/null +++ b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/FootnoteDefinition.java @@ -0,0 +1,27 @@ +package org.commonmark.ext.footnotes; + +import org.commonmark.node.CustomBlock; + +/** + * A footnote definition, e.g.: + *

+ * [^foo]: This is the footnote text
+ * 
+ * The {@link #getLabel() label} is the text in brackets after {@code ^}, so {@code foo} in the example. The contents + * of the footnote are child nodes of the definition, a {@link org.commonmark.node.Paragraph} in the example. + *

+ * Footnote definitions are parsed even if there's no corresponding {@link FootnoteReference}. + */ +public class FootnoteDefinition extends CustomBlock { + + private String label; + + public FootnoteDefinition(String label) { + this.label = label; + } + + public String getLabel() { + return label; + } +} + diff --git a/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/FootnoteReference.java b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/FootnoteReference.java new file mode 100644 index 000000000..61dcf8626 --- /dev/null +++ b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/FootnoteReference.java @@ -0,0 +1,21 @@ +package org.commonmark.ext.footnotes; + +import org.commonmark.node.CustomNode; + +/** + * A footnote reference, e.g. [^foo] in Some text with a footnote[^foo] + *

+ * The {@link #getLabel() label} is the text within brackets after {@code ^}, so {@code foo} in the example. It needs to + * match the label of a corresponding {@link FootnoteDefinition} for the footnote to be parsed. + */ +public class FootnoteReference extends CustomNode { + private String label; + + public FootnoteReference(String label) { + this.label = label; + } + + public String getLabel() { + return label; + } +} diff --git a/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/FootnotesExtension.java b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/FootnotesExtension.java new file mode 100644 index 000000000..dd532fa34 --- /dev/null +++ b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/FootnotesExtension.java @@ -0,0 +1,105 @@ +package org.commonmark.ext.footnotes; + +import org.commonmark.Extension; +import org.commonmark.ext.footnotes.internal.*; +import org.commonmark.parser.Parser; +import org.commonmark.renderer.NodeRenderer; +import org.commonmark.renderer.html.HtmlRenderer; +import org.commonmark.renderer.markdown.MarkdownNodeRendererContext; +import org.commonmark.renderer.markdown.MarkdownNodeRendererFactory; +import org.commonmark.renderer.markdown.MarkdownRenderer; + +import java.util.Set; + +/** + * Extension for footnotes with syntax like GitHub Flavored Markdown: + *


+ * Some text with a footnote[^1].
+ *
+ * [^1]: The text of the footnote.
+ * 
+ * The [^1] is a {@link FootnoteReference}, with "1" being the label. + *

+ * The line with [^1]: ... is a {@link FootnoteDefinition}, with the contents as child nodes (can be a + * paragraph like in the example, or other blocks like lists). + *

+ * All the footnotes (definitions) will be rendered in a list at the end of a document, no matter where they appear in + * the source. The footnotes will be numbered starting from 1, then 2, etc, depending on the order in which they appear + * in the text (and not dependent on the label). The footnote reference is a link to the footnote, and from the footnote + * there is a link back to the reference (or multiple). + *

+ * There is also optional support for inline footnotes, use {@link #builder()} and then set {@link Builder#inlineFootnotes}. + * + * @see GitHub docs for footnotes + */ +public class FootnotesExtension implements Parser.ParserExtension, + HtmlRenderer.HtmlRendererExtension, + MarkdownRenderer.MarkdownRendererExtension { + + private final boolean inlineFootnotes; + + private FootnotesExtension(boolean inlineFootnotes) { + this.inlineFootnotes = inlineFootnotes; + } + + /** + * The extension with the default configuration (no support for inline footnotes). + */ + public static Extension create() { + return builder().build(); + } + + public static Builder builder() { + return new Builder(); + } + + @Override + public void extend(Parser.Builder parserBuilder) { + parserBuilder + .customBlockParserFactory(new FootnoteBlockParser.Factory()) + .linkProcessor(new FootnoteLinkProcessor()); + if (inlineFootnotes) { + parserBuilder.linkMarker('^'); + } + } + + @Override + public void extend(HtmlRenderer.Builder rendererBuilder) { + rendererBuilder.nodeRendererFactory(FootnoteHtmlNodeRenderer::new); + } + + @Override + public void extend(MarkdownRenderer.Builder rendererBuilder) { + rendererBuilder.nodeRendererFactory(new MarkdownNodeRendererFactory() { + @Override + public NodeRenderer create(MarkdownNodeRendererContext context) { + return new FootnoteMarkdownNodeRenderer(context); + } + + @Override + public Set getSpecialCharacters() { + return Set.of(); + } + }); + } + + public static class Builder { + + private boolean inlineFootnotes = false; + + /** + * Enable support for inline footnotes without definitions, e.g.: + *

+         * Some text^[this is an inline footnote]
+         * 
+ */ + public Builder inlineFootnotes(boolean inlineFootnotes) { + this.inlineFootnotes = inlineFootnotes; + return this; + } + + public FootnotesExtension build() { + return new FootnotesExtension(inlineFootnotes); + } + } +} diff --git a/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/InlineFootnote.java b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/InlineFootnote.java new file mode 100644 index 000000000..665d01936 --- /dev/null +++ b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/InlineFootnote.java @@ -0,0 +1,6 @@ +package org.commonmark.ext.footnotes; + +import org.commonmark.node.CustomNode; + +public class InlineFootnote extends CustomNode { +} diff --git a/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteBlockParser.java b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteBlockParser.java new file mode 100644 index 000000000..110bdef20 --- /dev/null +++ b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteBlockParser.java @@ -0,0 +1,105 @@ +package org.commonmark.ext.footnotes.internal; + +import org.commonmark.ext.footnotes.FootnoteDefinition; +import org.commonmark.node.Block; +import org.commonmark.node.DefinitionMap; +import org.commonmark.parser.block.*; +import org.commonmark.text.Characters; + +import java.util.List; + +/** + * Parser for a single {@link FootnoteDefinition} block. 
+ */ +public class FootnoteBlockParser extends AbstractBlockParser { + + private final FootnoteDefinition block; + + public FootnoteBlockParser(String label) { + block = new FootnoteDefinition(label); + } + + @Override + public Block getBlock() { + return block; + } + + @Override + public boolean isContainer() { + return true; + } + + @Override + public boolean canContain(Block childBlock) { + return true; + } + + @Override + public BlockContinue tryContinue(ParserState parserState) { + if (parserState.getIndent() >= 4) { + // It looks like content needs to be indented by 4 so that it's part of a footnote (instead of starting a new block). + return BlockContinue.atColumn(4); + } else if (parserState.isBlank()) { + // A blank line doesn't finish a footnote yet. If there's another line with indent >= 4 after it, + // that should result in another paragraph in this footnote definition. + return BlockContinue.atIndex(parserState.getIndex()); + } else { + // We're not continuing to give other block parsers a chance to interrupt this definition. + // But if no other block parser applied (including another FootnotesBlockParser), we will + // accept the line via lazy continuation (same as a block quote). 
+ return BlockContinue.none(); + } + } + + @Override + public List> getDefinitions() { + var map = new DefinitionMap<>(FootnoteDefinition.class); + map.putIfAbsent(block.getLabel(), block); + return List.of(map); + } + + public static class Factory implements BlockParserFactory { + + @Override + public BlockStart tryStart(ParserState state, MatchedBlockParser matchedBlockParser) { + if (state.getIndent() >= 4) { + return BlockStart.none(); + } + var index = state.getNextNonSpaceIndex(); + var content = state.getLine().getContent(); + if (content.charAt(index) != '[' || index + 1 >= content.length()) { + return BlockStart.none(); + } + index++; + if (content.charAt(index) != '^' || index + 1 >= content.length()) { + return BlockStart.none(); + } + // Now at first label character (if any) + index++; + var labelStart = index; + + for (index = labelStart; index < content.length(); index++) { + var c = content.charAt(index); + switch (c) { + case ']': + if (index > labelStart && index + 1 < content.length() && content.charAt(index + 1) == ':') { + var label = content.subSequence(labelStart, index).toString(); + // After the colon, any number of spaces is skipped (not part of the content) + var afterSpaces = Characters.skipSpaceTab(content, index + 2, content.length()); + return BlockStart.of(new FootnoteBlockParser(label)).atIndex(afterSpaces); + } else { + return BlockStart.none(); + } + case ' ': + case '\r': + case '\n': + case '\0': + case '\t': + return BlockStart.none(); + } + } + + return BlockStart.none(); + } + } +} diff --git a/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteHtmlNodeRenderer.java b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteHtmlNodeRenderer.java new file mode 100644 index 000000000..70eb048a3 --- /dev/null +++ b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteHtmlNodeRenderer.java @@ -0,0 +1,391 @@ +package 
org.commonmark.ext.footnotes.internal; + +import org.commonmark.ext.footnotes.FootnoteDefinition; +import org.commonmark.ext.footnotes.FootnoteReference; +import org.commonmark.ext.footnotes.InlineFootnote; +import org.commonmark.node.*; +import org.commonmark.renderer.NodeRenderer; +import org.commonmark.renderer.html.HtmlNodeRendererContext; +import org.commonmark.renderer.html.HtmlWriter; + +import java.util.*; +import java.util.function.Consumer; + +/** + * HTML rendering for footnotes. + *

+ * Aims to match the rendering of cmark-gfm (which is slightly different from GitHub's when it comes to class + * attributes, not sure why). + *

+ * Some notes on how rendering works: + *

+ * + *

Nested footnotes

+ * Text in footnote definitions can reference other footnotes, even ones that aren't referenced in the main text. + * This makes them tricky because it's not enough to just go through the main text for references. + * And before we can render a definition, we need to know all references (because we add links back to references). + *

+ * In other words, footnotes form a directed graph. Footnotes can reference each other so cycles are possible too. + *

+ * One way to implement it, which is what cmark-gfm does, is to go through the whole document (including definitions) + * and find all references in order. That guarantees that all definitions are found, but it has strange results for + * ordering or when the reference is in an unreferenced definition, see tests. In graph terms, it renders all + * definitions that have an incoming edge, no matter whether they are connected to the main text or not. + *

+ * The way we implement it: + *

    + *
  1. Start with the references in the main text; we can render them as we go
  2. + *
  3. After the main text is rendered, we have the referenced definitions, but there might be more from definition text
  4. + *
  5. To find the remaining definitions, we visit the definitions from before to look at references
  6. + *
  7. Repeat (breadth-first search) until we've found all definitions (note that we can't render before that's done because of backrefs)
  8. + *
  9. Now render the definitions (and any references inside)
  10. + *
+ * This means we only render definitions whose references are actually rendered, and in a meaningful order (all main + * text footnotes first, then any nested ones). + */ +public class FootnoteHtmlNodeRenderer implements NodeRenderer { + + private final HtmlWriter html; + private final HtmlNodeRendererContext context; + + /** + * All definitions (even potentially unused ones), for looking up references + */ + private DefinitionMap definitionMap; + + /** + * Definitions that were referenced, in order in which they should be rendered. + */ + private final Map referencedDefinitions = new LinkedHashMap<>(); + + /** + * Information about references that should be rendered as footnotes. This doesn't contain all references, just the + * ones from inside definitions. + */ + private final Map references = new HashMap<>(); + + public FootnoteHtmlNodeRenderer(HtmlNodeRendererContext context) { + this.html = context.getWriter(); + this.context = context; + } + + @Override + public Set> getNodeTypes() { + return Set.of(FootnoteReference.class, InlineFootnote.class, FootnoteDefinition.class); + } + + @Override + public void beforeRoot(Node rootNode) { + // Collect all definitions first, so we can look them up when encountering a reference later. + var visitor = new DefinitionVisitor(); + rootNode.accept(visitor); + definitionMap = visitor.definitions; + } + + @Override + public void render(Node node) { + if (node instanceof FootnoteReference) { + // This is called for all references, even ones inside definitions that we render at the end. + // Inside definitions, we have registered the reference already. + var ref = (FootnoteReference) node; + // Use containsKey because if value is null, we don't need to try registering again. + var info = references.containsKey(ref) ? 
references.get(ref) : tryRegisterReference(ref); + if (info != null) { + renderReference(ref, info); + } else { + // A reference without a corresponding definition is rendered as plain text + html.text("[^" + ref.getLabel() + "]"); + } + } else if (node instanceof InlineFootnote) { + var info = references.get(node); + if (info == null) { + info = registerReference(node, null); + } + renderReference(node, info); + } + } + + @Override + public void afterRoot(Node rootNode) { + // Now render the referenced definitions if there are any. + if (referencedDefinitions.isEmpty()) { + return; + } + + var firstDef = referencedDefinitions.keySet().iterator().next(); + var attrs = new LinkedHashMap(); + attrs.put("class", "footnotes"); + attrs.put("data-footnotes", null); + html.tag("section", context.extendAttributes(firstDef, "section", attrs)); + html.line(); + html.tag("ol"); + html.line(); + + // Check whether there are any footnotes inside the definitions that we're about to render. For those, we might + // need to render more definitions. So do a breadth-first search to find all relevant definitions. 
+ var check = new LinkedList<>(referencedDefinitions.keySet()); + while (!check.isEmpty()) { + var def = check.removeFirst(); + def.accept(new ShallowReferenceVisitor(def, node -> { + if (node instanceof FootnoteReference) { + var ref = (FootnoteReference) node; + var d = definitionMap.get(ref.getLabel()); + if (d != null) { + if (!referencedDefinitions.containsKey(d)) { + check.addLast(d); + } + references.put(ref, registerReference(d, d.getLabel())); + } + } else if (node instanceof InlineFootnote) { + check.addLast(node); + references.put(node, registerReference(node, null)); + } + })); + } + + for (var entry : referencedDefinitions.entrySet()) { + // This will also render any footnote references inside definitions + renderDefinition(entry.getKey(), entry.getValue()); + } + + html.tag("/ol"); + html.line(); + html.tag("/section"); + html.line(); + } + + private ReferenceInfo tryRegisterReference(FootnoteReference ref) { + var def = definitionMap.get(ref.getLabel()); + if (def == null) { + return null; + } + return registerReference(def, def.getLabel()); + } + + private ReferenceInfo registerReference(Node node, String label) { + // The first referenced definition gets number 1, second one 2, etc. + var referencedDef = referencedDefinitions.computeIfAbsent(node, k -> { + var num = referencedDefinitions.size() + 1; + var key = definitionKey(label, num); + return new ReferencedDefinition(num, key); + }); + var definitionNumber = referencedDef.definitionNumber; + // The reference number for that particular definition. E.g. if there's two references for the same definition, + // the first one is 1, the second one 2, etc. This is needed to give each reference a unique ID so that each + // reference can get its own backlink from the definition. 
+ var refNumber = referencedDef.references.size() + 1; + var definitionKey = referencedDef.definitionKey; + var id = referenceId(definitionKey, refNumber); + referencedDef.references.add(id); + + return new ReferenceInfo(id, definitionId(definitionKey), definitionNumber); + } + + private void renderReference(Node node, ReferenceInfo referenceInfo) { + html.tag("sup", context.extendAttributes(node, "sup", Map.of("class", "footnote-ref"))); + + var href = "#" + referenceInfo.definitionId; + var attrs = new LinkedHashMap(); + attrs.put("href", href); + attrs.put("id", referenceInfo.id); + attrs.put("data-footnote-ref", null); + html.tag("a", context.extendAttributes(node, "a", attrs)); + html.raw(String.valueOf(referenceInfo.definitionNumber)); + html.tag("/a"); + html.tag("/sup"); + } + + private void renderDefinition(Node def, ReferencedDefinition referencedDefinition) { + var attrs = new LinkedHashMap(); + attrs.put("id", definitionId(referencedDefinition.definitionKey)); + html.tag("li", context.extendAttributes(def, "li", attrs)); + html.line(); + + if (def.getLastChild() instanceof Paragraph) { + // Add backlinks into last paragraph before </p>. This is what GFM does. + var lastParagraph = (Paragraph) def.getLastChild(); + var node = def.getFirstChild(); + while (node != lastParagraph) { + if (node instanceof Paragraph) { + // Because we're manually rendering the <p> for the last paragraph, do the same for all other + // paragraphs for consistency (Paragraph rendering might be overwritten by a custom renderer). + html.tag("p", context.extendAttributes(node, "p", Map.of())); + renderChildren(node); + html.tag("/p"); + html.line(); + } else { + context.render(node); + } + node = node.getNext(); + } + + html.tag("p", context.extendAttributes(lastParagraph, "p", Map.of())); + renderChildren(lastParagraph); + html.raw(" "); + renderBackrefs(def, referencedDefinition); + html.tag("/p"); + html.line(); + } else if (def instanceof InlineFootnote) { + html.tag("p", context.extendAttributes(def, "p", Map.of())); + renderChildren(def); + html.raw(" "); + renderBackrefs(def, referencedDefinition); + html.tag("/p"); + html.line(); + } else { + renderChildren(def); + html.line(); + renderBackrefs(def, referencedDefinition); + } + + html.tag("/li"); + html.line(); + } + + private void renderBackrefs(Node def, ReferencedDefinition referencedDefinition) { + var refs = referencedDefinition.references; + for (int i = 0; i < refs.size(); i++) { + var ref = refs.get(i); + var refNumber = i + 1; + var idx = referencedDefinition.definitionNumber + (refNumber > 1 ? ("-" + refNumber) : ""); + + var attrs = new LinkedHashMap(); + attrs.put("href", "#" + ref); + attrs.put("class", "footnote-backref"); + attrs.put("data-footnote-backref", null); + attrs.put("data-footnote-backref-idx", idx); + attrs.put("aria-label", "Back to reference " + idx); + html.tag("a", context.extendAttributes(def, "a", attrs)); + if (refNumber > 1) { + html.tag("sup", context.extendAttributes(def, "sup", Map.of("class", "footnote-ref"))); + html.raw(String.valueOf(refNumber)); + html.tag("/sup"); + } + // U+21A9 LEFTWARDS ARROW WITH HOOK + html.raw("\u21A9"); + html.tag("/a"); + if (i + 1 < refs.size()) { + html.raw(" "); + } + } + } + + private String referenceId(String definitionKey, int number) { + return "fnref" + definitionKey + (number == 1 ?
"" : ("-" + number)); + } + + private String definitionKey(String label, int number) { + // Named definitions use the pattern "fn-{name}" and inline definitions use "fn{number}" so as not to conflict. + // "fn{number}" is also what pandoc uses (for all types), starting with number 1. + if (label != null) { + return "-" + label; + } else { + return "" + number; + } + } + + private String definitionId(String definitionKey) { + return "fn" + definitionKey; + } + + private void renderChildren(Node parent) { + Node node = parent.getFirstChild(); + while (node != null) { + Node next = node.getNext(); + context.render(node); + node = next; + } + } + + private static class DefinitionVisitor extends AbstractVisitor { + + private final DefinitionMap definitions = new DefinitionMap<>(FootnoteDefinition.class); + + @Override + public void visit(CustomBlock customBlock) { + if (customBlock instanceof FootnoteDefinition) { + var def = (FootnoteDefinition) customBlock; + definitions.putIfAbsent(def.getLabel(), def); + } else { + super.visit(customBlock); + } + } + } + + /** + * Visit footnote references/inline footnotes inside the parent (but not the parent itself). We want a shallow visit + * because the caller wants to control when to descend. + */ + private static class ShallowReferenceVisitor extends AbstractVisitor { + private final Node parent; + private final Consumer consumer; + + private ShallowReferenceVisitor(Node parent, Consumer consumer) { + this.parent = parent; + this.consumer = consumer; + } + + @Override + public void visit(CustomNode customNode) { + if (customNode instanceof FootnoteReference) { + consumer.accept(customNode); + } else if (customNode instanceof InlineFootnote) { + if (customNode == parent) { + // Descend into the parent (inline footnotes can contain inline footnotes) + super.visit(customNode); + } else { + // Don't descend here because we want to be shallow. 
+ consumer.accept(customNode); + } + } else { + super.visit(customNode); + } + } + } + + private static class ReferencedDefinition { + /** + * The definition number, starting from 1, and in order in which they're referenced. + */ + final int definitionNumber; + /** + * The unique key of the definition. Together with a static prefix it forms the ID used in the HTML. + */ + final String definitionKey; + /** + * The IDs of references for this definition, for backrefs. + */ + final List references = new ArrayList<>(); + + ReferencedDefinition(int definitionNumber, String definitionKey) { + this.definitionNumber = definitionNumber; + this.definitionKey = definitionKey; + } + } + + private static class ReferenceInfo { + /** + * The ID of the reference; in the corresponding definition, a link back to this reference will be rendered. + */ + private final String id; + /** + * The ID of the definition, for linking to the definition. + */ + private final String definitionId; + /** + * The definition number, rendered in superscript. 
+ */ + private final int definitionNumber; + + private ReferenceInfo(String id, String definitionId, int definitionNumber) { + this.id = id; + this.definitionId = definitionId; + this.definitionNumber = definitionNumber; + } + } +} diff --git a/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteLinkProcessor.java b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteLinkProcessor.java new file mode 100644 index 000000000..07b008576 --- /dev/null +++ b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteLinkProcessor.java @@ -0,0 +1,57 @@ +package org.commonmark.ext.footnotes.internal; + +import org.commonmark.ext.footnotes.FootnoteDefinition; +import org.commonmark.ext.footnotes.FootnoteReference; +import org.commonmark.ext.footnotes.InlineFootnote; +import org.commonmark.node.LinkReferenceDefinition; +import org.commonmark.parser.InlineParserContext; +import org.commonmark.parser.beta.LinkInfo; +import org.commonmark.parser.beta.LinkProcessor; +import org.commonmark.parser.beta.LinkResult; +import org.commonmark.parser.beta.Scanner; + +/** + * For turning e.g. [^foo] into a {@link FootnoteReference}, + * and ^[foo] into an {@link InlineFootnote}. + */ +public class FootnoteLinkProcessor implements LinkProcessor { + @Override + public LinkResult process(LinkInfo linkInfo, Scanner scanner, InlineParserContext context) { + + if (linkInfo.marker() != null && linkInfo.marker().getLiteral().equals("^")) { + // An inline footnote like ^[footnote text]. Note that we only get the marker here if the option is enabled + // on the extension. 
+ return LinkResult.wrapTextIn(new InlineFootnote(), linkInfo.afterTextBracket()).includeMarker(); + } + + if (linkInfo.destination() != null) { + // If it's an inline link, it can't be a footnote reference + return LinkResult.none(); + } + + var text = linkInfo.text(); + if (!text.startsWith("^")) { + // Footnote reference needs to start with [^ + return LinkResult.none(); + } + + if (linkInfo.label() != null && context.getDefinition(LinkReferenceDefinition.class, linkInfo.label()) != null) { + // If there's a label after the text and the label has a definition -> it's a link, and it should take + // precedence, e.g. in `[^foo][bar]` if `[bar]` has a definition, `[^foo]` won't be a footnote reference. + return LinkResult.none(); + } + + var label = text.substring(1); + // Check if we have a definition, otherwise ignore (same behavior as for link reference definitions). + // Note that the definition parser already checked the syntax of the label, we don't need to check again. + var def = context.getDefinition(FootnoteDefinition.class, label); + if (def == null) { + return LinkResult.none(); + } + + // For footnotes, we only ever consume the text part of the link, not the label part (if any) + var position = linkInfo.afterTextBracket(); + // If the marker is `![`, we don't want to include the `!`, so start from bracket + return LinkResult.replaceWith(new FootnoteReference(label), position); + } +} diff --git a/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteMarkdownNodeRenderer.java b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteMarkdownNodeRenderer.java new file mode 100644 index 000000000..3dcf4fc83 --- /dev/null +++ b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteMarkdownNodeRenderer.java @@ -0,0 +1,70 @@ +package org.commonmark.ext.footnotes.internal; + +import org.commonmark.ext.footnotes.FootnoteDefinition; +import
org.commonmark.ext.footnotes.FootnoteReference; +import org.commonmark.ext.footnotes.InlineFootnote; +import org.commonmark.node.*; +import org.commonmark.renderer.NodeRenderer; +import org.commonmark.renderer.markdown.MarkdownNodeRendererContext; +import org.commonmark.renderer.markdown.MarkdownWriter; + +import java.util.Set; + +public class FootnoteMarkdownNodeRenderer implements NodeRenderer { + + private final MarkdownWriter writer; + private final MarkdownNodeRendererContext context; + + public FootnoteMarkdownNodeRenderer(MarkdownNodeRendererContext context) { + this.writer = context.getWriter(); + this.context = context; + } + + @Override + public Set<Class<? extends Node>> getNodeTypes() { + return Set.of(FootnoteReference.class, InlineFootnote.class, FootnoteDefinition.class); + } + + @Override + public void render(Node node) { + if (node instanceof FootnoteReference) { + renderReference((FootnoteReference) node); + } else if (node instanceof InlineFootnote) { + renderInline((InlineFootnote) node); + } else if (node instanceof FootnoteDefinition) { + renderDefinition((FootnoteDefinition) node); + } + } + + private void renderReference(FootnoteReference ref) { + writer.raw("[^"); + // The label is parsed as-is without escaping, so we can render it back as-is + writer.raw(ref.getLabel()); + writer.raw("]"); + } + + private void renderInline(InlineFootnote inlineFootnote) { + writer.raw("^["); + renderChildren(inlineFootnote); + writer.raw("]"); + } + + private void renderDefinition(FootnoteDefinition def) { + writer.raw("[^"); + writer.raw(def.getLabel()); + writer.raw("]: "); + + writer.pushPrefix(" "); + renderChildren(def); + writer.popPrefix(); + } + + private void renderChildren(Node parent) { + Node node = parent.getFirstChild(); + while (node != null) { + Node next = node.getNext(); + context.render(node); + node = next; + } + } +} diff --git a/commonmark-ext-footnotes/src/main/javadoc/overview.html b/commonmark-ext-footnotes/src/main/javadoc/overview.html new file mode
100644 index 000000000..4f19d2115 --- /dev/null +++ b/commonmark-ext-footnotes/src/main/javadoc/overview.html @@ -0,0 +1,6 @@ +<html> +<body> +Extension for footnotes using [^1] syntax +<p>See {@link org.commonmark.ext.footnotes.FootnotesExtension}</p> +</body> +</html> diff --git a/commonmark-ext-footnotes/src/main/resources/META-INF/LICENSE.txt b/commonmark-ext-footnotes/src/main/resources/META-INF/LICENSE.txt new file mode 100644 index 000000000..b09e367ce --- /dev/null +++ b/commonmark-ext-footnotes/src/main/resources/META-INF/LICENSE.txt @@ -0,0 +1,23 @@ +Copyright (c) 2015, Atlassian Pty Ltd +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/commonmark-ext-footnotes/src/test/java/org/commonmark/ext/footnotes/FootnoteHtmlRendererTest.java b/commonmark-ext-footnotes/src/test/java/org/commonmark/ext/footnotes/FootnoteHtmlRendererTest.java new file mode 100644 index 000000000..bc7d4f74c --- /dev/null +++ b/commonmark-ext-footnotes/src/test/java/org/commonmark/ext/footnotes/FootnoteHtmlRendererTest.java @@ -0,0 +1,339 @@ +package org.commonmark.ext.footnotes; + +import org.commonmark.Extension; +import org.commonmark.node.Document; +import org.commonmark.node.Paragraph; +import org.commonmark.node.Text; +import org.commonmark.parser.Parser; +import org.commonmark.renderer.html.HtmlRenderer; +import org.commonmark.testutil.Asserts; +import org.commonmark.testutil.RenderingTestCase; +import org.junit.jupiter.api.Test; + +import java.util.List; +import java.util.Set; + +public class FootnoteHtmlRendererTest extends RenderingTestCase { + private static final Set EXTENSIONS = Set.of(FootnotesExtension.create()); + private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build(); + private static final HtmlRenderer RENDERER = HtmlRenderer.builder().extensions(EXTENSIONS).build(); + + @Test + public void testOne() { + assertRendering("Test [^foo]\n\n[^foo]: note\n", + "

Test 1

\n" + + "
\n" + + "
    \n" + + "
  1. \n" + + "

    note

    \n" + + "
  2. \n" + + "
\n" + + "
\n"); + } + + @Test + public void testLabelNormalization() { + // Labels match via their normalized form. For the href and IDs to match, rendering needs to use the + // label from the definition consistently. + assertRendering("Test [^bar]\n\n[^BAR]: note\n", + "

Test 1

\n" + + "
\n" + + "
    \n" + + "
  1. \n" + + "

    note

    \n" + + "
  2. \n" + + "
\n" + + "
\n"); + } + + @Test + public void testMultipleReferences() { + // Tests a few things: + // - Numbering is based on the reference order, not the definition order + // - The same number is used when a definition is referenced multiple times + // - Multiple backrefs are rendered + assertRendering("First [^foo]\n\nThen [^bar]\n\nThen [^foo] again\n\n[^bar]: b\n[^foo]: f\n", + "

First 1

\n" + + "

Then 2

\n" + + "

Then 1 again

\n" + + "
\n" + + "
    \n" + + "
  1. \n" + + "

    f 2

    \n" + + "
  2. \n" + + "
  3. \n" + + "

    b

    \n" + + "
  4. \n" + + "
\n" + + "
\n"); + } + + @Test + public void testDefinitionWithTwoParagraphs() { + // With two paragraphs, the backref should be added to the second one + assertRendering("Test [^foo]\n\n[^foo]: one\n \n two\n", + "

Test 1

\n" + + "
\n" + + "
    \n" + + "
  1. \n" + + "

    one

    \n" + + "

    two

    \n" + + "
  2. \n" + + "
\n" + + "
\n"); + } + + @Test + public void testDefinitionWithList() { + assertRendering("Test [^foo]\n\n[^foo]:\n - one\n - two\n", + "

Test 1

\n" + + "
\n" + + "
    \n" + + "
  1. \n" + + "
      \n" + + "
    • one
    • \n" + + "
    • two
    • \n" + + "
    \n" + + "
  2. \n" + + "
\n" + + "
\n"); + } + + // See docs on FootnoteHtmlNodeRenderer about nested footnotes. + + @Test + public void testNestedFootnotesSimple() { + assertRendering("[^foo1]\n" + + "\n" + + "[^foo1]: one [^foo2]\n" + + "[^foo2]: two\n", "

1

\n" + + "
\n" + + "
    \n" + + "
  1. \n" + + "

    one 2

    \n" + + "
  2. \n" + + "
  3. \n" + + "

    two

    \n" + + "
  4. \n" + + "
\n" + + "
\n"); + } + + @Test + public void testNestedFootnotesOrder() { + // GitHub has a strange result here, the definitions are in order: 1. bar, 2. foo. + // The reason is that the numbering is done based on all references in document order, including references in + // definitions. So [^bar] from the first line is first. + assertRendering("[^foo]: foo [^bar]\n" + + "\n" + + "[^foo]\n" + + "\n" + + "[^bar]: bar\n", "

1

\n" + + "
\n" + + "
    \n" + + "
  1. \n" + + "

    foo 2

    \n" + + "
  2. \n" + + "
  3. \n" + + "

    bar

    \n" + + "
  4. \n" + + "
\n" + + "
\n"); + } + + @Test + public void testNestedFootnotesOrder2() { + assertRendering("[^1]\n" + + "\n" + + "[^4]: four\n" + + "[^3]: three [^4]\n" + + "[^2]: two [^4]\n" + + "[^1]: one [^2][^3]\n", "

1

\n" + + "
\n" + + "
    \n" + + "
  1. \n" + + "

    one 23

    \n" + + "
  2. \n" + + "
  3. \n" + + "

    two 4

    \n" + + "
  4. \n" + + "
  5. \n" + + "

    three 4

    \n" + + "
  6. \n" + + "
  7. \n" + + "

    four 2

    \n" + + "
  8. \n" + + "
\n" + + "
\n"); + } + + @Test + public void testNestedFootnotesCycle() { + // Footnotes can contain cycles, lol. + assertRendering("[^foo1]\n" + + "\n" + + "[^foo1]: one [^foo2]\n" + + "[^foo2]: two [^foo1]\n", "

1

\n" + + "
\n" + + "
    \n" + + "
  1. \n" + + "

    one 2 2

    \n" + + "
  2. \n" + + "
  3. \n" + + "

    two 1

    \n" + + "
  4. \n" + + "
\n" + + "
\n"); + } + + @Test + public void testNestedFootnotesUnreferenced() { + // This should not result in any footnotes, as foo itself isn't referenced. + // But GitHub renders bar only, with a broken backref, because bar is referenced from foo. + assertRendering("[^foo]: foo[^bar]\n" + + "[^bar]: bar\n", ""); + + // And here only 1 is rendered. + assertRendering("[^1]\n" + + "\n" + + "[^1]: one\n" + + "[^foo]: foo[^bar]\n" + + "[^bar]: bar\n", "

1

\n" + + "
\n" + + "
    \n" + + "
  1. \n" + + "

    one

    \n" + + "
  2. \n" + + "
\n" + + "
\n"); + } + + @Test + public void testInlineFootnotes() { + assertRenderingInline("Test ^[inline *footnote*]", + "

Test 1

\n" + + "
\n" + + "
    \n" + + "
  1. \n" + + "

    inline footnote

    \n" + + "
  2. \n" + + "
\n" + + "
\n"); + } + + @Test + public void testInlineFootnotesNested() { + assertRenderingInline("Test ^[inline ^[nested]]", + "

Test 1

\n" + + "
\n" + + "
    \n" + + "
  1. \n" + + "

    inline 2

    \n" + + "
  2. \n" + + "
  3. \n" + + "

    nested

    \n" + + "
  4. \n" + + "
\n" + + "
\n"); + } + + @Test + public void testInlineFootnoteWithReference() { + // This is a bit tricky because the IDs need to be unique. + assertRenderingInline("Test ^[inline [^1]]\n" + + "\n" + + "[^1]: normal", + "

Test 1

\n" + + "
\n" + + "
    \n" + + "
  1. \n" + + "

    inline 2

    \n" + + "
  2. \n" + + "
  3. \n" + + "

    normal

    \n" + + "
  4. \n" + + "
\n" + + "
\n"); + } + + @Test + public void testInlineFootnoteInsideDefinition() { + assertRenderingInline("Test [^1]\n" + + "\n" + + "[^1]: Definition ^[inline]\n", + "

Test 1

\n" + + "
\n" + + "
    \n" + + "
  1. \n" + + "

    Definition 2

    \n" + + "
  2. \n" + + "
  3. \n" + + "

    inline

    \n" + + "
  4. \n" + + "
\n" + + "
\n"); + } + + @Test + public void testInlineFootnoteInsideDefinition2() { + // Tricky because of the nested inline footnote which we want to visit after foo (breadth-first). + assertRenderingInline("Test [^1]\n" + + "\n" + + "[^1]: Definition ^[inline ^[nested]] ^[foo]\n", + "

Test 1

\n" + + "
\n" + + "
    \n" + + "
  1. \n" + + "

    Definition 2 3

    \n" + + "
  2. \n" + + "
  3. \n" + + "

    inline 4

    \n" + + "
  4. \n" + + "
  5. \n" + + "

    foo

    \n" + + "
  6. \n" + + "
  7. \n" + + "

    nested

    \n" + + "
  8. \n" + + "
\n" + + "
\n"); + } + + + @Test + public void testRenderNodesDirectly() { + // Everything should work as expected when rendering from nodes directly (no parsing step). + var doc = new Document(); + var p = new Paragraph(); + p.appendChild(new Text("Test ")); + p.appendChild(new FootnoteReference("foo")); + var def = new FootnoteDefinition("foo"); + var note = new Paragraph(); + note.appendChild(new Text("note!")); + def.appendChild(note); + doc.appendChild(p); + doc.appendChild(def); + + var expected = "

Test 1

\n" + + "
\n" + + "
    \n" + + "
  1. \n" + + "

    note!

    \n" + + "
  2. \n" + + "
\n" + + "
\n"; + Asserts.assertRendering("", expected, RENDERER.render(doc)); + } + + @Override + protected String render(String source) { + return RENDERER.render(PARSER.parse(source)); + } + + private static void assertRenderingInline(String source, String expected) { + var extension = FootnotesExtension.builder().inlineFootnotes(true).build(); + var parser = Parser.builder().extensions(List.of(extension)).build(); + var renderer = HtmlRenderer.builder().extensions(List.of(extension)).build(); + Asserts.assertRendering(source, expected, renderer.render(parser.parse(source))); + } +} diff --git a/commonmark-ext-footnotes/src/test/java/org/commonmark/ext/footnotes/FootnoteMarkdownRendererTest.java b/commonmark-ext-footnotes/src/test/java/org/commonmark/ext/footnotes/FootnoteMarkdownRendererTest.java new file mode 100644 index 000000000..2f1125a02 --- /dev/null +++ b/commonmark-ext-footnotes/src/test/java/org/commonmark/ext/footnotes/FootnoteMarkdownRendererTest.java @@ -0,0 +1,65 @@ +package org.commonmark.ext.footnotes; + +import org.commonmark.Extension; +import org.commonmark.node.Node; +import org.commonmark.parser.Parser; +import org.commonmark.renderer.markdown.MarkdownRenderer; +import org.junit.jupiter.api.Test; + +import java.util.Set; + +import static org.assertj.core.api.Assertions.assertThat; + +public class FootnoteMarkdownRendererTest { + private static final Set EXTENSIONS = Set.of(FootnotesExtension.builder().inlineFootnotes(true).build()); + private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build(); + private static final MarkdownRenderer RENDERER = MarkdownRenderer.builder().extensions(EXTENSIONS).build(); + + @Test + public void testSimple() { + assertRoundTrip("Test [^foo]\n\n[^foo]: note\n"); + } + + @Test + public void testUnreferenced() { + // Whether a reference has a corresponding definition or vice versa shouldn't matter for Markdown rendering. 
+ assertRoundTrip("Test [^foo]\n\n[^foo]: one\n\n[^bar]: two\n"); + } + + @Test + public void testFootnoteWithBlock() { + assertRoundTrip("Test [^foo]\n\n[^foo]: - foo\n - bar\n"); + } + + @Test + public void testBackslashInLabel() { + assertRoundTrip("[^\\foo]\n\n[^\\foo]: note\n"); + } + + @Test + public void testMultipleLines() { + assertRoundTrip("Test [^1]\n\n[^1]: footnote l1\n footnote l2\n"); + } + + @Test + public void testMultipleParagraphs() { + // Note that the line between p1 and p2 could be blank too (instead of 4 spaces), but we currently don't + // preserve that information. + assertRoundTrip("Test [^1]\n\n[^1]: footnote p1\n \n footnote p2\n"); + } + + @Test + public void testInline() { + assertRoundTrip("^[test *foo*]\n"); + } + + private void assertRoundTrip(String input) { + String rendered = parseAndRender(input); + assertThat(rendered).isEqualTo(input); + } + + private String parseAndRender(String source) { + Node parsed = PARSER.parse(source); + return RENDERER.render(parsed); + } +} diff --git a/commonmark-ext-footnotes/src/test/java/org/commonmark/ext/footnotes/FootnotesTest.java b/commonmark-ext-footnotes/src/test/java/org/commonmark/ext/footnotes/FootnotesTest.java new file mode 100644 index 000000000..7763cedb4 --- /dev/null +++ b/commonmark-ext-footnotes/src/test/java/org/commonmark/ext/footnotes/FootnotesTest.java @@ -0,0 +1,366 @@ +package org.commonmark.ext.footnotes; + +import org.commonmark.Extension; +import org.commonmark.node.*; +import org.commonmark.parser.IncludeSourceSpans; +import org.commonmark.parser.Parser; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.Set; + +import static org.assertj.core.api.Assertions.assertThat; + +public class FootnotesTest { + + private static final Set EXTENSIONS = Set.of(FootnotesExtension.create()); + private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build(); + + @Test + public 
void testDefBlockStart() { + for (var s : List.of("1", "a", "^", "*", "\\a", "\uD83D\uDE42", "&0")) { + var doc = PARSER.parse("[^" + s + "]: footnote\n"); + var def = find(doc, FootnoteDefinition.class); + assertThat(def.getLabel()).isEqualTo(s); + } + + for (var s : List.of("", " ", "a b", "]", "\r", "\n", "\t")) { + var input = "[^" + s + "]: footnote\n"; + var doc = PARSER.parse(input); + assertThat(tryFind(doc, FootnoteDefinition.class)).as("input: " + input).isNull(); + } + } + + @Test + public void testDefBlockStartInterrupts() { + // This is different from a link reference definition, which can only be at the start of paragraphs. + var doc = PARSER.parse("test\n[^1]: footnote\n"); + var paragraph = find(doc, Paragraph.class); + var def = find(doc, FootnoteDefinition.class); + assertThat(((Text) paragraph.getLastChild()).getLiteral()).isEqualTo("test"); + assertThat(def.getLabel()).isEqualTo("1"); + } + + @Test + public void testDefBlockStartIndented() { + var doc1 = PARSER.parse(" [^1]: footnote\n"); + assertThat(find(doc1, FootnoteDefinition.class).getLabel()).isEqualTo("1"); + var doc2 = PARSER.parse(" [^1]: footnote\n"); + assertNone(doc2, FootnoteDefinition.class); + } + + @Test + public void testDefMultiple() { + var doc = PARSER.parse("[^1]: foo\n[^2]: bar\n"); + var defs = findAll(doc, FootnoteDefinition.class); + assertThat(defs.get(0).getLabel()).isEqualTo("1"); + assertThat(defs.get(1).getLabel()).isEqualTo("2"); + } + + @Test + public void testDefBlockStartAfterLinkReferenceDefinition() { + var doc = PARSER.parse("[foo]: /url\n[^1]: footnote\n"); + var linkReferenceDef = find(doc, LinkReferenceDefinition.class); + var footnotesDef = find(doc, FootnoteDefinition.class); + assertThat(linkReferenceDef.getLabel()).isEqualTo("foo"); + assertThat(footnotesDef.getLabel()).isEqualTo("1"); + } + + @Test + public void testDefContainsParagraph() { + var doc = PARSER.parse("[^1]: footnote\n"); + var def = find(doc, FootnoteDefinition.class); + var paragraph 
= (Paragraph) def.getFirstChild(); + assertText("footnote", paragraph.getFirstChild()); + } + + @Test + public void testDefBlockStartSpacesAfterColon() { + var doc = PARSER.parse("[^1]: footnote\n"); + var def = find(doc, FootnoteDefinition.class); + var paragraph = (Paragraph) def.getFirstChild(); + assertText("footnote", paragraph.getFirstChild()); + } + + @Test + public void testDefContainsIndentedCodeBlock() { + var doc = PARSER.parse("[^1]:\n code\n"); + var def = find(doc, FootnoteDefinition.class); + var codeBlock = (IndentedCodeBlock) def.getFirstChild(); + assertThat(codeBlock.getLiteral()).isEqualTo("code\n"); + } + + @Test + public void testDefContainsMultipleLines() { + var doc = PARSER.parse("[^1]: footnote\nstill\n"); + var def = find(doc, FootnoteDefinition.class); + assertThat(def.getLabel()).isEqualTo("1"); + var paragraph = (Paragraph) def.getFirstChild(); + assertText("footnote", paragraph.getFirstChild()); + assertText("still", paragraph.getLastChild()); + } + + @Test + public void testDefContainsMultipleParagraphs() { + var doc = PARSER.parse("[^1]: footnote p1\n\n footnote p2\n"); + var def = find(doc, FootnoteDefinition.class); + assertThat(def.getLabel()).isEqualTo("1"); + var p1 = (Paragraph) def.getFirstChild(); + assertText("footnote p1", p1.getFirstChild()); + var p2 = (Paragraph) p1.getNext(); + assertText("footnote p2", p2.getFirstChild()); + } + + @Test + public void testDefFollowedByParagraph() { + var doc = PARSER.parse("[^1]: footnote\n\nnormal paragraph\n"); + var def = find(doc, FootnoteDefinition.class); + assertThat(def.getLabel()).isEqualTo("1"); + assertText("footnote", def.getFirstChild().getFirstChild()); + assertText("normal paragraph", def.getNext().getFirstChild()); + } + + @Test + public void testDefContainsList() { + var doc = PARSER.parse("[^1]: - foo\n - bar\n"); + var def = find(doc, FootnoteDefinition.class); + assertThat(def.getLabel()).isEqualTo("1"); + var list = (BulletList) def.getFirstChild(); + var item1 = 
(ListItem) list.getFirstChild(); + var item2 = (ListItem) list.getLastChild(); + assertText("foo", item1.getFirstChild().getFirstChild()); + assertText("bar", item2.getFirstChild().getFirstChild()); + } + + @Test + public void testDefInterruptedByOthers() { + var doc = PARSER.parse("[^1]: footnote\n# Heading\n"); + var def = find(doc, FootnoteDefinition.class); + var heading = find(doc, Heading.class); + assertThat(def.getLabel()).isEqualTo("1"); + assertText("Heading", heading.getFirstChild()); + } + + @Test + public void testReference() { + var doc = PARSER.parse("Test [^foo]\n\n[^foo]: /url\n"); + var ref = find(doc, FootnoteReference.class); + assertThat(ref.getLabel()).isEqualTo("foo"); + } + + @Test + public void testReferenceNoDefinition() { + var doc = PARSER.parse("Test [^foo]\n"); + assertNone(doc, FootnoteReference.class); + } + + @Test + public void testRefWithEmphasisInside() { + // No emphasis inside footnote reference, should just be treated as text + var doc = PARSER.parse("Test [^*foo*]\n\n[^*foo*]: def\n"); + var ref = find(doc, FootnoteReference.class); + assertThat(ref.getLabel()).isEqualTo("*foo*"); + assertThat(ref.getFirstChild()).isNull(); + var paragraph = doc.getFirstChild(); + var text = (Text) paragraph.getFirstChild(); + assertThat(text.getLiteral()).isEqualTo("Test "); + assertThat(text.getNext()).isEqualTo(ref); + assertThat(paragraph.getLastChild()).isEqualTo(ref); + } + + @Test + public void testRefWithEmphasisAround() { + // Emphasis around footnote reference, the * inside needs to be removed from emphasis processing + var doc = PARSER.parse("Test *abc [^foo*] def*\n\n[^foo*]: def\n"); + var ref = find(doc, FootnoteReference.class); + assertThat(ref.getLabel()).isEqualTo("foo*"); + assertText("abc ", ref.getPrevious()); + assertText(" def", ref.getNext()); + var em = find(doc, Emphasis.class); + assertThat(ref.getParent()).isEqualTo(em); + } + + @Test + public void testRefAfterBang() { + var doc = 
PARSER.parse("Test![^foo]\n\n[^foo]: def\n"); + var ref = find(doc, FootnoteReference.class); + assertThat(ref.getLabel()).isEqualTo("foo"); + var paragraph = doc.getFirstChild(); + assertText("Test!", paragraph.getFirstChild()); + } + + @Test + public void testRefAsLabelOnly() { + // [^bar] is a footnote but [foo] is just text, because full reference links (text `foo`, label `^bar`) don't + // resolve as footnotes. If `[foo][^bar]` fails to parse as a bracket, `[^bar]` by itself needs to be tried. + var doc = PARSER.parse("Test [foo][^bar]\n\n[^bar]: footnote\n"); + var ref = find(doc, FootnoteReference.class); + assertThat(ref.getLabel()).isEqualTo("bar"); + var paragraph = doc.getFirstChild(); + assertText("Test [foo]", paragraph.getFirstChild()); + } + + @Test + public void testRefWithEmptyLabel() { + // [^bar] is a footnote but [] is just text, because collapsed reference links don't resolve as footnotes + var doc = PARSER.parse("Test [^bar][]\n\n[^bar]: footnote\n"); + var ref = find(doc, FootnoteReference.class); + assertThat(ref.getLabel()).isEqualTo("bar"); + var paragraph = doc.getFirstChild(); + assertText("Test ", paragraph.getFirstChild()); + assertText("[]", paragraph.getLastChild()); + } + + @Test + public void testRefWithBracket() { + // Not a footnote, [ needs to be escaped + var doc = PARSER.parse("Test [^f[oo]\n\n[^f[oo]: /url\n"); + assertNone(doc, FootnoteReference.class); + } + + @Test + public void testRefWithBackslash() { + var doc = PARSER.parse("[^\\foo]\n\n[^\\foo]: note\n"); + var ref = find(doc, FootnoteReference.class); + assertThat(ref.getLabel()).isEqualTo("\\foo"); + var def = find(doc, FootnoteDefinition.class); + assertThat(def.getLabel()).isEqualTo("\\foo"); + } + + @Test + public void testPreferInlineLink() { + var doc = PARSER.parse("Test [^bar](/url)\n\n[^bar]: footnote\n"); + assertNone(doc, FootnoteReference.class); + } + + @Test + public void testPreferReferenceLink() { + // This is tricky because `[^*foo*][foo]` is a valid 
link already. If `[foo]` was not defined, the first bracket + // would be a footnote. + var doc = PARSER.parse("Test [^*foo*][foo]\n\n[^*foo*]: /url\n\n[foo]: /url"); + assertNone(doc, FootnoteReference.class); + } + + @Test + public void testReferenceLinkWithoutDefinition() { + // Similar to previous test but there's no definition + var doc = PARSER.parse("Test [^*foo*][foo]\n\n[^*foo*]: def\n"); + var ref = find(doc, FootnoteReference.class); + assertThat(ref.getLabel()).isEqualTo("*foo*"); + var paragraph = (Paragraph) doc.getFirstChild(); + assertText("Test ", paragraph.getFirstChild()); + assertText("[foo]", paragraph.getLastChild()); + } + + @Test + public void testFootnoteInLink() { + // Expected to behave the same way as a link within a link, see https://spec.commonmark.org/0.31.2/#example-518 + // i.e. the first (inner) link is parsed, which means the outer one becomes plain text, as nesting links is not + // allowed. + var doc = PARSER.parse("[link with footnote ref [^1]](https://example.com)\n\n[^1]: footnote\n"); + var ref = find(doc, FootnoteReference.class); + assertThat(ref.getLabel()).isEqualTo("1"); + var paragraph = doc.getFirstChild(); + assertText("[link with footnote ref ", paragraph.getFirstChild()); + assertText("](https://example.com)", paragraph.getLastChild()); + } + + @Test + public void testFootnoteWithMarkerInLink() { + var doc = PARSER.parse("[link with footnote ref ![^1]](https://example.com)\n\n[^1]: footnote\n"); + var ref = find(doc, FootnoteReference.class); + assertThat(ref.getLabel()).isEqualTo("1"); + var paragraph = doc.getFirstChild(); + assertText("[link with footnote ref !", paragraph.getFirstChild()); + assertText("](https://example.com)", paragraph.getLastChild()); + } + + @Test + public void testInlineFootnote() { + var extension = FootnotesExtension.builder().inlineFootnotes(true).build(); + var parser = Parser.builder().extensions(Set.of(extension)).build(); + + { + var doc = parser.parse("Test ^[inline footnote]"); + 
assertText("Test ", doc.getFirstChild().getFirstChild()); + var fn = find(doc, InlineFootnote.class); + assertText("inline footnote", fn.getFirstChild()); + } + + { + var doc = parser.parse("Test \\^[not inline footnote]"); + assertNone(doc, InlineFootnote.class); + } + + { + var doc = parser.parse("Test ^[not inline footnote"); + assertNone(doc, InlineFootnote.class); + var t = doc.getFirstChild().getFirstChild(); + assertText("Test ^[not inline footnote", t); + } + + { + // This is a tricky one because the code span in the link text + // includes the `]` (and doesn't need to be escaped). Therefore + // inline footnote parsing has to do full link text parsing/inline parsing. + // https://spec.commonmark.org/0.31.2/#link-text + + var doc = parser.parse("^[test `bla]`]"); + var fn = find(doc, InlineFootnote.class); + assertText("test ", fn.getFirstChild()); + var code = fn.getFirstChild().getNext(); + assertThat(((Code) code).getLiteral()).isEqualTo("bla]"); + } + + { + var doc = parser.parse("^[with a [link](url)]"); + var fn = find(doc, InlineFootnote.class); + assertText("with a ", fn.getFirstChild()); + var link = fn.getFirstChild().getNext(); + assertThat(((Link) link).getDestination()).isEqualTo("url"); + } + } + + @Test + public void testSourcePositions() { + var parser = Parser.builder().extensions(EXTENSIONS).includeSourceSpans(IncludeSourceSpans.BLOCKS_AND_INLINES).build(); + + var doc = parser.parse("Test [^foo]\n\n[^foo]: /url\n"); + var ref = find(doc, FootnoteReference.class); + assertThat(ref.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 5, 5, 6))); + + var def = find(doc, FootnoteDefinition.class); + assertThat(def.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(2, 0, 13, 12))); + } + + private static void assertNone(Node parent, Class nodeClass) { + assertThat(tryFind(parent, nodeClass)).as(() -> "Node " + parent + " containing " + nodeClass).isNull(); + } + + private static T find(Node parent, Class nodeClass) { + return 
Objects.requireNonNull(tryFind(parent, nodeClass), "Could not find a " + nodeClass.getSimpleName() + " node in " + parent); + } + + private static T tryFind(Node parent, Class nodeClass) { + return findAll(parent, nodeClass).stream().findFirst().orElse(null); + } + + private static List findAll(Node parent, Class nodeClass) { + var nodes = new ArrayList(); + for (var node = parent.getFirstChild(); node != null; node = node.getNext()) { + if (nodeClass.isInstance(node)) { + //noinspection unchecked + nodes.add((T) node); + } + nodes.addAll(findAll(node, nodeClass)); + } + return nodes; + } + + private static void assertText(String expected, Node node) { + var text = (Text) node; + assertThat(text.getLiteral()).isEqualTo(expected); + } +} diff --git a/commonmark-ext-footnotes/src/test/resources/footnotes.html b/commonmark-ext-footnotes/src/test/resources/footnotes.html new file mode 100644 index 000000000..1dd83185f --- /dev/null +++ b/commonmark-ext-footnotes/src/test/resources/footnotes.html @@ -0,0 +1,18 @@ + + + + + + Footnotes testing + + + + +Paste HTML from footnote rendering in here to manually check that linking works as expected. + + + diff --git a/commonmark-ext-gfm-alerts/README.md b/commonmark-ext-gfm-alerts/README.md new file mode 100644 index 000000000..2368812e5 --- /dev/null +++ b/commonmark-ext-gfm-alerts/README.md @@ -0,0 +1,74 @@ +# commonmark-ext-gfm-alerts + +Extension for [commonmark-java](https://github.com/commonmark/commonmark-java) that adds support for [GitHub Flavored Markdown alerts](https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#alerts). + +Enables highlighting important information using blockquote syntax with five standard alert types: NOTE, TIP, IMPORTANT, WARNING, and CAUTION. 
+ +## Usage + +#### Markdown Syntax + +```markdown +> [!NOTE] +> Useful information + +> [!WARNING] +> Critical information +``` + +#### Standard GFM Types + +```java +var extension = AlertsExtension.create(); +var parser = Parser.builder().extensions(List.of(extension)).build(); +var renderer = HtmlRenderer.builder().extensions(List.of(extension)).build(); +``` + +#### Custom Alert Types + +Add custom types beyond the five standard GFM types: + +```java +var extension = AlertsExtension.builder() + .addCustomType("BUG", "Known Bug") + .build(); +``` + +Custom types must be UPPERCASE. Standard type titles can also be overridden for localization. + +#### Styling + +Alerts render as `
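<div>` elements with CSS classes: + +```html +<div class="markdown-alert markdown-alert-note" data-alert-type="note"> + <p class="markdown-alert-title">Note</p> + <p>Content</p> +</div>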
+``` + +Basic CSS example: + +```css +.markdown-alert { + padding: 0.5rem 1rem; + margin-bottom: 1rem; + border-left: 4px solid; +} + +.markdown-alert-note { border-color: #0969da; background-color: #ddf4ff; } +.markdown-alert-tip { border-color: #1a7f37; background-color: #dcffe4; } +.markdown-alert-important { border-color: #8250df; background-color: #f6f0ff; } +.markdown-alert-warning { border-color: #9a6700; background-color: #fff8c5; } +.markdown-alert-caution { border-color: #cf222e; background-color: #ffebe9; } +``` + +![Alerts](screenshots/alerts.png) + +Icons can be added using GitHub's [Octicons](https://primer.style/octicons/): + +![Alerts with icons](screenshots/alerts-with-icons.png) + +## License + +See the main commonmark-java project for license information. diff --git a/commonmark-ext-gfm-alerts/pom.xml b/commonmark-ext-gfm-alerts/pom.xml new file mode 100644 index 000000000..02ecbf802 --- /dev/null +++ b/commonmark-ext-gfm-alerts/pom.xml @@ -0,0 +1,27 @@ + + + 4.0.0 + + org.commonmark + commonmark-parent + 0.28.1-SNAPSHOT + + + commonmark-ext-gfm-alerts + commonmark-java extension for alerts + commonmark-java extension for GFM alerts (admonition blocks) using [!TYPE] syntax (GitHub Flavored Markdown) + + + + org.commonmark + commonmark + + + + org.commonmark + commonmark-test-util + test + + + + diff --git a/commonmark-ext-gfm-alerts/screenshots/alerts-with-icons.png b/commonmark-ext-gfm-alerts/screenshots/alerts-with-icons.png new file mode 100644 index 000000000..47da9402b Binary files /dev/null and b/commonmark-ext-gfm-alerts/screenshots/alerts-with-icons.png differ diff --git a/commonmark-ext-gfm-alerts/screenshots/alerts.png b/commonmark-ext-gfm-alerts/screenshots/alerts.png new file mode 100644 index 000000000..83d4009f0 Binary files /dev/null and b/commonmark-ext-gfm-alerts/screenshots/alerts.png differ diff --git a/commonmark-ext-gfm-alerts/src/main/java/module-info.java b/commonmark-ext-gfm-alerts/src/main/java/module-info.java new file 
mode 100644 index 000000000..e8b5aecb7 --- /dev/null +++ b/commonmark-ext-gfm-alerts/src/main/java/module-info.java @@ -0,0 +1,5 @@ +module org.commonmark.ext.gfm.alerts { + exports org.commonmark.ext.gfm.alerts; + + requires transitive org.commonmark; +} diff --git a/commonmark-ext-gfm-alerts/src/main/java/org/commonmark/ext/gfm/alerts/Alert.java b/commonmark-ext-gfm-alerts/src/main/java/org/commonmark/ext/gfm/alerts/Alert.java new file mode 100644 index 000000000..bb28e7344 --- /dev/null +++ b/commonmark-ext-gfm-alerts/src/main/java/org/commonmark/ext/gfm/alerts/Alert.java @@ -0,0 +1,19 @@ +package org.commonmark.ext.gfm.alerts; + +import org.commonmark.node.CustomBlock; + +/** + * Alert block for highlighting important information using {@code [!TYPE]} syntax. + */ +public class Alert extends CustomBlock { + + private final String type; + + public Alert(String type) { + this.type = type; + } + + public String getType() { + return type; + } +} diff --git a/commonmark-ext-gfm-alerts/src/main/java/org/commonmark/ext/gfm/alerts/AlertsExtension.java b/commonmark-ext-gfm-alerts/src/main/java/org/commonmark/ext/gfm/alerts/AlertsExtension.java new file mode 100644 index 000000000..3990034d2 --- /dev/null +++ b/commonmark-ext-gfm-alerts/src/main/java/org/commonmark/ext/gfm/alerts/AlertsExtension.java @@ -0,0 +1,118 @@ +package org.commonmark.ext.gfm.alerts; + +import org.commonmark.Extension; +import org.commonmark.ext.gfm.alerts.internal.AlertPostProcessor; +import org.commonmark.ext.gfm.alerts.internal.AlertHtmlNodeRenderer; +import org.commonmark.ext.gfm.alerts.internal.AlertMarkdownNodeRenderer; +import org.commonmark.parser.Parser; +import org.commonmark.renderer.NodeRenderer; +import org.commonmark.renderer.html.HtmlNodeRendererContext; +import org.commonmark.renderer.html.HtmlNodeRendererFactory; +import org.commonmark.renderer.html.HtmlRenderer; +import org.commonmark.renderer.markdown.MarkdownNodeRendererContext; +import 
org.commonmark.renderer.markdown.MarkdownNodeRendererFactory; +import org.commonmark.renderer.markdown.MarkdownRenderer; + +import java.util.HashMap; +import java.util.Locale; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +/** + * Extension for GFM alerts using {@code [!TYPE]} syntax (GitHub Flavored Markdown). + *

+ * Create with {@link #create()} or {@link #builder()} and configure on builders + * ({@link org.commonmark.parser.Parser.Builder#extensions(Iterable)}, + * {@link HtmlRenderer.Builder#extensions(Iterable)}). + * Parsed alerts become {@link Alert} blocks. + */ +public class AlertsExtension implements Parser.ParserExtension, HtmlRenderer.HtmlRendererExtension, + MarkdownRenderer.MarkdownRendererExtension { + + static final Set STANDARD_TYPES = Set.of("NOTE", "TIP", "IMPORTANT", "WARNING", "CAUTION"); + + private final Map customTypes; + + private AlertsExtension(Builder builder) { + this.customTypes = new HashMap<>(builder.customTypes); + } + + public static Extension create() { + return builder().build(); + } + + public static Builder builder() { + return new Builder(); + } + + @Override + public void extend(Parser.Builder parserBuilder) { + var allowedTypes = new HashSet<>(STANDARD_TYPES); + allowedTypes.addAll(customTypes.keySet()); + parserBuilder.postProcessor(new AlertPostProcessor(allowedTypes)); + } + + @Override + public void extend(HtmlRenderer.Builder rendererBuilder) { + rendererBuilder.nodeRendererFactory(new HtmlNodeRendererFactory() { + @Override + public NodeRenderer create(HtmlNodeRendererContext context) { + return new AlertHtmlNodeRenderer(context, customTypes); + } + }); + } + + @Override + public void extend(MarkdownRenderer.Builder rendererBuilder) { + rendererBuilder.nodeRendererFactory(new MarkdownNodeRendererFactory() { + @Override + public NodeRenderer create(MarkdownNodeRendererContext context) { + return new AlertMarkdownNodeRenderer(context); + } + + @Override + public Set getSpecialCharacters() { + return Set.of(); + } + }); + } + + /** + * Builder for configuring the alerts extension. + */ + public static class Builder { + private final Map customTypes = new HashMap<>(); + + /** + * Adds a custom alert type with a display title. + *

+ * This can also be used to override the display title of standard GFM types + * (e.g., for localization). + * + * @param type the alert type (must be uppercase) + * @param title the display title for this alert type + * @return {@code this} + */ + public Builder addCustomType(String type, String title) { + if (type == null || type.isEmpty()) { + throw new IllegalArgumentException("Type must not be null or empty"); + } + if (title == null || title.isEmpty()) { + throw new IllegalArgumentException("Title must not be null or empty"); + } + if (!type.equals(type.toUpperCase(Locale.ROOT))) { + throw new IllegalArgumentException("Type must be uppercase: " + type); + } + customTypes.put(type, title); + return this; + } + + /** + * @return a configured {@link Extension} + */ + public Extension build() { + return new AlertsExtension(this); + } + } +} diff --git a/commonmark-ext-gfm-alerts/src/main/java/org/commonmark/ext/gfm/alerts/internal/AlertHtmlNodeRenderer.java b/commonmark-ext-gfm-alerts/src/main/java/org/commonmark/ext/gfm/alerts/internal/AlertHtmlNodeRenderer.java new file mode 100644 index 000000000..ca562ba33 --- /dev/null +++ b/commonmark-ext-gfm-alerts/src/main/java/org/commonmark/ext/gfm/alerts/internal/AlertHtmlNodeRenderer.java @@ -0,0 +1,78 @@ +package org.commonmark.ext.gfm.alerts.internal; + +import org.commonmark.ext.gfm.alerts.Alert; +import org.commonmark.node.Node; +import org.commonmark.renderer.html.HtmlNodeRendererContext; +import org.commonmark.renderer.html.HtmlWriter; + +import java.util.LinkedHashMap; +import java.util.Map; + +public class AlertHtmlNodeRenderer extends AlertNodeRenderer { + + private final HtmlWriter htmlWriter; + private final HtmlNodeRendererContext context; + private final Map customTypeTitles; + + public AlertHtmlNodeRenderer(HtmlNodeRendererContext context, Map customTypeTitles) { + this.htmlWriter = context.getWriter(); + this.context = context; + this.customTypeTitles = customTypeTitles; + } + + @Override + protected 
void renderAlert(Alert alert) { + var type = alert.getType(); + var cssClass = type.toLowerCase(); + + htmlWriter.line(); + var attributes = new LinkedHashMap(); + attributes.put("class", "markdown-alert markdown-alert-" + cssClass); + attributes.put("data-alert-type", cssClass); + + htmlWriter.tag("div", context.extendAttributes(alert, "div", attributes)); + htmlWriter.line(); + + // Render alert title + htmlWriter.tag("p", context.extendAttributes(alert, "p", Map.of("class", "markdown-alert-title"))); + htmlWriter.text(getAlertTitle(type)); + htmlWriter.tag("/p"); + htmlWriter.line(); + + // Render children (the alert content) + renderChildren(alert); + + htmlWriter.tag("/div"); + htmlWriter.line(); + } + + private String getAlertTitle(String type) { + var customTypeTitle = customTypeTitles.get(type); + if (customTypeTitle != null) { + return customTypeTitle; + } + switch (type) { + case "NOTE": + return "Note"; + case "TIP": + return "Tip"; + case "IMPORTANT": + return "Important"; + case "WARNING": + return "Warning"; + case "CAUTION": + return "Caution"; + default: + throw new IllegalStateException("Unknown alert type: " + type); + } + } + + private void renderChildren(Node parent) { + var node = parent.getFirstChild(); + while (node != null) { + var next = node.getNext(); + context.render(node); + node = next; + } + } +} diff --git a/commonmark-ext-gfm-alerts/src/main/java/org/commonmark/ext/gfm/alerts/internal/AlertMarkdownNodeRenderer.java b/commonmark-ext-gfm-alerts/src/main/java/org/commonmark/ext/gfm/alerts/internal/AlertMarkdownNodeRenderer.java new file mode 100644 index 000000000..e3da62aea --- /dev/null +++ b/commonmark-ext-gfm-alerts/src/main/java/org/commonmark/ext/gfm/alerts/internal/AlertMarkdownNodeRenderer.java @@ -0,0 +1,38 @@ +package org.commonmark.ext.gfm.alerts.internal; + +import org.commonmark.ext.gfm.alerts.Alert; +import org.commonmark.node.Node; +import org.commonmark.renderer.markdown.MarkdownNodeRendererContext; +import 
org.commonmark.renderer.markdown.MarkdownWriter; + +public class AlertMarkdownNodeRenderer extends AlertNodeRenderer { + + private final MarkdownWriter writer; + private final MarkdownNodeRendererContext context; + + public AlertMarkdownNodeRenderer(MarkdownNodeRendererContext context) { + this.writer = context.getWriter(); + this.context = context; + } + + @Override + protected void renderAlert(Alert alert) { + // First line: > [!TYPE] + writer.writePrefix("> "); + writer.pushPrefix("> "); + writer.raw("[!" + alert.getType() + "]"); + writer.line(); + renderChildren(alert); + writer.popPrefix(); + writer.block(); + } + + private void renderChildren(Node parent) { + var node = parent.getFirstChild(); + while (node != null) { + var next = node.getNext(); + context.render(node); + node = next; + } + } +} diff --git a/commonmark-ext-gfm-alerts/src/main/java/org/commonmark/ext/gfm/alerts/internal/AlertNodeRenderer.java b/commonmark-ext-gfm-alerts/src/main/java/org/commonmark/ext/gfm/alerts/internal/AlertNodeRenderer.java new file mode 100644 index 000000000..45b34bb46 --- /dev/null +++ b/commonmark-ext-gfm-alerts/src/main/java/org/commonmark/ext/gfm/alerts/internal/AlertNodeRenderer.java @@ -0,0 +1,23 @@ +package org.commonmark.ext.gfm.alerts.internal; + +import org.commonmark.ext.gfm.alerts.Alert; +import org.commonmark.node.Node; +import org.commonmark.renderer.NodeRenderer; + +import java.util.Set; + +public abstract class AlertNodeRenderer implements NodeRenderer { + + @Override + public Set> getNodeTypes() { + return Set.of(Alert.class); + } + + @Override + public void render(Node node) { + var alert = (Alert) node; + renderAlert(alert); + } + + protected abstract void renderAlert(Alert alert); +} diff --git a/commonmark-ext-gfm-alerts/src/main/java/org/commonmark/ext/gfm/alerts/internal/AlertPostProcessor.java b/commonmark-ext-gfm-alerts/src/main/java/org/commonmark/ext/gfm/alerts/internal/AlertPostProcessor.java new file mode 100644 index 000000000..8008fc8dd 
--- /dev/null +++ b/commonmark-ext-gfm-alerts/src/main/java/org/commonmark/ext/gfm/alerts/internal/AlertPostProcessor.java @@ -0,0 +1,111 @@ +package org.commonmark.ext.gfm.alerts.internal; + +import org.commonmark.ext.gfm.alerts.Alert; +import org.commonmark.node.BlockQuote; +import org.commonmark.node.HardLineBreak; +import org.commonmark.node.Node; +import org.commonmark.node.Paragraph; +import org.commonmark.node.SoftLineBreak; +import org.commonmark.node.Text; +import org.commonmark.parser.PostProcessor; + +import java.util.Locale; +import java.util.Set; +import java.util.regex.Pattern; + +public class AlertPostProcessor implements PostProcessor { + + // Alert type marker, matching any case (GitHub supports lowercase, mixed, and uppercase) + private static final Pattern ALERT_PATTERN = Pattern.compile("^\\[!([a-zA-Z]+)]\\s*$"); + + private final Set allowedTypes; + + public AlertPostProcessor(Set allowedTypes) { + this.allowedTypes = allowedTypes; + } + + @Override + public Node process(Node document) { + // Only look at direct children of Document — GitHub only detects alerts at the top level. + var child = document.getFirstChild(); + while (child != null) { + var next = child.getNext(); + if (child instanceof BlockQuote) { + tryConvertToAlert((BlockQuote) child); + } + child = next; + } + return document; + } + + private void tryConvertToAlert(BlockQuote blockQuote) { + var firstChild = blockQuote.getFirstChild(); + if (!(firstChild instanceof Paragraph)) { + return; + } + + var paragraph = (Paragraph) firstChild; + var firstInline = paragraph.getFirstChild(); + if (!(firstInline instanceof Text)) { + return; + } + + var textNode = (Text) firstInline; + + // The alert marker can be the entire text node content, or just the first line + // before a line break (trailing spaces create a HardLineBreak instead of SoftLineBreak). 
+ var afterMarker = firstInline.getNext(); + if (afterMarker != null && !(afterMarker instanceof SoftLineBreak) && !(afterMarker instanceof HardLineBreak)) { + // Text followed by something other than a line break - not an alert + return; + } + + var matcher = ALERT_PATTERN.matcher(textNode.getLiteral()); + if (!matcher.matches()) { + return; + } + + var type = matcher.group(1).toUpperCase(Locale.ROOT); + if (!allowedTypes.contains(type)) { + return; + } + + // Must have content after the marker line. An alert with ONLY the marker + // and no content is a normal blockquote on GitHub. + if (afterMarker != null) { + // There's a line break after marker - check if there's content after it + if (afterMarker.getNext() == null && paragraph.getNext() == null) { + return; + } + afterMarker.unlink(); + } else { + // Marker is the only thing in the paragraph + if (paragraph.getNext() == null) { + return; + } + } + + // Valid alert. Create Alert node and transfer children. + var alert = new Alert(type); + alert.setSourceSpans(blockQuote.getSourceSpans()); + blockQuote.insertAfter(alert); + + // Remove the marker text from the first paragraph + firstInline.unlink(); + + // If paragraph is now empty, remove it + if (paragraph.getFirstChild() == null) { + paragraph.unlink(); + } + + // Move remaining children from blockquote to alert + var child = blockQuote.getFirstChild(); + while (child != null) { + var next = child.getNext(); + alert.appendChild(child); + child = next; + } + + blockQuote.unlink(); + } +} diff --git a/commonmark-ext-gfm-alerts/src/main/javadoc/overview.html b/commonmark-ext-gfm-alerts/src/main/javadoc/overview.html new file mode 100644 index 000000000..145232a87 --- /dev/null +++ b/commonmark-ext-gfm-alerts/src/main/javadoc/overview.html @@ -0,0 +1,6 @@ + + +Extension for GitHub Flavored Markdown (GFM) alerts using blockquote syntax +

<p>See {@link org.commonmark.ext.gfm.alerts.AlertsExtension}</p>

+ + diff --git a/commonmark-ext-gfm-alerts/src/test/java/org/commonmark/ext/gfm/alerts/AlertsMarkdownRendererTest.java b/commonmark-ext-gfm-alerts/src/test/java/org/commonmark/ext/gfm/alerts/AlertsMarkdownRendererTest.java new file mode 100644 index 000000000..aca90e2df --- /dev/null +++ b/commonmark-ext-gfm-alerts/src/test/java/org/commonmark/ext/gfm/alerts/AlertsMarkdownRendererTest.java @@ -0,0 +1,73 @@ +package org.commonmark.ext.gfm.alerts; + +import org.commonmark.Extension; +import org.commonmark.parser.Parser; +import org.commonmark.renderer.markdown.MarkdownRenderer; +import org.junit.jupiter.api.Test; + +import java.util.Set; + +import static org.assertj.core.api.Assertions.assertThat; + +public class AlertsMarkdownRendererTest { + + private static final Set EXTENSIONS = Set.of(AlertsExtension.create()); + private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build(); + private static final MarkdownRenderer RENDERER = MarkdownRenderer.builder().extensions(EXTENSIONS).build(); + + @Test + public void alertRoundTrip() { + assertRoundTrip("> [!WARNING]\n> Be careful\n"); + } + + @Test + public void allStandardTypesRoundTrip() { + assertRoundTrip("> [!NOTE]\n> Note\n"); + assertRoundTrip("> [!TIP]\n> Tip\n"); + assertRoundTrip("> [!IMPORTANT]\n> Important\n"); + assertRoundTrip("> [!WARNING]\n> Warning\n"); + assertRoundTrip("> [!CAUTION]\n> Caution\n"); + } + + @Test + public void lowercaseTypeRendersAsUppercase() { + // Lowercase input gets normalized to uppercase type + String rendered = RENDERER.render(PARSER.parse("> [!note]\n> Content\n")); + assertThat(rendered).isEqualTo("> [!NOTE]\n> Content\n"); + } + + @Test + public void alertWithMultipleParagraphs() { + String input = "> [!NOTE]\n> First paragraph\n>\n> Second paragraph\n"; + // MarkdownWriter always writes the prefix including trailing space + String expected = "> [!NOTE]\n> First paragraph\n> \n> Second paragraph\n"; + String rendered = 
RENDERER.render(PARSER.parse(input)); + assertThat(rendered).isEqualTo(expected); + } + + @Test + public void customTypeRoundTrip() { + Extension extension = AlertsExtension.builder() + .addCustomType("INFO", "Information") + .build(); + + Parser parser = Parser.builder().extensions(Set.of(extension)).build(); + MarkdownRenderer renderer = MarkdownRenderer.builder().extensions(Set.of(extension)).build(); + + String input = "> [!INFO]\n> Custom type\n"; + String rendered = renderer.render(parser.parse(input)); + assertThat(rendered).isEqualTo(input); + } + + @Test + public void alertWithList() { + String input = "> [!NOTE]\n> Items:\n> \n> - First\n> - Second\n"; + String rendered = RENDERER.render(PARSER.parse(input)); + assertThat(rendered).isEqualTo(input); + } + + private void assertRoundTrip(String input) { + String rendered = RENDERER.render(PARSER.parse(input)); + assertThat(rendered).isEqualTo(input); + } +} diff --git a/commonmark-ext-gfm-alerts/src/test/java/org/commonmark/ext/gfm/alerts/AlertsSpecTest.java b/commonmark-ext-gfm-alerts/src/test/java/org/commonmark/ext/gfm/alerts/AlertsSpecTest.java new file mode 100644 index 000000000..8155d8009 --- /dev/null +++ b/commonmark-ext-gfm-alerts/src/test/java/org/commonmark/ext/gfm/alerts/AlertsSpecTest.java @@ -0,0 +1,44 @@ +package org.commonmark.ext.gfm.alerts; + +import org.commonmark.Extension; +import org.commonmark.parser.Parser; +import org.commonmark.renderer.html.HtmlRenderer; +import org.commonmark.testutil.RenderingTestCase; +import org.commonmark.testutil.example.Example; +import org.commonmark.testutil.example.ExampleReader; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.Parameter; +import org.junit.jupiter.params.ParameterizedClass; +import org.junit.jupiter.params.provider.MethodSource; + +import java.net.URL; +import java.util.List; +import java.util.Set; + +@ParameterizedClass +@MethodSource("data") +public class AlertsSpecTest extends RenderingTestCase { + + private 
static final Set EXTENSIONS = Set.of(AlertsExtension.create()); + private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build(); + // Use softbreak("
") to match GitHub's rendering for easier comparison with GitHub API output. + private static final HtmlRenderer RENDERER = HtmlRenderer.builder().extensions(EXTENSIONS).softbreak("
\n").build(); + + @Parameter + Example example; + + static List data() { + URL spec = AlertsSpecTest.class.getResource("/alerts-spec.txt"); + return ExampleReader.readExamples(spec, "alert"); + } + + @Test + public void testHtmlRendering() { + assertRendering(example.getSource(), example.getHtml()); + } + + @Override + protected String render(String source) { + return RENDERER.render(PARSER.parse(source)); + } +} \ No newline at end of file diff --git a/commonmark-ext-gfm-alerts/src/test/java/org/commonmark/ext/gfm/alerts/AlertsTest.java b/commonmark-ext-gfm-alerts/src/test/java/org/commonmark/ext/gfm/alerts/AlertsTest.java new file mode 100644 index 000000000..c46c532fe --- /dev/null +++ b/commonmark-ext-gfm-alerts/src/test/java/org/commonmark/ext/gfm/alerts/AlertsTest.java @@ -0,0 +1,140 @@ +package org.commonmark.ext.gfm.alerts; + +import org.commonmark.Extension; +import org.commonmark.node.Node; +import org.commonmark.parser.Parser; +import org.commonmark.renderer.html.HtmlRenderer; +import org.junit.jupiter.api.Test; + +import java.util.Set; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; + +public class AlertsTest { + + private static final Set EXTENSIONS = Set.of(AlertsExtension.create()); + private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build(); + + // Custom types + + @Test + public void customType() { + Extension extension = AlertsExtension.builder() + .addCustomType("INFO", "Information") + .build(); + + Parser parser = Parser.builder().extensions(Set.of(extension)).build(); + HtmlRenderer renderer = HtmlRenderer.builder().extensions(Set.of(extension)).build(); + + assertThat(renderer.render(parser.parse("> [!INFO]\n> Custom alert"))).isEqualTo( + "
\n" + + "

Information

\n" + + "

Custom alert

\n" + + "
\n"); + } + + @Test + public void multipleCustomTypes() { + Extension extension = AlertsExtension.builder() + .addCustomType("INFO", "Information") + .addCustomType("SUCCESS", "Success!") + .addCustomType("DANGER", "Danger!") + .build(); + + Parser parser = Parser.builder().extensions(Set.of(extension)).build(); + HtmlRenderer renderer = HtmlRenderer.builder().extensions(Set.of(extension)).build(); + + assertThat(renderer.render(parser.parse("> [!INFO]\n> Info content\n\n> [!SUCCESS]\n> Success content\n\n> [!DANGER]\n> Danger content"))).isEqualTo( + "
\n" + + "

Information

\n" + + "

Info content

\n" + + "
\n" + + "
\n" + + "

Success!

\n" + + "

Success content

\n" + + "
\n" + + "
\n" + + "

Danger!

\n" + + "

Danger content

\n" + + "
\n"); + } + + @Test + public void standardTypesWithCustomConfigured() { + Extension extension = AlertsExtension.builder() + .addCustomType("INFO", "Information") + .build(); + + Parser parser = Parser.builder().extensions(Set.of(extension)).build(); + HtmlRenderer renderer = HtmlRenderer.builder().extensions(Set.of(extension)).build(); + + assertThat(renderer.render(parser.parse("> [!NOTE]\n> Standard type"))).isEqualTo( + "
\n" + + "

Note

\n" + + "

Standard type

\n" + + "
\n"); + } + + @Test + public void overrideStandardTypeTitle() { + Extension extension = AlertsExtension.builder() + .addCustomType("NOTE", "Nota") + .build(); + + Parser parser = Parser.builder().extensions(Set.of(extension)).build(); + HtmlRenderer renderer = HtmlRenderer.builder().extensions(Set.of(extension)).build(); + + assertThat(renderer.render(parser.parse("> [!NOTE]\n> Localized title"))).isEqualTo( + "
\n" + + "

Nota

\n" + + "

Localized title

\n" + + "
\n"); + } + + // Custom type validation + + @Test + public void customTypeMustBeUppercase() { + assertThrows(IllegalArgumentException.class, () -> + AlertsExtension.builder().addCustomType("info", "Information").build()); + } + + @Test + public void customTypeMustNotBeEmpty() { + assertThrows(IllegalArgumentException.class, () -> + AlertsExtension.builder().addCustomType("", "Title").build()); + } + + @Test + public void customTypeTitleMustNotBeEmpty() { + assertThrows(IllegalArgumentException.class, () -> + AlertsExtension.builder().addCustomType("INFO", "").build()); + } + + // AST + + @Test + public void alertParsedAsAlertNode() { + Node document = PARSER.parse("> [!NOTE]\n> This is a note"); + Node firstChild = document.getFirstChild(); + assertThat(firstChild).isInstanceOf(Alert.class); + Alert alert = (Alert) firstChild; + assertThat(alert.getType()).isEqualTo("NOTE"); + } + + @Test + public void customTypeParsedAsAlertNode() { + Extension extension = AlertsExtension.builder() + .addCustomType("INFO", "Information") + .build(); + + Parser parser = Parser.builder().extensions(Set.of(extension)).build(); + + Node document = parser.parse("> [!INFO]\n> Custom alert"); + Alert alert = (Alert) document.getFirstChild(); + + assertThat(alert.getType()).isEqualTo("INFO"); + } + +} \ No newline at end of file diff --git a/commonmark-ext-gfm-alerts/src/test/java/org/commonmark/ext/gfm/alerts/examples/AlertsExample.java b/commonmark-ext-gfm-alerts/src/test/java/org/commonmark/ext/gfm/alerts/examples/AlertsExample.java new file mode 100644 index 000000000..34b78385c --- /dev/null +++ b/commonmark-ext-gfm-alerts/src/test/java/org/commonmark/ext/gfm/alerts/examples/AlertsExample.java @@ -0,0 +1,85 @@ +package org.commonmark.ext.gfm.alerts.examples; + +import org.commonmark.ext.gfm.alerts.AlertsExtension; +import org.commonmark.parser.Parser; +import org.commonmark.renderer.html.HtmlRenderer; + +import java.util.List; + +/** + * Example demonstrating the use of the GFM 
Alerts extension. + */ +public class AlertsExample { + + public static void main(String[] args) { + standardTypesExample(); + System.out.println("\n" + "=".repeat(60) + "\n"); + customTypesExample(); + } + + private static void standardTypesExample() { + System.out.println("STANDARD GFM ALERT TYPES"); + System.out.println("=".repeat(60)); + + var extension = AlertsExtension.create(); + + var parser = Parser.builder() + .extensions(List.of(extension)) + .build(); + + var renderer = HtmlRenderer.builder() + .extensions(List.of(extension)) + .build(); + + var markdown = "# GFM Alerts Demo\n\n" + + "> [!NOTE]\n" + + "> Highlights information that users should take into account.\n\n" + + "> [!TIP]\n" + + "> Helpful advice for doing things better.\n\n" + + "> [!IMPORTANT]\n" + + "> Key information users need to know.\n\n" + + "> [!WARNING]\n" + + "> Urgent info that needs immediate attention.\n\n" + + "> [!CAUTION]\n" + + "> Advises about risks or negative outcomes.\n"; + + var html = renderer.render(parser.parse(markdown)); + + System.out.println("Markdown Input:"); + System.out.println(markdown); + System.out.println("\nHTML Output:"); + System.out.println(html); + } + + private static void customTypesExample() { + System.out.println("CUSTOM ALERT TYPES"); + System.out.println("=".repeat(60)); + + var extension = AlertsExtension.builder() + .addCustomType("BUG", "Known Bug") + .build(); + + var parser = Parser.builder() + .extensions(List.of(extension)) + .build(); + + var renderer = HtmlRenderer.builder() + .extensions(List.of(extension)) + .build(); + + var markdown = "# Custom Alert Types\n\n" + + "> [!NOTE]\n" + + "> Useful information that users should know.\n\n" + + "> [!TIP]\n" + + "> Helpful advice for doing things better.\n\n" + + "> [!BUG]\n" + + "> This feature has a known issue with large files (see #42).\n"; + + var html = renderer.render(parser.parse(markdown)); + + System.out.println("Markdown Input:"); + System.out.println(markdown); + 
System.out.println("\nHTML Output:"); + System.out.println(html); + } +} diff --git a/commonmark-ext-gfm-alerts/src/test/resources/alerts-spec-template.md b/commonmark-ext-gfm-alerts/src/test/resources/alerts-spec-template.md new file mode 100644 index 000000000..9c1cf117b --- /dev/null +++ b/commonmark-ext-gfm-alerts/src/test/resources/alerts-spec-template.md @@ -0,0 +1,280 @@ +# Alerts + +## Standard types + +```````````````````````````````` example alert +> [!NOTE] +> This is a note +```````````````````````````````` + +```````````````````````````````` example alert +> [!TIP] +> This is a tip +```````````````````````````````` + +```````````````````````````````` example alert +> [!IMPORTANT] +> This is important +```````````````````````````````` + +```````````````````````````````` example alert +> [!WARNING] +> This is a warning +```````````````````````````````` + +```````````````````````````````` example alert +> [!CAUTION] +> This is a caution +```````````````````````````````` + +## Case insensitivity + +Alert type matching is case-insensitive. 
+ +```````````````````````````````` example alert +> [!note] +> Content +```````````````````````````````` + +```````````````````````````````` example alert +> [!Note] +> Content +```````````````````````````````` + +## Alert content + +Marker alone in first paragraph, blank line, then content: + +```````````````````````````````` example alert +> [!NOTE] +> +> Content +```````````````````````````````` + +Multiple paragraphs: + +```````````````````````````````` example alert +> [!NOTE] +> First paragraph +> +> Second paragraph +```````````````````````````````` + +Inline formatting: + +```````````````````````````````` example alert +> [!TIP] +> This is **bold** and *italic* +```````````````````````````````` + +Code block inside alert: + +```````````````````````````````` example alert +> [!TIP] +> Code: +> +> function() { } +> +> End +```````````````````````````````` + +List inside alert: + +```````````````````````````````` example alert +> [!IMPORTANT] +> Items: +> - First item +> - Second item +```````````````````````````````` + +Links inside alert: + +```````````````````````````````` example alert +> [!NOTE] +> Check out [this link](https://example.com) for more info +```````````````````````````````` + +Heading inside alert: + +```````````````````````````````` example alert +> [!IMPORTANT] +> ## Heading +> Content below heading +```````````````````````````````` + +Empty lines in middle of alert: + +```````````````````````````````` example alert +> [!NOTE] +> First +> +> +> After empty lines +```````````````````````````````` + +## Not an alert + +Text after marker on the same line: + +```````````````````````````````` example alert +> [!NOTE] Some text +```````````````````````````````` + +Unknown type: + +```````````````````````````````` example alert +> [!INVALID] +> Some text +```````````````````````````````` + +Unconfigured custom type is not an alert: + +```````````````````````````````` example alert +> [!INFO] +> Should be blockquote 
+```````````````````````````````` + +Marker with no content: + +```````````````````````````````` example alert +> [!NOTE] +```````````````````````````````` + +Whitespace-only content after marker: + +```````````````````````````````` example alert +> [!TIP] +> +> +```````````````````````````````` + +Extra space inside marker: + +```````````````````````````````` example alert +> [! NOTE] +> Should be blockquote +```````````````````````````````` + +Missing brackets: + +```````````````````````````````` example alert +> !NOTE +> Should be blockquote +```````````````````````````````` + +Missing exclamation mark: + +```````````````````````````````` example alert +> [NOTE] +> Should be blockquote +```````````````````````````````` + +Regular blockquote is not affected: + +```````````````````````````````` example alert +> This is a regular blockquote +```````````````````````````````` + +## Boundaries + +Trailing spaces after marker: + +```````````````````````````````` example alert +> [!NOTE] +> This is a note +```````````````````````````````` + +Trailing tabs after marker: + +```````````````````````````````` example alert +> [!WARNING]→→ +> Be careful +```````````````````````````````` + +Leading spaces before blockquote marker: + +```````````````````````````````` example alert + > [!IMPORTANT] + > Content +```````````````````````````````` + +Blank line after marker ends the blockquote (not an alert): + +```````````````````````````````` example alert +> [!NOTE] + +Some text +```````````````````````````````` + +Alert followed by blockquote: + +```````````````````````````````` example alert +> [!NOTE] +> This is an alert + +> This is a blockquote +```````````````````````````````` + +Adjacent alerts: + +```````````````````````````````` example alert +> [!NOTE] +> First alert + +> [!WARNING] +> Second alert +```````````````````````````````` + +## Nesting and containers + +Nested alert inside alert renders as blockquote: + +```````````````````````````````` example alert +> 
[!NOTE] +> This is a note +>> [!WARNING] +>> Nested content +```````````````````````````````` + +Nested blockquote inside alert: + +```````````````````````````````` example alert +> [!NOTE] +> This is a note +>> Nested blockquote +```````````````````````````````` + +Alert inside list item stays as blockquote: + +```````````````````````````````` example alert +- > [!NOTE] + > Test +```````````````````````````````` + +Alert marker in content is treated as text: + +```````````````````````````````` example alert +> [!NOTE] +> This is a note +> [!WARNING] +> This is still part of the note +```````````````````````````````` + +## Continuation and interruption + +Lazy continuation: + +```````````````````````````````` example alert +> [!NOTE] +> First line +Lazy continuation +> Continues alert +```````````````````````````````` + +Alert type after regular blockquote content is not an alert: + +```````````````````````````````` example alert +> Regular blockquote +> [!NOTE] +> More text +```````````````````````````````` \ No newline at end of file diff --git a/commonmark-ext-gfm-alerts/src/test/resources/alerts-spec.txt b/commonmark-ext-gfm-alerts/src/test/resources/alerts-spec.txt new file mode 100644 index 000000000..6f041fee4 --- /dev/null +++ b/commonmark-ext-gfm-alerts/src/test/resources/alerts-spec.txt @@ -0,0 +1,492 @@ +# Alerts + +Expectations verified against GitHub Markdown API (gh api markdown -f mode=gfm). +Our HTML omits GitHub's SVG icons and uses a `data-alert-type` attribute instead. + +## Standard types + +```````````````````````````````` example alert +> [!NOTE] +> This is a note +. +
+

Note

+

This is a note

+
+```````````````````````````````` + +```````````````````````````````` example alert +> [!TIP] +> This is a tip +. +
+

Tip

+

This is a tip

+
+```````````````````````````````` + +```````````````````````````````` example alert +> [!IMPORTANT] +> This is important +. +
+

Important

+

This is important

+
+```````````````````````````````` + +```````````````````````````````` example alert +> [!WARNING] +> This is a warning +. +
+

Warning

+

This is a warning

+
+```````````````````````````````` + +```````````````````````````````` example alert +> [!CAUTION] +> This is a caution +. +
+

Caution

+

This is a caution

+
+```````````````````````````````` + +## Case insensitivity + +Alert type matching is case-insensitive. + +```````````````````````````````` example alert +> [!note] +> Content +. +
+

Note

+

Content

+
+```````````````````````````````` + +```````````````````````````````` example alert +> [!Note] +> Content +. +
+

Note

+

Content

+
+```````````````````````````````` + +## Alert content + +Marker alone in first paragraph, blank line, then content: + +```````````````````````````````` example alert +> [!NOTE] +> +> Content +. +
+

Note

+

Content

+
+```````````````````````````````` + +Multiple paragraphs: + +```````````````````````````````` example alert +> [!NOTE] +> First paragraph +> +> Second paragraph +. +
+

Note

+

First paragraph

+

Second paragraph

+
+```````````````````````````````` + +Inline formatting: + +```````````````````````````````` example alert +> [!TIP] +> This is **bold** and *italic* +. +
+

Tip

+

This is bold and italic

+
+```````````````````````````````` + +Code block inside alert: + +```````````````````````````````` example alert +> [!TIP] +> Code: +> +> function() { } +> +> End +. +
+

Tip

+

Code:

+
function() { }
+
+

End

+
+```````````````````````````````` + +List inside alert: + +```````````````````````````````` example alert +> [!IMPORTANT] +> Items: +> - First item +> - Second item +. +
+

Important

+

Items:

+
    +
  • First item
  • +
  • Second item
  • +
+
+```````````````````````````````` + +Links inside alert: + +```````````````````````````````` example alert +> [!NOTE] +> Check out [this link](https://example.com) for more info +. +
+

Note

+

Check out this link for more info

+
+```````````````````````````````` + +Heading inside alert: + +```````````````````````````````` example alert +> [!IMPORTANT] +> ## Heading +> Content below heading +. +
+

Important

+

Heading

+

Content below heading

+
+```````````````````````````````` + +Empty lines in middle of alert: + +```````````````````````````````` example alert +> [!NOTE] +> First +> +> +> After empty lines +. +
+

Note

+

First

+

After empty lines

+
+```````````````````````````````` + +## Not an alert + +Text after marker on the same line: + +```````````````````````````````` example alert +> [!NOTE] Some text +. +
+

[!NOTE] Some text

+
+```````````````````````````````` + +Unknown type: + +```````````````````````````````` example alert +> [!INVALID] +> Some text +. +
+

[!INVALID]
+Some text

+
+```````````````````````````````` + +Unconfigured custom type is not an alert: + +```````````````````````````````` example alert +> [!INFO] +> Should be blockquote +. +
+

[!INFO]
+Should be blockquote

+
+```````````````````````````````` + +Marker with no content: + +```````````````````````````````` example alert +> [!NOTE] +. +
+

[!NOTE]

+
+```````````````````````````````` + +Whitespace-only content after marker: + +```````````````````````````````` example alert +> [!TIP] +> +> +. +
+

[!TIP]

+
+```````````````````````````````` + +Extra space inside marker: + +```````````````````````````````` example alert +> [! NOTE] +> Should be blockquote +. +
+

[! NOTE]
+Should be blockquote

+
+```````````````````````````````` + +Missing brackets: + +```````````````````````````````` example alert +> !NOTE +> Should be blockquote +. +
+

!NOTE
+Should be blockquote

+
+```````````````````````````````` + +Missing exclamation mark: + +```````````````````````````````` example alert +> [NOTE] +> Should be blockquote +. +
+

[NOTE]
+Should be blockquote

+
+```````````````````````````````` + +Regular blockquote is not affected: + +```````````````````````````````` example alert +> This is a regular blockquote +. +
+

This is a regular blockquote

+
+```````````````````````````````` + +## Boundaries + +Trailing spaces after marker: + +```````````````````````````````` example alert +> [!NOTE] +> This is a note +. +
+

Note

+

This is a note

+
+```````````````````````````````` + +Trailing tabs after marker: + +```````````````````````````````` example alert +> [!WARNING]→→ +> Be careful +. +
+

Warning

+

Be careful

+
+```````````````````````````````` + +Leading spaces before blockquote marker: + +```````````````````````````````` example alert + > [!IMPORTANT] + > Content +. +
+

Important

+

Content

+
+```````````````````````````````` + +Blank line after marker ends the blockquote (not an alert): + +```````````````````````````````` example alert +> [!NOTE] + +Some text +. +
+

[!NOTE]

+
+

Some text

+```````````````````````````````` + +Alert followed by blockquote: + +```````````````````````````````` example alert +> [!NOTE] +> This is an alert + +> This is a blockquote +. +
+

Note

+

This is an alert

+
+
+

This is a blockquote

+
+```````````````````````````````` + +Adjacent alerts: + +```````````````````````````````` example alert +> [!NOTE] +> First alert + +> [!WARNING] +> Second alert +. +
+

Note

+

First alert

+
+
+

Warning

+

Second alert

+
+```````````````````````````````` + +## Nesting and containers + +Nested alert inside alert renders as blockquote: + +```````````````````````````````` example alert +> [!NOTE] +> This is a note +>> [!WARNING] +>> Nested content +. +
+

Note

+

This is a note

+
+

[!WARNING]
+Nested content

+
+
+```````````````````````````````` + +Nested blockquote inside alert: + +```````````````````````````````` example alert +> [!NOTE] +> This is a note +>> Nested blockquote +. +
+

Note

+

This is a note

+
+

Nested blockquote

+
+
+```````````````````````````````` + +Alert inside list item stays as blockquote: + +```````````````````````````````` example alert +- > [!NOTE] + > Test +. +
    +
  • +
    +

    [!NOTE]
    +Test

    +
    +
  • +
+```````````````````````````````` + +Alert marker in content is treated as text: + +```````````````````````````````` example alert +> [!NOTE] +> This is a note +> [!WARNING] +> This is still part of the note +. +
+

Note

+

This is a note
+[!WARNING]
+This is still part of the note

+
+```````````````````````````````` + +## Continuation and interruption + +Lazy continuation: + +```````````````````````````````` example alert +> [!NOTE] +> First line +Lazy continuation +> Continues alert +. +
+

Note

+

First line
+Lazy continuation
+Continues alert

+
+```````````````````````````````` + +Alert type after regular blockquote content is not an alert: + +```````````````````````````````` example alert +> Regular blockquote +> [!NOTE] +> More text +. +
+

Regular blockquote
+[!NOTE]
+More text

+
+```````````````````````````````` diff --git a/commonmark-ext-gfm-alerts/src/test/resources/generate-alerts-spec.java b/commonmark-ext-gfm-alerts/src/test/resources/generate-alerts-spec.java new file mode 100644 index 000000000..06192f107 --- /dev/null +++ b/commonmark-ext-gfm-alerts/src/test/resources/generate-alerts-spec.java @@ -0,0 +1,111 @@ +///usr/bin/env jbang "$0" "$@" ; exit $? + +// Generates alerts-spec.txt from alerts-spec-template.md by rendering each example +// through the GitHub Markdown API and inserting the normalized HTML expectation. +// +// Prerequisites: gh CLI installed and authenticated (gh auth login) +// Usage: cd commonmark-ext-gfm-alerts/src/test/resources && jbang generate-alerts-spec.java + +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Pattern; + +class GenerateAlertsSpec { + + private static final String FENCE = "````````````````````````````````"; + private static final String EXAMPLE_OPEN = FENCE + " example alert"; + + public static void main(String[] args) throws Exception { + var templatePath = Path.of("alerts-spec-template.md"); + if (!Files.exists(templatePath)) { + System.err.println("Run from the directory containing alerts-spec-template.md"); + System.exit(1); + } + + var lines = Files.readAllLines(templatePath); + var output = new ArrayList(); + var header = "Expectations verified against GitHub Markdown API (gh api markdown -f mode=gfm).\n" + + "Our HTML omits GitHub's SVG icons and uses a `data-alert-type` attribute instead."; + + int exampleCount = 0; + int i = 0; + while (i < lines.size()) { + var line = lines.get(i); + + // Insert header after the first heading + if (i == 0 && line.startsWith("# ")) { + output.add(line); + output.add(""); + output.add(header); + i++; + continue; + } + + if (line.equals(EXAMPLE_OPEN)) { + // Collect source lines until closing fence + output.add(line); + i++; + var sourceLines = new ArrayList(); + 
while (i < lines.size() && !lines.get(i).equals(FENCE)) { + sourceLines.add(lines.get(i)); + output.add(lines.get(i)); + i++; + } + + // Render via GitHub API (→ represents tabs in the spec format) + var source = String.join("\n", sourceLines).replace("\u2192", "\t"); + exampleCount++; + System.out.printf("%d: %s%n", exampleCount, + source.substring(0, Math.min(40, source.length())).replace("\n", "\\n")); + + var ghHtml = normalizeHtml(renderViaGh(source)); + + // Insert separator and HTML expectation + output.add("."); + output.add(ghHtml); + output.add(FENCE); + i++; // skip closing fence from template + } else { + output.add(line); + i++; + } + } + + var specPath = Path.of("alerts-spec.txt"); + Files.writeString(specPath, String.join("\n", output) + "\n"); + System.out.println("Done — " + exampleCount + " examples written to alerts-spec.txt"); + } + + static String renderViaGh(String markdown) throws Exception { + var process = new ProcessBuilder("gh", "api", "markdown", "-f", "mode=gfm", "-f", "text=" + markdown) + .redirectErrorStream(true) + .start(); + var output = new String(process.getInputStream().readAllBytes()); + if (process.waitFor() != 0) { + throw new RuntimeException("gh api failed: " + output); + } + return output; + } + + // Normalize GitHub API HTML to match our renderer output. + static String normalizeHtml(String html) { + // Strip GitHub-specific elements and attributes + html = Pattern.compile("]*>.*?", Pattern.DOTALL).matcher(html).replaceAll(""); + html = html.replaceAll(" (dir=\"auto\"|rel=\"nofollow\"|class=\"notranslate\")", ""); + // Add data-alert-type and insert newlines to match our renderer's formatting + html = Pattern.compile("class=\"markdown-alert markdown-alert-(\\w+)\"") + .matcher(html) + .replaceAll("class=\"markdown-alert markdown-alert-$1\" data-alert-type=\"$1\""); + html = Pattern.compile("(data-alert-type=\"\\w+\">)(

", "

\n

"); + return html.replace("\r\n", "\n").lines() + .map(String::stripTrailing) + .reduce((a, b) -> a + "\n" + b) + .orElse("") + .strip(); + } +} \ No newline at end of file diff --git a/commonmark-ext-gfm-strikethrough/.settings/org.eclipse.core.runtime.prefs b/commonmark-ext-gfm-strikethrough/.settings/org.eclipse.core.runtime.prefs deleted file mode 100644 index 5a0ad22d2..000000000 --- a/commonmark-ext-gfm-strikethrough/.settings/org.eclipse.core.runtime.prefs +++ /dev/null @@ -1,2 +0,0 @@ -eclipse.preferences.version=1 -line.separator=\n diff --git a/commonmark-ext-gfm-strikethrough/.settings/org.eclipse.jdt.core.prefs b/commonmark-ext-gfm-strikethrough/.settings/org.eclipse.jdt.core.prefs deleted file mode 100644 index 3c0d27c8f..000000000 --- a/commonmark-ext-gfm-strikethrough/.settings/org.eclipse.jdt.core.prefs +++ /dev/null @@ -1,290 +0,0 @@ -eclipse.preferences.version=1 -org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7 -org.eclipse.jdt.core.compiler.compliance=1.7 -org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning -org.eclipse.jdt.core.compiler.source=1.7 -org.eclipse.jdt.core.formatter.align_type_members_on_columns=false -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_annotation=0 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=16 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call=16 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation=16 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression=16 -org.eclipse.jdt.core.formatter.alignment_for_assignment=0 -org.eclipse.jdt.core.formatter.alignment_for_binary_expression=16 -org.eclipse.jdt.core.formatter.alignment_for_compact_if=16 -org.eclipse.jdt.core.formatter.alignment_for_conditional_expression=80 -org.eclipse.jdt.core.formatter.alignment_for_enum_constants=0 
-org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer=16 -org.eclipse.jdt.core.formatter.alignment_for_method_declaration=0 -org.eclipse.jdt.core.formatter.alignment_for_multiple_fields=16 -org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_resources_in_try=80 -org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation=16 -org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_union_type_in_multicatch=16 -org.eclipse.jdt.core.formatter.blank_lines_after_imports=1 -org.eclipse.jdt.core.formatter.blank_lines_after_package=1 -org.eclipse.jdt.core.formatter.blank_lines_before_field=0 -org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration=0 -org.eclipse.jdt.core.formatter.blank_lines_before_imports=1 -org.eclipse.jdt.core.formatter.blank_lines_before_member_type=1 -org.eclipse.jdt.core.formatter.blank_lines_before_method=1 -org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk=1 -org.eclipse.jdt.core.formatter.blank_lines_before_package=0 -org.eclipse.jdt.core.formatter.blank_lines_between_import_groups=1 -org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations=1 -org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_array_initializer=end_of_line 
-org.eclipse.jdt.core.formatter.brace_position_for_block=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_block_in_case=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_enum_constant=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_lambda_body=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_method_declaration=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_switch=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_type_declaration=end_of_line -org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment=false -org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment=false -org.eclipse.jdt.core.formatter.comment.format_block_comments=true -org.eclipse.jdt.core.formatter.comment.format_header=false -org.eclipse.jdt.core.formatter.comment.format_html=true -org.eclipse.jdt.core.formatter.comment.format_javadoc_comments=true -org.eclipse.jdt.core.formatter.comment.format_line_comments=true -org.eclipse.jdt.core.formatter.comment.format_source_code=true -org.eclipse.jdt.core.formatter.comment.indent_parameter_description=true -org.eclipse.jdt.core.formatter.comment.indent_root_tags=true -org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags=insert -org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter=do not insert -org.eclipse.jdt.core.formatter.comment.line_length=120 -org.eclipse.jdt.core.formatter.comment.new_lines_at_block_boundaries=true -org.eclipse.jdt.core.formatter.comment.new_lines_at_javadoc_boundaries=true -org.eclipse.jdt.core.formatter.comment.preserve_white_space_between_code_and_line_comments=false -org.eclipse.jdt.core.formatter.compact_else_if=true -org.eclipse.jdt.core.formatter.continuation_indentation=2 
-org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer=2 -org.eclipse.jdt.core.formatter.disabling_tag=@formatter\:off -org.eclipse.jdt.core.formatter.enabling_tag=@formatter\:on -org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line=false -org.eclipse.jdt.core.formatter.format_line_comment_starting_on_first_column=true -org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header=true -org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header=true -org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header=true -org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header=true -org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases=true -org.eclipse.jdt.core.formatter.indent_empty_lines=false -org.eclipse.jdt.core.formatter.indent_statements_compare_to_block=true -org.eclipse.jdt.core.formatter.indent_statements_compare_to_body=true -org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases=true -org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch=true -org.eclipse.jdt.core.formatter.indentation.size=4 -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_field=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_method=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_package=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_type=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_label=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_after_type_annotation=do not insert 
-org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter=insert -org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator=insert -org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_binary_operator=insert -org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters=insert -org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block=insert -org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast=insert -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert=insert 
-org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case=insert -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional=insert -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for=insert -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments=insert 
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters=insert -org.eclipse.jdt.core.formatter.insert_space_after_ellipsis=insert -org.eclipse.jdt.core.formatter.insert_space_after_lambda_arrow=insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer=insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized=do not insert 
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_try=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional=insert -org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for=insert -org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_try_resources=insert -org.eclipse.jdt.core.formatter.insert_space_after_unary_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter=insert -org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator=insert -org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_binary_operator=insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer=insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch=do not insert 
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_try=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert=insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional=insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for=insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters=do not insert 
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_ellipsis=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_lambda_arrow=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters=do not insert 
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if=insert 
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_try=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while=insert -org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return=insert -org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw=insert -org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional=insert -org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_semicolon=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_try_resources=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_unary_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration=do not insert 
-org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation=do not insert -org.eclipse.jdt.core.formatter.join_lines_in_comments=true -org.eclipse.jdt.core.formatter.join_wrapped_lines=false -org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line=false -org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line=false -org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line=false -org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line=false -org.eclipse.jdt.core.formatter.lineSplit=120 -org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column=false -org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column=false -org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body=0 -org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve=1 -org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line=true -org.eclipse.jdt.core.formatter.tabulation.char=space -org.eclipse.jdt.core.formatter.tabulation.size=4 -org.eclipse.jdt.core.formatter.use_on_off_tags=false -org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations=false -org.eclipse.jdt.core.formatter.wrap_before_binary_operator=true -org.eclipse.jdt.core.formatter.wrap_before_or_operator_multicatch=true -org.eclipse.jdt.core.formatter.wrap_outer_expressions_when_nested=true -org.eclipse.jdt.core.javaFormatter=org.eclipse.jdt.core.defaultJavaFormatter diff --git a/commonmark-ext-gfm-strikethrough/pom.xml b/commonmark-ext-gfm-strikethrough/pom.xml index 3df0581e6..9d8f55e5f 100644 --- a/commonmark-ext-gfm-strikethrough/pom.xml +++ b/commonmark-ext-gfm-strikethrough/pom.xml @@ -2,9 +2,9 
@@ 4.0.0 - com.atlassian.commonmark + org.commonmark commonmark-parent - 0.14.1-SNAPSHOT + 0.28.1-SNAPSHOT commonmark-ext-gfm-strikethrough @@ -13,31 +13,15 @@ - com.atlassian.commonmark + org.commonmark commonmark - com.atlassian.commonmark + org.commonmark commonmark-test-util test - - - - org.apache.maven.plugins - maven-jar-plugin - - - - org.commonmark.ext.gfm.strikethrough - - - - - - - diff --git a/commonmark-ext-gfm-strikethrough/src/main/java/module-info.java b/commonmark-ext-gfm-strikethrough/src/main/java/module-info.java new file mode 100644 index 000000000..b6204934b --- /dev/null +++ b/commonmark-ext-gfm-strikethrough/src/main/java/module-info.java @@ -0,0 +1,5 @@ +module org.commonmark.ext.gfm.strikethrough { + exports org.commonmark.ext.gfm.strikethrough; + + requires transitive org.commonmark; +} diff --git a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/Strikethrough.java b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/Strikethrough.java index 115ae9ea4..0c24642bc 100644 --- a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/Strikethrough.java +++ b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/Strikethrough.java @@ -4,19 +4,23 @@ import org.commonmark.node.Delimited; /** - * A strikethrough node containing text and other inline nodes nodes as children. + * A strikethrough node containing text and other inline nodes as children. 
*/ public class Strikethrough extends CustomNode implements Delimited { - private static final String DELIMITER = "~~"; + private String delimiter; + + public Strikethrough(String delimiter) { + this.delimiter = delimiter; + } @Override public String getOpeningDelimiter() { - return DELIMITER; + return delimiter; } @Override public String getClosingDelimiter() { - return DELIMITER; + return delimiter; } } diff --git a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/StrikethroughExtension.java b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/StrikethroughExtension.java index 3d0839f11..364205aed 100644 --- a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/StrikethroughExtension.java +++ b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/StrikethroughExtension.java @@ -1,42 +1,78 @@ package org.commonmark.ext.gfm.strikethrough; import org.commonmark.Extension; -import org.commonmark.renderer.text.TextContentRenderer; -import org.commonmark.renderer.text.TextContentNodeRendererContext; -import org.commonmark.renderer.text.TextContentNodeRendererFactory; import org.commonmark.ext.gfm.strikethrough.internal.StrikethroughDelimiterProcessor; import org.commonmark.ext.gfm.strikethrough.internal.StrikethroughHtmlNodeRenderer; +import org.commonmark.ext.gfm.strikethrough.internal.StrikethroughMarkdownNodeRenderer; import org.commonmark.ext.gfm.strikethrough.internal.StrikethroughTextContentNodeRenderer; -import org.commonmark.renderer.html.HtmlRenderer; -import org.commonmark.renderer.html.HtmlNodeRendererContext; -import org.commonmark.renderer.html.HtmlNodeRendererFactory; import org.commonmark.parser.Parser; import org.commonmark.renderer.NodeRenderer; +import org.commonmark.renderer.html.HtmlNodeRendererContext; +import org.commonmark.renderer.html.HtmlNodeRendererFactory; +import org.commonmark.renderer.html.HtmlRenderer; 
+import org.commonmark.renderer.markdown.MarkdownNodeRendererContext; +import org.commonmark.renderer.markdown.MarkdownNodeRendererFactory; +import org.commonmark.renderer.markdown.MarkdownRenderer; +import org.commonmark.renderer.text.TextContentNodeRendererContext; +import org.commonmark.renderer.text.TextContentNodeRendererFactory; +import org.commonmark.renderer.text.TextContentRenderer; + +import java.util.Set; /** - * Extension for GFM strikethrough using ~~ (GitHub Flavored Markdown). + * Extension for GFM strikethrough using {@code ~} or {@code ~~} (GitHub Flavored Markdown). + *
<p>Example input:</p>
+ * <pre>{@code ~foo~ or ~~bar~~}</pre>
+ * <p>Example output (HTML):</p>
+ * <pre>{@code <del>foo</del> or <del>bar</del>}</pre>
*

- * Create it with {@link #create()} and then configure it on the builders + * Create the extension with {@link #create()} and then add it to the parser and renderer builders * ({@link org.commonmark.parser.Parser.Builder#extensions(Iterable)}, * {@link HtmlRenderer.Builder#extensions(Iterable)}). *

*

* The parsed strikethrough text regions are turned into {@link Strikethrough} nodes. *

+ *

+ * If you have another extension that only uses a single tilde ({@code ~}) syntax, you will have to configure this + * {@link StrikethroughExtension} to only accept the double tilde syntax, like this: + *

+ * <pre>
+ *     {@code
+ *     StrikethroughExtension.builder().requireTwoTildes(true).build();
+ *     }
+ * </pre>
+ *

+ * If you don't do that, there's a conflict between the two extensions and you will get an + * {@link IllegalArgumentException} when constructing the parser. + *

*/ public class StrikethroughExtension implements Parser.ParserExtension, HtmlRenderer.HtmlRendererExtension, - TextContentRenderer.TextContentRendererExtension { + TextContentRenderer.TextContentRendererExtension, MarkdownRenderer.MarkdownRendererExtension { - private StrikethroughExtension() { + private final boolean requireTwoTildes; + + private StrikethroughExtension(Builder builder) { + this.requireTwoTildes = builder.requireTwoTildes; } + /** + * @return the extension with default options + */ public static Extension create() { - return new StrikethroughExtension(); + return builder().build(); + } + + /** + * @return a builder to configure the behavior of the extension + */ + public static Builder builder() { + return new Builder(); } @Override public void extend(Parser.Builder parserBuilder) { - parserBuilder.customDelimiterProcessor(new StrikethroughDelimiterProcessor()); + parserBuilder.customDelimiterProcessor(new StrikethroughDelimiterProcessor(requireTwoTildes)); } @Override @@ -58,4 +94,41 @@ public NodeRenderer create(TextContentNodeRendererContext context) { } }); } + + @Override + public void extend(MarkdownRenderer.Builder rendererBuilder) { + rendererBuilder.nodeRendererFactory(new MarkdownNodeRendererFactory() { + @Override + public NodeRenderer create(MarkdownNodeRendererContext context) { + return new StrikethroughMarkdownNodeRenderer(context); + } + + @Override + public Set getSpecialCharacters() { + return Set.of('~'); + } + }); + } + + public static class Builder { + + private boolean requireTwoTildes = false; + + /** + * @param requireTwoTildes Whether two tilde characters ({@code ~~}) are required for strikethrough or whether + * one is also enough. Default is {@code false}; both a single tilde and two tildes can be used for strikethrough. 
+ * @return {@code this} + */ + public Builder requireTwoTildes(boolean requireTwoTildes) { + this.requireTwoTildes = requireTwoTildes; + return this; + } + + /** + * @return a configured extension + */ + public Extension build() { + return new StrikethroughExtension(this); + } + } } diff --git a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughDelimiterProcessor.java b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughDelimiterProcessor.java index dd881b419..4657106ab 100644 --- a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughDelimiterProcessor.java +++ b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughDelimiterProcessor.java @@ -2,12 +2,24 @@ import org.commonmark.ext.gfm.strikethrough.Strikethrough; import org.commonmark.node.Node; +import org.commonmark.node.Nodes; +import org.commonmark.node.SourceSpans; import org.commonmark.node.Text; import org.commonmark.parser.delimiter.DelimiterProcessor; import org.commonmark.parser.delimiter.DelimiterRun; public class StrikethroughDelimiterProcessor implements DelimiterProcessor { + private final boolean requireTwoTildes; + + public StrikethroughDelimiterProcessor() { + this(false); + } + + public StrikethroughDelimiterProcessor(boolean requireTwoTildes) { + this.requireTwoTildes = requireTwoTildes; + } + @Override public char getOpeningCharacter() { return '~'; @@ -20,31 +32,36 @@ public char getClosingCharacter() { @Override public int getMinLength() { - return 2; + return requireTwoTildes ? 2 : 1; } @Override - public int getDelimiterUse(DelimiterRun opener, DelimiterRun closer) { - if (opener.length() >= 2 && closer.length() >= 2) { - // Use exactly two delimiters even if we have more, and don't care about internal openers/closers. 
- return 2; + public int process(DelimiterRun openingRun, DelimiterRun closingRun) { + if (openingRun.length() == closingRun.length() && openingRun.length() <= 2) { + // GitHub only accepts either one or two delimiters, but not a mix or more than that. + + Text opener = openingRun.getOpener(); + + // Wrap nodes between delimiters in strikethrough. + String delimiter = openingRun.length() == 1 ? opener.getLiteral() : opener.getLiteral() + opener.getLiteral(); + Node strikethrough = new Strikethrough(delimiter); + + SourceSpans sourceSpans = new SourceSpans(); + sourceSpans.addAllFrom(openingRun.getOpeners(openingRun.length())); + + for (Node node : Nodes.between(opener, closingRun.getCloser())) { + strikethrough.appendChild(node); + sourceSpans.addAll(node.getSourceSpans()); + } + + sourceSpans.addAllFrom(closingRun.getClosers(closingRun.length())); + strikethrough.setSourceSpans(sourceSpans.getSourceSpans()); + + opener.insertAfter(strikethrough); + + return openingRun.length(); } else { return 0; } } - - @Override - public void process(Text opener, Text closer, int delimiterCount) { - // Wrap nodes between delimiters in strikethrough. 
- Node strikethrough = new Strikethrough(); - - Node tmp = opener.getNext(); - while (tmp != null && tmp != closer) { - Node next = tmp.getNext(); - strikethrough.appendChild(tmp); - tmp = next; - } - - opener.insertAfter(strikethrough); - } } diff --git a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughHtmlNodeRenderer.java b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughHtmlNodeRenderer.java index 4dd0de39b..b1a82cb03 100644 --- a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughHtmlNodeRenderer.java +++ b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughHtmlNodeRenderer.java @@ -1,10 +1,9 @@ package org.commonmark.ext.gfm.strikethrough.internal; -import org.commonmark.renderer.html.HtmlWriter; -import org.commonmark.renderer.html.HtmlNodeRendererContext; import org.commonmark.node.Node; +import org.commonmark.renderer.html.HtmlNodeRendererContext; +import org.commonmark.renderer.html.HtmlWriter; -import java.util.Collections; import java.util.Map; public class StrikethroughHtmlNodeRenderer extends StrikethroughNodeRenderer { @@ -19,7 +18,7 @@ public StrikethroughHtmlNodeRenderer(HtmlNodeRendererContext context) { @Override public void render(Node node) { - Map attributes = context.extendAttributes(node, "del", Collections.emptyMap()); + Map attributes = context.extendAttributes(node, "del", Map.of()); html.tag("del", attributes); renderChildren(node); html.tag("/del"); diff --git a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughMarkdownNodeRenderer.java b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughMarkdownNodeRenderer.java new file mode 100644 index 000000000..1c91dd64f --- /dev/null +++ 
b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughMarkdownNodeRenderer.java @@ -0,0 +1,34 @@ +package org.commonmark.ext.gfm.strikethrough.internal; + +import org.commonmark.ext.gfm.strikethrough.Strikethrough; +import org.commonmark.node.Node; +import org.commonmark.renderer.markdown.MarkdownNodeRendererContext; +import org.commonmark.renderer.markdown.MarkdownWriter; + +public class StrikethroughMarkdownNodeRenderer extends StrikethroughNodeRenderer { + + private final MarkdownNodeRendererContext context; + private final MarkdownWriter writer; + + public StrikethroughMarkdownNodeRenderer(MarkdownNodeRendererContext context) { + this.context = context; + this.writer = context.getWriter(); + } + + @Override + public void render(Node node) { + Strikethrough strikethrough = (Strikethrough) node; + writer.raw(strikethrough.getOpeningDelimiter()); + renderChildren(node); + writer.raw(strikethrough.getClosingDelimiter()); + } + + private void renderChildren(Node parent) { + Node node = parent.getFirstChild(); + while (node != null) { + Node next = node.getNext(); + context.render(node); + node = next; + } + } +} diff --git a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughNodeRenderer.java b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughNodeRenderer.java index 4f3a12618..18ed21887 100644 --- a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughNodeRenderer.java +++ b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughNodeRenderer.java @@ -4,13 +4,12 @@ import org.commonmark.node.Node; import org.commonmark.renderer.NodeRenderer; -import java.util.Collections; import java.util.Set; abstract class StrikethroughNodeRenderer implements NodeRenderer { @Override public Set> getNodeTypes() { - 
return Collections.>singleton(Strikethrough.class); + return Set.of(Strikethrough.class); } } diff --git a/commonmark-ext-gfm-strikethrough/src/main/resources/META-INF/LICENSE.txt b/commonmark-ext-gfm-strikethrough/src/main/resources/META-INF/LICENSE.txt new file mode 100644 index 000000000..b09e367ce --- /dev/null +++ b/commonmark-ext-gfm-strikethrough/src/main/resources/META-INF/LICENSE.txt @@ -0,0 +1,23 @@ +Copyright (c) 2015, Atlassian Pty Ltd +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughMarkdownRendererTest.java b/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughMarkdownRendererTest.java new file mode 100644 index 000000000..c497a4db3 --- /dev/null +++ b/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughMarkdownRendererTest.java @@ -0,0 +1,35 @@ +package org.commonmark.ext.gfm.strikethrough; + +import org.commonmark.Extension; +import org.commonmark.parser.Parser; +import org.commonmark.renderer.markdown.MarkdownRenderer; +import org.junit.jupiter.api.Test; + +import java.util.Set; + +import static org.assertj.core.api.Assertions.assertThat; + +public class StrikethroughMarkdownRendererTest { /* Round-trip tests: Markdown is parsed and rendered back to Markdown with the strikethrough extension enabled on both the parser and the renderer. */ + + private static final Set EXTENSIONS = Set.of(StrikethroughExtension.create()); + private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build(); + private static final MarkdownRenderer RENDERER = MarkdownRenderer.builder().extensions(EXTENSIONS).build(); + + @Test + public void testStrikethrough() { + assertRoundTrip("~foo~ ~bar~\n"); /* single-tilde delimiters */ + assertRoundTrip("~~foo~~ ~~bar~~\n"); /* double-tilde delimiters */ + assertRoundTrip("~~f\\~oo~~ ~~bar~~\n"); /* backslash-escaped tilde between double-tilde delimiters */ + + assertRoundTrip("\\~foo\\~\n"); /* backslash-escaped tildes around plain text must come back unchanged */ + } + + protected String render(String source) { /* parses source and renders the resulting document with the Markdown renderer */ + return RENDERER.render(PARSER.parse(source)); + } + + private void assertRoundTrip(String input) { /* the rendered output must equal the input exactly */ + String rendered = render(input); + assertThat(rendered).isEqualTo(input); + } +} diff --git a/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughSpecTest.java b/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughSpecTest.java new file mode 100644 index 000000000..f1199b521 --- /dev/null +++ b/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughSpecTest.java @@ -0,0 +1,42 @@ +package 
org.commonmark.ext.gfm.strikethrough; + +import org.commonmark.Extension; +import org.commonmark.parser.Parser; +import org.commonmark.renderer.html.HtmlRenderer; +import org.commonmark.testutil.RenderingTestCase; +import org.commonmark.testutil.TestResources; +import org.commonmark.testutil.example.Example; +import org.commonmark.testutil.example.ExampleReader; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.Parameter; +import org.junit.jupiter.params.ParameterizedClass; +import org.junit.jupiter.params.provider.MethodSource; + +import java.util.List; +import java.util.Set; + +@ParameterizedClass +@MethodSource("data") +public class StrikethroughSpecTest extends RenderingTestCase { /* Class-level parameterized test; the examples are supplied by the static data() method named in MethodSource. */ + + private static final Set EXTENSIONS = Set.of(StrikethroughExtension.create()); + private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build(); + private static final HtmlRenderer RENDERER = HtmlRenderer.builder().extensions(EXTENSIONS).build(); + + @Parameter + Example example; /* the example under test for the current invocation */ + + static List data() { /* reads examples from the GFM spec resource; the second argument presumably selects the strikethrough examples - TODO confirm against ExampleReader */ + return ExampleReader.readExamples(TestResources.getGfmSpec(), "strikethrough"); + } + + @Test + public void testHtmlRendering() { /* passes the example's source and its expected HTML to assertRendering */ + assertRendering(example.getSource(), example.getHtml()); + } + + @Override + protected String render(String source) { /* parses source and renders it to HTML, both with the strikethrough extension */ + return RENDERER.render(PARSER.parse(source)); + } +} diff --git a/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughTest.java b/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughTest.java index 225977854..c29391cdd 100644 --- a/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughTest.java +++ b/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughTest.java @@ -2,32 +2,38 @@ import org.commonmark.Extension; import org.commonmark.node.Node; +import org.commonmark.node.Paragraph; +import 
org.commonmark.node.SourceSpan; +import org.commonmark.node.Text; +import org.commonmark.parser.IncludeSourceSpans; import org.commonmark.parser.Parser; +import org.commonmark.parser.delimiter.DelimiterProcessor; +import org.commonmark.parser.delimiter.DelimiterRun; import org.commonmark.renderer.html.HtmlRenderer; import org.commonmark.renderer.text.TextContentRenderer; import org.commonmark.testutil.RenderingTestCase; -import org.junit.Test; +import org.junit.jupiter.api.Test; -import java.util.Collections; +import java.util.List; import java.util.Set; -import static org.junit.Assert.assertEquals; +import static org.assertj.core.api.Assertions.assertThat; public class StrikethroughTest extends RenderingTestCase { - private static final Set EXTENSIONS = Collections.singleton(StrikethroughExtension.create()); + private static final Set EXTENSIONS = Set.of(StrikethroughExtension.create()); private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build(); private static final HtmlRenderer HTML_RENDERER = HtmlRenderer.builder().extensions(EXTENSIONS).build(); private static final TextContentRenderer CONTENT_RENDERER = TextContentRenderer.builder() .extensions(EXTENSIONS).build(); @Test - public void oneTildeIsNotEnough() { - assertRendering("~foo~", "

~foo~

\n"); + public void oneTildeIsEnough() { + assertRendering("~foo~", "

foo

\n"); } @Test - public void twoTildesYay() { + public void twoTildesWorksToo() { assertRendering("~~foo~~", "

foo

\n"); } @@ -44,23 +50,22 @@ public void unmatched() { @Test public void threeInnerThree() { - assertRendering("a ~~~foo~~~", "

a ~foo~

\n"); + assertRendering("a ~~~foo~~~", "

a ~~~foo~~~

\n"); } @Test public void twoInnerThree() { - assertRendering("~~foo~~~", "

foo~

\n"); + assertRendering("~~foo~~~", "

~~foo~~~

\n"); } @Test public void tildesInside() { assertRendering("~~foo~bar~~", "

foo~bar

\n"); assertRendering("~~foo~~bar~~", "

foobar~~

\n"); - assertRendering("~~foo~~~bar~~", "

foo~bar~~

\n"); - assertRendering("~~foo~~~~bar~~", "

foobar

\n"); - assertRendering("~~foo~~~~~bar~~", "

foo~bar

\n"); - assertRendering("~~foo~~~~~~bar~~", "

foo~~bar

\n"); - assertRendering("~~foo~~~~~~~bar~~", "

foo~~~bar

\n"); + assertRendering("~~foo~~~bar~~", "

foo~~~bar

\n"); + assertRendering("~~foo~~~~bar~~", "

foo~~~~bar

\n"); + assertRendering("~~foo~~~~~bar~~", "

foo~~~~~bar

\n"); + assertRendering("~~foo~~~~~~bar~~", "

foo~~~~~~bar

\n"); } @Test @@ -79,18 +84,69 @@ public void insideBlockQuote() { public void delimited() { Node document = PARSER.parse("~~foo~~"); Strikethrough strikethrough = (Strikethrough) document.getFirstChild().getFirstChild(); - assertEquals("~~", strikethrough.getOpeningDelimiter()); - assertEquals("~~", strikethrough.getClosingDelimiter()); + assertThat(strikethrough.getOpeningDelimiter()).isEqualTo("~~"); + assertThat(strikethrough.getClosingDelimiter()).isEqualTo("~~"); } @Test public void textContentRenderer() { Node document = PARSER.parse("~~foo~~"); - assertEquals("/foo/", CONTENT_RENDERER.render(document)); + assertThat(CONTENT_RENDERER.render(document)).isEqualTo("/foo/"); + } + + @Test + public void requireTwoTildesOption() { + Parser parser = Parser.builder() + .extensions(Set.of(StrikethroughExtension.builder() + .requireTwoTildes(true) + .build())) + .customDelimiterProcessor(new SubscriptDelimiterProcessor()) + .build(); + + Node document = parser.parse("~foo~ ~~bar~~"); + assertThat(CONTENT_RENDERER.render(document)).isEqualTo("(sub)foo(/sub) /bar/"); + } + + @Test + public void sourceSpans() { + Parser parser = Parser.builder() + .extensions(EXTENSIONS) + .includeSourceSpans(IncludeSourceSpans.BLOCKS_AND_INLINES) + .build(); + + Node document = parser.parse("hey ~~there~~\n"); + Paragraph block = (Paragraph) document.getFirstChild(); + Node strikethrough = block.getLastChild(); + assertThat(strikethrough.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 4, 4, 9))); } @Override protected String render(String source) { return HTML_RENDERER.render(PARSER.parse(source)); } + + private static class SubscriptDelimiterProcessor implements DelimiterProcessor { + + @Override + public char getOpeningCharacter() { + return '~'; + } + + @Override + public char getClosingCharacter() { + return '~'; + } + + @Override + public int getMinLength() { + return 1; + } + + @Override + public int process(DelimiterRun openingRun, DelimiterRun closingRun) { + 
openingRun.getOpener().insertAfter(new Text("(sub)")); + closingRun.getCloser().insertBefore(new Text("(/sub)")); + return 1; + } + } } diff --git a/commonmark-ext-gfm-tables/.settings/org.eclipse.core.runtime.prefs b/commonmark-ext-gfm-tables/.settings/org.eclipse.core.runtime.prefs deleted file mode 100644 index 5a0ad22d2..000000000 --- a/commonmark-ext-gfm-tables/.settings/org.eclipse.core.runtime.prefs +++ /dev/null @@ -1,2 +0,0 @@ -eclipse.preferences.version=1 -line.separator=\n diff --git a/commonmark-ext-gfm-tables/.settings/org.eclipse.jdt.core.prefs b/commonmark-ext-gfm-tables/.settings/org.eclipse.jdt.core.prefs deleted file mode 100644 index 3c0d27c8f..000000000 --- a/commonmark-ext-gfm-tables/.settings/org.eclipse.jdt.core.prefs +++ /dev/null @@ -1,290 +0,0 @@ -eclipse.preferences.version=1 -org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7 -org.eclipse.jdt.core.compiler.compliance=1.7 -org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning -org.eclipse.jdt.core.compiler.source=1.7 -org.eclipse.jdt.core.formatter.align_type_members_on_columns=false -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_annotation=0 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=16 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call=16 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation=16 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression=16 -org.eclipse.jdt.core.formatter.alignment_for_assignment=0 -org.eclipse.jdt.core.formatter.alignment_for_binary_expression=16 -org.eclipse.jdt.core.formatter.alignment_for_compact_if=16 -org.eclipse.jdt.core.formatter.alignment_for_conditional_expression=80 -org.eclipse.jdt.core.formatter.alignment_for_enum_constants=0 -org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer=16 
-org.eclipse.jdt.core.formatter.alignment_for_method_declaration=0 -org.eclipse.jdt.core.formatter.alignment_for_multiple_fields=16 -org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_resources_in_try=80 -org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation=16 -org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_union_type_in_multicatch=16 -org.eclipse.jdt.core.formatter.blank_lines_after_imports=1 -org.eclipse.jdt.core.formatter.blank_lines_after_package=1 -org.eclipse.jdt.core.formatter.blank_lines_before_field=0 -org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration=0 -org.eclipse.jdt.core.formatter.blank_lines_before_imports=1 -org.eclipse.jdt.core.formatter.blank_lines_before_member_type=1 -org.eclipse.jdt.core.formatter.blank_lines_before_method=1 -org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk=1 -org.eclipse.jdt.core.formatter.blank_lines_before_package=0 -org.eclipse.jdt.core.formatter.blank_lines_between_import_groups=1 -org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations=1 -org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_array_initializer=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_block=end_of_line 
-org.eclipse.jdt.core.formatter.brace_position_for_block_in_case=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_enum_constant=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_lambda_body=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_method_declaration=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_switch=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_type_declaration=end_of_line -org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment=false -org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment=false -org.eclipse.jdt.core.formatter.comment.format_block_comments=true -org.eclipse.jdt.core.formatter.comment.format_header=false -org.eclipse.jdt.core.formatter.comment.format_html=true -org.eclipse.jdt.core.formatter.comment.format_javadoc_comments=true -org.eclipse.jdt.core.formatter.comment.format_line_comments=true -org.eclipse.jdt.core.formatter.comment.format_source_code=true -org.eclipse.jdt.core.formatter.comment.indent_parameter_description=true -org.eclipse.jdt.core.formatter.comment.indent_root_tags=true -org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags=insert -org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter=do not insert -org.eclipse.jdt.core.formatter.comment.line_length=120 -org.eclipse.jdt.core.formatter.comment.new_lines_at_block_boundaries=true -org.eclipse.jdt.core.formatter.comment.new_lines_at_javadoc_boundaries=true -org.eclipse.jdt.core.formatter.comment.preserve_white_space_between_code_and_line_comments=false -org.eclipse.jdt.core.formatter.compact_else_if=true -org.eclipse.jdt.core.formatter.continuation_indentation=2 -org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer=2 
-org.eclipse.jdt.core.formatter.disabling_tag=@formatter\:off -org.eclipse.jdt.core.formatter.enabling_tag=@formatter\:on -org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line=false -org.eclipse.jdt.core.formatter.format_line_comment_starting_on_first_column=true -org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header=true -org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header=true -org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header=true -org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header=true -org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases=true -org.eclipse.jdt.core.formatter.indent_empty_lines=false -org.eclipse.jdt.core.formatter.indent_statements_compare_to_block=true -org.eclipse.jdt.core.formatter.indent_statements_compare_to_body=true -org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases=true -org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch=true -org.eclipse.jdt.core.formatter.indentation.size=4 -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_field=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_method=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_package=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_type=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_label=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_after_type_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing=do not insert 
-org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter=insert -org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator=insert -org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_binary_operator=insert -org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters=insert -org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block=insert -org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast=insert -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert=insert -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case=insert 
-org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional=insert -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for=insert -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters=insert 
-org.eclipse.jdt.core.formatter.insert_space_after_ellipsis=insert -org.eclipse.jdt.core.formatter.insert_space_after_lambda_arrow=insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer=insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_try=do not insert 
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional=insert -org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for=insert -org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_try_resources=insert -org.eclipse.jdt.core.formatter.insert_space_after_unary_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter=insert -org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator=insert -org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_binary_operator=insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer=insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration=do not 
insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_try=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert=insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional=insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for=insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws=do not insert 
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_ellipsis=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_lambda_arrow=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration=insert 
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration=do not insert 
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_try=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while=insert -org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return=insert -org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw=insert -org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional=insert -org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_semicolon=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_try_resources=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_unary_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration=do not insert 
-org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation=do not insert -org.eclipse.jdt.core.formatter.join_lines_in_comments=true -org.eclipse.jdt.core.formatter.join_wrapped_lines=false -org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line=false -org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line=false -org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line=false -org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line=false -org.eclipse.jdt.core.formatter.lineSplit=120 -org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column=false -org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column=false -org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body=0 -org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve=1 -org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line=true -org.eclipse.jdt.core.formatter.tabulation.char=space -org.eclipse.jdt.core.formatter.tabulation.size=4 -org.eclipse.jdt.core.formatter.use_on_off_tags=false -org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations=false -org.eclipse.jdt.core.formatter.wrap_before_binary_operator=true -org.eclipse.jdt.core.formatter.wrap_before_or_operator_multicatch=true -org.eclipse.jdt.core.formatter.wrap_outer_expressions_when_nested=true -org.eclipse.jdt.core.javaFormatter=org.eclipse.jdt.core.defaultJavaFormatter diff --git a/commonmark-ext-gfm-tables/pom.xml b/commonmark-ext-gfm-tables/pom.xml index ad69f1795..5bd323168 100644 --- a/commonmark-ext-gfm-tables/pom.xml +++ b/commonmark-ext-gfm-tables/pom.xml @@ -2,9 +2,9 @@ 4.0.0 - com.atlassian.commonmark + org.commonmark commonmark-parent - 0.14.1-SNAPSHOT + 0.28.1-SNAPSHOT commonmark-ext-gfm-tables @@ 
-13,31 +13,15 @@ - com.atlassian.commonmark + org.commonmark commonmark - com.atlassian.commonmark + org.commonmark commonmark-test-util test - - - - org.apache.maven.plugins - maven-jar-plugin - - - - org.commonmark.ext.gfm.tables - - - - - - - diff --git a/commonmark-ext-gfm-tables/src/main/java/module-info.java b/commonmark-ext-gfm-tables/src/main/java/module-info.java new file mode 100644 index 000000000..7e6d2629c --- /dev/null +++ b/commonmark-ext-gfm-tables/src/main/java/module-info.java @@ -0,0 +1,5 @@ +module org.commonmark.ext.gfm.tables { + exports org.commonmark.ext.gfm.tables; + + requires transitive org.commonmark; +} diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableCell.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableCell.java index 61880c6c3..033c2dd04 100644 --- a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableCell.java +++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableCell.java @@ -9,6 +9,7 @@ public class TableCell extends CustomNode { private boolean header; private Alignment alignment; + private int width; /** * @return whether the cell is a header or not @@ -22,7 +23,7 @@ public void setHeader(boolean header) { } /** - * @return the cell alignment + * @return the cell alignment or {@code null} if no specific alignment */ public Alignment getAlignment() { return alignment; @@ -32,6 +33,17 @@ public void setAlignment(Alignment alignment) { this.alignment = alignment; } + /** + * @return the cell width (the number of dash and colon characters in the delimiter row of the table for this column) + */ + public int getWidth() { + return width; + } + + public void setWidth(int width) { + this.width = width; + } + /** * How the cell is aligned horizontally. 
*/ diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TablesExtension.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TablesExtension.java index 5707b0f14..f754b8276 100644 --- a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TablesExtension.java +++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TablesExtension.java @@ -3,16 +3,22 @@ import org.commonmark.Extension; import org.commonmark.ext.gfm.tables.internal.TableBlockParser; import org.commonmark.ext.gfm.tables.internal.TableHtmlNodeRenderer; +import org.commonmark.ext.gfm.tables.internal.TableMarkdownNodeRenderer; import org.commonmark.ext.gfm.tables.internal.TableTextContentNodeRenderer; import org.commonmark.parser.Parser; import org.commonmark.renderer.NodeRenderer; import org.commonmark.renderer.html.HtmlNodeRendererContext; import org.commonmark.renderer.html.HtmlNodeRendererFactory; import org.commonmark.renderer.html.HtmlRenderer; +import org.commonmark.renderer.markdown.MarkdownNodeRendererContext; +import org.commonmark.renderer.markdown.MarkdownNodeRendererFactory; +import org.commonmark.renderer.markdown.MarkdownRenderer; import org.commonmark.renderer.text.TextContentNodeRendererContext; import org.commonmark.renderer.text.TextContentNodeRendererFactory; import org.commonmark.renderer.text.TextContentRenderer; +import java.util.Set; + /** * Extension for GFM tables using "|" pipes (GitHub Flavored Markdown). *

@@ -27,7 +33,7 @@ * @see Tables (extension) in GitHub Flavored Markdown Spec */ public class TablesExtension implements Parser.ParserExtension, HtmlRenderer.HtmlRendererExtension, - TextContentRenderer.TextContentRendererExtension { + TextContentRenderer.TextContentRendererExtension, MarkdownRenderer.MarkdownRendererExtension { private TablesExtension() { } @@ -60,4 +66,19 @@ public NodeRenderer create(TextContentNodeRendererContext context) { } }); } + + @Override + public void extend(MarkdownRenderer.Builder rendererBuilder) { + rendererBuilder.nodeRendererFactory(new MarkdownNodeRendererFactory() { + @Override + public NodeRenderer create(MarkdownNodeRendererContext context) { + return new TableMarkdownNodeRenderer(context); + } + + @Override + public Set getSpecialCharacters() { + return Set.of('|'); + } + }); + } } diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableBlockParser.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableBlockParser.java index 112764ba0..57af128d8 100644 --- a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableBlockParser.java +++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableBlockParser.java @@ -3,8 +3,12 @@ import org.commonmark.ext.gfm.tables.*; import org.commonmark.node.Block; import org.commonmark.node.Node; +import org.commonmark.node.SourceSpan; import org.commonmark.parser.InlineParser; +import org.commonmark.parser.SourceLine; +import org.commonmark.parser.SourceLines; import org.commonmark.parser.block.*; +import org.commonmark.text.Characters; import java.util.ArrayList; import java.util.List; @@ -12,20 +16,19 @@ public class TableBlockParser extends AbstractBlockParser { private final TableBlock block = new TableBlock(); - private final List bodyLines = new ArrayList<>(); - private final List columns; - private final List headerCells; + private final List rowLines = 
new ArrayList<>(); + private final List columns; - private boolean nextIsSeparatorLine = true; + private boolean canHaveLazyContinuationLines = true; - private TableBlockParser(List columns, List headerCells) { + private TableBlockParser(List columns, SourceLine headerLine) { this.columns = columns; - this.headerCells = headerCells; + this.rowLines.add(headerLine); } @Override public boolean canHaveLazyContinuationLines() { - return true; + return canHaveLazyContinuationLines; } @Override @@ -35,7 +38,17 @@ public Block getBlock() { @Override public BlockContinue tryContinue(ParserState state) { - if (state.getLine().toString().contains("|")) { + CharSequence content = state.getLine().getContent(); + int pipe = Characters.find('|', content, state.getNextNonSpaceIndex()); + if (pipe != -1) { + if (pipe == state.getNextNonSpaceIndex()) { + // If we *only* have a pipe character (and whitespace), that is not a valid table row and ends the table. + if (Characters.skipSpaceTab(content, pipe + 1, content.length()) == content.length()) { + // We also don't want the pipe to be added via lazy continuation. + canHaveLazyContinuationLines = false; + return BlockContinue.none(); + } + } return BlockContinue.atIndex(state.getIndex()); } else { return BlockContinue.none(); @@ -43,38 +56,48 @@ public BlockContinue tryContinue(ParserState state) { } @Override - public void addLine(CharSequence line) { - if (nextIsSeparatorLine) { - nextIsSeparatorLine = false; - } else { - bodyLines.add(line); - } + public void addLine(SourceLine line) { + rowLines.add(line); } @Override public void parseInlines(InlineParser inlineParser) { - int headerColumns = headerCells.size(); + List sourceSpans = block.getSourceSpans(); + SourceSpan headerSourceSpan = !sourceSpans.isEmpty() ? 
sourceSpans.get(0) : null; Node head = new TableHead(); + if (headerSourceSpan != null) { + head.addSourceSpan(headerSourceSpan); + } block.appendChild(head); TableRow headerRow = new TableRow(); + headerRow.setSourceSpans(head.getSourceSpans()); head.appendChild(headerRow); + + List headerCells = split(rowLines.get(0)); + int headerColumns = headerCells.size(); for (int i = 0; i < headerColumns; i++) { - String cell = headerCells.get(i); + SourceLine cell = headerCells.get(i); TableCell tableCell = parseCell(cell, i, inlineParser); tableCell.setHeader(true); headerRow.appendChild(tableCell); } - Node body = null; - for (CharSequence rowLine : bodyLines) { - List cells = split(rowLine); + TableBody body = null; + // Body starts at index 2. 0 is header, 1 is separator. + for (int rowIndex = 2; rowIndex < rowLines.size(); rowIndex++) { + SourceLine rowLine = rowLines.get(rowIndex); + SourceSpan sourceSpan = rowIndex < sourceSpans.size() ? sourceSpans.get(rowIndex) : null; + List cells = split(rowLine); TableRow row = new TableRow(); + if (sourceSpan != null) { + row.addSourceSpan(sourceSpan); + } // Body can not have more columns than head for (int i = 0; i < headerColumns; i++) { - String cell = i < cells.size() ? cells.get(i) : ""; + SourceLine cell = i < cells.size() ? 
cells.get(i) : SourceLine.of("", null); TableCell tableCell = parseCell(cell, i, inlineParser); row.appendChild(tableCell); } @@ -85,33 +108,50 @@ public void parseInlines(InlineParser inlineParser) { block.appendChild(body); } body.appendChild(row); + body.addSourceSpan(sourceSpan); } } - private TableCell parseCell(String cell, int column, InlineParser inlineParser) { + private TableCell parseCell(SourceLine cell, int column, InlineParser inlineParser) { TableCell tableCell = new TableCell(); + SourceSpan sourceSpan = cell.getSourceSpan(); + if (sourceSpan != null) { + tableCell.addSourceSpan(sourceSpan); + } if (column < columns.size()) { - tableCell.setAlignment(columns.get(column)); + TableCellInfo cellInfo = columns.get(column); + tableCell.setAlignment(cellInfo.getAlignment()); + tableCell.setWidth(cellInfo.getWidth()); } - inlineParser.parse(cell.trim(), tableCell); + CharSequence content = cell.getContent(); + int start = Characters.skipSpaceTab(content, 0, content.length()); + int end = Characters.skipSpaceTabBackwards(content, content.length() - 1, start); + inlineParser.parse(SourceLines.of(cell.substring(start, end + 1)), tableCell); return tableCell; } - private static List split(CharSequence input) { - String line = input.toString().trim(); - if (line.startsWith("|")) { - line = line.substring(1); + private static List split(SourceLine line) { + CharSequence row = line.getContent(); + int nonSpace = Characters.skipSpaceTab(row, 0, row.length()); + int cellStart = nonSpace; + int cellEnd = row.length(); + if (row.charAt(nonSpace) == '|') { + // This row has leading/trailing pipes - skip the leading pipe + cellStart = nonSpace + 1; + // Strip whitespace from the end but not the pipe or we could miss an empty ("||") cell + int nonSpaceEnd = Characters.skipSpaceTabBackwards(row, row.length() - 1, cellStart); + cellEnd = nonSpaceEnd + 1; } - List cells = new ArrayList<>(); + List cells = new ArrayList<>(); StringBuilder sb = new StringBuilder(); - for 
(int i = 0; i < line.length(); i++) { - char c = line.charAt(i); + for (int i = cellStart; i < cellEnd; i++) { + char c = row.charAt(i); switch (c) { case '\\': - if (i + 1 < line.length() && line.charAt(i + 1) == '|') { + if (i + 1 < cellEnd && row.charAt(i + 1) == '|') { // Pipe is special for table parsing. An escaped pipe doesn't result in a new cell, but is // passed down to inline parsing as an unescaped pipe. Note that that applies even for the `\|` // in an input like `\\|` - in other words, table parsing doesn't support escaping backslashes. @@ -123,15 +163,20 @@ private static List split(CharSequence input) { } break; case '|': - cells.add(sb.toString()); + String content = sb.toString(); + + cells.add(SourceLine.of(content, line.substring(cellStart, i).getSourceSpan())); sb.setLength(0); + // + 1 to skip the pipe itself for the next cell's span + cellStart = i + 1; break; default: sb.append(c); } } if (sb.length() > 0) { - cells.add(sb.toString()); + String content = sb.toString(); + cells.add(SourceLine.of(content, line.substring(cellStart, line.getContent().length()).getSourceSpan())); } return cells; } @@ -144,11 +189,12 @@ private static List split(CharSequence input) { // -|- // |-|-| // --- | --- - private static List parseSeparator(CharSequence s) { - List columns = new ArrayList<>(); + private static List parseSeparator(CharSequence s) { + List columns = new ArrayList<>(); int pipes = 0; boolean valid = false; int i = 0; + int width = 0; while (i < s.length()) { char c = s.charAt(i); switch (c) { @@ -173,10 +219,12 @@ private static List parseSeparator(CharSequence s) { if (c == ':') { left = true; i++; + width++; } boolean haveDash = false; while (i < s.length() && s.charAt(i) == '-') { i++; + width++; haveDash = true; } if (!haveDash) { @@ -186,8 +234,10 @@ private static List parseSeparator(CharSequence s) { if (i < s.length() && s.charAt(i) == ':') { right = true; i++; + width++; } - columns.add(getAlignment(left, right)); + columns.add(new 
TableCellInfo(getAlignment(left, right), width)); + width = 0; // Next, need another pipe pipes = 0; break; @@ -223,17 +273,18 @@ public static class Factory extends AbstractBlockParserFactory { @Override public BlockStart tryStart(ParserState state, MatchedBlockParser matchedBlockParser) { - CharSequence line = state.getLine(); - CharSequence paragraph = matchedBlockParser.getParagraphContent(); - if (paragraph != null && paragraph.toString().contains("|") && !paragraph.toString().contains("\n")) { - CharSequence separatorLine = line.subSequence(state.getIndex(), line.length()); - List columns = parseSeparator(separatorLine); + List paragraphLines = matchedBlockParser.getParagraphLines().getLines(); + if (paragraphLines.size() >= 1 && Characters.find('|', paragraphLines.get(paragraphLines.size() - 1).getContent(), 0) != -1) { + SourceLine line = state.getLine(); + SourceLine separatorLine = line.substring(state.getIndex(), line.getContent().length()); + List columns = parseSeparator(separatorLine.getContent()); if (columns != null && !columns.isEmpty()) { - List headerCells = split(paragraph); + SourceLine paragraph = paragraphLines.get(paragraphLines.size() - 1); + List headerCells = split(paragraph); if (columns.size() >= headerCells.size()) { - return BlockStart.of(new TableBlockParser(columns, headerCells)) + return BlockStart.of(new TableBlockParser(columns, paragraph)) .atIndex(state.getIndex()) - .replaceActiveBlockParser(); + .replaceParagraphLines(1); } } } @@ -241,4 +292,21 @@ public BlockStart tryStart(ParserState state, MatchedBlockParser matchedBlockPar } } + private static class TableCellInfo { + private final TableCell.Alignment alignment; + private final int width; + + public TableCell.Alignment getAlignment() { + return alignment; + } + + public int getWidth() { + return width; + } + + public TableCellInfo(TableCell.Alignment alignment, int width) { + this.alignment = alignment; + this.width = width; + } + } } diff --git 
a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableHtmlNodeRenderer.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableHtmlNodeRenderer.java index a1de50aac..966c4c151 100644 --- a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableHtmlNodeRenderer.java +++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableHtmlNodeRenderer.java @@ -5,7 +5,6 @@ import org.commonmark.renderer.html.HtmlNodeRendererContext; import org.commonmark.renderer.html.HtmlWriter; -import java.util.Collections; import java.util.Map; public class TableHtmlNodeRenderer extends TableNodeRenderer { @@ -18,6 +17,7 @@ public TableHtmlNodeRenderer(HtmlNodeRendererContext context) { this.context = context; } + @Override protected void renderBlock(TableBlock tableBlock) { htmlWriter.line(); htmlWriter.tag("table", getAttributes(tableBlock, "table")); @@ -26,6 +26,7 @@ protected void renderBlock(TableBlock tableBlock) { htmlWriter.line(); } + @Override protected void renderHead(TableHead tableHead) { htmlWriter.line(); htmlWriter.tag("thead", getAttributes(tableHead, "thead")); @@ -34,6 +35,7 @@ protected void renderHead(TableHead tableHead) { htmlWriter.line(); } + @Override protected void renderBody(TableBody tableBody) { htmlWriter.line(); htmlWriter.tag("tbody", getAttributes(tableBody, "tbody")); @@ -42,6 +44,7 @@ protected void renderBody(TableBody tableBody) { htmlWriter.line(); } + @Override protected void renderRow(TableRow tableRow) { htmlWriter.line(); htmlWriter.tag("tr", getAttributes(tableRow, "tr")); @@ -50,6 +53,7 @@ protected void renderRow(TableRow tableRow) { htmlWriter.line(); } + @Override protected void renderCell(TableCell tableCell) { String tagName = tableCell.isHeader() ? 
"th" : "td"; htmlWriter.line(); @@ -60,14 +64,14 @@ protected void renderCell(TableCell tableCell) { } private Map getAttributes(Node node, String tagName) { - return context.extendAttributes(node, tagName, Collections.emptyMap()); + return context.extendAttributes(node, tagName, Map.of()); } private Map getCellAttributes(TableCell tableCell, String tagName) { if (tableCell.getAlignment() != null) { - return context.extendAttributes(tableCell, tagName, Collections.singletonMap("align", getAlignValue(tableCell.getAlignment()))); + return context.extendAttributes(tableCell, tagName, Map.of("align", getAlignValue(tableCell.getAlignment()))); } else { - return context.extendAttributes(tableCell, tagName, Collections.emptyMap()); + return context.extendAttributes(tableCell, tagName, Map.of()); } } diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableMarkdownNodeRenderer.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableMarkdownNodeRenderer.java new file mode 100644 index 000000000..b0705f579 --- /dev/null +++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableMarkdownNodeRenderer.java @@ -0,0 +1,88 @@ +package org.commonmark.ext.gfm.tables.internal; + +import org.commonmark.ext.gfm.tables.*; +import org.commonmark.node.Node; +import org.commonmark.renderer.markdown.MarkdownNodeRendererContext; +import org.commonmark.renderer.markdown.MarkdownWriter; +import org.commonmark.text.AsciiMatcher; + +import java.util.ArrayList; +import java.util.List; + +/** + * The Table node renderer that is needed for rendering GFM tables (GitHub Flavored Markdown) to text content. 
+ */ +public class TableMarkdownNodeRenderer extends TableNodeRenderer { + private final MarkdownWriter writer; + private final MarkdownNodeRendererContext context; + + private final AsciiMatcher pipe = AsciiMatcher.builder().c('|').build(); + + private final List columns = new ArrayList<>(); + + public TableMarkdownNodeRenderer(MarkdownNodeRendererContext context) { + this.writer = context.getWriter(); + this.context = context; + } + + @Override + protected void renderBlock(TableBlock node) { + columns.clear(); + writer.pushTight(true); + renderChildren(node); + writer.popTight(); + writer.block(); + } + + @Override + protected void renderHead(TableHead node) { + renderChildren(node); + for (TableCell.Alignment columnAlignment : columns) { + writer.raw('|'); + if (columnAlignment == TableCell.Alignment.LEFT) { + writer.raw(":---"); + } else if (columnAlignment == TableCell.Alignment.RIGHT) { + writer.raw("---:"); + } else if (columnAlignment == TableCell.Alignment.CENTER) { + writer.raw(":---:"); + } else { + writer.raw("---"); + } + } + writer.raw("|"); + writer.block(); + } + + @Override + protected void renderBody(TableBody node) { + renderChildren(node); + } + + @Override + protected void renderRow(TableRow node) { + renderChildren(node); + // Trailing | at the end of the line + writer.raw("|"); + writer.block(); + } + + @Override + protected void renderCell(TableCell node) { + if (node.getParent() != null && node.getParent().getParent() instanceof TableHead) { + columns.add(node.getAlignment()); + } + writer.raw("|"); + writer.pushRawEscape(pipe); + renderChildren(node); + writer.popRawEscape(); + } + + private void renderChildren(Node parent) { + Node node = parent.getFirstChild(); + while (node != null) { + Node next = node.getNext(); + context.render(node); + node = next; + } + } +} diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableNodeRenderer.java 
b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableNodeRenderer.java index 93478a30b..2982e1518 100644 --- a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableNodeRenderer.java +++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableNodeRenderer.java @@ -1,28 +1,22 @@ package org.commonmark.ext.gfm.tables.internal; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Set; - -import org.commonmark.ext.gfm.tables.TableBlock; -import org.commonmark.ext.gfm.tables.TableBody; -import org.commonmark.ext.gfm.tables.TableCell; -import org.commonmark.ext.gfm.tables.TableHead; -import org.commonmark.ext.gfm.tables.TableRow; +import org.commonmark.ext.gfm.tables.*; import org.commonmark.node.Node; import org.commonmark.renderer.NodeRenderer; +import java.util.Set; + abstract class TableNodeRenderer implements NodeRenderer { @Override public Set> getNodeTypes() { - return new HashSet<>(Arrays.asList( + return Set.of( TableBlock.class, TableHead.class, TableBody.class, TableRow.class, TableCell.class - )); + ); } @Override diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableTextContentNodeRenderer.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableTextContentNodeRenderer.java index 94b0e8665..0ba6894b5 100644 --- a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableTextContentNodeRenderer.java +++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableTextContentNodeRenderer.java @@ -22,49 +22,46 @@ public TableTextContentNodeRenderer(TextContentNodeRendererContext context) { this.context = context; } + @Override protected void renderBlock(TableBlock tableBlock) { + // Render rows tight + textContentWriter.pushTight(true); renderChildren(tableBlock); - if (tableBlock.getNext() != null) { - 
textContentWriter.write("\n"); - } + textContentWriter.popTight(); + textContentWriter.block(); } + @Override protected void renderHead(TableHead tableHead) { renderChildren(tableHead); } + @Override protected void renderBody(TableBody tableBody) { renderChildren(tableBody); } + @Override protected void renderRow(TableRow tableRow) { - textContentWriter.line(); renderChildren(tableRow); - textContentWriter.line(); + textContentWriter.block(); } + @Override protected void renderCell(TableCell tableCell) { renderChildren(tableCell); - textContentWriter.write('|'); - textContentWriter.whitespace(); - } - - private void renderLastCell(TableCell tableCell) { - renderChildren(tableCell); + // For the last cell in row, don't render the delimiter + if (tableCell.getNext() != null) { + textContentWriter.write('|'); + textContentWriter.whitespace(); + } } private void renderChildren(Node parent) { Node node = parent.getFirstChild(); while (node != null) { Node next = node.getNext(); - - // For last cell in row, we dont render the delimiter. - if (node instanceof TableCell && next == null) { - renderLastCell((TableCell) node); - } else { - context.render(node); - } - + context.render(node); node = next; } } diff --git a/commonmark-ext-gfm-tables/src/main/resources/META-INF/LICENSE.txt b/commonmark-ext-gfm-tables/src/main/resources/META-INF/LICENSE.txt new file mode 100644 index 000000000..b09e367ce --- /dev/null +++ b/commonmark-ext-gfm-tables/src/main/resources/META-INF/LICENSE.txt @@ -0,0 +1,23 @@ +Copyright (c) 2015, Atlassian Pty Ltd +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. 
+ +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TableMarkdownRendererTest.java b/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TableMarkdownRendererTest.java new file mode 100644 index 000000000..85c11206c --- /dev/null +++ b/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TableMarkdownRendererTest.java @@ -0,0 +1,75 @@ +package org.commonmark.ext.gfm.tables; + +import org.commonmark.Extension; +import org.commonmark.parser.Parser; +import org.commonmark.renderer.markdown.MarkdownRenderer; +import org.junit.jupiter.api.Test; + +import java.util.Set; + +import static org.assertj.core.api.Assertions.assertThat; + +public class TableMarkdownRendererTest { + + private static final Set EXTENSIONS = Set.of(TablesExtension.create()); + private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build(); + private static final MarkdownRenderer RENDERER = MarkdownRenderer.builder().extensions(EXTENSIONS).build(); + + 
@Test + public void testHeadNoBody() { + assertRoundTrip("|Abc|\n|---|\n"); + assertRoundTrip("|Abc|Def|\n|---|---|\n"); + assertRoundTrip("|Abc||\n|---|---|\n"); + } + + @Test + public void testHeadAndBody() { + assertRoundTrip("|Abc|\n|---|\n|1|\n"); + assertRoundTrip("|Abc|Def|\n|---|---|\n|1|2|\n"); + } + + @Test + public void testBodyHasFewerColumns() { + // Could try not to write empty trailing cells but this is fine too + assertRoundTrip("|Abc|Def|\n|---|---|\n|1||\n"); + } + + @Test + public void testAlignment() { + assertRoundTrip("|Abc|Def|\n|:---|---|\n|1|2|\n"); + assertRoundTrip("|Abc|Def|\n|---|---:|\n|1|2|\n"); + assertRoundTrip("|Abc|Def|\n|:---:|:---:|\n|1|2|\n"); + } + + @Test + public void testInsideBlockQuote() { + assertRoundTrip("> |Abc|Def|\n> |---|---|\n> |1|2|\n"); + } + + @Test + public void testMultipleTables() { + assertRoundTrip("|Abc|Def|\n|---|---|\n\n|One|\n|---|\n|Only|\n"); + } + + @Test + public void testEscaping() { + assertRoundTrip("|Abc|Def|\n|---|---|\n|Pipe in|text \\||\n"); + assertRoundTrip("|Abc|Def|\n|---|---|\n|Pipe in|code `\\|`|\n"); + assertRoundTrip("|Abc|Def|\n|---|---|\n|Inline HTML|Foo\\|bar|\n"); + } + + @Test + public void testEscaped() { + // `|` in Text nodes needs to be escaped, otherwise the generated Markdown does not get parsed back as a table + assertRoundTrip("\\|Abc\\|\n\\|---\\|\n"); + } + + protected String render(String source) { + return RENDERER.render(PARSER.parse(source)); + } + + private void assertRoundTrip(String input) { + String rendered = render(input); + assertThat(rendered).isEqualTo(input); + } +} diff --git a/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TablesSpecTest.java b/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TablesSpecTest.java index 12c806e32..e7f3db4d1 100644 --- a/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TablesSpecTest.java +++ 
b/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TablesSpecTest.java @@ -7,39 +7,27 @@ import org.commonmark.testutil.TestResources; import org.commonmark.testutil.example.Example; import org.commonmark.testutil.example.ExampleReader; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.Parameter; +import org.junit.jupiter.params.ParameterizedClass; +import org.junit.jupiter.params.provider.MethodSource; -import java.util.ArrayList; -import java.util.Collections; import java.util.List; import java.util.Set; -@RunWith(Parameterized.class) +@ParameterizedClass +@MethodSource("data") public class TablesSpecTest extends RenderingTestCase { - private static final Set EXTENSIONS = Collections.singleton(TablesExtension.create()); + private static final Set EXTENSIONS = Set.of(TablesExtension.create()); private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build(); private static final HtmlRenderer RENDERER = HtmlRenderer.builder().extensions(EXTENSIONS).build(); - private final Example example; + @Parameter + Example example; - public TablesSpecTest(Example example) { - this.example = example; - } - - @Parameters(name = "{0}") - public static List data() { - List examples = ExampleReader.readExamples(TestResources.class.getResource("/gfm-spec.txt")); - List data = new ArrayList<>(); - for (Example example : examples) { - if (example.getInfo().contains("table")) { - data.add(new Object[]{example}); - } - } - return data; + static List data() { + return ExampleReader.readExamples(TestResources.getGfmSpec(), "table"); } @Test diff --git a/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TablesTest.java b/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TablesTest.java index 563ae8c18..3f4b37d54 100644 --- 
a/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TablesTest.java +++ b/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TablesTest.java @@ -1,25 +1,25 @@ package org.commonmark.ext.gfm.tables; import org.commonmark.Extension; -import org.commonmark.node.Node; +import org.commonmark.node.*; +import org.commonmark.parser.IncludeSourceSpans; import org.commonmark.parser.Parser; import org.commonmark.renderer.html.AttributeProvider; import org.commonmark.renderer.html.AttributeProviderContext; import org.commonmark.renderer.html.AttributeProviderFactory; import org.commonmark.renderer.html.HtmlRenderer; import org.commonmark.testutil.RenderingTestCase; -import org.junit.Test; +import org.junit.jupiter.api.Test; -import java.util.Collections; +import java.util.List; import java.util.Map; import java.util.Set; -import static org.hamcrest.CoreMatchers.is; -import static org.junit.Assert.assertThat; +import static org.assertj.core.api.Assertions.assertThat; public class TablesTest extends RenderingTestCase { - private static final Set EXTENSIONS = Collections.singleton(TablesExtension.create()); + private static final Set EXTENSIONS = Set.of(TablesExtension.create()); private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build(); private static final HtmlRenderer RENDERER = HtmlRenderer.builder().extensions(EXTENSIONS).build(); @@ -78,11 +78,6 @@ public void separatorNeedsPipes() { assertRendering("Abc|Def\n|--- ---", "

Abc|Def\n|--- ---

\n"); } - @Test - public void headerMustBeOneLine() { - assertRendering("No\nAbc|Def\n---|---", "

No\nAbc|Def\n---|---

\n"); - } - @Test public void oneHeadNoBody() { assertRendering("Abc|Def\n---|---", "\n" + @@ -154,6 +149,24 @@ public void oneHeadOneBody() { "
\n"); } + @Test + public void spaceBeforeSeparator() { + assertRendering(" |Abc|Def|\n |---|---|\n |1|2|", "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "
AbcDef
12
\n"); + } + @Test public void separatorMustNotHaveLessPartsThanHead() { assertRendering("Abc|Def|Ghi\n---|---\n1|2|3", "

Abc|Def|Ghi\n---|---\n1|2|3

\n"); @@ -213,6 +226,49 @@ public void pipesOnOutside() { "\n"); } + @Test + public void pipesOnOutsideWhitespaceAfterHeader() { + assertRendering("|Abc|Def| \n|---|---|\n|1|2|", "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "
AbcDef
12
\n"); + } + + @Test + public void pipesOnOutsideZeroLengthHeaders() { + // This is literally what someone has done IRL - it helped to expose + // an issue with parsing the last header cell correctly + assertRendering("||center header||\n" + + "-|-------------|-\n" + + "1| 2 |3", + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "
center header
123
\n"); + } + @Test public void inlineElements() { assertRendering("*Abc*|Def\n---|---\n1|2", "\n" + @@ -599,6 +655,67 @@ public void issue142() { "
\n"); } + @Test + public void danglingPipe() { + assertRendering("Abc|Def\n" + + "---|---\n" + + "1|2\n" + + "|", "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "
AbcDef
12
\n" + + "

|

\n"); + + assertRendering("Abc|Def\n" + + "---|---\n" + + "1|2\n" + + " | ", "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "
AbcDef
12
\n" + + "

|

\n"); + } + + @Test + public void interruptsParagraph() { + assertRendering("text\n" + + "|a |\n" + + "|---|\n" + + "|b |", "

text

\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "
a
b
\n"); + } + @Test public void attributeProviderIsApplied() { AttributeProviderFactory factory = new AttributeProviderFactory() { @@ -627,7 +744,7 @@ public void setAttributes(Node node, String tagName, Map attribu .extensions(EXTENSIONS) .build(); String rendered = renderer.render(PARSER.parse("Abc|Def\n---|---\n1|2")); - assertThat(rendered, is("\n" + + assertThat(rendered).isEqualTo("
\n" + "\n" + "\n" + "\n" + @@ -640,7 +757,126 @@ public void setAttributes(Node node, String tagName, Map attribu "\n" + "\n" + "\n" + - "
Abc2
\n")); + "\n"); + } + + @Test + public void columnWidthIsRecorded() { + AttributeProviderFactory factory = new AttributeProviderFactory() { + @Override + public AttributeProvider create(AttributeProviderContext context) { + return new AttributeProvider() { + @Override + public void setAttributes(Node node, String tagName, Map attributes) { + if (node instanceof TableCell && "th".equals(tagName)) { + attributes.put("width", ((TableCell) node).getWidth() + "em"); + } + } + }; + } + }; + HtmlRenderer renderer = HtmlRenderer.builder() + .attributeProviderFactory(factory) + .extensions(EXTENSIONS) + .build(); + String rendered = renderer.render(PARSER.parse("Abc|Def\n-----|---\n1|2")); + assertThat(rendered).isEqualTo("\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "
AbcDef
12
\n"); + } + + @Test + public void sourceSpans() { + Parser parser = Parser.builder() + .extensions(EXTENSIONS) + .includeSourceSpans(IncludeSourceSpans.BLOCKS_AND_INLINES) + .build(); + Node document = parser.parse("Abc|Def\n---|---\n|1|2\n 3|four|\n|||\n"); + + TableBlock block = (TableBlock) document.getFirstChild(); + assertThat(block.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 0, 0, 7), SourceSpan.of(1, 0, 8, 7), + SourceSpan.of(2, 0, 16, 4), SourceSpan.of(3, 0, 21, 8), SourceSpan.of(4, 0, 30, 3))); + + TableHead head = (TableHead) block.getFirstChild(); + assertThat(head.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 0, 0, 7))); + + TableRow headRow = (TableRow) head.getFirstChild(); + assertThat(headRow.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 0, 0, 7))); + TableCell headRowCell1 = (TableCell) headRow.getFirstChild(); + TableCell headRowCell2 = (TableCell) headRow.getLastChild(); + assertThat(headRowCell1.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 0, 0, 3))); + assertThat(headRowCell1.getFirstChild().getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 0, 0, 3))); + assertThat(headRowCell2.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 4, 4, 3))); + assertThat(headRowCell2.getFirstChild().getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 4, 4, 3))); + + TableBody body = (TableBody) block.getLastChild(); + assertThat(body.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(2, 0, 16, 4), SourceSpan.of(3, 0, 21, 8), SourceSpan.of(4, 0, 30, 3))); + + TableRow bodyRow1 = (TableRow) body.getFirstChild(); + assertThat(bodyRow1.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(2, 0, 16, 4))); + TableCell bodyRow1Cell1 = (TableCell) bodyRow1.getFirstChild(); + TableCell bodyRow1Cell2 = (TableCell) bodyRow1.getLastChild(); + assertThat(bodyRow1Cell1.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(2, 1, 17, 1))); + assertThat(bodyRow1Cell1.getFirstChild().getSourceSpans()).isEqualTo(List.of(SourceSpan.of(2, 1, 17, 1))); + 
assertThat(bodyRow1Cell2.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(2, 3, 19, 1))); + assertThat(bodyRow1Cell2.getFirstChild().getSourceSpans()).isEqualTo(List.of(SourceSpan.of(2, 3, 19, 1))); + + TableRow bodyRow2 = (TableRow) body.getFirstChild().getNext(); + assertThat(bodyRow2.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(3, 0, 21, 8))); + TableCell bodyRow2Cell1 = (TableCell) bodyRow2.getFirstChild(); + TableCell bodyRow2Cell2 = (TableCell) bodyRow2.getLastChild(); + assertThat(bodyRow2Cell1.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(3, 1, 22, 1))); + assertThat(bodyRow2Cell1.getFirstChild().getSourceSpans()).isEqualTo(List.of(SourceSpan.of(3, 1, 22, 1))); + assertThat(bodyRow2Cell2.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(3, 3, 24, 4))); + assertThat(bodyRow2Cell2.getFirstChild().getSourceSpans()).isEqualTo(List.of(SourceSpan.of(3, 3, 24, 4))); + + TableRow bodyRow3 = (TableRow) body.getLastChild(); + assertThat(bodyRow3.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(4, 0, 30, 3))); + TableCell bodyRow3Cell1 = (TableCell) bodyRow3.getFirstChild(); + TableCell bodyRow3Cell2 = (TableCell) bodyRow3.getLastChild(); + assertThat(bodyRow3Cell1.getSourceSpans()).isEqualTo(List.of()); + assertThat(bodyRow3Cell2.getSourceSpans()).isEqualTo(List.of()); + } + + @Test + public void sourceSpansWhenInterrupting() { + var parser = Parser.builder() + .extensions(EXTENSIONS) + .includeSourceSpans(IncludeSourceSpans.BLOCKS_AND_INLINES) + .build(); + var document = parser.parse("a\n" + + "bc\n" + + "|de|\n" + + "|---|\n" + + "|fg|"); + + var paragraph = (Paragraph) document.getFirstChild(); + var text = (Text) paragraph.getFirstChild(); + assertThat(text.getLiteral()).isEqualTo("a"); + assertThat(text.getNext()).isInstanceOf(SoftLineBreak.class); + var text2 = (Text) text.getNext().getNext(); + assertThat(text2.getLiteral()).isEqualTo("bc"); + + assertThat(paragraph.getSourceSpans()).isEqualTo(List.of( + SourceSpan.of(0, 0, 0, 1), + 
SourceSpan.of(1, 0, 2, 2))); + + var table = (TableBlock) document.getLastChild(); + assertThat(table.getSourceSpans()).isEqualTo(List.of( + SourceSpan.of(2, 0, 5, 4), + SourceSpan.of(3, 0, 10, 5), + SourceSpan.of(4, 0, 16, 4))); } @Override diff --git a/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TablesTextContentTest.java b/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TablesTextContentTest.java index 6d859f1c9..966f097fd 100644 --- a/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TablesTextContentTest.java +++ b/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TablesTextContentTest.java @@ -2,138 +2,165 @@ import org.commonmark.Extension; import org.commonmark.parser.Parser; +import org.commonmark.renderer.text.LineBreakRendering; import org.commonmark.renderer.text.TextContentRenderer; -import org.commonmark.testutil.RenderingTestCase; -import org.junit.Test; +import org.commonmark.testutil.Asserts; +import org.junit.jupiter.api.Test; -import java.util.Collections; import java.util.Set; -public class TablesTextContentTest extends RenderingTestCase { +public class TablesTextContentTest { - private static final Set EXTENSIONS = Collections.singleton(TablesExtension.create()); + private static final Set EXTENSIONS = Set.of(TablesExtension.create()); private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build(); private static final TextContentRenderer RENDERER = TextContentRenderer.builder().extensions(EXTENSIONS).build(); + private static final TextContentRenderer COMPACT_RENDERER = TextContentRenderer.builder().extensions(EXTENSIONS).build(); + private static final TextContentRenderer SEPARATE_RENDERER = TextContentRenderer.builder().extensions(EXTENSIONS) + .lineBreakRendering(LineBreakRendering.SEPARATE_BLOCKS).build(); + private static final TextContentRenderer STRIPPED_RENDERER = TextContentRenderer.builder().extensions(EXTENSIONS) + 
.lineBreakRendering(LineBreakRendering.STRIP).build(); + @Test public void oneHeadNoBody() { - assertRendering("Abc|Def\n---|---", "Abc| Def\n"); + assertCompact("Abc|Def\n---|---", "Abc| Def"); } @Test public void oneColumnOneHeadNoBody() { - String expected = "Abc\n"; - assertRendering("|Abc\n|---\n", expected); - assertRendering("|Abc|\n|---|\n", expected); - assertRendering("Abc|\n---|\n", expected); + String expected = "Abc"; + assertCompact("|Abc\n|---\n", expected); + assertCompact("|Abc|\n|---|\n", expected); + assertCompact("Abc|\n---|\n", expected); // Pipe required on separator - assertRendering("|Abc\n---\n", "|Abc"); + assertCompact("|Abc\n---\n", "|Abc"); // Pipe required on head - assertRendering("Abc\n|---\n", "Abc\n|---"); + assertCompact("Abc\n|---\n", "Abc\n|---"); } @Test public void oneColumnOneHeadOneBody() { - String expected = "Abc\n1\n"; - assertRendering("|Abc\n|---\n|1", expected); - assertRendering("|Abc|\n|---|\n|1|", expected); - assertRendering("Abc|\n---|\n1|", expected); + String expected = "Abc\n1"; + assertCompact("|Abc\n|---\n|1", expected); + assertCompact("|Abc|\n|---|\n|1|", expected); + assertCompact("Abc|\n---|\n1|", expected); // Pipe required on separator - assertRendering("|Abc\n---\n|1", "|Abc\n|1"); + assertCompact("|Abc\n---\n|1", "|Abc\n|1"); } @Test public void oneHeadOneBody() { - assertRendering("Abc|Def\n---|---\n1|2", "Abc| Def\n1| 2\n"); + assertCompact("Abc|Def\n---|---\n1|2", "Abc| Def\n1| 2"); } @Test public void separatorMustNotHaveLessPartsThanHead() { - assertRendering("Abc|Def|Ghi\n---|---\n1|2|3", "Abc|Def|Ghi\n---|---\n1|2|3"); + assertCompact("Abc|Def|Ghi\n---|---\n1|2|3", "Abc|Def|Ghi\n---|---\n1|2|3"); } @Test public void padding() { - assertRendering(" Abc | Def \n --- | --- \n 1 | 2 ", "Abc| Def\n1| 2\n"); + assertCompact(" Abc | Def \n --- | --- \n 1 | 2 ", "Abc| Def\n1| 2"); } @Test public void paddingWithCodeBlockIndentation() { - assertRendering("Abc|Def\n---|---\n 1|2", "Abc| Def\n1| 2\n"); + 
assertCompact("Abc|Def\n---|---\n 1|2", "Abc| Def\n1| 2"); } @Test public void pipesOnOutside() { - assertRendering("|Abc|Def|\n|---|---|\n|1|2|", "Abc| Def\n1| 2\n"); + assertCompact("|Abc|Def|\n|---|---|\n|1|2|", "Abc| Def\n1| 2"); } @Test public void inlineElements() { - assertRendering("*Abc*|Def\n---|---\n1|2", "Abc| Def\n1| 2\n"); + assertCompact("*Abc*|Def\n---|---\n1|2", "Abc| Def\n1| 2"); } @Test public void escapedPipe() { - assertRendering("Abc|Def\n---|---\n1\\|2|20", "Abc| Def\n1|2| 20\n"); + assertCompact("Abc|Def\n---|---\n1\\|2|20", "Abc| Def\n1|2| 20"); } @Test public void alignLeft() { - assertRendering("Abc|Def\n:---|---\n1|2", "Abc| Def\n1| 2\n"); + assertCompact("Abc|Def\n:---|---\n1|2", "Abc| Def\n1| 2"); } @Test public void alignRight() { - assertRendering("Abc|Def\n---:|---\n1|2", "Abc| Def\n1| 2\n"); + assertCompact("Abc|Def\n---:|---\n1|2", "Abc| Def\n1| 2"); } @Test public void alignCenter() { - assertRendering("Abc|Def\n:---:|---\n1|2", "Abc| Def\n1| 2\n"); + assertCompact("Abc|Def\n:---:|---\n1|2", "Abc| Def\n1| 2"); } @Test public void alignCenterSecond() { - assertRendering("Abc|Def\n---|:---:\n1|2", "Abc| Def\n1| 2\n"); + assertCompact("Abc|Def\n---|:---:\n1|2", "Abc| Def\n1| 2"); } @Test public void alignLeftWithSpaces() { - assertRendering("Abc|Def\n :--- |---\n1|2", "Abc| Def\n1| 2\n"); + assertCompact("Abc|Def\n :--- |---\n1|2", "Abc| Def\n1| 2"); } @Test public void alignmentMarkerMustBeNextToDashes() { - assertRendering("Abc|Def\n: ---|---", "Abc|Def\n: ---|---"); - assertRendering("Abc|Def\n--- :|---", "Abc|Def\n--- :|---"); - assertRendering("Abc|Def\n---|: ---", "Abc|Def\n---|: ---"); - assertRendering("Abc|Def\n---|--- :", "Abc|Def\n---|--- :"); + assertCompact("Abc|Def\n: ---|---", "Abc|Def\n: ---|---"); + assertCompact("Abc|Def\n--- :|---", "Abc|Def\n--- :|---"); + assertCompact("Abc|Def\n---|: ---", "Abc|Def\n---|: ---"); + assertCompact("Abc|Def\n---|--- :", "Abc|Def\n---|--- :"); } @Test public void 
bodyCanNotHaveMoreColumnsThanHead() { - assertRendering("Abc|Def\n---|---\n1|2|3", "Abc| Def\n1| 2\n"); + assertCompact("Abc|Def\n---|---\n1|2|3", "Abc| Def\n1| 2"); } @Test public void bodyWithFewerColumnsThanHeadResultsInEmptyCells() { - assertRendering("Abc|Def|Ghi\n---|---|---\n1|2", "Abc| Def| Ghi\n1| 2| \n"); + assertCompact("Abc|Def|Ghi\n---|---|---\n1|2", "Abc| Def| Ghi\n1| 2| "); } @Test public void insideBlockQuote() { - assertRendering("> Abc|Def\n> ---|---\n> 1|2", "«\nAbc| Def\n1| 2\n»"); + assertCompact("> Abc|Def\n> ---|---\n> 1|2", "«Abc| Def\n1| 2»"); } @Test public void tableWithLazyContinuationLine() { - assertRendering("Abc|Def\n---|---\n1|2\nlazy", "Abc| Def\n1| 2\nlazy| \n"); + assertCompact("Abc|Def\n---|---\n1|2\nlazy", "Abc| Def\n1| 2\nlazy| "); + } + + @Test + public void tableBetweenOtherBlocks() { + var s = "Foo\n\nAbc|Def\n---|---\n1|2\n\nBar"; + assertCompact(s, "Foo\nAbc| Def\n1| 2\nBar"); + assertSeparate(s, "Foo\n\nAbc| Def\n1| 2\n\nBar"); + assertStripped(s, "Foo Abc| Def 1| 2 Bar"); + } + + private void assertCompact(String source, String expected) { + var doc = PARSER.parse(source); + var actualRendering = COMPACT_RENDERER.render(doc); + Asserts.assertRendering(source, expected, actualRendering); + } + + private void assertSeparate(String source, String expected) { + var doc = PARSER.parse(source); + var actualRendering = SEPARATE_RENDERER.render(doc); + Asserts.assertRendering(source, expected, actualRendering); } - @Override - protected String render(String source) { - return RENDERER.render(PARSER.parse(source)); + private void assertStripped(String source, String expected) { + var doc = PARSER.parse(source); + var actualRendering = STRIPPED_RENDERER.render(doc); + Asserts.assertRendering(source, expected, actualRendering); } } diff --git a/commonmark-ext-heading-anchor/pom.xml b/commonmark-ext-heading-anchor/pom.xml index ca797b7be..26d2d19b1 100644 --- a/commonmark-ext-heading-anchor/pom.xml +++ 
b/commonmark-ext-heading-anchor/pom.xml @@ -2,9 +2,9 @@ 4.0.0 - com.atlassian.commonmark + org.commonmark commonmark-parent - 0.14.1-SNAPSHOT + 0.28.1-SNAPSHOT commonmark-ext-heading-anchor @@ -13,31 +13,15 @@ - com.atlassian.commonmark + org.commonmark commonmark - com.atlassian.commonmark + org.commonmark commonmark-test-util test - - - - org.apache.maven.plugins - maven-jar-plugin - - - - org.commonmark.ext.heading.anchor - - - - - - - diff --git a/commonmark-ext-heading-anchor/src/main/java/module-info.java b/commonmark-ext-heading-anchor/src/main/java/module-info.java new file mode 100644 index 000000000..2369323a6 --- /dev/null +++ b/commonmark-ext-heading-anchor/src/main/java/module-info.java @@ -0,0 +1,5 @@ +module org.commonmark.ext.heading.anchor { + exports org.commonmark.ext.heading.anchor; + + requires transitive org.commonmark; +} diff --git a/commonmark-ext-heading-anchor/src/main/resources/META-INF/LICENSE.txt b/commonmark-ext-heading-anchor/src/main/resources/META-INF/LICENSE.txt new file mode 100644 index 000000000..b09e367ce --- /dev/null +++ b/commonmark-ext-heading-anchor/src/main/resources/META-INF/LICENSE.txt @@ -0,0 +1,23 @@ +Copyright (c) 2015, Atlassian Pty Ltd +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/commonmark-ext-heading-anchor/src/test/java/org/commonmark/ext/heading/anchor/HeadingAnchorConfigurationTest.java b/commonmark-ext-heading-anchor/src/test/java/org/commonmark/ext/heading/anchor/HeadingAnchorConfigurationTest.java index 5a7f47cd3..438a3a9bd 100644 --- a/commonmark-ext-heading-anchor/src/test/java/org/commonmark/ext/heading/anchor/HeadingAnchorConfigurationTest.java +++ b/commonmark-ext-heading-anchor/src/test/java/org/commonmark/ext/heading/anchor/HeadingAnchorConfigurationTest.java @@ -3,12 +3,11 @@ import org.commonmark.Extension; import org.commonmark.parser.Parser; import org.commonmark.renderer.html.HtmlRenderer; -import org.junit.Test; +import org.junit.jupiter.api.Test; -import java.util.Arrays; +import java.util.List; -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.MatcherAssert.assertThat; +import static org.assertj.core.api.Assertions.assertThat; public class HeadingAnchorConfigurationTest { @@ -21,34 +20,34 @@ private HtmlRenderer buildRenderer(String defaultId, String prefix, String suffi .idSuffix(suffix) .build(); return HtmlRenderer.builder() - .extensions(Arrays.asList(ext)) + .extensions(List.of(ext)) .build(); } @Test public void testDefaultConfigurationHasNoAdditions() { HtmlRenderer renderer = HtmlRenderer.builder() - .extensions(Arrays.asList(HeadingAnchorExtension.create())) + .extensions(List.of(HeadingAnchorExtension.create())) .build(); - assertThat(doRender(renderer, "# 
"), equalTo("

\n")); + assertThat(doRender(renderer, "# ")).isEqualTo("

\n"); } @Test public void testDefaultIdWhenNoTextOnHeader() { HtmlRenderer renderer = buildRenderer("defid", "", ""); - assertThat(doRender(renderer, "# "), equalTo("

\n")); + assertThat(doRender(renderer, "# ")).isEqualTo("

\n"); } @Test public void testPrefixAddedToHeader() { HtmlRenderer renderer = buildRenderer("", "pre-", ""); - assertThat(doRender(renderer, "# text"), equalTo("

text

\n")); + assertThat(doRender(renderer, "# text")).isEqualTo("

text

\n"); } @Test public void testSuffixAddedToHeader() { HtmlRenderer renderer = buildRenderer("", "", "-post"); - assertThat(doRender(renderer, "# text"), equalTo("

text

\n")); + assertThat(doRender(renderer, "# text")).isEqualTo("

text

\n"); } private String doRender(HtmlRenderer renderer, String text) { diff --git a/commonmark-ext-heading-anchor/src/test/java/org/commonmark/ext/heading/anchor/HeadingAnchorTest.java b/commonmark-ext-heading-anchor/src/test/java/org/commonmark/ext/heading/anchor/HeadingAnchorTest.java index 821aa9a84..3149542e3 100644 --- a/commonmark-ext-heading-anchor/src/test/java/org/commonmark/ext/heading/anchor/HeadingAnchorTest.java +++ b/commonmark-ext-heading-anchor/src/test/java/org/commonmark/ext/heading/anchor/HeadingAnchorTest.java @@ -4,14 +4,13 @@ import org.commonmark.parser.Parser; import org.commonmark.renderer.html.HtmlRenderer; import org.commonmark.testutil.RenderingTestCase; -import org.junit.Test; +import org.junit.jupiter.api.Test; -import java.util.Collections; import java.util.Set; public class HeadingAnchorTest extends RenderingTestCase { - private static final Set EXTENSIONS = Collections.singleton(HeadingAnchorExtension.create()); + private static final Set EXTENSIONS = Set.of(HeadingAnchorExtension.create()); private static final Parser PARSER = Parser.builder().build(); private static final HtmlRenderer RENDERER = HtmlRenderer.builder().extensions(EXTENSIONS).build(); diff --git a/commonmark-ext-image-attributes/pom.xml b/commonmark-ext-image-attributes/pom.xml new file mode 100644 index 000000000..e646bc3fd --- /dev/null +++ b/commonmark-ext-image-attributes/pom.xml @@ -0,0 +1,27 @@ + + + 4.0.0 + + org.commonmark + commonmark-parent + 0.28.1-SNAPSHOT + + + commonmark-ext-image-attributes + commonmark-java extension for image attributes + commonmark-java extension for adding attributes to images + + + + org.commonmark + commonmark + + + + org.commonmark + commonmark-test-util + test + + + + diff --git a/commonmark-ext-image-attributes/src/main/java/module-info.java b/commonmark-ext-image-attributes/src/main/java/module-info.java new file mode 100644 index 000000000..42d04a358 --- /dev/null +++ 
b/commonmark-ext-image-attributes/src/main/java/module-info.java @@ -0,0 +1,5 @@ +module org.commonmark.ext.image.attributes { + exports org.commonmark.ext.image.attributes; + + requires transitive org.commonmark; +} diff --git a/commonmark-ext-image-attributes/src/main/java/org/commonmark/ext/image/attributes/ImageAttributes.java b/commonmark-ext-image-attributes/src/main/java/org/commonmark/ext/image/attributes/ImageAttributes.java new file mode 100644 index 000000000..1ee43958b --- /dev/null +++ b/commonmark-ext-image-attributes/src/main/java/org/commonmark/ext/image/attributes/ImageAttributes.java @@ -0,0 +1,37 @@ +package org.commonmark.ext.image.attributes; + +import org.commonmark.node.CustomNode; +import org.commonmark.node.Delimited; + +import java.util.Map; + +/** + * A node containing text and other inline nodes as children. + */ +public class ImageAttributes extends CustomNode implements Delimited { + + private final Map attributes; + + public ImageAttributes(Map attributes) { + this.attributes = attributes; + } + + @Override + public String getOpeningDelimiter() { + return "{"; + } + + @Override + public String getClosingDelimiter() { + return "}"; + } + + public Map getAttributes() { + return attributes; + } + + @Override + protected String toStringAttributes() { + return "imageAttributes=" + attributes; + } +} diff --git a/commonmark-ext-image-attributes/src/main/java/org/commonmark/ext/image/attributes/ImageAttributesExtension.java b/commonmark-ext-image-attributes/src/main/java/org/commonmark/ext/image/attributes/ImageAttributesExtension.java new file mode 100644 index 000000000..28c6abab2 --- /dev/null +++ b/commonmark-ext-image-attributes/src/main/java/org/commonmark/ext/image/attributes/ImageAttributesExtension.java @@ -0,0 +1,45 @@ +package org.commonmark.ext.image.attributes; + +import org.commonmark.Extension; +import org.commonmark.ext.image.attributes.internal.ImageAttributesAttributeProvider; +import 
org.commonmark.ext.image.attributes.internal.ImageAttributesDelimiterProcessor; +import org.commonmark.parser.Parser; +import org.commonmark.renderer.html.AttributeProvider; +import org.commonmark.renderer.html.AttributeProviderContext; +import org.commonmark.renderer.html.AttributeProviderFactory; +import org.commonmark.renderer.html.HtmlRenderer; + +/** + * Extension for adding attributes to image nodes. + *

+ * Create it with {@link #create()} and then configure it on the builders + * ({@link org.commonmark.parser.Parser.Builder#extensions(Iterable)}, + * {@link HtmlRenderer.Builder#extensions(Iterable)}). + *

+ * + * @since 0.15.0 + */ +public class ImageAttributesExtension implements Parser.ParserExtension, HtmlRenderer.HtmlRendererExtension { + + private ImageAttributesExtension() { + } + + public static Extension create() { + return new ImageAttributesExtension(); + } + + @Override + public void extend(Parser.Builder parserBuilder) { + parserBuilder.customDelimiterProcessor(new ImageAttributesDelimiterProcessor()); + } + + @Override + public void extend(HtmlRenderer.Builder rendererBuilder) { + rendererBuilder.attributeProviderFactory(new AttributeProviderFactory() { + @Override + public AttributeProvider create(AttributeProviderContext context) { + return ImageAttributesAttributeProvider.create(); + } + }); + } +} diff --git a/commonmark-ext-image-attributes/src/main/java/org/commonmark/ext/image/attributes/internal/ImageAttributesAttributeProvider.java b/commonmark-ext-image-attributes/src/main/java/org/commonmark/ext/image/attributes/internal/ImageAttributesAttributeProvider.java new file mode 100644 index 000000000..edd9c4692 --- /dev/null +++ b/commonmark-ext-image-attributes/src/main/java/org/commonmark/ext/image/attributes/internal/ImageAttributesAttributeProvider.java @@ -0,0 +1,39 @@ +package org.commonmark.ext.image.attributes.internal; + +import org.commonmark.ext.image.attributes.ImageAttributes; +import org.commonmark.node.AbstractVisitor; +import org.commonmark.node.CustomNode; +import org.commonmark.node.Image; +import org.commonmark.node.Node; +import org.commonmark.renderer.html.AttributeProvider; + +import java.util.*; + +public class ImageAttributesAttributeProvider implements AttributeProvider { + + private ImageAttributesAttributeProvider() { + } + + public static ImageAttributesAttributeProvider create() { + return new ImageAttributesAttributeProvider(); + } + + @Override + public void setAttributes(Node node, String tagName, final Map attributes) { + if (node instanceof Image) { + node.accept(new AbstractVisitor() { + @Override + public void 
visit(CustomNode node) { + if (node instanceof ImageAttributes) { + ImageAttributes imageAttributes = (ImageAttributes) node; + for (Map.Entry entry : imageAttributes.getAttributes().entrySet()) { + attributes.put(entry.getKey(), entry.getValue()); + } + // Now that we have used the image attributes we remove the node. + imageAttributes.unlink(); + } + } + }); + } + } +} diff --git a/commonmark-ext-image-attributes/src/main/java/org/commonmark/ext/image/attributes/internal/ImageAttributesDelimiterProcessor.java b/commonmark-ext-image-attributes/src/main/java/org/commonmark/ext/image/attributes/internal/ImageAttributesDelimiterProcessor.java new file mode 100644 index 000000000..a335ccadc --- /dev/null +++ b/commonmark-ext-image-attributes/src/main/java/org/commonmark/ext/image/attributes/internal/ImageAttributesDelimiterProcessor.java @@ -0,0 +1,87 @@ +package org.commonmark.ext.image.attributes.internal; + +import org.commonmark.ext.image.attributes.ImageAttributes; +import org.commonmark.node.Image; +import org.commonmark.node.Node; +import org.commonmark.node.Nodes; +import org.commonmark.node.Text; +import org.commonmark.parser.delimiter.DelimiterProcessor; +import org.commonmark.parser.delimiter.DelimiterRun; + +import java.util.*; + +public class ImageAttributesDelimiterProcessor implements DelimiterProcessor { + + // Only allow a defined set of attributes to be used. 
+ private static final Set SUPPORTED_ATTRIBUTES = Set.of("width", "height"); + + @Override + public char getOpeningCharacter() { + return '{'; + } + + @Override + public char getClosingCharacter() { + return '}'; + } + + @Override + public int getMinLength() { + return 1; + } + + @Override + public int process(DelimiterRun openingRun, DelimiterRun closingRun) { + if (openingRun.length() != 1) { + return 0; + } + + // Check if the attributes can be applied - if the previous node is an Image, and if all the attributes are in + // the set of SUPPORTED_ATTRIBUTES + Text opener = openingRun.getOpener(); + Node nodeToStyle = opener.getPrevious(); + if (!(nodeToStyle instanceof Image)) { + return 0; + } + + List toUnlink = new ArrayList<>(); + StringBuilder content = new StringBuilder(); + + for (Node node : Nodes.between(opener, closingRun.getCloser())) { + // Only Text nodes can be used for attributes + if (node instanceof Text) { + content.append(((Text) node).getLiteral()); + toUnlink.add(node); + } else { + // This node type is not supported, so stop here (no need to check any further ones). + return 0; + } + } + + Map attributesMap = new LinkedHashMap<>(); + String attributes = content.toString(); + for (String s : attributes.split("\\s+")) { + String[] attribute = s.split("="); + if (attribute.length > 1 && SUPPORTED_ATTRIBUTES.contains(attribute[0].toLowerCase())) { + attributesMap.put(attribute[0], attribute[1]); + } else { + // This attribute is not supported, so stop here (no need to check any further ones). + return 0; + } + } + + // Unlink the tmp nodes + for (Node node : toUnlink) { + node.unlink(); + } + + if (attributesMap.size() > 0) { + ImageAttributes imageAttributes = new ImageAttributes(attributesMap); + + // The new node is added as a child of the image node to which the attributes apply. 
+ nodeToStyle.appendChild(imageAttributes); + } + + return 1; + } +} diff --git a/commonmark-ext-image-attributes/src/main/javadoc/overview.html b/commonmark-ext-image-attributes/src/main/javadoc/overview.html new file mode 100644 index 000000000..060597233 --- /dev/null +++ b/commonmark-ext-image-attributes/src/main/javadoc/overview.html @@ -0,0 +1,6 @@ + + +Extension for adding attributes to image nodes +

See {@link org.commonmark.ext.image.attributes.ImageAttributes}

+ + diff --git a/commonmark-ext-image-attributes/src/main/resources/META-INF/LICENSE.txt b/commonmark-ext-image-attributes/src/main/resources/META-INF/LICENSE.txt new file mode 100644 index 000000000..b09e367ce --- /dev/null +++ b/commonmark-ext-image-attributes/src/main/resources/META-INF/LICENSE.txt @@ -0,0 +1,23 @@ +Copyright (c) 2015, Atlassian Pty Ltd +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/commonmark-ext-image-attributes/src/test/java/org/commonmark/ext/image/attributes/ImageAttributesTest.java b/commonmark-ext-image-attributes/src/test/java/org/commonmark/ext/image/attributes/ImageAttributesTest.java new file mode 100644 index 000000000..3edf8497e --- /dev/null +++ b/commonmark-ext-image-attributes/src/test/java/org/commonmark/ext/image/attributes/ImageAttributesTest.java @@ -0,0 +1,141 @@ +package org.commonmark.ext.image.attributes; + +import org.commonmark.Extension; +import org.commonmark.node.Node; +import org.commonmark.node.Paragraph; +import org.commonmark.node.SourceSpan; +import org.commonmark.parser.IncludeSourceSpans; +import org.commonmark.parser.Parser; +import org.commonmark.renderer.html.HtmlRenderer; +import org.commonmark.testutil.RenderingTestCase; +import org.junit.jupiter.api.Test; + +import java.util.List; +import java.util.Set; + +import static org.assertj.core.api.Assertions.assertThat; + +public class ImageAttributesTest extends RenderingTestCase { + + private static final Set EXTENSIONS = Set.of(ImageAttributesExtension.create()); + private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build(); + private static final HtmlRenderer RENDERER = HtmlRenderer.builder().extensions(EXTENSIONS).build(); + + @Test + public void baseCase() { + assertRendering("![text](/url.png){height=5}", + "

\"text\"

\n"); + + assertRendering("![text](/url.png){height=5 width=6}", + "

\"text\"

\n"); + + assertRendering("![text](/url.png){height=99px width=100px}", + "

\"text\"

\n"); + + assertRendering("![text](/url.png){width=100 height=100}", + "

\"text\"

\n"); + + assertRendering("![text](/url.png){height=4.8 width=3.14}", + "

\"text\"

\n"); + + assertRendering("![text](/url.png){Width=18 HeIgHt=1001}", + "

\"text\"

\n"); + + assertRendering("![text](/url.png){height=green width=blue}", + "

\"text\"

\n"); + } + + @Test + public void doubleDelimiters() { + assertRendering("![text](/url.png){{height=5}}", + "

\"text\"{{height=5}}

\n"); + } + + @Test + public void mismatchingDelimitersAreIgnored() { + assertRendering("![text](/url.png){", "

\"text\"{

\n"); + } + + @Test + public void unsupportedStyleNamesAreLeftUnchanged() { + assertRendering("![text](/url.png){j=502 K=101 img=2 url=5}", + "

\"text\"{j=502 K=101 img=2 url=5}

\n"); + assertRendering("![foo](/url.png){height=3 invalid}\n", + "

\"foo\"{height=3 invalid}

\n"); + assertRendering("![foo](/url.png){height=3 *test*}\n", + "

\"foo\"{height=3 test}

\n"); + } + + @Test + public void styleWithNoValueIsIgnored() { + assertRendering("![text](/url.png){height}", + "

\"text\"{height}

\n"); + } + + @Test + public void repeatedStyleNameUsesFinalOne() { + assertRendering("![text](/url.png){height=4 height=5 width=1 height=6}", + "

\"text\"

\n"); + } + + @Test + public void styleValuesAreEscaped() { + assertRendering("![text](/url.png){height=\"text\"

\n"); + assertRendering("![text](/url.png){height=\"\"img}", + "

\"text\"

\n"); + } + + @Test + public void imageAltTextWithSpaces() { + assertRendering("![Android SDK Manager](/contrib/android-sdk-manager.png){height=502 width=101}", + "

\"Android

\n"); + } + + @Test + public void imageAltTextWithSoftLineBreak() { + assertRendering("![foo\nbar](/url){height=101 width=202}\n", + "

\"foo\nbar\"

\n"); + } + + @Test + public void imageAltTextWithHardLineBreak() { + assertRendering("![foo \nbar](/url){height=506 width=1}\n", + "

\"foo\nbar\"

\n"); + } + + @Test + public void imageAltTextWithEntities() { + assertRendering("![foo ä](/url){height=99 width=100}\n", + "

\"foo

\n"); + } + + @Test + public void textNodesAreUnchanged() { + assertRendering("x{height=3 width=4}\n", "

x{height=3 width=4}

\n"); + assertRendering("x {height=3 width=4}\n", "

x {height=3 width=4}

\n"); + assertRendering("\\documentclass[12pt]{article}\n", "

\\documentclass[12pt]{article}

\n"); + assertRendering("some *text*{height=3 width=4}\n", "

some text{height=3 width=4}

\n"); + assertRendering("{NN} text", "

{NN} text

\n"); + assertRendering("{}", "

{}

\n"); + } + + @Test + public void sourceSpans() { + Parser parser = Parser.builder() + .extensions(EXTENSIONS) + .includeSourceSpans(IncludeSourceSpans.BLOCKS_AND_INLINES) + .build(); + + // This doesn't result in image attributes, so source spans should be for the single (merged) text node. + Node document = parser.parse("x{height=3 width=4}\n"); + Paragraph block = (Paragraph) document.getFirstChild(); + Node text = block.getFirstChild(); + assertThat(text.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 0, 0, 19))); + } + + @Override + protected String render(String source) { + return RENDERER.render(PARSER.parse(source)); + } +} diff --git a/commonmark-ext-ins/pom.xml b/commonmark-ext-ins/pom.xml index 6f8c59318..48481c073 100644 --- a/commonmark-ext-ins/pom.xml +++ b/commonmark-ext-ins/pom.xml @@ -2,9 +2,9 @@ 4.0.0 - com.atlassian.commonmark + org.commonmark commonmark-parent - 0.14.1-SNAPSHOT + 0.28.1-SNAPSHOT commonmark-ext-ins @@ -13,31 +13,15 @@ - com.atlassian.commonmark + org.commonmark commonmark - com.atlassian.commonmark + org.commonmark commonmark-test-util test - - - - org.apache.maven.plugins - maven-jar-plugin - - - - org.commonmark.ext.ins - - - - - - - diff --git a/commonmark-ext-ins/src/main/java/module-info.java b/commonmark-ext-ins/src/main/java/module-info.java new file mode 100644 index 000000000..fb96ea598 --- /dev/null +++ b/commonmark-ext-ins/src/main/java/module-info.java @@ -0,0 +1,5 @@ +module org.commonmark.ext.ins { + exports org.commonmark.ext.ins; + + requires transitive org.commonmark; +} diff --git a/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/InsExtension.java b/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/InsExtension.java index 831cd75c8..e8a53e59a 100644 --- a/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/InsExtension.java +++ b/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/InsExtension.java @@ -2,12 +2,22 @@ import org.commonmark.Extension; import 
org.commonmark.ext.ins.internal.InsDelimiterProcessor; -import org.commonmark.ext.ins.internal.InsNodeRenderer; +import org.commonmark.ext.ins.internal.InsHtmlNodeRenderer; +import org.commonmark.ext.ins.internal.InsMarkdownNodeRenderer; +import org.commonmark.ext.ins.internal.InsTextContentNodeRenderer; +import org.commonmark.parser.Parser; +import org.commonmark.renderer.NodeRenderer; import org.commonmark.renderer.html.HtmlNodeRendererContext; import org.commonmark.renderer.html.HtmlNodeRendererFactory; -import org.commonmark.parser.Parser; import org.commonmark.renderer.html.HtmlRenderer; -import org.commonmark.renderer.NodeRenderer; +import org.commonmark.renderer.markdown.MarkdownNodeRendererContext; +import org.commonmark.renderer.markdown.MarkdownNodeRendererFactory; +import org.commonmark.renderer.markdown.MarkdownRenderer; +import org.commonmark.renderer.text.TextContentNodeRendererContext; +import org.commonmark.renderer.text.TextContentNodeRendererFactory; +import org.commonmark.renderer.text.TextContentRenderer; + +import java.util.Set; /** * Extension for ins using ++ @@ -20,7 +30,7 @@ * The parsed ins text regions are turned into {@link Ins} nodes. *

*/ -public class InsExtension implements Parser.ParserExtension, HtmlRenderer.HtmlRendererExtension { +public class InsExtension implements Parser.ParserExtension, HtmlRenderer.HtmlRendererExtension, TextContentRenderer.TextContentRendererExtension, MarkdownRenderer.MarkdownRendererExtension { private InsExtension() { } @@ -39,7 +49,34 @@ public void extend(HtmlRenderer.Builder rendererBuilder) { rendererBuilder.nodeRendererFactory(new HtmlNodeRendererFactory() { @Override public NodeRenderer create(HtmlNodeRendererContext context) { - return new InsNodeRenderer(context); + return new InsHtmlNodeRenderer(context); + } + }); + } + + @Override + public void extend(TextContentRenderer.Builder rendererBuilder) { + rendererBuilder.nodeRendererFactory(new TextContentNodeRendererFactory() { + @Override + public NodeRenderer create(TextContentNodeRendererContext context) { + return new InsTextContentNodeRenderer(context); + } + }); + } + + @Override + public void extend(MarkdownRenderer.Builder rendererBuilder) { + rendererBuilder.nodeRendererFactory(new MarkdownNodeRendererFactory() { + @Override + public NodeRenderer create(MarkdownNodeRendererContext context) { + return new InsMarkdownNodeRenderer(context); + } + + @Override + public Set getSpecialCharacters() { + // We technically don't need to escape single occurrences of +, but that's all the extension API + // exposes currently. 
+ return Set.of('+'); } }); } diff --git a/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsDelimiterProcessor.java b/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsDelimiterProcessor.java index 9a4ad383c..b0bfb4c6e 100644 --- a/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsDelimiterProcessor.java +++ b/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsDelimiterProcessor.java @@ -2,6 +2,8 @@ import org.commonmark.ext.ins.Ins; import org.commonmark.node.Node; +import org.commonmark.node.Nodes; +import org.commonmark.node.SourceSpans; import org.commonmark.node.Text; import org.commonmark.parser.delimiter.DelimiterProcessor; import org.commonmark.parser.delimiter.DelimiterRun; @@ -24,27 +26,31 @@ public int getMinLength() { } @Override - public int getDelimiterUse(DelimiterRun opener, DelimiterRun closer) { - if (opener.length() >= 2 && closer.length() >= 2) { + public int process(DelimiterRun openingRun, DelimiterRun closingRun) { + if (openingRun.length() >= 2 && closingRun.length() >= 2) { // Use exactly two delimiters even if we have more, and don't care about internal openers/closers. + + Text opener = openingRun.getOpener(); + + // Wrap nodes between delimiters in ins. + Node ins = new Ins(); + + SourceSpans sourceSpans = new SourceSpans(); + sourceSpans.addAllFrom(openingRun.getOpeners(2)); + + for (Node node : Nodes.between(opener, closingRun.getCloser())) { + ins.appendChild(node); + sourceSpans.addAll(node.getSourceSpans()); + } + + sourceSpans.addAllFrom(closingRun.getClosers(2)); + ins.setSourceSpans(sourceSpans.getSourceSpans()); + + opener.insertAfter(ins); + return 2; } else { return 0; } } - - @Override - public void process(Text opener, Text closer, int delimiterCount) { - // Wrap nodes between delimiters in ins. 
- Node ins = new Ins(); - - Node tmp = opener.getNext(); - while (tmp != null && tmp != closer) { - Node next = tmp.getNext(); - ins.appendChild(tmp); - tmp = next; - } - - opener.insertAfter(ins); - } } diff --git a/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsHtmlNodeRenderer.java b/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsHtmlNodeRenderer.java new file mode 100644 index 000000000..dcd05fd59 --- /dev/null +++ b/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsHtmlNodeRenderer.java @@ -0,0 +1,35 @@ +package org.commonmark.ext.ins.internal; + +import org.commonmark.node.Node; +import org.commonmark.renderer.html.HtmlNodeRendererContext; +import org.commonmark.renderer.html.HtmlWriter; + +import java.util.Map; + +public class InsHtmlNodeRenderer extends InsNodeRenderer { + + private final HtmlNodeRendererContext context; + private final HtmlWriter html; + + public InsHtmlNodeRenderer(HtmlNodeRendererContext context) { + this.context = context; + this.html = context.getWriter(); + } + + @Override + public void render(Node node) { + Map attributes = context.extendAttributes(node, "ins", Map.of()); + html.tag("ins", attributes); + renderChildren(node); + html.tag("/ins"); + } + + private void renderChildren(Node parent) { + Node node = parent.getFirstChild(); + while (node != null) { + Node next = node.getNext(); + context.render(node); + node = next; + } + } +} diff --git a/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsMarkdownNodeRenderer.java b/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsMarkdownNodeRenderer.java new file mode 100644 index 000000000..851d47282 --- /dev/null +++ b/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsMarkdownNodeRenderer.java @@ -0,0 +1,32 @@ +package org.commonmark.ext.ins.internal; + +import org.commonmark.node.Node; +import org.commonmark.renderer.markdown.MarkdownNodeRendererContext; +import 
org.commonmark.renderer.markdown.MarkdownWriter; + +public class InsMarkdownNodeRenderer extends InsNodeRenderer { + + private final MarkdownNodeRendererContext context; + private final MarkdownWriter writer; + + public InsMarkdownNodeRenderer(MarkdownNodeRendererContext context) { + this.context = context; + this.writer = context.getWriter(); + } + + @Override + public void render(Node node) { + writer.raw("++"); + renderChildren(node); + writer.raw("++"); + } + + private void renderChildren(Node parent) { + Node node = parent.getFirstChild(); + while (node != null) { + Node next = node.getNext(); + context.render(node); + node = next; + } + } +} diff --git a/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsNodeRenderer.java b/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsNodeRenderer.java index faf15cae7..31f0a64ec 100644 --- a/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsNodeRenderer.java +++ b/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsNodeRenderer.java @@ -1,44 +1,15 @@ package org.commonmark.ext.ins.internal; import org.commonmark.ext.ins.Ins; -import org.commonmark.renderer.html.HtmlWriter; -import org.commonmark.renderer.html.HtmlNodeRendererContext; import org.commonmark.node.Node; import org.commonmark.renderer.NodeRenderer; -import java.util.Collections; -import java.util.Map; import java.util.Set; -public class InsNodeRenderer implements NodeRenderer { - - private final HtmlNodeRendererContext context; - private final HtmlWriter html; - - public InsNodeRenderer(HtmlNodeRendererContext context) { - this.context = context; - this.html = context.getWriter(); - } +abstract class InsNodeRenderer implements NodeRenderer { @Override public Set> getNodeTypes() { - return Collections.>singleton(Ins.class); - } - - @Override - public void render(Node node) { - Map attributes = context.extendAttributes(node, "ins", Collections.emptyMap()); - html.tag("ins", attributes); - 
renderChildren(node); - html.tag("/ins"); - } - - private void renderChildren(Node parent) { - Node node = parent.getFirstChild(); - while (node != null) { - Node next = node.getNext(); - context.render(node); - node = next; - } + return Set.of(Ins.class); } } diff --git a/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsTextContentNodeRenderer.java b/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsTextContentNodeRenderer.java new file mode 100644 index 000000000..f30947c93 --- /dev/null +++ b/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsTextContentNodeRenderer.java @@ -0,0 +1,27 @@ +package org.commonmark.ext.ins.internal; + +import org.commonmark.node.Node; +import org.commonmark.renderer.text.TextContentNodeRendererContext; + +public class InsTextContentNodeRenderer extends InsNodeRenderer { + + private final TextContentNodeRendererContext context; + + public InsTextContentNodeRenderer(TextContentNodeRendererContext context) { + this.context = context; + } + + @Override + public void render(Node node) { + renderChildren(node); + } + + private void renderChildren(Node parent) { + Node node = parent.getFirstChild(); + while (node != null) { + Node next = node.getNext(); + context.render(node); + node = next; + } + } +} diff --git a/commonmark-ext-ins/src/main/resources/META-INF/LICENSE.txt b/commonmark-ext-ins/src/main/resources/META-INF/LICENSE.txt new file mode 100644 index 000000000..b09e367ce --- /dev/null +++ b/commonmark-ext-ins/src/main/resources/META-INF/LICENSE.txt @@ -0,0 +1,23 @@ +Copyright (c) 2015, Atlassian Pty Ltd +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. 
+ +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/commonmark-ext-ins/src/test/java/org/commonmark/ext/ins/InsMarkdownRendererTest.java b/commonmark-ext-ins/src/test/java/org/commonmark/ext/ins/InsMarkdownRendererTest.java new file mode 100644 index 000000000..6fc9ead67 --- /dev/null +++ b/commonmark-ext-ins/src/test/java/org/commonmark/ext/ins/InsMarkdownRendererTest.java @@ -0,0 +1,33 @@ +package org.commonmark.ext.ins; + +import org.commonmark.Extension; +import org.commonmark.parser.Parser; +import org.commonmark.renderer.markdown.MarkdownRenderer; +import org.junit.jupiter.api.Test; + +import java.util.Set; + +import static org.assertj.core.api.Assertions.assertThat; + +public class InsMarkdownRendererTest { + + private static final Set EXTENSIONS = Set.of(InsExtension.create()); + private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build(); + private static final MarkdownRenderer RENDERER = MarkdownRenderer.builder().extensions(EXTENSIONS).build(); + + @Test + public void testStrikethrough() { + 
assertRoundTrip("++foo++\n"); + + assertRoundTrip("\\+\\+foo\\+\\+\n"); + } + + protected String render(String source) { + return RENDERER.render(PARSER.parse(source)); + } + + private void assertRoundTrip(String input) { + String rendered = render(input); + assertThat(rendered).isEqualTo(input); + } +} diff --git a/commonmark-ext-ins/src/test/java/org/commonmark/ext/ins/InsTest.java b/commonmark-ext-ins/src/test/java/org/commonmark/ext/ins/InsTest.java index 2b97431c3..a5c91a395 100644 --- a/commonmark-ext-ins/src/test/java/org/commonmark/ext/ins/InsTest.java +++ b/commonmark-ext-ins/src/test/java/org/commonmark/ext/ins/InsTest.java @@ -2,21 +2,27 @@ import org.commonmark.Extension; import org.commonmark.node.Node; +import org.commonmark.node.Paragraph; +import org.commonmark.node.SourceSpan; +import org.commonmark.parser.IncludeSourceSpans; import org.commonmark.parser.Parser; import org.commonmark.renderer.html.HtmlRenderer; +import org.commonmark.renderer.text.TextContentRenderer; import org.commonmark.testutil.RenderingTestCase; -import org.junit.Test; +import org.junit.jupiter.api.Test; -import java.util.Collections; +import java.util.List; import java.util.Set; -import static org.junit.Assert.assertEquals; +import static org.assertj.core.api.Assertions.assertThat; public class InsTest extends RenderingTestCase { - private static final Set EXTENSIONS = Collections.singleton(InsExtension.create()); + private static final Set EXTENSIONS = Set.of(InsExtension.create()); private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build(); private static final HtmlRenderer RENDERER = HtmlRenderer.builder().extensions(EXTENSIONS).build(); + private static final TextContentRenderer CONTENT_RENDERER = TextContentRenderer.builder() + .extensions(EXTENSIONS).build(); @Test public void onePlusIsNotEnough() { @@ -76,8 +82,27 @@ public void insideBlockQuote() { public void delimited() { Node document = PARSER.parse("++foo++"); Ins ins = (Ins) 
document.getFirstChild().getFirstChild(); - assertEquals("++", ins.getOpeningDelimiter()); - assertEquals("++", ins.getClosingDelimiter()); + assertThat(ins.getOpeningDelimiter()).isEqualTo("++"); + assertThat(ins.getClosingDelimiter()).isEqualTo("++"); + } + + @Test + public void textContentRenderer() { + Node document = PARSER.parse("++foo++"); + assertThat(CONTENT_RENDERER.render(document)).isEqualTo("foo"); + } + + @Test + public void sourceSpans() { + Parser parser = Parser.builder() + .extensions(EXTENSIONS) + .includeSourceSpans(IncludeSourceSpans.BLOCKS_AND_INLINES) + .build(); + + Node document = parser.parse("hey ++there++\n"); + Paragraph block = (Paragraph) document.getFirstChild(); + Node ins = block.getLastChild(); + assertThat(ins.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 4, 4, 9))); } @Override diff --git a/commonmark-ext-task-list-items/pom.xml b/commonmark-ext-task-list-items/pom.xml new file mode 100644 index 000000000..4359f8707 --- /dev/null +++ b/commonmark-ext-task-list-items/pom.xml @@ -0,0 +1,27 @@ + + + 4.0.0 + + org.commonmark + commonmark-parent + 0.28.1-SNAPSHOT + + + commonmark-ext-task-list-items + commonmark-java extension for task list items + commonmark-java extension for task list items + + + + org.commonmark + commonmark + + + + org.commonmark + commonmark-test-util + test + + + + diff --git a/commonmark-ext-task-list-items/src/main/java/module-info.java b/commonmark-ext-task-list-items/src/main/java/module-info.java new file mode 100644 index 000000000..9528323ea --- /dev/null +++ b/commonmark-ext-task-list-items/src/main/java/module-info.java @@ -0,0 +1,5 @@ +module org.commonmark.ext.task.list.items { + exports org.commonmark.ext.task.list.items; + + requires transitive org.commonmark; +} diff --git a/commonmark-ext-task-list-items/src/main/java/org/commonmark/ext/task/list/items/TaskListItemMarker.java b/commonmark-ext-task-list-items/src/main/java/org/commonmark/ext/task/list/items/TaskListItemMarker.java new file 
mode 100644 index 000000000..9eca59bc9 --- /dev/null +++ b/commonmark-ext-task-list-items/src/main/java/org/commonmark/ext/task/list/items/TaskListItemMarker.java @@ -0,0 +1,19 @@ +package org.commonmark.ext.task.list.items; + +import org.commonmark.node.CustomNode; + +/** + * A marker node indicating that a list item contains a task. + */ +public class TaskListItemMarker extends CustomNode { + + private final boolean checked; + + public TaskListItemMarker(boolean checked) { + this.checked = checked; + } + + public boolean isChecked() { + return checked; + } +} diff --git a/commonmark-ext-task-list-items/src/main/java/org/commonmark/ext/task/list/items/TaskListItemsExtension.java b/commonmark-ext-task-list-items/src/main/java/org/commonmark/ext/task/list/items/TaskListItemsExtension.java new file mode 100644 index 000000000..9bf0a2155 --- /dev/null +++ b/commonmark-ext-task-list-items/src/main/java/org/commonmark/ext/task/list/items/TaskListItemsExtension.java @@ -0,0 +1,45 @@ +package org.commonmark.ext.task.list.items; + +import org.commonmark.Extension; +import org.commonmark.ext.task.list.items.internal.TaskListItemHtmlNodeRenderer; +import org.commonmark.ext.task.list.items.internal.TaskListItemPostProcessor; +import org.commonmark.parser.Parser; +import org.commonmark.renderer.NodeRenderer; +import org.commonmark.renderer.html.HtmlNodeRendererContext; +import org.commonmark.renderer.html.HtmlNodeRendererFactory; +import org.commonmark.renderer.html.HtmlRenderer; + +/** + * Extension for adding task list items. + *

+ * Create it with {@link #create()} and then configure it on the builders + * ({@link org.commonmark.parser.Parser.Builder#extensions(Iterable)}, + * {@link HtmlRenderer.Builder#extensions(Iterable)}). + *

+ * + * @since 0.15.0 + */ +public class TaskListItemsExtension implements Parser.ParserExtension, HtmlRenderer.HtmlRendererExtension { + + private TaskListItemsExtension() { + } + + public static Extension create() { + return new TaskListItemsExtension(); + } + + @Override + public void extend(Parser.Builder parserBuilder) { + parserBuilder.postProcessor(new TaskListItemPostProcessor()); + } + + @Override + public void extend(HtmlRenderer.Builder rendererBuilder) { + rendererBuilder.nodeRendererFactory(new HtmlNodeRendererFactory() { + @Override + public NodeRenderer create(HtmlNodeRendererContext context) { + return new TaskListItemHtmlNodeRenderer(context); + } + }); + } +} diff --git a/commonmark-ext-task-list-items/src/main/java/org/commonmark/ext/task/list/items/internal/TaskListItemHtmlNodeRenderer.java b/commonmark-ext-task-list-items/src/main/java/org/commonmark/ext/task/list/items/internal/TaskListItemHtmlNodeRenderer.java new file mode 100644 index 000000000..331b301e9 --- /dev/null +++ b/commonmark-ext-task-list-items/src/main/java/org/commonmark/ext/task/list/items/internal/TaskListItemHtmlNodeRenderer.java @@ -0,0 +1,52 @@ +package org.commonmark.ext.task.list.items.internal; + +import org.commonmark.ext.task.list.items.TaskListItemMarker; +import org.commonmark.node.Node; +import org.commonmark.renderer.NodeRenderer; +import org.commonmark.renderer.html.HtmlNodeRendererContext; +import org.commonmark.renderer.html.HtmlWriter; + +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Set; + +public class TaskListItemHtmlNodeRenderer implements NodeRenderer { + + private final HtmlNodeRendererContext context; + private final HtmlWriter html; + + public TaskListItemHtmlNodeRenderer(HtmlNodeRendererContext context) { + this.context = context; + this.html = context.getWriter(); + } + + @Override + public Set> getNodeTypes() { + return Set.of(TaskListItemMarker.class); + } + + @Override + public void render(Node node) { + if (node 
instanceof TaskListItemMarker) { + Map attributes = new LinkedHashMap<>(); + attributes.put("type", "checkbox"); + attributes.put("disabled", ""); + if (((TaskListItemMarker) node).isChecked()) { + attributes.put("checked", ""); + } + html.tag("input", context.extendAttributes(node, "input", attributes)); + // Add a space after the input tag (as the next text node has been trimmed) + html.text(" "); + renderChildren(node); + } + } + + private void renderChildren(Node parent) { + Node node = parent.getFirstChild(); + while (node != null) { + Node next = node.getNext(); + context.render(node); + node = next; + } + } +} diff --git a/commonmark-ext-task-list-items/src/main/java/org/commonmark/ext/task/list/items/internal/TaskListItemPostProcessor.java b/commonmark-ext-task-list-items/src/main/java/org/commonmark/ext/task/list/items/internal/TaskListItemPostProcessor.java new file mode 100644 index 000000000..b95c2e30d --- /dev/null +++ b/commonmark-ext-task-list-items/src/main/java/org/commonmark/ext/task/list/items/internal/TaskListItemPostProcessor.java @@ -0,0 +1,49 @@ +package org.commonmark.ext.task.list.items.internal; + +import org.commonmark.ext.task.list.items.TaskListItemMarker; +import org.commonmark.node.*; +import org.commonmark.parser.PostProcessor; + +import java.util.Objects; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class TaskListItemPostProcessor implements PostProcessor { + + private static final Pattern REGEX_TASK_LIST_ITEM = Pattern.compile("^\\[([xX\\s])]\\s+(.*)"); + + @Override + public Node process(Node node) { + TaskListItemVisitor visitor = new TaskListItemVisitor(); + node.accept(visitor); + return node; + } + + private static class TaskListItemVisitor extends AbstractVisitor { + + @Override + public void visit(ListItem listItem) { + Node child = listItem.getFirstChild(); + if (child instanceof Paragraph) { + Node node = child.getFirstChild(); + if (node instanceof Text) { + Text textNode = (Text) node; + 
Matcher matcher = REGEX_TASK_LIST_ITEM.matcher(textNode.getLiteral()); + if (matcher.matches()) { + String checked = matcher.group(1); + boolean isChecked = Objects.equals(checked, "X") || Objects.equals(checked, "x"); + + // Add the task list item marker node as the first child of the list item. + listItem.prependChild(new TaskListItemMarker(isChecked)); + + // Parse the node using the input after the task marker (in other words, group 2 from the matcher). + // (Note that the String has been trimmed, so we should add a space between the + // TaskListItemMarker and the text that follows it when we come to render it). + textNode.setLiteral(matcher.group(2)); + } + } + } + visitChildren(listItem); + } + } +} diff --git a/commonmark-ext-task-list-items/src/main/resources/META-INF/LICENSE.txt b/commonmark-ext-task-list-items/src/main/resources/META-INF/LICENSE.txt new file mode 100644 index 000000000..b09e367ce --- /dev/null +++ b/commonmark-ext-task-list-items/src/main/resources/META-INF/LICENSE.txt @@ -0,0 +1,23 @@ +Copyright (c) 2015, Atlassian Pty Ltd +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/commonmark-ext-task-list-items/src/test/java/org/commonmark/ext/task/list/items/TaskListItemsTest.java b/commonmark-ext-task-list-items/src/test/java/org/commonmark/ext/task/list/items/TaskListItemsTest.java new file mode 100644 index 000000000..0adc615a7 --- /dev/null +++ b/commonmark-ext-task-list-items/src/test/java/org/commonmark/ext/task/list/items/TaskListItemsTest.java @@ -0,0 +1,102 @@ +package org.commonmark.ext.task.list.items; + +import org.commonmark.Extension; +import org.commonmark.parser.Parser; +import org.commonmark.renderer.html.HtmlRenderer; +import org.commonmark.testutil.RenderingTestCase; +import org.junit.jupiter.api.Test; + +import java.util.Set; + +public class TaskListItemsTest extends RenderingTestCase { + + private static final Set EXTENSIONS = Set.of(TaskListItemsExtension.create()); + private static final String HTML_CHECKED = ""; + private static final String HTML_UNCHECKED = ""; + private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build(); + private static final HtmlRenderer RENDERER = HtmlRenderer.builder().extensions(EXTENSIONS).build(); + + @Test + public void baseCase() { + assertRendering("- [x] this is *done*\n", "
    \n
  • " + HTML_CHECKED + " this is done
  • \n
\n"); + + assertRendering("- [ ] do this\n", "
    \n
  • " + HTML_UNCHECKED + " do this
  • \n
\n"); + + assertRendering("- [x] foo\n" + + " - [ ] bar\n" + + " - [x] baz\n" + + "- [ ] bim", + "
    \n" + + "
  • " + HTML_CHECKED + " foo\n" + + "
      \n" + + "
    • " + HTML_UNCHECKED + " bar
    • \n" + + "
    • " + HTML_CHECKED + " baz
    • \n" + + "
    \n" + + "
  • \n" + + "
  • " + HTML_UNCHECKED + " bim
  • \n" + + "
\n"); + + assertRendering("* [ ] do this\n* [ ] and this", + "
    \n
  • " + HTML_UNCHECKED + " do this
  • \n
  • " + HTML_UNCHECKED + " and this
  • \n
\n"); + + assertRendering("+ [x] one\n" + + " - [ ] two\n" + + " * [x] three\n", + "
    \n" + + "
  • " + HTML_CHECKED + " one\n" + + "
      \n" + + "
    • " + HTML_UNCHECKED + " two\n" + + "
        \n" + + "
      • " + HTML_CHECKED + " three
      • \n" + + "
      \n" + + "
    • \n" + + "
    \n" + + "
  • \n" + + "
\n"); + + assertRendering("TODO list\n" + + "---------\n" + + "- [ ] first task\n" + + "- [x] second task\n" + + "- [ ] third task\n\n" + + "Let me know when you are finished", + "

TODO list

\n" + + "
    \n" + + "
  • " + HTML_UNCHECKED + " first task
  • \n" + + "
  • " + HTML_CHECKED + " second task
  • \n" + + "
  • " + HTML_UNCHECKED + " third task
  • \n" + + "
\n" + + "

Let me know when you are finished

\n"); + } + + @Test + public void notListItem() { + assertRendering("[x] this is not a task\n", "

[x] this is not a task

\n"); + assertRendering(" [ ] this is not a task either\n", "

[ ] this is not a task either

\n"); + } + + @Test + public void notValidTaskFormat() { + assertRendering("- [x]no space\n", "
    \n
  • [x]no space
  • \n
\n"); + assertRendering("- [O] is not a _task_\n", "
    \n
  • [O] is not a task
  • \n
\n"); + assertRendering("* [] neither is this\n", "
    \n
  • [] neither is this
  • \n
\n"); + assertRendering("* [ ] nor this\n" + + "* [XX] nor this\n", + "
    \n
  • [ ] nor this
  • \n
  • [XX] nor this
  • \n
\n"); + assertRendering("+ [x]] is not a task\n", "
    \n
  • [x]] is not a task
  • \n
\n"); + assertRendering("- [x isn't\n", "
    \n
  • [x isn't
  • \n
\n"); + assertRendering("- [[x is not\n", "
    \n
  • [[x is not
  • \n
\n"); + assertRendering("- x] nope\n", "
    \n
  • x] nope
  • \n
\n"); + assertRendering("- x]] no way\n", "
    \n
  • x]] no way
  • \n
\n"); + assertRendering("+ (x) sorry no\n", "
    \n
  • (x) sorry no
  • \n
\n"); + assertRendering("+ {x} sorry not sorry\n", "
    \n
  • {x} sorry not sorry
  • \n
\n"); + assertRendering("+ [[x]] nooo\n", "
    \n
  • [[x]] nooo
  • \n
\n"); + assertRendering("+ text before [x] is not a task\n", "
    \n
  • text before [x] is not a task
  • \n
\n"); + assertRendering("* [x] \n* [ ] \n", "
    \n
  • [x]
  • \n
  • [ ]
  • \n
\n"); + } + + @Override + protected String render(String source) { + return RENDERER.render(PARSER.parse(source)); + } +} diff --git a/commonmark-ext-yaml-front-matter/pom.xml b/commonmark-ext-yaml-front-matter/pom.xml index d4534ec55..e6822f771 100644 --- a/commonmark-ext-yaml-front-matter/pom.xml +++ b/commonmark-ext-yaml-front-matter/pom.xml @@ -3,8 +3,8 @@ 4.0.0 commonmark-parent - com.atlassian.commonmark - 0.14.1-SNAPSHOT + org.commonmark + 0.28.1-SNAPSHOT commonmark-ext-yaml-front-matter @@ -13,31 +13,15 @@ - com.atlassian.commonmark + org.commonmark commonmark - com.atlassian.commonmark + org.commonmark commonmark-test-util test - - - - org.apache.maven.plugins - maven-jar-plugin - - - - org.commonmark.ext.front.matter - - - - - - - diff --git a/commonmark-ext-yaml-front-matter/src/main/java/module-info.java b/commonmark-ext-yaml-front-matter/src/main/java/module-info.java new file mode 100644 index 000000000..5f96c14ad --- /dev/null +++ b/commonmark-ext-yaml-front-matter/src/main/java/module-info.java @@ -0,0 +1,5 @@ +module org.commonmark.ext.front.matter { + exports org.commonmark.ext.front.matter; + + requires transitive org.commonmark; +} diff --git a/commonmark-ext-yaml-front-matter/src/main/java/org/commonmark/ext/front/matter/internal/YamlFrontMatterBlockParser.java b/commonmark-ext-yaml-front-matter/src/main/java/org/commonmark/ext/front/matter/internal/YamlFrontMatterBlockParser.java index 5612d9ffd..469cf4e2f 100644 --- a/commonmark-ext-yaml-front-matter/src/main/java/org/commonmark/ext/front/matter/internal/YamlFrontMatterBlockParser.java +++ b/commonmark-ext-yaml-front-matter/src/main/java/org/commonmark/ext/front/matter/internal/YamlFrontMatterBlockParser.java @@ -2,9 +2,10 @@ import org.commonmark.ext.front.matter.YamlFrontMatterBlock; import org.commonmark.ext.front.matter.YamlFrontMatterNode; -import org.commonmark.internal.DocumentBlockParser; import org.commonmark.node.Block; +import org.commonmark.node.Document; import 
org.commonmark.parser.InlineParser; +import org.commonmark.parser.SourceLine; import org.commonmark.parser.block.*; import java.util.ArrayList; @@ -13,7 +14,7 @@ import java.util.regex.Pattern; public class YamlFrontMatterBlockParser extends AbstractBlockParser { - private static final Pattern REGEX_METADATA = Pattern.compile("^[ ]{0,3}([A-Za-z0-9_-]+):\\s*(.*)"); + private static final Pattern REGEX_METADATA = Pattern.compile("^[ ]{0,3}([A-Za-z0-9._-]+):\\s*(.*)"); private static final Pattern REGEX_METADATA_LIST = Pattern.compile("^[ ]+-\\s*(.*)"); private static final Pattern REGEX_METADATA_LITERAL = Pattern.compile("^\\s*(.*)"); private static final Pattern REGEX_BEGIN = Pattern.compile("^-{3}(\\s.*)?"); @@ -37,12 +38,12 @@ public Block getBlock() { } @Override - public void addLine(CharSequence line) { + public void addLine(SourceLine line) { } @Override public BlockContinue tryContinue(ParserState parserState) { - final CharSequence line = parserState.getLine(); + final CharSequence line = parserState.getLine().getContent(); if (REGEX_END.matcher(line).matches()) { if (currentKey != null) { @@ -60,10 +61,11 @@ public BlockContinue tryContinue(ParserState parserState) { inLiteral = false; currentKey = matcher.group(1); currentValues = new ArrayList<>(); - if ("|".equals(matcher.group(2))) { + String value = matcher.group(2); + if ("|".equals(value)) { inLiteral = true; - } else if (!"".equals(matcher.group(2))) { - currentValues.add(matcher.group(2)); + } else if (!"".equals(value)) { + currentValues.add(parseString(value)); } return BlockContinue.atIndex(parserState.getIndex()); @@ -80,7 +82,8 @@ public BlockContinue tryContinue(ParserState parserState) { } else { matcher = REGEX_METADATA_LIST.matcher(line); if (matcher.matches()) { - currentValues.add(matcher.group(1)); + String value = matcher.group(1); + currentValues.add(parseString(value)); } } @@ -92,13 +95,31 @@ public BlockContinue tryContinue(ParserState parserState) { public void 
parseInlines(InlineParser inlineParser) { } + private static String parseString(String s) { + // Limited parsing of https://yaml.org/spec/1.2.2/#73-flow-scalar-styles + // We assume input is well-formed and otherwise treat it as a plain string. In a real + // parser, e.g. `'foo` would be invalid because it's missing a trailing `'`. + if (s.startsWith("'") && s.endsWith("'")) { + String inner = s.substring(1, s.length() - 1); + return inner.replace("''", "'"); + } else if (s.startsWith("\"") && s.endsWith("\"")) { + String inner = s.substring(1, s.length() - 1); + // Only support escaped `\` and `"`, nothing else. + return inner + .replace("\\\"", "\"") + .replace("\\\\", "\\"); + } else { + return s; + } + } + public static class Factory extends AbstractBlockParserFactory { @Override public BlockStart tryStart(ParserState state, MatchedBlockParser matchedBlockParser) { - CharSequence line = state.getLine(); + CharSequence line = state.getLine().getContent(); BlockParser parentParser = matchedBlockParser.getMatchedBlockParser(); // check whether this line is the first line of whole document or not - if (parentParser instanceof DocumentBlockParser && parentParser.getBlock().getFirstChild() == null && + if (parentParser.getBlock() instanceof Document && parentParser.getBlock().getFirstChild() == null && REGEX_BEGIN.matcher(line).matches()) { return BlockStart.of(new YamlFrontMatterBlockParser()).atIndex(state.getNextNonSpaceIndex()); } diff --git a/commonmark-ext-yaml-front-matter/src/main/resources/META-INF/LICENSE.txt b/commonmark-ext-yaml-front-matter/src/main/resources/META-INF/LICENSE.txt new file mode 100644 index 000000000..b09e367ce --- /dev/null +++ b/commonmark-ext-yaml-front-matter/src/main/resources/META-INF/LICENSE.txt @@ -0,0 +1,23 @@ +Copyright (c) 2015, Atlassian Pty Ltd +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/commonmark-ext-yaml-front-matter/src/test/java/org/commonmark/ext/front/matter/YamlFrontMatterTest.java b/commonmark-ext-yaml-front-matter/src/test/java/org/commonmark/ext/front/matter/YamlFrontMatterTest.java index 505c70e6a..db17d4a4e 100644 --- a/commonmark-ext-yaml-front-matter/src/test/java/org/commonmark/ext/front/matter/YamlFrontMatterTest.java +++ b/commonmark-ext-yaml-front-matter/src/test/java/org/commonmark/ext/front/matter/YamlFrontMatterTest.java @@ -6,18 +6,16 @@ import org.commonmark.parser.Parser; import org.commonmark.renderer.html.HtmlRenderer; import org.commonmark.testutil.RenderingTestCase; -import org.junit.Test; +import org.junit.jupiter.api.Test; -import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Set; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.assertj.core.api.Assertions.assertThat; public class YamlFrontMatterTest extends RenderingTestCase { - private static final Set EXTENSIONS = Collections.singleton(YamlFrontMatterExtension.create()); + private static final Set EXTENSIONS = Set.of(YamlFrontMatterExtension.create()); private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build(); private static final HtmlRenderer RENDERER = HtmlRenderer.builder().extensions(EXTENSIONS).build(); @@ -30,16 +28,12 @@ public void simpleValue() { "\ngreat"; final String rendered = "

great

\n"; - YamlFrontMatterVisitor visitor = new YamlFrontMatterVisitor(); - Node document = PARSER.parse(input); - document.accept(visitor); + Map> data = getFrontMatter(input); - Map> data = visitor.getData(); - - assertEquals(1, data.size()); - assertEquals("hello", data.keySet().iterator().next()); - assertEquals(1, data.get("hello").size()); - assertEquals("world", data.get("hello").get(0)); + assertThat(data).hasSize(1); + assertThat(data.keySet().iterator().next()).isEqualTo("hello"); + assertThat(data.get("hello")).hasSize(1); + assertThat(data.get("hello").get(0)).isEqualTo("world"); assertRendering(input, rendered); } @@ -53,15 +47,11 @@ public void emptyValue() { "\ngreat"; final String rendered = "

great

\n"; - YamlFrontMatterVisitor visitor = new YamlFrontMatterVisitor(); - Node document = PARSER.parse(input); - document.accept(visitor); + Map> data = getFrontMatter(input); - Map> data = visitor.getData(); - - assertEquals(1, data.size()); - assertEquals("key", data.keySet().iterator().next()); - assertEquals(0, data.get("key").size()); + assertThat(data).hasSize(1); + assertThat(data.keySet().iterator().next()).isEqualTo("key"); + assertThat(data.get("key")).hasSize(0); assertRendering(input, rendered); } @@ -77,17 +67,13 @@ public void listValues() { "\ngreat"; final String rendered = "

great

\n"; - YamlFrontMatterVisitor visitor = new YamlFrontMatterVisitor(); - Node document = PARSER.parse(input); - document.accept(visitor); - - Map> data = visitor.getData(); + Map> data = getFrontMatter(input); - assertEquals(1, data.size()); - assertTrue(data.containsKey("list")); - assertEquals(2, data.get("list").size()); - assertEquals("value1", data.get("list").get(0)); - assertEquals("value2", data.get("list").get(1)); + assertThat(data).hasSize(1); + assertThat(data).containsKey("list"); + assertThat(data.get("list")).hasSize(2); + assertThat(data.get("list").get(0)).isEqualTo("value1"); + assertThat(data.get("list").get(1)).isEqualTo("value2"); assertRendering(input, rendered); } @@ -103,16 +89,12 @@ public void literalValue1() { "\ngreat"; final String rendered = "

great

\n"; - YamlFrontMatterVisitor visitor = new YamlFrontMatterVisitor(); - Node document = PARSER.parse(input); - document.accept(visitor); - - Map> data = visitor.getData(); + Map> data = getFrontMatter(input); - assertEquals(1, data.size()); - assertTrue(data.containsKey("literal")); - assertEquals(1, data.get("literal").size()); - assertEquals("hello markdown!\nliteral thing...", data.get("literal").get(0)); + assertThat(data).hasSize(1); + assertThat(data).containsKey("literal"); + assertThat(data.get("literal")).hasSize(1); + assertThat(data.get("literal").get(0)).isEqualTo("hello markdown!\nliteral thing..."); assertRendering(input, rendered); } @@ -127,16 +109,12 @@ public void literalValue2() { "\ngreat"; final String rendered = "

great

\n"; - YamlFrontMatterVisitor visitor = new YamlFrontMatterVisitor(); - Node document = PARSER.parse(input); - document.accept(visitor); - - Map> data = visitor.getData(); + Map> data = getFrontMatter(input); - assertEquals(1, data.size()); - assertTrue(data.containsKey("literal")); - assertEquals(1, data.get("literal").size()); - assertEquals("- hello markdown!", data.get("literal").get(0)); + assertThat(data).hasSize(1); + assertThat(data).containsKey("literal"); + assertThat(data.get("literal")).hasSize(1); + assertThat(data.get("literal").get(0)).isEqualTo("- hello markdown!"); assertRendering(input, rendered); } @@ -156,26 +134,22 @@ public void complexValues() { "\ngreat"; final String rendered = "

great

\n"; - YamlFrontMatterVisitor visitor = new YamlFrontMatterVisitor(); - Node document = PARSER.parse(input); - document.accept(visitor); + Map> data = getFrontMatter(input); - Map> data = visitor.getData(); - - assertEquals(3, data.size()); + assertThat(data).hasSize(3); - assertTrue(data.containsKey("simple")); - assertEquals(1, data.get("simple").size()); - assertEquals("value", data.get("simple").get(0)); + assertThat(data).containsKey("simple"); + assertThat(data.get("simple")).hasSize(1); + assertThat(data.get("simple").get(0)).isEqualTo("value"); - assertTrue(data.containsKey("literal")); - assertEquals(1, data.get("literal").size()); - assertEquals("hello markdown!\n\nliteral literal", data.get("literal").get(0)); + assertThat(data).containsKey("literal"); + assertThat(data.get("literal")).hasSize(1); + assertThat(data.get("literal").get(0)).isEqualTo("hello markdown!\n\nliteral literal"); - assertTrue(data.containsKey("list")); - assertEquals(2, data.get("list").size()); - assertEquals("value1", data.get("list").get(0)); - assertEquals("value2", data.get("list").get(1)); + assertThat(data).containsKey("list"); + assertThat(data.get("list")).hasSize(2); + assertThat(data.get("list").get(0)).isEqualTo("value1"); + assertThat(data.get("list").get(1)).isEqualTo("value2"); assertRendering(input, rendered); } @@ -187,13 +161,9 @@ public void empty() { "test"; final String rendered = "

test

\n"; - YamlFrontMatterVisitor visitor = new YamlFrontMatterVisitor(); - Node document = PARSER.parse(input); - document.accept(visitor); - - Map> data = visitor.getData(); + Map> data = getFrontMatter(input); - assertTrue(data.isEmpty()); + assertThat(data).isEmpty(); assertRendering(input, rendered); } @@ -207,13 +177,9 @@ public void yamlInParagraph() { "\n---"; final String rendered = "

hello

\n

hello markdown world!

\n

hello: world

\n"; - YamlFrontMatterVisitor visitor = new YamlFrontMatterVisitor(); - Node document = PARSER.parse(input); - document.accept(visitor); + Map> data = getFrontMatter(input); - Map> data = visitor.getData(); - - assertTrue(data.isEmpty()); + assertThat(data).isEmpty(); assertRendering(input, rendered); } @@ -226,13 +192,9 @@ public void yamlOnSecondLine() { "\n---"; final String rendered = "

hello

\n
\n

hello: world

\n"; - YamlFrontMatterVisitor visitor = new YamlFrontMatterVisitor(); - Node document = PARSER.parse(input); - document.accept(visitor); - - Map> data = visitor.getData(); + Map> data = getFrontMatter(input); - assertTrue(data.isEmpty()); + assertThat(data).isEmpty(); assertRendering(input, rendered); } @@ -243,13 +205,9 @@ public void nonMatchedStartTag() { "test"; final String rendered = "
\n

test

\n"; - YamlFrontMatterVisitor visitor = new YamlFrontMatterVisitor(); - Node document = PARSER.parse(input); - document.accept(visitor); - - Map> data = visitor.getData(); + Map> data = getFrontMatter(input); - assertTrue(data.isEmpty()); + assertThat(data).isEmpty(); assertRendering(input, rendered); } @@ -261,13 +219,9 @@ public void inList() { "test"; final String rendered = "
    \n
  • \n
    \n
    \n
  • \n
\n

test

\n"; - YamlFrontMatterVisitor visitor = new YamlFrontMatterVisitor(); - Node document = PARSER.parse(input); - document.accept(visitor); - - Map> data = visitor.getData(); + Map> data = getFrontMatter(input); - assertTrue(data.isEmpty()); + assertThat(data).isEmpty(); assertRendering(input, rendered); } @@ -285,9 +239,9 @@ public void visitorIgnoresOtherCustomNodes() { document.accept(visitor); Map> data = visitor.getData(); - assertEquals(1, data.size()); - assertTrue(data.containsKey("hello")); - assertEquals(Collections.singletonList("world"), data.get("hello")); + assertThat(data).hasSize(1); + assertThat(data).containsKey("hello"); + assertThat(data.get("hello")).isEqualTo(List.of("world")); } @Test @@ -300,15 +254,62 @@ public void nodesCanBeModified() { Node document = PARSER.parse(input); YamlFrontMatterNode node = (YamlFrontMatterNode) document.getFirstChild().getFirstChild(); node.setKey("see"); - node.setValues(Collections.singletonList("you")); + node.setValues(List.of("you")); YamlFrontMatterVisitor visitor = new YamlFrontMatterVisitor(); document.accept(visitor); Map> data = visitor.getData(); - assertEquals(1, data.size()); - assertTrue(data.containsKey("see")); - assertEquals(Collections.singletonList("you"), data.get("see")); + assertThat(data).hasSize(1); + assertThat(data).containsKey("see"); + assertThat(data.get("see")).isEqualTo(List.of("you")); + } + + @Test + public void dotInKeys() { + final String input = "---" + + "\nms.author: author" + + "\n---" + + "\n"; + + Map> data = getFrontMatter(input); + + assertThat(data).hasSize(1); + assertThat(data.keySet().iterator().next()).isEqualTo("ms.author"); + assertThat(data.get("ms.author")).hasSize(1); + assertThat(data.get("ms.author").get(0)).isEqualTo("author"); + } + + @Test + public void singleQuotedLiterals() { + final String input = "---" + + "\nstring: 'It''s me'" + + "\nlist:" + + "\n - 'I''m here'" + + "\n---" + + "\n"; + + Map> data = getFrontMatter(input); + + 
assertThat(data).hasSize(2); + assertThat(data.get("string").get(0)).isEqualTo("It's me"); + assertThat(data.get("list").get(0)).isEqualTo("I'm here"); + } + + @Test + public void doubleQuotedLiteral() { + final String input = "---" + + "\nstring: \"backslash: \\\\ quote: \\\"\"" + + "\nlist:" + + "\n - \"hey\"" + + "\n---" + + "\n"; + + Map> data = getFrontMatter(input); + + assertThat(data).hasSize(2); + assertThat(data.get("string").get(0)).isEqualTo("backslash: \\ quote: \""); + assertThat(data.get("list").get(0)).isEqualTo("hey"); } @Override @@ -316,6 +317,15 @@ protected String render(String source) { return RENDERER.render(PARSER.parse(source)); } + private Map> getFrontMatter(String input) { + YamlFrontMatterVisitor visitor = new YamlFrontMatterVisitor(); + Node document = PARSER.parse(input); + document.accept(visitor); + + Map> data = visitor.getData(); + return data; + } + // Custom node for tests private static class TestNode extends CustomNode { } diff --git a/commonmark-integration-test/.settings/org.eclipse.core.runtime.prefs b/commonmark-integration-test/.settings/org.eclipse.core.runtime.prefs deleted file mode 100644 index 5a0ad22d2..000000000 --- a/commonmark-integration-test/.settings/org.eclipse.core.runtime.prefs +++ /dev/null @@ -1,2 +0,0 @@ -eclipse.preferences.version=1 -line.separator=\n diff --git a/commonmark-integration-test/.settings/org.eclipse.jdt.core.prefs b/commonmark-integration-test/.settings/org.eclipse.jdt.core.prefs deleted file mode 100644 index 3c0d27c8f..000000000 --- a/commonmark-integration-test/.settings/org.eclipse.jdt.core.prefs +++ /dev/null @@ -1,290 +0,0 @@ -eclipse.preferences.version=1 -org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7 -org.eclipse.jdt.core.compiler.compliance=1.7 -org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning -org.eclipse.jdt.core.compiler.source=1.7 -org.eclipse.jdt.core.formatter.align_type_members_on_columns=false 
-org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_annotation=0 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=16 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call=16 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation=16 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression=16 -org.eclipse.jdt.core.formatter.alignment_for_assignment=0 -org.eclipse.jdt.core.formatter.alignment_for_binary_expression=16 -org.eclipse.jdt.core.formatter.alignment_for_compact_if=16 -org.eclipse.jdt.core.formatter.alignment_for_conditional_expression=80 -org.eclipse.jdt.core.formatter.alignment_for_enum_constants=0 -org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer=16 -org.eclipse.jdt.core.formatter.alignment_for_method_declaration=0 -org.eclipse.jdt.core.formatter.alignment_for_multiple_fields=16 -org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_resources_in_try=80 -org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation=16 -org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_union_type_in_multicatch=16 -org.eclipse.jdt.core.formatter.blank_lines_after_imports=1 -org.eclipse.jdt.core.formatter.blank_lines_after_package=1 
-org.eclipse.jdt.core.formatter.blank_lines_before_field=0 -org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration=0 -org.eclipse.jdt.core.formatter.blank_lines_before_imports=1 -org.eclipse.jdt.core.formatter.blank_lines_before_member_type=1 -org.eclipse.jdt.core.formatter.blank_lines_before_method=1 -org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk=1 -org.eclipse.jdt.core.formatter.blank_lines_before_package=0 -org.eclipse.jdt.core.formatter.blank_lines_between_import_groups=1 -org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations=1 -org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_array_initializer=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_block=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_block_in_case=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_enum_constant=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_lambda_body=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_method_declaration=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_switch=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_type_declaration=end_of_line -org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment=false -org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment=false -org.eclipse.jdt.core.formatter.comment.format_block_comments=true -org.eclipse.jdt.core.formatter.comment.format_header=false -org.eclipse.jdt.core.formatter.comment.format_html=true -org.eclipse.jdt.core.formatter.comment.format_javadoc_comments=true 
-org.eclipse.jdt.core.formatter.comment.format_line_comments=true -org.eclipse.jdt.core.formatter.comment.format_source_code=true -org.eclipse.jdt.core.formatter.comment.indent_parameter_description=true -org.eclipse.jdt.core.formatter.comment.indent_root_tags=true -org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags=insert -org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter=do not insert -org.eclipse.jdt.core.formatter.comment.line_length=120 -org.eclipse.jdt.core.formatter.comment.new_lines_at_block_boundaries=true -org.eclipse.jdt.core.formatter.comment.new_lines_at_javadoc_boundaries=true -org.eclipse.jdt.core.formatter.comment.preserve_white_space_between_code_and_line_comments=false -org.eclipse.jdt.core.formatter.compact_else_if=true -org.eclipse.jdt.core.formatter.continuation_indentation=2 -org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer=2 -org.eclipse.jdt.core.formatter.disabling_tag=@formatter\:off -org.eclipse.jdt.core.formatter.enabling_tag=@formatter\:on -org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line=false -org.eclipse.jdt.core.formatter.format_line_comment_starting_on_first_column=true -org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header=true -org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header=true -org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header=true -org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header=true -org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases=true -org.eclipse.jdt.core.formatter.indent_empty_lines=false -org.eclipse.jdt.core.formatter.indent_statements_compare_to_block=true -org.eclipse.jdt.core.formatter.indent_statements_compare_to_body=true -org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases=true 
-org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch=true -org.eclipse.jdt.core.formatter.indentation.size=4 -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_field=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_method=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_package=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_type=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_label=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_after_type_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body=insert 
-org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter=insert -org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator=insert -org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_binary_operator=insert -org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters=insert -org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block=insert -org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast=insert -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert=insert -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case=insert -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional=insert -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for=insert -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments=insert 
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters=insert -org.eclipse.jdt.core.formatter.insert_space_after_ellipsis=insert -org.eclipse.jdt.core.formatter.insert_space_after_lambda_arrow=insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer=insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast=do not insert 
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_try=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional=insert -org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for=insert -org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_try_resources=insert -org.eclipse.jdt.core.formatter.insert_space_after_unary_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter=insert -org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator=insert -org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration=insert 
-org.eclipse.jdt.core.formatter.insert_space_before_binary_operator=insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer=insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_try=do not insert 
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert=insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional=insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for=insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations=do not insert 
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_ellipsis=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_lambda_arrow=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression=do 
not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_try=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while=insert -org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return=insert -org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw=insert -org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional=insert 
-org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_semicolon=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_try_resources=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_unary_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation=do not insert -org.eclipse.jdt.core.formatter.join_lines_in_comments=true -org.eclipse.jdt.core.formatter.join_wrapped_lines=false -org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line=false -org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line=false -org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line=false -org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line=false -org.eclipse.jdt.core.formatter.lineSplit=120 -org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column=false -org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column=false -org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body=0 
-org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve=1 -org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line=true -org.eclipse.jdt.core.formatter.tabulation.char=space -org.eclipse.jdt.core.formatter.tabulation.size=4 -org.eclipse.jdt.core.formatter.use_on_off_tags=false -org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations=false -org.eclipse.jdt.core.formatter.wrap_before_binary_operator=true -org.eclipse.jdt.core.formatter.wrap_before_or_operator_multicatch=true -org.eclipse.jdt.core.formatter.wrap_outer_expressions_when_nested=true -org.eclipse.jdt.core.javaFormatter=org.eclipse.jdt.core.defaultJavaFormatter diff --git a/commonmark-integration-test/pom.xml b/commonmark-integration-test/pom.xml index 6f2bdf0fe..7e0048a73 100644 --- a/commonmark-integration-test/pom.xml +++ b/commonmark-integration-test/pom.xml @@ -2,9 +2,9 @@ 4.0.0 - com.atlassian.commonmark + org.commonmark commonmark-parent - 0.14.1-SNAPSHOT + 0.28.1-SNAPSHOT commonmark-integration-test @@ -13,27 +13,43 @@ - com.atlassian.commonmark + org.commonmark commonmark - com.atlassian.commonmark + org.commonmark commonmark-ext-autolink - com.atlassian.commonmark + org.commonmark + commonmark-ext-footnotes + + + org.commonmark commonmark-ext-ins - com.atlassian.commonmark + org.commonmark + commonmark-ext-gfm-alerts + + + org.commonmark commonmark-ext-gfm-strikethrough - com.atlassian.commonmark + org.commonmark commonmark-ext-gfm-tables - com.atlassian.commonmark + org.commonmark + commonmark-ext-image-attributes + + + org.commonmark + commonmark-ext-task-list-items + + + org.commonmark commonmark-ext-yaml-front-matter @@ -45,7 +61,7 @@ - com.atlassian.commonmark + org.commonmark commonmark-test-util test diff --git a/commonmark-integration-test/src/main/java/org/commonmark/integration/IntegrationTests.java b/commonmark-integration-test/src/main/java/org/commonmark/integration/IntegrationTests.java new file mode 100644 index 000000000..48e1ee5ba --- /dev/null +++ 
b/commonmark-integration-test/src/main/java/org/commonmark/integration/IntegrationTests.java @@ -0,0 +1,16 @@ +package org.commonmark.integration; + +// Prevent maven-gpg-plugin from failing with this error: +// The project artifact has not been assembled yet. +// Please do not invoke this goal before the lifecycle phase "package". +// +// Apparently it doesn't like a module that doesn't have any classes in main, +// because that means no jar is generated. +// And the javadoc plugin doesn't like it if there are no classes with documentation, +// so this class carries a Javadoc comment. + +/** + * Module with integration tests. + */ +public class IntegrationTests { +} diff --git a/commonmark-integration-test/src/main/resources/META-INF/LICENSE.txt b/commonmark-integration-test/src/main/resources/META-INF/LICENSE.txt new file mode 100644 index 000000000..b09e367ce --- /dev/null +++ b/commonmark-integration-test/src/main/resources/META-INF/LICENSE.txt @@ -0,0 +1,23 @@ +Copyright (c) 2015, Atlassian Pty Ltd +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/commonmark-integration-test/src/test/java/org/commonmark/integration/BoundsIntegrationTest.java b/commonmark-integration-test/src/test/java/org/commonmark/integration/BoundsIntegrationTest.java index 8ee15164a..f1259b825 100644 --- a/commonmark-integration-test/src/test/java/org/commonmark/integration/BoundsIntegrationTest.java +++ b/commonmark-integration-test/src/test/java/org/commonmark/integration/BoundsIntegrationTest.java @@ -3,39 +3,30 @@ import org.commonmark.node.Node; import org.commonmark.parser.Parser; import org.commonmark.testutil.TestResources; -import org.commonmark.testutil.example.Example; import org.commonmark.testutil.example.ExampleReader; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.Parameter; +import org.junit.jupiter.params.ParameterizedClass; +import org.junit.jupiter.params.provider.MethodSource; -import java.util.ArrayList; import java.util.List; -import static org.junit.Assert.assertNotNull; +import static org.assertj.core.api.Assertions.assertThat; /** * Tests various substrings of the spec examples to check for out of bounds exceptions. 
*/ -@RunWith(Parameterized.class) +@ParameterizedClass +@MethodSource("data") public class BoundsIntegrationTest { private static final Parser PARSER = Parser.builder().build(); - protected final String input; + @Parameter + String input; - public BoundsIntegrationTest(String input) { - this.input = input; - } - - @Parameterized.Parameters(name = "{0}") - public static List data() { - List examples = ExampleReader.readExamples(TestResources.getSpec()); - List data = new ArrayList<>(); - for (Example example : examples) { - data.add(new Object[]{example.getSource()}); - } - return data; + static List data() { + return ExampleReader.readExampleSources(TestResources.getSpec()); } @Test @@ -54,7 +45,7 @@ private void parse(String input) { try { Node parsed = PARSER.parse(input); // Parsing should always return a node - assertNotNull(parsed); + assertThat(parsed).isNotNull(); } catch (Exception e) { throw new AssertionError("Parsing failed, input: " + input, e); } diff --git a/commonmark-integration-test/src/test/java/org/commonmark/integration/Extensions.java b/commonmark-integration-test/src/test/java/org/commonmark/integration/Extensions.java new file mode 100644 index 000000000..9090c797f --- /dev/null +++ b/commonmark-integration-test/src/test/java/org/commonmark/integration/Extensions.java @@ -0,0 +1,28 @@ +package org.commonmark.integration; + +import org.commonmark.Extension; +import org.commonmark.ext.autolink.AutolinkExtension; +import org.commonmark.ext.footnotes.FootnotesExtension; +import org.commonmark.ext.front.matter.YamlFrontMatterExtension; +import org.commonmark.ext.gfm.alerts.AlertsExtension; +import org.commonmark.ext.gfm.strikethrough.StrikethroughExtension; +import org.commonmark.ext.gfm.tables.TablesExtension; +import org.commonmark.ext.image.attributes.ImageAttributesExtension; +import org.commonmark.ext.ins.InsExtension; +import org.commonmark.ext.task.list.items.TaskListItemsExtension; + +import java.util.List; + +public class Extensions { + + 
static final List ALL_EXTENSIONS = List.of( + AutolinkExtension.create(), + FootnotesExtension.create(), + ImageAttributesExtension.create(), + InsExtension.create(), + AlertsExtension.create(), + StrikethroughExtension.create(), + TablesExtension.create(), + TaskListItemsExtension.create(), + YamlFrontMatterExtension.create()); +} diff --git a/commonmark-integration-test/src/test/java/org/commonmark/integration/ExtensionsIntegrationTest.java b/commonmark-integration-test/src/test/java/org/commonmark/integration/ExtensionsIntegrationTest.java new file mode 100644 index 000000000..523154d2c --- /dev/null +++ b/commonmark-integration-test/src/test/java/org/commonmark/integration/ExtensionsIntegrationTest.java @@ -0,0 +1,38 @@ +package org.commonmark.integration; + +import org.commonmark.parser.Parser; +import org.commonmark.renderer.html.HtmlRenderer; +import org.commonmark.testutil.RenderingTestCase; +import org.junit.jupiter.api.Test; + +/** + * Tests to ensure all extensions work well together. + */ +public class ExtensionsIntegrationTest extends RenderingTestCase { + + protected static final Parser PARSER = Parser.builder() + .extensions(Extensions.ALL_EXTENSIONS) + .build(); + protected static final HtmlRenderer RENDERER = HtmlRenderer.builder() + .extensions(Extensions.ALL_EXTENSIONS) + .percentEncodeUrls(true) + .build(); + + @Test + public void testImageAttributes() { + assertRendering("![text](/url.png){height=5 width=6}", "

\"text\"

\n"); + } + + @Test + public void testTaskListItems() { + assertRendering("- [ ] task to do\n- [x] task done\n", + "
    \n
  • task to do
  • \n" + + "
  • task done
  • \n
\n"); + + } + + @Override + protected String render(String source) { + return RENDERER.render(PARSER.parse(source)); + } +} diff --git a/commonmark-integration-test/src/test/java/org/commonmark/integration/MarkdownRendererIntegrationTest.java b/commonmark-integration-test/src/test/java/org/commonmark/integration/MarkdownRendererIntegrationTest.java new file mode 100644 index 000000000..fe14273ab --- /dev/null +++ b/commonmark-integration-test/src/test/java/org/commonmark/integration/MarkdownRendererIntegrationTest.java @@ -0,0 +1,37 @@ +package org.commonmark.integration; + +import org.commonmark.Extension; +import org.commonmark.ext.autolink.AutolinkExtension; +import org.commonmark.ext.front.matter.YamlFrontMatterExtension; +import org.commonmark.ext.gfm.strikethrough.StrikethroughExtension; +import org.commonmark.ext.gfm.tables.TablesExtension; +import org.commonmark.ext.image.attributes.ImageAttributesExtension; +import org.commonmark.ext.ins.InsExtension; +import org.commonmark.ext.task.list.items.TaskListItemsExtension; +import org.commonmark.parser.Parser; +import org.commonmark.renderer.markdown.MarkdownRenderer; +import org.junit.jupiter.api.Test; + +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; + +public class MarkdownRendererIntegrationTest { + + private static final Parser PARSER = Parser.builder().extensions(Extensions.ALL_EXTENSIONS).build(); + private static final MarkdownRenderer RENDERER = MarkdownRenderer.builder().extensions(Extensions.ALL_EXTENSIONS).build(); + + @Test + public void testStrikethroughInTable() { + assertRoundTrip("|Abc|\n|---|\n|~strikethrough~|\n|\\~escaped\\~|\n"); + } + + private String render(String source) { + return RENDERER.render(PARSER.parse(source)); + } + + private void assertRoundTrip(String input) { + String rendered = render(input); + assertThat(rendered).isEqualTo(input); + } +} diff --git 
a/commonmark-integration-test/src/test/java/org/commonmark/integration/PegDownBenchmark.java b/commonmark-integration-test/src/test/java/org/commonmark/integration/PegDownBenchmark.java index 7b61242f4..ecc9c2cfd 100644 --- a/commonmark-integration-test/src/test/java/org/commonmark/integration/PegDownBenchmark.java +++ b/commonmark-integration-test/src/test/java/org/commonmark/integration/PegDownBenchmark.java @@ -12,7 +12,6 @@ import org.pegdown.Extensions; import org.pegdown.PegDownProcessor; -import java.util.Collections; import java.util.List; @State(Scope.Benchmark) @@ -32,7 +31,7 @@ public static void main(String[] args) throws Exception { @Benchmark public long wholeSpec() { - return parseAndRender(Collections.singletonList(SPEC)); + return parseAndRender(List.of(SPEC)); } @Benchmark diff --git a/commonmark-integration-test/src/test/java/org/commonmark/integration/SourceSpanIntegrationTest.java b/commonmark-integration-test/src/test/java/org/commonmark/integration/SourceSpanIntegrationTest.java new file mode 100644 index 000000000..171cc51b1 --- /dev/null +++ b/commonmark-integration-test/src/test/java/org/commonmark/integration/SourceSpanIntegrationTest.java @@ -0,0 +1,21 @@ +package org.commonmark.integration; + +import org.commonmark.parser.IncludeSourceSpans; +import org.commonmark.parser.Parser; +import org.commonmark.testutil.example.Example; + +/** + * Spec and all extensions, with source spans enabled. 
+ */ +public class SourceSpanIntegrationTest extends SpecIntegrationTest { + + protected static final Parser PARSER = Parser.builder() + .extensions(Extensions.ALL_EXTENSIONS) + .includeSourceSpans(IncludeSourceSpans.BLOCKS) + .build(); + + @Override + protected String render(String source) { + return RENDERER.render(PARSER.parse(source)); + } +} diff --git a/commonmark-integration-test/src/test/java/org/commonmark/integration/SpecIntegrationTest.java b/commonmark-integration-test/src/test/java/org/commonmark/integration/SpecIntegrationTest.java index 6462f4094..07853d402 100644 --- a/commonmark-integration-test/src/test/java/org/commonmark/integration/SpecIntegrationTest.java +++ b/commonmark-integration-test/src/test/java/org/commonmark/integration/SpecIntegrationTest.java @@ -1,51 +1,35 @@ package org.commonmark.integration; -import org.commonmark.Extension; -import org.commonmark.ext.autolink.AutolinkExtension; -import org.commonmark.ext.ins.InsExtension; -import org.commonmark.ext.gfm.strikethrough.StrikethroughExtension; -import org.commonmark.ext.gfm.tables.TablesExtension; -import org.commonmark.ext.front.matter.YamlFrontMatterExtension; import org.commonmark.renderer.html.HtmlRenderer; import org.commonmark.parser.Parser; import org.commonmark.testutil.example.Example; import org.commonmark.testutil.SpecTestCase; -import org.junit.Test; +import org.junit.jupiter.api.Test; import java.util.*; +import static org.commonmark.testutil.Asserts.assertRendering; + /** * Tests that the spec examples still render the same with all extensions enabled. 
*/ public class SpecIntegrationTest extends SpecTestCase { - private static final List EXTENSIONS = Arrays.asList( - AutolinkExtension.create(), - InsExtension.create(), - StrikethroughExtension.create(), - TablesExtension.create(), - YamlFrontMatterExtension.create()); - private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build(); + protected static final Parser PARSER = Parser.builder().extensions(Extensions.ALL_EXTENSIONS).build(); // The spec says URL-escaping is optional, but the examples assume that it's enabled. - private static final HtmlRenderer RENDERER = HtmlRenderer.builder().extensions(EXTENSIONS).percentEncodeUrls(true).build(); - private static final Map OVERRIDDEN_EXAMPLES = getOverriddenExamples(); - - public SpecIntegrationTest(Example example) { - super(example); - } + protected static final HtmlRenderer RENDERER = HtmlRenderer.builder().extensions(Extensions.ALL_EXTENSIONS).percentEncodeUrls(true).build(); + protected static final Map OVERRIDDEN_EXAMPLES = getOverriddenExamples(); @Test - @Override public void testHtmlRendering() { String expectedHtml = OVERRIDDEN_EXAMPLES.get(example.getSource()); if (expectedHtml != null) { - assertRendering(example.getSource(), expectedHtml); + assertRendering(example.getSource(), expectedHtml, render(example.getSource())); } else { - super.testHtmlRendering(); + assertRendering(example.getSource(), example.getHtml(), render(example.getSource())); } } - @Override protected String render(String source) { return RENDERER.render(PARSER.parse(source)); } @@ -54,7 +38,7 @@ private static Map getOverriddenExamples() { Map m = new HashMap<>(); // Not a spec autolink because of space, but the resulting text contains a valid URL - m.put("\n", "

<http://foo.bar/baz bim>

\n"); + m.put("\n", "

<https://foo.bar/baz bim>

\n"); // Not a spec autolink, but the resulting text contains a valid email m.put("\n", "

<foo+@bar.example.com>

\n"); @@ -63,10 +47,10 @@ private static Map getOverriddenExamples() { m.put("\n", "

<heck://bing.bong>

\n"); // Not a spec autolink because of spaces, but autolink extension doesn't limit schemes - m.put("< http://foo.bar >\n", "

< http://foo.bar >

\n"); + m.put("< https://foo.bar >\n", "

< https://foo.bar >

\n"); // Plain autolink - m.put("http://example.com\n", "

http://example.com

\n"); + m.put("https://example.com\n", "

https://example.com

\n"); // Plain autolink m.put("foo@bar.example.com\n", "

foo@bar.example.com

\n"); diff --git a/commonmark-integration-test/src/test/java/org/commonmark/ui/DingusApp.java b/commonmark-integration-test/src/test/java/org/commonmark/ui/DingusApp.java new file mode 100644 index 000000000..0e98386bb --- /dev/null +++ b/commonmark-integration-test/src/test/java/org/commonmark/ui/DingusApp.java @@ -0,0 +1,114 @@ +package org.commonmark.ui; + +import org.commonmark.parser.Parser; +import org.commonmark.renderer.html.HtmlRenderer; +import org.commonmark.renderer.text.TextContentRenderer; + +import java.awt.*; +import javax.swing.*; +import javax.swing.event.ChangeEvent; +import javax.swing.event.ChangeListener; +import javax.swing.event.DocumentEvent; +import javax.swing.event.DocumentListener; + +/** + * Simple UI to quickly test out different rendering of CommonMark inputs. + * Similar to commonmark.js dingus. + **/ +public class DingusApp { + + private final Parser parser = Parser.builder().build(); + private final TextContentRenderer textRenderer = TextContentRenderer.builder().build(); + private final HtmlRenderer htmlRenderer = HtmlRenderer.builder().build(); + + private final JTabbedPane tabbedPane; + private final JEditorPane htmlVisualRendererOutput; + private final JTextArea htmlSourceRendererOutput; + private final JTextArea textRendererOutput; + + public static void main(String[] args) { + new DingusApp().run(); + } + + private DingusApp() { + tabbedPane = new JTabbedPane(); + + htmlVisualRendererOutput = new JEditorPane(); + htmlVisualRendererOutput.setEnabled(false); + htmlVisualRendererOutput.setContentType("text/html"); + + htmlSourceRendererOutput = new JTextArea(); + htmlSourceRendererOutput.setEnabled(false); + htmlSourceRendererOutput.setLineWrap(true); + htmlSourceRendererOutput.setFont(new Font(Font.MONOSPACED, Font.PLAIN, 12)); + + textRendererOutput = new JTextArea(); + textRendererOutput.setEnabled(false); + textRendererOutput.setLineWrap(true); + textRendererOutput.setFont(new Font(Font.MONOSPACED, Font.PLAIN, 12)); + } + + 
private void run() { + JFrame frame = new JFrame("commonmark-java dingus"); + frame.setDefaultCloseOperation(WindowConstants.EXIT_ON_CLOSE); + frame.setMinimumSize(new Dimension(400, 300)); + frame.setSize(new Dimension(1200, 675)); + + final JTextArea input = new JTextArea(); + input.setBorder(BorderFactory.createTitledBorder("Input")); + input.setLineWrap(true); + input.setFont(new Font(Font.MONOSPACED, Font.PLAIN, 12)); + + input.getDocument().addDocumentListener(new DocumentListener() { + @Override + public void insertUpdate(DocumentEvent e) { + updateOutput(input.getText()); + } + + @Override + public void removeUpdate(DocumentEvent e) { + updateOutput(input.getText()); + } + + @Override + public void changedUpdate(DocumentEvent e) { + } + }); + + tabbedPane.addTab("HTML rendered", htmlVisualRendererOutput); + tabbedPane.addTab("HTML source", htmlSourceRendererOutput); + tabbedPane.addTab("Plain text", textRendererOutput); + + tabbedPane.addChangeListener(new ChangeListener() { + @Override + public void stateChanged(ChangeEvent e) { + updateOutput(input.getText()); + } + }); + + input.setText("# Example\n" + + "Enter text *here* and see how it renders on the right.\n\n" + + "* Try\n* this\n\n" + + "```\nor this\n```"); + updateOutput(input.getText()); + + frame.setLayout(new GridLayout()); + frame.add(input); + frame.add(tabbedPane); + + frame.setVisible(true); + } + + private void updateOutput(String inputText) { + if (tabbedPane.getSelectedComponent() == htmlVisualRendererOutput) { + String rendered = htmlRenderer.render(parser.parse(inputText)); + htmlVisualRendererOutput.setText(rendered); + } else if (tabbedPane.getSelectedComponent() == htmlSourceRendererOutput) { + String rendered = htmlRenderer.render(parser.parse(inputText)); + htmlSourceRendererOutput.setText(rendered); + } else if (tabbedPane.getSelectedComponent() == textRendererOutput) { + String rendered = textRenderer.render(parser.parse(inputText)); + textRendererOutput.setText(rendered); + } + 
} +} diff --git a/commonmark-test-util/pom.xml b/commonmark-test-util/pom.xml index efb27f330..6a9c342cc 100644 --- a/commonmark-test-util/pom.xml +++ b/commonmark-test-util/pom.xml @@ -2,9 +2,9 @@ 4.0.0 - com.atlassian.commonmark + org.commonmark commonmark-parent - 0.14.1-SNAPSHOT + 0.28.1-SNAPSHOT commonmark-test-util @@ -13,25 +13,13 @@ - junit - junit + org.junit.jupiter + junit-jupiter + + + org.assertj + assertj-core - - - - org.apache.maven.plugins - maven-jar-plugin - - - - org.commonmark.testutil - - - - - - - diff --git a/commonmark-test-util/src/main/java/module-info.java b/commonmark-test-util/src/main/java/module-info.java new file mode 100644 index 000000000..12980d80a --- /dev/null +++ b/commonmark-test-util/src/main/java/module-info.java @@ -0,0 +1,7 @@ +module org.commonmark.testutil { + exports org.commonmark.testutil; + exports org.commonmark.testutil.example; + + requires org.assertj.core; + requires org.junit.jupiter.params; +} diff --git a/commonmark-test-util/src/main/java/org/commonmark/testutil/Asserts.java b/commonmark-test-util/src/main/java/org/commonmark/testutil/Asserts.java new file mode 100644 index 000000000..971a1b4ea --- /dev/null +++ b/commonmark-test-util/src/main/java/org/commonmark/testutil/Asserts.java @@ -0,0 +1,17 @@ +package org.commonmark.testutil; + +import static org.assertj.core.api.Assertions.assertThat; + +public class Asserts { + public static void assertRendering(String source, String expectedRendering, String actualRendering) { + // include source for better assertion errors + String expected = showTabs(expectedRendering + "\n\n" + source); + String actual = showTabs(actualRendering + "\n\n" + source); + assertThat(actual).isEqualTo(expected); + } + + private static String showTabs(String s) { + // Tabs are shown as "rightwards arrow" for easier comparison + return s.replace("\t", "\u2192"); + } +} diff --git a/commonmark-test-util/src/main/java/org/commonmark/testutil/RenderingTestCase.java 
b/commonmark-test-util/src/main/java/org/commonmark/testutil/RenderingTestCase.java index 682123494..f7da4c008 100644 --- a/commonmark-test-util/src/main/java/org/commonmark/testutil/RenderingTestCase.java +++ b/commonmark-test-util/src/main/java/org/commonmark/testutil/RenderingTestCase.java @@ -1,22 +1,12 @@ package org.commonmark.testutil; -import static org.junit.Assert.assertEquals; +import static org.assertj.core.api.Assertions.assertThat; public abstract class RenderingTestCase { protected abstract String render(String source); protected void assertRendering(String source, String expectedResult) { - String renderedContent = render(source); - - // include source for better assertion errors - String expected = showTabs(expectedResult + "\n\n" + source); - String actual = showTabs(renderedContent + "\n\n" + source); - assertEquals(expected, actual); - } - - private static String showTabs(String s) { - // Tabs are shown as "rightwards arrow" for easier comparison - return s.replace("\t", "\u2192"); + Asserts.assertRendering(source, expectedResult, render(source)); } } diff --git a/commonmark-test-util/src/main/java/org/commonmark/testutil/SpecTestCase.java b/commonmark-test-util/src/main/java/org/commonmark/testutil/SpecTestCase.java index 1c35b7c28..c29a6a69a 100644 --- a/commonmark-test-util/src/main/java/org/commonmark/testutil/SpecTestCase.java +++ b/commonmark-test-util/src/main/java/org/commonmark/testutil/SpecTestCase.java @@ -2,36 +2,22 @@ import org.commonmark.testutil.example.Example; import org.commonmark.testutil.example.ExampleReader; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; +import org.junit.jupiter.params.Parameter; +import org.junit.jupiter.params.ParameterizedClass; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import java.util.ArrayList; import java.util.List; 
-@RunWith(Parameterized.class) -public abstract class SpecTestCase extends RenderingTestCase { +@ParameterizedClass +@MethodSource("data") +public abstract class SpecTestCase { - protected final Example example; + @Parameter + protected Example example; - public SpecTestCase(Example example) { - this.example = example; + static List data() { + return ExampleReader.readExamples(TestResources.getSpec()); } - - @Parameters(name = "{0}") - public static List data() { - List examples = ExampleReader.readExamples(TestResources.getSpec()); - List data = new ArrayList<>(); - for (Example example : examples) { - data.add(new Object[]{example}); - } - return data; - } - - @Test - public void testHtmlRendering() { - assertRendering(example.getSource(), example.getHtml()); - } - } diff --git a/commonmark-test-util/src/main/java/org/commonmark/testutil/Strings.java b/commonmark-test-util/src/main/java/org/commonmark/testutil/Strings.java deleted file mode 100644 index ed709ed81..000000000 --- a/commonmark-test-util/src/main/java/org/commonmark/testutil/Strings.java +++ /dev/null @@ -1,12 +0,0 @@ -package org.commonmark.testutil; - -public class Strings { - - public static String repeat(String s, int count) { - StringBuilder sb = new StringBuilder(s.length() * count); - for (int i = 0; i < count; i++) { - sb.append(s); - } - return sb.toString(); - } -} diff --git a/commonmark-test-util/src/main/java/org/commonmark/testutil/TestResources.java b/commonmark-test-util/src/main/java/org/commonmark/testutil/TestResources.java index 8f6f5c071..5af649a86 100644 --- a/commonmark-test-util/src/main/java/org/commonmark/testutil/TestResources.java +++ b/commonmark-test-util/src/main/java/org/commonmark/testutil/TestResources.java @@ -4,8 +4,7 @@ import java.io.IOException; import java.io.InputStreamReader; import java.net.URL; -import java.nio.charset.Charset; -import java.util.Arrays; +import java.nio.charset.StandardCharsets; import java.util.List; public class TestResources { @@ -14,8 
+13,12 @@ public static URL getSpec() { return TestResources.class.getResource("/spec.txt"); } + public static URL getGfmSpec() { + return TestResources.class.getResource("/gfm-spec.txt"); + } + public static List getRegressions() { - return Arrays.asList( + return List.of( TestResources.class.getResource("/cmark-regression.txt"), TestResources.class.getResource("/commonmark.js-regression.txt") ); @@ -23,7 +26,7 @@ public static List getRegressions() { public static String readAsString(URL url) { StringBuilder sb = new StringBuilder(); - try (BufferedReader reader = new BufferedReader(new InputStreamReader(url.openStream(), Charset.forName("UTF-8")))) { + try (BufferedReader reader = new BufferedReader(new InputStreamReader(url.openStream(), StandardCharsets.UTF_8))) { String line; while ((line = reader.readLine()) != null) { sb.append(line); diff --git a/commonmark-test-util/src/main/java/org/commonmark/testutil/example/Example.java b/commonmark-test-util/src/main/java/org/commonmark/testutil/example/Example.java index 417a66097..11e87d0aa 100644 --- a/commonmark-test-util/src/main/java/org/commonmark/testutil/example/Example.java +++ b/commonmark-test-util/src/main/java/org/commonmark/testutil/example/Example.java @@ -30,6 +30,10 @@ public String getHtml() { return html; } + public String getSection() { + return section; + } + @Override public String toString() { return "File \"" + filename + "\" section \"" + section + "\" example " + exampleNumber; diff --git a/commonmark-test-util/src/main/java/org/commonmark/testutil/example/ExampleReader.java b/commonmark-test-util/src/main/java/org/commonmark/testutil/example/ExampleReader.java index 0972a227f..d40a10f63 100644 --- a/commonmark-test-util/src/main/java/org/commonmark/testutil/example/ExampleReader.java +++ b/commonmark-test-util/src/main/java/org/commonmark/testutil/example/ExampleReader.java @@ -2,11 +2,12 @@ import java.io.*; import java.net.URL; -import java.nio.charset.Charset; +import 
java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Collectors; /** * Reader for files containing examples of CommonMark source and the expected HTML rendering (e.g. spec.txt). @@ -42,6 +43,15 @@ public static List readExamples(URL url) { } } + public static List readExamples(URL url, String info) { + var examples = readExamples(url); + return examples.stream().filter(e -> e.getInfo().contains(info)).collect(Collectors.toList()); + } + + public static List readExampleObjects(URL url, String info) { + return readExamples(url, info).stream().map(e -> new Object[]{e}).collect(Collectors.toList()); + } + public static List readExampleSources(URL url) { List examples = ExampleReader.readExamples(url); List result = new ArrayList<>(); @@ -55,7 +65,7 @@ private List read() throws IOException { resetContents(); try (BufferedReader reader = new BufferedReader( - new InputStreamReader(inputStream, Charset.forName("UTF-8")))) { + new InputStreamReader(inputStream, StandardCharsets.UTF_8))) { String line; while ((line = reader.readLine()) != null) { processLine(line); diff --git a/commonmark-test-util/src/main/resources/META-INF/LICENSE.txt b/commonmark-test-util/src/main/resources/META-INF/LICENSE.txt new file mode 100644 index 000000000..b09e367ce --- /dev/null +++ b/commonmark-test-util/src/main/resources/META-INF/LICENSE.txt @@ -0,0 +1,23 @@ +Copyright (c) 2015, Atlassian Pty Ltd +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. 
+ +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/commonmark-test-util/src/main/resources/README.md b/commonmark-test-util/src/main/resources/README.md index a749c59b5..f51e88358 100644 --- a/commonmark-test-util/src/main/resources/README.md +++ b/commonmark-test-util/src/main/resources/README.md @@ -1,6 +1,6 @@ These files are copied from the CommonMark repositories, namely: -https://github.com/commonmark/CommonMark/blob/master/spec.txt +https://github.com/commonmark/commonmark-spec/blob/master/spec.txt https://github.com/commonmark/cmark/blob/master/test/regression.txt https://github.com/commonmark/commonmark.js/blob/master/test/regression.txt diff --git a/commonmark-test-util/src/main/resources/cmark-regression.txt b/commonmark-test-util/src/main/resources/cmark-regression.txt index 62b1e7efe..5f1dc5e24 100644 --- a/commonmark-test-util/src/main/resources/cmark-regression.txt +++ b/commonmark-test-util/src/main/resources/cmark-regression.txt @@ -4,7 +4,8 @@ Issue #113: EOL character weirdness on Windows (Important: first line ends with 
CR + CR + LF) ```````````````````````````````` example -line1 +line1 + line2 .

line1

@@ -154,3 +155,39 @@ Issue #289. .

[a](<b) c>

```````````````````````````````` + +Issue #334 - UTF-8 BOM + +```````````````````````````````` example +# Hi +. +

Hi

+```````````````````````````````` + +Issue commonmark.js#213 - type 7 blocks can't interrupt +paragraph + +```````````````````````````````` example +- +. +
    +
  • +
  • +
+```````````````````````````````` + +Issue #383 - emphasis parsing. + +```````````````````````````````` example +*****Hello*world**** +. +

**Helloworld

+```````````````````````````````` + diff --git a/commonmark-test-util/src/main/resources/commonmark.js-regression.txt b/commonmark-test-util/src/main/resources/commonmark.js-regression.txt index ec5143eff..16a0e8c35 100644 --- a/commonmark-test-util/src/main/resources/commonmark.js-regression.txt +++ b/commonmark-test-util/src/main/resources/commonmark.js-regression.txt @@ -80,7 +80,7 @@ Issue jgm/CommonMark#468 - backslash at end of link definition

[]: test

```````````````````````````````` -Issue jgm/commonmark.js#121 - punctuation set different +Issue commonmark/commonmark.js#121 - punctuation set different ```````````````````````````````` example ^_test_ @@ -122,7 +122,15 @@ Double-encoding. ```````````````````````````````` example [XSS](javascript&colon;alert%28'XSS'%29) . -

XSS

+

XSS

+```````````````````````````````` + +PR #179 + +```````````````````````````````` example +[link](https://www.example.com/home/%25batty) +. +

link

```````````````````````````````` Issue commonamrk#517 - script, pre, style close tag without @@ -158,4 +166,53 @@ text text

```````````````````````````````` +Issue #196. + +```````````````````````````````` example +a +. +

a

+```````````````````````````````` + +Issue #211 + +```````````````````````````````` example +[\ +foo]: /uri + +[\ +foo] +. +


+foo

+```````````````````````````````` + +Issue #213 - type 7 blocks can't interrupt +paragraph + +```````````````````````````````` example +- +. +
    +
  • +
  • +
+```````````````````````````````` + +Issue cmark/#383 - emphasis parsing. + +```````````````````````````````` example +*****Hello*world**** +. +

**Helloworld

+```````````````````````````````` diff --git a/commonmark-ext-gfm-tables/src/test/resources/gfm-spec.txt b/commonmark-test-util/src/main/resources/gfm-spec.txt similarity index 99% rename from commonmark-ext-gfm-tables/src/test/resources/gfm-spec.txt rename to commonmark-test-util/src/main/resources/gfm-spec.txt index 582131d70..d42f3369e 100644 --- a/commonmark-ext-gfm-tables/src/test/resources/gfm-spec.txt +++ b/commonmark-test-util/src/main/resources/gfm-spec.txt @@ -130,7 +130,7 @@ questions it does not answer: not require that. This is hardly a "corner case," and divergences between implementations on this issue often lead to surprises for users in real documents. (See [this comment by John - Gruber](http://article.gmane.org/gmane.text.markdown.general/1997).) + Gruber](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/1997).) 2. Is a blank line needed before a block quote or heading? Most implementations do not require the blank line. However, @@ -138,7 +138,7 @@ questions it does not answer: also to ambiguities in parsing (note that some implementations put the heading inside the blockquote, while others do not). (John Gruber has also spoken [in favor of requiring the blank - lines](http://article.gmane.org/gmane.text.markdown.general/2146).) + lines](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/2146).) 3. Is a blank line needed before an indented code block? (`Markdown.pl` requires it, but this is not mentioned in the @@ -171,7 +171,7 @@ questions it does not answer: ``` (There are some relevant comments by John Gruber - [here](http://article.gmane.org/gmane.text.markdown.general/2554).) + [here](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/2554).) 5. Can list markers be indented? Can ordered list markers be right-aligned? 
@@ -1001,10 +1001,7 @@ interpretable as a [code fence], [ATX heading][ATX headings], A [setext heading underline](@) is a sequence of `=` characters or a sequence of `-` characters, with no more than 3 -spaces indentation and any number of trailing spaces. If a line -containing a single `-` can be interpreted as an -empty [list items], it should be interpreted this way -and not as a [setext heading underline]. +spaces of indentation and any number of trailing spaces or tabs. The heading is a level 1 heading if `=` characters are used in the [setext heading underline], and a level 2 heading if `-` @@ -1638,7 +1635,7 @@ has been found, the code block contains all of the lines after the opening code fence until the end of the containing block (or document). (An alternative spec would require backtracking in the event that a closing code fence is not found. But this makes parsing -much less efficient, and there seems to be no real down side to the +much less efficient, and there seems to be no real downside to the behavior described here.) A fenced code block may interrupt a paragraph, and does not require @@ -2068,7 +2065,7 @@ followed by an uppercase ASCII letter.\ ``. -6. **Start condition:** line begins the string `<` or ``, or the string `/>`.\ @@ -5279,7 +5276,7 @@ well. ([reStructuredText](http://docutils.sourceforge.net/rst.html) takes a different approach, requiring blank lines before lists even inside other list items.) -In order to solve of unwanted lists in paragraphs with +In order to solve the problem of unwanted lists in paragraphs with hard-wrapped numerals, we allow only lists starting with `1` to interrupt paragraphs. Thus, @@ -6929,7 +6926,7 @@ foo__bar__ ```````````````````````````````` example __foo, __bar__, baz__ . -

foo, bar, baz

+

foo, bar, baz

```````````````````````````````` @@ -7200,7 +7197,7 @@ foo***bar***baz ```````````````````````````````` example foo******bar*********baz . -

foobar***baz

+

foobar***baz

```````````````````````````````` @@ -7271,21 +7268,21 @@ __foo _bar_ baz__ ```````````````````````````````` example __foo __bar__ baz__ . -

foo bar baz

+

foo bar baz

```````````````````````````````` ```````````````````````````````` example ____foo__ bar__ . -

foo bar

+

foo bar

```````````````````````````````` ```````````````````````````````` example **foo **bar**** . -

foo bar

+

foo bar

```````````````````````````````` @@ -7570,14 +7567,14 @@ switching delimiters: ```````````````````````````````` example ****foo**** . -

foo

+

foo

```````````````````````````````` ```````````````````````````````` example ____foo____ . -

foo

+

foo

```````````````````````````````` @@ -7588,7 +7585,7 @@ delimiters: ```````````````````````````````` example ******foo****** . -

foo

+

foo

```````````````````````````````` @@ -7604,7 +7601,7 @@ Rule 14: ```````````````````````````````` example _____foo_____ . -

foo

+

foo

```````````````````````````````` @@ -9410,10 +9407,9 @@ character, and a `>` character. A [closing tag](@) consists of the string ``. -An [HTML comment](@) consists of ``, -where *text* does not start with `>` or `->`, does not end with `-`, -and does not contain `--`. (See the -[HTML5 spec](http://www.w3.org/TR/html5/syntax.html#comments).) +An [HTML comment](@) consists of ``, ``, or ``, and `-->` (see the +[HTML spec](https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state)). A [processing instruction](@) consists of the string ` -. -

foo

-```````````````````````````````` - - -```````````````````````````````` example -foo +foo . -

foo <!-- not a comment -- two hyphens -->

+

foo

```````````````````````````````` - -Not comments: - ```````````````````````````````` example foo foo --> -foo +foo foo --> . -

foo <!--> foo -->

-

foo <!-- foo--->

+

foo foo -->

+

foo foo -->

```````````````````````````````` @@ -10224,4 +10210,3 @@ closers: After we're done, we remove all delimiters above `stack_bottom` from the delimiter stack. - diff --git a/commonmark-test-util/src/main/resources/spec.txt b/commonmark-test-util/src/main/resources/spec.txt index 3913de442..f1fab281e 100644 --- a/commonmark-test-util/src/main/resources/spec.txt +++ b/commonmark-test-util/src/main/resources/spec.txt @@ -1,9 +1,9 @@ --- title: CommonMark Spec author: John MacFarlane -version: 0.29 -date: '2019-04-06' -license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)' +version: '0.31.2' +date: '2024-01-28' +license: '[CC-BY-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0/)' ... # Introduction @@ -14,7 +14,7 @@ Markdown is a plain text format for writing structured documents, based on conventions for indicating formatting in email and usenet posts. It was developed by John Gruber (with help from Aaron Swartz) and released in 2004 in the form of a -[syntax description](http://daringfireball.net/projects/markdown/syntax) +[syntax description](https://daringfireball.net/projects/markdown/syntax) and a Perl script (`Markdown.pl`) for converting Markdown to HTML. In the next decade, dozens of implementations were developed in many languages. Some extended the original @@ -34,10 +34,10 @@ As Gruber writes: > Markdown-formatted document should be publishable as-is, as > plain text, without looking like it's been marked up with tags > or formatting instructions. -> () +> () The point can be illustrated by comparing a sample of -[AsciiDoc](http://www.methods.co.nz/asciidoc/) with +[AsciiDoc](https://asciidoc.org/) with an equivalent sample of Markdown. Here is a sample of AsciiDoc from the AsciiDoc manual: @@ -103,7 +103,7 @@ source, not just in the processed document. ## Why is a spec needed? 
John Gruber's [canonical description of Markdown's -syntax](http://daringfireball.net/projects/markdown/syntax) +syntax](https://daringfireball.net/projects/markdown/syntax) does not specify the syntax unambiguously. Here are some examples of questions it does not answer: @@ -114,7 +114,7 @@ questions it does not answer: not require that. This is hardly a "corner case," and divergences between implementations on this issue often lead to surprises for users in real documents. (See [this comment by John - Gruber](http://article.gmane.org/gmane.text.markdown.general/1997).) + Gruber](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/1997).) 2. Is a blank line needed before a block quote or heading? Most implementations do not require the blank line. However, @@ -122,7 +122,7 @@ questions it does not answer: also to ambiguities in parsing (note that some implementations put the heading inside the blockquote, while others do not). (John Gruber has also spoken [in favor of requiring the blank - lines](http://article.gmane.org/gmane.text.markdown.general/2146).) + lines](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/2146).) 3. Is a blank line needed before an indented code block? (`Markdown.pl` requires it, but this is not mentioned in the @@ -155,7 +155,7 @@ questions it does not answer: ``` (There are some relevant comments by John Gruber - [here](http://article.gmane.org/gmane.text.markdown.general/2554).) + [here](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/2554).) 5. Can list markers be indented? Can ordered list markers be right-aligned? @@ -270,6 +270,16 @@ of representing the structural distinctions we need to make, and the choice of HTML for the tests makes it possible to run the tests against an implementation without writing an abstract syntax tree renderer. 
+Note that not every feature of the HTML samples is mandated by +the spec. For example, the spec says what counts as a link +destination, but it doesn't mandate that non-ASCII characters in +the URL be percent-encoded. To use the automatic tests, +implementers will need to provide a renderer that conforms to +the expectations of the spec examples (percent-encoding +non-ASCII characters in URLs). But a conforming implementation +can use a different renderer and may choose not to +percent-encode non-ASCII characters in URLs. + This document is generated from a text file, `spec.txt`, written in Markdown with a small extension for the side-by-side tests. The script `tools/makespec.py` can be used to convert `spec.txt` into @@ -294,37 +304,31 @@ of [characters] rather than bytes. A conforming parser may be limited to a certain encoding. A [line](@) is a sequence of zero or more [characters] -other than newline (`U+000A`) or carriage return (`U+000D`), +other than line feed (`U+000A`) or carriage return (`U+000D`), followed by a [line ending] or by the end of file. -A [line ending](@) is a newline (`U+000A`), a carriage return -(`U+000D`) not followed by a newline, or a carriage return and a -following newline. +A [line ending](@) is a line feed (`U+000A`), a carriage return +(`U+000D`) not followed by a line feed, or a carriage return and a +following line feed. A line containing no characters, or a line containing only spaces (`U+0020`) or tabs (`U+0009`), is called a [blank line](@). The following definitions of character classes will be used in this spec: -A [whitespace character](@) is a space -(`U+0020`), tab (`U+0009`), newline (`U+000A`), line tabulation (`U+000B`), -form feed (`U+000C`), or carriage return (`U+000D`). - -[Whitespace](@) is a sequence of one or more [whitespace -characters]. 
+A [Unicode whitespace character](@) is a character in the Unicode `Zs` general +category, or a tab (`U+0009`), line feed (`U+000A`), form feed (`U+000C`), or +carriage return (`U+000D`). -A [Unicode whitespace character](@) is -any code point in the Unicode `Zs` general category, or a tab (`U+0009`), -carriage return (`U+000D`), newline (`U+000A`), or form feed -(`U+000C`). +[Unicode whitespace](@) is a sequence of one or more +[Unicode whitespace characters]. -[Unicode whitespace](@) is a sequence of one -or more [Unicode whitespace characters]. +A [tab](@) is `U+0009`. A [space](@) is `U+0020`. -A [non-whitespace character](@) is any character -that is not a [whitespace character]. +An [ASCII control character](@) is a character between `U+0000–1F` (both +including) or `U+007F`. An [ASCII punctuation character](@) is `!`, `"`, `#`, `$`, `%`, `&`, `'`, `(`, `)`, @@ -333,14 +337,13 @@ is `!`, `"`, `#`, `$`, `%`, `&`, `'`, `(`, `)`, `[`, `\`, `]`, `^`, `_`, `` ` `` (U+005B–0060), `{`, `|`, `}`, or `~` (U+007B–007E). -A [punctuation character](@) is an [ASCII -punctuation character] or anything in -the general Unicode categories `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`. +A [Unicode punctuation character](@) is a character in the Unicode `P` +(puncuation) or `S` (symbol) general categories. ## Tabs Tabs in lines are not expanded to [spaces]. However, -in contexts where whitespace helps to define block structure, +in contexts where spaces help to define block structure, tabs behave as if they were replaced by spaces with a tab stop of 4 characters. @@ -478,3267 +481,3620 @@ bar For security reasons, the Unicode character `U+0000` must be replaced with the REPLACEMENT CHARACTER (`U+FFFD`). -# Blocks and inlines - -We can think of a document as a sequence of -[blocks](@)---structural elements like paragraphs, block -quotations, lists, headings, rules, and code blocks. 
Some blocks (like -block quotes and list items) contain other blocks; others (like -headings and paragraphs) contain [inline](@) content---text, -links, emphasized text, images, code spans, and so on. -## Precedence +## Backslash escapes -Indicators of block structure always take precedence over indicators -of inline structure. So, for example, the following is a list with -two items, not a list with one item containing a code span: +Any ASCII punctuation character may be backslash-escaped: ```````````````````````````````` example -- `one -- two` +\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~ . -
    -
  • `one
  • -
  • two`
  • -
+

!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~

```````````````````````````````` -This means that parsing can proceed in two steps: first, the block -structure of the document can be discerned; second, text lines inside -paragraphs, headings, and other block constructs can be parsed for inline -structure. The second step requires information about link reference -definitions that will be available only at the end of the first -step. Note that the first step requires processing lines in sequence, -but the second can be parallelized, since the inline parsing of -one block element does not affect the inline parsing of any other. - -## Container blocks and leaf blocks - -We can divide blocks into two types: -[container blocks](@), -which can contain other blocks, and [leaf blocks](@), -which cannot. - -# Leaf blocks +Backslashes before other characters are treated as literal +backslashes: -This section describes the different kinds of leaf block that make up a -Markdown document. +```````````````````````````````` example +\→\A\a\ \3\φ\« +. +

\→\A\a\ \3\φ\«

+```````````````````````````````` -## Thematic breaks -A line consisting of 0-3 spaces of indentation, followed by a sequence -of three or more matching `-`, `_`, or `*` characters, each followed -optionally by any number of spaces or tabs, forms a -[thematic break](@). +Escaped characters are treated as regular characters and do +not have their usual Markdown meanings: ```````````````````````````````` example -*** ---- -___ +\*not emphasized* +\
not a tag +\[not a link](/foo) +\`not code` +1\. not a list +\* not a list +\# not a heading +\[foo]: /url "not a reference" +\ö not a character entity . -
-
-
+

*not emphasized* +<br/> not a tag +[not a link](/foo) +`not code` +1. not a list +* not a list +# not a heading +[foo]: /url "not a reference" +&ouml; not a character entity

```````````````````````````````` -Wrong characters: +If a backslash is itself escaped, the following character is not: ```````````````````````````````` example -+++ +\\*emphasis* . -

+++

+

\emphasis

```````````````````````````````` +A backslash at the end of the line is a [hard line break]: + ```````````````````````````````` example -=== +foo\ +bar . -

===

+

foo
+bar

```````````````````````````````` -Not enough characters: +Backslash escapes do not work in code blocks, code spans, autolinks, or +raw HTML: ```````````````````````````````` example --- -** -__ +`` \[\` `` . -

-- -** -__

+

\[\`

```````````````````````````````` -One to three spaces indent are allowed: - ```````````````````````````````` example - *** - *** - *** + \[\] . -
-
-
+
\[\]
+
```````````````````````````````` -Four spaces is too many: - ```````````````````````````````` example - *** +~~~ +\[\] +~~~ . -
***
+
\[\]
 
```````````````````````````````` ```````````````````````````````` example -Foo - *** + . -

Foo -***

+

https://example.com?find=\*

```````````````````````````````` -More than three characters may be used: - ```````````````````````````````` example -_____________________________________ + . -
+
```````````````````````````````` -Spaces are allowed between the characters: +But they work in all other contexts, including URLs and link titles, +link references, and [info strings] in [fenced code blocks]: ```````````````````````````````` example - - - - +[foo](/bar\* "ti\*tle") . -
+

foo

```````````````````````````````` ```````````````````````````````` example - ** * ** * ** * ** +[foo] + +[foo]: /bar\* "ti\*tle" . -
+

foo

```````````````````````````````` ```````````````````````````````` example -- - - - +``` foo\+bar +foo +``` . -
+
foo
+
```````````````````````````````` -Spaces are allowed at the end: +## Entity and numeric character references -```````````````````````````````` example -- - - - -. -
-```````````````````````````````` +Valid HTML entity references and numeric character references +can be used in place of the corresponding Unicode character, +with the following exceptions: +- Entity and character references are not recognized in code + blocks and code spans. -However, no other characters may occur in the line: +- Entity and character references cannot stand in place of + special characters that define structural elements in + CommonMark. For example, although `&#42;` can be used + in place of a literal `*` character, `&#42;` cannot replace + `*` in emphasis delimiters, bullet list markers, or thematic + breaks. -```````````````````````````````` example -_ _ _ _ a +Conforming CommonMark parsers need not store information about +whether a particular character was represented in the source +using a Unicode character or an entity reference. -a------ +[Entity references](@) consist of `&` + any of the valid +HTML5 entity names + `;`. The +document <https://html.spec.whatwg.org/entities.json> +is used as an authoritative source for the valid entity +references and their corresponding code points. ----a--- +```````````````````````````````` example +&nbsp; &amp; &copy; &AElig; &Dcaron; +&frac34; &HilbertSpace; &DifferentialD; +&ClockwiseContourIntegral; &ngE; . -

_ _ _ _ a

-

a------

-

---a---

+

  & © Æ Ď +¾ ℋ ⅆ +∲ ≧̸

```````````````````````````````` -It is required that all of the [non-whitespace characters] be the same. -So, this is not a thematic break: +[Decimal numeric character +references](@) +consist of `&#` + a string of 1--7 arabic digits + `;`. A +numeric character reference is parsed as the corresponding +Unicode character. Invalid Unicode code points will be replaced by +the REPLACEMENT CHARACTER (`U+FFFD`). For security reasons, +the code point `U+0000` will also be replaced by `U+FFFD`. ```````````````````````````````` example - *-* +# Ӓ Ϡ � . -

-

+

# Ӓ Ϡ �

```````````````````````````````` -Thematic breaks do not need blank lines before or after: +[Hexadecimal numeric character +references](@) consist of `&#` + +either `X` or `x` + a string of 1-6 hexadecimal digits + `;`. +They too are parsed as the corresponding Unicode character (this +time specified with a hexadecimal numeral instead of decimal). ```````````````````````````````` example -- foo -*** -- bar +" ആ ಫ . -
    -
  • foo
  • -
-
-
    -
  • bar
  • -
+

" ആ ಫ

```````````````````````````````` -Thematic breaks can interrupt a paragraph: +Here are some nonentities: ```````````````````````````````` example -Foo -*** -bar +  &x; &#; &#x; +� +&#abcdef0; +&ThisIsNotDefined; &hi?; . -

Foo

-
-

bar

+

&nbsp &x; &#; &#x; +&#87654321; +&#abcdef0; +&ThisIsNotDefined; &hi?;

```````````````````````````````` -If a line of dashes that meets the above conditions for being a -thematic break could also be interpreted as the underline of a [setext -heading], the interpretation as a -[setext heading] takes precedence. Thus, for example, -this is a setext heading, not a paragraph followed by a thematic break: +Although HTML5 does accept some entity references +without a trailing semicolon (such as `©`), these are not +recognized here, because it makes the grammar too ambiguous: ```````````````````````````````` example -Foo ---- -bar +© . -

Foo

-

bar

+

&copy

```````````````````````````````` -When both a thematic break and a list item are possible -interpretations of a line, the thematic break takes precedence: +Strings that are not on the list of HTML5 named entities are not +recognized as entity references either: ```````````````````````````````` example -* Foo -* * * -* Bar +&MadeUpEntity; . -
    -
  • Foo
  • -
-
-
    -
  • Bar
  • -
+

&MadeUpEntity;

```````````````````````````````` -If you want a thematic break in a list item, use a different bullet: +Entity and numeric character references are recognized in any +context besides code spans or code blocks, including +URLs, [link titles], and [fenced code block][] [info strings]: ```````````````````````````````` example -- Foo -- * * * + . -
    -
  • Foo
  • -
  • -
    -
  • -
+
```````````````````````````````` -## ATX headings - -An [ATX heading](@) -consists of a string of characters, parsed as inline content, between an -opening sequence of 1--6 unescaped `#` characters and an optional -closing sequence of any number of unescaped `#` characters. -The opening sequence of `#` characters must be followed by a -[space] or by the end of line. The optional closing sequence of `#`s must be -preceded by a [space] and may be followed by spaces only. The opening -`#` character may be indented 0-3 spaces. The raw contents of the -heading are stripped of leading and trailing spaces before being parsed -as inline content. The heading level is equal to the number of `#` -characters in the opening sequence. +```````````````````````````````` example +[foo](/föö "föö") +. +

foo

+```````````````````````````````` -Simple headings: ```````````````````````````````` example -# foo -## foo -### foo -#### foo -##### foo -###### foo +[foo] + +[foo]: /föö "föö" . -

foo

-

foo

-

foo

-

foo

-
foo
-
foo
+

foo

```````````````````````````````` -More than six `#` characters is not a heading: - ```````````````````````````````` example -####### foo +``` föö +foo +``` . -

####### foo

+
foo
+
```````````````````````````````` -At least one space is required between the `#` characters and the -heading's contents, unless the heading is empty. Note that many -implementations currently do not require the space. However, the -space was required by the -[original ATX implementation](http://www.aaronsw.com/2002/atx/atx.py), -and it helps prevent things like the following from being parsed as -headings: +Entity and numeric character references are treated as literal +text in code spans and code blocks: ```````````````````````````````` example -#5 bolt - -#hashtag +`föö` . -

#5 bolt

-

#hashtag

+

f&ouml;&ouml;

```````````````````````````````` -This is not a heading, because the first `#` is escaped: - ```````````````````````````````` example -\## foo + föfö . -

## foo

+
f&ouml;f&ouml;
+
```````````````````````````````` -Contents are parsed as inlines: +Entity and numeric character references cannot be used +in place of symbols indicating structure in CommonMark +documents. ```````````````````````````````` example -# foo *bar* \*baz\* +*foo* +*foo* . -

foo bar *baz*

+

*foo* +foo

```````````````````````````````` +```````````````````````````````` example +* foo -Leading and trailing [whitespace] is ignored in parsing inline content: +* foo +. +

* foo

+
    +
  • foo
  • +
+```````````````````````````````` ```````````````````````````````` example -# foo +foo bar . -

foo

+

foo + +bar

```````````````````````````````` +```````````````````````````````` example + foo +. +

→foo

+```````````````````````````````` -One to three spaces indentation are allowed: ```````````````````````````````` example - ### foo - ## foo - # foo +[a](url "tit") . -

foo

-

foo

-

foo

+

[a](url "tit")

```````````````````````````````` -Four spaces are too much: + +# Blocks and inlines + +We can think of a document as a sequence of +[blocks](@)---structural elements like paragraphs, block +quotations, lists, headings, rules, and code blocks. Some blocks (like +block quotes and list items) contain other blocks; others (like +headings and paragraphs) contain [inline](@) content---text, +links, emphasized text, images, code spans, and so on. + +## Precedence + +Indicators of block structure always take precedence over indicators +of inline structure. So, for example, the following is a list with +two items, not a list with one item containing a code span: ```````````````````````````````` example - # foo +- `one +- two` . -
# foo
-
+
    +
  • `one
  • +
  • two`
  • +
```````````````````````````````` +This means that parsing can proceed in two steps: first, the block +structure of the document can be discerned; second, text lines inside +paragraphs, headings, and other block constructs can be parsed for inline +structure. The second step requires information about link reference +definitions that will be available only at the end of the first +step. Note that the first step requires processing lines in sequence, +but the second can be parallelized, since the inline parsing of +one block element does not affect the inline parsing of any other. + +## Container blocks and leaf blocks + +We can divide blocks into two types: +[container blocks](#container-blocks), +which can contain other blocks, and [leaf blocks](#leaf-blocks), +which cannot. + +# Leaf blocks + +This section describes the different kinds of leaf block that make up a +Markdown document. + +## Thematic breaks + +A line consisting of optionally up to three spaces of indentation, followed by a +sequence of three or more matching `-`, `_`, or `*` characters, each followed +optionally by any number of spaces or tabs, forms a +[thematic break](@). + ```````````````````````````````` example -foo - # bar +*** +--- +___ . -

foo -# bar

+
+
+
```````````````````````````````` -A closing sequence of `#` characters is optional: +Wrong characters: ```````````````````````````````` example -## foo ## - ### bar ### ++++ . -

foo

-

bar

+

+++

```````````````````````````````` -It need not be the same length as the opening sequence: - ```````````````````````````````` example -# foo ################################## -##### foo ## +=== . -

foo

-
foo
+

===

```````````````````````````````` -Spaces are allowed after the closing sequence: +Not enough characters: ```````````````````````````````` example -### foo ### +-- +** +__ . -

foo

+

-- +** +__

```````````````````````````````` -A sequence of `#` characters with anything but [spaces] following it -is not a closing sequence, but counts as part of the contents of the -heading: +Up to three spaces of indentation are allowed: ```````````````````````````````` example -### foo ### b + *** + *** + *** . -

foo ### b

+
+
+
```````````````````````````````` -The closing sequence must be preceded by a space: +Four spaces of indentation is too many: ```````````````````````````````` example -# foo# + *** . -

foo#

+
***
+
```````````````````````````````` -Backslash-escaped `#` characters do not count as part -of the closing sequence: - ```````````````````````````````` example -### foo \### -## foo #\## -# foo \# +Foo + *** . -

foo ###

-

foo ###

-

foo #

+

Foo +***

```````````````````````````````` -ATX headings need not be separated from surrounding content by blank -lines, and they can interrupt paragraphs: +More than three characters may be used: ```````````````````````````````` example -**** -## foo -**** +_____________________________________ .
-

foo

-
```````````````````````````````` +Spaces and tabs are allowed between the characters: + ```````````````````````````````` example -Foo bar -# baz -Bar foo + - - - . -

Foo bar

-

baz

-

Bar foo

+
```````````````````````````````` -ATX headings can be empty: - ```````````````````````````````` example -## -# -### ### + ** * ** * ** * ** . -

-

-

+
```````````````````````````````` -## Setext headings +```````````````````````````````` example +- - - - +. +
+```````````````````````````````` -A [setext heading](@) consists of one or more -lines of text, each containing at least one [non-whitespace -character], with no more than 3 spaces indentation, followed by -a [setext heading underline]. The lines of text must be such -that, were they not followed by the setext heading underline, -they would be interpreted as a paragraph: they cannot be -interpretable as a [code fence], [ATX heading][ATX headings], -[block quote][block quotes], [thematic break][thematic breaks], -[list item][list items], or [HTML block][HTML blocks]. -A [setext heading underline](@) is a sequence of -`=` characters or a sequence of `-` characters, with no more than 3 -spaces indentation and any number of trailing spaces. If a line -containing a single `-` can be interpreted as an -empty [list items], it should be interpreted this way -and not as a [setext heading underline]. +Spaces and tabs are allowed at the end: -The heading is a level 1 heading if `=` characters are used in -the [setext heading underline], and a level 2 heading if `-` -characters are used. The contents of the heading are the result -of parsing the preceding lines of text as CommonMark inline -content. +```````````````````````````````` example +- - - - +. +
+```````````````````````````````` -In general, a setext heading need not be preceded or followed by a -blank line. However, it cannot interrupt a paragraph, so when a -setext heading comes after a paragraph, a blank line is needed between -them. -Simple examples: +However, no other characters may occur in the line: ```````````````````````````````` example -Foo *bar* -========= +_ _ _ _ a -Foo *bar* ---------- +a------ + +---a--- . -

Foo bar

-

Foo bar

+

_ _ _ _ a

+

a------

+

---a---

```````````````````````````````` -The content of the header may span more than one line: +It is required that all of the characters other than spaces or tabs be the same. +So, this is not a thematic break: ```````````````````````````````` example -Foo *bar -baz* -==== + *-* . -

Foo bar -baz

+

-

```````````````````````````````` -The contents are the result of parsing the headings's raw -content as inlines. The heading's raw content is formed by -concatenating the lines and removing initial and final -[whitespace]. + +Thematic breaks do not need blank lines before or after: ```````````````````````````````` example - Foo *bar -baz*→ -==== +- foo +*** +- bar . -

Foo bar -baz

+
    +
  • foo
  • +
+
+
    +
  • bar
  • +
```````````````````````````````` -The underlining can be any length: +Thematic breaks can interrupt a paragraph: ```````````````````````````````` example Foo -------------------------- - -Foo -= +*** +bar . -

Foo

-

Foo

+

Foo

+
+

bar

```````````````````````````````` -The heading content can be indented up to three spaces, and need -not line up with the underlining: +If a line of dashes that meets the above conditions for being a +thematic break could also be interpreted as the underline of a [setext +heading], the interpretation as a +[setext heading] takes precedence. Thus, for example, +this is a setext heading, not a paragraph followed by a thematic break: ```````````````````````````````` example - Foo +Foo --- - - Foo ------ - - Foo - === +bar .

Foo

-

Foo

-

Foo

+

bar

```````````````````````````````` -Four spaces indent is too much: +When both a thematic break and a list item are possible +interpretations of a line, the thematic break takes precedence: ```````````````````````````````` example - Foo - --- - - Foo ---- +* Foo +* * * +* Bar . -
Foo
----
+
    +
  • Foo
  • +
+
+
    +
  • Bar
  • +
+```````````````````````````````` -Foo -
+ +If you want a thematic break in a list item, use a different bullet: + +```````````````````````````````` example +- Foo +- * * * +. +
    +
  • Foo
  • +

  • +
  • +
```````````````````````````````` -The setext heading underline can be indented up to three spaces, and -may have trailing spaces: +## ATX headings + +An [ATX heading](@) +consists of a string of characters, parsed as inline content, between an +opening sequence of 1--6 unescaped `#` characters and an optional +closing sequence of any number of unescaped `#` characters. +The opening sequence of `#` characters must be followed by spaces or tabs, or +by the end of line. The optional closing sequence of `#`s must be preceded by +spaces or tabs and may be followed by spaces or tabs only. The opening +`#` character may be preceded by up to three spaces of indentation. The raw +contents of the heading are stripped of leading and trailing space or tabs +before being parsed as inline content. The heading level is equal to the number +of `#` characters in the opening sequence. + +Simple headings: ```````````````````````````````` example -Foo - ---- +# foo +## foo +### foo +#### foo +##### foo +###### foo . -

Foo

+

foo

+

foo

+

foo

+

foo

+
foo
+
foo
```````````````````````````````` -Four spaces is too much: +More than six `#` characters is not a heading: ```````````````````````````````` example -Foo - --- +####### foo . -

Foo ----

+

####### foo

```````````````````````````````` -The setext heading underline cannot contain internal spaces: +At least one space or tab is required between the `#` characters and the +heading's contents, unless the heading is empty. Note that many +implementations currently do not require the space. However, the +space was required by the +[original ATX implementation](http://www.aaronsw.com/2002/atx/atx.py), +and it helps prevent things like the following from being parsed as +headings: ```````````````````````````````` example -Foo -= = +#5 bolt -Foo ---- - +#hashtag . -

Foo -= =

-

Foo

-
+

#5 bolt

+

#hashtag

```````````````````````````````` -Trailing spaces in the content line do not cause a line break: +This is not a heading, because the first `#` is escaped: ```````````````````````````````` example -Foo ------ +\## foo . -

Foo

+

## foo

```````````````````````````````` -Nor does a backslash at the end: +Contents are parsed as inlines: ```````````````````````````````` example -Foo\ ----- +# foo *bar* \*baz\* . -

Foo\

+

foo bar *baz*

```````````````````````````````` -Since indicators of block structure take precedence over -indicators of inline structure, the following are setext headings: +Leading and trailing spaces or tabs are ignored in parsing inline content: ```````````````````````````````` example -`Foo ----- -` - - +# foo . -

`Foo

-

`

-

<a title="a lot

-

of dashes"/>

+

foo

```````````````````````````````` -The setext heading underline cannot be a [lazy continuation -line] in a list item or block quote: +Up to three spaces of indentation are allowed: ```````````````````````````````` example -> Foo ---- + ### foo + ## foo + # foo . -
-

Foo

-
-
+

foo

+

foo

+

foo

```````````````````````````````` +Four spaces of indentation is too many: + ```````````````````````````````` example -> foo -bar -=== + # foo . -
-

foo -bar -===

-
+
# foo
+
```````````````````````````````` ```````````````````````````````` example -- Foo ---- +foo + # bar . -
    -
  • Foo
  • -
-
+

foo +# bar

```````````````````````````````` -A blank line is needed between a paragraph and a following -setext heading, since otherwise the paragraph becomes part -of the heading's content: +A closing sequence of `#` characters is optional: ```````````````````````````````` example -Foo -Bar ---- +## foo ## + ### bar ### . -

Foo -Bar

+

foo

+

bar

```````````````````````````````` -But in general a blank line is not required before or after -setext headings: +It need not be the same length as the opening sequence: ```````````````````````````````` example ---- -Foo ---- -Bar ---- -Baz +# foo ################################## +##### foo ## . -
-

Foo

-

Bar

-

Baz

+

foo

+
foo
```````````````````````````````` -Setext headings cannot be empty: +Spaces or tabs are allowed after the closing sequence: ```````````````````````````````` example +### foo ### +. +

foo

+```````````````````````````````` -==== + +A sequence of `#` characters with anything but spaces or tabs following it +is not a closing sequence, but counts as part of the contents of the +heading: + +```````````````````````````````` example +### foo ### b . -

====

+

foo ### b

```````````````````````````````` -Setext heading text lines must not be interpretable as block -constructs other than paragraphs. So, the line of dashes -in these examples gets interpreted as a thematic break: +The closing sequence must be preceded by a space or tab: ```````````````````````````````` example ---- ---- +# foo# . -
-
+

foo#

```````````````````````````````` +Backslash-escaped `#` characters do not count as part +of the closing sequence: + ```````````````````````````````` example -- foo ------ +### foo \### +## foo #\## +# foo \# . -
    -
  • foo
  • -
-
+

foo ###

+

foo ###

+

foo #

```````````````````````````````` +ATX headings need not be separated from surrounding content by blank +lines, and they can interrupt paragraphs: + ```````````````````````````````` example - foo ---- +**** +## foo +**** . -
foo
-
+
+

foo


```````````````````````````````` ```````````````````````````````` example -> foo ------ +Foo bar +# baz +Bar foo . -
-

foo

-
-
+

Foo bar

+

baz

+

Bar foo

```````````````````````````````` -If you want a heading with `> foo` as its literal text, you can -use backslash escapes: +ATX headings can be empty: ```````````````````````````````` example -\> foo ------- +## +# +### ### . -

> foo

+

+

+

```````````````````````````````` -**Compatibility note:** Most existing Markdown implementations -do not allow the text of setext headings to span multiple lines. -But there is no consensus about how to interpret +## Setext headings -``` markdown -Foo -bar ---- -baz -``` +A [setext heading](@) consists of one or more +lines of text, not interrupted by a blank line, of which the first line does not +have more than 3 spaces of indentation, followed by +a [setext heading underline]. The lines of text must be such +that, were they not followed by the setext heading underline, +they would be interpreted as a paragraph: they cannot be +interpretable as a [code fence], [ATX heading][ATX headings], +[block quote][block quotes], [thematic break][thematic breaks], +[list item][list items], or [HTML block][HTML blocks]. -One can find four different interpretations: +A [setext heading underline](@) is a sequence of +`=` characters or a sequence of `-` characters, with no more than 3 +spaces of indentation and any number of trailing spaces or tabs. -1. paragraph "Foo", heading "bar", paragraph "baz" -2. paragraph "Foo bar", thematic break, paragraph "baz" -3. paragraph "Foo bar --- baz" -4. heading "Foo bar", paragraph "baz" +The heading is a level 1 heading if `=` characters are used in +the [setext heading underline], and a level 2 heading if `-` +characters are used. The contents of the heading are the result +of parsing the preceding lines of text as CommonMark inline +content. -We find interpretation 4 most natural, and interpretation 4 -increases the expressive power of CommonMark, by allowing -multiline headings. Authors who want interpretation 1 can -put a blank line after the first paragraph: +In general, a setext heading need not be preceded or followed by a +blank line. However, it cannot interrupt a paragraph, so when a +setext heading comes after a paragraph, a blank line is needed between +them. 
+ +Simple examples: ```````````````````````````````` example -Foo +Foo *bar* +========= -bar ---- -baz +Foo *bar* +--------- . -

Foo

-

bar

-

baz

+

Foo bar

+

Foo bar

```````````````````````````````` -Authors who want interpretation 2 can put blank lines around -the thematic break, +The content of the header may span more than one line: ```````````````````````````````` example -Foo -bar +Foo *bar +baz* +==== +. +

Foo bar +baz

+```````````````````````````````` ---- +The contents are the result of parsing the heading's raw +content as inlines. The heading's raw content is formed by +concatenating the lines and removing initial and final +spaces or tabs. -baz +```````````````````````````````` example + Foo *bar +baz*→ +==== . -

Foo -bar

-
-

baz

+

Foo bar +baz

```````````````````````````````` -or use a thematic break that cannot count as a [setext heading -underline], such as +The underlining can be any length: ```````````````````````````````` example Foo -bar -* * * -baz +------------------------- + +Foo += . -

Foo -bar

-
-

baz

+

Foo

+

Foo

```````````````````````````````` -Authors who want interpretation 3 can use backslash escapes: +The heading content can be preceded by up to three spaces of indentation, and +need not line up with the underlining: ```````````````````````````````` example -Foo -bar -\--- -baz -. -

Foo -bar + Foo --- -baz

-```````````````````````````````` + Foo +----- -## Indented code blocks + Foo + === +. +

Foo

+

Foo

+

Foo

+```````````````````````````````` -An [indented code block](@) is composed of one or more -[indented chunks] separated by blank lines. -An [indented chunk](@) is a sequence of non-blank lines, -each indented four or more spaces. The contents of the code block are -the literal contents of the lines, including trailing -[line endings], minus four spaces of indentation. -An indented code block has no [info string]. -An indented code block cannot interrupt a paragraph, so there must be -a blank line between a paragraph and a following indented code block. -(A blank line is not needed, however, between a code block and a following -paragraph.) +Four spaces of indentation is too many: ```````````````````````````````` example - a simple - indented code block + Foo + --- + + Foo +--- . -
a simple
-  indented code block
+
Foo
+---
+
+Foo
 
+
```````````````````````````````` -If there is any ambiguity between an interpretation of indentation -as a code block and as indicating that material belongs to a [list -item][list items], the list item interpretation takes precedence: +The setext heading underline can be preceded by up to three spaces of +indentation, and may have trailing spaces or tabs: ```````````````````````````````` example - - foo - - bar +Foo + ---- . -
    -
  • -

    foo

    -

    bar

    -
  • -
+

Foo

```````````````````````````````` -```````````````````````````````` example -1. foo - - - bar -. -
    -
  1. -

    foo

    -
      -
    • bar
    • -
    -
  2. -
-```````````````````````````````` - - - -The contents of a code block are literal text, and do not get parsed -as Markdown: +Four spaces of indentation is too many: ```````````````````````````````` example -
- *hi* - - - one +Foo + --- . -
<a/>
-*hi*
-
-- one
-
+

Foo +---

```````````````````````````````` -Here we have three chunks separated by blank lines: +The setext heading underline cannot contain internal spaces or tabs: ```````````````````````````````` example - chunk1 +Foo += = - chunk2 - - - - chunk3 +Foo +--- - . -
chunk1
-
-chunk2
-
-
-
-chunk3
-
+

Foo += =

+

Foo

+
```````````````````````````````` -Any initial spaces beyond four will be included in the content, even -in interior blank lines: +Trailing spaces or tabs in the content line do not cause a hard line break: ```````````````````````````````` example - chunk1 - - chunk2 +Foo +----- . -
chunk1
-  
-  chunk2
-
+

Foo

```````````````````````````````` -An indented code block cannot interrupt a paragraph. (This -allows hanging indents and the like.) +Nor does a backslash at the end: ```````````````````````````````` example -Foo - bar - +Foo\ +---- . -

Foo -bar

+

Foo\

```````````````````````````````` -However, any non-blank line with fewer than four leading spaces ends -the code block immediately. So a paragraph may occur immediately -after indented code: +Since indicators of block structure take precedence over +indicators of inline structure, the following are setext headings: ```````````````````````````````` example - foo -bar +`Foo +---- +` + +
. -
foo
-
-

bar

+

`Foo

+

`

+

<a title="a lot

+

of dashes"/>

```````````````````````````````` -And indented code can occur immediately before and after other kinds of -blocks: +The setext heading underline cannot be a [lazy continuation +line] in a list item or block quote: ```````````````````````````````` example -# Heading - foo -Heading ------- - foo ----- +> Foo +--- . -

Heading

-
foo
-
-

Heading

-
foo
-
+
+

Foo

+

```````````````````````````````` -The first line can be indented more than four spaces: - ```````````````````````````````` example - foo - bar +> foo +bar +=== . -
    foo
+
+

foo bar -

+===

+ ```````````````````````````````` -Blank lines preceding or following an indented code block -are not included in it: - ```````````````````````````````` example - - - foo - - +- Foo +--- . -
foo
-
+
    +
  • Foo
  • +
+
```````````````````````````````` -Trailing spaces are included in the code block's content: +A blank line is needed between a paragraph and a following +setext heading, since otherwise the paragraph becomes part +of the heading's content: ```````````````````````````````` example - foo +Foo +Bar +--- . -
foo  
-
+

Foo +Bar

```````````````````````````````` +But in general a blank line is not required before or after +setext headings: -## Fenced code blocks - -A [code fence](@) is a sequence -of at least three consecutive backtick characters (`` ` ``) or -tildes (`~`). (Tildes and backticks cannot be mixed.) -A [fenced code block](@) -begins with a code fence, indented no more than three spaces. - -The line with the opening code fence may optionally contain some text -following the code fence; this is trimmed of leading and trailing -whitespace and called the [info string](@). If the [info string] comes -after a backtick fence, it may not contain any backtick -characters. (The reason for this restriction is that otherwise -some inline code would be incorrectly interpreted as the -beginning of a fenced code block.) - -The content of the code block consists of all subsequent lines, until -a closing [code fence] of the same type as the code block -began with (backticks or tildes), and with at least as many backticks -or tildes as the opening code fence. If the leading code fence is -indented N spaces, then up to N spaces of indentation are removed from -each line of the content (if present). (If a content line is not -indented, it is preserved unchanged. If it is indented less than N -spaces, all of the indentation is removed.) - -The closing code fence may be indented up to three spaces, and may be -followed only by spaces, which are ignored. If the end of the -containing block (or document) is reached and no closing code fence -has been found, the code block contains all of the lines after the -opening code fence until the end of the containing block (or -document). (An alternative spec would require backtracking in the -event that a closing code fence is not found. But this makes parsing -much less efficient, and there seems to be no real down side to the -behavior described here.) 
- -A fenced code block may interrupt a paragraph, and does not require -a blank line either before or after. +```````````````````````````````` example +--- +Foo +--- +Bar +--- +Baz +. +
+

Foo

+

Bar

+

Baz

+```````````````````````````````` -The content of a code fence is treated as literal text, not parsed -as inlines. The first word of the [info string] is typically used to -specify the language of the code sample, and rendered in the `class` -attribute of the `code` tag. However, this spec does not mandate any -particular treatment of the [info string]. -Here is a simple example with backticks: +Setext headings cannot be empty: ```````````````````````````````` example -``` -< - > -``` + +==== . -
<
- >
-
+

====

```````````````````````````````` -With tildes: +Setext heading text lines must not be interpretable as block +constructs other than paragraphs. So, the line of dashes +in these examples gets interpreted as a thematic break: ```````````````````````````````` example -~~~ -< - > -~~~ +--- +--- . -
<
- >
-
+
+
```````````````````````````````` -Fewer than three backticks is not enough: ```````````````````````````````` example -`` -foo -`` +- foo +----- . -

foo

+
    +
  • foo
  • +
+
```````````````````````````````` -The closing code fence must use the same character as the opening -fence: ```````````````````````````````` example -``` -aaa -~~~ -``` + foo +--- . -
aaa
-~~~
+
foo
 
+
```````````````````````````````` ```````````````````````````````` example -~~~ -aaa -``` -~~~ +> foo +----- . -
aaa
-```
-
+
+

foo

+
+
```````````````````````````````` -The closing code fence must be at least as long as the opening fence: +If you want a heading with `> foo` as its literal text, you can +use backslash escapes: ```````````````````````````````` example -```` -aaa -``` -`````` +\> foo +------ . -
aaa
-```
-
+

> foo

```````````````````````````````` -```````````````````````````````` example -~~~~ -aaa -~~~ -~~~~ -. -
aaa
-~~~
-
-```````````````````````````````` +**Compatibility note:** Most existing Markdown implementations +do not allow the text of setext headings to span multiple lines. +But there is no consensus about how to interpret +``` markdown +Foo +bar +--- +baz +``` -Unclosed code blocks are closed by the end of the document -(or the enclosing [block quote][block quotes] or [list item][list items]): +One can find four different interpretations: -```````````````````````````````` example -``` -. -
-```````````````````````````````` +1. paragraph "Foo", heading "bar", paragraph "baz" +2. paragraph "Foo bar", thematic break, paragraph "baz" +3. paragraph "Foo bar --- baz" +4. heading "Foo bar", paragraph "baz" +We find interpretation 4 most natural, and interpretation 4 +increases the expressive power of CommonMark, by allowing +multiline headings. Authors who want interpretation 1 can +put a blank line after the first paragraph: ```````````````````````````````` example -````` +Foo -``` -aaa +bar +--- +baz . -

-```
-aaa
-
+

Foo

+

bar

+

baz

```````````````````````````````` +Authors who want interpretation 2 can put blank lines around +the thematic break, + ```````````````````````````````` example -> ``` -> aaa +Foo +bar -bbb +--- + +baz . -
-
aaa
-
-
-

bbb

+

Foo +bar

+
+

baz

```````````````````````````````` -A code block can have all empty lines as its content: +or use a thematic break that cannot count as a [setext heading +underline], such as ```````````````````````````````` example -``` - - -``` +Foo +bar +* * * +baz . -

-  
-
+

Foo +bar

+
+

baz

```````````````````````````````` -A code block can be empty: +Authors who want interpretation 3 can use backslash escapes: ```````````````````````````````` example -``` -``` +Foo +bar +\--- +baz . -
+

Foo +bar +--- +baz

```````````````````````````````` -Fences can be indented. If the opening fence is indented, -content lines will have equivalent opening indentation removed, -if present: +## Indented code blocks + +An [indented code block](@) is composed of one or more +[indented chunks] separated by blank lines. +An [indented chunk](@) is a sequence of non-blank lines, +each preceded by four or more spaces of indentation. The contents of the code +block are the literal contents of the lines, including trailing +[line endings], minus four spaces of indentation. +An indented code block has no [info string]. + +An indented code block cannot interrupt a paragraph, so there must be +a blank line between a paragraph and a following indented code block. +(A blank line is not needed, however, between a code block and a following +paragraph.) ```````````````````````````````` example - ``` - aaa -aaa -``` + a simple + indented code block . -
aaa
-aaa
+
a simple
+  indented code block
 
```````````````````````````````` +If there is any ambiguity between an interpretation of indentation +as a code block and as indicating that material belongs to a [list +item][list items], the list item interpretation takes precedence: + ```````````````````````````````` example - ``` -aaa - aaa -aaa - ``` + - foo + + bar . -
aaa
-aaa
-aaa
-
+
    +
  • +

    foo

    +

    bar

    +
  • +
```````````````````````````````` ```````````````````````````````` example - ``` - aaa - aaa - aaa - ``` +1. foo + + - bar . -
aaa
- aaa
-aaa
-
+
    +
  1. +

    foo

    +
      +
    • bar
    • +
    +
  2. +
```````````````````````````````` -Four spaces indentation produces an indented code block: + +The contents of a code block are literal text, and do not get parsed +as Markdown: ```````````````````````````````` example - ``` - aaa - ``` +
+ *hi* + + - one . -
```
-aaa
-```
+
<a/>
+*hi*
+
+- one
 
```````````````````````````````` -Closing fences may be indented by 0-3 spaces, and their indentation -need not match that of the opening fence: +Here we have three chunks separated by blank lines: ```````````````````````````````` example -``` -aaa - ``` + chunk1 + + chunk2 + + + + chunk3 . -
aaa
-
-```````````````````````````````` +
chunk1
 
+chunk2
 
-```````````````````````````````` example
-   ```
-aaa
-  ```
-.
-
aaa
+
+
+chunk3
 
```````````````````````````````` -This is not a closing fence, because it is indented 4 spaces: +Any initial spaces or tabs beyond four spaces of indentation will be included in +the content, even in interior blank lines: ```````````````````````````````` example -``` -aaa - ``` + chunk1 + + chunk2 . -
aaa
-    ```
+
chunk1
+  
+  chunk2
 
```````````````````````````````` - -Code fences (opening and closing) cannot contain internal spaces: +An indented code block cannot interrupt a paragraph. (This +allows hanging indents and the like.) ```````````````````````````````` example -``` ``` -aaa +Foo + bar + . -

-aaa

+

Foo +bar

```````````````````````````````` +However, any non-blank line with fewer than four spaces of indentation ends +the code block immediately. So a paragraph may occur immediately +after indented code: + ```````````````````````````````` example -~~~~~~ -aaa -~~~ ~~ + foo +bar . -
aaa
-~~~ ~~
+
foo
 
+

bar

```````````````````````````````` -Fenced code blocks can interrupt paragraphs, and can be followed -directly by paragraphs, without a blank line between: +And indented code can occur immediately before and after other kinds of +blocks: ```````````````````````````````` example -foo -``` -bar -``` -baz +# Heading + foo +Heading +------ + foo +---- . -

foo

-
bar
+

Heading

+
foo
 
-

baz

+

Heading

+
foo
+
+
```````````````````````````````` -Other blocks can also occur before and after fenced code blocks -without an intervening blank line: +The first line can be preceded by more than four spaces of indentation: ```````````````````````````````` example -foo ---- -~~~ -bar -~~~ -# baz + foo + bar . -

foo

-
bar
+
    foo
+bar
 
-

baz

```````````````````````````````` -An [info string] can be provided after the opening code fence. -Although this spec doesn't mandate any particular treatment of -the info string, the first word is typically used to specify -the language of the code block. In HTML output, the language is -normally indicated by adding a class to the `code` element consisting -of `language-` followed by the language name. +Blank lines preceding or following an indented code block +are not included in it: ```````````````````````````````` example -```ruby -def foo(x) - return 3 -end -``` + + + foo + + . -
def foo(x)
-  return 3
-end
+
foo
 
```````````````````````````````` +Trailing spaces or tabs are included in the code block's content: + ```````````````````````````````` example -~~~~ ruby startline=3 $%@#$ -def foo(x) - return 3 -end -~~~~~~~ + foo . -
def foo(x)
-  return 3
-end
+
foo  
 
```````````````````````````````` -```````````````````````````````` example -````; -```` -. -
-```````````````````````````````` +## Fenced code blocks -[Info strings] for backtick code blocks cannot contain backticks: +A [code fence](@) is a sequence +of at least three consecutive backtick characters (`` ` ``) or +tildes (`~`). (Tildes and backticks cannot be mixed.) +A [fenced code block](@) +begins with a code fence, preceded by up to three spaces of indentation. + +The line with the opening code fence may optionally contain some text +following the code fence; this is trimmed of leading and trailing +spaces or tabs and called the [info string](@). If the [info string] comes +after a backtick fence, it may not contain any backtick +characters. (The reason for this restriction is that otherwise +some inline code would be incorrectly interpreted as the +beginning of a fenced code block.) + +The content of the code block consists of all subsequent lines, until +a closing [code fence] of the same type as the code block +began with (backticks or tildes), and with at least as many backticks +or tildes as the opening code fence. If the leading code fence is +preceded by N spaces of indentation, then up to N spaces of indentation are +removed from each line of the content (if present). (If a content line is not +indented, it is preserved unchanged. If it is indented N spaces or less, all +of the indentation is removed.) + +The closing code fence may be preceded by up to three spaces of indentation, and +may be followed only by spaces or tabs, which are ignored. If the end of the +containing block (or document) is reached and no closing code fence +has been found, the code block contains all of the lines after the +opening code fence until the end of the containing block (or +document). (An alternative spec would require backtracking in the +event that a closing code fence is not found. But this makes parsing +much less efficient, and there seems to be no real downside to the +behavior described here.) 
+ +A fenced code block may interrupt a paragraph, and does not require +a blank line either before or after. + +The content of a code fence is treated as literal text, not parsed +as inlines. The first word of the [info string] is typically used to +specify the language of the code sample, and rendered in the `class` +attribute of the `code` tag. However, this spec does not mandate any +particular treatment of the [info string]. + +Here is a simple example with backticks: ```````````````````````````````` example -``` aa ``` -foo +``` +< + > +``` . -

aa -foo

+
<
+ >
+
```````````````````````````````` -[Info strings] for tilde code blocks can contain backticks and tildes: +With tildes: ```````````````````````````````` example -~~~ aa ``` ~~~ -foo +~~~ +< + > ~~~ . -
foo
+
<
+ >
 
```````````````````````````````` +Fewer than three backticks is not enough: + +```````````````````````````````` example +`` +foo +`` +. +

foo

+```````````````````````````````` -Closing code fences cannot have [info strings]: +The closing code fence must use the same character as the opening +fence: ```````````````````````````````` example ``` -``` aaa +aaa +~~~ ``` . -
``` aaa
+
aaa
+~~~
 
```````````````````````````````` +```````````````````````````````` example +~~~ +aaa +``` +~~~ +. +
aaa
+```
+
+```````````````````````````````` -## HTML blocks -An [HTML block](@) is a group of lines that is treated -as raw HTML (and will not be escaped in HTML output). +The closing code fence must be at least as long as the opening fence: -There are seven kinds of [HTML block], which can be defined by their -start and end conditions. The block begins with a line that meets a -[start condition](@) (after up to three spaces optional indentation). -It ends with the first subsequent line that meets a matching [end -condition](@), or the last line of the document, or the last line of -the [container block](#container-blocks) containing the current HTML -block, if no line is encountered that meets the [end condition]. If -the first line meets both the [start condition] and the [end -condition], the block will contain just that line. +```````````````````````````````` example +```` +aaa +``` +`````` +. +
aaa
+```
+
+```````````````````````````````` -1. **Start condition:** line begins with the string ``, or the end of the line.\ -**End condition:** line contains an end tag -``, `
`, or `` (case-insensitive; it -need not match the start tag). -2. **Start condition:** line begins with the string ``. +```````````````````````````````` example +~~~~ +aaa +~~~ +~~~~ +. +
aaa
+~~~
+
+```````````````````````````````` -3. **Start condition:** line begins with the string ``. -4. **Start condition:** line begins with the string ``. +Unclosed code blocks are closed by the end of the document +(or the enclosing [block quote][block quotes] or [list item][list items]): -5. **Start condition:** line begins with the string -``. +```````````````````````````````` example +``` +. +
+```````````````````````````````` -6. **Start condition:** line begins the string `<` or ``, or -the string `/>`.\ -**End condition:** line is followed by a [blank line]. -7. **Start condition:** line begins with a complete [open tag] -(with any [tag name] other than `script`, -`style`, or `pre`) or a complete [closing tag], -followed only by [whitespace] or the end of the line.\ -**End condition:** line is followed by a [blank line]. +```````````````````````````````` example +````` -HTML blocks continue until they are closed by their appropriate -[end condition], or the last line of the document or other [container -block](#container-blocks). This means any HTML **within an HTML -block** that might otherwise be recognised as a start condition will -be ignored by the parser and passed through as-is, without changing -the parser's state. +``` +aaa +. +

+```
+aaa
+
+```````````````````````````````` -For instance, `
` within a HTML block started by `` will not affect
-the parser state; as the HTML block was started in by start condition 6, it
-will end at any blank line. This can be surprising:
 
 ```````````````````````````````` example
-
-
-**Hello**,
+> ```
+> aaa
 
-_world_.
-
-
+bbb . -
-
-**Hello**,
-

world. -

-
+
+
aaa
+
+
+

bbb

```````````````````````````````` -In this case, the HTML block is terminated by the newline — the `**Hello**` -text remains verbatim — and regular parsing resumes, with a paragraph, -emphasised `world` and inline and block HTML following. - -All types of [HTML blocks] except type 7 may interrupt -a paragraph. Blocks of type 7 may not interrupt a paragraph. -(This restriction is intended to prevent unwanted interpretation -of long tags inside a wrapped paragraph as starting HTML blocks.) -Some simple examples follow. Here are some basic HTML blocks -of type 6: +A code block can have all empty lines as its content: ```````````````````````````````` example - - - - -
- hi -
- -okay. -. - - - - -
- hi -
-

okay.

-```````````````````````````````` - +``` -```````````````````````````````` example - -*foo* +
```````````````````````````````` -Here we have two HTML blocks with a Markdown paragraph between them: +Fences can be indented. If the opening fence is indented, +content lines will have equivalent opening indentation removed, +if present: ```````````````````````````````` example -
- -*Markdown* - -
+ ``` + aaa +aaa +``` . -
-

Markdown

-
+
aaa
+aaa
+
```````````````````````````````` -The tag on the first line can be partial, as long -as it is split where there would be whitespace: - ```````````````````````````````` example -
-
+ ``` +aaa + aaa +aaa + ``` . -
-
+
aaa
+aaa
+aaa
+
```````````````````````````````` ```````````````````````````````` example -
-
+ ``` + aaa + aaa + aaa + ``` . -
-
+
aaa
+ aaa
+aaa
+
```````````````````````````````` -An open tag need not be closed: -```````````````````````````````` example -
-*foo* +Four spaces of indentation is too many: -*bar* +```````````````````````````````` example + ``` + aaa + ``` . -
-*foo* -

bar

+
```
+aaa
+```
+
```````````````````````````````` - -A partial tag need not even be completed (garbage -in, garbage out): +Closing fences may be preceded by up to three spaces of indentation, and their +indentation need not match that of the opening fence: ```````````````````````````````` example -
aaa +
```````````````````````````````` ```````````````````````````````` example -
aaa +
```````````````````````````````` -The initial tag doesn't even need to be a valid -tag, as long as it starts like one: +This is not a closing fence, because it is indented 4 spaces: ```````````````````````````````` example -
aaa + ``` +
```````````````````````````````` -In type 6 blocks, the initial tag need not be on a line by -itself: + +Code fences (opening and closing) cannot contain internal spaces or tabs: ```````````````````````````````` example - +``` ``` +aaa . - +

+aaa

```````````````````````````````` ```````````````````````````````` example -
-foo -
+~~~~~~ +aaa +~~~ ~~ . -
-foo -
+
aaa
+~~~ ~~
+
```````````````````````````````` -Everything until the next blank line or end of document -gets included in the HTML block. So, in the following -example, what looks like a Markdown code block -is actually part of the HTML block, which continues until a blank -line or the end of the document is reached: +Fenced code blocks can interrupt paragraphs, and can be followed +directly by paragraphs, without a blank line between: ```````````````````````````````` example -
-``` c -int x = 33; +foo ``` -. -
-``` c -int x = 33; +bar ``` +baz +. +

foo

+
bar
+
+

baz

```````````````````````````````` -To start an [HTML block] with a tag that is *not* in the -list of block-level tags in (6), you must put the tag by -itself on the first line (and it must be complete): +Other blocks can also occur before and after fenced code blocks +without an intervening blank line: ```````````````````````````````` example - -*bar* - +foo +--- +~~~ +bar +~~~ +# baz . - -*bar* - +

foo

+
bar
+
+

baz

```````````````````````````````` -In type 7 blocks, the [tag name] can be anything: +An [info string] can be provided after the opening code fence. +Although this spec doesn't mandate any particular treatment of +the info string, the first word is typically used to specify +the language of the code block. In HTML output, the language is +normally indicated by adding a class to the `code` element consisting +of `language-` followed by the language name. ```````````````````````````````` example - -*bar* - +```ruby +def foo(x) + return 3 +end +``` . - -*bar* - +
def foo(x)
+  return 3
+end
+
```````````````````````````````` ```````````````````````````````` example - -*bar* - +~~~~ ruby startline=3 $%@#$ +def foo(x) + return 3 +end +~~~~~~~ . - -*bar* - +
def foo(x)
+  return 3
+end
+
```````````````````````````````` ```````````````````````````````` example - -*bar* +````; +```` . - -*bar* +
```````````````````````````````` -These rules are designed to allow us to work with tags that -can function as either block-level or inline-level tags. -The `` tag is a nice example. We can surround content with -`` tags in three different ways. In this case, we get a raw -HTML block, because the `` tag is on a line by itself: +[Info strings] for backtick code blocks cannot contain backticks: ```````````````````````````````` example - -*foo* - +``` aa ``` +foo . - -*foo* - +

aa +foo

```````````````````````````````` -In this case, we get a raw HTML block that just includes -the `` tag (because it ends with the following blank -line). So the contents get interpreted as CommonMark: +[Info strings] for tilde code blocks can contain backticks and tildes: ```````````````````````````````` example - - -*foo* - - +~~~ aa ``` ~~~ +foo +~~~ . - -

foo

-
+
foo
+
```````````````````````````````` -Finally, in this case, the `` tags are interpreted -as [raw HTML] *inside* the CommonMark paragraph. (Because -the tag is not on a line by itself, we get inline HTML -rather than an [HTML block].) +Closing code fences cannot have [info strings]: ```````````````````````````````` example -*foo* +``` +``` aaa +``` . -

foo

+
``` aaa
+
```````````````````````````````` -HTML tags designed to contain literal content -(`script`, `style`, `pre`), comments, processing instructions, -and declarations are treated somewhat differently. -Instead of ending at the first blank line, these blocks -end at the first line containing a corresponding end tag. -As a result, these blocks can contain blank lines: -A pre tag (type 1): +## HTML blocks -```````````````````````````````` example -

-import Text.HTML.TagSoup
+An [HTML block](@) is a group of lines that is treated
+as raw HTML (and will not be escaped in HTML output).
 
-main :: IO ()
-main = print $ parseTags tags
-
-okay -. -

-import Text.HTML.TagSoup
+There are seven kinds of [HTML block], which can be defined by their
+start and end conditions.  The block begins with a line that meets a
+[start condition](@) (after up to three optional spaces of indentation).
+It ends with the first subsequent line that meets a matching
+[end condition](@), or the last line of the document, or the last line of
+the [container block](#container-blocks) containing the current HTML
+block, if no line is encountered that meets the [end condition].  If
+the first line meets both the [start condition] and the [end
+condition], the block will contain just that line.
 
-main :: IO ()
-main = print $ parseTags tags
-
-

okay

-```````````````````````````````` +1. **Start condition:** line begins with the string ``, or the end of the line.\ +**End condition:** line contains an end tag +`
`, ``, ``, or `` (case-insensitive; it +need not match the start tag). +2. **Start condition:** line begins with the string ``. -A script tag (type 1): +3. **Start condition:** line begins with the string ``. -```````````````````````````````` example - -okay -. - -

okay

-```````````````````````````````` +6. **Start condition:** line begins with the string `<` or ``, or +the string `/>`.\ +**End condition:** line is followed by a [blank line]. +7. **Start condition:** line begins with a complete [open tag] +(with any [tag name] other than `pre`, `script`, +`style`, or `textarea`) or a complete [closing tag], +followed by zero or more spaces and tabs, followed by the end of the line.\ +**End condition:** line is followed by a [blank line]. -A style tag (type 1): +HTML blocks continue until they are closed by their appropriate +[end condition], or the last line of the document or other [container +block](#container-blocks). This means any HTML **within an HTML +block** that might otherwise be recognised as a start condition will +be ignored by the parser and passed through as-is, without changing +the parser's state. + +For instance, `
` within an HTML block started by `` will not affect
+the parser state; as the HTML block was started by start condition 6, it
+will end at any blank line. This can be surprising:
 
 ```````````````````````````````` example
-
-okay
+_world_.
+
+
. - -

okay

+
+
+**Hello**,
+

world. +

+
```````````````````````````````` +In this case, the HTML block is terminated by the blank line — the `**Hello**` +text remains verbatim — and regular parsing resumes, with a paragraph, +emphasised `world` and inline and block HTML following. -If there is no matching end tag, the block will end at the -end of the document (or the enclosing [block quote][block quotes] -or [list item][list items]): - -```````````````````````````````` example - +
*foo* . - -

foo

+ +*foo* ```````````````````````````````` +Here we have two HTML blocks with a Markdown paragraph between them: + ```````````````````````````````` example -*bar* -*baz* +
+ +*Markdown* + +
. -*bar* -

baz

+
+

Markdown

+
```````````````````````````````` -Note that anything on the last line after the -end tag will be included in the [HTML block]: +The tag on the first line can be partial, as long +as it is split where there would be whitespace: ```````````````````````````````` example -1. *bar* +
+
. -1. *bar* +
+
```````````````````````````````` -A comment (type 2): +```````````````````````````````` example +
+
+. +
+
+```````````````````````````````` + +An open tag need not be closed: ```````````````````````````````` example - -okay +*bar* . - -

okay

+
+*foo* +

bar

```````````````````````````````` -A processing instruction (type 3): +A partial tag need not even be completed (garbage +in, garbage out): ```````````````````````````````` example -'; - -?> -okay +
'; - -?> -

okay

+
+
+
-okay +
-

okay

+
- - + . - -
<!-- foo -->
-
+ ```````````````````````````````` ```````````````````````````````` example -
- -
+
+foo +
. -
-
<div>
-
+
+foo +
```````````````````````````````` -An HTML block of types 1--6 can interrupt a paragraph, and need not be -preceded by a blank line. +Everything until the next blank line or end of document +gets included in the HTML block. So, in the following +example, what looks like a Markdown code block +is actually part of the HTML block, which continues until a blank +line or the end of the document is reached: ```````````````````````````````` example -Foo -
-bar -
+
+``` c +int x = 33; +``` . -

Foo

-
-bar -
+
+``` c +int x = 33; +``` ```````````````````````````````` -However, a following blank line is needed, except at the end of -a document, and except for blocks of types 1--5, [above][HTML -block]: +To start an [HTML block] with a tag that is *not* in the +list of block-level tags in (6), you must put the tag by +itself on the first line (and it must be complete): ```````````````````````````````` example -
-bar -
-*foo* + +*bar* + . -
-bar -
-*foo* + +*bar* + ```````````````````````````````` -HTML blocks of type 7 cannot interrupt a paragraph: +In type 7 blocks, the [tag name] can be anything: ```````````````````````````````` example -Foo - -baz + +*bar* + . -

Foo - -baz

+ +*bar* + ```````````````````````````````` -This rule differs from John Gruber's original Markdown syntax -specification, which says: +```````````````````````````````` example + +*bar* + +. + +*bar* + +```````````````````````````````` -> The only restrictions are that block-level HTML elements — -> e.g. `
`, ``, `
`, `

`, etc. — must be separated from -> surrounding content by blank lines, and the start and end tags of the -> block should not be indented with tabs or spaces. -In some ways Gruber's rule is more restrictive than the one given -here: +```````````````````````````````` example + +*bar* +. + +*bar* +```````````````````````````````` -- It requires that an HTML block be preceded by a blank line. -- It does not allow the start tag to be indented. -- It requires a matching end tag, which it also does not allow to - be indented. -Most Markdown implementations (including some of Gruber's own) do not -respect all of these restrictions. +These rules are designed to allow us to work with tags that +can function as either block-level or inline-level tags. +The `` tag is a nice example. We can surround content with +`` tags in three different ways. In this case, we get a raw +HTML block, because the `` tag is on a line by itself: -There is one respect, however, in which Gruber's rule is more liberal -than the one given here, since it allows blank lines to occur inside -an HTML block. There are two reasons for disallowing them here. -First, it removes the need to parse balanced tags, which is -expensive and can require backtracking from the end of the document -if no matching end tag is found. Second, it provides a very simple -and flexible way of including Markdown content inside HTML tags: -simply separate the Markdown from the HTML using blank lines: +```````````````````````````````` example + +*foo* + +. + +*foo* + +```````````````````````````````` -Compare: + +In this case, we get a raw HTML block that just includes +the `` tag (because it ends with the following blank +line). So the contents get interpreted as CommonMark: ```````````````````````````````` example -

+ -*Emphasized* text. +*foo* -
+ . -
-

Emphasized text.

-
+ +

foo

+
```````````````````````````````` +Finally, in this case, the `` tags are interpreted +as [raw HTML] *inside* the CommonMark paragraph. (Because +the tag is not on a line by itself, we get inline HTML +rather than an [HTML block].) + ```````````````````````````````` example -
-*Emphasized* text. -
+*foo* . -
-*Emphasized* text. -
+

foo

```````````````````````````````` -Some Markdown implementations have adopted a convention of -interpreting content inside tags as text if the open tag has -the attribute `markdown=1`. The rule given above seems a simpler and -more elegant way of achieving the same expressive power, which is also -much simpler to parse. +HTML tags designed to contain literal content +(`pre`, `script`, `style`, `textarea`), comments, processing instructions, +and declarations are treated somewhat differently. +Instead of ending at the first blank line, these blocks +end at the first line containing a corresponding end tag. +As a result, these blocks can contain blank lines: -The main potential drawback is that one can no longer paste HTML -blocks into Markdown documents with 100% reliability. However, -*in most cases* this will work fine, because the blank lines in -HTML are usually followed by HTML block tags. For example: +A pre tag (type 1): ```````````````````````````````` example -
+

+import Text.HTML.TagSoup
 
-
+main :: IO () +main = print $ parseTags tags + +okay +. +

+import Text.HTML.TagSoup
 
-
+main :: IO () +main = print $ parseTags tags + +

okay

+```````````````````````````````` - -
-Hi -
+A script tag (type 1): + +```````````````````````````````` example + +okay . - - - - -
-Hi -
+ +

okay

```````````````````````````````` -There are problems, however, if the inner tags are indented -*and* separated by spaces, as then they will be interpreted as -an indented code block: +A textarea tag (type 1): ```````````````````````````````` example - + . -
- -
<td>
-  Hi
-</td>
-
- -
-```````````````````````````````` - + +```````````````````````````````` -A [link reference definition] -does not correspond to a structural element of a document. Instead, it -defines a label which can be used in [reference links] -and reference-style [images] elsewhere in the document. [Link -reference definitions] can come either before or after the links that use -them. +A style tag (type 1): ```````````````````````````````` example -[foo]: /url "title" + +okay . -

foo

+ +

okay

```````````````````````````````` +If there is no matching end tag, the block will end at the +end of the document (or the enclosing [block quote][block quotes] +or [list item][list items]): + ```````````````````````````````` example - [foo]: - /url - 'the title' + +*foo* . -

foo

+ +

foo

```````````````````````````````` -However, it may not contain a [blank line]: - ```````````````````````````````` example -[foo]: /url 'title - -with blank line' - -[foo] +*bar* +*baz* . -

[foo]: /url 'title

-

with blank line'

-

[foo]

+*bar* +

baz

```````````````````````````````` -The title may be omitted: +Note that anything on the last line after the +end tag will be included in the [HTML block]: ```````````````````````````````` example -[foo]: -/url - -[foo] +1. *bar* . -

foo

+1. *bar* ```````````````````````````````` -The link destination may not be omitted: +A comment (type 2): ```````````````````````````````` example -[foo]: + +okay . -

[foo]:

-

[foo]

-```````````````````````````````` + +

okay

+```````````````````````````````` -```````````````````````````````` example -[foo]: <> -[foo] -. -

foo

-```````````````````````````````` -The title must be separated from the link destination by -whitespace: +A processing instruction (type 3): ```````````````````````````````` example -[foo]: (baz) +'; + +?> +okay . -

[foo]: (baz)

-

[foo]

+'; + +?> +

okay

```````````````````````````````` -Both title and destination can contain backslash escapes -and literal backslashes: +A declaration (type 4): ```````````````````````````````` example -[foo]: /url\bar\*baz "foo\"bar\baz" - -[foo] + . -

foo

+ ```````````````````````````````` -A link can come before its corresponding definition: +CDATA (type 5): ```````````````````````````````` example -[foo] - -[foo]: url -. -

foo

-```````````````````````````````` + +okay +. +foo

+ return 0; + } +} +]]> +

okay

```````````````````````````````` -As noted in the section on [Links], matching of labels is -case-insensitive (see [matches]). +The opening tag can be preceded by up to three spaces of indentation, but not +four: ```````````````````````````````` example -[FOO]: /url + -[Foo] + . -

Foo

+ +
<!-- foo -->
+
```````````````````````````````` ```````````````````````````````` example -[ΑΓΩ]: /φου +
-[αγω] +
. -

αγω

+
+
<div>
+
```````````````````````````````` -Here is a link reference definition with no corresponding link. -It contributes nothing to the document. +An HTML block of types 1--6 can interrupt a paragraph, and need not be +preceded by a blank line. ```````````````````````````````` example -[foo]: /url +Foo +
+bar +
. +

Foo

+
+bar +
```````````````````````````````` -Here is another one: +However, a following blank line is needed, except at the end of +a document, and except for blocks of types 1--5, [above][HTML +block]: ```````````````````````````````` example -[ -foo -]: /url +
bar +
+*foo* . -

bar

+
+bar +
+*foo* ```````````````````````````````` -This is not a link reference definition, because there are -[non-whitespace characters] after the title: +HTML blocks of type 7 cannot interrupt a paragraph: ```````````````````````````````` example -[foo]: /url "title" ok +Foo + +baz . -

[foo]: /url "title" ok

+

Foo + +baz

```````````````````````````````` -This is a link reference definition, but it has no title: - -```````````````````````````````` example -[foo]: /url -"title" ok -. -

"title" ok

-```````````````````````````````` +This rule differs from John Gruber's original Markdown syntax +specification, which says: +> The only restrictions are that block-level HTML elements — +> e.g. `<div>`, `<table>`, `<pre>`, `<p>`, etc. — must be separated from +> surrounding content by blank lines, and the start and end tags of the +> block should not be indented with spaces or tabs. -This is not a link reference definition, because it is indented -four spaces: +In some ways Gruber's rule is more restrictive than the one given +here: -```````````````````````````````` example - [foo]: /url "title" +- It requires that an HTML block be preceded by a blank line. +- It does not allow the start tag to be indented. +- It requires a matching end tag, which it also does not allow to + be indented. -[foo] -. -

[foo]: /url "title"
-
-

[foo]

-```````````````````````````````` +Most Markdown implementations (including some of Gruber's own) do not +respect all of these restrictions. +There is one respect, however, in which Gruber's rule is more liberal +than the one given here, since it allows blank lines to occur inside +an HTML block. There are two reasons for disallowing them here. +First, it removes the need to parse balanced tags, which is +expensive and can require backtracking from the end of the document +if no matching end tag is found. Second, it provides a very simple +and flexible way of including Markdown content inside HTML tags: +simply separate the Markdown from the HTML using blank lines: -This is not a link reference definition, because it occurs inside -a code block: +Compare: ```````````````````````````````` example -``` -[foo]: /url -``` +
-[foo] +*Emphasized* text. + +
. -
[foo]: /url
-
-

[foo]

+
+

Emphasized text.

+
```````````````````````````````` -A [link reference definition] cannot interrupt a paragraph. - ```````````````````````````````` example -Foo -[bar]: /baz - -[bar] +
+*Emphasized* text. +
. -

Foo -[bar]: /baz

-

[bar]

+
+*Emphasized* text. +
```````````````````````````````` -However, it can directly follow other block elements, such as headings -and thematic breaks, and it need not be followed by a blank line. - -```````````````````````````````` example -# [Foo] -[foo]: /url -> bar -. -

Foo

-
-

bar

-
-```````````````````````````````` +Some Markdown implementations have adopted a convention of +interpreting content inside tags as text if the open tag has +the attribute `markdown=1`. The rule given above seems a simpler and +more elegant way of achieving the same expressive power, which is also +much simpler to parse. -```````````````````````````````` example -[foo]: /url -bar -=== -[foo] -. -

bar

-

foo

-```````````````````````````````` +The main potential drawback is that one can no longer paste HTML +blocks into Markdown documents with 100% reliability. However, +*in most cases* this will work fine, because the blank lines in +HTML are usually followed by HTML block tags. For example: ```````````````````````````````` example -[foo]: /url -=== -[foo] -. -

=== -foo

-```````````````````````````````` +
+ -Several [link reference definitions] -can occur one after another, without intervening blank lines. + -```````````````````````````````` example -[foo]: /foo-url "foo" -[bar]: /bar-url - "bar" -[baz]: /baz-url + -[foo], -[bar], -[baz] +
+Hi +
. -

foo, -bar, -baz

+ + + + +
+Hi +
```````````````````````````````` -[Link reference definitions] can occur -inside block containers, like lists and block quotations. They -affect the entire document, not just the container in which they -are defined: +There are problems, however, if the inner tags are indented +*and* separated by spaces, as then they will be interpreted as +an indented code block: ```````````````````````````````` example -[foo] + -> [foo]: /url -. -

foo

-
-
-```````````````````````````````` + + -Whether something is a [link reference definition] is -independent of whether the link reference it defines is -used in the document. Thus, for example, the following -document contains just a link reference definition, and -no visible content: + -```````````````````````````````` example -[foo]: /url +
+ Hi +
. -```````````````````````````````` + + +
<td>
+  Hi
+</td>
+
+ +
+```````````````````````````````` -## Paragraphs +Fortunately, blank lines are usually not necessary and can be +deleted. The exception is inside `<pre>` tags, but as described
+[above][HTML blocks], raw HTML blocks starting with `<pre>`
+*can* contain blank lines.
 
-A sequence of non-blank lines that cannot be interpreted as other
-kinds of blocks forms a [paragraph](@).
-The contents of the paragraph are the result of parsing the
-paragraph's raw content as inlines.  The paragraph's raw content
-is formed by concatenating the lines and removing initial and final
-[whitespace].
+## Link reference definitions
 
-A simple example with two paragraphs:
+A [link reference definition](@)
+consists of a [link label], optionally preceded by up to three spaces of
+indentation, followed
+by a colon (`:`), optional spaces or tabs (including up to one
+[line ending]), a [link destination],
+optional spaces or tabs (including up to one
+[line ending]), and an optional [link
+title], which if it is present must be separated
+from the [link destination] by spaces or tabs.
+No further character may occur.
+
+A [link reference definition]
+does not correspond to a structural element of a document.  Instead, it
+defines a label which can be used in [reference links]
+and reference-style [images] elsewhere in the document.  [Link
+reference definitions] can come either before or after the links that use
+them.
 
 ```````````````````````````````` example
-aaa
+[foo]: /url "title"
 
-bbb
+[foo]
 .
-

aaa

-

bbb

+

foo

```````````````````````````````` -Paragraphs can contain multiple lines, but no blank lines: - ```````````````````````````````` example -aaa -bbb + [foo]: + /url + 'the title' -ccc -ddd +[foo] . -

aaa -bbb

-

ccc -ddd

+

foo

```````````````````````````````` -Multiple blank lines between paragraph have no effect: - ```````````````````````````````` example -aaa - +[Foo*bar\]]:my_(url) 'title (with parens)' -bbb +[Foo*bar\]] . -

aaa

-

bbb

+

Foo*bar]

```````````````````````````````` -Leading spaces are skipped: - ```````````````````````````````` example - aaa - bbb +[Foo bar]: + +'title' + +[Foo bar] . -

aaa -bbb

+

Foo bar

```````````````````````````````` -Lines after the first may be indented any amount, since indented -code blocks cannot interrupt paragraphs. +The title may extend over multiple lines: ```````````````````````````````` example -aaa - bbb - ccc +[foo]: /url ' +title +line1 +line2 +' + +[foo] . -

aaa -bbb -ccc

+

foo

```````````````````````````````` -However, the first line may be indented at most three spaces, -or an indented code block will be triggered: +However, it may not contain a [blank line]: ```````````````````````````````` example - aaa -bbb +[foo]: /url 'title + +with blank line' + +[foo] . -

aaa -bbb

+

[foo]: /url 'title

+

with blank line'

+

[foo]

```````````````````````````````` +The title may be omitted: + ```````````````````````````````` example - aaa -bbb +[foo]: +/url + +[foo] . -
aaa
-
-

bbb

+

foo

```````````````````````````````` -Final spaces are stripped before inline parsing, so a paragraph -that ends with two or more spaces will not end with a [hard line -break]: +The link destination may not be omitted: ```````````````````````````````` example -aaa -bbb +[foo]: + +[foo] . -

aaa
-bbb

+

[foo]:

+

[foo]

```````````````````````````````` + However, an empty link destination may be specified using + angle brackets: -## Blank lines +```````````````````````````````` example +[foo]: <> -[Blank lines] between block-level elements are ignored, -except for the role they play in determining whether a [list] -is [tight] or [loose]. +[foo] +. +

foo

+```````````````````````````````` -Blank lines at the beginning and end of the document are also ignored. +The title must be separated from the link destination by +spaces or tabs: ```````````````````````````````` example - - -aaa - - -# aaa +[foo]: (baz) - +[foo] . -

aaa

-

aaa

+

[foo]: (baz)

+

[foo]

```````````````````````````````` +Both title and destination can contain backslash escapes +and literal backslashes: -# Container blocks +```````````````````````````````` example +[foo]: /url\bar\*baz "foo\"bar\baz" -A [container block](#container-blocks) is a block that has other -blocks as its contents. There are two basic kinds of container blocks: -[block quotes] and [list items]. -[Lists] are meta-containers for [list items]. +[foo] +. +

foo

+```````````````````````````````` -We define the syntax for container blocks recursively. The general -form of the definition is: -> If X is a sequence of blocks, then the result of -> transforming X in such-and-such a way is a container of type Y -> with these blocks as its content. +A link can come before its corresponding definition: -So, we explain what counts as a block quote or list item by explaining -how these can be *generated* from their contents. This should suffice -to define the syntax, although it does not give a recipe for *parsing* -these constructions. (A recipe is provided below in the section entitled -[A parsing strategy](#appendix-a-parsing-strategy).) +```````````````````````````````` example +[foo] -## Block quotes +[foo]: url +. +

foo

+```````````````````````````````` -A [block quote marker](@) -consists of 0-3 spaces of initial indent, plus (a) the character `>` together -with a following space, or (b) a single character `>` not followed by a space. -The following rules define [block quotes]: +If there are several matching definitions, the first one takes +precedence: -1. **Basic case.** If a string of lines *Ls* constitute a sequence - of blocks *Bs*, then the result of prepending a [block quote - marker] to the beginning of each line in *Ls* - is a [block quote](#block-quotes) containing *Bs*. +```````````````````````````````` example +[foo] -2. **Laziness.** If a string of lines *Ls* constitute a [block - quote](#block-quotes) with contents *Bs*, then the result of deleting - the initial [block quote marker] from one or - more lines in which the next [non-whitespace character] after the [block - quote marker] is [paragraph continuation - text] is a block quote with *Bs* as its content. - [Paragraph continuation text](@) is text - that will be parsed as part of the content of a paragraph, but does - not occur at the beginning of the paragraph. +[foo]: first +[foo]: second +. +

foo

+```````````````````````````````` -3. **Consecutiveness.** A document cannot contain two [block - quotes] in a row unless there is a [blank line] between them. -Nothing else counts as a [block quote](#block-quotes). +As noted in the section on [Links], matching of labels is +case-insensitive (see [matches]). + +```````````````````````````````` example +[FOO]: /url + +[Foo] +. +

Foo

+```````````````````````````````` -Here is a simple example: ```````````````````````````````` example -> # Foo -> bar -> baz +[ΑΓΩ]: /φου + +[αγω] . -
-

Foo

-

bar -baz

-
+

αγω

```````````````````````````````` -The spaces after the `>` characters can be omitted: +Whether something is a [link reference definition] is +independent of whether the link reference it defines is +used in the document. Thus, for example, the following +document contains just a link reference definition, and +no visible content: ```````````````````````````````` example -># Foo ->bar -> baz +[foo]: /url . -
-

Foo

-

bar -baz

-
```````````````````````````````` -The `>` characters can be indented 1-3 spaces: +Here is another one: ```````````````````````````````` example - > # Foo - > bar - > baz -. -
-

Foo

-

bar -baz

-
+[ +foo +]: /url +bar +. +

bar

```````````````````````````````` -Four spaces gives us a code block: +This is not a link reference definition, because there are +characters other than spaces or tabs after the title: ```````````````````````````````` example - > # Foo - > bar - > baz +[foo]: /url "title" ok . -
> # Foo
-> bar
-> baz
-
+

[foo]: /url "title" ok

```````````````````````````````` -The Laziness clause allows us to omit the `>` before -[paragraph continuation text]: +This is a link reference definition, but it has no title: ```````````````````````````````` example -> # Foo -> bar -baz +[foo]: /url +"title" ok . -
-

Foo

-

bar -baz

-
+

"title" ok

```````````````````````````````` -A block quote can contain some lazy and some non-lazy -continuation lines: +This is not a link reference definition, because it is indented +four spaces: ```````````````````````````````` example -> bar -baz -> foo + [foo]: /url "title" + +[foo] . -
-

bar -baz -foo

-
+
[foo]: /url "title"
+
+

[foo]

```````````````````````````````` -Laziness only applies to lines that would have been continuations of -paragraphs had they been prepended with [block quote markers]. -For example, the `> ` cannot be omitted in the second line of +This is not a link reference definition, because it occurs inside +a code block: -``` markdown -> foo -> --- +```````````````````````````````` example +``` +[foo]: /url ``` -without changing the meaning: - -```````````````````````````````` example -> foo ---- +[foo] . -
-

foo

-
-
+
[foo]: /url
+
+

[foo]

```````````````````````````````` -Similarly, if we omit the `> ` in the second line of - -``` markdown -> - foo -> - bar -``` - -then the block quote ends after the first line: +A [link reference definition] cannot interrupt a paragraph. ```````````````````````````````` example -> - foo -- bar +Foo +[bar]: /baz + +[bar] . -
-
    -
  • foo
  • -
-
-
    -
  • bar
  • -
+

Foo +[bar]: /baz

+

[bar]

```````````````````````````````` -For the same reason, we can't omit the `> ` in front of -subsequent lines of an indented or fenced code block: +However, it can directly follow other block elements, such as headings +and thematic breaks, and it need not be followed by a blank line. ```````````````````````````````` example -> foo - bar +# [Foo] +[foo]: /url +> bar . +

Foo

-
foo
-
+

bar

-
bar
-
```````````````````````````````` - ```````````````````````````````` example -> ``` -foo -``` +[foo]: /url +bar +=== +[foo] . -
-
-
-

foo

-
+

bar

+

foo

```````````````````````````````` - -Note that in the following case, we have a [lazy -continuation line]: - ```````````````````````````````` example -> foo - - bar +[foo]: /url +=== +[foo] . -
-

foo -- bar

-
+

=== +foo

```````````````````````````````` -To see why, note that in +Several [link reference definitions] +can occur one after another, without intervening blank lines. -```markdown -> foo -> - bar -``` +```````````````````````````````` example +[foo]: /foo-url "foo" +[bar]: /bar-url + "bar" +[baz]: /baz-url -the `- bar` is indented too far to start a list, and can't -be an indented code block because indented code blocks cannot -interrupt paragraphs, so it is [paragraph continuation text]. +[foo], +[bar], +[baz] +. +

foo, +bar, +baz

+```````````````````````````````` -A block quote can be empty: + +[Link reference definitions] can occur +inside block containers, like lists and block quotations. They +affect the entire document, not just the container in which they +are defined: ```````````````````````````````` example -> +[foo] + +> [foo]: /url . +

foo

```````````````````````````````` +## Paragraphs + +A sequence of non-blank lines that cannot be interpreted as other +kinds of blocks forms a [paragraph](@). +The contents of the paragraph are the result of parsing the +paragraph's raw content as inlines. The paragraph's raw content +is formed by concatenating the lines and removing initial and final +spaces or tabs. + +A simple example with two paragraphs: + ```````````````````````````````` example -> -> -> +aaa + +bbb . -
-
+

aaa

+

bbb

```````````````````````````````` -A block quote can have initial or final blank lines: +Paragraphs can contain multiple lines, but no blank lines: ```````````````````````````````` example -> -> foo -> +aaa +bbb + +ccc +ddd . -
-

foo

-
+

aaa +bbb

+

ccc +ddd

```````````````````````````````` -A blank line always separates block quotes: +Multiple blank lines between paragraphs have no effect: ```````````````````````````````` example -> foo +aaa -> bar + +bbb . -
-

foo

-
-
-

bar

-
+

aaa

+

bbb

```````````````````````````````` -(Most current Markdown implementations, including John Gruber's -original `Markdown.pl`, will parse this example as a single block quote -with two paragraphs. But it seems better to allow the author to decide -whether two block quotes or one are wanted.) - -Consecutiveness means that if we put these block quotes together, -we get a single block quote: +Leading spaces or tabs are skipped: ```````````````````````````````` example -> foo -> bar + aaa + bbb . -
-

foo -bar

-
+

aaa +bbb

```````````````````````````````` -To get a block quote with two paragraphs, use: +Lines after the first may be indented any amount, since indented +code blocks cannot interrupt paragraphs. ```````````````````````````````` example -> foo -> -> bar +aaa + bbb + ccc . -
-

foo

-

bar

-
+

aaa +bbb +ccc

```````````````````````````````` -Block quotes can interrupt paragraphs: +However, the first line may be preceded by up to three spaces of indentation. +Four spaces of indentation is too many: ```````````````````````````````` example -foo + aaa +bbb +. +

aaa +bbb

+```````````````````````````````` + + +```````````````````````````````` example + aaa +bbb +. +
aaa
+
+

bbb

+```````````````````````````````` + + +Final spaces or tabs are stripped before inline parsing, so a paragraph +that ends with two or more spaces will not end with a [hard line +break]: + +```````````````````````````````` example +aaa +bbb +. +

aaa
+bbb

+```````````````````````````````` + + +## Blank lines + +[Blank lines] between block-level elements are ignored, +except for the role they play in determining whether a [list] +is [tight] or [loose]. + +Blank lines at the beginning and end of the document are also ignored. + +```````````````````````````````` example + + +aaa + + +# aaa + + +. +

aaa

+

aaa

+```````````````````````````````` + + + +# Container blocks + +A [container block](#container-blocks) is a block that has other +blocks as its contents. There are two basic kinds of container blocks: +[block quotes] and [list items]. +[Lists] are meta-containers for [list items]. + +We define the syntax for container blocks recursively. The general +form of the definition is: + +> If X is a sequence of blocks, then the result of +> transforming X in such-and-such a way is a container of type Y +> with these blocks as its content. + +So, we explain what counts as a block quote or list item by explaining +how these can be *generated* from their contents. This should suffice +to define the syntax, although it does not give a recipe for *parsing* +these constructions. (A recipe is provided below in the section entitled +[A parsing strategy](#appendix-a-parsing-strategy).) + +## Block quotes + +A [block quote marker](@), +optionally preceded by up to three spaces of indentation, +consists of (a) the character `>` together with a following space of +indentation, or (b) a single character `>` not followed by a space of +indentation. + +The following rules define [block quotes]: + +1. **Basic case.** If a string of lines *Ls* constitute a sequence + of blocks *Bs*, then the result of prepending a [block quote + marker] to the beginning of each line in *Ls* + is a [block quote](#block-quotes) containing *Bs*. + +2. **Laziness.** If a string of lines *Ls* constitute a [block + quote](#block-quotes) with contents *Bs*, then the result of deleting + the initial [block quote marker] from one or + more lines in which the next character other than a space or tab after the + [block quote marker] is [paragraph continuation + text] is a block quote with *Bs* as its content. + [Paragraph continuation text](@) is text + that will be parsed as part of the content of a paragraph, but does + not occur at the beginning of the paragraph. + +3. 
**Consecutiveness.** A document cannot contain two [block + quotes] in a row unless there is a [blank line] between them. + +Nothing else counts as a [block quote](#block-quotes). + +Here is a simple example: + +```````````````````````````````` example +> # Foo > bar +> baz . -

foo

-

bar

+

Foo

+

bar +baz

```````````````````````````````` -In general, blank lines are not needed before or after block -quotes: +The space or tab after the `>` characters can be omitted: ```````````````````````````````` example -> aaa -*** -> bbb +># Foo +>bar +> baz .
-

aaa

-
-
-
-

bbb

+

Foo

+

bar +baz

```````````````````````````````` -However, because of laziness, a blank line is needed between -a block quote and a following paragraph: +The `>` characters can be preceded by up to three spaces of indentation: ```````````````````````````````` example -> bar -baz + > # Foo + > bar + > baz .
+

Foo

bar baz

```````````````````````````````` +Four spaces of indentation is too many: + ```````````````````````````````` example -> bar + > # Foo + > bar + > baz +. +
> # Foo
+> bar
+> baz
+
+```````````````````````````````` + + +The Laziness clause allows us to omit the `>` before +[paragraph continuation text]: +```````````````````````````````` example +> # Foo +> bar baz .
-

bar

+

Foo

+

bar +baz

-

baz

```````````````````````````````` +A block quote can contain some lazy and some non-lazy +continuation lines: + ```````````````````````````````` example > bar -> baz +> foo .
-

bar

+

bar +baz +foo

-

baz

```````````````````````````````` -It is a consequence of the Laziness rule that any number -of initial `>`s may be omitted on a continuation line of a -nested block quote: +Laziness only applies to lines that would have been continuations of +paragraphs had they been prepended with [block quote markers]. +For example, the `> ` cannot be omitted in the second line of + +``` markdown +> foo +> --- +``` + +without changing the meaning: ```````````````````````````````` example -> > > foo -bar +> foo +--- .
-
-
-

foo -bar

-
-
+

foo

+
```````````````````````````````` +Similarly, if we omit the `> ` in the second line of + +``` markdown +> - foo +> - bar +``` + +then the block quote ends after the first line: + ```````````````````````````````` example ->>> foo -> bar ->>baz +> - foo +- bar .
-
-
-

foo -bar -baz

-
-
+
    +
  • foo
  • +
+
    +
  • bar
  • +
```````````````````````````````` -When including an indented code block in a block quote, -remember that the [block quote marker] includes -both the `>` and a following space. So *five spaces* are needed after -the `>`: +For the same reason, we can't omit the `> ` in front of +subsequent lines of an indented or fenced code block: ```````````````````````````````` example -> code - -> not code +> foo + bar .
-
code
+
foo
 
+
bar
+
+```````````````````````````````` + + +```````````````````````````````` example +> ``` +foo +``` +.
-

not code

+
+

foo

+
```````````````````````````````` +Note that in the following case, we have a [lazy +continuation line]: -## List items +```````````````````````````````` example +> foo + - bar +. +
+

foo +- bar

+
+```````````````````````````````` + + +To see why, note that in + +```markdown +> foo +> - bar +``` + +the `- bar` is indented too far to start a list, and can't +be an indented code block because indented code blocks cannot +interrupt paragraphs, so it is [paragraph continuation text]. + +A block quote can be empty: + +```````````````````````````````` example +> +. +
+
+```````````````````````````````` + + +```````````````````````````````` example +> +> +> +. +
+
+```````````````````````````````` + + +A block quote can have initial or final blank lines: + +```````````````````````````````` example +> +> foo +> +. +
+

foo

+
+```````````````````````````````` + + +A blank line always separates block quotes: + +```````````````````````````````` example +> foo + +> bar +. +
+

foo

+
+
+

bar

+
+```````````````````````````````` + + +(Most current Markdown implementations, including John Gruber's +original `Markdown.pl`, will parse this example as a single block quote +with two paragraphs. But it seems better to allow the author to decide +whether two block quotes or one are wanted.) + +Consecutiveness means that if we put these block quotes together, +we get a single block quote: + +```````````````````````````````` example +> foo +> bar +. +
+

foo +bar

+
+```````````````````````````````` + + +To get a block quote with two paragraphs, use: + +```````````````````````````````` example +> foo +> +> bar +. +
+

foo

+

bar

+
+```````````````````````````````` + + +Block quotes can interrupt paragraphs: + +```````````````````````````````` example +foo +> bar +. +

foo

+
+

bar

+
+```````````````````````````````` + + +In general, blank lines are not needed before or after block +quotes: + +```````````````````````````````` example +> aaa +*** +> bbb +. +
+

aaa

+
+
+
+

bbb

+
+```````````````````````````````` + + +However, because of laziness, a blank line is needed between +a block quote and a following paragraph: + +```````````````````````````````` example +> bar +baz +. +
+

bar +baz

+
+```````````````````````````````` + + +```````````````````````````````` example +> bar + +baz +. +
+

bar

+
+

baz

+```````````````````````````````` + + +```````````````````````````````` example +> bar +> +baz +. +
+

bar

+
+

baz

+```````````````````````````````` + + +It is a consequence of the Laziness rule that any number +of initial `>`s may be omitted on a continuation line of a +nested block quote: + +```````````````````````````````` example +> > > foo +bar +. +
+
+
+

foo +bar

+
+
+
+```````````````````````````````` + + +```````````````````````````````` example +>>> foo +> bar +>>baz +. +
+
+
+

foo +bar +baz

+
+
+
+```````````````````````````````` + + +When including an indented code block in a block quote, +remember that the [block quote marker] includes +both the `>` and a following space of indentation. So *five spaces* are needed +after the `>`: + +```````````````````````````````` example +> code + +> not code +. +
+
code
+
+
+
+

not code

+
+```````````````````````````````` + + + +## List items A [list marker](@) is a [bullet list marker] or an [ordered list marker]. @@ -3755,10 +4111,10 @@ in some browsers.) The following rules define [list items]: 1. **Basic case.** If a sequence of lines *Ls* constitute a sequence of - blocks *Bs* starting with a [non-whitespace character], and *M* is a - list marker of width *W* followed by 1 ≤ *N* ≤ 4 spaces, then the result - of prepending *M* and the following spaces to the first line of - *Ls*, and indenting subsequent lines of *Ls* by *W + N* spaces, is a + blocks *Bs* starting with a character other than a space or tab, and *M* is + a list marker of width *W* followed by 1 ≤ *N* ≤ 4 spaces of indentation, + then the result of prepending *M* and the following spaces to the first line + of *Ls*, and indenting subsequent lines of *Ls* by *W + N* spaces, is a list item with *Bs* as its contents. The type of the list item (bullet or ordered) is determined by the type of its list marker. If the list item is ordered, then it is also assigned a start @@ -3823,8 +4179,8 @@ with two lines.

The most important thing to notice is that the position of the text after the list marker determines how much indentation is needed in subsequent blocks in the list item. If the list -marker takes up two spaces, and there are three spaces between -the list marker and the next [non-whitespace character], then blocks +marker takes up two spaces of indentation, and there are three spaces between +the list marker and the next character other than a space or tab, then blocks must be indented five spaces in order to fall under the list item. @@ -3885,10 +4241,10 @@ put under the list item: It is tempting to think of this in terms of columns: the continuation -blocks must be indented at least to the column of the first -[non-whitespace character] after the list marker. However, that is not quite right. -The spaces after the list marker determine how much relative indentation -is needed. Which column this indentation reaches will depend on +blocks must be indented at least to the column of the first character other than +a space or tab after the list marker. However, that is not quite right. +The spaces of indentation after the list marker determine how much relative +indentation is needed. Which column this indentation reaches will depend on how the list item is embedded in other constructions, as shown by this example: @@ -3935,7 +4291,7 @@ far enough past the blockquote marker: ```````````````````````````````` -Note that at least one space is needed between the list marker and +Note that at least one space or tab is needed between the list marker and any following content, so these are not list items: ```````````````````````````````` example @@ -4067,16 +4423,16 @@ A start number may not be negative: 2. 
**Item starting with indented code.** If a sequence of lines *Ls* constitute a sequence of blocks *Bs* starting with an indented code block, and *M* is a list marker of width *W* followed by - one space, then the result of prepending *M* and the following - space to the first line of *Ls*, and indenting subsequent lines of - *Ls* by *W + 1* spaces, is a list item with *Bs* as its contents. + one space of indentation, then the result of prepending *M* and the + following space to the first line of *Ls*, and indenting subsequent lines + of *Ls* by *W + 1* spaces, is a list item with *Bs* as its contents. If a line is empty, then it need not be indented. The type of the list item (bullet or ordered) is determined by the type of its list marker. If the list item is ordered, then it is also assigned a start number, based on the ordered list marker. -An indented code block will have to be indented four spaces beyond -the edge of the region where text will be included in the list item. +An indented code block will have to be preceded by four spaces of indentation +beyond the edge of the region where text will be included in the list item. 
In the following case that is 6 spaces: ```````````````````````````````` example @@ -4112,8 +4468,8 @@ And in this case it is 11 spaces: If the *first* block in the list item is an indented code block, -then by rule #2, the contents must be indented *one* space after the -list marker: +then by rule #2, the contents must be preceded by *one* space of indentation +after the list marker: ```````````````````````````````` example indented code @@ -4149,7 +4505,7 @@ paragraph ```````````````````````````````` -Note that an additional space indent is interpreted as space +Note that an additional space of indentation is interpreted as space inside the code block: ```````````````````````````````` example @@ -4173,10 +4529,10 @@ inside the code block: Note that rules #1 and #2 only apply to two cases: (a) cases in which the lines to be included in a list item begin with a -[non-whitespace character], and (b) cases in which +character other than a space or tab, and (b) cases in which they begin with an indented code block. In a case like the following, where the first block begins with -a three-space indent, the rules do not allow us to form a list item by +three spaces of indentation, the rules do not allow us to form a list item by indenting the whole thing and prepending a list marker: ```````````````````````````````` example @@ -4201,8 +4557,8 @@ bar ```````````````````````````````` -This is not a significant restriction, because when a block begins -with 1-3 spaces indent, the indentation can always be removed without +This is not a significant restriction, because when a block is preceded by up to +three spaces of indentation, the indentation can always be removed without a change in interpretation, allowing rule #1 to be applied. So, in the above case: @@ -4222,11 +4578,10 @@ the above case: 3. 
**Item starting with a blank line.** If a sequence of lines *Ls* starting with a single [blank line] constitute a (possibly empty) - sequence of blocks *Bs*, not separated from each other by more than - one blank line, and *M* is a list marker of width *W*, + sequence of blocks *Bs*, and *M* is a list marker of width *W*, then the result of prepending *M* to the first line of *Ls*, and - indenting subsequent lines of *Ls* by *W + 1* spaces, is a list - item with *Bs* as its contents. + preceding subsequent lines of *Ls* by *W + 1* spaces of indentation, is a + list item with *Bs* as its contents. If a line is empty, then it need not be indented. The type of the list item (bullet or ordered) is determined by the type of its list marker. If the list item is ordered, then it is also assigned a @@ -4301,7 +4656,7 @@ Here is an empty bullet list item: ```````````````````````````````` -It does not matter whether there are spaces following the [list marker]: +It does not matter whether there are spaces or tabs following the [list marker]: ```````````````````````````````` example - foo @@ -4358,9 +4713,9 @@ foo 4. **Indentation.** If a sequence of lines *Ls* constitutes a list item - according to rule #1, #2, or #3, then the result of indenting each line - of *Ls* by 1-3 spaces (the same for each line) also constitutes a - list item with the same contents and attributes. If a line is + according to rule #1, #2, or #3, then the result of preceding each line + of *Ls* by up to three spaces of indentation (the same for each line) also + constitutes a list item with the same contents and attributes. If a line is empty, then it need not be indented. Indented one space: @@ -4459,7 +4814,7 @@ Four spaces indent gives a code block: 5. 
**Laziness.** If a string of lines *Ls* constitute a [list item](#list-items) with contents *Bs*, then the result of deleting some or all of the indentation from one or more lines in which the - next [non-whitespace character] after the indentation is + next character other than a space or tab after the indentation is [paragraph continuation text] is a list item with the same contents and attributes. The unindented lines are called @@ -4544,7 +4899,7 @@ continued here.

The rules for sublists follow from the general rules [above][List items]. A sublist must be indented the same number -of spaces a paragraph would need to be in order to be included +of spaces of indentation a paragraph would need to be in order to be included in the list item. So, in this case we need two spaces indent: @@ -4777,8 +5132,8 @@ The choice of four spaces is arbitrary. It can be learned, but it is not likely to be guessed, and it trips up beginners regularly. Would it help to adopt a two-space rule? The problem is that such -a rule, together with the rule allowing 1--3 spaces indentation of the -initial list marker, allows text that is indented *less than* the +a rule, together with the rule allowing up to three spaces of indentation for +the initial list marker, allows text that is indented *less than* the original list marker to be included in the list item. For example, `Markdown.pl` parses @@ -4994,11 +5349,11 @@ by itself should be a paragraph followed by a nested sublist. Since it is well established Markdown practice to allow lists to interrupt paragraphs inside list items, the [principle of uniformity] requires us to allow this outside list items as -well. ([reStructuredText](http://docutils.sourceforge.net/rst.html) +well. ([reStructuredText](https://docutils.sourceforge.net/rst.html) takes a different approach, requiring blank lines before lists even inside other list items.) -In order to solve of unwanted lists in paragraphs with +In order to solve the problem of unwanted lists in paragraphs with hard-wrapped numerals, we allow only lists starting with `1` to interrupt paragraphs. Thus, @@ -5170,8 +5525,8 @@ item: ```````````````````````````````` -Note, however, that list items may not be indented more than -three spaces. Here `- e` is treated as a paragraph continuation +Note, however, that list items may not be preceded by more than +three spaces of indentation. 
Here `- e` is treated as a paragraph continuation line, because it is indented more than three spaces: ```````````````````````````````` example @@ -5257,7 +5612,7 @@ So is this, with a empty second item: ```````````````````````````````` -These are loose lists, even though there is no space between the items, +These are loose lists, even though there are no blank lines between the items, because one of the items directly contains two block-level elements with a blank line between them: @@ -5278,572 +5633,233 @@ with a blank line between them:
  • d

    -
  • - -```````````````````````````````` - - -```````````````````````````````` example -- a -- b - - [ref]: /url -- d -. -
      -
    • -

      a

      -
    • -
    • -

      b

      -
    • -
    • -

      d

      -
    • -
    -```````````````````````````````` - - -This is a tight list, because the blank lines are in a code block: - -```````````````````````````````` example -- a -- ``` - b - - - ``` -- c -. -
      -
    • a
    • -
    • -
      b
      -
      -
      -
      -
    • -
    • c
    • -
    -```````````````````````````````` - - -This is a tight list, because the blank line is between two -paragraphs of a sublist. So the sublist is loose while -the outer list is tight: - -```````````````````````````````` example -- a - - b - - c -- d -. -
      -
    • a -
        -
      • -

        b

        -

        c

        -
      • -
      -
    • -
    • d
    • -
    -```````````````````````````````` - - -This is a tight list, because the blank line is inside the -block quote: - -```````````````````````````````` example -* a - > b - > -* c -. -
      -
    • a -
      -

      b

      -
      -
    • -
    • c
    • -
    -```````````````````````````````` - - -This list is tight, because the consecutive block elements -are not separated by blank lines: - -```````````````````````````````` example -- a - > b - ``` - c - ``` -- d -. -
      -
    • a -
      -

      b

      -
      -
      c
      -
      -
    • -
    • d
    • -
    -```````````````````````````````` - - -A single-paragraph list is tight: - -```````````````````````````````` example -- a -. -
      -
    • a
    • -
    -```````````````````````````````` - - -```````````````````````````````` example -- a - - b -. -
      -
    • a -
        -
      • b
      • -
      -
    • -
    -```````````````````````````````` - - -This list is loose, because of the blank line between the -two block elements in the list item: - -```````````````````````````````` example -1. ``` - foo - ``` - - bar -. -
      -
    1. -
      foo
      -
      -

      bar

      -
    2. -
    -```````````````````````````````` - - -Here the outer list is loose, the inner list tight: - -```````````````````````````````` example -* foo - * bar - - baz -. -
      -
    • -

      foo

      -
        -
      • bar
      • -
      -

      baz

      -
    • -
    -```````````````````````````````` - - -```````````````````````````````` example -- a - - b - - c - -- d - - e - - f -. -
      -
    • -

      a

      -
        -
      • b
      • -
      • c
      • -
      -
    • -
    • -

      d

      -
        -
      • e
      • -
      • f
      • -
      -
    • -
    -```````````````````````````````` - - -# Inlines - -Inlines are parsed sequentially from the beginning of the character -stream to the end (left to right, in left-to-right languages). -Thus, for example, in - -```````````````````````````````` example -`hi`lo` -. -

    hilo`

    -```````````````````````````````` - -`hi` is parsed as code, leaving the backtick at the end as a literal -backtick. - - -## Backslash escapes - -Any ASCII punctuation character may be backslash-escaped: - -```````````````````````````````` example -\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~ -. -

    !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~

    -```````````````````````````````` - - -Backslashes before other characters are treated as literal -backslashes: - -```````````````````````````````` example -\→\A\a\ \3\φ\« -. -

    \→\A\a\ \3\φ\«

    -```````````````````````````````` - - -Escaped characters are treated as regular characters and do -not have their usual Markdown meanings: - -```````````````````````````````` example -\*not emphasized* -\
    not a tag -\[not a link](/foo) -\`not code` -1\. not a list -\* not a list -\# not a heading -\[foo]: /url "not a reference" -\ö not a character entity -. -

    *not emphasized* -<br/> not a tag -[not a link](/foo) -`not code` -1. not a list -* not a list -# not a heading -[foo]: /url "not a reference" -&ouml; not a character entity

    -```````````````````````````````` - - -If a backslash is itself escaped, the following character is not: - -```````````````````````````````` example -\\*emphasis* -. -

    \emphasis

    -```````````````````````````````` - - -A backslash at the end of the line is a [hard line break]: - -```````````````````````````````` example -foo\ -bar -. -

    foo
    -bar

    -```````````````````````````````` - - -Backslash escapes do not work in code blocks, code spans, autolinks, or -raw HTML: - -```````````````````````````````` example -`` \[\` `` -. -

    \[\`

    -```````````````````````````````` - - -```````````````````````````````` example - \[\] -. -
    \[\]
    -
    -```````````````````````````````` - - -```````````````````````````````` example -~~~ -\[\] -~~~ -. -
    \[\]
    -
    -```````````````````````````````` - - -```````````````````````````````` example - -. -

    http://example.com?find=\*

    -```````````````````````````````` - - -```````````````````````````````` example - -. - -```````````````````````````````` - - -But they work in all other contexts, including URLs and link titles, -link references, and [info strings] in [fenced code blocks]: - -```````````````````````````````` example -[foo](/bar\* "ti\*tle") -. -

    foo

    -```````````````````````````````` - - -```````````````````````````````` example -[foo] - -[foo]: /bar\* "ti\*tle" -. -

    foo

    -```````````````````````````````` - - -```````````````````````````````` example -``` foo\+bar -foo -``` -. -
    foo
    -
    -```````````````````````````````` - - - -## Entity and numeric character references - -Valid HTML entity references and numeric character references -can be used in place of the corresponding Unicode character, -with the following exceptions: - -- Entity and character references are not recognized in code - blocks and code spans. - -- Entity and character references cannot stand in place of - special characters that define structural elements in - CommonMark. For example, although `*` can be used - in place of a literal `*` character, `*` cannot replace - `*` in emphasis delimiters, bullet list markers, or thematic - breaks. - -Conforming CommonMark parsers need not store information about -whether a particular character was represented in the source -using a Unicode character or an entity reference. - -[Entity references](@) consist of `&` + any of the valid -HTML5 entity names + `;`. The -document -is used as an authoritative source for the valid entity -references and their corresponding code points. - -```````````````````````````````` example -  & © Æ Ď -¾ ℋ ⅆ -∲ ≧̸ -. -

      & © Æ Ď -¾ ℋ ⅆ -∲ ≧̸

    -```````````````````````````````` - - -[Decimal numeric character -references](@) -consist of `&#` + a string of 1--7 arabic digits + `;`. A -numeric character reference is parsed as the corresponding -Unicode character. Invalid Unicode code points will be replaced by -the REPLACEMENT CHARACTER (`U+FFFD`). For security reasons, -the code point `U+0000` will also be replaced by `U+FFFD`. - -```````````````````````````````` example -# Ӓ Ϡ � -. -

    # Ӓ Ϡ �

    -```````````````````````````````` - - -[Hexadecimal numeric character -references](@) consist of `&#` + -either `X` or `x` + a string of 1-6 hexadecimal digits + `;`. -They too are parsed as the corresponding Unicode character (this -time specified with a hexadecimal numeral instead of decimal). - -```````````````````````````````` example -" ആ ಫ -. -

    " ആ ಫ

    + + ```````````````````````````````` -Here are some nonentities: - ```````````````````````````````` example -  &x; &#; &#x; -� -&#abcdef0; -&ThisIsNotDefined; &hi?; +- a +- b + + [ref]: /url +- d . -

    &nbsp &x; &#; &#x; -&#987654321; -&#abcdef0; -&ThisIsNotDefined; &hi?;

    +
      +
    • +

      a

      +
    • +
    • +

      b

      +
    • +
    • +

      d

      +
    • +
    ```````````````````````````````` -Although HTML5 does accept some entity references -without a trailing semicolon (such as `©`), these are not -recognized here, because it makes the grammar too ambiguous: +This is a tight list, because the blank lines are in a code block: ```````````````````````````````` example -© +- a +- ``` + b + + + ``` +- c . -

    &copy

    +
      +
    • a
    • +
    • +
      b
      +
      +
      +
      +
    • +
    • c
    • +
    ```````````````````````````````` -Strings that are not on the list of HTML5 named entities are not -recognized as entity references either: +This is a tight list, because the blank line is between two +paragraphs of a sublist. So the sublist is loose while +the outer list is tight: ```````````````````````````````` example -&MadeUpEntity; +- a + - b + + c +- d . -

    &MadeUpEntity;

    +
      +
    • a +
        +
      • +

        b

        +

        c

        +
      • +
      +
    • +
    • d
    • +
    ```````````````````````````````` -Entity and numeric character references are recognized in any -context besides code spans or code blocks, including -URLs, [link titles], and [fenced code block][] [info strings]: +This is a tight list, because the blank line is inside the +block quote: ```````````````````````````````` example - +* a + > b + > +* c . - +
      +
    • a +
      +

      b

      +
      +
    • +
    • c
    • +
    ```````````````````````````````` +This list is tight, because the consecutive block elements +are not separated by blank lines: + ```````````````````````````````` example -[foo](/föö "föö") +- a + > b + ``` + c + ``` +- d . -

    foo

    +
      +
    • a +
      +

      b

      +
      +
      c
      +
      +
    • +
    • d
    • +
    ```````````````````````````````` -```````````````````````````````` example -[foo] +A single-paragraph list is tight: -[foo]: /föö "föö" +```````````````````````````````` example +- a . -

    foo

    +
      +
    • a
    • +
    ```````````````````````````````` ```````````````````````````````` example -``` föö -foo -``` +- a + - b . -
    foo
    -
    +
      +
    • a +
        +
      • b
      • +
      +
    • +
    ```````````````````````````````` -Entity and numeric character references are treated as literal -text in code spans and code blocks: +This list is loose, because of the blank line between the +two block elements in the list item: ```````````````````````````````` example -`föö` -. -

    f&ouml;&ouml;

    -```````````````````````````````` - +1. ``` + foo + ``` -```````````````````````````````` example - föfö + bar . -
    f&ouml;f&ouml;
    +
      +
    1. +
      foo
       
      +

      bar

      +
    2. +
    ```````````````````````````````` -Entity and numeric character references cannot be used -in place of symbols indicating structure in CommonMark -documents. +Here the outer list is loose, the inner list tight: ```````````````````````````````` example -*foo* -*foo* +* foo + * bar + + baz . -

    *foo* -foo

    +
      +
    • +

      foo

      +
        +
      • bar
      • +
      +

      baz

      +
    • +
    ```````````````````````````````` + ```````````````````````````````` example -* foo +- a + - b + - c -* foo +- d + - e + - f . -

    * foo

      -
    • foo
    • +
    • +

      a

      +
        +
      • b
      • +
      • c
      • +
      +
    • +
    • +

      d

      +
        +
      • e
      • +
      • f
      • +
      +
    ```````````````````````````````` -```````````````````````````````` example -foo bar -. -

    foo -bar

    -```````````````````````````````` +# Inlines + +Inlines are parsed sequentially from the beginning of the character +stream to the end (left to right, in left-to-right languages). +Thus, for example, in ```````````````````````````````` example - foo +`hi`lo` . -

    →foo

    +

    hilo`

    ```````````````````````````````` +`hi` is parsed as code, leaving the backtick at the end as a literal +backtick. -```````````````````````````````` example -[a](url "tit") -. -

    [a](url "tit")

    -```````````````````````````````` ## Code spans @@ -5854,7 +5870,7 @@ preceded nor followed by a backtick. A [code span](@) begins with a backtick string and ends with a backtick string of equal length. The contents of the code span are -the characters between the two backtick strings, normalized in the +the characters between these two backtick strings, normalized in the following ways: - First, [line endings] are converted to [spaces]. @@ -6038,18 +6054,18 @@ But this is an HTML tag: And this is code: ```````````````````````````````` example -`` +`` . -

    <http://foo.bar.baz>`

    +

    <https://foo.bar.baz>`

    ```````````````````````````````` But this is an autolink: ```````````````````````````````` example -` +` . -

    http://foo.bar.`baz`

    +

    https://foo.bar.`baz`

    ```````````````````````````````` @@ -6082,7 +6098,7 @@ closing backtick strings to be equal in length: ## Emphasis and strong emphasis John Gruber's original [Markdown syntax -description](http://daringfireball.net/projects/markdown/syntax#em) says: +description](https://daringfireball.net/projects/markdown/syntax#em) says: > Markdown treats asterisks (`*`) and underscores (`_`) as indicators of > emphasis. Text wrapped with one `*` or `_` will be wrapped with an HTML @@ -6133,17 +6149,17 @@ a non-backslash-escaped `_` character. A [left-flanking delimiter run](@) is a [delimiter run] that is (1) not followed by [Unicode whitespace], -and either (2a) not followed by a [punctuation character], or -(2b) followed by a [punctuation character] and -preceded by [Unicode whitespace] or a [punctuation character]. +and either (2a) not followed by a [Unicode punctuation character], or +(2b) followed by a [Unicode punctuation character] and +preceded by [Unicode whitespace] or a [Unicode punctuation character]. For purposes of this definition, the beginning and the end of the line count as Unicode whitespace. A [right-flanking delimiter run](@) is a [delimiter run] that is (1) not preceded by [Unicode whitespace], -and either (2a) not preceded by a [punctuation character], or -(2b) preceded by a [punctuation character] and -followed by [Unicode whitespace] or a [punctuation character]. +and either (2a) not preceded by a [Unicode punctuation character], or +(2b) preceded by a [Unicode punctuation character] and +followed by [Unicode whitespace] or a [Unicode punctuation character]. For purposes of this definition, the beginning and the end of the line count as Unicode whitespace. @@ -6184,7 +6200,7 @@ Here are some examples of delimiter runs. 
(The idea of distinguishing left-flanking and right-flanking delimiter runs based on the character before and the character after comes from Roopesh Chander's -[vfmd](http://www.vfmd.org/vfmd-spec/specification/#procedure-for-identifying-emphasis-tags). +[vfmd](https://web.archive.org/web/20220608143320/http://www.vfmd.org/vfmd-spec/specification/#procedure-for-identifying-emphasis-tags). vfmd uses the terminology "emphasis indicator string" instead of "delimiter run," and its rules for distinguishing left- and right-flanking runs are a bit more complex than the ones given here.) @@ -6198,7 +6214,7 @@ The following rules define emphasis and strong emphasis: it is part of a [left-flanking delimiter run] and either (a) not part of a [right-flanking delimiter run] or (b) part of a [right-flanking delimiter run] - preceded by punctuation. + preceded by a [Unicode punctuation character]. 3. A single `*` character [can close emphasis](@) iff it is part of a [right-flanking delimiter run]. @@ -6207,7 +6223,7 @@ The following rules define emphasis and strong emphasis: it is part of a [right-flanking delimiter run] and either (a) not part of a [left-flanking delimiter run] or (b) part of a [left-flanking delimiter run] - followed by punctuation. + followed by a [Unicode punctuation character]. 5. A double `**` [can open strong emphasis](@) iff it is part of a [left-flanking delimiter run]. @@ -6216,7 +6232,7 @@ The following rules define emphasis and strong emphasis: it is part of a [left-flanking delimiter run] and either (a) not part of a [right-flanking delimiter run] or (b) part of a [right-flanking delimiter run] - preceded by punctuation. + preceded by a [Unicode punctuation character]. 7. A double `**` [can close strong emphasis](@) iff it is part of a [right-flanking delimiter run]. 
@@ -6225,7 +6241,7 @@ The following rules define emphasis and strong emphasis: it is part of a [right-flanking delimiter run] and either (a) not part of a [left-flanking delimiter run] or (b) part of a [left-flanking delimiter run] - followed by punctuation. + followed by a [Unicode punctuation character]. 9. Emphasis begins with a delimiter that [can open emphasis] and ends with a delimiter that [can close emphasis], and that uses the same @@ -6326,6 +6342,21 @@ Unicode nonbreaking spaces count as whitespace, too: ```````````````````````````````` +Unicode symbols count as punctuation, too: + +```````````````````````````````` example +*$*alpha. + +*£*bravo. + +*€*charlie. +. +

    *$*alpha.

    +

    *£*bravo.

    +

    *€*charlie.

    +```````````````````````````````` + + Intraword emphasis with `*` is permitted: ```````````````````````````````` example @@ -6437,7 +6468,7 @@ whitespace: ```````````````````````````````` -A newline also counts as whitespace: +A line ending also counts as whitespace: ```````````````````````````````` example *foo bar @@ -6602,7 +6633,7 @@ __ foo bar__ ```````````````````````````````` -A newline counts as whitespace: +A line ending counts as whitespace: ```````````````````````````````` example __ foo bar__ @@ -6881,7 +6912,7 @@ emphasis sections in this example: The same condition ensures that the following cases are all strong emphasis nested inside -emphasis, even when the interior spaces are +emphasis, even when the interior whitespace is omitted: @@ -7411,16 +7442,16 @@ _a `_`_ ```````````````````````````````` example -**a +**a . -

    **ahttp://foo.bar/?q=**

    +

    **ahttps://foo.bar/?q=**

    ```````````````````````````````` ```````````````````````````````` example -__a +__a . -

    __ahttp://foo.bar/?q=__

    +

    __ahttps://foo.bar/?q=__

    ```````````````````````````````` @@ -7458,13 +7489,14 @@ following rules apply: A [link destination](@) consists of either - a sequence of zero or more characters between an opening `<` and a - closing `>` that contains no line breaks or unescaped + closing `>` that contains no line endings or unescaped `<` or `>` characters, or -- a nonempty sequence of characters that does not start with - `<`, does not include ASCII space or control characters, and - includes parentheses only if (a) they are backslash-escaped or - (b) they are part of a balanced pair of unescaped parentheses. +- a nonempty sequence of characters that does not start with `<`, + does not include [ASCII control characters][ASCII control character] + or [space] character, and includes parentheses only if (a) they are + backslash-escaped or (b) they are part of a balanced pair of + unescaped parentheses. (Implementations may impose limits on parentheses nesting to avoid performance issues, but at least three levels of nesting should be supported.) @@ -7487,10 +7519,14 @@ Although [link titles] may span multiple lines, they may not contain a [blank line]. An [inline link](@) consists of a [link text] followed immediately -by a left parenthesis `(`, optional [whitespace], an optional -[link destination], an optional [link title] separated from the link -destination by [whitespace], optional [whitespace], and a right -parenthesis `)`. The link's text consists of the inlines contained +by a left parenthesis `(`, an optional [link destination], an optional +[link title], and a right parenthesis `)`. +These four components may be separated by spaces, tabs, and up to one line +ending. +If both [link destination] and [link title] are present, they *must* be +separated by spaces, tabs, and up to one line ending. + +The link's text consists of the inlines contained in the [link text] (excluding the enclosing square brackets). 
The link's URI consists of the link destination, excluding enclosing `<...>` if present, with backslash-escapes in effect as described @@ -7507,7 +7543,8 @@ Here is a simple inline link: ```````````````````````````````` -The title may be omitted: +The title, the link text and even +the destination may be omitted: ```````````````````````````````` example [link](/uri) @@ -7515,8 +7552,12 @@ The title may be omitted:

    link

    ```````````````````````````````` +```````````````````````````````` example +[](./target.md) +. +

    +```````````````````````````````` -Both the title and the destination may be omitted: ```````````````````````````````` example [link]() @@ -7531,6 +7572,13 @@ Both the title and the destination may be omitted:

    link

    ```````````````````````````````` + +```````````````````````````````` example +[]() +. +

    +```````````````````````````````` + The destination can only contain spaces if it is enclosed in pointy brackets: @@ -7546,7 +7594,7 @@ enclosed in pointy brackets:

    link

    ```````````````````````````````` -The destination cannot contain line breaks, +The destination cannot contain line endings, even if enclosed in pointy brackets: ```````````````````````````````` example @@ -7615,6 +7663,13 @@ balanced: However, if you have unbalanced parentheses, you need to escape or use the `<...>` form: +```````````````````````````````` example +[link](foo(and(bar)) +. +

    [link](foo(and(bar))

    +```````````````````````````````` + + ```````````````````````````````` example [link](foo\(and\(bar\)) . @@ -7644,13 +7699,13 @@ A link can contain fragment identifiers and queries: ```````````````````````````````` example [link](#fragment) -[link](http://example.com#fragment) +[link](https://example.com#fragment) -[link](http://example.com?foo=3#frag) +[link](https://example.com?foo=3#frag) .

    link

    -

    link

    -

    link

    +

    link

    +

    link

    ```````````````````````````````` @@ -7714,7 +7769,8 @@ may be used in titles: ```````````````````````````````` -Titles must be separated from the link using a [whitespace]. +Titles must be separated from the link using spaces, tabs, and up to one line +ending. Other [Unicode whitespace] like non-breaking space doesn't work. ```````````````````````````````` example @@ -7757,7 +7813,8 @@ titles with no closing quotation mark, though 1.0.2b8 does not. It seems preferable to adopt a simple, rational rule that works the same way in inline links and link reference definitions.) -[Whitespace] is allowed around the destination and title: +Spaces, tabs, and up to one line ending are allowed around the destination and +title: ```````````````````````````````` example [link]( /uri @@ -7892,9 +7949,9 @@ and autolinks over link grouping: ```````````````````````````````` example -[foo +[foo . -

    [foohttp://example.com/?search=](uri)

    +

    [foohttps://example.com/?search=](uri)

    ```````````````````````````````` @@ -7908,7 +7965,8 @@ that [matches] a [link reference definition] elsewhere in the document. A [link label](@) begins with a left bracket (`[`) and ends with the first right bracket (`]`) that is not backslash-escaped. -Between these brackets there must be at least one [non-whitespace character]. +Between these brackets there must be at least one character that is not a space, +tab, or line ending. Unescaped square bracket characters are not allowed inside the opening and closing square brackets of [link labels]. A link label can have at most 999 characters inside the square @@ -7918,14 +7976,13 @@ One label [matches](@) another just in case their normalized forms are equal. To normalize a label, strip off the opening and closing brackets, perform the *Unicode case fold*, strip leading and trailing -[whitespace] and collapse consecutive internal -[whitespace] to a single space. If there are multiple +spaces, tabs, and line endings, and collapse consecutive internal +spaces, tabs, and line endings to a single space. If there are multiple matching reference link definitions, the one that comes first in the document is used. (It is desirable in such cases to emit a warning.) -The contents of the first link label are parsed as inlines, which are -used as the link's text. The link's URI and title are provided by the -matching [link reference definition]. +The link's URI and title are provided by the matching [link +reference definition]. Here is a simple example: @@ -8018,11 +8075,11 @@ emphasis grouping: ```````````````````````````````` example -[foo *bar][ref] +[foo *bar][ref]* [ref]: /uri . -

    foo *bar

    +

    foo *bar*

    ```````````````````````````````` @@ -8048,11 +8105,11 @@ and autolinks over link grouping: ```````````````````````````````` example -[foo +[foo [ref]: /uri . -

    [foohttp://example.com/?search=][ref]

    +

    [foohttps://example.com/?search=][ref]

    ```````````````````````````````` @@ -8070,15 +8127,15 @@ Matching is case-insensitive: Unicode case fold is used: ```````````````````````````````` example -[Толпой][Толпой] is a Russian word. +[ẞ] -[ТОЛПОЙ]: /url +[SS]: /url . -

    Толпой is a Russian word.

    +

    ```````````````````````````````` -Consecutive internal [whitespace] is treated as one space for +Consecutive internal spaces, tabs, and line endings are treated as one space for purposes of determining matching: ```````````````````````````````` example @@ -8091,7 +8148,7 @@ purposes of determining matching: ```````````````````````````````` -No [whitespace] is allowed between the [link text] and the +No spaces, tabs, or line endings are allowed between the [link text] and the [link label]: ```````````````````````````````` example @@ -8221,7 +8278,8 @@ Note that in this example `]` is not backslash-escaped: ```````````````````````````````` -A [link label] must contain at least one [non-whitespace character]: +A [link label] must contain at least one character that is not a space, tab, or +line ending: ```````````````````````````````` example [] @@ -8251,7 +8309,7 @@ A [collapsed reference link](@) consists of a [link label] that [matches] a [link reference definition] elsewhere in the document, followed by the string `[]`. -The contents of the first link label are parsed as inlines, +The contents of the link label are parsed as inlines, which are used as the link's text. The link's URI and title are provided by the matching reference link definition. Thus, `[foo][]` is equivalent to `[foo][foo]`. @@ -8286,7 +8344,7 @@ The link labels are case-insensitive: -As with full reference links, [whitespace] is not +As with full reference links, spaces, tabs, or line endings are not allowed between the two sets of brackets: ```````````````````````````````` example @@ -8304,7 +8362,7 @@ A [shortcut reference link](@) consists of a [link label] that [matches] a [link reference definition] elsewhere in the document and is not followed by `[]` or a link label. -The contents of the first link label are parsed as inlines, +The contents of the link label are parsed as inlines, which are used as the link's text. 
The link's URI and title are provided by the matching link reference definition. Thus, `[foo]` is equivalent to `[foo][]`. @@ -8391,7 +8449,7 @@ following closing bracket: ```````````````````````````````` -Full and compact references take precedence over shortcut +Full and collapsed references take precedence over shortcut references: ```````````````````````````````` example @@ -8614,7 +8672,7 @@ The labels are case-insensitive: ```````````````````````````````` -As with reference links, [whitespace] is not allowed +As with reference links, spaces, tabs, and line endings, are not allowed between the two sets of brackets: ```````````````````````````````` example @@ -8707,9 +8765,9 @@ a link to the URI, with the URI as the link's label. An [absolute URI](@), for these purposes, consists of a [scheme] followed by a colon (`:`) -followed by zero or more characters other than ASCII -[whitespace] and control characters, `<`, and `>`. If -the URI includes these characters, they must be percent-encoded +followed by zero or more characters other than [ASCII control +characters][ASCII control character], [space], `<`, and `>`. +If the URI includes these characters, they must be percent-encoded (e.g. `%20` for a space). For purposes of this spec, a [scheme](@) is any sequence @@ -8727,9 +8785,9 @@ Here are some valid autolinks: ```````````````````````````````` example - + . -

    http://foo.bar.baz/test?q=hello&id=22&boolean

    +

    https://foo.bar.baz/test?q=hello&id=22&boolean

    ```````````````````````````````` @@ -8769,9 +8827,9 @@ with their syntax: ```````````````````````````````` example - + . -

    http://../

    +

    https://../

    ```````````````````````````````` @@ -8785,18 +8843,18 @@ with their syntax: Spaces are not allowed in autolinks: ```````````````````````````````` example - + . -

    <http://foo.bar/baz bim>

    +

    <https://foo.bar/baz bim>

    ```````````````````````````````` Backslash-escapes do not work inside autolinks: ```````````````````````````````` example - + . -

    http://example.com/\[\

    +

    https://example.com/\[\

    ```````````````````````````````` @@ -8848,9 +8906,9 @@ These are not autolinks: ```````````````````````````````` example -< http://foo.bar > +< https://foo.bar > . -

    < http://foo.bar >

    +

    < https://foo.bar >

    ```````````````````````````````` @@ -8869,9 +8927,9 @@ These are not autolinks: ```````````````````````````````` example -http://example.com +https://example.com . -

    http://example.com

    +

    https://example.com

    ```````````````````````````````` @@ -8895,7 +8953,7 @@ A [tag name](@) consists of an ASCII letter followed by zero or more ASCII letters, digits, or hyphens (`-`). -An [attribute](@) consists of [whitespace], +An [attribute](@) consists of spaces, tabs, and up to one line ending, an [attribute name], and an optional [attribute value specification]. @@ -8905,9 +8963,9 @@ letters, digits, `_`, `.`, `:`, or `-`. (Note: This is the XML specification restricted to ASCII. HTML5 is laxer.) An [attribute value specification](@) -consists of optional [whitespace], -a `=` character, optional [whitespace], and an [attribute -value]. +consists of optional spaces, tabs, and up to one line ending, +a `=` character, optional spaces, tabs, and up to one line ending, +and an [attribute value]. An [attribute value](@) consists of an [unquoted attribute value], @@ -8915,7 +8973,7 @@ a [single-quoted attribute value], or a [double-quoted attribute value]. An [unquoted attribute value](@) is a nonempty string of characters not -including [whitespace], `"`, `'`, `=`, `<`, `>`, or `` ` ``. +including spaces, tabs, line endings, `"`, `'`, `=`, `<`, `>`, or `` ` ``. A [single-quoted attribute value](@) consists of `'`, zero or more @@ -8926,26 +8984,24 @@ consists of `"`, zero or more characters not including `"`, and a final `"`. An [open tag](@) consists of a `<` character, a [tag name], -zero or more [attributes], optional [whitespace], an optional `/` -character, and a `>` character. +zero or more [attributes], optional spaces, tabs, and up to one line ending, +an optional `/` character, and a `>` character. A [closing tag](@) consists of the string ``. +[tag name], optional spaces, tabs, and up to one line ending, and the character +`>`. -An [HTML comment](@) consists of ``, -where *text* does not start with `>` or `->`, does not end with `-`, -and does not contain `--`. (See the -[HTML5 spec](http://www.w3.org/TR/html5/syntax.html#comments).) 
+An [HTML comment](@) consists of ``, ``, or ``, and `-->` (see the +[HTML spec](https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state)). A [processing instruction](@) consists of the string ``, and the string `?>`. -A [declaration](@) consists of the -string ``, and the character `>`. +A [declaration](@) consists of the string ``, and the character `>`. A [CDATA section](@) consists of the string `< @@ -9046,7 +9102,7 @@ bim!bop />

    ```````````````````````````````` -Missing [whitespace]: +Missing whitespace: ```````````````````````````````` example @@ -9076,30 +9132,20 @@ Illegal attributes in closing tag: Comments: ```````````````````````````````` example -foo +foo . -

    foo

    +

    foo

    ```````````````````````````````` - -```````````````````````````````` example -foo -. -

    foo <!-- not a comment -- two hyphens -->

    -```````````````````````````````` - - -Not comments: - ```````````````````````````````` example foo foo --> -foo +foo foo --> . -

    foo <!--> foo -->

    -

    foo <!-- foo--->

    +

    foo foo -->

    +

    foo foo -->

    ```````````````````````````````` @@ -9158,7 +9204,7 @@ foo
    ## Hard line breaks -A line break (not in a code span or HTML tag) that is preceded +A line ending (not in a code span or HTML tag) that is preceded by two or more spaces and does not occur at the end of a block is parsed as a [hard line break](@) (rendered in HTML as a `
    ` tag): @@ -9173,7 +9219,7 @@ baz

    For a more visible alternative, a backslash before the -[line ending] may be used instead of two spaces: +[line ending] may be used instead of two or more spaces: ```````````````````````````````` example foo\ @@ -9215,7 +9261,7 @@ bar

    ```````````````````````````````` -Line breaks can occur inside emphasis, links, and other constructs +Hard line breaks can occur inside emphasis, links, and other constructs that allow inline content: ```````````````````````````````` example @@ -9236,13 +9282,13 @@ bar

    ```````````````````````````````` -Line breaks do not occur inside code spans +Hard line breaks do not occur inside code spans ```````````````````````````````` example -`code +`code span` . -

    code span

    +

    code span

    ```````````````````````````````` @@ -9308,9 +9354,9 @@ foo ## Soft line breaks -A regular line break (not in a code span or HTML tag) that is not +A regular line ending (not in a code span or HTML tag) that is not preceded by two or more spaces or a backslash is parsed as a -[softbreak](@). (A softbreak may be rendered in HTML either as a +[softbreak](@). (A soft line break may be rendered in HTML either as a [line ending] or as a space. The result will be the same in browsers. In the examples here, a [line ending] will be used.) @@ -9336,7 +9382,7 @@ baz

    A conforming parser may render a soft line break in HTML either as a -line break or as a space. +line ending or as a space. A renderer may also provide an option to render soft line breaks as hard line breaks. @@ -9444,7 +9490,7 @@ blocks. But we cannot close unmatched blocks yet, because we may have a blocks, we look for new block starts (e.g. `>` for a block quote). If we encounter a new block start, we close any blocks unmatched in step 1 before creating the new block as a child of the last -matched block. +matched container block. 3. Finally, we look at the remainder of the line (after block markers like `>`, list markers, and indentation have been consumed). @@ -9628,7 +9674,7 @@ through the stack for an opening `[` or `![` delimiter. delimiter from the stack, and return a literal text node `]`. - If we find one and it's active, then we parse ahead to see if - we have an inline link/image, reference link/image, compact reference + we have an inline link/image, reference link/image, collapsed reference link/image, or shortcut reference link/image. + If we don't, then we remove the opening delimiter from the @@ -9660,8 +9706,9 @@ just above `stack_bottom` (or the first element if `stack_bottom` is NULL). We keep track of the `openers_bottom` for each delimiter -type (`*`, `_`) and each length of the closing delimiter run -(modulo 3). Initialize this to `stack_bottom`. +type (`*`, `_`), indexed to the length of the closing delimiter run +(modulo 3) and to whether the closing delimiter can also be an +opener. Initialize this to `stack_bottom`. Then we repeat the following until we run out of potential closers: @@ -9707,4 +9754,3 @@ closers: After we're done, we remove all delimiters above `stack_bottom` from the delimiter stack. 
- diff --git a/commonmark/.settings/org.eclipse.core.runtime.prefs b/commonmark/.settings/org.eclipse.core.runtime.prefs deleted file mode 100644 index 5a0ad22d2..000000000 --- a/commonmark/.settings/org.eclipse.core.runtime.prefs +++ /dev/null @@ -1,2 +0,0 @@ -eclipse.preferences.version=1 -line.separator=\n diff --git a/commonmark/.settings/org.eclipse.jdt.core.prefs b/commonmark/.settings/org.eclipse.jdt.core.prefs deleted file mode 100644 index 3c0d27c8f..000000000 --- a/commonmark/.settings/org.eclipse.jdt.core.prefs +++ /dev/null @@ -1,290 +0,0 @@ -eclipse.preferences.version=1 -org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7 -org.eclipse.jdt.core.compiler.compliance=1.7 -org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning -org.eclipse.jdt.core.compiler.source=1.7 -org.eclipse.jdt.core.formatter.align_type_members_on_columns=false -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_annotation=0 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=16 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call=16 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation=16 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression=16 -org.eclipse.jdt.core.formatter.alignment_for_assignment=0 -org.eclipse.jdt.core.formatter.alignment_for_binary_expression=16 -org.eclipse.jdt.core.formatter.alignment_for_compact_if=16 -org.eclipse.jdt.core.formatter.alignment_for_conditional_expression=80 -org.eclipse.jdt.core.formatter.alignment_for_enum_constants=0 -org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer=16 -org.eclipse.jdt.core.formatter.alignment_for_method_declaration=0 -org.eclipse.jdt.core.formatter.alignment_for_multiple_fields=16 -org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration=16 
-org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_resources_in_try=80 -org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation=16 -org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration=16 -org.eclipse.jdt.core.formatter.alignment_for_union_type_in_multicatch=16 -org.eclipse.jdt.core.formatter.blank_lines_after_imports=1 -org.eclipse.jdt.core.formatter.blank_lines_after_package=1 -org.eclipse.jdt.core.formatter.blank_lines_before_field=0 -org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration=0 -org.eclipse.jdt.core.formatter.blank_lines_before_imports=1 -org.eclipse.jdt.core.formatter.blank_lines_before_member_type=1 -org.eclipse.jdt.core.formatter.blank_lines_before_method=1 -org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk=1 -org.eclipse.jdt.core.formatter.blank_lines_before_package=0 -org.eclipse.jdt.core.formatter.blank_lines_between_import_groups=1 -org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations=1 -org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_array_initializer=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_block=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_block_in_case=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration=end_of_line 
-org.eclipse.jdt.core.formatter.brace_position_for_enum_constant=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_lambda_body=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_method_declaration=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_switch=end_of_line -org.eclipse.jdt.core.formatter.brace_position_for_type_declaration=end_of_line -org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment=false -org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment=false -org.eclipse.jdt.core.formatter.comment.format_block_comments=true -org.eclipse.jdt.core.formatter.comment.format_header=false -org.eclipse.jdt.core.formatter.comment.format_html=true -org.eclipse.jdt.core.formatter.comment.format_javadoc_comments=true -org.eclipse.jdt.core.formatter.comment.format_line_comments=true -org.eclipse.jdt.core.formatter.comment.format_source_code=true -org.eclipse.jdt.core.formatter.comment.indent_parameter_description=true -org.eclipse.jdt.core.formatter.comment.indent_root_tags=true -org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags=insert -org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter=do not insert -org.eclipse.jdt.core.formatter.comment.line_length=120 -org.eclipse.jdt.core.formatter.comment.new_lines_at_block_boundaries=true -org.eclipse.jdt.core.formatter.comment.new_lines_at_javadoc_boundaries=true -org.eclipse.jdt.core.formatter.comment.preserve_white_space_between_code_and_line_comments=false -org.eclipse.jdt.core.formatter.compact_else_if=true -org.eclipse.jdt.core.formatter.continuation_indentation=2 -org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer=2 -org.eclipse.jdt.core.formatter.disabling_tag=@formatter\:off -org.eclipse.jdt.core.formatter.enabling_tag=@formatter\:on 
-org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line=false -org.eclipse.jdt.core.formatter.format_line_comment_starting_on_first_column=true -org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header=true -org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header=true -org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header=true -org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header=true -org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases=true -org.eclipse.jdt.core.formatter.indent_empty_lines=false -org.eclipse.jdt.core.formatter.indent_statements_compare_to_block=true -org.eclipse.jdt.core.formatter.indent_statements_compare_to_body=true -org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases=true -org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch=true -org.eclipse.jdt.core.formatter.indentation.size=4 -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_field=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_method=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_package=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_type=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_label=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_after_type_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement=do not insert 
-org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter=insert -org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator=insert -org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_binary_operator=insert -org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters=insert -org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block=insert -org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast=insert -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert=insert -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case=insert -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional=insert 
-org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for=insert -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters=insert -org.eclipse.jdt.core.formatter.insert_space_after_ellipsis=insert 
-org.eclipse.jdt.core.formatter.insert_space_after_lambda_arrow=insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer=insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_try=do not insert 
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional=insert -org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for=insert -org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_try_resources=insert -org.eclipse.jdt.core.formatter.insert_space_after_unary_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter=insert -org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator=insert -org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_binary_operator=insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer=insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration=do not 
insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_try=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert=insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional=insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for=insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws=do not insert 
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_ellipsis=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_lambda_arrow=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration=insert 
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration=do not insert 
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_try=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while=insert -org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return=insert -org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw=insert -org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional=insert -org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_semicolon=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_try_resources=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_unary_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration=do not insert 
-org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation=do not insert -org.eclipse.jdt.core.formatter.join_lines_in_comments=true -org.eclipse.jdt.core.formatter.join_wrapped_lines=false -org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line=false -org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line=false -org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line=false -org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line=false -org.eclipse.jdt.core.formatter.lineSplit=120 -org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column=false -org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column=false -org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body=0 -org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve=1 -org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line=true -org.eclipse.jdt.core.formatter.tabulation.char=space -org.eclipse.jdt.core.formatter.tabulation.size=4 -org.eclipse.jdt.core.formatter.use_on_off_tags=false -org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations=false -org.eclipse.jdt.core.formatter.wrap_before_binary_operator=true -org.eclipse.jdt.core.formatter.wrap_before_or_operator_multicatch=true -org.eclipse.jdt.core.formatter.wrap_outer_expressions_when_nested=true -org.eclipse.jdt.core.javaFormatter=org.eclipse.jdt.core.defaultJavaFormatter diff --git a/commonmark/pom.xml b/commonmark/pom.xml index 4850b2b45..4e060edaa 100644 --- a/commonmark/pom.xml +++ b/commonmark/pom.xml @@ -2,18 +2,18 @@ 4.0.0 - com.atlassian.commonmark + org.commonmark commonmark-parent - 0.14.1-SNAPSHOT + 0.28.1-SNAPSHOT commonmark commonmark-java core - Core of commonmark-java (implementation of 
CommonMark for parsing markdown and rendering to HTML) + Core of commonmark-java (a library for parsing Markdown to an AST, modifying the AST and rendering it to HTML or Markdown) - com.atlassian.commonmark + org.commonmark commonmark-test-util test @@ -29,22 +29,6 @@ - - - - org.apache.maven.plugins - maven-jar-plugin - - - - org.commonmark - - - - - - - benchmark @@ -54,7 +38,7 @@ org.codehaus.mojo exec-maven-plugin - 1.5.0 + 3.2.0 java test @@ -70,4 +54,12 @@ + + + BSD-2-Clause + https://opensource.org/licenses/BSD-2-Clause + repo + + + diff --git a/commonmark/src/main/java/module-info.java b/commonmark/src/main/java/module-info.java new file mode 100644 index 000000000..009fc7d18 --- /dev/null +++ b/commonmark/src/main/java/module-info.java @@ -0,0 +1,13 @@ +module org.commonmark { + exports org.commonmark; + exports org.commonmark.node; + exports org.commonmark.parser; + exports org.commonmark.parser.beta; + exports org.commonmark.parser.block; + exports org.commonmark.parser.delimiter; + exports org.commonmark.renderer; + exports org.commonmark.renderer.html; + exports org.commonmark.renderer.markdown; + exports org.commonmark.renderer.text; + exports org.commonmark.text; +} diff --git a/commonmark/src/main/java/org/commonmark/internal/BlockQuoteParser.java b/commonmark/src/main/java/org/commonmark/internal/BlockQuoteParser.java index 6b19f8aaf..572c491f8 100644 --- a/commonmark/src/main/java/org/commonmark/internal/BlockQuoteParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/BlockQuoteParser.java @@ -4,6 +4,7 @@ import org.commonmark.node.Block; import org.commonmark.node.BlockQuote; import org.commonmark.parser.block.*; +import org.commonmark.text.Characters; public class BlockQuoteParser extends AbstractBlockParser { @@ -30,7 +31,7 @@ public BlockContinue tryContinue(ParserState state) { if (isMarker(state, nextNonSpace)) { int newColumn = state.getColumn() + state.getIndent() + 1; // optional following space or tab - if 
(Parsing.isSpaceOrTab(state.getLine(), nextNonSpace + 1)) { + if (Characters.isSpaceOrTab(state.getLine().getContent(), nextNonSpace + 1)) { newColumn++; } return BlockContinue.atColumn(newColumn); @@ -40,17 +41,18 @@ public BlockContinue tryContinue(ParserState state) { } private static boolean isMarker(ParserState state, int index) { - CharSequence line = state.getLine(); + CharSequence line = state.getLine().getContent(); return state.getIndent() < Parsing.CODE_BLOCK_INDENT && index < line.length() && line.charAt(index) == '>'; } public static class Factory extends AbstractBlockParserFactory { + @Override public BlockStart tryStart(ParserState state, MatchedBlockParser matchedBlockParser) { int nextNonSpace = state.getNextNonSpaceIndex(); if (isMarker(state, nextNonSpace)) { int newColumn = state.getColumn() + state.getIndent() + 1; // optional following space or tab - if (Parsing.isSpaceOrTab(state.getLine(), nextNonSpace + 1)) { + if (Characters.isSpaceOrTab(state.getLine().getContent(), nextNonSpace + 1)) { newColumn++; } return BlockStart.of(new BlockQuoteParser()).atColumn(newColumn); diff --git a/commonmark/src/main/java/org/commonmark/internal/BlockStartImpl.java b/commonmark/src/main/java/org/commonmark/internal/BlockStartImpl.java index c7e967d46..516f944b2 100644 --- a/commonmark/src/main/java/org/commonmark/internal/BlockStartImpl.java +++ b/commonmark/src/main/java/org/commonmark/internal/BlockStartImpl.java @@ -9,6 +9,7 @@ public class BlockStartImpl extends BlockStart { private int newIndex = -1; private int newColumn = -1; private boolean replaceActiveBlockParser = false; + private int replaceParagraphLines = 0; public BlockStartImpl(BlockParser... 
blockParsers) { this.blockParsers = blockParsers; @@ -30,6 +31,10 @@ public boolean isReplaceActiveBlockParser() { return replaceActiveBlockParser; } + int getReplaceParagraphLines() { + return replaceParagraphLines; + } + @Override public BlockStart atIndex(int newIndex) { this.newIndex = newIndex; @@ -48,4 +53,12 @@ public BlockStart replaceActiveBlockParser() { return this; } + @Override + public BlockStart replaceParagraphLines(int lines) { + if (!(lines >= 1)) { + throw new IllegalArgumentException("Lines must be >= 1"); + } + this.replaceParagraphLines = lines; + return this; + } } diff --git a/commonmark/src/main/java/org/commonmark/internal/Bracket.java b/commonmark/src/main/java/org/commonmark/internal/Bracket.java index 70a8a6e25..c04b6ecda 100644 --- a/commonmark/src/main/java/org/commonmark/internal/Bracket.java +++ b/commonmark/src/main/java/org/commonmark/internal/Bracket.java @@ -1,15 +1,37 @@ package org.commonmark.internal; import org.commonmark.node.Text; +import org.commonmark.parser.beta.Position; /** - * Opening bracket for links ([) or images (![). + * Opening bracket for links ({@code [}), images ({@code ![}), or links with other markers. */ public class Bracket { - public final Text node; - public final int index; - public final boolean image; + /** + * The node of a marker such as {@code !} if present, null otherwise. + */ + public final Text markerNode; + + /** + * The position of the marker if present, null otherwise. + */ + public final Position markerPosition; + + /** + * The node of {@code [}. + */ + public final Text bracketNode; + + /** + * The position of {@code [}. + */ + public final Position bracketPosition; + + /** + * The position of the content (after the opening bracket) + */ + public final Position contentPosition; /** * Previous bracket. @@ -27,22 +49,24 @@ public class Bracket { public boolean allowed = true; /** - * Whether there is an unescaped bracket (opening or closing) anywhere after this opening bracket. 
+ * Whether there is an unescaped bracket (opening or closing) after this opening bracket in the text parsed so far. */ public boolean bracketAfter = false; - static public Bracket link(Text node, int index, Bracket previous, Delimiter previousDelimiter) { - return new Bracket(node, index, previous, previousDelimiter, false); + static public Bracket link(Text bracketNode, Position bracketPosition, Position contentPosition, Bracket previous, Delimiter previousDelimiter) { + return new Bracket(null, null, bracketNode, bracketPosition, contentPosition, previous, previousDelimiter); } - static public Bracket image(Text node, int index, Bracket previous, Delimiter previousDelimiter) { - return new Bracket(node, index, previous, previousDelimiter, true); + static public Bracket withMarker(Text markerNode, Position markerPosition, Text bracketNode, Position bracketPosition, Position contentPosition, Bracket previous, Delimiter previousDelimiter) { + return new Bracket(markerNode, markerPosition, bracketNode, bracketPosition, contentPosition, previous, previousDelimiter); } - private Bracket(Text node, int index, Bracket previous, Delimiter previousDelimiter, boolean image) { - this.node = node; - this.index = index; - this.image = image; + private Bracket(Text markerNode, Position markerPosition, Text bracketNode, Position bracketPosition, Position contentPosition, Bracket previous, Delimiter previousDelimiter) { + this.markerNode = markerNode; + this.markerPosition = markerPosition; + this.bracketNode = bracketNode; + this.bracketPosition = bracketPosition; + this.contentPosition = contentPosition; this.previous = previous; this.previousDelimiter = previousDelimiter; } diff --git a/commonmark/src/main/java/org/commonmark/internal/Definitions.java b/commonmark/src/main/java/org/commonmark/internal/Definitions.java new file mode 100644 index 000000000..0377842c9 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/internal/Definitions.java @@ -0,0 +1,33 @@ +package 
org.commonmark.internal; + +import org.commonmark.node.DefinitionMap; + +import java.util.HashMap; +import java.util.Map; + +public class Definitions { + + private final Map, DefinitionMap> definitionsByType = new HashMap<>(); + + public void addDefinitions(DefinitionMap definitionMap) { + var existingMap = getMap(definitionMap.getType()); + if (existingMap == null) { + definitionsByType.put(definitionMap.getType(), definitionMap); + } else { + existingMap.addAll(definitionMap); + } + } + + public V getDefinition(Class type, String label) { + var definitionMap = getMap(type); + if (definitionMap == null) { + return null; + } + return definitionMap.get(label); + } + + private DefinitionMap getMap(Class type) { + //noinspection unchecked + return (DefinitionMap) definitionsByType.get(type); + } +} diff --git a/commonmark/src/main/java/org/commonmark/internal/Delimiter.java b/commonmark/src/main/java/org/commonmark/internal/Delimiter.java index 5988e9508..9083ce3cb 100644 --- a/commonmark/src/main/java/org/commonmark/internal/Delimiter.java +++ b/commonmark/src/main/java/org/commonmark/internal/Delimiter.java @@ -3,36 +3,33 @@ import org.commonmark.node.Text; import org.commonmark.parser.delimiter.DelimiterRun; +import java.util.List; + /** * Delimiter (emphasis, strong emphasis or custom emphasis). */ public class Delimiter implements DelimiterRun { - public final Text node; + public final List characters; public final char delimiterChar; + private final int originalLength; - /** - * Can open emphasis, see spec. - */ - public final boolean canOpen; + // Can open emphasis, see spec. + private final boolean canOpen; - /** - * Can close emphasis, see spec. - */ - public final boolean canClose; + // Can close emphasis, see spec. 
+ private final boolean canClose; public Delimiter previous; public Delimiter next; - public int length = 1; - public int originalLength = 1; - - public Delimiter(Text node, char delimiterChar, boolean canOpen, boolean canClose, Delimiter previous) { - this.node = node; + public Delimiter(List characters, char delimiterChar, boolean canOpen, boolean canClose, Delimiter previous) { + this.characters = characters; this.delimiterChar = delimiterChar; this.canOpen = canOpen; this.canClose = canClose; this.previous = previous; + this.originalLength = characters.size(); } @Override @@ -47,11 +44,39 @@ public boolean canClose() { @Override public int length() { - return length; + return characters.size(); } @Override public int originalLength() { return originalLength; } + + @Override + public Text getOpener() { + return characters.get(characters.size() - 1); + } + + @Override + public Text getCloser() { + return characters.get(0); + } + + @Override + public Iterable getOpeners(int length) { + if (!(length >= 1 && length <= length())) { + throw new IllegalArgumentException("length must be between 1 and " + length() + ", was " + length); + } + + return characters.subList(characters.size() - length, characters.size()); + } + + @Override + public Iterable getClosers(int length) { + if (!(length >= 1 && length <= length())) { + throw new IllegalArgumentException("length must be between 1 and " + length() + ", was " + length); + } + + return characters.subList(0, length); + } } diff --git a/commonmark/src/main/java/org/commonmark/internal/DocumentBlockParser.java b/commonmark/src/main/java/org/commonmark/internal/DocumentBlockParser.java index 4a30544e7..db3d3854f 100644 --- a/commonmark/src/main/java/org/commonmark/internal/DocumentBlockParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/DocumentBlockParser.java @@ -2,6 +2,7 @@ import org.commonmark.node.Block; import org.commonmark.node.Document; +import org.commonmark.parser.SourceLine; import 
org.commonmark.parser.block.AbstractBlockParser; import org.commonmark.parser.block.BlockContinue; import org.commonmark.parser.block.ParserState; @@ -31,7 +32,7 @@ public BlockContinue tryContinue(ParserState state) { } @Override - public void addLine(CharSequence line) { + public void addLine(SourceLine line) { } } diff --git a/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java b/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java index d1b43a7b4..07d97296b 100644 --- a/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java @@ -1,20 +1,25 @@ package org.commonmark.internal; +import org.commonmark.internal.util.LineReader; import org.commonmark.internal.util.Parsing; import org.commonmark.node.*; -import org.commonmark.parser.InlineParser; +import org.commonmark.parser.IncludeSourceSpans; import org.commonmark.parser.InlineParserFactory; +import org.commonmark.parser.SourceLine; +import org.commonmark.parser.SourceLines; +import org.commonmark.parser.beta.LinkProcessor; +import org.commonmark.parser.beta.InlineContentParserFactory; import org.commonmark.parser.block.*; import org.commonmark.parser.delimiter.DelimiterProcessor; +import org.commonmark.text.Characters; -import java.io.BufferedReader; import java.io.IOException; import java.io.Reader; import java.util.*; public class DocumentParser implements ParserState { - private static final Set> CORE_FACTORY_TYPES = new LinkedHashSet<>(Arrays.asList( + private static final Set> CORE_FACTORY_TYPES = new LinkedHashSet<>(List.of( BlockQuote.class, Heading.class, FencedCodeBlock.class, @@ -37,8 +42,12 @@ public class DocumentParser implements ParserState { NODES_TO_CORE_FACTORIES = Collections.unmodifiableMap(map); } + private SourceLine line; - private CharSequence line; + /** + * Line index (0-based) + */ + private int lineIndex = -1; /** * current index (offset) in input line (0-based) @@ 
-62,22 +71,33 @@ public class DocumentParser implements ParserState { private final List blockParserFactories; private final InlineParserFactory inlineParserFactory; + private final List inlineContentParserFactories; private final List delimiterProcessors; + private final List linkProcessors; + private final Set linkMarkers; + private final IncludeSourceSpans includeSourceSpans; + private final int maxOpenBlockParsers; private final DocumentBlockParser documentBlockParser; - private final Map definitions = new LinkedHashMap<>(); + private final Definitions definitions = new Definitions(); - private List activeBlockParsers = new ArrayList<>(); - // LinkedHashSet to have a deterministic order - private Set allBlockParsers = new LinkedHashSet<>(); + private final List openBlockParsers = new ArrayList<>(); + private final List allBlockParsers = new ArrayList<>(); public DocumentParser(List blockParserFactories, InlineParserFactory inlineParserFactory, - List delimiterProcessors) { + List inlineContentParserFactories, List delimiterProcessors, + List linkProcessors, Set linkMarkers, + IncludeSourceSpans includeSourceSpans, int maxOpenBlockParsers) { this.blockParserFactories = blockParserFactories; this.inlineParserFactory = inlineParserFactory; + this.inlineContentParserFactories = inlineContentParserFactories; this.delimiterProcessors = delimiterProcessors; + this.linkProcessors = linkProcessors; + this.linkMarkers = linkMarkers; + this.includeSourceSpans = includeSourceSpans; + this.maxOpenBlockParsers = maxOpenBlockParsers; this.documentBlockParser = new DocumentBlockParser(); - activateBlockParser(this.documentBlockParser); + activateBlockParser(new OpenBlockParser(documentBlockParser, 0)); } public static Set> getDefaultBlockParserTypes() { @@ -94,47 +114,55 @@ public static List calculateBlockParserFactories(List> enabledBlockTypes) { + for (Class enabledBlockType : enabledBlockTypes) { + if (!NODES_TO_CORE_FACTORIES.containsKey(enabledBlockType)) { + throw new 
IllegalArgumentException("Can't enable block type " + enabledBlockType + ", possible options are: " + NODES_TO_CORE_FACTORIES.keySet()); + } + } + } + /** * The main parsing function. Returns a parsed document AST. */ public Document parse(String input) { int lineStart = 0; int lineBreak; - while ((lineBreak = Parsing.findLineBreak(input, lineStart)) != -1) { + while ((lineBreak = Characters.findLineBreak(input, lineStart)) != -1) { String line = input.substring(lineStart, lineBreak); - incorporateLine(line); + parseLine(line, lineStart); if (lineBreak + 1 < input.length() && input.charAt(lineBreak) == '\r' && input.charAt(lineBreak + 1) == '\n') { lineStart = lineBreak + 2; } else { lineStart = lineBreak + 1; } } - if (input.length() > 0 && (lineStart == 0 || lineStart < input.length())) { + if (!input.isEmpty() && (lineStart == 0 || lineStart < input.length())) { String line = input.substring(lineStart); - incorporateLine(line); + parseLine(line, lineStart); } return finalizeAndProcess(); } public Document parse(Reader input) throws IOException { - BufferedReader bufferedReader; - if (input instanceof BufferedReader) { - bufferedReader = (BufferedReader) input; - } else { - bufferedReader = new BufferedReader(input); - } - + var lineReader = new LineReader(input); + int inputIndex = 0; String line; - while ((line = bufferedReader.readLine()) != null) { - incorporateLine(line); + while ((line = lineReader.readLine()) != null) { + parseLine(line, inputIndex); + inputIndex += line.length(); + var eol = lineReader.getLineTerminator(); + if (eol != null) { + inputIndex += eol.length(); + } } return finalizeAndProcess(); } @Override - public CharSequence getLine() { + public SourceLine getLine() { return line; } @@ -165,32 +193,31 @@ public boolean isBlank() { @Override public BlockParser getActiveBlockParser() { - return activeBlockParsers.get(activeBlockParsers.size() - 1); + return openBlockParsers.get(openBlockParsers.size() - 1).blockParser; } /** * Analyze a line 
of text and update the document appropriately. We parse markdown text by calling this on each * line of input, then finalizing the document. */ - private void incorporateLine(CharSequence ln) { - line = Parsing.prepareLine(ln); - index = 0; - column = 0; - columnIsInTab = false; + private void parseLine(String ln, int inputIndex) { + setLine(ln, inputIndex); // For each containing block, try to parse the associated line start. - // Bail out on failure: container will point to the last matching block. - // Set all_matched to false if not all containers match. - // The document will always match, can be skipped + // The document will always match, so we can skip the first block parser and start at 1 matches int matches = 1; - for (BlockParser blockParser : activeBlockParsers.subList(1, activeBlockParsers.size())) { + for (int i = 1; i < openBlockParsers.size(); i++) { + OpenBlockParser openBlockParser = openBlockParsers.get(i); + BlockParser blockParser = openBlockParser.blockParser; findNextNonSpace(); BlockContinue result = blockParser.tryContinue(this); if (result instanceof BlockContinueImpl) { BlockContinueImpl blockContinue = (BlockContinueImpl) result; + openBlockParser.sourceIndex = getIndex(); if (blockContinue.isFinalize()) { - finalize(blockParser); + addSourceSpans(); + closeBlockParsers(openBlockParsers.size() - i); return; } else { if (blockContinue.getNewIndex() != -1) { @@ -205,19 +232,21 @@ private void incorporateLine(CharSequence ln) { } } - List unmatchedBlockParsers = new ArrayList<>(activeBlockParsers.subList(matches, activeBlockParsers.size())); - BlockParser lastMatchedBlockParser = activeBlockParsers.get(matches - 1); - BlockParser blockParser = lastMatchedBlockParser; - boolean allClosed = unmatchedBlockParsers.isEmpty(); + int unmatchedBlocks = openBlockParsers.size() - matches; + BlockParser blockParser = openBlockParsers.get(matches - 1).blockParser; + boolean startedNewBlock = false; + + int lastIndex = index; // Unless last matched 
container is a code block, try new container starts, // adding children to the last matched container: boolean tryBlockStarts = blockParser.getBlock() instanceof Paragraph || blockParser.isContainer(); while (tryBlockStarts) { + lastIndex = index; findNextNonSpace(); // this is a little performance optimization: - if (isBlank() || (indent < Parsing.CODE_BLOCK_INDENT && Parsing.isLetter(line, nextNonSpace))) { + if (isBlank() || (indent < Parsing.CODE_BLOCK_INDENT && Characters.isLetter(this.line.getContent(), nextNonSpace))) { setNewIndex(nextNonSpace); break; } @@ -228,9 +257,13 @@ private void incorporateLine(CharSequence ln) { break; } - if (!allClosed) { - finalizeBlocks(unmatchedBlockParsers); - allClosed = true; + startedNewBlock = true; + int sourceIndex = getIndex(); + + // We're starting a new block. If we have any previous blocks that need to be closed, we need to do it now. + if (unmatchedBlocks > 0) { + closeBlockParsers(unmatchedBlocks); + unmatchedBlocks = 0; } if (blockStart.getNewIndex() != -1) { @@ -239,12 +272,24 @@ private void incorporateLine(CharSequence ln) { setNewColumn(blockStart.getNewColumn()); } - if (blockStart.isReplaceActiveBlockParser()) { - prepareActiveBlockParserForReplacement(); + List replacedSourceSpans = null; + if (blockStart.getReplaceParagraphLines() >= 1 || blockStart.isReplaceActiveBlockParser()) { + var activeBlockParser = getActiveBlockParser(); + if (activeBlockParser instanceof ParagraphParser) { + var paragraphParser = (ParagraphParser) activeBlockParser; + var lines = blockStart.isReplaceActiveBlockParser() ? 
Integer.MAX_VALUE : blockStart.getReplaceParagraphLines(); + replacedSourceSpans = replaceParagraphLines(lines, paragraphParser); + } else if (blockStart.isReplaceActiveBlockParser()) { + replacedSourceSpans = prepareActiveBlockParserForReplacement(activeBlockParser); + } } for (BlockParser newBlockParser : blockStart.getBlockParsers()) { - blockParser = addChild(newBlockParser); + addChild(new OpenBlockParser(newBlockParser, sourceIndex)); + if (replacedSourceSpans != null) { + newBlockParser.getBlock().setSourceSpans(replacedSourceSpans); + } + blockParser = newBlockParser; tryBlockStarts = newBlockParser.isContainer(); } } @@ -252,37 +297,62 @@ private void incorporateLine(CharSequence ln) { // What remains at the offset is a text line. Add the text to the // appropriate block. - // First check for a lazy paragraph continuation: - if (!allClosed && !isBlank() && + // First check for a lazy continuation line + if (!startedNewBlock && !isBlank() && getActiveBlockParser().canHaveLazyContinuationLines()) { + openBlockParsers.get(openBlockParsers.size() - 1).sourceIndex = lastIndex; // lazy paragraph continuation addLine(); } else { // finalize any blocks not matched - if (!allClosed) { - finalizeBlocks(unmatchedBlockParsers); + if (unmatchedBlocks > 0) { + closeBlockParsers(unmatchedBlocks); } if (!blockParser.isContainer()) { addLine(); } else if (!isBlank()) { // create paragraph container for line - addChild(new ParagraphParser()); + ParagraphParser paragraphParser = new ParagraphParser(); + addChild(new OpenBlockParser(paragraphParser, lastIndex)); addLine(); + } else { + // This can happen for a list item like this: + // ``` + // * + // list item + // ``` + // + // The first line does not start a paragraph yet, but we still want to record source positions. 
+ addSourceSpans(); } } } + private void setLine(String ln, int inputIndex) { + lineIndex++; + index = 0; + column = 0; + columnIsInTab = false; + + String lineContent = prepareLine(ln); + SourceSpan sourceSpan = null; + if (includeSourceSpans != IncludeSourceSpans.NONE) { + sourceSpan = SourceSpan.of(lineIndex, 0, inputIndex, lineContent.length()); + } + this.line = SourceLine.of(lineContent, sourceSpan); + } + private void findNextNonSpace() { int i = index; int cols = column; blank = true; - int length = line.length(); + int length = line.getContent().length(); while (i < length) { - char c = line.charAt(i); + char c = line.getContent().charAt(i); switch (c) { case ' ': i++; @@ -308,7 +378,7 @@ private void setNewIndex(int newIndex) { index = nextNonSpace; column = nextNonSpaceColumn; } - int length = line.length(); + int length = line.getContent().length(); while (index < newIndex && index != length) { advance(); } @@ -322,7 +392,7 @@ private void setNewColumn(int newColumn) { index = nextNonSpace; column = nextNonSpaceColumn; } - int length = line.length(); + int length = line.getContent().length(); while (column < newColumn && index != length) { advance(); } @@ -337,12 +407,11 @@ private void setNewColumn(int newColumn) { } private void advance() { - char c = line.charAt(index); + char c = line.getContent().charAt(index); + index++; if (c == '\t') { - index++; column += Parsing.columnsToNextTabStop(column); } else { - index++; column++; } } @@ -356,7 +425,7 @@ private void addLine() { if (columnIsInTab) { // Our column is in a partially consumed tab. Expand the remaining columns (to the next tab stop) to spaces. 
int afterTab = index + 1; - CharSequence rest = line.subSequence(afterTab, line.length()); + CharSequence rest = line.getContent().subSequence(afterTab, line.getContent().length()); int spaces = Parsing.columnsToNextTabStop(column); StringBuilder sb = new StringBuilder(spaces + rest.length()); for (int i = 0; i < spaces; i++) { @@ -364,13 +433,40 @@ private void addLine() { } sb.append(rest); content = sb.toString(); + } else if (index == 0) { + content = line.getContent(); } else { - content = line.subSequence(index, line.length()); + content = line.getContent().subSequence(index, line.getContent().length()); + } + SourceSpan sourceSpan = null; + if (includeSourceSpans == IncludeSourceSpans.BLOCKS_AND_INLINES && index < line.getSourceSpan().getLength()) { + // Note that if we're in a partially-consumed tab the length of the source span and the content don't match. + sourceSpan = line.getSourceSpan().subSpan(index); + } + getActiveBlockParser().addLine(SourceLine.of(content, sourceSpan)); + addSourceSpans(); + } + + private void addSourceSpans() { + if (includeSourceSpans != IncludeSourceSpans.NONE) { + // Don't add source spans for Document itself (it would get the whole source text), so start at 1, not 0 + for (int i = 1; i < openBlockParsers.size(); i++) { + var openBlockParser = openBlockParsers.get(i); + // In case of a lazy continuation line, the index is less than where the block parser would expect the + // contents to start, so let's use whichever is smaller. 
+ int blockIndex = Math.min(openBlockParser.sourceIndex, index); + int length = line.getContent().length() - blockIndex; + if (length != 0) { + openBlockParser.blockParser.addSourceSpan(line.getSourceSpan().subSpan(blockIndex)); + } + } } - getActiveBlockParser().addLine(content); } private BlockStartImpl findBlockStart(BlockParser blockParser) { + if (openBlockParsers.size() > maxOpenBlockParsers) { + return null; + } MatchedBlockParser matchedBlockParser = new MatchedBlockParserImpl(blockParser); for (BlockParserFactory blockParserFactory : blockParserFactories) { BlockStart result = blockParserFactory.tryStart(this, matchedBlockParser); @@ -381,104 +477,99 @@ private BlockStartImpl findBlockStart(BlockParser blockParser) { return null; } - /** - * Finalize a block. Close it and do any necessary postprocessing, e.g. creating string_content from strings, - * setting the 'tight' or 'loose' status of a list, and parsing the beginnings of paragraphs for reference - * definitions. - */ - private void finalize(BlockParser blockParser) { - if (getActiveBlockParser() == blockParser) { - deactivateBlockParser(); - } - - if (blockParser instanceof ParagraphParser) { - addDefinitionsFrom((ParagraphParser) blockParser); - } - - blockParser.closeBlock(); - } - - private void addDefinitionsFrom(ParagraphParser paragraphParser) { - for (LinkReferenceDefinition definition : paragraphParser.getDefinitions()) { - // Add nodes into document before paragraph. - paragraphParser.getBlock().insertBefore(definition); - - String label = definition.getLabel(); - // spec: When there are multiple matching link reference definitions, the first is used - if (!definitions.containsKey(label)) { - definitions.put(label, definition); - } - } - } - /** * Walk through a block & children recursively, parsing string content into inline content where appropriate. 
*/ private void processInlines() { - InlineParserContextImpl context = new InlineParserContextImpl(delimiterProcessors, definitions); - InlineParser inlineParser = inlineParserFactory.create(context); + var context = new InlineParserContextImpl(inlineContentParserFactories, delimiterProcessors, linkProcessors, linkMarkers, definitions); + var inlineParser = inlineParserFactory.create(context); - for (BlockParser blockParser : allBlockParsers) { + for (var blockParser : allBlockParsers) { blockParser.parseInlines(inlineParser); } } /** - * Add block of type tag as a child of the tip. If the tip can't accept children, close and finalize it and try - * its parent, and so on til we find a block that can accept children. + * Add block of type tag as a child of the tip. If the tip can't accept children, close and finalize it and try + * its parent, and so on until we find a block that can accept children. */ - private T addChild(T blockParser) { - while (!getActiveBlockParser().canContain(blockParser.getBlock())) { - finalize(getActiveBlockParser()); + private void addChild(OpenBlockParser openBlockParser) { + while (!getActiveBlockParser().canContain(openBlockParser.blockParser.getBlock())) { + closeBlockParsers(1); } - getActiveBlockParser().getBlock().appendChild(blockParser.getBlock()); - activateBlockParser(blockParser); + getActiveBlockParser().getBlock().appendChild(openBlockParser.blockParser.getBlock()); + activateBlockParser(openBlockParser); + } - return blockParser; + private void activateBlockParser(OpenBlockParser openBlockParser) { + openBlockParsers.add(openBlockParser); } - private void activateBlockParser(BlockParser blockParser) { - activeBlockParsers.add(blockParser); - allBlockParsers.add(blockParser); + private OpenBlockParser deactivateBlockParser() { + return openBlockParsers.remove(openBlockParsers.size() - 1); } - private void deactivateBlockParser() { - activeBlockParsers.remove(activeBlockParsers.size() - 1); + private List 
replaceParagraphLines(int lines, ParagraphParser paragraphParser) { + // Remove lines from paragraph as the new block is using them. + // If all lines are used, this also unlinks the Paragraph block. + var sourceSpans = paragraphParser.removeLines(lines); + // Close the paragraph block parser, which will finalize it. + closeBlockParsers(1); + return sourceSpans; } - private void prepareActiveBlockParserForReplacement() { - BlockParser old = getActiveBlockParser(); + private List prepareActiveBlockParserForReplacement(BlockParser blockParser) { + // Note that we don't want to parse inlines here, as it's getting replaced. deactivateBlockParser(); - allBlockParsers.remove(old); - - if (old instanceof ParagraphParser) { - ParagraphParser paragraphParser = (ParagraphParser) old; - // Collect any link reference definitions. Note that replacing the active block parser is done after a - // block parser got the current paragraph content using MatchedBlockParser#getContentString. In case the - // paragraph started with link reference definitions, we parse and strip them before the block parser gets - // the content. We want to keep them. - // If no replacement happens, we collect the definitions as part of finalizing paragraph blocks. - addDefinitionsFrom(paragraphParser); - } - old.getBlock().unlink(); + // Do this so that source positions are calculated, which we will carry over to the replacing block. + blockParser.closeBlock(); + blockParser.getBlock().unlink(); + return blockParser.getBlock().getSourceSpans(); + } + + private Document finalizeAndProcess() { + closeBlockParsers(openBlockParsers.size()); + processInlines(); + return documentBlockParser.getBlock(); + } + + private void closeBlockParsers(int count) { + for (int i = 0; i < count; i++) { + BlockParser blockParser = deactivateBlockParser().blockParser; + finalize(blockParser); + // Remember for inline parsing. Note that a lot of blocks don't need inline parsing. We could have a + // separate interface (e.g. 
BlockParserWithInlines) so that we only have to remember those that actually + // have inlines to parse. + allBlockParsers.add(blockParser); + } } /** - * Finalize blocks of previous line. Returns true. + * Finalize a block. Close it and do any necessary postprocessing, e.g. setting the content of blocks and + * collecting link reference definitions from paragraphs. */ - private void finalizeBlocks(List blockParsers) { - for (int i = blockParsers.size() - 1; i >= 0; i--) { - BlockParser blockParser = blockParsers.get(i); - finalize(blockParser); + private void finalize(BlockParser blockParser) { + addDefinitionsFrom(blockParser); + blockParser.closeBlock(); + } + + private void addDefinitionsFrom(BlockParser blockParser) { + for (var definitionMap : blockParser.getDefinitions()) { + definitions.addDefinitions(definitionMap); } } - private Document finalizeAndProcess() { - finalizeBlocks(this.activeBlockParsers); - this.processInlines(); - return this.documentBlockParser.getBlock(); + /** + * Prepares the input line replacing {@code \0} + */ + private static String prepareLine(String line) { + if (line.indexOf('\0') == -1) { + return line; + } else { + return line.replace('\0', '\uFFFD'); + } } private static class MatchedBlockParserImpl implements MatchedBlockParser { @@ -495,17 +586,22 @@ public BlockParser getMatchedBlockParser() { } @Override - public CharSequence getParagraphContent() { + public SourceLines getParagraphLines() { if (matchedBlockParser instanceof ParagraphParser) { ParagraphParser paragraphParser = (ParagraphParser) matchedBlockParser; - CharSequence content = paragraphParser.getContentString(); - if (content.length() == 0) { - return null; - } - - return content; + return paragraphParser.getParagraphLines(); } - return null; + return SourceLines.empty(); + } + } + + private static class OpenBlockParser { + private final BlockParser blockParser; + private int sourceIndex; + + OpenBlockParser(BlockParser blockParser, int sourceIndex) { + 
this.blockParser = blockParser; + this.sourceIndex = sourceIndex; } } } diff --git a/commonmark/src/main/java/org/commonmark/internal/FencedCodeBlockParser.java b/commonmark/src/main/java/org/commonmark/internal/FencedCodeBlockParser.java index e57cc7277..d550f1d25 100644 --- a/commonmark/src/main/java/org/commonmark/internal/FencedCodeBlockParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/FencedCodeBlockParser.java @@ -3,20 +3,26 @@ import org.commonmark.internal.util.Parsing; import org.commonmark.node.Block; import org.commonmark.node.FencedCodeBlock; +import org.commonmark.parser.SourceLine; import org.commonmark.parser.block.*; +import org.commonmark.text.Characters; import static org.commonmark.internal.util.Escaping.unescapeString; public class FencedCodeBlockParser extends AbstractBlockParser { private final FencedCodeBlock block = new FencedCodeBlock(); + private final char fenceChar; + private final int openingFenceLength; private String firstLine; private StringBuilder otherLines = new StringBuilder(); public FencedCodeBlockParser(char fenceChar, int fenceLength, int fenceIndent) { - block.setFenceChar(fenceChar); - block.setFenceLength(fenceLength); + this.fenceChar = fenceChar; + this.openingFenceLength = fenceLength; + block.setFenceCharacter(String.valueOf(fenceChar)); + block.setOpeningFenceLength(fenceLength); block.setFenceIndent(fenceIndent); } @@ -29,9 +35,8 @@ public Block getBlock() { public BlockContinue tryContinue(ParserState state) { int nextNonSpace = state.getNextNonSpaceIndex(); int newIndex = state.getIndex(); - CharSequence line = state.getLine(); - boolean closing = state.getIndent() < Parsing.CODE_BLOCK_INDENT && isClosing(line, nextNonSpace); - if (closing) { + CharSequence line = state.getLine().getContent(); + if (state.getIndent() < Parsing.CODE_BLOCK_INDENT && nextNonSpace < line.length() && tryClosing(line, nextNonSpace)) { // closing fence - we're at end of line, so we can finalize now return 
BlockContinue.finished(); } else { @@ -47,11 +52,11 @@ public BlockContinue tryContinue(ParserState state) { } @Override - public void addLine(CharSequence line) { + public void addLine(SourceLine line) { if (firstLine == null) { - firstLine = line.toString(); + firstLine = line.getContent().toString(); } else { - otherLines.append(line); + otherLines.append(line.getContent()); otherLines.append('\n'); } } @@ -73,9 +78,9 @@ public BlockStart tryStart(ParserState state, MatchedBlockParser matchedBlockPar } int nextNonSpace = state.getNextNonSpaceIndex(); - FencedCodeBlockParser blockParser = checkOpener(state.getLine(), nextNonSpace, indent); + FencedCodeBlockParser blockParser = checkOpener(state.getLine().getContent(), nextNonSpace, indent); if (blockParser != null) { - return BlockStart.of(blockParser).atIndex(nextNonSpace + blockParser.block.getFenceLength()); + return BlockStart.of(blockParser).atIndex(nextNonSpace + blockParser.block.getOpeningFenceLength()); } else { return BlockStart.none(); } @@ -103,7 +108,7 @@ private static FencedCodeBlockParser checkOpener(CharSequence line, int index, i } if (backticks >= 3 && tildes == 0) { // spec: If the info string comes after a backtick fence, it may not contain any backtick characters. - if (Parsing.find('`', line, index + backticks) != -1) { + if (Characters.find('`', line, index + backticks) != -1) { return null; } return new FencedCodeBlockParser('`', backticks, indent); @@ -118,15 +123,17 @@ private static FencedCodeBlockParser checkOpener(CharSequence line, int index, i // spec: The content of the code block consists of all subsequent lines, until a closing code fence of the same type // as the code block began with (backticks or tildes), and with at least as many backticks or tildes as the opening // code fence. 
- private boolean isClosing(CharSequence line, int index) { - char fenceChar = block.getFenceChar(); - int fenceLength = block.getFenceLength(); - int fences = Parsing.skip(fenceChar, line, index, line.length()) - index; - if (fences < fenceLength) { + private boolean tryClosing(CharSequence line, int index) { + int fences = Characters.skip(fenceChar, line, index, line.length()) - index; + if (fences < openingFenceLength) { return false; } // spec: The closing code fence [...] may be followed only by spaces, which are ignored. - int after = Parsing.skipSpaceTab(line, index + fences, line.length()); - return after == line.length(); + int after = Characters.skipSpaceTab(line, index + fences, line.length()); + if (after == line.length()) { + block.setClosingFenceLength(fences); + return true; + } + return false; } } diff --git a/commonmark/src/main/java/org/commonmark/internal/HeadingParser.java b/commonmark/src/main/java/org/commonmark/internal/HeadingParser.java index 2b72ba236..05f070137 100644 --- a/commonmark/src/main/java/org/commonmark/internal/HeadingParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/HeadingParser.java @@ -4,14 +4,19 @@ import org.commonmark.node.Block; import org.commonmark.node.Heading; import org.commonmark.parser.InlineParser; +import org.commonmark.parser.SourceLine; +import org.commonmark.parser.SourceLines; +import org.commonmark.parser.beta.Position; +import org.commonmark.parser.beta.Scanner; import org.commonmark.parser.block.*; +import org.commonmark.text.Characters; public class HeadingParser extends AbstractBlockParser { private final Heading block = new Heading(); - private final String content; + private final SourceLines content; - public HeadingParser(int level, String content) { + public HeadingParser(int level, SourceLines content) { block.setLevel(level); this.content = content; } @@ -40,21 +45,22 @@ public BlockStart tryStart(ParserState state, MatchedBlockParser matchedBlockPar return BlockStart.none(); } 
- CharSequence line = state.getLine(); + SourceLine line = state.getLine(); int nextNonSpace = state.getNextNonSpaceIndex(); - HeadingParser atxHeading = getAtxHeading(line, nextNonSpace); - if (atxHeading != null) { - return BlockStart.of(atxHeading).atIndex(line.length()); + if (line.getContent().charAt(nextNonSpace) == '#') { + HeadingParser atxHeading = getAtxHeading(line.substring(nextNonSpace, line.getContent().length())); + if (atxHeading != null) { + return BlockStart.of(atxHeading).atIndex(line.getContent().length()); + } } - int setextHeadingLevel = getSetextHeadingLevel(line, nextNonSpace); + int setextHeadingLevel = getSetextHeadingLevel(line.getContent(), nextNonSpace); if (setextHeadingLevel > 0) { - CharSequence paragraph = matchedBlockParser.getParagraphContent(); - if (paragraph != null) { - String content = paragraph.toString(); - return BlockStart.of(new HeadingParser(setextHeadingLevel, content)) - .atIndex(line.length()) - .replaceActiveBlockParser(); + SourceLines paragraph = matchedBlockParser.getParagraphLines(); + if (!paragraph.isEmpty()) { + return BlockStart.of(new HeadingParser(setextHeadingLevel, paragraph)) + .atIndex(line.getContent().length()) + .replaceParagraphLines(paragraph.getLines().size()); } } @@ -63,35 +69,67 @@ public BlockStart tryStart(ParserState state, MatchedBlockParser matchedBlockPar } // spec: An ATX heading consists of a string of characters, parsed as inline content, between an opening sequence of - // 1–6 unescaped # characters and an optional closing sequence of any number of unescaped # characters. The opening + // 1-6 unescaped # characters and an optional closing sequence of any number of unescaped # characters. The opening // sequence of # characters must be followed by a space or by the end of line. The optional closing sequence of #s // must be preceded by a space and may be followed by spaces only. 
- private static HeadingParser getAtxHeading(CharSequence line, int index) { - int level = Parsing.skip('#', line, index, line.length()) - index; + private static HeadingParser getAtxHeading(SourceLine line) { + Scanner scanner = Scanner.of(SourceLines.of(line)); + int level = scanner.matchMultiple('#'); if (level == 0 || level > 6) { return null; } - int start = index + level; - if (start >= line.length()) { + if (!scanner.hasNext()) { // End of line after markers is an empty heading - return new HeadingParser(level, ""); + return new HeadingParser(level, SourceLines.empty()); } - char next = line.charAt(start); + char next = scanner.peek(); if (!(next == ' ' || next == '\t')) { return null; } - int beforeSpace = Parsing.skipSpaceTabBackwards(line, line.length() - 1, start); - int beforeHash = Parsing.skipBackwards('#', line, beforeSpace, start); - int beforeTrailer = Parsing.skipSpaceTabBackwards(line, beforeHash, start); - if (beforeTrailer != beforeHash) { - return new HeadingParser(level, line.subSequence(start, beforeTrailer + 1).toString()); - } else { - return new HeadingParser(level, line.subSequence(start, beforeSpace + 1).toString()); + scanner.whitespace(); + Position start = scanner.position(); + Position end = start; + boolean hashCanEnd = true; + + while (scanner.hasNext()) { + char c = scanner.peek(); + switch (c) { + case '#': + if (hashCanEnd) { + scanner.matchMultiple('#'); + int whitespace = scanner.whitespace(); + // If there's other characters, the hashes and spaces were part of the heading + if (scanner.hasNext()) { + end = scanner.position(); + } + hashCanEnd = whitespace > 0; + } else { + scanner.next(); + end = scanner.position(); + } + break; + case ' ': + case '\t': + hashCanEnd = true; + scanner.next(); + break; + default: + hashCanEnd = false; + scanner.next(); + end = scanner.position(); + } + } + + SourceLines source = scanner.getSource(start, end); + String content = source.getContent(); + if (content.isEmpty()) { + return new 
HeadingParser(level, SourceLines.empty()); } + return new HeadingParser(level, source); } // spec: A setext heading underline is a sequence of = characters or a sequence of - characters, with no more than @@ -102,17 +140,19 @@ private static int getSetextHeadingLevel(CharSequence line, int index) { if (isSetextHeadingRest(line, index + 1, '=')) { return 1; } + break; case '-': if (isSetextHeadingRest(line, index + 1, '-')) { return 2; } + break; } return 0; } private static boolean isSetextHeadingRest(CharSequence line, int index, char marker) { - int afterMarker = Parsing.skip(marker, line, index, line.length()); - int afterSpace = Parsing.skipSpaceTab(line, afterMarker, line.length()); + int afterMarker = Characters.skip(marker, line, index, line.length()); + int afterSpace = Characters.skipSpaceTab(line, afterMarker, line.length()); return afterSpace >= line.length(); } } diff --git a/commonmark/src/main/java/org/commonmark/internal/HtmlBlockParser.java b/commonmark/src/main/java/org/commonmark/internal/HtmlBlockParser.java index 3b3a0e64f..123d9ec1f 100644 --- a/commonmark/src/main/java/org/commonmark/internal/HtmlBlockParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/HtmlBlockParser.java @@ -1,20 +1,35 @@ package org.commonmark.internal; -import org.commonmark.internal.util.Parsing; import org.commonmark.node.Block; import org.commonmark.node.HtmlBlock; import org.commonmark.node.Paragraph; +import org.commonmark.parser.SourceLine; import org.commonmark.parser.block.*; import java.util.regex.Pattern; public class HtmlBlockParser extends AbstractBlockParser { + private static final String TAGNAME = "[A-Za-z][A-Za-z0-9-]*"; + private static final String ATTRIBUTENAME = "[a-zA-Z_:][a-zA-Z0-9:._-]*"; + private static final String UNQUOTEDVALUE = "[^\"'=<>`\\x00-\\x20]+"; + private static final String SINGLEQUOTEDVALUE = "'[^']*'"; + private static final String DOUBLEQUOTEDVALUE = "\"[^\"]*\""; + private static final String ATTRIBUTEVALUE = "(?:" + 
UNQUOTEDVALUE + "|" + SINGLEQUOTEDVALUE + + "|" + DOUBLEQUOTEDVALUE + ")"; + private static final String ATTRIBUTEVALUESPEC = "(?:" + "\\s*=" + "\\s*" + ATTRIBUTEVALUE + + ")"; + private static final String ATTRIBUTE = "(?:" + "\\s+" + ATTRIBUTENAME + ATTRIBUTEVALUESPEC + + "?)"; + + private static final String OPENTAG = "<" + TAGNAME + ATTRIBUTE + "*" + "\\s*/?>"; + private static final String CLOSETAG = "]"; + private static final Pattern[][] BLOCK_PATTERNS = new Pattern[][]{ {null, null}, // not used (no type 0) { - Pattern.compile("^<(?:script|pre|style)(?:\\s|>|$)", Pattern.CASE_INSENSITIVE), - Pattern.compile("", Pattern.CASE_INSENSITIVE) + Pattern.compile("^<(?:script|pre|style|textarea)(?:\\s|>|$)", Pattern.CASE_INSENSITIVE), + Pattern.compile("", Pattern.CASE_INSENSITIVE) }, { Pattern.compile("^|"; - private static final String PROCESSINGINSTRUCTION = "[<][?].*?[?][>]"; - private static final String DECLARATION = "]*>"; - private static final String CDATA = ""; - private static final String HTMLTAG = "(?:" + Parsing.OPENTAG + "|" + Parsing.CLOSETAG + "|" + HTMLCOMMENT - + "|" + PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")"; - - private static final String ASCII_PUNCTUATION = "!\"#\\$%&'\\(\\)\\*\\+,\\-\\./:;<=>\\?@\\[\\\\\\]\\^_`\\{\\|\\}~"; - private static final Pattern PUNCTUATION = Pattern - .compile("^[" + ASCII_PUNCTUATION + "\\p{Pc}\\p{Pd}\\p{Pe}\\p{Pf}\\p{Pi}\\p{Po}\\p{Ps}]"); - - private static final Pattern HTML_TAG = Pattern.compile('^' + HTMLTAG, Pattern.CASE_INSENSITIVE); - - private static final Pattern ESCAPABLE = Pattern.compile('^' + Escaping.ESCAPABLE); - - private static final Pattern ENTITY_HERE = Pattern.compile('^' + Escaping.ENTITY, Pattern.CASE_INSENSITIVE); - - private static final Pattern TICKS = Pattern.compile("`+"); - - private static final Pattern TICKS_HERE = Pattern.compile("^`+"); - - private static final Pattern EMAIL_AUTOLINK = Pattern - 
.compile("^<([a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>"); - - private static final Pattern AUTOLINK = Pattern - .compile("^<[a-zA-Z][a-zA-Z0-9.+-]{1,31}:[^<>\u0000-\u0020]*>"); - - private static final Pattern SPNL = Pattern.compile("^ *(?:\n *)?"); - - private static final Pattern UNICODE_WHITESPACE_CHAR = Pattern.compile("^[\\p{Zs}\t\r\n\f]"); - - private static final Pattern WHITESPACE = Pattern.compile("\\s+"); - - private static final Pattern FINAL_SPACE = Pattern.compile(" *$"); - - private final BitSet specialCharacters; - private final BitSet delimiterCharacters; - private final Map delimiterProcessors; private final InlineParserContext context; + private final List inlineContentParserFactories; + private final Map delimiterProcessors; + private final List linkProcessors; + private final BitSet specialCharacters; + private final BitSet linkMarkers; - private String input; - private int index; + private Map> inlineParsers; + private Scanner scanner; + private boolean includeSourceSpans; + private int trailingSpaces; /** * Top delimiter (emphasis, strong emphasis or custom emphasis). 
(Brackets are on a separate stack, different @@ -71,39 +39,36 @@ public class InlineParserImpl implements InlineParser { */ private Bracket lastBracket; - public InlineParserImpl(InlineParserContext inlineParserContext) { - this.delimiterProcessors = calculateDelimiterProcessors(inlineParserContext.getCustomDelimiterProcessors()); - this.delimiterCharacters = calculateDelimiterCharacters(this.delimiterProcessors.keySet()); - this.specialCharacters = calculateSpecialCharacters(delimiterCharacters); - - this.context = inlineParserContext; + public InlineParserImpl(InlineParserContext context) { + this.context = context; + this.inlineContentParserFactories = calculateInlineContentParserFactories(context.getCustomInlineContentParserFactories()); + this.delimiterProcessors = calculateDelimiterProcessors(context.getCustomDelimiterProcessors()); + this.linkProcessors = calculateLinkProcessors(context.getCustomLinkProcessors()); + this.linkMarkers = calculateLinkMarkers(context.getCustomLinkMarkers()); + this.specialCharacters = calculateSpecialCharacters(linkMarkers, this.delimiterProcessors.keySet(), this.inlineContentParserFactories); } - public static BitSet calculateDelimiterCharacters(Set characters) { - BitSet bitSet = new BitSet(); - for (Character character : characters) { - bitSet.set(character); - } - return bitSet; + private List calculateInlineContentParserFactories(List customFactories) { + // Custom parsers can override built-in parsers if they want, so make sure they are tried first + var list = new ArrayList<>(customFactories); + list.add(new BackslashInlineParser.Factory()); + list.add(new BackticksInlineParser.Factory()); + list.add(new EntityInlineParser.Factory()); + list.add(new AutolinkInlineParser.Factory()); + list.add(new HtmlInlineParser.Factory()); + return list; } - public static BitSet calculateSpecialCharacters(BitSet delimiterCharacters) { - BitSet bitSet = new BitSet(); - bitSet.or(delimiterCharacters); - bitSet.set('\n'); - 
bitSet.set('`'); - bitSet.set('['); - bitSet.set(']'); - bitSet.set('\\'); - bitSet.set('!'); - bitSet.set('<'); - bitSet.set('&'); - return bitSet; + private List calculateLinkProcessors(List linkProcessors) { + // Custom link processors can override the built-in behavior, so make sure they are tried first + var list = new ArrayList<>(linkProcessors); + list.add(new CoreLinkProcessor()); + return list; } - public static Map calculateDelimiterProcessors(List delimiterProcessors) { - Map map = new HashMap<>(); - addDelimiterProcessors(Arrays.asList(new AsteriskDelimiterProcessor(), new UnderscoreDelimiterProcessor()), map); + private static Map calculateDelimiterProcessors(List delimiterProcessors) { + var map = new HashMap(); + addDelimiterProcessors(List.of(new AsteriskDelimiterProcessor(), new UnderscoreDelimiterProcessor()), map); addDelimiterProcessors(delimiterProcessors, map); return map; } @@ -141,282 +106,206 @@ private static void addDelimiterProcessorForChar(char delimiterChar, DelimiterPr } } + private static BitSet calculateLinkMarkers(Set linkMarkers) { + var bitSet = new BitSet(); + for (var c : linkMarkers) { + bitSet.set(c); + } + bitSet.set('!'); + return bitSet; + } + + private static BitSet calculateSpecialCharacters(BitSet linkMarkers, + Set delimiterCharacters, + List inlineContentParserFactories) { + BitSet bitSet = (BitSet) linkMarkers.clone(); + for (Character c : delimiterCharacters) { + bitSet.set(c); + } + for (var factory : inlineContentParserFactories) { + for (var c : factory.getTriggerCharacters()) { + bitSet.set(c); + } + } + bitSet.set('['); + bitSet.set(']'); + bitSet.set('!'); + bitSet.set('\n'); + return bitSet; + } + + private Map> createInlineContentParsers() { + var map = new HashMap>(); + for (var factory : inlineContentParserFactories) { + var parser = factory.create(); + for (var c : factory.getTriggerCharacters()) { + map.computeIfAbsent(c, k -> new ArrayList<>()).add(parser); + } + } + return map; + } + + @Override + 
public Scanner scanner() { + return scanner; + } + /** - * Parse content in block into inline children, using reference map to resolve references. + * Parse content in block into inline children, appending them to the block node. */ @Override - public void parse(String content, Node block) { - reset(content.trim()); + public void parse(SourceLines lines, Node block) { + reset(lines); - Node previous = null; while (true) { - Node node = parseInline(previous); - previous = node; - if (node != null) { - block.appendChild(node); - } else { + var nodes = parseInline(); + if (nodes == null) { break; } + for (Node node : nodes) { + block.appendChild(node); + } } processDelimiters(null); mergeChildTextNodes(block); } - void reset(String content) { - this.input = content; - this.index = 0; + void reset(SourceLines lines) { + this.scanner = Scanner.of(lines); + this.includeSourceSpans = !lines.getSourceSpans().isEmpty(); + this.trailingSpaces = 0; this.lastDelimiter = null; this.lastBracket = null; + this.inlineParsers = createInlineContentParsers(); } - - private Text text(String text, int beginIndex, int endIndex) { - return new Text(text.substring(beginIndex, endIndex)); - } - - private Text text(String text) { - return new Text(text); + private Text text(SourceLines sourceLines) { + Text text = new Text(sourceLines.getContent()); + text.setSourceSpans(sourceLines.getSourceSpans()); + return text; } /** - * Parse the next inline element in subject, advancing input index. + * Parse the next inline element in subject, advancing our position. * On success, return the new inline node. * On failure, return null. 
*/ - private Node parseInline(Node previous) { - char c = peek(); - if (c == '\0') { - return null; - } + private List parseInline() { + char c = scanner.peek(); - Node node; switch (c) { - case '\n': - node = parseNewline(previous); - break; - case '\\': - node = parseBackslash(); - break; - case '`': - node = parseBackticks(); - break; case '[': - node = parseOpenBracket(); - break; - case '!': - node = parseBang(); - break; + return List.of(parseOpenBracket()); case ']': - node = parseCloseBracket(); - break; - case '<': - node = parseAutolink(); - if (node == null) { - node = parseHtmlInline(); - } - break; - case '&': - node = parseEntity(); - break; - default: - boolean isDelimiter = delimiterCharacters.get(c); - if (isDelimiter) { - DelimiterProcessor delimiterProcessor = delimiterProcessors.get(c); - node = parseDelimiters(delimiterProcessor, c); - } else { - node = parseString(); - } - break; - } - if (node != null) { - return node; - } else { - index++; - // When we get here, it's only for a single special character that turned out to not have a special meaning. - // So we shouldn't have a single surrogate here, hence it should be ok to turn it into a String. - String literal = String.valueOf(c); - return text(literal); - } - } - - /** - * If RE matches at current index in the input, advance index and return the match; otherwise return null. - */ - private String match(Pattern re) { - if (index >= input.length()) { - return null; - } - try { - Matcher matcher = re.matcher(input); - matcher.region(index, input.length()); - boolean m = matcher.find(); - if (m) { - index = matcher.end(); - return matcher.group(); - } else { + return List.of(parseCloseBracket()); + case '\n': + return List.of(parseLineBreak()); + case Scanner.END: return null; - } - } catch (StackOverflowError e) { - return null; } - } - - /** - * Returns the char at the current input index, or {@code '\0'} in case there are no more characters. 
- */ - private char peek() { - if (index < input.length()) { - return input.charAt(index); - } else { - return '\0'; - } - } - - /** - * Parse zero or more space characters, including at most one newline. - */ - private void spnl() { - match(SPNL); - } - /** - * Parse a newline. If it was preceded by two spaces, return a hard line break; otherwise a soft line break. - */ - private Node parseNewline(Node previous) { - index++; // assume we're at a \n - - // Check previous text for trailing spaces. - // The "endsWith" is an optimization to avoid an RE match in the common case. - if (previous instanceof Text && ((Text) previous).getLiteral().endsWith(" ")) { - Text text = (Text) previous; - String literal = text.getLiteral(); - Matcher matcher = FINAL_SPACE.matcher(literal); - int spaces = matcher.find() ? matcher.end() - matcher.start() : 0; - if (spaces > 0) { - text.setLiteral(literal.substring(0, literal.length() - spaces)); + if (linkMarkers.get(c)) { + var markerPosition = scanner.position(); + var nodes = parseLinkMarker(); + if (nodes != null) { + return nodes; } - if (spaces >= 2) { - return new HardLineBreak(); - } else { - return new SoftLineBreak(); + // Reset and try other things (e.g. inline parsers below) + scanner.setPosition(markerPosition); + } + + // No inline parser, delimiter or other special handling. 
+ if (!specialCharacters.get(c)) { + return List.of(parseText()); + } + + List inlineParsers = this.inlineParsers.get(c); + if (inlineParsers != null) { + Position position = scanner.position(); + for (InlineContentParser inlineParser : inlineParsers) { + ParsedInline parsedInline = inlineParser.tryParse(this); + if (parsedInline instanceof ParsedInlineImpl) { + ParsedInlineImpl parsedInlineImpl = (ParsedInlineImpl) parsedInline; + Node node = parsedInlineImpl.getNode(); + scanner.setPosition(parsedInlineImpl.getPosition()); + if (includeSourceSpans && node.getSourceSpans().isEmpty()) { + node.setSourceSpans(scanner.getSource(position, scanner.position()).getSourceSpans()); + } + return List.of(node); + } else { + // Reset position + scanner.setPosition(position); + } } - } else { - return new SoftLineBreak(); } - } - - /** - * Parse a backslash-escaped special character, adding either the escaped character, a hard line break - * (if the backslash is followed by a newline), or a literal backslash to the block's children. - */ - private Node parseBackslash() { - index++; - Node node; - if (peek() == '\n') { - node = new HardLineBreak(); - index++; - } else if (index < input.length() && ESCAPABLE.matcher(input.substring(index, index + 1)).matches()) { - node = text(input, index, index + 1); - index++; - } else { - node = text("\\"); - } - return node; - } - /** - * Attempt to parse backticks, returning either a backtick code span or a literal sequence of backticks. 
- */ - private Node parseBackticks() { - String ticks = match(TICKS_HERE); - if (ticks == null) { - return null; - } - int afterOpenTicks = index; - String matched; - while ((matched = match(TICKS)) != null) { - if (matched.equals(ticks)) { - Code node = new Code(); - String content = input.substring(afterOpenTicks, index - ticks.length()); - content = content.replace('\n', ' '); - - // spec: If the resulting string both begins and ends with a space character, but does not consist - // entirely of space characters, a single space character is removed from the front and back. - if (content.length() >= 3 && - content.charAt(0) == ' ' && - content.charAt(content.length() - 1) == ' ' && - Parsing.hasNonSpace(content)) { - content = content.substring(1, content.length() - 1); - } - - node.setLiteral(content); - return node; + DelimiterProcessor delimiterProcessor = delimiterProcessors.get(c); + if (delimiterProcessor != null) { + List nodes = parseDelimiters(delimiterProcessor, c); + if (nodes != null) { + return nodes; } } - // If we got here, we didn't match a closing backtick sequence. - index = afterOpenTicks; - return text(ticks); + + // If we get here, even for a special/delimiter character, we will just treat it as text. + return List.of(parseText()); } /** * Attempt to parse delimiters like emphasis, strong emphasis or custom delimiters. 
*/ - private Node parseDelimiters(DelimiterProcessor delimiterProcessor, char delimiterChar) { + private List parseDelimiters(DelimiterProcessor delimiterProcessor, char delimiterChar) { DelimiterData res = scanDelimiters(delimiterProcessor, delimiterChar); if (res == null) { return null; } - int length = res.count; - int startIndex = index; - index += length; - Text node = text(input, startIndex, index); + List characters = res.characters; // Add entry to stack for this opener - lastDelimiter = new Delimiter(node, delimiterChar, res.canOpen, res.canClose, lastDelimiter); - lastDelimiter.length = length; - lastDelimiter.originalLength = length; + lastDelimiter = new Delimiter(characters, delimiterChar, res.canOpen, res.canClose, lastDelimiter); if (lastDelimiter.previous != null) { lastDelimiter.previous.next = lastDelimiter; } - return node; + return characters; } /** * Add open bracket to delimiter stack and add a text node to block's children. */ private Node parseOpenBracket() { - int startIndex = index; - index++; + Position start = scanner.position(); + scanner.next(); + Position contentPosition = scanner.position(); - Text node = text("["); + Text node = text(scanner.getSource(start, contentPosition)); // Add entry to stack for this opener - addBracket(Bracket.link(node, startIndex, lastBracket, lastDelimiter)); + addBracket(Bracket.link(node, start, contentPosition, lastBracket, lastDelimiter)); return node; } /** - * If next character is [, and ! delimiter to delimiter stack and add a text node to block's children. - * Otherwise just add a text node. + * If next character is {@code [}, add a bracket to the stack. + * Otherwise, return null. 
*/ - private Node parseBang() { - int startIndex = index; - index++; - if (peek() == '[') { - index++; - - Text node = text("!["); + private List parseLinkMarker() { + var markerPosition = scanner.position(); + scanner.next(); + var bracketPosition = scanner.position(); + if (scanner.next('[')) { + var contentPosition = scanner.position(); + var bangNode = text(scanner.getSource(markerPosition, bracketPosition)); + var bracketNode = text(scanner.getSource(bracketPosition, contentPosition)); // Add entry to stack for this opener - addBracket(Bracket.image(node, startIndex + 1, lastBracket, lastDelimiter)); - - return node; + addBracket(Bracket.withMarker(bangNode, markerPosition, bracketNode, bracketPosition, contentPosition, lastBracket, lastDelimiter)); + return List.of(bangNode, bracketNode); } else { - return text("!"); + return null; } } @@ -425,114 +314,170 @@ private Node parseBang() { * plain [ character. If there is a matching delimiter, remove it from the delimiter stack. */ private Node parseCloseBracket() { - index++; - int startIndex = index; + Position beforeClose = scanner.position(); + scanner.next(); + Position afterClose = scanner.position(); // Get previous `[` or `![` Bracket opener = lastBracket; if (opener == null) { // No matching opener, just return a literal. - return text("]"); + return text(scanner.getSource(beforeClose, afterClose)); } if (!opener.allowed) { - // Matching opener but it's not allowed, just return a literal. + // Matching opener, but it's not allowed, just return a literal. 
removeLastBracket(); - return text("]"); + return text(scanner.getSource(beforeClose, afterClose)); } - // Check to see if we have a link/image + var linkOrImage = parseLinkOrImage(opener, beforeClose); + if (linkOrImage != null) { + return linkOrImage; + } + scanner.setPosition(afterClose); - String dest = null; - String title = null; - boolean isLinkOrImage = false; - - // Maybe a inline link like `[foo](/uri "title")` - if (peek() == '(') { - index++; - spnl(); - if ((dest = parseLinkDestination()) != null) { - spnl(); - // title needs a whitespace before - if (WHITESPACE.matcher(input.substring(index - 1, index)).matches()) { - title = parseLinkTitle(); - spnl(); - } - if (peek() == ')') { - index++; - isLinkOrImage = true; - } else { - index = startIndex; - } - } + // Nothing parsed, just parse the bracket as text and continue + removeLastBracket(); + return text(scanner.getSource(beforeClose, afterClose)); + } + + private Node parseLinkOrImage(Bracket opener, Position beforeClose) { + var linkInfo = parseLinkInfo(opener, beforeClose); + if (linkInfo == null) { + return null; } + var processorStartPosition = scanner.position(); - // Maybe a reference link like `[foo][bar]`, `[foo][]` or `[foo]` - if (!isLinkOrImage) { - - // See if there's a link label like `[bar]` or `[]` - int beforeLabel = index; - parseLinkLabel(); - int labelLength = index - beforeLabel; - String ref = null; - if (labelLength > 2) { - ref = input.substring(beforeLabel, beforeLabel + labelLength); - } else if (!opener.bracketAfter) { - // If the second label is empty `[foo][]` or missing `[foo]`, then the first label is the reference. - // But it can only be a reference when there's no (unescaped) bracket in it. - // If there is, we don't even need to try to look up the reference. This is an optimization. 
- ref = input.substring(opener.index, startIndex); + for (var linkProcessor : linkProcessors) { + var linkResult = linkProcessor.process(linkInfo, scanner, context); + if (!(linkResult instanceof LinkResultImpl)) { + // Reset position in case the processor used the scanner, and it didn't work out. + scanner.setPosition(processorStartPosition); + continue; } - if (ref != null) { - String label = Escaping.normalizeReference(ref); - LinkReferenceDefinition definition = context.getLinkReferenceDefinition(label); - if (definition != null) { - dest = definition.getDestination(); - title = definition.getTitle(); - isLinkOrImage = true; - } + var result = (LinkResultImpl) linkResult; + var node = result.getNode(); + var position = result.getPosition(); + var includeMarker = result.isIncludeMarker(); + + switch (result.getType()) { + case WRAP: + scanner.setPosition(position); + return wrapBracket(opener, node, includeMarker); + case REPLACE: + scanner.setPosition(position); + return replaceBracket(opener, node, includeMarker); } } - if (isLinkOrImage) { - // If we got here, open is a potential opener - Node linkOrImage = opener.image ? new Image(dest, title) : new Link(dest, title); + return null; + } - Node node = opener.node.getNext(); - while (node != null) { - Node next = node.getNext(); - linkOrImage.appendChild(node); - node = next; - } + private LinkInfo parseLinkInfo(Bracket opener, Position beforeClose) { + // Check to see if we have a link (or image, with a ! in front). 
The different types: + // - Inline: `[foo](/uri)` or with optional title `[foo](/uri "title")` + // - Reference links + // - Full: `[foo][bar]` (foo is the text and bar is the label that needs to match a reference) + // - Collapsed: `[foo][]` (foo is both the text and label) + // - Shortcut: `[foo]` (foo is both the text and label) - // Process delimiters such as emphasis inside link/image - processDelimiters(opener.previousDelimiter); - mergeChildTextNodes(linkOrImage); - // We don't need the corresponding text node anymore, we turned it into a link/image node - opener.node.unlink(); - removeLastBracket(); + // Starting position is after the closing `]` + var afterClose = scanner.position(); - // Links within links are not allowed. We found this link, so there can be no other link around it. - if (!opener.image) { - Bracket bracket = lastBracket; - while (bracket != null) { - if (!bracket.image) { - // Disallow link opener. It will still get matched, but will not result in a link. - bracket.allowed = false; - } - bracket = bracket.previous; - } - } + // Maybe an inline link/image + var destinationTitle = parseInlineDestinationTitle(scanner); + if (destinationTitle != null) { + var text = scanner.getSource(opener.contentPosition, beforeClose).getContent(); + return new LinkInfoImpl(opener.markerNode, opener.bracketNode, text, null, destinationTitle.destination, destinationTitle.title, afterClose); + } + // Not an inline link/image, rewind back to after `]`. + scanner.setPosition(afterClose); - return linkOrImage; + // Maybe a reference link/image like `[foo][bar]`, `[foo][]` or `[foo]`. + // Note that even `[foo](` could be a valid link if foo is a reference, which is why we try this even if the `(` + // failed to be parsed as an inline link/image before. 
- } else { // no link or image - index = startIndex; - removeLastBracket(); + // See if there's a link label like `[bar]` or `[]` + var label = parseLinkLabel(scanner); + if (label == null) { + // No label, rewind back + scanner.setPosition(afterClose); + } + var textIsReference = label == null || label.isEmpty(); + if (opener.bracketAfter && textIsReference && opener.markerNode == null) { + // In case of shortcut or collapsed links, the text is used as the reference. But the reference is not allowed to + // contain an unescaped bracket, so if that's the case we don't need to continue. This is an optimization. + return null; + } + + var text = scanner.getSource(opener.contentPosition, beforeClose).getContent(); + return new LinkInfoImpl(opener.markerNode, opener.bracketNode, text, label, null, null, afterClose); + } + + private Node wrapBracket(Bracket opener, Node wrapperNode, boolean includeMarker) { + // Add all nodes between the opening bracket and now (closing bracket) as child nodes of the link + Node n = opener.bracketNode.getNext(); + while (n != null) { + Node next = n.getNext(); + wrapperNode.appendChild(n); + n = next; + } - return text("]"); + if (includeSourceSpans) { + var startPosition = includeMarker && opener.markerPosition != null ? opener.markerPosition : opener.bracketPosition; + wrapperNode.setSourceSpans(scanner.getSource(startPosition, scanner.position()).getSourceSpans()); } + + // Process delimiters such as emphasis inside link/image + processDelimiters(opener.previousDelimiter); + mergeChildTextNodes(wrapperNode); + // We don't need the corresponding text node anymore, we turned it into a link/image node + if (includeMarker && opener.markerNode != null) { + opener.markerNode.unlink(); + } + opener.bracketNode.unlink(); + removeLastBracket(); + + // Links within links are not allowed. We found this link, so there can be no other links around it. 
+ if (opener.markerNode == null) { + disallowPreviousLinks(); + } + + return wrapperNode; + } + + private Node replaceBracket(Bracket opener, Node node, boolean includeMarker) { + // Remove delimiters (but keep text nodes) + while (lastDelimiter != null && lastDelimiter != opener.previousDelimiter) { + removeDelimiterKeepNode(lastDelimiter); + } + + if (includeSourceSpans) { + var startPosition = includeMarker && opener.markerPosition != null ? opener.markerPosition : opener.bracketPosition; + node.setSourceSpans(scanner.getSource(startPosition, scanner.position()).getSourceSpans()); + } + + removeLastBracket(); + + // Remove nodes that we added since the opener, because we're replacing them + Node n = includeMarker && opener.markerNode != null ? opener.markerNode : opener.bracketNode; + while (n != null) { + var next = n.getNext(); + n.unlink(); + n = next; + } + + // Links within links are not allowed. We found this link, so there can be no other links around it. + // Note that this makes any syntax like `[foo]` behave the same as built-in links, which is probably a good + // default (it works for footnotes). It might be useful for a `LinkProcessor` to be able to specify the + // behavior; something we could add to `LinkResult` in the future if requested. + if (opener.markerNode == null || !includeMarker) { + disallowPreviousLinks(); + } + + return node; } private void addBracket(Bracket bracket) { @@ -546,127 +491,154 @@ private void removeLastBracket() { lastBracket = lastBracket.previous; } + private void disallowPreviousLinks() { + Bracket bracket = lastBracket; + while (bracket != null) { + if (bracket.markerNode == null) { + // Disallow link opener. It will still get matched, but will not result in a link. + bracket.allowed = false; + } + bracket = bracket.previous; + } + } + + /** + * Try to parse the destination and an optional title for an inline link/image. 
+ */ + private static DestinationTitle parseInlineDestinationTitle(Scanner scanner) { + if (!scanner.next('(')) { + return null; + } + + scanner.whitespace(); + String dest = parseLinkDestination(scanner); + if (dest == null) { + return null; + } + + String title = null; + int whitespace = scanner.whitespace(); + // title needs a whitespace before + if (whitespace >= 1) { + title = parseLinkTitle(scanner); + scanner.whitespace(); + } + if (!scanner.next(')')) { + // Don't have a closing `)`, so it's not a destination and title. + // Note that something like `[foo](` could still be valid later, `(` will just be text. + return null; + } + return new DestinationTitle(dest, title); + } + /** * Attempt to parse link destination, returning the string or null if no match. */ - private String parseLinkDestination() { - int afterDest = LinkScanner.scanLinkDestination(input, index); - if (afterDest == -1) { + private static String parseLinkDestination(Scanner scanner) { + char delimiter = scanner.peek(); + Position start = scanner.position(); + if (!LinkScanner.scanLinkDestination(scanner)) { return null; } String dest; - if (peek() == '<') { + if (delimiter == '<') { // chop off surrounding <..>: - dest = input.substring(index + 1, afterDest - 1); + String rawDestination = scanner.getSource(start, scanner.position()).getContent(); + dest = rawDestination.substring(1, rawDestination.length() - 1); } else { - dest = input.substring(index, afterDest); + dest = scanner.getSource(start, scanner.position()).getContent(); } - index = afterDest; return Escaping.unescapeString(dest); } /** * Attempt to parse link title (sans quotes), returning the string or null if no match. 
*/ - private String parseLinkTitle() { - int afterTitle = LinkScanner.scanLinkTitle(input, index); - if (afterTitle == -1) { + private static String parseLinkTitle(Scanner scanner) { + Position start = scanner.position(); + if (!LinkScanner.scanLinkTitle(scanner)) { return null; } // chop off ', " or parens - String title = input.substring(index + 1, afterTitle - 1); - index = afterTitle; + String rawTitle = scanner.getSource(start, scanner.position()).getContent(); + String title = rawTitle.substring(1, rawTitle.length() - 1); return Escaping.unescapeString(title); } /** - * Attempt to parse a link label, returning number of characters parsed. + * Attempt to parse a link label, returning the label between the brackets or null. */ - int parseLinkLabel() { - if (index >= input.length() || input.charAt(index) != '[') { - return 0; + static String parseLinkLabel(Scanner scanner) { + if (!scanner.next('[')) { + return null; } - int startContent = index + 1; - int endContent = LinkScanner.scanLinkLabelContent(input, startContent); - // spec: A link label can have at most 999 characters inside the square brackets. - int contentLength = endContent - startContent; - if (endContent == -1 || contentLength > 999) { - return 0; - } - if (endContent >= input.length() || input.charAt(endContent) != ']') { - return 0; + Position start = scanner.position(); + if (!LinkScanner.scanLinkLabelContent(scanner)) { + return null; } - index = endContent + 1; - return contentLength + 2; - } + Position end = scanner.position(); - /** - * Attempt to parse an autolink (URL or email in pointy brackets). 
- */ - private Node parseAutolink() { - String m; - if ((m = match(EMAIL_AUTOLINK)) != null) { - String dest = m.substring(1, m.length() - 1); - Link node = new Link("mailto:" + dest, null); - node.appendChild(new Text(dest)); - return node; - } else if ((m = match(AUTOLINK)) != null) { - String dest = m.substring(1, m.length() - 1); - Link node = new Link(dest, null); - node.appendChild(new Text(dest)); - return node; - } else { + if (!scanner.next(']')) { return null; } - } - /** - * Attempt to parse inline HTML. - */ - private Node parseHtmlInline() { - String m = match(HTML_TAG); - if (m != null) { - HtmlInline node = new HtmlInline(); - node.setLiteral(m); - return node; - } else { + String content = scanner.getSource(start, end).getContent(); + // spec: A link label can have at most 999 characters inside the square brackets. + if (content.length() > 999) { return null; } + + return content; } - /** - * Attempt to parse a HTML style entity. - */ - private Node parseEntity() { - String m; - if ((m = match(ENTITY_HERE)) != null) { - return text(Html5Entities.entityToString(m)); + private Node parseLineBreak() { + scanner.next(); + + var hard = trailingSpaces >= 2; + trailingSpaces = 0; + if (hard) { + return new HardLineBreak(); } else { - return null; + return new SoftLineBreak(); } } /** - * Parse a run of ordinary characters, or a single character with a special meaning in markdown, as a plain string. + * Parse the next character as plain text, and possibly more if the following characters are non-special. 
*/ - private Node parseString() { - int begin = index; - int length = input.length(); - while (index != length) { - if (specialCharacters.get(input.charAt(index))) { + private Node parseText() { + Position start = scanner.position(); + scanner.next(); + char c; + while (true) { + c = scanner.peek(); + if (c == Scanner.END || specialCharacters.get(c)) { break; } - index++; + scanner.next(); } - if (begin != index) { - return text(input, begin, index); - } else { - return null; + + SourceLines source = scanner.getSource(start, scanner.position()); + String content = source.getContent(); + + if (c == '\n') { + // We parsed until the end of the line. Trim any trailing spaces and remember them (for hard line breaks). + int end = Characters.skipBackwards(' ', content, content.length() - 1, 0) + 1; + trailingSpaces = content.length() - end; + content = content.substring(0, end); + } else if (c == Scanner.END) { + // For the last line, both tabs and spaces are trimmed for some reason (checked with commonmark.js). + int end = Characters.skipSpaceTabBackwards(content, content.length() - 1, 0) + 1; + content = content.substring(0, end); } + + Text text = new Text(content); + text.setSourceSpans(source.getSourceSpans()); + return text; } /** @@ -676,31 +648,32 @@ private Node parseString() { * @return information about delimiter run, or {@code null} */ private DelimiterData scanDelimiters(DelimiterProcessor delimiterProcessor, char delimiterChar) { - int startIndex = index; - - int delimiterCount = 0; - while (peek() == delimiterChar) { - delimiterCount++; - index++; - } + int before = scanner.peekPreviousCodePoint(); + Position start = scanner.position(); + // Quick check to see if we have enough delimiters. + int delimiterCount = scanner.matchMultiple(delimiterChar); if (delimiterCount < delimiterProcessor.getMinLength()) { - index = startIndex; + scanner.setPosition(start); return null; } - String before = startIndex == 0 ? 
"\n" : - input.substring(startIndex - 1, startIndex); + // We do have enough, extract a text node for each delimiter character. + List delimiters = new ArrayList<>(); + scanner.setPosition(start); + Position positionBefore = start; + while (scanner.next(delimiterChar)) { + delimiters.add(text(scanner.getSource(positionBefore, scanner.position()))); + positionBefore = scanner.position(); + } - char charAfter = peek(); - String after = charAfter == '\0' ? "\n" : - String.valueOf(charAfter); + int after = scanner.peekCodePoint(); // We could be more lazy here, in most cases we don't need to do every match case. - boolean beforeIsPunctuation = PUNCTUATION.matcher(before).matches(); - boolean beforeIsWhitespace = UNICODE_WHITESPACE_CHAR.matcher(before).matches(); - boolean afterIsPunctuation = PUNCTUATION.matcher(after).matches(); - boolean afterIsWhitespace = UNICODE_WHITESPACE_CHAR.matcher(after).matches(); + boolean beforeIsPunctuation = before == Scanner.END || Characters.isPunctuationCodePoint(before); + boolean beforeIsWhitespace = before == Scanner.END || Characters.isWhitespaceCodePoint(before); + boolean afterIsPunctuation = after == Scanner.END || Characters.isPunctuationCodePoint(after); + boolean afterIsWhitespace = after == Scanner.END || Characters.isWhitespaceCodePoint(after); boolean leftFlanking = !afterIsWhitespace && (!afterIsPunctuation || beforeIsWhitespace || beforeIsPunctuation); @@ -716,8 +689,7 @@ private DelimiterData scanDelimiters(DelimiterProcessor delimiterProcessor, char canClose = rightFlanking && delimiterChar == delimiterProcessor.getClosingCharacter(); } - index = startIndex; - return new DelimiterData(delimiterCount, canOpen, canClose); + return new DelimiterData(delimiters, canOpen, canClose); } private void processDelimiters(Delimiter stackBottom) { @@ -734,7 +706,7 @@ private void processDelimiters(Delimiter stackBottom) { char delimiterChar = closer.delimiterChar; DelimiterProcessor delimiterProcessor = 
delimiterProcessors.get(delimiterChar); - if (!closer.canClose || delimiterProcessor == null) { + if (!closer.canClose() || delimiterProcessor == null) { closer = closer.next; continue; } @@ -742,15 +714,15 @@ private void processDelimiters(Delimiter stackBottom) { char openingDelimiterChar = delimiterProcessor.getOpeningCharacter(); // Found delimiter closer. Now look back for first matching opener. - int useDelims = 0; + int usedDelims = 0; boolean openerFound = false; boolean potentialOpenerFound = false; Delimiter opener = closer.previous; while (opener != null && opener != stackBottom && opener != openersBottom.get(delimiterChar)) { - if (opener.canOpen && opener.delimiterChar == openingDelimiterChar) { + if (opener.canOpen() && opener.delimiterChar == openingDelimiterChar) { potentialOpenerFound = true; - useDelims = delimiterProcessor.getDelimiterUse(opener, closer); - if (useDelims > 0) { + usedDelims = delimiterProcessor.process(opener, closer); + if (usedDelims > 0) { openerFound = true; break; } @@ -768,7 +740,7 @@ private void processDelimiters(Delimiter stackBottom) { // we want to consider it next time because the number // of delimiters can change as we continue processing. openersBottom.put(delimiterChar, closer.previous); - if (!closer.canOpen) { + if (!closer.canOpen()) { // We can remove a closer that can't be an opener, // once we've seen there's no matching opener: removeDelimiterKeepNode(closer); @@ -778,33 +750,26 @@ private void processDelimiters(Delimiter stackBottom) { continue; } - Text openerNode = opener.node; - Text closerNode = closer.node; - - // Remove number of used delimiters from stack and inline nodes. - opener.length -= useDelims; - closer.length -= useDelims; - openerNode.setLiteral( - openerNode.getLiteral().substring(0, - openerNode.getLiteral().length() - useDelims)); - closerNode.setLiteral( - closerNode.getLiteral().substring(0, - closerNode.getLiteral().length() - useDelims)); + // Remove number of used delimiters nodes. 
+ for (int i = 0; i < usedDelims; i++) { + Text delimiter = opener.characters.remove(opener.characters.size() - 1); + delimiter.unlink(); + } + for (int i = 0; i < usedDelims; i++) { + Text delimiter = closer.characters.remove(0); + delimiter.unlink(); + } removeDelimitersBetween(opener, closer); - // The delimiter processor can re-parent the nodes between opener and closer, - // so make sure they're contiguous already. Exclusive because we want to keep opener/closer themselves. - mergeTextNodesBetweenExclusive(openerNode, closerNode); - delimiterProcessor.process(openerNode, closerNode, useDelims); // No delimiter characters left to process, so we can remove delimiter and the now empty node. - if (opener.length == 0) { - removeDelimiterAndNode(opener); + if (opener.length() == 0) { + removeDelimiterAndNodes(opener); } - if (closer.length == 0) { + if (closer.length() == 0) { Delimiter next = closer.next; - removeDelimiterAndNode(closer); + removeDelimiterAndNodes(closer); closer = next; } } @@ -827,9 +792,7 @@ private void removeDelimitersBetween(Delimiter opener, Delimiter closer) { /** * Remove the delimiter and the corresponding text node. For used delimiters, e.g. `*` in `*foo*`. 
*/ - private void removeDelimiterAndNode(Delimiter delim) { - Text node = delim.node; - node.unlink(); + private void removeDelimiterAndNodes(Delimiter delim) { removeDelimiter(delim); } @@ -852,18 +815,9 @@ private void removeDelimiter(Delimiter delim) { } } - private void mergeTextNodesBetweenExclusive(Node fromNode, Node toNode) { - // No nodes between them - if (fromNode == toNode || fromNode.getNext() == toNode) { - return; - } - - mergeTextNodesInclusive(fromNode.getNext(), toNode.getPrevious()); - } - private void mergeChildTextNodes(Node node) { - // No children or just one child node, no need for merging - if (node.getFirstChild() == node.getLastChild()) { + // No children, no need for merging + if (node.getFirstChild() == null) { return; } @@ -889,6 +843,8 @@ private void mergeTextNodesInclusive(Node fromNode, Node toNode) { first = null; last = null; length = 0; + + mergeChildTextNodes(node); } if (node == toNode) { break; @@ -903,29 +859,111 @@ private void mergeIfNeeded(Text first, Text last, int textLength) { if (first != null && last != null && first != last) { StringBuilder sb = new StringBuilder(textLength); sb.append(first.getLiteral()); + SourceSpans sourceSpans = null; + if (includeSourceSpans) { + sourceSpans = new SourceSpans(); + sourceSpans.addAll(first.getSourceSpans()); + } Node node = first.getNext(); Node stop = last.getNext(); while (node != stop) { sb.append(((Text) node).getLiteral()); + if (sourceSpans != null) { + sourceSpans.addAll(node.getSourceSpans()); + } + Node unlink = node; node = node.getNext(); unlink.unlink(); } String literal = sb.toString(); first.setLiteral(literal); + if (sourceSpans != null) { + first.setSourceSpans(sourceSpans.getSourceSpans()); + } } } private static class DelimiterData { - final int count; + final List characters; final boolean canClose; final boolean canOpen; - DelimiterData(int count, boolean canOpen, boolean canClose) { - this.count = count; + DelimiterData(List characters, boolean canOpen, 
boolean canClose) { + this.characters = characters; this.canOpen = canOpen; this.canClose = canClose; } } + + /** + * A destination and optional title for a link or image. + */ + private static class DestinationTitle { + final String destination; + final String title; + + public DestinationTitle(String destination, String title) { + this.destination = destination; + this.title = title; + } + } + + private static class LinkInfoImpl implements LinkInfo { + + private final Text marker; + private final Text openingBracket; + private final String text; + private final String label; + private final String destination; + private final String title; + private final Position afterTextBracket; + + private LinkInfoImpl(Text marker, Text openingBracket, String text, String label, + String destination, String title, Position afterTextBracket) { + this.marker = marker; + this.openingBracket = openingBracket; + this.text = text; + this.label = label; + this.destination = destination; + this.title = title; + this.afterTextBracket = afterTextBracket; + } + + @Override + public Text marker() { + return marker; + } + + @Override + public Text openingBracket() { + return openingBracket; + } + + @Override + public String text() { + return text; + } + + @Override + public String label() { + return label; + } + + @Override + public String destination() { + return destination; + } + + @Override + public String title() { + return title; + } + + @Override + public Position afterTextBracket() { + return afterTextBracket; + } + } } diff --git a/commonmark/src/main/java/org/commonmark/internal/LinkReferenceDefinitionParser.java b/commonmark/src/main/java/org/commonmark/internal/LinkReferenceDefinitionParser.java index 1fe2cbea7..637d3b111 100644 --- a/commonmark/src/main/java/org/commonmark/internal/LinkReferenceDefinitionParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/LinkReferenceDefinitionParser.java @@ -2,76 +2,96 @@ import org.commonmark.internal.util.Escaping; import 
org.commonmark.internal.util.LinkScanner; -import org.commonmark.internal.util.Parsing; import org.commonmark.node.LinkReferenceDefinition; +import org.commonmark.node.SourceSpan; +import org.commonmark.parser.SourceLine; +import org.commonmark.parser.SourceLines; +import org.commonmark.parser.beta.Position; +import org.commonmark.parser.beta.Scanner; import java.util.ArrayList; +import java.util.Collections; import java.util.List; /** * Parser for link reference definitions at the beginning of a paragraph. * - * @see
    Link reference definitions + * @see Link reference definitions */ public class LinkReferenceDefinitionParser { private State state = State.START_DEFINITION; - private final StringBuilder paragraph = new StringBuilder(); + private final List paragraphLines = new ArrayList<>(); private final List definitions = new ArrayList<>(); + private final List sourceSpans = new ArrayList<>(); private StringBuilder label; - private String normalizedLabel; private String destination; private char titleDelimiter; private StringBuilder title; private boolean referenceValid = false; - public void parse(CharSequence line) { - if (paragraph.length() != 0) { - paragraph.append('\n'); + public void parse(SourceLine line) { + paragraphLines.add(line); + if (state == State.PARAGRAPH) { + // We're in a paragraph now. Link reference definitions can only appear at the beginning, so once + // we're in a paragraph, there's no going back. + return; } - paragraph.append(line); - int i = 0; - while (i < line.length()) { + Scanner scanner = Scanner.of(SourceLines.of(line)); + while (scanner.hasNext()) { + boolean success; switch (state) { - case PARAGRAPH: { - // We're in a paragraph now. Link reference definitions can only appear at the beginning, so once - // we're in a paragraph, there's no going back. - return; - } case START_DEFINITION: { - i = startDefinition(line, i); + success = startDefinition(scanner); break; } case LABEL: { - i = label(line, i); + success = label(scanner); break; } case DESTINATION: { - i = destination(line, i); + success = destination(scanner); break; } case START_TITLE: { - i = startTitle(line, i); + success = startTitle(scanner); break; } case TITLE: { - i = title(line, i); + success = title(scanner); break; } + default: { + throw new IllegalStateException("Unknown parsing state: " + state); + } } - // -1 is returned if parsing failed, which means we fall back to treating text as a paragraph. 
- if (i == -1) { + // Parsing failed, which means we fall back to treating text as a paragraph. + if (!success) { state = State.PARAGRAPH; + // If parsing of the title part failed, we still have a valid reference that we can add, and we need to + // do it before the source span for this line is added. + finishReference(); return; } } } - CharSequence getParagraphContent() { - return paragraph; + public void addSourceSpan(SourceSpan sourceSpan) { + sourceSpans.add(sourceSpan); + } + + /** + * @return the lines that are normal paragraph content, without newlines + */ + SourceLines getParagraphLines() { + return SourceLines.of(paragraphLines); + } + + List getParagraphSourceSpans() { + return sourceSpans; } List getDefinitions() { @@ -83,96 +103,106 @@ State getState() { return state; } - private int startDefinition(CharSequence line, int i) { - i = Parsing.skipSpaceTab(line, i, line.length()); - if (i >= line.length() || line.charAt(i) != '[') { - return -1; + List removeLines(int lines) { + var removedSpans = Collections.unmodifiableList(new ArrayList<>( + sourceSpans.subList(Math.max(sourceSpans.size() - lines, 0), sourceSpans.size()))); + removeLast(lines, paragraphLines); + removeLast(lines, sourceSpans); + return removedSpans; + } + + private boolean startDefinition(Scanner scanner) { + // Finish any outstanding references now. We don't do this earlier because we need addSourceSpan to have been + // called before we do it. 
+ finishReference(); + + scanner.whitespace(); + if (!scanner.next('[')) { + return false; } state = State.LABEL; label = new StringBuilder(); - int labelStart = i + 1; - if (labelStart >= line.length()) { + if (!scanner.hasNext()) { label.append('\n'); } - - return labelStart; + return true; } - private int label(CharSequence line, int i) { - int afterLabel = LinkScanner.scanLinkLabelContent(line, i); - if (afterLabel == -1) { - return -1; + private boolean label(Scanner scanner) { + Position start = scanner.position(); + if (!LinkScanner.scanLinkLabelContent(scanner)) { + return false; } - label.append(line, i, afterLabel); + label.append(scanner.getSource(start, scanner.position()).getContent()); - if (afterLabel >= line.length()) { + if (!scanner.hasNext()) { // label might continue on next line label.append('\n'); - return afterLabel; - } else if (line.charAt(afterLabel) == ']') { - int colon = afterLabel + 1; + return true; + } else if (scanner.next(']')) { // end of label - if (colon >= line.length() || line.charAt(colon) != ':') { - return -1; + if (!scanner.next(':')) { + return false; } // spec: A link label can have at most 999 characters inside the square brackets. 
if (label.length() > 999) { - return -1; + return false; } String normalizedLabel = Escaping.normalizeLabelContent(label.toString()); if (normalizedLabel.isEmpty()) { - return -1; + return false; } - this.normalizedLabel = normalizedLabel; state = State.DESTINATION; - return Parsing.skipSpaceTab(line, colon + 1, line.length()); + scanner.whitespace(); + return true; } else { - return -1; + return false; } } - private int destination(CharSequence line, int i) { - i = Parsing.skipSpaceTab(line, i, line.length()); - int afterDestination = LinkScanner.scanLinkDestination(line, i); - if (afterDestination == -1) { - return -1; + private boolean destination(Scanner scanner) { + scanner.whitespace(); + Position start = scanner.position(); + if (!LinkScanner.scanLinkDestination(scanner)) { + return false; } - destination = (line.charAt(i) == '<') - ? line.subSequence(i + 1, afterDestination - 1).toString() - : line.subSequence(i, afterDestination).toString(); + String rawDestination = scanner.getSource(start, scanner.position()).getContent(); + destination = rawDestination.startsWith("<") ? + rawDestination.substring(1, rawDestination.length() - 1) : + rawDestination; - int afterSpace = Parsing.skipSpaceTab(line, afterDestination, line.length()); - if (afterSpace >= line.length()) { + int whitespace = scanner.whitespace(); + if (!scanner.hasNext()) { // Destination was at end of line, so this is a valid reference for sure (and maybe a title). // If not at end of line, wait for title to be valid first. 
referenceValid = true; - paragraph.setLength(0); - } else if (afterSpace == afterDestination) { + paragraphLines.clear(); + } else if (whitespace == 0) { // spec: The title must be separated from the link destination by whitespace - return -1; + return false; } state = State.START_TITLE; - return afterSpace; + return true; } - private int startTitle(CharSequence line, int i) { - i = Parsing.skipSpaceTab(line, i, line.length()); - if (i >= line.length()) { + private boolean startTitle(Scanner scanner) { + scanner.whitespace(); + if (!scanner.hasNext()) { state = State.START_DEFINITION; - return i; + return true; } titleDelimiter = '\0'; - char c = line.charAt(i); + char c = scanner.peek(); switch (c) { case '"': case '\'': @@ -186,46 +216,48 @@ private int startTitle(CharSequence line, int i) { if (titleDelimiter != '\0') { state = State.TITLE; title = new StringBuilder(); - i++; - if (i == line.length()) { + scanner.next(); + if (!scanner.hasNext()) { title.append('\n'); } } else { - finishReference(); // There might be another reference instead, try that for the same character. state = State.START_DEFINITION; } - return i; + return true; } - private int title(CharSequence line, int i) { - int afterTitle = LinkScanner.scanLinkTitleContent(line, i, titleDelimiter); - if (afterTitle == -1) { - // Invalid title, stop - return -1; + private boolean title(Scanner scanner) { + Position start = scanner.position(); + if (!LinkScanner.scanLinkTitleContent(scanner, titleDelimiter)) { + // Invalid title, stop. Title collected so far must not be used. 
+ title = null; + return false; } - title.append(line.subSequence(i, afterTitle)); + title.append(scanner.getSource(start, scanner.position()).getContent()); - if (afterTitle >= line.length()) { - // Title still going, continue on next line + if (!scanner.hasNext()) { + // Title ran until the end of line, so continue on next line (until we find the delimiter) title.append('\n'); - return afterTitle; + return true; } - int afterTitleDelimiter = afterTitle + 1; - int afterSpace = Parsing.skipSpaceTab(line, afterTitleDelimiter, line.length()); - if (afterSpace != line.length()) { + // Skip delimiter character + scanner.next(); + scanner.whitespace(); + if (scanner.hasNext()) { // spec: No further non-whitespace characters may occur on the line. - return -1; + // Title collected so far must not be used. + title = null; + return false; } referenceValid = true; - finishReference(); - paragraph.setLength(0); + paragraphLines.clear(); // See if there's another definition. state = State.START_DEFINITION; - return afterSpace; + return true; } private void finishReference() { @@ -235,15 +267,27 @@ private void finishReference() { String d = Escaping.unescapeString(destination); String t = title != null ? Escaping.unescapeString(title.toString()) : null; - definitions.add(new LinkReferenceDefinition(normalizedLabel, d, t)); + LinkReferenceDefinition definition = new LinkReferenceDefinition(label.toString(), d, t); + definition.setSourceSpans(sourceSpans); + sourceSpans.clear(); + definitions.add(definition); label = null; referenceValid = false; - normalizedLabel = null; destination = null; title = null; } + private static void removeLast(int n, List list) { + if (n >= list.size()) { + list.clear(); + } else { + for (int i = 0; i < n; i++) { + list.remove(list.size() - 1); + } + } + } + enum State { // Looking for the start of a definition, i.e. 
`[` START_DEFINITION, diff --git a/commonmark/src/main/java/org/commonmark/internal/ListBlockParser.java b/commonmark/src/main/java/org/commonmark/internal/ListBlockParser.java index de1558f92..fbf034757 100644 --- a/commonmark/src/main/java/org/commonmark/internal/ListBlockParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/ListBlockParser.java @@ -4,6 +4,8 @@ import org.commonmark.node.*; import org.commonmark.parser.block.*; +import java.util.Objects; + public class ListBlockParser extends AbstractBlockParser { private final ListBlock block; @@ -90,7 +92,7 @@ private static ListData parseList(CharSequence line, final int markerIndex, fina if (inParagraph) { // If the list item is ordered, the start number must be 1 to interrupt a paragraph. - if (listBlock instanceof OrderedList && ((OrderedList) listBlock).getStartNumber() != 1) { + if (listBlock instanceof OrderedList && ((OrderedList) listBlock).getMarkerStartNumber() != 1) { return null; } // Empty list item can not interrupt a paragraph. @@ -116,7 +118,7 @@ private static ListMarkerData parseListMarker(CharSequence line, int index) { case '*': if (isSpaceTabOrEnd(line, index + 1)) { BulletList bulletList = new BulletList(); - bulletList.setBulletMarker(c); + bulletList.setMarker(String.valueOf(c)); return new ListMarkerData(bulletList, index + 1); } else { return null; @@ -126,7 +128,7 @@ private static ListMarkerData parseListMarker(CharSequence line, int index) { } } - // spec: An ordered list marker is a sequence of 1–9 arabic digits (0-9), followed by either a `.` character or a + // spec: An ordered list marker is a sequence of 1-9 arabic digits (0-9), followed by either a `.` character or a // `)` character. 
private static ListMarkerData parseOrderedList(CharSequence line, int index) { int digits = 0; @@ -154,8 +156,8 @@ private static ListMarkerData parseOrderedList(CharSequence line, int index) { if (digits >= 1 && isSpaceTabOrEnd(line, i + 1)) { String number = line.subSequence(index, i).toString(); OrderedList orderedList = new OrderedList(); - orderedList.setStartNumber(Integer.parseInt(number)); - orderedList.setDelimiter(c); + orderedList.setMarkerStartNumber(Integer.parseInt(number)); + orderedList.setMarkerDelimiter(String.valueOf(c)); return new ListMarkerData(orderedList, i + 1); } else { return null; @@ -188,17 +190,13 @@ private static boolean isSpaceTabOrEnd(CharSequence line, int index) { */ private static boolean listsMatch(ListBlock a, ListBlock b) { if (a instanceof BulletList && b instanceof BulletList) { - return equals(((BulletList) a).getBulletMarker(), ((BulletList) b).getBulletMarker()); + return Objects.equals(((BulletList) a).getMarker(), ((BulletList) b).getMarker()); } else if (a instanceof OrderedList && b instanceof OrderedList) { - return equals(((OrderedList) a).getDelimiter(), ((OrderedList) b).getDelimiter()); + return Objects.equals(((OrderedList) a).getMarkerDelimiter(), ((OrderedList) b).getMarkerDelimiter()); } return false; } - private static boolean equals(Object a, Object b) { - return (a == null) ? 
(b == null) : a.equals(b); - } - public static class Factory extends AbstractBlockParserFactory { @Override @@ -210,14 +208,14 @@ public BlockStart tryStart(ParserState state, MatchedBlockParser matchedBlockPar } int markerIndex = state.getNextNonSpaceIndex(); int markerColumn = state.getColumn() + state.getIndent(); - boolean inParagraph = matchedBlockParser.getParagraphContent() != null; - ListData listData = parseList(state.getLine(), markerIndex, markerColumn, inParagraph); + boolean inParagraph = !matchedBlockParser.getParagraphLines().isEmpty(); + ListData listData = parseList(state.getLine().getContent(), markerIndex, markerColumn, inParagraph); if (listData == null) { return BlockStart.none(); } int newColumn = listData.contentColumn; - ListItemParser listItemParser = new ListItemParser(newColumn - state.getColumn()); + ListItemParser listItemParser = new ListItemParser(state.getIndent(), newColumn - state.getColumn()); // prepend the list block if needed if (!(matched instanceof ListBlockParser) || diff --git a/commonmark/src/main/java/org/commonmark/internal/ListItemParser.java b/commonmark/src/main/java/org/commonmark/internal/ListItemParser.java index 96b086dab..49722dff2 100644 --- a/commonmark/src/main/java/org/commonmark/internal/ListItemParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/ListItemParser.java @@ -20,8 +20,10 @@ public class ListItemParser extends AbstractBlockParser { private boolean hadBlankLine; - public ListItemParser(int contentIndent) { + public ListItemParser(int markerIndent, int contentIndent) { this.contentIndent = contentIndent; + block.setMarkerIndent(markerIndent); + block.setContentIndent(contentIndent); } @Override @@ -66,6 +68,7 @@ public BlockContinue tryContinue(ParserState state) { if (state.getIndent() >= contentIndent) { return BlockContinue.atColumn(state.getColumn() + contentIndent); } else { + // Note: We'll hit this case for lazy continuation lines, they will get added later. 
return BlockContinue.none(); } } diff --git a/commonmark/src/main/java/org/commonmark/internal/ParagraphParser.java b/commonmark/src/main/java/org/commonmark/internal/ParagraphParser.java index ee2899a93..27eb1e647 100644 --- a/commonmark/src/main/java/org/commonmark/internal/ParagraphParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/ParagraphParser.java @@ -1,9 +1,9 @@ package org.commonmark.internal; -import org.commonmark.node.Block; -import org.commonmark.node.LinkReferenceDefinition; -import org.commonmark.node.Paragraph; +import org.commonmark.node.*; import org.commonmark.parser.InlineParser; +import org.commonmark.parser.SourceLine; +import org.commonmark.parser.SourceLines; import org.commonmark.parser.block.AbstractBlockParser; import org.commonmark.parser.block.BlockContinue; import org.commonmark.parser.block.ParserState; @@ -13,7 +13,7 @@ public class ParagraphParser extends AbstractBlockParser { private final Paragraph block = new Paragraph(); - private LinkReferenceDefinitionParser linkReferenceDefinitionParser = new LinkReferenceDefinitionParser(); + private final LinkReferenceDefinitionParser linkReferenceDefinitionParser = new LinkReferenceDefinitionParser(); @Override public boolean canHaveLazyContinuationLines() { @@ -35,30 +35,52 @@ public BlockContinue tryContinue(ParserState state) { } @Override - public void addLine(CharSequence line) { + public void addLine(SourceLine line) { linkReferenceDefinitionParser.parse(line); } + @Override + public void addSourceSpan(SourceSpan sourceSpan) { + // Some source spans might belong to link reference definitions, others to the paragraph. + // The parser will handle that. 
+ linkReferenceDefinitionParser.addSourceSpan(sourceSpan); + } + + @Override + public List> getDefinitions() { + var map = new DefinitionMap<>(LinkReferenceDefinition.class); + for (var def : linkReferenceDefinitionParser.getDefinitions()) { + map.putIfAbsent(def.getLabel(), def); + } + return List.of(map); + } + @Override public void closeBlock() { - if (linkReferenceDefinitionParser.getParagraphContent().length() == 0) { + for (var def : linkReferenceDefinitionParser.getDefinitions()) { + block.insertBefore(def); + } + + if (linkReferenceDefinitionParser.getParagraphLines().isEmpty()) { block.unlink(); + } else { + block.setSourceSpans(linkReferenceDefinitionParser.getParagraphSourceSpans()); } } @Override public void parseInlines(InlineParser inlineParser) { - CharSequence content = linkReferenceDefinitionParser.getParagraphContent(); - if (content.length() > 0) { - inlineParser.parse(content.toString(), block); + SourceLines lines = linkReferenceDefinitionParser.getParagraphLines(); + if (!lines.isEmpty()) { + inlineParser.parse(lines, block); } } - public CharSequence getContentString() { - return linkReferenceDefinitionParser.getParagraphContent(); + public SourceLines getParagraphLines() { + return linkReferenceDefinitionParser.getParagraphLines(); } - public List getDefinitions() { - return linkReferenceDefinitionParser.getDefinitions(); + public List removeLines(int lines) { + return linkReferenceDefinitionParser.removeLines(lines); } } diff --git a/commonmark/src/main/java/org/commonmark/internal/StaggeredDelimiterProcessor.java b/commonmark/src/main/java/org/commonmark/internal/StaggeredDelimiterProcessor.java index c510edbe5..2836e346a 100644 --- a/commonmark/src/main/java/org/commonmark/internal/StaggeredDelimiterProcessor.java +++ b/commonmark/src/main/java/org/commonmark/internal/StaggeredDelimiterProcessor.java @@ -1,6 +1,5 @@ package org.commonmark.internal; -import org.commonmark.node.Text; import 
org.commonmark.parser.delimiter.DelimiterProcessor; import org.commonmark.parser.delimiter.DelimiterRun; @@ -52,7 +51,7 @@ void add(DelimiterProcessor dp) { added = true; break; } else if (len == pLen) { - throw new IllegalArgumentException("Cannot add two delimiter processors for char '" + delim + "' and minimum length " + len); + throw new IllegalArgumentException("Cannot add two delimiter processors for char '" + delim + "' and minimum length " + len + "; conflicting processors: " + p + ", " + dp); } } if (!added) { @@ -71,12 +70,7 @@ private DelimiterProcessor findProcessor(int len) { } @Override - public int getDelimiterUse(DelimiterRun opener, DelimiterRun closer) { - return findProcessor(opener.length()).getDelimiterUse(opener, closer); - } - - @Override - public void process(Text opener, Text closer, int delimiterUse) { - findProcessor(delimiterUse).process(opener, closer, delimiterUse); + public int process(DelimiterRun openingRun, DelimiterRun closingRun) { + return findProcessor(openingRun.length()).process(openingRun, closingRun); } } diff --git a/commonmark/src/main/java/org/commonmark/internal/ThematicBreakParser.java b/commonmark/src/main/java/org/commonmark/internal/ThematicBreakParser.java index 6d9edf761..0f0613221 100644 --- a/commonmark/src/main/java/org/commonmark/internal/ThematicBreakParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/ThematicBreakParser.java @@ -8,6 +8,10 @@ public class ThematicBreakParser extends AbstractBlockParser { private final ThematicBreak block = new ThematicBreak(); + public ThematicBreakParser(String literal) { + block.setLiteral(literal); + } + @Override public Block getBlock() { return block; @@ -27,9 +31,10 @@ public BlockStart tryStart(ParserState state, MatchedBlockParser matchedBlockPar return BlockStart.none(); } int nextNonSpace = state.getNextNonSpaceIndex(); - CharSequence line = state.getLine(); + CharSequence line = state.getLine().getContent(); if (isThematicBreak(line, nextNonSpace)) { 
- return BlockStart.of(new ThematicBreakParser()).atIndex(line.length()); + var literal = String.valueOf(line.subSequence(state.getIndex(), line.length())); + return BlockStart.of(new ThematicBreakParser(literal)).atIndex(line.length()); } else { return BlockStart.none(); } diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/AutolinkInlineParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/AutolinkInlineParser.java new file mode 100644 index 000000000..a18966e54 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/internal/inline/AutolinkInlineParser.java @@ -0,0 +1,61 @@ +package org.commonmark.internal.inline; + +import org.commonmark.node.Link; +import org.commonmark.node.Text; +import org.commonmark.parser.SourceLines; +import org.commonmark.parser.beta.*; + +import java.util.Set; +import java.util.regex.Pattern; + +/** + * Attempt to parse an autolink (URL or email in pointy brackets). + */ +public class AutolinkInlineParser implements InlineContentParser { + + private static final Pattern URI = Pattern + .compile("^[a-zA-Z][a-zA-Z0-9.+-]{1,31}:[^<>\u0000-\u0020]*$"); + + private static final Pattern EMAIL = Pattern + .compile("^([a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)$"); + + @Override + public ParsedInline tryParse(InlineParserState inlineParserState) { + Scanner scanner = inlineParserState.scanner(); + scanner.next(); + Position textStart = scanner.position(); + if (scanner.find('>') > 0) { + SourceLines textSource = scanner.getSource(textStart, scanner.position()); + String content = textSource.getContent(); + scanner.next(); + + String destination = null; + if (URI.matcher(content).matches()) { + destination = content; + } else if (EMAIL.matcher(content).matches()) { + destination = "mailto:" + content; + } + + if (destination != null) { + Link link = new Link(destination, null); + Text text = new Text(content); + 
text.setSourceSpans(textSource.getSourceSpans()); + link.appendChild(text); + return ParsedInline.of(link, scanner.position()); + } + } + return ParsedInline.none(); + } + + public static class Factory implements InlineContentParserFactory { + @Override + public Set getTriggerCharacters() { + return Set.of('<'); + } + + @Override + public InlineContentParser create() { + return new AutolinkInlineParser(); + } + } +} diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/BackslashInlineParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/BackslashInlineParser.java new file mode 100644 index 000000000..7baeed4de --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/internal/inline/BackslashInlineParser.java @@ -0,0 +1,48 @@ +package org.commonmark.internal.inline; + +import org.commonmark.internal.util.Escaping; +import org.commonmark.node.HardLineBreak; +import org.commonmark.node.Text; +import org.commonmark.parser.beta.*; + +import java.util.Set; +import java.util.regex.Pattern; + +/** + * Parse a backslash-escaped special character, adding either the escaped character, a hard line break + * (if the backslash is followed by a newline), or a literal backslash to the block's children. 
+ */ +public class BackslashInlineParser implements InlineContentParser { + + private static final Pattern ESCAPABLE = Pattern.compile('^' + Escaping.ESCAPABLE); + + @Override + public ParsedInline tryParse(InlineParserState inlineParserState) { + Scanner scanner = inlineParserState.scanner(); + // Backslash + scanner.next(); + + char next = scanner.peek(); + if (next == '\n') { + scanner.next(); + return ParsedInline.of(new HardLineBreak(), scanner.position()); + } else if (ESCAPABLE.matcher(String.valueOf(next)).matches()) { + scanner.next(); + return ParsedInline.of(new Text(String.valueOf(next)), scanner.position()); + } else { + return ParsedInline.of(new Text("\\"), scanner.position()); + } + } + + public static class Factory implements InlineContentParserFactory { + @Override + public Set getTriggerCharacters() { + return Set.of('\\'); + } + + @Override + public InlineContentParser create() { + return new BackslashInlineParser(); + } + } +} diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/BackticksInlineParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/BackticksInlineParser.java new file mode 100644 index 000000000..b8e8984e8 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/internal/inline/BackticksInlineParser.java @@ -0,0 +1,63 @@ +package org.commonmark.internal.inline; + +import org.commonmark.node.Code; +import org.commonmark.node.Text; +import org.commonmark.parser.SourceLines; +import org.commonmark.parser.beta.*; +import org.commonmark.text.Characters; + +import java.util.Set; + +/** + * Attempt to parse backticks, returning either a backtick code span or a literal sequence of backticks. 
+ */ +public class BackticksInlineParser implements InlineContentParser { + + @Override + public ParsedInline tryParse(InlineParserState inlineParserState) { + Scanner scanner = inlineParserState.scanner(); + Position start = scanner.position(); + int openingTicks = scanner.matchMultiple('`'); + Position afterOpening = scanner.position(); + + while (scanner.find('`') > 0) { + Position beforeClosing = scanner.position(); + int count = scanner.matchMultiple('`'); + if (count == openingTicks) { + Code node = new Code(); + + String content = scanner.getSource(afterOpening, beforeClosing).getContent(); + content = content.replace('\n', ' '); + + // spec: If the resulting string both begins and ends with a space character, but does not consist + // entirely of space characters, a single space character is removed from the front and back. + if (content.length() >= 3 && + content.charAt(0) == ' ' && + content.charAt(content.length() - 1) == ' ' && + Characters.hasNonSpace(content)) { + content = content.substring(1, content.length() - 1); + } + + node.setLiteral(content); + return ParsedInline.of(node, scanner.position()); + } + } + + // If we got here, we didn't find a matching closing backtick sequence. 
+ SourceLines source = scanner.getSource(start, afterOpening); + Text text = new Text(source.getContent()); + return ParsedInline.of(text, afterOpening); + } + + public static class Factory implements InlineContentParserFactory { + @Override + public Set getTriggerCharacters() { + return Set.of('`'); + } + + @Override + public InlineContentParser create() { + return new BackticksInlineParser(); + } + } +} diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/CoreLinkProcessor.java b/commonmark/src/main/java/org/commonmark/internal/inline/CoreLinkProcessor.java new file mode 100644 index 000000000..528750aba --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/internal/inline/CoreLinkProcessor.java @@ -0,0 +1,37 @@ +package org.commonmark.internal.inline; + +import org.commonmark.node.Image; +import org.commonmark.node.Link; +import org.commonmark.node.LinkReferenceDefinition; +import org.commonmark.parser.InlineParserContext; +import org.commonmark.parser.beta.LinkInfo; +import org.commonmark.parser.beta.LinkProcessor; +import org.commonmark.parser.beta.LinkResult; +import org.commonmark.parser.beta.Scanner; + +public class CoreLinkProcessor implements LinkProcessor { + + @Override + public LinkResult process(LinkInfo linkInfo, Scanner scanner, InlineParserContext context) { + if (linkInfo.destination() != null) { + // Inline link + return process(linkInfo, scanner, linkInfo.destination(), linkInfo.title()); + } + + var label = linkInfo.label(); + var ref = label != null && !label.isEmpty() ? 
label : linkInfo.text(); + var def = context.getDefinition(LinkReferenceDefinition.class, ref); + if (def != null) { + // Reference link + return process(linkInfo, scanner, def.getDestination(), def.getTitle()); + } + return LinkResult.none(); + } + + private static LinkResult process(LinkInfo linkInfo, Scanner scanner, String destination, String title) { + if (linkInfo.marker() != null && linkInfo.marker().getLiteral().equals("!")) { + return LinkResult.wrapTextIn(new Image(destination, title), scanner.position()).includeMarker(); + } + return LinkResult.wrapTextIn(new Link(destination, title), scanner.position()); + } +} diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/EmphasisDelimiterProcessor.java b/commonmark/src/main/java/org/commonmark/internal/inline/EmphasisDelimiterProcessor.java index 98b43938c..493e4299c 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/EmphasisDelimiterProcessor.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/EmphasisDelimiterProcessor.java @@ -1,9 +1,6 @@ package org.commonmark.internal.inline; -import org.commonmark.node.Emphasis; -import org.commonmark.node.Node; -import org.commonmark.node.StrongEmphasis; -import org.commonmark.node.Text; +import org.commonmark.node.*; import org.commonmark.parser.delimiter.DelimiterProcessor; import org.commonmark.parser.delimiter.DelimiterRun; @@ -31,35 +28,39 @@ public int getMinLength() { } @Override - public int getDelimiterUse(DelimiterRun opener, DelimiterRun closer) { + public int process(DelimiterRun openingRun, DelimiterRun closingRun) { // "multiple of 3" rule for internal delimiter runs - if ((opener.canClose() || closer.canOpen()) && - closer.originalLength() % 3 != 0 && - (opener.originalLength() + closer.originalLength()) % 3 == 0) { + if ((openingRun.canClose() || closingRun.canOpen()) && + closingRun.originalLength() % 3 != 0 && + (openingRun.originalLength() + closingRun.originalLength()) % 3 == 0) { return 0; } + + int 
usedDelimiters; + Node emphasis; // calculate actual number of delimiters used from this closer - if (opener.length() >= 2 && closer.length() >= 2) { - return 2; + if (openingRun.length() >= 2 && closingRun.length() >= 2) { + usedDelimiters = 2; + emphasis = new StrongEmphasis(String.valueOf(delimiterChar) + delimiterChar); } else { - return 1; + usedDelimiters = 1; + emphasis = new Emphasis(String.valueOf(delimiterChar)); } - } - @Override - public void process(Text opener, Text closer, int delimiterUse) { - String singleDelimiter = String.valueOf(getOpeningCharacter()); - Node emphasis = delimiterUse == 1 - ? new Emphasis(singleDelimiter) - : new StrongEmphasis(singleDelimiter + singleDelimiter); + SourceSpans sourceSpans = SourceSpans.empty(); + sourceSpans.addAllFrom(openingRun.getOpeners(usedDelimiters)); - Node tmp = opener.getNext(); - while (tmp != null && tmp != closer) { - Node next = tmp.getNext(); - emphasis.appendChild(tmp); - tmp = next; + Text opener = openingRun.getOpener(); + for (Node node : Nodes.between(opener, closingRun.getCloser())) { + emphasis.appendChild(node); + sourceSpans.addAll(node.getSourceSpans()); } + sourceSpans.addAllFrom(closingRun.getClosers(usedDelimiters)); + + emphasis.setSourceSpans(sourceSpans.getSourceSpans()); opener.insertAfter(emphasis); + + return usedDelimiters; } } diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/EntityInlineParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/EntityInlineParser.java new file mode 100644 index 000000000..c24e60747 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/internal/inline/EntityInlineParser.java @@ -0,0 +1,69 @@ +package org.commonmark.internal.inline; + +import org.commonmark.internal.util.Html5Entities; +import org.commonmark.node.Text; +import org.commonmark.parser.beta.*; +import org.commonmark.text.AsciiMatcher; + +import java.util.Set; + +/** + * Attempts to parse an HTML entity or numeric character reference. 
+ */ +public class EntityInlineParser implements InlineContentParser { + + private static final AsciiMatcher hex = AsciiMatcher.builder().range('0', '9').range('A', 'F').range('a', 'f').build(); + private static final AsciiMatcher dec = AsciiMatcher.builder().range('0', '9').build(); + private static final AsciiMatcher entityStart = AsciiMatcher.builder().range('A', 'Z').range('a', 'z').build(); + private static final AsciiMatcher entityContinue = entityStart.newBuilder().range('0', '9').build(); + + @Override + public ParsedInline tryParse(InlineParserState inlineParserState) { + Scanner scanner = inlineParserState.scanner(); + Position start = scanner.position(); + // Skip `&` + scanner.next(); + + char c = scanner.peek(); + if (c == '#') { + // Numeric + scanner.next(); + if (scanner.next('x') || scanner.next('X')) { + int digits = scanner.match(hex); + if (1 <= digits && digits <= 6 && scanner.next(';')) { + return entity(scanner, start); + } + } else { + int digits = scanner.match(dec); + if (1 <= digits && digits <= 7 && scanner.next(';')) { + return entity(scanner, start); + } + } + } else if (entityStart.matches(c)) { + scanner.match(entityContinue); + if (scanner.next(';')) { + return entity(scanner, start); + } + } + + return ParsedInline.none(); + } + + private ParsedInline entity(Scanner scanner, Position start) { + String text = scanner.getSource(start, scanner.position()).getContent(); + return ParsedInline.of(new Text(Html5Entities.entityToString(text)), scanner.position()); + } + + public static class Factory implements InlineContentParserFactory { + + @Override + public Set getTriggerCharacters() { + return Set.of('&'); + } + + @Override + public InlineContentParser create() { + return new EntityInlineParser(); + } + } +} diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/HtmlInlineParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/HtmlInlineParser.java new file mode 100644 index 000000000..a48ea5022 --- 
/dev/null +++ b/commonmark/src/main/java/org/commonmark/internal/inline/HtmlInlineParser.java @@ -0,0 +1,217 @@ +package org.commonmark.internal.inline; + +import org.commonmark.node.HtmlInline; +import org.commonmark.parser.beta.*; +import org.commonmark.text.AsciiMatcher; + +import java.util.Set; + +/** + * Attempt to parse inline HTML. + */ +public class HtmlInlineParser implements InlineContentParser { + + private static final AsciiMatcher asciiLetter = AsciiMatcher.builder().range('A', 'Z').range('a', 'z').build(); + + // spec: A tag name consists of an ASCII letter followed by zero or more ASCII letters, digits, or hyphens (-). + private static final AsciiMatcher tagNameStart = asciiLetter; + private static final AsciiMatcher tagNameContinue = tagNameStart.newBuilder().range('0', '9').c('-').build(); + + // spec: An attribute name consists of an ASCII letter, _, or :, followed by zero or more ASCII letters, digits, + // _, ., :, or -. (Note: This is the XML specification restricted to ASCII. HTML5 is laxer.) + private static final AsciiMatcher attributeStart = asciiLetter.newBuilder().c('_').c(':').build(); + private static final AsciiMatcher attributeContinue = attributeStart.newBuilder().range('0', '9').c('.').c('-').build(); + // spec: An unquoted attribute value is a nonempty string of characters not including whitespace, ", ', =, <, >, or `. 
+ private static final AsciiMatcher attributeValueEnd = AsciiMatcher.builder() + .c(' ').c('\t').c('\n').c('\u000B').c('\f').c('\r') + .c('"').c('\'').c('=').c('<').c('>').c('`') + .build(); + + @Override + public ParsedInline tryParse(InlineParserState inlineParserState) { + Scanner scanner = inlineParserState.scanner(); + Position start = scanner.position(); + // Skip over `<` + scanner.next(); + + char c = scanner.peek(); + if (tagNameStart.matches(c)) { + if (tryOpenTag(scanner)) { + return htmlInline(start, scanner); + } + } else if (c == '/') { + if (tryClosingTag(scanner)) { + return htmlInline(start, scanner); + } + } else if (c == '?') { + if (tryProcessingInstruction(scanner)) { + return htmlInline(start, scanner); + } + } else if (c == '!') { + // comment, declaration or CDATA + scanner.next(); + c = scanner.peek(); + if (c == '-') { + if (tryComment(scanner)) { + return htmlInline(start, scanner); + } + } else if (c == '[') { + if (tryCdata(scanner)) { + return htmlInline(start, scanner); + } + } else if (asciiLetter.matches(c)) { + if (tryDeclaration(scanner)) { + return htmlInline(start, scanner); + } + } + } + + return ParsedInline.none(); + } + + private static ParsedInline htmlInline(Position start, Scanner scanner) { + String text = scanner.getSource(start, scanner.position()).getContent(); + HtmlInline node = new HtmlInline(); + node.setLiteral(text); + return ParsedInline.of(node, scanner.position()); + } + + private static boolean tryOpenTag(Scanner scanner) { + // spec: An open tag consists of a < character, a tag name, zero or more attributes, optional whitespace, + // an optional / character, and a > character. + scanner.next(); + scanner.match(tagNameContinue); + boolean whitespace = scanner.whitespace() >= 1; + // spec: An attribute consists of whitespace, an attribute name, and an optional attribute value specification. 
+ while (whitespace && scanner.match(attributeStart) >= 1) { + scanner.match(attributeContinue); + // spec: An attribute value specification consists of optional whitespace, a = character, + // optional whitespace, and an attribute value. + whitespace = scanner.whitespace() >= 1; + if (scanner.next('=')) { + scanner.whitespace(); + char valueStart = scanner.peek(); + if (valueStart == '\'') { + scanner.next(); + if (scanner.find('\'') < 0) { + return false; + } + scanner.next(); + } else if (valueStart == '"') { + scanner.next(); + if (scanner.find('"') < 0) { + return false; + } + scanner.next(); + } else { + if (scanner.find(attributeValueEnd) <= 0) { + return false; + } + } + + // Whitespace is required between attributes + whitespace = scanner.whitespace() >= 1; + } + } + + scanner.next('/'); + return scanner.next('>'); + } + + private static boolean tryClosingTag(Scanner scanner) { + // spec: A closing tag consists of the string . + scanner.next(); + if (scanner.match(tagNameStart) >= 1) { + scanner.match(tagNameContinue); + scanner.whitespace(); + return scanner.next('>'); + } + return false; + } + + private static boolean tryProcessingInstruction(Scanner scanner) { + // spec: A processing instruction consists of the string , + // and the string ?>. + scanner.next(); + while (scanner.find('?') > 0) { + scanner.next(); + if (scanner.next('>')) { + return true; + } + } + return false; + } + + private static boolean tryComment(Scanner scanner) { + // spec: An [HTML comment](@) consists of ``, ``, or ``, and `-->` (see the + // [HTML spec](https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state)). 
+ + // Skip first `-` + scanner.next(); + if (!scanner.next('-')) { + return false; + } + + if (scanner.next('>') || scanner.next("->")) { + return true; + } + + while (scanner.find('-') >= 0) { + if (scanner.next("-->")) { + return true; + } else { + scanner.next(); + } + } + + return false; + } + + private static boolean tryCdata(Scanner scanner) { + // spec: A CDATA section consists of the string , + // and the string ]]>. + + // Skip `[` + scanner.next(); + + if (scanner.next("CDATA[")) { + while (scanner.find(']') >= 0) { + if (scanner.next("]]>")) { + return true; + } else { + scanner.next(); + } + } + } + + return false; + } + + private static boolean tryDeclaration(Scanner scanner) { + // spec: A declaration consists of the string , and the character >. + scanner.match(asciiLetter); + if (scanner.whitespace() <= 0) { + return false; + } + if (scanner.find('>') >= 0) { + scanner.next(); + return true; + } + return false; + } + + public static class Factory implements InlineContentParserFactory { + + @Override + public Set getTriggerCharacters() { + return Set.of('<'); + } + + @Override + public InlineContentParser create() { + return new HtmlInlineParser(); + } + } +} diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/LinkResultImpl.java b/commonmark/src/main/java/org/commonmark/internal/inline/LinkResultImpl.java new file mode 100644 index 000000000..c05b24451 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/internal/inline/LinkResultImpl.java @@ -0,0 +1,46 @@ +package org.commonmark.internal.inline; + +import org.commonmark.node.Node; +import org.commonmark.parser.beta.LinkResult; +import org.commonmark.parser.beta.Position; + +public class LinkResultImpl implements LinkResult { + @Override + public LinkResult includeMarker() { + includeMarker = true; + return this; + } + + public enum Type { + WRAP, + REPLACE + } + + private final Type type; + private final Node node; + private final Position position; + + private boolean 
includeMarker = false; + + public LinkResultImpl(Type type, Node node, Position position) { + this.type = type; + this.node = node; + this.position = position; + } + + public Type getType() { + return type; + } + + public Node getNode() { + return node; + } + + public Position getPosition() { + return position; + } + + public boolean isIncludeMarker() { + return includeMarker; + } +} diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/ParsedInlineImpl.java b/commonmark/src/main/java/org/commonmark/internal/inline/ParsedInlineImpl.java new file mode 100644 index 000000000..a77630610 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/internal/inline/ParsedInlineImpl.java @@ -0,0 +1,23 @@ +package org.commonmark.internal.inline; + +import org.commonmark.node.Node; +import org.commonmark.parser.beta.ParsedInline; +import org.commonmark.parser.beta.Position; + +public class ParsedInlineImpl implements ParsedInline { + private final Node node; + private final Position position; + + public ParsedInlineImpl(Node node, Position position) { + this.node = node; + this.position = position; + } + + public Node getNode() { + return node; + } + + public Position getPosition() { + return position; + } +} diff --git a/commonmark/src/main/java/org/commonmark/internal/renderer/NodeRendererMap.java b/commonmark/src/main/java/org/commonmark/internal/renderer/NodeRendererMap.java index e3adaa11f..c74f90758 100644 --- a/commonmark/src/main/java/org/commonmark/internal/renderer/NodeRendererMap.java +++ b/commonmark/src/main/java/org/commonmark/internal/renderer/NodeRendererMap.java @@ -3,24 +3,39 @@ import org.commonmark.node.Node; import org.commonmark.renderer.NodeRenderer; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; public class NodeRendererMap { + private final List nodeRenderers = new ArrayList<>(); private final Map, NodeRenderer> renderers = new HashMap<>(32); + /** + * Set the renderer for each 
{@link NodeRenderer#getNodeTypes()}, unless there was already a renderer set (first wins). + */ public void add(NodeRenderer nodeRenderer) { - for (Class nodeType : nodeRenderer.getNodeTypes()) { - // Overwrite existing renderer - renderers.put(nodeType, nodeRenderer); + nodeRenderers.add(nodeRenderer); + for (var nodeType : nodeRenderer.getNodeTypes()) { + // The first node renderer for a node type "wins". + renderers.putIfAbsent(nodeType, nodeRenderer); } } public void render(Node node) { - NodeRenderer nodeRenderer = renderers.get(node.getClass()); + var nodeRenderer = renderers.get(node.getClass()); if (nodeRenderer != null) { nodeRenderer.render(node); } } + + public void beforeRoot(Node node) { + nodeRenderers.forEach(r -> r.beforeRoot(node)); + } + + public void afterRoot(Node node) { + nodeRenderers.forEach(r -> r.afterRoot(node)); + } } diff --git a/commonmark/src/main/java/org/commonmark/internal/renderer/text/BulletListHolder.java b/commonmark/src/main/java/org/commonmark/internal/renderer/text/BulletListHolder.java deleted file mode 100644 index f08ccebd6..000000000 --- a/commonmark/src/main/java/org/commonmark/internal/renderer/text/BulletListHolder.java +++ /dev/null @@ -1,16 +0,0 @@ -package org.commonmark.internal.renderer.text; - -import org.commonmark.node.BulletList; - -public class BulletListHolder extends ListHolder { - private final char marker; - - public BulletListHolder(ListHolder parent, BulletList list) { - super(parent); - marker = list.getBulletMarker(); - } - - public char getMarker() { - return marker; - } -} diff --git a/commonmark/src/main/java/org/commonmark/internal/renderer/text/ListHolder.java b/commonmark/src/main/java/org/commonmark/internal/renderer/text/ListHolder.java deleted file mode 100644 index cb06d4a9d..000000000 --- a/commonmark/src/main/java/org/commonmark/internal/renderer/text/ListHolder.java +++ /dev/null @@ -1,27 +0,0 @@ -package org.commonmark.internal.renderer.text; - -public abstract class ListHolder { - 
private static final String INDENT_DEFAULT = " "; - private static final String INDENT_EMPTY = ""; - - private final ListHolder parent; - private final String indent; - - ListHolder(ListHolder parent) { - this.parent = parent; - - if (parent != null) { - indent = parent.indent + INDENT_DEFAULT; - } else { - indent = INDENT_EMPTY; - } - } - - public ListHolder getParent() { - return parent; - } - - public String getIndent() { - return indent; - } -} diff --git a/commonmark/src/main/java/org/commonmark/internal/renderer/text/OrderedListHolder.java b/commonmark/src/main/java/org/commonmark/internal/renderer/text/OrderedListHolder.java deleted file mode 100644 index e02ecea7c..000000000 --- a/commonmark/src/main/java/org/commonmark/internal/renderer/text/OrderedListHolder.java +++ /dev/null @@ -1,26 +0,0 @@ -package org.commonmark.internal.renderer.text; - -import org.commonmark.node.OrderedList; - -public class OrderedListHolder extends ListHolder { - private final char delimiter; - private int counter; - - public OrderedListHolder(ListHolder parent, OrderedList list) { - super(parent); - delimiter = list.getDelimiter(); - counter = list.getStartNumber(); - } - - public char getDelimiter() { - return delimiter; - } - - public int getCounter() { - return counter; - } - - public void increaseCounter() { - counter++; - } -} diff --git a/commonmark/src/main/java/org/commonmark/internal/util/Escaping.java b/commonmark/src/main/java/org/commonmark/internal/util/Escaping.java index 15197556c..3350003c0 100644 --- a/commonmark/src/main/java/org/commonmark/internal/util/Escaping.java +++ b/commonmark/src/main/java/org/commonmark/internal/util/Escaping.java @@ -1,6 +1,6 @@ package org.commonmark.internal.util; -import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.Locale; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -49,7 +49,7 @@ public void replace(String input, StringBuilder sb) { sb.append(input, 1, 
input.length()); } } else { - byte[] bytes = input.getBytes(Charset.forName("UTF-8")); + byte[] bytes = input.getBytes(StandardCharsets.UTF_8); for (byte b : bytes) { sb.append('%'); sb.append(HEX_DIGITS[(b >> 4) & 0xF]); @@ -111,16 +111,17 @@ public static String percentEncodeUrl(String s) { return replaceAll(ESCAPE_IN_URI, s, URI_REPLACER); } - public static String normalizeReference(String input) { - // Strip '[' and ']' - String stripped = input.substring(1, input.length() - 1); - return normalizeLabelContent(stripped); - } - public static String normalizeLabelContent(String input) { String trimmed = input.trim(); - String lowercase = trimmed.toLowerCase(Locale.ROOT); - return WHITESPACE.matcher(lowercase).replaceAll(" "); + + // This is necessary to correctly case fold "\u1E9E" (LATIN CAPITAL LETTER SHARP S) to "SS": + // "\u1E9E".toLowerCase(Locale.ROOT) -> "\u00DF" (LATIN SMALL LETTER SHARP S) + // "\u00DF".toUpperCase(Locale.ROOT) -> "SS" + // Note that doing upper first (or only upper without lower) wouldn't work because: + // "\u1E9E".toUpperCase(Locale.ROOT) -> "\u1E9E" + String caseFolded = trimmed.toLowerCase(Locale.ROOT).toUpperCase(Locale.ROOT); + + return WHITESPACE.matcher(caseFolded).replaceAll(" "); } private static String replaceAll(Pattern p, String s, Replacer replacer) { diff --git a/commonmark/src/main/java/org/commonmark/internal/util/Html5Entities.java b/commonmark/src/main/java/org/commonmark/internal/util/Html5Entities.java index 5215a44df..8da53c053 100644 --- a/commonmark/src/main/java/org/commonmark/internal/util/Html5Entities.java +++ b/commonmark/src/main/java/org/commonmark/internal/util/Html5Entities.java @@ -5,24 +5,31 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.HashMap; import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; public class Html5Entities { private static final Map 
NAMED_CHARACTER_REFERENCES = readEntities(); - private static final Pattern NUMERIC_PATTERN = Pattern.compile("^&#[Xx]?"); - private static final String ENTITY_PATH = "/org/commonmark/internal/util/entities.properties"; + private static final String ENTITY_PATH = "/org/commonmark/internal/util/entities.txt"; public static String entityToString(String input) { - Matcher matcher = NUMERIC_PATTERN.matcher(input); + if (!input.startsWith("&") || !input.endsWith(";")) { + return input; + } + + String value = input.substring(1, input.length() - 1); + if (value.startsWith("#")) { + value = value.substring(1); + int base = 10; + if (value.startsWith("x") || value.startsWith("X")) { + value = value.substring(1); + base = 16; + } - if (matcher.find()) { - int base = matcher.end() == 2 ? 10 : 16; try { - int codePoint = Integer.parseInt(input.substring(matcher.end(), input.length() - 1), base); + int codePoint = Integer.parseInt(value, base); if (codePoint == 0) { return "\uFFFD"; } @@ -31,8 +38,7 @@ public static String entityToString(String input) { return "\uFFFD"; } } else { - String name = input.substring(1, input.length() - 1); - String s = NAMED_CHARACTER_REFERENCES.get(name); + String s = NAMED_CHARACTER_REFERENCES.get(value); if (s != null) { return s; } else { @@ -44,7 +50,7 @@ public static String entityToString(String input) { private static Map readEntities() { Map entities = new HashMap<>(); InputStream stream = Html5Entities.class.getResourceAsStream(ENTITY_PATH); - Charset charset = Charset.forName("UTF-8"); + Charset charset = StandardCharsets.UTF_8; try (BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, charset))) { String line; while ((line = bufferedReader.readLine()) != null) { diff --git a/commonmark/src/main/java/org/commonmark/internal/util/LineReader.java b/commonmark/src/main/java/org/commonmark/internal/util/LineReader.java new file mode 100644 index 000000000..b44098257 --- /dev/null +++ 
b/commonmark/src/main/java/org/commonmark/internal/util/LineReader.java @@ -0,0 +1,149 @@ +package org.commonmark.internal.util; + +import java.io.Closeable; +import java.io.IOException; +import java.io.Reader; + +/** + * Reads lines from a reader like {@link java.io.BufferedReader} but also returns the line terminators. + *

    + * Line terminators can be either a line feed {@code "\n"}, carriage return {@code "\r"}, or a carriage return followed + * by a line feed {@code "\r\n"}. Call {@link #getLineTerminator()} after {@link #readLine()} to obtain the + * corresponding line terminator. If a stream has a line at the end without a terminator, {@link #getLineTerminator()} + * returns {@code null}. + */ +public class LineReader implements Closeable { + + // Same as java.io.BufferedReader + static final int CHAR_BUFFER_SIZE = 8192; + static final int EXPECTED_LINE_LENGTH = 80; + + private Reader reader; + private char[] cbuf; + + private int position = 0; + private int limit = 0; + + private String lineTerminator = null; + + public LineReader(Reader reader) { + this.reader = reader; + this.cbuf = new char[CHAR_BUFFER_SIZE]; + } + + /** + * Read a line of text. + * + * @return the line, or {@code null} when the end of the stream has been reached and no more lines can be read + */ + public String readLine() throws IOException { + StringBuilder sb = null; + boolean cr = false; + + while (true) { + if (position >= limit) { + fill(); + } + + if (cr) { + // We saw a CR before, check if we have CR LF or just CR. + if (position < limit && cbuf[position] == '\n') { + position++; + return line(sb.toString(), "\r\n"); + } else { + return line(sb.toString(), "\r"); + } + } + + if (position >= limit) { + // End of stream, return either the last line without terminator or null for end. + return line(sb != null ? sb.toString() : null, null); + } + + int start = position; + int i = position; + for (; i < limit; i++) { + char c = cbuf[i]; + if (c == '\n') { + position = i + 1; + return line(finish(sb, start, i), "\n"); + } else if (c == '\r') { + if (i + 1 < limit) { + // We know what the next character is, so we can check now whether we have + // a CR LF or just a CR and return. 
+ if (cbuf[i + 1] == '\n') { + position = i + 2; + return line(finish(sb, start, i), "\r\n"); + } else { + position = i + 1; + return line(finish(sb, start, i), "\r"); + } + } else { + // We don't know what the next character is yet, check on next iteration. + cr = true; + position = i + 1; + break; + } + } + } + + if (position < i) { + position = i; + } + + // Haven't found a finished line yet, copy the data from the buffer so that we can fill + // the buffer again. + if (sb == null) { + sb = new StringBuilder(EXPECTED_LINE_LENGTH); + } + sb.append(cbuf, start, i - start); + } + } + + /** + * Return the line terminator of the last read line from {@link #readLine()}. + * + * @return {@code "\n"}, {@code "\r"}, {@code "\r\n"}, or {@code null} + */ + public String getLineTerminator() { + return lineTerminator; + } + + @Override + public void close() throws IOException { + if (reader == null) { + return; + } + try { + reader.close(); + } finally { + reader = null; + cbuf = null; + } + } + + private void fill() throws IOException { + int read; + do { + read = reader.read(cbuf, 0, cbuf.length); + } while (read == 0); + if (read > 0) { + limit = read; + position = 0; + } + } + + private String line(String line, String lineTerminator) { + this.lineTerminator = lineTerminator; + return line; + } + + private String finish(StringBuilder sb, int start, int end) { + int len = end - start; + if (sb == null) { + return new String(cbuf, start, len); + } else { + return sb.append(cbuf, start, len).toString(); + } + } +} diff --git a/commonmark/src/main/java/org/commonmark/internal/util/LinkScanner.java b/commonmark/src/main/java/org/commonmark/internal/util/LinkScanner.java index f25cd59e5..ffed047e5 100644 --- a/commonmark/src/main/java/org/commonmark/internal/util/LinkScanner.java +++ b/commonmark/src/main/java/org/commonmark/internal/util/LinkScanner.java @@ -1,69 +1,76 @@ package org.commonmark.internal.util; +import org.commonmark.parser.beta.Scanner; + public class 
LinkScanner { /** - * Attempt to scan the contents of a link label (inside the brackets), returning the position after the content or - * -1. The returned position can either be the closing {@code ]}, or the end of the line if the label continues on + * Attempt to scan the contents of a link label (inside the brackets), stopping after the content or returning false. + * The stopped position can be either the closing {@code ]}, or the end of the line if the label continues on * the next line. */ - public static int scanLinkLabelContent(CharSequence input, int start) { - for (int i = start; i < input.length(); i++) { - char c = input.charAt(i); - switch (c) { + public static boolean scanLinkLabelContent(Scanner scanner) { + while (scanner.hasNext()) { + switch (scanner.peek()) { case '\\': - if (Parsing.isEscapable(input, i + 1)) { - i += 1; + scanner.next(); + if (isEscapable(scanner.peek())) { + scanner.next(); } break; case ']': - return i; + return true; case '[': // spec: Unescaped square bracket characters are not allowed inside the opening and closing // square brackets of link labels. - return -1; + return false; + default: + scanner.next(); } } - return input.length(); + return true; } /** - * Attempt to scan a link destination, returning the position after the destination or -1. + * Attempt to scan a link destination, stopping after the destination or returning false. 
*/ - public static int scanLinkDestination(CharSequence input, int start) { - if (start >= input.length()) { - return -1; + public static boolean scanLinkDestination(Scanner scanner) { + if (!scanner.hasNext()) { + return false; } - if (input.charAt(start) == '<') { - for (int i = start + 1; i < input.length(); i++) { - char c = input.charAt(i); - switch (c) { + if (scanner.next('<')) { + while (scanner.hasNext()) { + switch (scanner.peek()) { case '\\': - if (Parsing.isEscapable(input, i + 1)) { - i += 1; + scanner.next(); + if (isEscapable(scanner.peek())) { + scanner.next(); } break; case '\n': case '<': - return -1; + return false; case '>': - return i + 1; + scanner.next(); + return true; + default: + scanner.next(); } } - return -1; + return false; } else { - return scanLinkDestinationWithBalancedParens(input, start); + return scanLinkDestinationWithBalancedParens(scanner); } } - public static int scanLinkTitle(CharSequence input, int start) { - if (start >= input.length()) { - return -1; + public static boolean scanLinkTitle(Scanner scanner) { + if (!scanner.hasNext()) { + return false; } char endDelimiter; - switch (input.charAt(start)) { + switch (scanner.peek()) { case '"': endDelimiter = '"'; break; @@ -74,75 +81,122 @@ public static int scanLinkTitle(CharSequence input, int start) { endDelimiter = ')'; break; default: - return -1; + return false; } + scanner.next(); - int afterContent = scanLinkTitleContent(input, start + 1, endDelimiter); - if (afterContent == -1) { - return -1; + if (!scanLinkTitleContent(scanner, endDelimiter)) { + return false; } - - if (afterContent >= input.length() || input.charAt(afterContent) != endDelimiter) { - // missing or wrong end delimiter - return -1; + if (!scanner.hasNext()) { + return false; } - - return afterContent + 1; + scanner.next(); + return true; } - public static int scanLinkTitleContent(CharSequence input, int start, char endDelimiter) { - for (int i = start; i < input.length(); i++) { - char c = 
input.charAt(i); - if (c == '\\' && Parsing.isEscapable(input, i + 1)) { - i += 1; + public static boolean scanLinkTitleContent(Scanner scanner, char endDelimiter) { + while (scanner.hasNext()) { + char c = scanner.peek(); + if (c == '\\') { + scanner.next(); + if (isEscapable(scanner.peek())) { + scanner.next(); + } } else if (c == endDelimiter) { - return i; + return true; } else if (endDelimiter == ')' && c == '(') { // unescaped '(' in title within parens is invalid - return -1; + return false; + } else { + scanner.next(); } } - return input.length(); + return true; } // spec: a nonempty sequence of characters that does not start with <, does not include ASCII space or control // characters, and includes parentheses only if (a) they are backslash-escaped or (b) they are part of a balanced // pair of unescaped parentheses - private static int scanLinkDestinationWithBalancedParens(CharSequence input, int start) { + private static boolean scanLinkDestinationWithBalancedParens(Scanner scanner) { int parens = 0; - for (int i = start; i < input.length(); i++) { - char c = input.charAt(i); + boolean empty = true; + while (scanner.hasNext()) { + char c = scanner.peek(); switch (c) { - case '\0': case ' ': - return i != start ? i : -1; + return !empty; case '\\': - if (Parsing.isEscapable(input, i + 1)) { - i += 1; + scanner.next(); + if (isEscapable(scanner.peek())) { + scanner.next(); } break; case '(': parens++; // Limit to 32 nested parens for pathological cases if (parens > 32) { - return -1; + return false; } + scanner.next(); break; case ')': if (parens == 0) { - return i; + return true; } else { parens--; } + scanner.next(); break; default: // or control character if (Character.isISOControl(c)) { - return i != start ? 
i : -1; + return !empty; } + scanner.next(); break; } + empty = false; + } + return true; + } + + private static boolean isEscapable(char c) { + switch (c) { + case '!': + case '"': + case '#': + case '$': + case '%': + case '&': + case '\'': + case '(': + case ')': + case '*': + case '+': + case ',': + case '-': + case '.': + case '/': + case ':': + case ';': + case '<': + case '=': + case '>': + case '?': + case '@': + case '[': + case '\\': + case ']': + case '^': + case '_': + case '`': + case '{': + case '|': + case '}': + case '~': + return true; } - return input.length(); + return false; } } diff --git a/commonmark/src/main/java/org/commonmark/internal/util/Parsing.java b/commonmark/src/main/java/org/commonmark/internal/util/Parsing.java index d429d9db0..972fdef62 100644 --- a/commonmark/src/main/java/org/commonmark/internal/util/Parsing.java +++ b/commonmark/src/main/java/org/commonmark/internal/util/Parsing.java @@ -1,208 +1,10 @@ package org.commonmark.internal.util; public class Parsing { - - private static final String TAGNAME = "[A-Za-z][A-Za-z0-9-]*"; - private static final String ATTRIBUTENAME = "[a-zA-Z_:][a-zA-Z0-9:._-]*"; - private static final String UNQUOTEDVALUE = "[^\"'=<>`\\x00-\\x20]+"; - private static final String SINGLEQUOTEDVALUE = "'[^']*'"; - private static final String DOUBLEQUOTEDVALUE = "\"[^\"]*\""; - private static final String ATTRIBUTEVALUE = "(?:" + UNQUOTEDVALUE + "|" + SINGLEQUOTEDVALUE - + "|" + DOUBLEQUOTEDVALUE + ")"; - private static final String ATTRIBUTEVALUESPEC = "(?:" + "\\s*=" + "\\s*" + ATTRIBUTEVALUE - + ")"; - private static final String ATTRIBUTE = "(?:" + "\\s+" + ATTRIBUTENAME + ATTRIBUTEVALUESPEC - + "?)"; - - public static final String OPENTAG = "<" + TAGNAME + ATTRIBUTE + "*" + "\\s*/?>"; - public static final String CLOSETAG = "]"; - public static int CODE_BLOCK_INDENT = 4; public static int columnsToNextTabStop(int column) { // Tab stop is 4 return 4 - (column % 4); } - - public static int find(char c, 
CharSequence s, int startIndex) { - int length = s.length(); - for (int i = startIndex; i < length; i++) { - if (s.charAt(i) == c) { - return i; - } - } - return -1; - } - - public static int findLineBreak(CharSequence s, int startIndex) { - int length = s.length(); - for (int i = startIndex; i < length; i++) { - switch (s.charAt(i)) { - case '\n': - case '\r': - return i; - } - } - return -1; - } - - public static boolean isBlank(CharSequence s) { - return findNonSpace(s, 0) == -1; - } - - public static boolean hasNonSpace(CharSequence s) { - int length = s.length(); - int skipped = skip(' ', s, 0, length); - return skipped != length; - } - - public static boolean isLetter(CharSequence s, int index) { - int codePoint = Character.codePointAt(s, index); - return Character.isLetter(codePoint); - } - - public static boolean isSpaceOrTab(CharSequence s, int index) { - if (index < s.length()) { - switch (s.charAt(index)) { - case ' ': - case '\t': - return true; - } - } - return false; - } - - public static boolean isEscapable(CharSequence s, int index) { - if (index < s.length()) { - switch (s.charAt(index)) { - case '!': - case '"': - case '#': - case '$': - case '%': - case '&': - case '\'': - case '(': - case ')': - case '*': - case '+': - case ',': - case '-': - case '.': - case '/': - case ':': - case ';': - case '<': - case '=': - case '>': - case '?': - case '@': - case '[': - case '\\': - case ']': - case '^': - case '_': - case '`': - case '{': - case '|': - case '}': - case '~': - return true; - } - } - return false; - } - - /** - * Prepares the input line replacing {@code \0} - */ - public static CharSequence prepareLine(CharSequence line) { - // Avoid building a new string in the majority of cases (no \0) - StringBuilder sb = null; - int length = line.length(); - for (int i = 0; i < length; i++) { - char c = line.charAt(i); - switch (c) { - case '\0': - if (sb == null) { - sb = new StringBuilder(length); - sb.append(line, 0, i); - } - sb.append('\uFFFD'); - 
break; - default: - if (sb != null) { - sb.append(c); - } - } - } - - if (sb != null) { - return sb.toString(); - } else { - return line; - } - } - - public static int skip(char skip, CharSequence s, int startIndex, int endIndex) { - for (int i = startIndex; i < endIndex; i++) { - if (s.charAt(i) != skip) { - return i; - } - } - return endIndex; - } - - public static int skipBackwards(char skip, CharSequence s, int startIndex, int lastIndex) { - for (int i = startIndex; i >= lastIndex; i--) { - if (s.charAt(i) != skip) { - return i; - } - } - return lastIndex - 1; - } - - public static int skipSpaceTab(CharSequence s, int startIndex, int endIndex) { - for (int i = startIndex; i < endIndex; i++) { - switch (s.charAt(i)) { - case ' ': - case '\t': - break; - default: - return i; - } - } - return endIndex; - } - - public static int skipSpaceTabBackwards(CharSequence s, int startIndex, int lastIndex) { - for (int i = startIndex; i >= lastIndex; i--) { - switch (s.charAt(i)) { - case ' ': - case '\t': - break; - default: - return i; - } - } - return lastIndex - 1; - } - - private static int findNonSpace(CharSequence s, int startIndex) { - int length = s.length(); - for (int i = startIndex; i < length; i++) { - switch (s.charAt(i)) { - case ' ': - case '\t': - case '\n': - case '\u000B': - case '\f': - case '\r': - break; - default: - return i; - } - } - return -1; - } } diff --git a/commonmark/src/main/java/org/commonmark/node/Block.java b/commonmark/src/main/java/org/commonmark/node/Block.java index e6a317d7c..332346b0e 100644 --- a/commonmark/src/main/java/org/commonmark/node/Block.java +++ b/commonmark/src/main/java/org/commonmark/node/Block.java @@ -1,7 +1,11 @@ package org.commonmark.node; +/** + * Block nodes such as paragraphs, list blocks, code blocks etc. 
+ */ public abstract class Block extends Node { + @Override public Block getParent() { return (Block) super.getParent(); } diff --git a/commonmark/src/main/java/org/commonmark/node/BlockQuote.java b/commonmark/src/main/java/org/commonmark/node/BlockQuote.java index 160f25ae2..f68252398 100644 --- a/commonmark/src/main/java/org/commonmark/node/BlockQuote.java +++ b/commonmark/src/main/java/org/commonmark/node/BlockQuote.java @@ -1,5 +1,15 @@ package org.commonmark.node; +/** + * A block quote, e.g.: + *

    + * > Some quoted text
    + * 
    + *

    + * Note that child nodes are themselves blocks, e.g. {@link Paragraph}, {@link ListBlock} etc. + * + * @see CommonMark Spec + */ public class BlockQuote extends Block { @Override diff --git a/commonmark/src/main/java/org/commonmark/node/BulletList.java b/commonmark/src/main/java/org/commonmark/node/BulletList.java index 127862312..014f4d3b2 100644 --- a/commonmark/src/main/java/org/commonmark/node/BulletList.java +++ b/commonmark/src/main/java/org/commonmark/node/BulletList.java @@ -1,20 +1,50 @@ package org.commonmark.node; +/** + * A bullet list, e.g.: + *

    + * - One
    + * - Two
    + * - Three
    + * 
    + *

    + * The children are {@link ListItem} blocks, which contain other blocks (or nested lists). + * + * @see CommonMark Spec: List items + */ public class BulletList extends ListBlock { - private char bulletMarker; + private String marker; @Override public void accept(Visitor visitor) { visitor.visit(this); } + /** + * @return the bullet list marker that was used, e.g. {@code -}, {@code *} or {@code +}, if available, or null otherwise + */ + public String getMarker() { + return marker; + } + + public void setMarker(String marker) { + this.marker = marker; + } + + /** + * @deprecated use {@link #getMarker()} instead + */ + @Deprecated public char getBulletMarker() { - return bulletMarker; + return marker != null && !marker.isEmpty() ? marker.charAt(0) : '\0'; } + /** + * @deprecated use {@link #getMarker()} instead + */ + @Deprecated public void setBulletMarker(char bulletMarker) { - this.bulletMarker = bulletMarker; + this.marker = bulletMarker != '\0' ? String.valueOf(bulletMarker) : null; } - } diff --git a/commonmark/src/main/java/org/commonmark/node/Code.java b/commonmark/src/main/java/org/commonmark/node/Code.java index 0b47ecb71..3b79e0c9c 100644 --- a/commonmark/src/main/java/org/commonmark/node/Code.java +++ b/commonmark/src/main/java/org/commonmark/node/Code.java @@ -1,5 +1,13 @@ package org.commonmark.node; +/** + * Inline code span, e.g.: + *

    + * Some `inline code`
    + * 
    + * + * @see CommonMark Spec + */ public class Code extends Node { private String literal; @@ -16,6 +24,10 @@ public void accept(Visitor visitor) { visitor.visit(this); } + /** + * @return the literal text in the code span (note that it's not necessarily the raw text between backticks, + * e.g. when spaces are stripped) + */ public String getLiteral() { return literal; } diff --git a/commonmark/src/main/java/org/commonmark/node/CustomBlock.java b/commonmark/src/main/java/org/commonmark/node/CustomBlock.java index 6596ec1a0..cad88933a 100644 --- a/commonmark/src/main/java/org/commonmark/node/CustomBlock.java +++ b/commonmark/src/main/java/org/commonmark/node/CustomBlock.java @@ -1,5 +1,8 @@ package org.commonmark.node; +/** + * A block that extensions can subclass to define custom blocks (not part of the core specification). + */ public abstract class CustomBlock extends Block { @Override diff --git a/commonmark/src/main/java/org/commonmark/node/CustomNode.java b/commonmark/src/main/java/org/commonmark/node/CustomNode.java index a68e5cc11..88f0254da 100644 --- a/commonmark/src/main/java/org/commonmark/node/CustomNode.java +++ b/commonmark/src/main/java/org/commonmark/node/CustomNode.java @@ -1,5 +1,8 @@ package org.commonmark.node; +/** + * A node that extensions can subclass to define custom nodes (not part of the core specification). 
+ */ public abstract class CustomNode extends Node { @Override public void accept(Visitor visitor) { diff --git a/commonmark/src/main/java/org/commonmark/node/DefinitionMap.java b/commonmark/src/main/java/org/commonmark/node/DefinitionMap.java new file mode 100644 index 000000000..59cb88274 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/node/DefinitionMap.java @@ -0,0 +1,67 @@ +package org.commonmark.node; + +import org.commonmark.internal.util.Escaping; + +import java.util.Collection; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Set; + +/** + * A map that can be used to store and look up reference definitions by a label. The labels are case-insensitive and + * normalized, the same way as for {@link LinkReferenceDefinition} nodes. + * + * @param the type of value + */ +public class DefinitionMap { + + private final Class type; + // LinkedHashMap for determinism and to preserve document order + private final Map definitions = new LinkedHashMap<>(); + + public DefinitionMap(Class type) { + this.type = type; + } + + public Class getType() { + return type; + } + + public void addAll(DefinitionMap that) { + for (var entry : that.definitions.entrySet()) { + // Note that keys are already normalized, so we can add them directly + definitions.putIfAbsent(entry.getKey(), entry.getValue()); + } + } + + /** + * Store a new definition unless one is already in the map. If there is no definition for that label yet, return null. + * Otherwise, return the existing definition. + *

    + * The label is normalized by the definition map before storing. + */ + public D putIfAbsent(String label, D definition) { + String normalizedLabel = Escaping.normalizeLabelContent(label); + + // spec: When there are multiple matching link reference definitions, the first is used + return definitions.putIfAbsent(normalizedLabel, definition); + } + + /** + * Look up a definition by label. The label is normalized by the definition map before lookup. + * + * @return the value or null + */ + public D get(String label) { + String normalizedLabel = Escaping.normalizeLabelContent(label); + return definitions.get(normalizedLabel); + } + + public Set keySet() { + return definitions.keySet(); + } + + public Collection values() { + return definitions.values(); + } +} diff --git a/commonmark/src/main/java/org/commonmark/node/Document.java b/commonmark/src/main/java/org/commonmark/node/Document.java index 5b7e74189..b4968c206 100644 --- a/commonmark/src/main/java/org/commonmark/node/Document.java +++ b/commonmark/src/main/java/org/commonmark/node/Document.java @@ -1,5 +1,8 @@ package org.commonmark.node; +/** + * The root block of a document, containing the top-level blocks. + */ public class Document extends Block { @Override diff --git a/commonmark/src/main/java/org/commonmark/node/Emphasis.java b/commonmark/src/main/java/org/commonmark/node/Emphasis.java index 9877e7b63..5efc8c327 100644 --- a/commonmark/src/main/java/org/commonmark/node/Emphasis.java +++ b/commonmark/src/main/java/org/commonmark/node/Emphasis.java @@ -1,5 +1,13 @@ package org.commonmark.node; +/** + * Emphasis, e.g.: + *

    + * Some *emphasis* or _emphasis_
    + * 
    + * + * @see CommonMark Spec: Emphasis and strong emphasis + */ public class Emphasis extends Node implements Delimited { private String delimiter; diff --git a/commonmark/src/main/java/org/commonmark/node/FencedCodeBlock.java b/commonmark/src/main/java/org/commonmark/node/FencedCodeBlock.java index 7e2612331..0e279a470 100644 --- a/commonmark/src/main/java/org/commonmark/node/FencedCodeBlock.java +++ b/commonmark/src/main/java/org/commonmark/node/FencedCodeBlock.java @@ -1,9 +1,22 @@ package org.commonmark.node; +/** + * A fenced code block, e.g.: + *
    + * ```
    + * foo
    + * bar
    + * ```
    + * 
    + *

    + * + * @see CommonMark Spec + */ public class FencedCodeBlock extends Block { - private char fenceChar; - private int fenceLength; + private String fenceCharacter; + private Integer openingFenceLength; + private Integer closingFenceLength; private int fenceIndent; private String info; @@ -14,20 +27,47 @@ public void accept(Visitor visitor) { visitor.visit(this); } - public char getFenceChar() { - return fenceChar; + /** + * @return the fence character that was used, e.g. {@code `} or {@code ~}, if available, or null otherwise + */ + public String getFenceCharacter() { + return fenceCharacter; } - public void setFenceChar(char fenceChar) { - this.fenceChar = fenceChar; + public void setFenceCharacter(String fenceCharacter) { + this.fenceCharacter = fenceCharacter; } - public int getFenceLength() { - return fenceLength; + /** + * @return the length of the opening fence (how many of {@link #getFenceCharacter()} were used to start the code + * block) if available, or null otherwise + */ + public Integer getOpeningFenceLength() { + return openingFenceLength; } - public void setFenceLength(int fenceLength) { - this.fenceLength = fenceLength; + public void setOpeningFenceLength(Integer openingFenceLength) { + if (openingFenceLength != null && openingFenceLength < 3) { + throw new IllegalArgumentException("openingFenceLength needs to be >= 3"); + } + checkFenceLengths(openingFenceLength, closingFenceLength); + this.openingFenceLength = openingFenceLength; + } + + /** + * @return the length of the closing fence (how many of {@link #getFenceCharacter()} were used to end the code + * block) if available, or null otherwise + */ + public Integer getClosingFenceLength() { + return closingFenceLength; + } + + public void setClosingFenceLength(Integer closingFenceLength) { + if (closingFenceLength != null && closingFenceLength < 3) { + throw new IllegalArgumentException("closingFenceLength needs to be >= 3"); + } + checkFenceLengths(openingFenceLength, closingFenceLength); 
+ this.closingFenceLength = closingFenceLength; } public int getFenceIndent() { @@ -39,7 +79,7 @@ public void setFenceIndent(int fenceIndent) { } /** - * @see CommonMark spec + * @see CommonMark spec */ public String getInfo() { return info; @@ -56,4 +96,44 @@ public String getLiteral() { public void setLiteral(String literal) { this.literal = literal; } + + /** + * @deprecated use {@link #getFenceCharacter()} instead + */ + @Deprecated + public char getFenceChar() { + return fenceCharacter != null && !fenceCharacter.isEmpty() ? fenceCharacter.charAt(0) : '\0'; + } + + /** + * @deprecated use {@link #setFenceCharacter} instead + */ + @Deprecated + public void setFenceChar(char fenceChar) { + this.fenceCharacter = fenceChar != '\0' ? String.valueOf(fenceChar) : null; + } + + /** + * @deprecated use {@link #getOpeningFenceLength} instead + */ + @Deprecated + public int getFenceLength() { + return openingFenceLength != null ? openingFenceLength : 0; + } + + /** + * @deprecated use {@link #setOpeningFenceLength} instead + */ + @Deprecated + public void setFenceLength(int fenceLength) { + this.openingFenceLength = fenceLength != 0 ? fenceLength : null; + } + + private static void checkFenceLengths(Integer openingFenceLength, Integer closingFenceLength) { + if (openingFenceLength != null && closingFenceLength != null) { + if (closingFenceLength < openingFenceLength) { + throw new IllegalArgumentException("fence lengths required to be: closingFenceLength >= openingFenceLength"); + } + } + } } diff --git a/commonmark/src/main/java/org/commonmark/node/HardLineBreak.java b/commonmark/src/main/java/org/commonmark/node/HardLineBreak.java index 0640fc3c4..28874ec01 100644 --- a/commonmark/src/main/java/org/commonmark/node/HardLineBreak.java +++ b/commonmark/src/main/java/org/commonmark/node/HardLineBreak.java @@ -1,5 +1,15 @@ package org.commonmark.node; +/** + * A hard line break, e.g.: + *

    + * line\
    + * break
    + * 
    + *

    + * + * @see CommonMark Spec + */ public class HardLineBreak extends Node { @Override diff --git a/commonmark/src/main/java/org/commonmark/node/Heading.java b/commonmark/src/main/java/org/commonmark/node/Heading.java index 41f3b2504..5369d8739 100644 --- a/commonmark/src/main/java/org/commonmark/node/Heading.java +++ b/commonmark/src/main/java/org/commonmark/node/Heading.java @@ -1,5 +1,17 @@ package org.commonmark.node; +/** + * A heading, e.g.: + *

    + * First heading
    + * =============
    + *
    + * ## Another heading
    + * 
    + * + * @see CommonMark Spec: ATX headings + * @see CommonMark Spec: Setext headings + */ public class Heading extends Block { private int level; diff --git a/commonmark/src/main/java/org/commonmark/node/HtmlBlock.java b/commonmark/src/main/java/org/commonmark/node/HtmlBlock.java index ad46c56ce..fbe00927d 100644 --- a/commonmark/src/main/java/org/commonmark/node/HtmlBlock.java +++ b/commonmark/src/main/java/org/commonmark/node/HtmlBlock.java @@ -3,7 +3,7 @@ /** * HTML block * - * @see CommonMark Spec + * @see CommonMark Spec */ public class HtmlBlock extends Block { diff --git a/commonmark/src/main/java/org/commonmark/node/HtmlInline.java b/commonmark/src/main/java/org/commonmark/node/HtmlInline.java index 291fcde3c..35360c639 100644 --- a/commonmark/src/main/java/org/commonmark/node/HtmlInline.java +++ b/commonmark/src/main/java/org/commonmark/node/HtmlInline.java @@ -3,7 +3,7 @@ /** * Inline HTML element. * - * @see CommonMark Spec + * @see CommonMark Spec */ public class HtmlInline extends Node { diff --git a/commonmark/src/main/java/org/commonmark/node/Image.java b/commonmark/src/main/java/org/commonmark/node/Image.java index 63481773a..1b31f6020 100644 --- a/commonmark/src/main/java/org/commonmark/node/Image.java +++ b/commonmark/src/main/java/org/commonmark/node/Image.java @@ -1,5 +1,13 @@ package org.commonmark.node; +/** + * An image, e.g.: + *
    + * ![foo](/url "title")
    + * 
    + * + * @see CommonMark Spec + */ public class Image extends Node { private String destination; diff --git a/commonmark/src/main/java/org/commonmark/node/IndentedCodeBlock.java b/commonmark/src/main/java/org/commonmark/node/IndentedCodeBlock.java index ccafca943..97642b7f3 100644 --- a/commonmark/src/main/java/org/commonmark/node/IndentedCodeBlock.java +++ b/commonmark/src/main/java/org/commonmark/node/IndentedCodeBlock.java @@ -1,5 +1,17 @@ package org.commonmark.node; +/** + * An indented code block, e.g.: + *
    
    + * Code follows:
    + *
    + *     foo
    + *     bar
    + * 
    + *

    + * + * @see CommonMark Spec + */ public class IndentedCodeBlock extends Block { private String literal; diff --git a/commonmark/src/main/java/org/commonmark/node/Link.java b/commonmark/src/main/java/org/commonmark/node/Link.java index b2ed8c2a1..4edc7f676 100644 --- a/commonmark/src/main/java/org/commonmark/node/Link.java +++ b/commonmark/src/main/java/org/commonmark/node/Link.java @@ -18,7 +18,7 @@ * Note that the text in the link can contain inline formatting, so it could also contain an {@link Image} or * {@link Emphasis}, etc. * - * @see CommonMark Spec for links + * @see CommonMark Spec */ public class Link extends Node { @@ -46,6 +46,9 @@ public void setDestination(String destination) { this.destination = destination; } + /** + * @return the title or null + */ public String getTitle() { return title; } diff --git a/commonmark/src/main/java/org/commonmark/node/LinkReferenceDefinition.java b/commonmark/src/main/java/org/commonmark/node/LinkReferenceDefinition.java index 3f8bfd0f0..b866781f0 100644 --- a/commonmark/src/main/java/org/commonmark/node/LinkReferenceDefinition.java +++ b/commonmark/src/main/java/org/commonmark/node/LinkReferenceDefinition.java @@ -9,9 +9,9 @@ * They can be referenced anywhere else in the document to produce a link using [foo]. The definitions * themselves are usually not rendered in the final output. * - * @see Link reference definitions + * @see CommonMark Spec */ -public class LinkReferenceDefinition extends Node { +public class LinkReferenceDefinition extends Block { private String label; private String destination; diff --git a/commonmark/src/main/java/org/commonmark/node/ListBlock.java b/commonmark/src/main/java/org/commonmark/node/ListBlock.java index 69482f66e..1290bc622 100644 --- a/commonmark/src/main/java/org/commonmark/node/ListBlock.java +++ b/commonmark/src/main/java/org/commonmark/node/ListBlock.java @@ -1,12 +1,15 @@ package org.commonmark.node; +/** + * A list block like {@link BulletList} or {@link OrderedList}. 
+ */ public abstract class ListBlock extends Block { private boolean tight; /** * @return whether this list is tight or loose - * @see CommonMark Spec for tight lists + * @see CommonMark Spec for tight lists */ public boolean isTight() { return tight; diff --git a/commonmark/src/main/java/org/commonmark/node/ListItem.java b/commonmark/src/main/java/org/commonmark/node/ListItem.java index aa526be01..c4d1214e7 100644 --- a/commonmark/src/main/java/org/commonmark/node/ListItem.java +++ b/commonmark/src/main/java/org/commonmark/node/ListItem.java @@ -1,9 +1,78 @@ package org.commonmark.node; +/** + * A child of a {@link ListBlock}, containing other blocks (e.g. {@link Paragraph}, other lists, etc). + *

    + * Note that a list item can't directly contain {@link Text}, it needs to be: + * {@link ListItem} : {@link Paragraph} : {@link Text}. + * If you want a list that is rendered tightly, create a list with {@link ListBlock#setTight(boolean)}. + * + * @see CommonMark Spec: List items + */ public class ListItem extends Block { + private Integer markerIndent; + private Integer contentIndent; + @Override public void accept(Visitor visitor) { visitor.visit(this); } + + /** + * Returns the indent of the marker such as "-" or "1." in columns (spaces or tab stop of 4) if available, or null + * otherwise. + *

    + * Some examples and their marker indent: + *

    - Foo
    + * Marker indent: 0 + *
     - Foo
    + * Marker indent: 1 + *
      1. Foo
    + * Marker indent: 2 + */ + public Integer getMarkerIndent() { + return markerIndent; + } + + public void setMarkerIndent(Integer markerIndent) { + this.markerIndent = markerIndent; + } + + /** + * Returns the indent of the content in columns (spaces or tab stop of 4) if available, or null otherwise. + * The content indent is counted from the beginning of the line and includes the marker on the first line. + *

    + * Some examples and their content indent: + *

    - Foo
    + * Content indent: 2 + *
     - Foo
    + * Content indent: 3 + *
      1. Foo
    + * Content indent: 5 + *

    + * Note that subsequent lines in the same list item need to be indented by at least the content indent to be counted + * as part of the list item. + */ + public Integer getContentIndent() { + return contentIndent; + } + + public void setContentIndent(Integer contentIndent) { + this.contentIndent = contentIndent; + } + + /** + * @deprecated list items should only contain block nodes; if you're trying to create a list that is rendered + * without paragraphs, use {@link ListBlock#setTight(boolean)} instead. + */ + @Override + @Deprecated + public void appendChild(Node child) { + super.appendChild(child); + } + + public void appendChild(Block child) { + super.appendChild(child); + } } diff --git a/commonmark/src/main/java/org/commonmark/node/Node.java b/commonmark/src/main/java/org/commonmark/node/Node.java index e7b24c08c..d95a72c60 100644 --- a/commonmark/src/main/java/org/commonmark/node/Node.java +++ b/commonmark/src/main/java/org/commonmark/node/Node.java @@ -1,5 +1,14 @@ package org.commonmark.node; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * The base class of all CommonMark AST nodes ({@link Block} and inlines). + *

    + * A node can have multiple children, and a parent (except for the root node). + */ public abstract class Node { private Node parent = null; @@ -7,6 +16,7 @@ public abstract class Node { private Node lastChild = null; private Node prev = null; private Node next = null; + private List sourceSpans = null; public abstract void accept(Visitor visitor); @@ -76,6 +86,9 @@ public void unlink() { this.prev = null; } + /** + * Inserts the {@code sibling} node after {@code this} node. + */ public void insertAfter(Node sibling) { sibling.unlink(); sibling.next = this.next; @@ -90,6 +103,9 @@ public void insertAfter(Node sibling) { } } + /** + * Inserts the {@code sibling} node before {@code this} node. + */ public void insertBefore(Node sibling) { sibling.unlink(); sibling.prev = this.prev; @@ -104,6 +120,41 @@ public void insertBefore(Node sibling) { } } + /** + * @return the source spans of this node if included by the parser, an empty list otherwise + * @since 0.16.0 + */ + public List getSourceSpans() { + return sourceSpans != null ? Collections.unmodifiableList(sourceSpans) : List.of(); + } + + /** + * Replace the current source spans with the provided list. + * + * @param sourceSpans the new source spans to set + * @since 0.16.0 + */ + public void setSourceSpans(List sourceSpans) { + if (sourceSpans.isEmpty()) { + this.sourceSpans = null; + } else { + this.sourceSpans = new ArrayList<>(sourceSpans); + } + } + + /** + * Add a source span to the end of the list. 
+ * + * @param sourceSpan the source span to add + * @since 0.16.0 + */ + public void addSourceSpan(SourceSpan sourceSpan) { + if (sourceSpans == null) { + this.sourceSpans = new ArrayList<>(); + } + this.sourceSpans.add(sourceSpan); + } + @Override public String toString() { return getClass().getSimpleName() + "{" + toStringAttributes() + "}"; diff --git a/commonmark/src/main/java/org/commonmark/node/Nodes.java b/commonmark/src/main/java/org/commonmark/node/Nodes.java new file mode 100644 index 000000000..22d5932af --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/node/Nodes.java @@ -0,0 +1,66 @@ +package org.commonmark.node; + +import java.util.Iterator; + +/** + * Utility class for working with multiple {@link Node}s. + * + * @since 0.16.0 + */ +public class Nodes { + + private Nodes() { + } + + /** + * The nodes between (not including) start and end. + */ + public static Iterable between(Node start, Node end) { + return new NodeIterable(start.getNext(), end); + } + + private static class NodeIterable implements Iterable { + + private final Node first; + private final Node end; + + private NodeIterable(Node first, Node end) { + this.first = first; + this.end = end; + } + + @Override + public Iterator iterator() { + return new NodeIterator(first, end); + } + } + + private static class NodeIterator implements Iterator { + + private Node node; + private final Node end; + + private NodeIterator(Node first, Node end) { + node = first; + this.end = end; + } + + @Override + public boolean hasNext() { + return node != null && node != end; + } + + @Override + public Node next() { + Node result = node; + node = node.getNext(); + return result; + } + + @Override + public void remove() { + throw new UnsupportedOperationException("remove"); + } + } +} + diff --git a/commonmark/src/main/java/org/commonmark/node/OrderedList.java b/commonmark/src/main/java/org/commonmark/node/OrderedList.java index 1f988234c..61f8902c0 100644 --- 
a/commonmark/src/main/java/org/commonmark/node/OrderedList.java +++ b/commonmark/src/main/java/org/commonmark/node/OrderedList.java @@ -1,29 +1,78 @@ package org.commonmark.node; +/** + * An ordered list, e.g.: + *

    
    + * 1. One
    + * 2. Two
    + * 3. Three
    + * 
    + *

    + * The children are {@link ListItem} blocks, which contain other blocks (or nested lists). + * + * @see CommonMark Spec: List items + */ public class OrderedList extends ListBlock { - private int startNumber; - private char delimiter; + private String markerDelimiter; + private Integer markerStartNumber; @Override public void accept(Visitor visitor) { visitor.visit(this); } + /** + * @return the start number used in the marker, e.g. {@code 1}, if available, or null otherwise + */ + public Integer getMarkerStartNumber() { + return markerStartNumber; + } + + public void setMarkerStartNumber(Integer markerStartNumber) { + this.markerStartNumber = markerStartNumber; + } + + /** + * @return the delimiter used in the marker, e.g. {@code .} or {@code )}, if available, or null otherwise + */ + public String getMarkerDelimiter() { + return markerDelimiter; + } + + public void setMarkerDelimiter(String markerDelimiter) { + this.markerDelimiter = markerDelimiter; + } + + /** + * @deprecated use {@link #getMarkerStartNumber()} instead + */ + @Deprecated public int getStartNumber() { - return startNumber; + return markerStartNumber != null ? markerStartNumber : 0; } + /** + * @deprecated use {@link #setMarkerStartNumber} instead + */ + @Deprecated public void setStartNumber(int startNumber) { - this.startNumber = startNumber; + this.markerStartNumber = startNumber != 0 ? startNumber : null; } + /** + * @deprecated use {@link #getMarkerDelimiter()} instead + */ + @Deprecated public char getDelimiter() { - return delimiter; + return markerDelimiter != null && !markerDelimiter.isEmpty() ? markerDelimiter.charAt(0) : '\0'; } + /** + * @deprecated use {@link #setMarkerDelimiter} instead + */ + @Deprecated public void setDelimiter(char delimiter) { - this.delimiter = delimiter; + this.markerDelimiter = delimiter != '\0' ? 
String.valueOf(delimiter) : null; } - } diff --git a/commonmark/src/main/java/org/commonmark/node/Paragraph.java b/commonmark/src/main/java/org/commonmark/node/Paragraph.java index 0c3f88f39..b298f1ce4 100644 --- a/commonmark/src/main/java/org/commonmark/node/Paragraph.java +++ b/commonmark/src/main/java/org/commonmark/node/Paragraph.java @@ -1,5 +1,10 @@ package org.commonmark.node; +/** + * A paragraph block, contains inline nodes such as {@link Text} + * + * @see CommonMark Spec + */ public class Paragraph extends Block { @Override diff --git a/commonmark/src/main/java/org/commonmark/node/SoftLineBreak.java b/commonmark/src/main/java/org/commonmark/node/SoftLineBreak.java index e66458912..87445db56 100644 --- a/commonmark/src/main/java/org/commonmark/node/SoftLineBreak.java +++ b/commonmark/src/main/java/org/commonmark/node/SoftLineBreak.java @@ -1,5 +1,14 @@ package org.commonmark.node; +/** + * A soft line break (as opposed to a {@link HardLineBreak}), e.g. between: + *

    + * foo
    + * bar
    + * 
    + * + * @see CommonMark Spec + */ public class SoftLineBreak extends Node { @Override diff --git a/commonmark/src/main/java/org/commonmark/node/SourceSpan.java b/commonmark/src/main/java/org/commonmark/node/SourceSpan.java new file mode 100644 index 000000000..6558cc84a --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/node/SourceSpan.java @@ -0,0 +1,150 @@ +package org.commonmark.node; + +import java.util.Objects; + +/** + * A source span references a snippet of text from the source input. + *

    + * It has a starting position (line and column index) and a length of how many characters it spans. + *

    + * For example, this CommonMark source text: + *

    
    + * > foo
    + * 
    + * The {@link BlockQuote} node would have this source span: line 0, column 0, length 5. + *

    + * The {@link Paragraph} node inside it would have: line 0, column 2, length 3. + *

    + * If a block has multiple lines, it will have a source span for each line. + *

    + * Note that the column index and length are measured in Java characters (UTF-16 code units). If you're outputting them + * to be consumed by another programming language, e.g. one that uses UTF-8 strings, you will need to translate them, + * otherwise characters such as emojis will result in incorrect positions. + * + * @since 0.16.0 + */ +public class SourceSpan { + + private final int lineIndex; + private final int columnIndex; + private final int inputIndex; + private final int length; + + public static SourceSpan of(int line, int col, int input, int length) { + return new SourceSpan(line, col, input, length); + } + + /** + * @deprecated Use {@link #of(int, int, int, int)} instead to also specify input index. Using the deprecated one + * will set {@link #inputIndex} to 0. + */ + @Deprecated + public static SourceSpan of(int lineIndex, int columnIndex, int length) { + return of(lineIndex, columnIndex, 0, length); + } + + private SourceSpan(int lineIndex, int columnIndex, int inputIndex, int length) { + if (lineIndex < 0) { + throw new IllegalArgumentException("lineIndex " + lineIndex + " must be >= 0"); + } + if (columnIndex < 0) { + throw new IllegalArgumentException("columnIndex " + columnIndex + " must be >= 0"); + } + if (inputIndex < 0) { + throw new IllegalArgumentException("inputIndex " + inputIndex + " must be >= 0"); + } + if (length < 0) { + throw new IllegalArgumentException("length " + length + " must be >= 0"); + } + this.lineIndex = lineIndex; + this.columnIndex = columnIndex; + this.inputIndex = inputIndex; + this.length = length; + } + + /** + * @return 0-based line index, e.g. 0 for first line, 1 for the second line, etc + */ + public int getLineIndex() { + return lineIndex; + } + + /** + * @return 0-based index of column (character on line) in source, e.g. 
0 for the first character of a line, 1 for + * the second character, etc + */ + public int getColumnIndex() { + return columnIndex; + } + + /** + * @return 0-based index in whole input + * @since 0.24.0 + */ + public int getInputIndex() { + return inputIndex; + } + + /** + * @return length of the span in characters + */ + public int getLength() { + return length; + } + + public SourceSpan subSpan(int beginIndex) { + return subSpan(beginIndex, length); + } + + public SourceSpan subSpan(int beginIndex, int endIndex) { + if (beginIndex < 0) { + throw new IndexOutOfBoundsException("beginIndex " + beginIndex + " must be >= 0"); + } + if (beginIndex > length) { + throw new IndexOutOfBoundsException("beginIndex " + beginIndex + " must be <= length " + length); + } + if (endIndex < 0) { + throw new IndexOutOfBoundsException("endIndex " + endIndex + " must be >= 0"); + } + if (endIndex > length) { + throw new IndexOutOfBoundsException("endIndex " + endIndex + " must be <= length " + length); + } + if (beginIndex > endIndex) { + throw new IndexOutOfBoundsException("beginIndex " + beginIndex + " must be <= endIndex " + endIndex); + } + if (beginIndex == 0 && endIndex == length) { + return this; + } + return new SourceSpan(lineIndex, columnIndex + beginIndex, inputIndex + beginIndex, endIndex - beginIndex); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + SourceSpan that = (SourceSpan) o; + return lineIndex == that.lineIndex && + columnIndex == that.columnIndex && + inputIndex == that.inputIndex && + length == that.length; + } + + @Override + public int hashCode() { + return Objects.hash(lineIndex, columnIndex, inputIndex, length); + } + + @Override + public String toString() { + return "SourceSpan{" + + "line=" + lineIndex + + ", column=" + columnIndex + + ", input=" + inputIndex + + ", length=" + length + + "}"; + } +} diff --git 
a/commonmark/src/main/java/org/commonmark/node/SourceSpans.java b/commonmark/src/main/java/org/commonmark/node/SourceSpans.java new file mode 100644 index 000000000..975d7fbdb --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/node/SourceSpans.java @@ -0,0 +1,52 @@ +package org.commonmark.node; + +import java.util.ArrayList; +import java.util.List; + +/** + * A list of source spans that can be added to. Takes care of merging adjacent source spans. + * + * @since 0.16.0 + */ +public class SourceSpans { + + private List sourceSpans; + + public static SourceSpans empty() { + return new SourceSpans(); + } + + public List getSourceSpans() { + return sourceSpans != null ? sourceSpans : List.of(); + } + + public void addAllFrom(Iterable nodes) { + for (Node node : nodes) { + addAll(node.getSourceSpans()); + } + } + + public void addAll(List other) { + if (other.isEmpty()) { + return; + } + + if (sourceSpans == null) { + sourceSpans = new ArrayList<>(); + } + + if (sourceSpans.isEmpty()) { + sourceSpans.addAll(other); + } else { + int lastIndex = sourceSpans.size() - 1; + SourceSpan a = sourceSpans.get(lastIndex); + SourceSpan b = other.get(0); + if (a.getInputIndex() + a.getLength() == b.getInputIndex()) { + sourceSpans.set(lastIndex, SourceSpan.of(a.getLineIndex(), a.getColumnIndex(), a.getInputIndex(), a.getLength() + b.getLength())); + sourceSpans.addAll(other.subList(1, other.size())); + } else { + sourceSpans.addAll(other); + } + } + } +} diff --git a/commonmark/src/main/java/org/commonmark/node/StrongEmphasis.java b/commonmark/src/main/java/org/commonmark/node/StrongEmphasis.java index dbff571cd..0dbeed3df 100644 --- a/commonmark/src/main/java/org/commonmark/node/StrongEmphasis.java +++ b/commonmark/src/main/java/org/commonmark/node/StrongEmphasis.java @@ -1,5 +1,13 @@ package org.commonmark.node; +/** + * Strong emphasis, e.g.: + *

    
    + * Some **strong emphasis** or __strong emphasis__
    + * 
    + * + * @see CommonMark Spec: Emphasis and strong emphasis + */ public class StrongEmphasis extends Node implements Delimited { private String delimiter; diff --git a/commonmark/src/main/java/org/commonmark/node/Text.java b/commonmark/src/main/java/org/commonmark/node/Text.java index f16fc907b..9a04c41c1 100644 --- a/commonmark/src/main/java/org/commonmark/node/Text.java +++ b/commonmark/src/main/java/org/commonmark/node/Text.java @@ -1,5 +1,15 @@ package org.commonmark.node; +/** + * A text node, e.g. in: + *
    + * foo *bar*
    + * 
    + *

    + * The foo is a text node, and the bar inside the emphasis is also a text node. + * + * @see CommonMark Spec + */ public class Text extends Node { private String literal; diff --git a/commonmark/src/main/java/org/commonmark/node/ThematicBreak.java b/commonmark/src/main/java/org/commonmark/node/ThematicBreak.java index f81abaa31..a31131e07 100644 --- a/commonmark/src/main/java/org/commonmark/node/ThematicBreak.java +++ b/commonmark/src/main/java/org/commonmark/node/ThematicBreak.java @@ -1,9 +1,34 @@ package org.commonmark.node; +/** + * A thematic break, e.g. between text: + *

    + * Some text
    + *
    + * ___
    + *
    + * Some other text.
    + * 
    + * + * @see CommonMark Spec + */ public class ThematicBreak extends Block { + private String literal; + @Override public void accept(Visitor visitor) { visitor.visit(this); } + + /** + * @return the source literal that represents this node, if available + */ + public String getLiteral() { + return literal; + } + + public void setLiteral(String literal) { + this.literal = literal; + } } diff --git a/commonmark/src/main/java/org/commonmark/package-info.java b/commonmark/src/main/java/org/commonmark/package-info.java index e3f0e0572..b683017f6 100644 --- a/commonmark/src/main/java/org/commonmark/package-info.java +++ b/commonmark/src/main/java/org/commonmark/package-info.java @@ -1,10 +1,10 @@ /** * Root package of commonmark-java - *

    *

      *
    • {@link org.commonmark.parser} for parsing input text to AST nodes
    • *
    • {@link org.commonmark.node} for AST node types and visitors
    • *
    • {@link org.commonmark.renderer.html} for HTML rendering
    • + *
    • {@link org.commonmark.renderer.markdown} for Markdown rendering
    • *
    */ package org.commonmark; diff --git a/commonmark/src/main/java/org/commonmark/parser/IncludeSourceSpans.java b/commonmark/src/main/java/org/commonmark/parser/IncludeSourceSpans.java new file mode 100644 index 000000000..91d2b4e00 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/parser/IncludeSourceSpans.java @@ -0,0 +1,22 @@ +package org.commonmark.parser; + +/** + * Whether to include {@link org.commonmark.node.SourceSpan} or not while parsing, + * see {@link Parser.Builder#includeSourceSpans(IncludeSourceSpans)}. + * + * @since 0.16.0 + */ +public enum IncludeSourceSpans { + /** + * Do not include source spans. + */ + NONE, + /** + * Include source spans on {@link org.commonmark.node.Block} nodes. + */ + BLOCKS, + /** + * Include source spans on block nodes and inline nodes. + */ + BLOCKS_AND_INLINES, +} diff --git a/commonmark/src/main/java/org/commonmark/parser/InlineParser.java b/commonmark/src/main/java/org/commonmark/parser/InlineParser.java index 492c3cc8a..49043a64f 100644 --- a/commonmark/src/main/java/org/commonmark/parser/InlineParser.java +++ b/commonmark/src/main/java/org/commonmark/parser/InlineParser.java @@ -8,8 +8,8 @@ public interface InlineParser { /** - * @param input the content to parse as inline + * @param lines the source content to parse as inline * @param node the node to append resulting nodes to (as children) */ - void parse(String input, Node node); + void parse(SourceLines lines, Node node); } diff --git a/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java b/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java index 467742e2c..12007610b 100644 --- a/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java +++ b/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java @@ -1,9 +1,12 @@ package org.commonmark.parser; import org.commonmark.node.LinkReferenceDefinition; +import org.commonmark.parser.beta.LinkProcessor; +import 
org.commonmark.parser.beta.InlineContentParserFactory; import org.commonmark.parser.delimiter.DelimiterProcessor; import java.util.List; +import java.util.Set; /** * Context for inline parsing. @@ -11,15 +14,47 @@ public interface InlineParserContext { /** - * @return custom delimiter processors that have been configured with {@link Parser.Builder#customDelimiterProcessor(DelimiterProcessor)} + * @return custom inline content parsers that have been configured with + * {@link Parser.Builder#customInlineContentParserFactory(InlineContentParserFactory)} + */ + List getCustomInlineContentParserFactories(); + + /** + * @return custom delimiter processors that have been configured with + * {@link Parser.Builder#customDelimiterProcessor(DelimiterProcessor)} */ List getCustomDelimiterProcessors(); + /** + * @return custom link processors that have been configured with {@link Parser.Builder#linkProcessor}. + */ + List getCustomLinkProcessors(); + + /** + * @return custom link markers that have been configured with {@link Parser.Builder#linkMarker}. + */ + Set getCustomLinkMarkers(); + /** * Look up a {@link LinkReferenceDefinition} for a given label. + *

    + * Note that the passed in label does not need to be normalized; implementations are responsible for doing the + * normalization before lookup. * * @param label the link label to look up * @return the definition if one exists, {@code null} otherwise + * @deprecated use {@link #getDefinition} with {@link LinkReferenceDefinition} instead */ + @Deprecated LinkReferenceDefinition getLinkReferenceDefinition(String label); + + /** + * Look up a definition of a type for a given label. + *

    + * Note that the passed in label does not need to be normalized; implementations are responsible for doing the + * normalization before lookup. + * + * @return the definition if one exists, null otherwise + */ + D getDefinition(Class type, String label); } diff --git a/commonmark/src/main/java/org/commonmark/parser/InlineParserFactory.java b/commonmark/src/main/java/org/commonmark/parser/InlineParserFactory.java index 34c384a8a..c1640e9d8 100644 --- a/commonmark/src/main/java/org/commonmark/parser/InlineParserFactory.java +++ b/commonmark/src/main/java/org/commonmark/parser/InlineParserFactory.java @@ -4,5 +4,9 @@ * Factory for custom inline parser. */ public interface InlineParserFactory { + + /** + * Create an {@link InlineParser} to use for parsing inlines. This is called once per parsed document. + */ InlineParser create(InlineParserContext inlineParserContext); } diff --git a/commonmark/src/main/java/org/commonmark/parser/Parser.java b/commonmark/src/main/java/org/commonmark/parser/Parser.java index 5e15158ad..8faac789b 100644 --- a/commonmark/src/main/java/org/commonmark/parser/Parser.java +++ b/commonmark/src/main/java/org/commonmark/parser/Parser.java @@ -1,19 +1,21 @@ package org.commonmark.parser; import org.commonmark.Extension; +import org.commonmark.internal.Definitions; import org.commonmark.internal.DocumentParser; import org.commonmark.internal.InlineParserContextImpl; import org.commonmark.internal.InlineParserImpl; import org.commonmark.node.*; +import org.commonmark.parser.beta.LinkInfo; +import org.commonmark.parser.beta.LinkProcessor; +import org.commonmark.parser.beta.InlineContentParserFactory; +import org.commonmark.parser.beta.LinkResult; import org.commonmark.parser.block.BlockParserFactory; import org.commonmark.parser.delimiter.DelimiterProcessor; import java.io.IOException; import java.io.Reader; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Set; +import java.util.*; /** @@ 
-28,20 +30,31 @@ public class Parser { private final List blockParserFactories; + private final List inlineContentParserFactories; private final List delimiterProcessors; + private final List linkProcessors; + private final Set linkMarkers; private final InlineParserFactory inlineParserFactory; private final List postProcessors; + private final IncludeSourceSpans includeSourceSpans; + private final int maxOpenBlockParsers; private Parser(Builder builder) { this.blockParserFactories = DocumentParser.calculateBlockParserFactories(builder.blockParserFactories, builder.enabledBlockTypes); this.inlineParserFactory = builder.getInlineParserFactory(); this.postProcessors = builder.postProcessors; + this.inlineContentParserFactories = builder.inlineContentParserFactories; this.delimiterProcessors = builder.delimiterProcessors; + this.linkProcessors = builder.linkProcessors; + this.linkMarkers = builder.linkMarkers; + this.includeSourceSpans = builder.includeSourceSpans; + this.maxOpenBlockParsers = builder.maxOpenBlockParsers; // Try to construct an inline parser. Invalid configuration might result in an exception, which we want to // detect as soon as possible. 
- this.inlineParserFactory.create(new InlineParserContextImpl(delimiterProcessors, - Collections.emptyMap())); + var context = new InlineParserContextImpl( + inlineContentParserFactories, delimiterProcessors, linkProcessors, linkMarkers, new Definitions()); + this.inlineParserFactory.create(context); } /** @@ -62,9 +75,7 @@ public static Builder builder() { * @return the root node */ public Node parse(String input) { - if (input == null) { - throw new NullPointerException("input must not be null"); - } + Objects.requireNonNull(input, "input must not be null"); DocumentParser documentParser = createDocumentParser(); Node document = documentParser.parse(input); return postProcess(document); @@ -89,17 +100,15 @@ public Node parse(String input) { * @throws IOException when reading throws an exception */ public Node parseReader(Reader input) throws IOException { - if (input == null) { - throw new NullPointerException("input must not be null"); - } - + Objects.requireNonNull(input, "input must not be null"); DocumentParser documentParser = createDocumentParser(); Node document = documentParser.parse(input); return postProcess(document); } private DocumentParser createDocumentParser() { - return new DocumentParser(blockParserFactories, inlineParserFactory, delimiterProcessors); + return new DocumentParser(blockParserFactories, inlineParserFactory, inlineContentParserFactories, + delimiterProcessors, linkProcessors, linkMarkers, includeSourceSpans, maxOpenBlockParsers); } private Node postProcess(Node document) { @@ -114,10 +123,15 @@ private Node postProcess(Node document) { */ public static class Builder { private final List blockParserFactories = new ArrayList<>(); + private final List inlineContentParserFactories = new ArrayList<>(); private final List delimiterProcessors = new ArrayList<>(); + private final List linkProcessors = new ArrayList<>(); private final List postProcessors = new ArrayList<>(); + private final Set linkMarkers = new HashSet<>(); private Set> 
enabledBlockTypes = DocumentParser.getDefaultBlockParserTypes(); private InlineParserFactory inlineParserFactory; + private IncludeSourceSpans includeSourceSpans = IncludeSourceSpans.NONE; + private int maxOpenBlockParsers = Integer.MAX_VALUE; /** * @return the configured {@link Parser} @@ -131,9 +145,7 @@ public Parser build() { * @return {@code this} */ public Builder extensions(Iterable extensions) { - if (extensions == null) { - throw new NullPointerException("extensions must not be null"); - } + Objects.requireNonNull(extensions, "extensions must not be null"); for (Extension extension : extensions) { if (extension instanceof ParserExtension) { ParserExtension parserExtension = (ParserExtension) extension; @@ -162,24 +174,58 @@ public Builder extensions(Iterable extensions) { * E.g., to only parse headings and lists: *

              *     {@code
    -         *     Parser.builder().enabledBlockTypes(new HashSet<>(Arrays.asList(Heading.class, ListBlock.class)));
    +         *     Parser.builder().enabledBlockTypes(Set.of(Heading.class, ListBlock.class));
              *     }
              * 
    * * @param enabledBlockTypes A list of block nodes the parser will parse. - * If this list is empty, the parser will not recognize any CommonMark core features. + * If this list is empty, the parser will not recognize any CommonMark core features. * @return {@code this} */ public Builder enabledBlockTypes(Set> enabledBlockTypes) { - if (enabledBlockTypes == null) { - throw new NullPointerException("enabledBlockTypes must not be null"); - } + Objects.requireNonNull(enabledBlockTypes, "enabledBlockTypes must not be null"); + DocumentParser.checkEnabledBlockTypes(enabledBlockTypes); this.enabledBlockTypes = enabledBlockTypes; return this; } /** - * Adds a custom block parser factory. + * Whether to calculate source positions for parsed {@link Node Nodes}, see {@link Node#getSourceSpans()}. + *

    + * By default, source spans are disabled. + * + * @param includeSourceSpans which kind of source spans should be included + * @return {@code this} + * @since 0.16.0 + */ + public Builder includeSourceSpans(IncludeSourceSpans includeSourceSpans) { + this.includeSourceSpans = includeSourceSpans; + return this; + } + + /** + * Limit how many block parsers may be open at once while parsing. + *

    + * Once the limit is reached, additional block starts are treated as plain text instead of + * creating deeper nested block structure. + *

    + * The document root parser is not counted. The default is unlimited, so callers that keep + * using {@code Parser.builder().build()} preserve behavior. + * + * @param maxOpenBlockParsers maximum number of open non-document block parsers, must be + * zero or greater + * @return {@code this} + */ + public Builder maxOpenBlockParsers(int maxOpenBlockParsers) { + if (maxOpenBlockParsers < 0) { + throw new IllegalArgumentException("maxOpenBlockParsers must be >= 0"); + } + this.maxOpenBlockParsers = maxOpenBlockParsers; + return this; + } + + /** + * Add a custom block parser factory. *

    * Note that custom factories are applied before the built-in factories. This is so that * extensions can change how some syntax is parsed that would otherwise be handled by built-in factories. @@ -189,35 +235,78 @@ public Builder enabledBlockTypes(Set> enabledBlockTypes) * @return {@code this} */ public Builder customBlockParserFactory(BlockParserFactory blockParserFactory) { - if (blockParserFactory == null) { - throw new NullPointerException("blockParserFactory must not be null"); - } + Objects.requireNonNull(blockParserFactory, "blockParserFactory must not be null"); blockParserFactories.add(blockParserFactory); return this; } /** - * Adds a custom delimiter processor. + * Add a factory for a custom inline content parser, for extending inline parsing or overriding built-in parsing. + *

    + * Note that parsers are triggered based on a special character as specified by + * {@link InlineContentParserFactory#getTriggerCharacters()}. It is possible to register multiple parsers for the same + * character, or even for some built-in special character such as {@code `}. The custom parsers are tried first + * in order in which they are registered, and then the built-in ones. + */ + public Builder customInlineContentParserFactory(InlineContentParserFactory inlineContentParserFactory) { + Objects.requireNonNull(inlineContentParserFactory, "inlineContentParser must not be null"); + inlineContentParserFactories.add(inlineContentParserFactory); + return this; + } + + /** + * Add a custom delimiter processor for inline parsing. *

    * Note that multiple delimiter processors with the same characters can be added, as long as they have a * different minimum length. In that case, the processor with the shortest matching length is used. Adding more * than one delimiter processor with the same character and minimum length is invalid. + *

    + * If you want more control over how parsing is done, you might want to use + * {@link #customInlineContentParserFactory} instead. * * @param delimiterProcessor a delimiter processor implementation * @return {@code this} */ public Builder customDelimiterProcessor(DelimiterProcessor delimiterProcessor) { - if (delimiterProcessor == null) { - throw new NullPointerException("delimiterProcessor must not be null"); - } + Objects.requireNonNull(delimiterProcessor, "delimiterProcessor must not be null"); delimiterProcessors.add(delimiterProcessor); return this; } + /** + * Add a custom link/image processor for inline parsing. + *

    + * Multiple link processors can be added, and will be tried in order in which they were added. If no link + * processor applies, the normal behavior applies. That means these can override built-in link parsing. + * + * @param linkProcessor a link processor implementation + * @return {@code this} + */ + public Builder linkProcessor(LinkProcessor linkProcessor) { + Objects.requireNonNull(linkProcessor, "linkProcessor must not be null"); + linkProcessors.add(linkProcessor); + return this; + } + + /** + * Add a custom link marker for link processing. A link marker is a character like {@code !} which, if it + * appears before the {@code [} of a link, changes the meaning of the link. + *

    + * If a link marker followed by a valid link is parsed, the {@link org.commonmark.parser.beta.LinkInfo} + * that is passed to {@link LinkProcessor} will have its {@link LinkInfo#marker()} set. A link processor should + * check the {@link Text#getLiteral()} and then do any processing, and will probably want to use {@link LinkResult#includeMarker()}. + * + * @param linkMarker a link marker character + * @return {@code this} + */ + public Builder linkMarker(Character linkMarker) { + Objects.requireNonNull(linkMarker, "linkMarker must not be null"); + linkMarkers.add(linkMarker); + return this; + } + public Builder postProcessor(PostProcessor postProcessor) { - if (postProcessor == null) { - throw new NullPointerException("postProcessor must not be null"); - } + Objects.requireNonNull(postProcessor, "postProcessor must not be null"); postProcessors.add(postProcessor); return this; } @@ -234,7 +323,6 @@ public Builder postProcessor(PostProcessor postProcessor) { * link ([title](http://)) * image (![alt](http://)) *

    - *

    * Note that if this method is not called or the inline parser factory is set to null, then the default * implementation will be used. * @@ -249,13 +337,9 @@ public Builder inlineParserFactory(InlineParserFactory inlineParserFactory) { private InlineParserFactory getInlineParserFactory() { if (inlineParserFactory != null) { return inlineParserFactory; + } else { + return InlineParserImpl::new; } - return new InlineParserFactory() { - @Override - public InlineParser create(InlineParserContext inlineParserContext) { - return new InlineParserImpl(inlineParserContext); - } - }; } } diff --git a/commonmark/src/main/java/org/commonmark/parser/SourceLine.java b/commonmark/src/main/java/org/commonmark/parser/SourceLine.java new file mode 100644 index 000000000..92a8cdfaf --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/parser/SourceLine.java @@ -0,0 +1,47 @@ +package org.commonmark.parser; + +import org.commonmark.node.SourceSpan; + +import java.util.Objects; + +/** + * A line or part of a line from the input source. 
+ * + * @since 0.16.0 + */ +public class SourceLine { + + private final CharSequence content; + private final SourceSpan sourceSpan; + + public static SourceLine of(CharSequence content, SourceSpan sourceSpan) { + return new SourceLine(content, sourceSpan); + } + + private SourceLine(CharSequence content, SourceSpan sourceSpan) { + this.content = Objects.requireNonNull(content, "content must not be null"); + this.sourceSpan = sourceSpan; + } + + public CharSequence getContent() { + return content; + } + + public SourceSpan getSourceSpan() { + return sourceSpan; + } + + public SourceLine substring(int beginIndex, int endIndex) { + CharSequence newContent = content.subSequence(beginIndex, endIndex); + SourceSpan newSourceSpan = null; + if (sourceSpan != null) { + int length = endIndex - beginIndex; + if (length != 0) { + int columnIndex = sourceSpan.getColumnIndex() + beginIndex; + int inputIndex = sourceSpan.getInputIndex() + beginIndex; + newSourceSpan = SourceSpan.of(sourceSpan.getLineIndex(), columnIndex, inputIndex, length); + } + } + return SourceLine.of(newContent, newSourceSpan); + } +} diff --git a/commonmark/src/main/java/org/commonmark/parser/SourceLines.java b/commonmark/src/main/java/org/commonmark/parser/SourceLines.java new file mode 100644 index 000000000..0b4290341 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/parser/SourceLines.java @@ -0,0 +1,66 @@ +package org.commonmark.parser; + +import org.commonmark.node.SourceSpan; + +import java.util.ArrayList; +import java.util.List; + +/** + * A set of lines ({@link SourceLine}) from the input source. 
+ * + * @since 0.16.0 + */ +public class SourceLines { + + private final List lines = new ArrayList<>(); + + public static SourceLines empty() { + return new SourceLines(); + } + + public static SourceLines of(SourceLine sourceLine) { + SourceLines sourceLines = new SourceLines(); + sourceLines.addLine(sourceLine); + return sourceLines; + } + + public static SourceLines of(List sourceLines) { + SourceLines result = new SourceLines(); + result.lines.addAll(sourceLines); + return result; + } + + public void addLine(SourceLine sourceLine) { + lines.add(sourceLine); + } + + public List getLines() { + return lines; + } + + public boolean isEmpty() { + return lines.isEmpty(); + } + + public String getContent() { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < lines.size(); i++) { + if (i != 0) { + sb.append('\n'); + } + sb.append(lines.get(i).getContent()); + } + return sb.toString(); + } + + public List getSourceSpans() { + List sourceSpans = new ArrayList<>(); + for (SourceLine line : lines) { + SourceSpan sourceSpan = line.getSourceSpan(); + if (sourceSpan != null) { + sourceSpans.add(sourceSpan); + } + } + return sourceSpans; + } +} diff --git a/commonmark/src/main/java/org/commonmark/parser/beta/InlineContentParser.java b/commonmark/src/main/java/org/commonmark/parser/beta/InlineContentParser.java new file mode 100644 index 000000000..bc5c9a54f --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/parser/beta/InlineContentParser.java @@ -0,0 +1,21 @@ +package org.commonmark.parser.beta; + +/** + * Parser for a type of inline content. Registered via a {@link InlineContentParserFactory} and created by its + * {@link InlineContentParserFactory#create() create} method. The lifetime of this is tied to each inline content + * snippet that is parsed, as a new instance is created for each. + */ +public interface InlineContentParser { + + /** + * Try to parse inline content starting from the current position. 
Note that the character at the current position + * is one of {@link InlineContentParserFactory#getTriggerCharacters()} of the factory that created this parser. + *

    + * For a given inline content snippet that is being parsed, this method can be called multiple times: each time a + * trigger character is encountered. + * + * @param inlineParserState the current state of the inline parser + * @return the result of parsing; can indicate that this parser is not interested, or that parsing was successful + */ + ParsedInline tryParse(InlineParserState inlineParserState); +} diff --git a/commonmark/src/main/java/org/commonmark/parser/beta/InlineContentParserFactory.java b/commonmark/src/main/java/org/commonmark/parser/beta/InlineContentParserFactory.java new file mode 100644 index 000000000..c86f93a41 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/parser/beta/InlineContentParserFactory.java @@ -0,0 +1,24 @@ +package org.commonmark.parser.beta; + +import java.util.Set; + +/** + * A factory for extending inline content parsing. + *

    + * See {@link org.commonmark.parser.Parser.Builder#customInlineContentParserFactory} for how to register it. + */ +public interface InlineContentParserFactory { + + /** + * An inline content parser needs to have a special "trigger" character which activates it. When this character is + * encountered during inline parsing, {@link InlineContentParser#tryParse} is called with the current parser state. + * It can also register for more than one trigger character. + */ + Set getTriggerCharacters(); + + /** + * Create an {@link InlineContentParser} that will do the parsing. Create is called once per text snippet of inline + * content inside block structures, and then called each time a trigger character is encountered. + */ + InlineContentParser create(); +} diff --git a/commonmark/src/main/java/org/commonmark/parser/beta/InlineParserState.java b/commonmark/src/main/java/org/commonmark/parser/beta/InlineParserState.java new file mode 100644 index 000000000..e434d45d6 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/parser/beta/InlineParserState.java @@ -0,0 +1,13 @@ +package org.commonmark.parser.beta; + +public interface InlineParserState { + + /** + * Return a scanner for the input for the current position (on the trigger character that the inline parser was + * added for). + *

    + * Note that this always returns the same instance, if you want to backtrack you need to use + * {@link Scanner#position()} and {@link Scanner#setPosition(Position)}. + */ + Scanner scanner(); +} diff --git a/commonmark/src/main/java/org/commonmark/parser/beta/LinkInfo.java b/commonmark/src/main/java/org/commonmark/parser/beta/LinkInfo.java new file mode 100644 index 000000000..b2fda57e4 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/parser/beta/LinkInfo.java @@ -0,0 +1,69 @@ +package org.commonmark.parser.beta; + +import org.commonmark.node.Text; + +/** + * A parsed link/image. There are different types of links. + *

    + * Inline links: + *

    + * [text](destination)
    + * [text](destination "title")
    + * 
    + *

    + * Reference links, which have different subtypes. Full: + *

    + * [text][label]
    + * 
    + * Collapsed (label is ""): + *
    + * [text][]
    + * 
    + * Shortcut (label is null): + *
    + * [text]
    + * 
    + * Images use the same syntax as links but with a {@code !} {@link #marker()} front, e.g. {@code ![text](destination)}. + */ +public interface LinkInfo { + + /** + * The marker if present, or null. A marker is e.g. {@code !} for an image, or a custom marker as specified in + * {@link org.commonmark.parser.Parser.Builder#linkMarker}. + */ + Text marker(); + + /** + * The text node of the opening bracket {@code [}. + */ + Text openingBracket(); + + /** + * The text between the first brackets, e.g. `foo` in `[foo][bar]`. + */ + String text(); + + /** + * The label, or null for inline links or for shortcut links (in which case {@link #text()} should be used as the label). + */ + String label(); + + /** + * The destination if available, e.g. in `[foo](destination)`, or null + */ + String destination(); + + /** + * The title if available, e.g. in `[foo](destination "title")`, or null + */ + String title(); + + /** + * The position after the closing text bracket, e.g.: + *
    +     * [foo][bar]
    +     *      ^
    +     * 
    + */ + Position afterTextBracket(); +} diff --git a/commonmark/src/main/java/org/commonmark/parser/beta/LinkProcessor.java b/commonmark/src/main/java/org/commonmark/parser/beta/LinkProcessor.java new file mode 100644 index 000000000..3e448fd91 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/parser/beta/LinkProcessor.java @@ -0,0 +1,40 @@ +package org.commonmark.parser.beta; + +import org.commonmark.parser.InlineParserContext; + +/** + * An interface to decide how links/images are handled. + *

    + * Implementations need to be registered with a parser via {@link org.commonmark.parser.Parser.Builder#linkProcessor}. + * Then, when inline parsing is run, each parsed link/image is passed to the processor. This includes links like these: + *

    + *

    
    + * [text](destination)
    + * [text]
    + * [text][]
    + * [text][label]
    + * 
    + * And images: + *
    
    + * ![text](destination)
    + * ![text]
    + * ![text][]
    + * ![text][label]
    + * 
    + * See {@link LinkInfo} for accessing various parts of the parsed link/image. + *

    + * The processor can then inspect the link/image and decide what to do with it by returning the appropriate + * {@link LinkResult}. If it returns {@link LinkResult#none()}, the next registered processor is tried. If none of them + * apply, the link is handled as it normally would. + */ +public interface LinkProcessor { + + /** + * @param linkInfo information about the parsed link/image + * @param scanner the scanner at the current position after the parsed link/image + * @param context context for inline parsing + * @return what to do with the link/image, e.g. do nothing (try the next processor), wrap the text in a node, or + * replace the link/image with a node + */ + LinkResult process(LinkInfo linkInfo, Scanner scanner, InlineParserContext context); +} diff --git a/commonmark/src/main/java/org/commonmark/parser/beta/LinkResult.java b/commonmark/src/main/java/org/commonmark/parser/beta/LinkResult.java new file mode 100644 index 000000000..43bc82af8 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/parser/beta/LinkResult.java @@ -0,0 +1,50 @@ +package org.commonmark.parser.beta; + +import org.commonmark.internal.inline.LinkResultImpl; +import org.commonmark.node.Node; + +/** + * What to do with a link/image processed by {@link LinkProcessor}. + */ +public interface LinkResult { + /** + * Link not handled by processor. + */ + static LinkResult none() { + return null; + } + + /** + * Wrap the link text in a node. This is the normal behavior for links, e.g. for this: + *

    
    +     * [my *text*](destination)
    +     * 
    + * The text is {@code my *text*}, a text node and emphasis. The text is wrapped in a + * {@link org.commonmark.node.Link} node, which means the text is added as child nodes to it. + * + * @param node the node to which the link text nodes will be added as child nodes + * @param position the position to continue parsing from + */ + static LinkResult wrapTextIn(Node node, Position position) { + return new LinkResultImpl(LinkResultImpl.Type.WRAP, node, position); + } + + /** + * Replace the link with a node. E.g. for this: + *
    
    +     * [^foo]
    +     * 
    + * The processor could decide to create a {@code FootnoteReference} node instead which replaces the link. + * + * @param node the node to replace the link with + * @param position the position to continue parsing from + */ + static LinkResult replaceWith(Node node, Position position) { + return new LinkResultImpl(LinkResultImpl.Type.REPLACE, node, position); + } + + /** + * If a {@link LinkInfo#marker()} is present, include it in processing (i.e. treat it the same way as the brackets). + */ + LinkResult includeMarker(); +} diff --git a/commonmark/src/main/java/org/commonmark/parser/beta/ParsedInline.java b/commonmark/src/main/java/org/commonmark/parser/beta/ParsedInline.java new file mode 100644 index 000000000..5d1402cae --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/parser/beta/ParsedInline.java @@ -0,0 +1,24 @@ +package org.commonmark.parser.beta; + +import org.commonmark.internal.inline.ParsedInlineImpl; +import org.commonmark.node.Node; + +import java.util.Objects; + +/** + * The result of a single inline parser. Use the static methods to create instances. + *

    + * This interface is not intended to be implemented by clients. + */ +public interface ParsedInline { + + static ParsedInline none() { + return null; + } + + static ParsedInline of(Node node, Position position) { + Objects.requireNonNull(node, "node must not be null"); + Objects.requireNonNull(position, "position must not be null"); + return new ParsedInlineImpl(node, position); + } +} diff --git a/commonmark/src/main/java/org/commonmark/parser/beta/Position.java b/commonmark/src/main/java/org/commonmark/parser/beta/Position.java new file mode 100644 index 000000000..3dbb4870f --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/parser/beta/Position.java @@ -0,0 +1,16 @@ +package org.commonmark.parser.beta; + +/** + * Position within a {@link Scanner}. This is intentionally kept opaque so as not to expose the internal structure of + * the Scanner. + */ +public class Position { + + final int lineIndex; + final int index; + + Position(int lineIndex, int index) { + this.lineIndex = lineIndex; + this.index = index; + } +} diff --git a/commonmark/src/main/java/org/commonmark/parser/beta/Scanner.java b/commonmark/src/main/java/org/commonmark/parser/beta/Scanner.java new file mode 100644 index 000000000..324639493 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/parser/beta/Scanner.java @@ -0,0 +1,281 @@ +package org.commonmark.parser.beta; + +import org.commonmark.node.SourceSpan; +import org.commonmark.parser.SourceLine; +import org.commonmark.parser.SourceLines; +import org.commonmark.text.CharMatcher; + +import java.util.List; + +public class Scanner { + + /** + * Character representing the end of input source (or outside of the text in case of the "previous" methods). + *

    + * Note that we can use NULL to represent this because CommonMark does not allow those in the input (we replace them + * in the beginning of parsing). + */ + public static final char END = '\0'; + + // Lines without newlines at the end. The scanner will yield `\n` between lines because they're significant for + // parsing and the final output. There is no `\n` after the last line. + private final List lines; + // Which line we're at. + private int lineIndex; + // The index within the line. If index == length(), we pretend that there's a `\n` and only advance after we yield + // that. + private int index; + + // Current line or "" if at the end of the lines (using "" instead of null saves a null check) + private SourceLine line = SourceLine.of("", null); + private int lineLength = 0; + + Scanner(List lines, int lineIndex, int index) { + this.lines = lines; + this.lineIndex = lineIndex; + this.index = index; + if (!lines.isEmpty()) { + checkPosition(lineIndex, index); + setLine(lines.get(lineIndex)); + } + } + + public static Scanner of(SourceLines lines) { + return new Scanner(lines.getLines(), 0, 0); + } + + public char peek() { + if (index < lineLength) { + return line.getContent().charAt(index); + } else { + if (lineIndex < lines.size() - 1) { + return '\n'; + } else { + // Don't return newline for end of last line + return END; + } + } + } + + public int peekCodePoint() { + if (index < lineLength) { + char c = line.getContent().charAt(index); + if (Character.isHighSurrogate(c) && index + 1 < lineLength) { + char low = line.getContent().charAt(index + 1); + if (Character.isLowSurrogate(low)) { + return Character.toCodePoint(c, low); + } + } + return c; + } else { + if (lineIndex < lines.size() - 1) { + return '\n'; + } else { + // Don't return newline for end of last line + return END; + } + } + } + + public int peekPreviousCodePoint() { + if (index > 0) { + int prev = index - 1; + char c = line.getContent().charAt(prev); + if (Character.isLowSurrogate(c) && 
prev > 0) { + char high = line.getContent().charAt(prev - 1); + if (Character.isHighSurrogate(high)) { + return Character.toCodePoint(high, c); + } + } + return c; + } else { + if (lineIndex > 0) { + return '\n'; + } else { + return END; + } + } + } + + public boolean hasNext() { + if (index < lineLength) { + return true; + } else { + // No newline at end of last line + return lineIndex < lines.size() - 1; + } + } + + public void next() { + index++; + if (index > lineLength) { + lineIndex++; + if (lineIndex < lines.size()) { + setLine(lines.get(lineIndex)); + } else { + setLine(SourceLine.of("", null)); + } + index = 0; + } + } + + /** + * Check if the specified char is next and advance the position. + * + * @param c the char to check (including newline characters) + * @return true if matched and position was advanced, false otherwise + */ + public boolean next(char c) { + if (peek() == c) { + next(); + return true; + } else { + return false; + } + } + + /** + * Check if we have the specified content on the line and advanced the position. Note that if you want to match + * newline characters, use {@link #next(char)}. 
+ * + * @param content the text content to match on a single line (excluding newline characters) + * @return true if matched and position was advanced, false otherwise + */ + public boolean next(String content) { + if (index < lineLength && index + content.length() <= lineLength) { + // Can't use startsWith because it's not available on CharSequence + for (int i = 0; i < content.length(); i++) { + if (line.getContent().charAt(index + i) != content.charAt(i)) { + return false; + } + } + index += content.length(); + return true; + } else { + return false; + } + } + + public int matchMultiple(char c) { + int count = 0; + while (peek() == c) { + count++; + next(); + } + return count; + } + + public int match(CharMatcher matcher) { + int count = 0; + while (matcher.matches(peek())) { + count++; + next(); + } + return count; + } + + public int whitespace() { + int count = 0; + while (true) { + switch (peek()) { + case ' ': + case '\t': + case '\n': + case '\u000B': + case '\f': + case '\r': + count++; + next(); + break; + default: + return count; + } + } + } + + public int find(char c) { + int count = 0; + while (true) { + char cur = peek(); + if (cur == Scanner.END) { + return -1; + } else if (cur == c) { + return count; + } + count++; + next(); + } + } + + public int find(CharMatcher matcher) { + int count = 0; + while (true) { + char c = peek(); + if (c == END) { + return -1; + } else if (matcher.matches(c)) { + return count; + } + count++; + next(); + } + } + + // Don't expose the int index, because it would be good if we could switch input to a List of lines later + // instead of one contiguous String. 
+ public Position position() { + return new Position(lineIndex, index); + } + + public void setPosition(Position position) { + checkPosition(position.lineIndex, position.index); + this.lineIndex = position.lineIndex; + this.index = position.index; + setLine(lines.get(this.lineIndex)); + } + + // For cases where the caller appends the result to a StringBuilder, we could offer another method to avoid some + // unnecessary copying. + public SourceLines getSource(Position begin, Position end) { + if (begin.lineIndex == end.lineIndex) { + // Shortcut for common case of text from a single line + SourceLine line = lines.get(begin.lineIndex); + CharSequence newContent = line.getContent().subSequence(begin.index, end.index); + SourceSpan newSourceSpan = null; + SourceSpan sourceSpan = line.getSourceSpan(); + if (sourceSpan != null) { + newSourceSpan = sourceSpan.subSpan(begin.index, end.index); + } + return SourceLines.of(SourceLine.of(newContent, newSourceSpan)); + } else { + SourceLines sourceLines = SourceLines.empty(); + + SourceLine firstLine = lines.get(begin.lineIndex); + sourceLines.addLine(firstLine.substring(begin.index, firstLine.getContent().length())); + + // Lines between begin and end (we are appending the full line) + for (int line = begin.lineIndex + 1; line < end.lineIndex; line++) { + sourceLines.addLine(lines.get(line)); + } + + SourceLine lastLine = lines.get(end.lineIndex); + sourceLines.addLine(lastLine.substring(0, end.index)); + return sourceLines; + } + } + + private void setLine(SourceLine line) { + this.line = line; + this.lineLength = line.getContent().length(); + } + + private void checkPosition(int lineIndex, int index) { + if (lineIndex < 0 || lineIndex >= lines.size()) { + throw new IllegalArgumentException("Line index " + lineIndex + " out of range, number of lines: " + lines.size()); + } + SourceLine line = lines.get(lineIndex); + if (index < 0 || index > line.getContent().length()) { + throw new IllegalArgumentException("Index " + index + 
" out of range, line length: " + line.getContent().length()); + } + } +} diff --git a/commonmark/src/main/java/org/commonmark/parser/beta/package-info.java b/commonmark/src/main/java/org/commonmark/parser/beta/package-info.java new file mode 100644 index 000000000..029d80507 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/parser/beta/package-info.java @@ -0,0 +1,4 @@ +/** + * Experimental APIs to use for extensions. APIs are subject to change if necessary. + */ +package org.commonmark.parser.beta; diff --git a/commonmark/src/main/java/org/commonmark/parser/block/AbstractBlockParser.java b/commonmark/src/main/java/org/commonmark/parser/block/AbstractBlockParser.java index f806d105c..4fb1a05ac 100644 --- a/commonmark/src/main/java/org/commonmark/parser/block/AbstractBlockParser.java +++ b/commonmark/src/main/java/org/commonmark/parser/block/AbstractBlockParser.java @@ -1,7 +1,12 @@ package org.commonmark.parser.block; import org.commonmark.node.Block; +import org.commonmark.node.DefinitionMap; +import org.commonmark.node.SourceSpan; import org.commonmark.parser.InlineParser; +import org.commonmark.parser.SourceLine; + +import java.util.List; public abstract class AbstractBlockParser implements BlockParser { @@ -21,7 +26,17 @@ public boolean canContain(Block childBlock) { } @Override - public void addLine(CharSequence line) { + public void addLine(SourceLine line) { + } + + @Override + public void addSourceSpan(SourceSpan sourceSpan) { + getBlock().addSourceSpan(sourceSpan); + } + + @Override + public List> getDefinitions() { + return List.of(); } @Override diff --git a/commonmark/src/main/java/org/commonmark/parser/block/BlockParser.java b/commonmark/src/main/java/org/commonmark/parser/block/BlockParser.java index 0c903198c..32ff2a474 100644 --- a/commonmark/src/main/java/org/commonmark/parser/block/BlockParser.java +++ b/commonmark/src/main/java/org/commonmark/parser/block/BlockParser.java @@ -1,7 +1,12 @@ package org.commonmark.parser.block; import 
org.commonmark.node.Block; +import org.commonmark.node.DefinitionMap; +import org.commonmark.node.SourceSpan; import org.commonmark.parser.InlineParser; +import org.commonmark.parser.SourceLine; + +import java.util.List; /** * Parser for a specific block node. @@ -21,7 +26,7 @@ public interface BlockParser { * Lazy continuation lines are lines that were rejected by this {@link #tryContinue(ParserState)} but didn't match * any other block parsers either. *

    - * If true is returned here, those lines will get added via {@link #addLine(CharSequence)}. For false, the block is + * If true is returned here, those lines will get added via {@link #addLine(SourceLine)}. For false, the block is * closed instead. */ boolean canHaveLazyContinuationLines(); @@ -32,7 +37,26 @@ public interface BlockParser { BlockContinue tryContinue(ParserState parserState); - void addLine(CharSequence line); + /** + * Add the part of a line that belongs to this block parser to parse (i.e. without any container block markers). + * Note that the line will only include a {@link SourceLine#getSourceSpan()} if source spans are enabled for inlines. + */ + void addLine(SourceLine line); + + /** + * Add a source span of the currently parsed block. The default implementation in {@link AbstractBlockParser} adds + * it to the block. Unless you have some complicated parsing where you need to check source positions, you don't + * need to override this. + * + * @since 0.16.0 + */ + void addSourceSpan(SourceSpan sourceSpan); + + /** + * Return definitions parsed by this parser. The definitions returned here can later be accessed during inline + * parsing via {@link org.commonmark.parser.InlineParserContext#getDefinition}. + */ + List> getDefinitions(); void closeBlock(); diff --git a/commonmark/src/main/java/org/commonmark/parser/block/BlockStart.java b/commonmark/src/main/java/org/commonmark/parser/block/BlockStart.java index d9e7a2b49..c41f1caa3 100644 --- a/commonmark/src/main/java/org/commonmark/parser/block/BlockStart.java +++ b/commonmark/src/main/java/org/commonmark/parser/block/BlockStart.java @@ -10,18 +10,59 @@ public abstract class BlockStart { protected BlockStart() { } + /** + * Result for when there is no block start. + */ public static BlockStart none() { return null; } + /** + * Start block(s) with the specified parser(s). + */ public static BlockStart of(BlockParser... 
blockParsers) { return new BlockStartImpl(blockParsers); } + /** + * Continue parsing at the specified index. + * + * @param newIndex the new index, see {@link ParserState#getIndex()} + */ public abstract BlockStart atIndex(int newIndex); + /** + * Continue parsing at the specified column (for tab handling). + * + * @param newColumn the new column, see {@link ParserState#getColumn()} + */ public abstract BlockStart atColumn(int newColumn); + /** + * @deprecated use {@link #replaceParagraphLines(int)} instead; please raise an issue if that doesn't work for you + * for some reason. + */ + @Deprecated public abstract BlockStart replaceActiveBlockParser(); + /** + * Replace a number of lines from the current paragraph (as returned by + * {@link MatchedBlockParser#getParagraphLines()}) with the new block. + *

    + * This is useful for parsing blocks that start with normal paragraphs and only have special marker syntax in later + * lines, e.g. in this: + * <pre>
    +     * Foo
    +     * ===
    +     * </pre>
    + * The <code>Foo</code> line is initially parsed as a normal paragraph, then <code>===</code> is parsed as a heading + * marker, replacing the 1 paragraph line before. The end result is a single <code>Heading</code> block. + * <p>
    + * Note that source spans from the replaced lines are automatically added to the new block. + * + * @param lines the number of lines to replace (at least 1); use {@link Integer#MAX_VALUE} to replace the whole + * paragraph + */ + public abstract BlockStart replaceParagraphLines(int lines); + } diff --git a/commonmark/src/main/java/org/commonmark/parser/block/MatchedBlockParser.java b/commonmark/src/main/java/org/commonmark/parser/block/MatchedBlockParser.java index d4cd9d471..c4619d8c2 100644 --- a/commonmark/src/main/java/org/commonmark/parser/block/MatchedBlockParser.java +++ b/commonmark/src/main/java/org/commonmark/parser/block/MatchedBlockParser.java @@ -1,5 +1,7 @@ package org.commonmark.parser.block; +import org.commonmark.parser.SourceLines; + /** * Open block parser that was last matched during the continue phase. This is different from the currently active * block parser, as an unmatched block is only closed when a new block is started. @@ -10,11 +12,11 @@ public interface MatchedBlockParser { BlockParser getMatchedBlockParser(); /** - * Returns the current content of the paragraph if the matched block is a paragraph. The content can be multiple - * lines separated by {@code '\n'}. + * Returns the current paragraph lines if the matched block is a paragraph. If you want to use some or all of the + * lines for starting a new block instead, use {@link BlockStart#replaceParagraphLines(int)}. 
* - * @return paragraph content or {@code null} + * @return paragraph content or an empty list */ - CharSequence getParagraphContent(); + SourceLines getParagraphLines(); } diff --git a/commonmark/src/main/java/org/commonmark/parser/block/ParserState.java b/commonmark/src/main/java/org/commonmark/parser/block/ParserState.java index 8c63e964e..b32bbaee5 100644 --- a/commonmark/src/main/java/org/commonmark/parser/block/ParserState.java +++ b/commonmark/src/main/java/org/commonmark/parser/block/ParserState.java @@ -1,5 +1,7 @@ package org.commonmark.parser.block; +import org.commonmark.parser.SourceLine; + /** * State of the parser that is used in block parsers. *

    <p><em>This interface is not intended to be implemented by clients.</em></p>

    @@ -7,9 +9,9 @@ public interface ParserState { /** - * @return the current line + * @return the current source line being parsed (full line) */ - CharSequence getLine(); + SourceLine getLine(); /** * @return the current index within the line (0-based) diff --git a/commonmark/src/main/java/org/commonmark/parser/delimiter/DelimiterProcessor.java b/commonmark/src/main/java/org/commonmark/parser/delimiter/DelimiterProcessor.java index 0e8bc6fac..3b6abf214 100644 --- a/commonmark/src/main/java/org/commonmark/parser/delimiter/DelimiterProcessor.java +++ b/commonmark/src/main/java/org/commonmark/parser/delimiter/DelimiterProcessor.java @@ -6,6 +6,8 @@ * Custom delimiter processor for additional delimiters besides {@code _} and {@code *}. *

    * Note that implementations of this need to be thread-safe, the same instance may be used by multiple parsers. + * + * @see org.commonmark.parser.beta.InlineContentParserFactory */ public interface DelimiterProcessor { @@ -27,27 +29,18 @@ public interface DelimiterProcessor { int getMinLength(); /** - * Determine how many (if any) of the delimiter characters should be used. + * Process the delimiter runs. *

    - * This allows implementations to decide how many characters to use based on the properties of the delimiter runs. - * An implementation can also return 0 when it doesn't want to allow this particular combination of delimiter runs. - * - * @param opener the opening delimiter run - * @param closer the closing delimiter run - * @return how many delimiters should be used; must not be greater than length of either opener or closer - */ - int getDelimiterUse(DelimiterRun opener, DelimiterRun closer); - - /** - * Process the matched delimiters, e.g. by wrapping the nodes between opener and closer in a new node, or appending - * a new node after the opener. + * The processor can examine the runs and the nodes and decide if it wants to process or not. If not, it should not + * change any nodes and return 0. If yes, it should do the processing (wrapping nodes, etc) and then return how many + * delimiters were used. *

    - * Note that removal of the delimiter from the delimiter nodes and unlinking them is done by the caller. + * Note that removal (unlinking) of the used delimiter {@link Text} nodes is done by the caller. * - * @param opener the text node that contained the opening delimiter - * @param closer the text node that contained the closing delimiter - * @param delimiterUse the number of delimiters that were used + * @param openingRun the opening delimiter run + * @param closingRun the closing delimiter run + * @return how many delimiters were used; must not be greater than length of either opener or closer */ - void process(Text opener, Text closer, int delimiterUse); + int process(DelimiterRun openingRun, DelimiterRun closingRun); } diff --git a/commonmark/src/main/java/org/commonmark/parser/delimiter/DelimiterRun.java b/commonmark/src/main/java/org/commonmark/parser/delimiter/DelimiterRun.java index 29bdb8731..578eac96b 100644 --- a/commonmark/src/main/java/org/commonmark/parser/delimiter/DelimiterRun.java +++ b/commonmark/src/main/java/org/commonmark/parser/delimiter/DelimiterRun.java @@ -1,7 +1,9 @@ package org.commonmark.parser.delimiter; +import org.commonmark.node.Text; + /** - * A delimiter run is one or more of the same delimiter character. + * A delimiter run is one or more of the same delimiter character, e.g. {@code ***}. */ public interface DelimiterRun { @@ -25,4 +27,32 @@ public interface DelimiterRun { * as {{@link #length()}} */ int originalLength(); + + /** + * @return the innermost opening delimiter, e.g. for {@code ***} this is the last {@code *} + */ + Text getOpener(); + + /** + * @return the innermost closing delimiter, e.g. for {@code ***} this is the first {@code *} + */ + Text getCloser(); + + /** + * Get the opening delimiter nodes for the specified length of delimiters. Length must be between 1 and + * {@link #length()}. + *

    + * For example, for a delimiter run {@code ***}, calling this with 1 would return the last {@code *}. + * Calling it with 2 would return the second last {@code *} and the last {@code *}. + */ + Iterable<Text> getOpeners(int length); + + /** + * Get the closing delimiter nodes for the specified length of delimiters. Length must be between 1 and + * {@link #length()}. + *

    + * For example, for a delimiter run {@code ***}, calling this with 1 would return the first {@code *}. + * Calling it with 2 would return the first {@code *} and the second {@code *}. + */ + Iterable<Text> getClosers(int length); } diff --git a/commonmark/src/main/java/org/commonmark/renderer/NodeRenderer.java b/commonmark/src/main/java/org/commonmark/renderer/NodeRenderer.java index e2d5ebc96..4ae4b5dcd 100644 --- a/commonmark/src/main/java/org/commonmark/renderer/NodeRenderer.java +++ b/commonmark/src/main/java/org/commonmark/renderer/NodeRenderer.java @@ -20,4 +20,20 @@ public interface NodeRenderer { * @param node the node to render, will be an instance of one of {@link #getNodeTypes()} */ void render(Node node); + + /** + * Called before the root node is rendered, to do any initial processing at the start. + * + * @param rootNode the root (top-level) node + */ + default void beforeRoot(Node rootNode) { + } + + /** + * Called after the root node is rendered, to do any final processing at the end. + * + * @param rootNode the root (top-level) node + */ + default void afterRoot(Node rootNode) { + } } diff --git a/commonmark/src/main/java/org/commonmark/renderer/html/CoreHtmlNodeRenderer.java b/commonmark/src/main/java/org/commonmark/renderer/html/CoreHtmlNodeRenderer.java index 7d3552668..5c536558e 100644 --- a/commonmark/src/main/java/org/commonmark/renderer/html/CoreHtmlNodeRenderer.java +++ b/commonmark/src/main/java/org/commonmark/renderer/html/CoreHtmlNodeRenderer.java @@ -3,7 +3,9 @@ import org.commonmark.node.*; import org.commonmark.renderer.NodeRenderer; -import java.util.*; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Set; /** * The node renderer that renders all the core nodes (comes last in the order of node renderers).
@@ -20,7 +22,7 @@ public CoreHtmlNodeRenderer(HtmlNodeRendererContext context) { @Override public Set> getNodeTypes() { - return new HashSet<>(Arrays.asList( + return Set.of( Document.class, Heading.class, Paragraph.class, @@ -41,7 +43,7 @@ public Set> getNodeTypes() { HtmlInline.class, SoftLineBreak.class, HardLineBreak.class - )); + ); } @Override @@ -67,13 +69,15 @@ public void visit(Heading heading) { @Override public void visit(Paragraph paragraph) { - boolean inTightList = isInTightList(paragraph); - if (!inTightList) { + boolean omitP = isInTightList(paragraph) || // + (context.shouldOmitSingleParagraphP() && paragraph.getParent() instanceof Document && // + paragraph.getPrevious() == null && paragraph.getNext() == null); + if (!omitP) { html.line(); html.tag("p", getAttrs(paragraph, "p")); } visitChildren(paragraph); - if (!inTightList) { + if (!omitP) { html.tag("/p"); html.line(); } @@ -135,7 +139,7 @@ public void visit(ThematicBreak thematicBreak) { @Override public void visit(IndentedCodeBlock indentedCodeBlock) { - renderCodeBlock(indentedCodeBlock.getLiteral(), indentedCodeBlock, Collections.emptyMap()); + renderCodeBlock(indentedCodeBlock.getLiteral(), indentedCodeBlock, Map.of()); } @Override @@ -168,7 +172,7 @@ public void visit(ListItem listItem) { @Override public void visit(OrderedList orderedList) { - int start = orderedList.getStartNumber(); + int start = orderedList.getMarkerStartNumber() != null ? 
orderedList.getMarkerStartNumber() : 1; Map attrs = new LinkedHashMap<>(); if (start != 1) { attrs.put("start", String.valueOf(start)); @@ -287,7 +291,7 @@ private boolean isInTightList(Paragraph paragraph) { } private Map getAttrs(Node node, String tagName) { - return getAttrs(node, tagName, Collections.emptyMap()); + return getAttrs(node, tagName, Map.of()); } private Map getAttrs(Node node, String tagName, Map defaultAttributes) { @@ -307,6 +311,11 @@ public void visit(Text text) { sb.append(text.getLiteral()); } + @Override + public void visit(Code code) { + sb.append(code.getLiteral()); + } + @Override public void visit(SoftLineBreak softLineBreak) { sb.append('\n'); diff --git a/commonmark/src/main/java/org/commonmark/renderer/html/DefaultUrlSanitizer.java b/commonmark/src/main/java/org/commonmark/renderer/html/DefaultUrlSanitizer.java index 6cc96c5e7..4c5bed12c 100644 --- a/commonmark/src/main/java/org/commonmark/renderer/html/DefaultUrlSanitizer.java +++ b/commonmark/src/main/java/org/commonmark/renderer/html/DefaultUrlSanitizer.java @@ -1,13 +1,10 @@ package org.commonmark.renderer.html; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashSet; -import java.util.Set; +import java.util.*; /** * - * Allows http, https and mailto protocols for url. + * Allows http, https, mailto, and data protocols for url. * Also allows protocol relative urls, and relative urls. 
* Implementation based on https://github.com/OWASP/java-html-sanitizer/blob/f07e44b034a45d94d6fd010279073c38b6933072/src/main/java/org/owasp/html/FilterUrlByProtocolAttributePolicy.java */ @@ -15,7 +12,7 @@ public class DefaultUrlSanitizer implements UrlSanitizer { private Set protocols; public DefaultUrlSanitizer() { - this(Arrays.asList("http", "https", "mailto")); + this(List.of("http", "https", "mailto", "data")); } public DefaultUrlSanitizer(Collection protocols) { diff --git a/commonmark/src/main/java/org/commonmark/renderer/html/HtmlNodeRendererContext.java b/commonmark/src/main/java/org/commonmark/renderer/html/HtmlNodeRendererContext.java index eb950ffa6..eecff0f44 100644 --- a/commonmark/src/main/java/org/commonmark/renderer/html/HtmlNodeRendererContext.java +++ b/commonmark/src/main/java/org/commonmark/renderer/html/HtmlNodeRendererContext.java @@ -17,8 +17,8 @@ public interface HtmlNodeRendererContext { /** * Let extensions modify the HTML tag attributes. * - * @param node the node for which the attributes are applied - * @param tagName the HTML tag name that these attributes are for (e.g. {@code h1}, {@code pre}, {@code code}). + * @param node the node for which the attributes are applied + * @param tagName the HTML tag name that these attributes are for (e.g. {@code h1}, {@code pre}, {@code code}). * @param attributes the attributes that were calculated by the renderer * @return the extended attributes with added/updated/removed entries */ @@ -47,6 +47,11 @@ public interface HtmlNodeRendererContext { */ boolean shouldEscapeHtml(); + /** + * @return whether documents that only contain a single paragraph should be rendered without the {@code
    <p>
    } tag + */ + boolean shouldOmitSingleParagraphP(); + /** * @return true if the {@link UrlSanitizer} should be used. * @since 0.14.0 diff --git a/commonmark/src/main/java/org/commonmark/renderer/html/HtmlRenderer.java b/commonmark/src/main/java/org/commonmark/renderer/html/HtmlRenderer.java index 19f53594f..386abebf0 100644 --- a/commonmark/src/main/java/org/commonmark/renderer/html/HtmlRenderer.java +++ b/commonmark/src/main/java/org/commonmark/renderer/html/HtmlRenderer.java @@ -7,10 +7,7 @@ import org.commonmark.renderer.NodeRenderer; import org.commonmark.renderer.Renderer; -import java.util.ArrayList; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; +import java.util.*; /** * Renders a tree of nodes to HTML. @@ -25,17 +22,19 @@ public class HtmlRenderer implements Renderer { private final String softbreak; private final boolean escapeHtml; + private final boolean percentEncodeUrls; + private final boolean omitSingleParagraphP; private final boolean sanitizeUrls; private final UrlSanitizer urlSanitizer; - private final boolean percentEncodeUrls; private final List attributeProviderFactories; private final List nodeRendererFactories; private HtmlRenderer(Builder builder) { this.softbreak = builder.softbreak; this.escapeHtml = builder.escapeHtml; - this.sanitizeUrls = builder.sanitizeUrls; this.percentEncodeUrls = builder.percentEncodeUrls; + this.omitSingleParagraphP = builder.omitSingleParagraphP; + this.sanitizeUrls = builder.sanitizeUrls; this.urlSanitizer = builder.urlSanitizer; this.attributeProviderFactories = new ArrayList<>(builder.attributeProviderFactories); @@ -61,18 +60,16 @@ public static Builder builder() { @Override public void render(Node node, Appendable output) { - if (node == null) { - throw new NullPointerException("node must not be null"); - } + Objects.requireNonNull(node, "node must not be null"); RendererContext context = new RendererContext(new HtmlWriter(output)); + context.beforeRoot(node); 
context.render(node); + context.afterRoot(node); } @Override public String render(Node node) { - if (node == null) { - throw new NullPointerException("node must not be null"); - } + Objects.requireNonNull(node, "node must not be null"); StringBuilder sb = new StringBuilder(); render(node, sb); return sb.toString(); @@ -88,6 +85,7 @@ public static class Builder { private boolean sanitizeUrls = false; private UrlSanitizer urlSanitizer = new DefaultUrlSanitizer(); private boolean percentEncodeUrls = false; + private boolean omitSingleParagraphP = false; private List attributeProviderFactories = new ArrayList<>(); private List nodeRendererFactories = new ArrayList<>(); @@ -171,6 +169,17 @@ public Builder percentEncodeUrls(boolean percentEncodeUrls) { return this; } + /** + * Whether documents that only contain a single paragraph should be rendered without the {@code
    <p>
    } tag. Set to + * {@code true} to render without the tag; the default of {@code false} always renders the tag. + * + * @return {@code this} + */ + public Builder omitSingleParagraphP(boolean omitSingleParagraphP) { + this.omitSingleParagraphP = omitSingleParagraphP; + return this; + } + /** * Add a factory for an attribute provider for adding/changing HTML attributes to the rendered tags. * @@ -178,9 +187,7 @@ public Builder percentEncodeUrls(boolean percentEncodeUrls) { * @return {@code this} */ public Builder attributeProviderFactory(AttributeProviderFactory attributeProviderFactory) { - if (attributeProviderFactory == null) { - throw new NullPointerException("attributeProviderFactory must not be null"); - } + Objects.requireNonNull(attributeProviderFactory, "attributeProviderFactory must not be null"); this.attributeProviderFactories.add(attributeProviderFactory); return this; } @@ -196,9 +203,7 @@ public Builder attributeProviderFactory(AttributeProviderFactory attributeProvid * @return {@code this} */ public Builder nodeRendererFactory(HtmlNodeRendererFactory nodeRendererFactory) { - if (nodeRendererFactory == null) { - throw new NullPointerException("nodeRendererFactory must not be null"); - } + Objects.requireNonNull(nodeRendererFactory, "nodeRendererFactory must not be null"); this.nodeRendererFactories.add(nodeRendererFactory); return this; } @@ -208,9 +213,7 @@ public Builder nodeRendererFactory(HtmlNodeRendererFactory nodeRendererFactory) * @return {@code this} */ public Builder extensions(Iterable extensions) { - if (extensions == null) { - throw new NullPointerException("extensions must not be null"); - } + Objects.requireNonNull(extensions, "extensions must not be null"); for (Extension extension : extensions) { if (extension instanceof HtmlRendererExtension) { HtmlRendererExtension htmlRendererExtension = (HtmlRendererExtension) extension; @@ -238,15 +241,13 @@ private RendererContext(HtmlWriter htmlWriter) { this.htmlWriter = htmlWriter; 
attributeProviders = new ArrayList<>(attributeProviderFactories.size()); - for (AttributeProviderFactory attributeProviderFactory : attributeProviderFactories) { + for (var attributeProviderFactory : attributeProviderFactories) { attributeProviders.add(attributeProviderFactory.create(this)); } - // The first node renderer for a node type "wins". - for (int i = nodeRendererFactories.size() - 1; i >= 0; i--) { - HtmlNodeRendererFactory nodeRendererFactory = nodeRendererFactories.get(i); - NodeRenderer nodeRenderer = nodeRendererFactory.create(this); - nodeRendererMap.add(nodeRenderer); + for (var factory : nodeRendererFactories) { + var renderer = factory.create(this); + nodeRendererMap.add(renderer); } } @@ -255,6 +256,11 @@ public boolean shouldEscapeHtml() { return escapeHtml; } + @Override + public boolean shouldOmitSingleParagraphP() { + return omitSingleParagraphP; + } + @Override public boolean shouldSanitizeUrls() { return sanitizeUrls; @@ -296,6 +302,14 @@ public void render(Node node) { nodeRendererMap.render(node); } + public void beforeRoot(Node node) { + nodeRendererMap.beforeRoot(node); + } + + public void afterRoot(Node node) { + nodeRendererMap.afterRoot(node); + } + private void setCustomAttributes(Node node, String tagName, Map attrs) { for (AttributeProvider attributeProvider : attributeProviders) { attributeProvider.setAttributes(node, tagName, attrs); diff --git a/commonmark/src/main/java/org/commonmark/renderer/html/HtmlWriter.java b/commonmark/src/main/java/org/commonmark/renderer/html/HtmlWriter.java index 8c79eb8b4..a4ac05d45 100644 --- a/commonmark/src/main/java/org/commonmark/renderer/html/HtmlWriter.java +++ b/commonmark/src/main/java/org/commonmark/renderer/html/HtmlWriter.java @@ -3,20 +3,18 @@ import org.commonmark.internal.util.Escaping; import java.io.IOException; -import java.util.Collections; import java.util.Map; +import java.util.Objects; public class HtmlWriter { - private static final Map NO_ATTRIBUTES = Collections.emptyMap(); 
+ private static final Map NO_ATTRIBUTES = Map.of(); private final Appendable buffer; private char lastChar = 0; public HtmlWriter(Appendable out) { - if (out == null) { - throw new NullPointerException("out must not be null"); - } + Objects.requireNonNull(out, "out must not be null"); this.buffer = out; } @@ -40,12 +38,14 @@ public void tag(String name, Map attrs, boolean voidElement) { append("<"); append(name); if (attrs != null && !attrs.isEmpty()) { - for (Map.Entry attrib : attrs.entrySet()) { + for (var attr : attrs.entrySet()) { append(" "); - append(Escaping.escapeHtml(attrib.getKey())); - append("=\""); - append(Escaping.escapeHtml(attrib.getValue())); - append("\""); + append(Escaping.escapeHtml(attr.getKey())); + if (attr.getValue() != null) { + append("=\""); + append(Escaping.escapeHtml(attr.getValue())); + append("\""); + } } } if (voidElement) { diff --git a/commonmark/src/main/java/org/commonmark/renderer/markdown/CoreMarkdownNodeRenderer.java b/commonmark/src/main/java/org/commonmark/renderer/markdown/CoreMarkdownNodeRenderer.java new file mode 100644 index 000000000..5a81676f4 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/renderer/markdown/CoreMarkdownNodeRenderer.java @@ -0,0 +1,554 @@ +package org.commonmark.renderer.markdown; + +import org.commonmark.node.*; +import org.commonmark.renderer.NodeRenderer; +import org.commonmark.text.AsciiMatcher; +import org.commonmark.text.CharMatcher; +import org.commonmark.text.Characters; + +import java.util.List; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * The node renderer that renders all the core nodes (comes last in the order of node renderers). + *
    <p>
    + * Note that while sometimes it would be easier to record what kind of syntax was used on parsing (e.g. ATX vs Setext + * heading), this renderer is intended to also work for documents that were created by directly creating + * {@link Node Nodes} instead. So in order to support that, it sometimes needs to do a bit more work. + */ +public class CoreMarkdownNodeRenderer extends AbstractVisitor implements NodeRenderer { + + private final AsciiMatcher textEscape; + private final CharMatcher textEscapeInHeading; + private final CharMatcher linkDestinationNeedsAngleBrackets = + AsciiMatcher.builder().c(' ').c('(').c(')').c('<').c('>').c('\n').c('\\').build(); + private final CharMatcher linkDestinationEscapeInAngleBrackets = + AsciiMatcher.builder().c('<').c('>').c('\n').c('\\').build(); + private final CharMatcher linkTitleEscapeInQuotes = + AsciiMatcher.builder().c('"').c('\n').c('\\').build(); + + private final Pattern orderedListMarkerPattern = Pattern.compile("^([0-9]{1,9})([.)])"); + + protected final MarkdownNodeRendererContext context; + private final MarkdownWriter writer; + /** + * If we're currently within a {@link BulletList} or {@link OrderedList}, this keeps the context of that list. + * It has a parent field so that it can represent a stack (for nested lists). 
+ */ + private ListHolder listHolder; + + public CoreMarkdownNodeRenderer(MarkdownNodeRendererContext context) { + this.context = context; + this.writer = context.getWriter(); + + textEscape = AsciiMatcher.builder().anyOf("[]<>`*_&\n\\").anyOf(context.getSpecialCharacters()).build(); + textEscapeInHeading = AsciiMatcher.builder(textEscape).anyOf("#").build(); + } + + @Override + public Set> getNodeTypes() { + return Set.of( + BlockQuote.class, + BulletList.class, + Code.class, + Document.class, + Emphasis.class, + FencedCodeBlock.class, + HardLineBreak.class, + Heading.class, + HtmlBlock.class, + HtmlInline.class, + Image.class, + IndentedCodeBlock.class, + Link.class, + ListItem.class, + OrderedList.class, + Paragraph.class, + SoftLineBreak.class, + StrongEmphasis.class, + Text.class, + ThematicBreak.class + ); + } + + @Override + public void render(Node node) { + node.accept(this); + } + + @Override + public void visit(Document document) { + // No rendering itself + visitChildren(document); + writer.line(); + } + + @Override + public void visit(ThematicBreak thematicBreak) { + String literal = thematicBreak.getLiteral(); + if (literal == null) { + // Let's use ___ as it doesn't introduce ambiguity with * or - list item markers + literal = "___"; + } + writer.raw(literal); + writer.block(); + } + + @Override + public void visit(Heading heading) { + if (heading.getLevel() <= 2) { + LineBreakVisitor lineBreakVisitor = new LineBreakVisitor(); + heading.accept(lineBreakVisitor); + boolean isMultipleLines = lineBreakVisitor.hasLineBreak(); + + if (isMultipleLines) { + // Setext headings: Can have multiple lines, but only level 1 or 2 + visitChildren(heading); + writer.line(); + if (heading.getLevel() == 1) { + // Note that it would be nice to match the length of the contents instead of just using 3, but that's + // not easy. 
+ writer.raw("==="); + } else { + writer.raw("---"); + } + writer.block(); + return; + } + } + + // ATX headings: Can't have multiple lines, but up to level 6. + for (int i = 0; i < heading.getLevel(); i++) { + writer.raw('#'); + } + writer.raw(' '); + visitChildren(heading); + + writer.block(); + } + + @Override + public void visit(IndentedCodeBlock indentedCodeBlock) { + String literal = indentedCodeBlock.getLiteral(); + // We need to respect line prefixes which is why we need to write it line by line (e.g. an indented code block + // within a block quote) + writer.writePrefix(" "); + writer.pushPrefix(" "); + List lines = getLines(literal); + for (int i = 0; i < lines.size(); i++) { + String line = lines.get(i); + writer.raw(line); + if (i != lines.size() - 1) { + writer.line(); + } + } + writer.popPrefix(); + writer.block(); + } + + @Override + public void visit(FencedCodeBlock codeBlock) { + String literal = codeBlock.getLiteral(); + String fenceChar = codeBlock.getFenceCharacter() != null ? codeBlock.getFenceCharacter() : "`"; + int openingFenceLength; + if (codeBlock.getOpeningFenceLength() != null) { + // If we have a known fence length, use it + openingFenceLength = codeBlock.getOpeningFenceLength(); + } else { + // Otherwise, calculate the closing fence length pessimistically, e.g. if the code block itself contains a + // line with ```, we need to use a fence of length 4. If ``` occurs with non-whitespace characters on a + // line, we technically don't need a longer fence, but it's not incorrect to do so. + int fenceCharsInLiteral = findMaxRunLength(fenceChar, literal); + openingFenceLength = Math.max(fenceCharsInLiteral + 1, 3); + } + int closingFenceLength = codeBlock.getClosingFenceLength() != null ? 
codeBlock.getClosingFenceLength() : openingFenceLength; + + String openingFence = repeat(fenceChar, openingFenceLength); + String closingFence = repeat(fenceChar, closingFenceLength); + int indent = codeBlock.getFenceIndent(); + + if (indent > 0) { + String indentPrefix = repeat(" ", indent); + writer.writePrefix(indentPrefix); + writer.pushPrefix(indentPrefix); + } + + writer.raw(openingFence); + if (codeBlock.getInfo() != null) { + writer.raw(codeBlock.getInfo()); + } + writer.line(); + if (!literal.isEmpty()) { + List lines = getLines(literal); + for (String line : lines) { + writer.raw(line); + writer.line(); + } + } + writer.raw(closingFence); + if (indent > 0) { + writer.popPrefix(); + } + writer.block(); + } + + @Override + public void visit(HtmlBlock htmlBlock) { + List lines = getLines(htmlBlock.getLiteral()); + for (int i = 0; i < lines.size(); i++) { + String line = lines.get(i); + writer.raw(line); + if (i != lines.size() - 1) { + writer.line(); + } + } + writer.block(); + } + + @Override + public void visit(Paragraph paragraph) { + visitChildren(paragraph); + writer.block(); + } + + @Override + public void visit(BlockQuote blockQuote) { + writer.writePrefix("> "); + writer.pushPrefix("> "); + visitChildren(blockQuote); + writer.popPrefix(); + writer.block(); + } + + @Override + public void visit(BulletList bulletList) { + writer.pushTight(bulletList.isTight()); + listHolder = new BulletListHolder(listHolder, bulletList); + visitChildren(bulletList); + listHolder = listHolder.parent; + writer.popTight(); + writer.block(); + } + + @Override + public void visit(OrderedList orderedList) { + writer.pushTight(orderedList.isTight()); + listHolder = new OrderedListHolder(listHolder, orderedList); + visitChildren(orderedList); + listHolder = listHolder.parent; + writer.popTight(); + writer.block(); + } + + @Override + public void visit(ListItem listItem) { + int markerIndent = listItem.getMarkerIndent() != null ? 
listItem.getMarkerIndent() : 0; + String marker; + if (listHolder instanceof BulletListHolder) { + BulletListHolder bulletListHolder = (BulletListHolder) listHolder; + marker = repeat(" ", markerIndent) + bulletListHolder.marker; + } else if (listHolder instanceof OrderedListHolder) { + OrderedListHolder orderedListHolder = (OrderedListHolder) listHolder; + marker = repeat(" ", markerIndent) + orderedListHolder.number + orderedListHolder.delimiter; + orderedListHolder.number++; + } else { + throw new IllegalStateException("Unknown list holder type: " + listHolder); + } + Integer contentIndent = listItem.getContentIndent(); + String spaces = contentIndent != null ? repeat(" ", Math.max(contentIndent - marker.length(), 1)) : " "; + writer.writePrefix(marker); + writer.writePrefix(spaces); + writer.pushPrefix(repeat(" ", marker.length() + spaces.length())); + + if (listItem.getFirstChild() == null) { + // Empty list item + writer.block(); + } else { + visitChildren(listItem); + } + + writer.popPrefix(); + } + + @Override + public void visit(Code code) { + String literal = code.getLiteral(); + // If the literal includes backticks, we can surround them by using one more backtick. + int backticks = findMaxRunLength("`", literal); + for (int i = 0; i < backticks + 1; i++) { + writer.raw('`'); + } + // If the literal starts or ends with a backtick, surround it with a single space. + // If it starts and ends with a space (but is not only spaces), add an additional space (otherwise they would + // get removed on parsing). 
+ boolean addSpace = literal.startsWith("`") || literal.endsWith("`") || + (literal.startsWith(" ") && literal.endsWith(" ") && Characters.hasNonSpace(literal)); + if (addSpace) { + writer.raw(' '); + } + writer.raw(literal); + if (addSpace) { + writer.raw(' '); + } + for (int i = 0; i < backticks + 1; i++) { + writer.raw('`'); + } + } + + @Override + public void visit(Emphasis emphasis) { + String delimiter = emphasis.getOpeningDelimiter(); + // Use delimiter that was parsed if available + if (delimiter == null) { + // When emphasis is nested, a different delimiter needs to be used + delimiter = writer.getLastChar() == '*' ? "_" : "*"; + } + writer.raw(delimiter); + super.visit(emphasis); + writer.raw(delimiter); + } + + @Override + public void visit(StrongEmphasis strongEmphasis) { + writer.raw("**"); + super.visit(strongEmphasis); + writer.raw("**"); + } + + @Override + public void visit(Link link) { + writeLinkLike(link.getTitle(), link.getDestination(), link, "["); + } + + @Override + public void visit(Image image) { + writeLinkLike(image.getTitle(), image.getDestination(), image, "!["); + } + + @Override + public void visit(HtmlInline htmlInline) { + writer.raw(htmlInline.getLiteral()); + } + + @Override + public void visit(HardLineBreak hardLineBreak) { + writer.raw(" "); + writer.line(); + } + + @Override + public void visit(SoftLineBreak softLineBreak) { + writer.line(); + } + + @Override + public void visit(Text text) { + // Text is tricky. In Markdown special characters (`-`, `#` etc.) can be escaped (`\-`, `\#` etc.) so that + // they're parsed as plain text. Currently, whether a character was escaped or not is not recorded in the Node, + // so here we don't know. If we just wrote out those characters unescaped, the resulting Markdown would change + // meaning (turn into a list item, heading, etc.). 
+ // You might say "Why not store that in the Node when parsing", but that wouldn't work for the use case where + // nodes are constructed directly instead of via parsing. This renderer needs to work for that too. + // So currently, when in doubt, we escape. For special characters only occurring at the beginning of a line, + // we only escape them then (we wouldn't want to escape every `.` for example). + String literal = text.getLiteral(); + if (writer.isAtLineStart() && !literal.isEmpty()) { + char c = literal.charAt(0); + switch (c) { + case '-': { + // Would be ambiguous with a bullet list marker, escape + writer.raw("\\-"); + literal = literal.substring(1); + break; + } + case '#': { + // Would be ambiguous with an ATX heading, escape + writer.raw("\\#"); + literal = literal.substring(1); + break; + } + case '=': { + // Would be ambiguous with a Setext heading, escape unless it's the first line in the block + if (text.getPrevious() != null) { + writer.raw("\\="); + literal = literal.substring(1); + } + break; + } + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + // Check for ordered list marker + Matcher m = orderedListMarkerPattern.matcher(literal); + if (m.find()) { + writer.raw(m.group(1)); + writer.raw("\\" + m.group(2)); + literal = literal.substring(m.end()); + } + break; + } + case '\t': { + writer.raw(" "); + literal = literal.substring(1); + break; + } + case ' ': { + writer.raw(" "); + literal = literal.substring(1); + break; + } + } + } + + CharMatcher escape = text.getParent() instanceof Heading ? textEscapeInHeading : textEscape; + + if (literal.endsWith("!") && text.getNext() instanceof Link) { + // If we wrote the `!` unescaped, it would turn the link into an image instead. 
+ writer.text(literal.substring(0, literal.length() - 1), escape); + writer.raw("\\!"); + } else { + writer.text(literal, escape); + } + } + + @Override + protected void visitChildren(Node parent) { + Node node = parent.getFirstChild(); + while (node != null) { + Node next = node.getNext(); + context.render(node); + node = next; + } + } + + private static int findMaxRunLength(String needle, String s) { + int maxRunLength = 0; + int pos = 0; + while (pos < s.length()) { + pos = s.indexOf(needle, pos); + if (pos == -1) { + return maxRunLength; + } + int runLength = 0; + do { + pos += needle.length(); + runLength++; + } while (s.startsWith(needle, pos)); + maxRunLength = Math.max(runLength, maxRunLength); + } + return maxRunLength; + } + + private static boolean contains(String s, CharMatcher charMatcher) { + for (int i = 0; i < s.length(); i++) { + if (charMatcher.matches(s.charAt(i))) { + return true; + } + } + return false; + } + + // Keep for Android compat (String.repeat only available on Android 12 and later) + private static String repeat(String s, int count) { + StringBuilder sb = new StringBuilder(s.length() * count); + for (int i = 0; i < count; i++) { + sb.append(s); + } + return sb.toString(); + } + + private static List getLines(String literal) { + // Without -1, split would discard all trailing empty strings, which is not what we want, e.g. it would + // return the same result for "abc", "abc\n" and "abc\n\n". + // With -1, it returns ["abc"], ["abc", ""] and ["abc", "", ""]. + String[] parts = literal.split("\n", -1); + if (parts[parts.length - 1].isEmpty()) { + // But we don't want the last empty string, as "\n" is used as a line terminator (not a separator), + // so return without the last element. 
+ return List.of(parts).subList(0, parts.length - 1); + } else { + return List.of(parts); + } + } + + private void writeLinkLike(String title, String destination, Node node, String opener) { + writer.raw(opener); + visitChildren(node); + writer.raw(']'); + writer.raw('('); + if (contains(destination, linkDestinationNeedsAngleBrackets)) { + writer.raw('<'); + writer.text(destination, linkDestinationEscapeInAngleBrackets); + writer.raw('>'); + } else { + writer.raw(destination); + } + if (title != null) { + writer.raw(' '); + writer.raw('"'); + writer.text(title, linkTitleEscapeInQuotes); + writer.raw('"'); + } + writer.raw(')'); + } + + private static class ListHolder { + final ListHolder parent; + + protected ListHolder(ListHolder parent) { + this.parent = parent; + } + } + + private static class BulletListHolder extends ListHolder { + final String marker; + + public BulletListHolder(ListHolder parent, BulletList bulletList) { + super(parent); + this.marker = bulletList.getMarker() != null ? bulletList.getMarker() : "-"; + } + } + + private static class OrderedListHolder extends ListHolder { + final String delimiter; + private int number; + + protected OrderedListHolder(ListHolder parent, OrderedList orderedList) { + super(parent); + delimiter = orderedList.getMarkerDelimiter() != null ? orderedList.getMarkerDelimiter() : "."; + number = orderedList.getMarkerStartNumber() != null ? orderedList.getMarkerStartNumber() : 1; + } + } + + /** + * Visits nodes to check if there are any soft or hard line breaks. 
+ */ + private static class LineBreakVisitor extends AbstractVisitor { + private boolean lineBreak = false; + + public boolean hasLineBreak() { + return lineBreak; + } + + @Override + public void visit(SoftLineBreak softLineBreak) { + super.visit(softLineBreak); + lineBreak = true; + } + + @Override + public void visit(HardLineBreak hardLineBreak) { + super.visit(hardLineBreak); + lineBreak = true; + } + } +} diff --git a/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownNodeRendererContext.java b/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownNodeRendererContext.java new file mode 100644 index 000000000..40640d1b4 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownNodeRendererContext.java @@ -0,0 +1,30 @@ +package org.commonmark.renderer.markdown; + +import org.commonmark.node.Node; + +import java.util.Set; + +/** + * Context that is passed to custom node renderers, see {@link MarkdownNodeRendererFactory#create}. + */ +public interface MarkdownNodeRendererContext { + + /** + * @return the writer to use + */ + MarkdownWriter getWriter(); + + /** + * Render the specified node and its children using the configured renderers. This should be used to render child + * nodes; be careful not to pass the node that is being rendered, that would result in an endless loop. 
+ * + * @param node the node to render + */ + void render(Node node); + + /** + * @return additional special characters that need to be escaped if they occur in normal text; currently only ASCII + * characters are allowed + */ + Set getSpecialCharacters(); +} diff --git a/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownNodeRendererFactory.java b/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownNodeRendererFactory.java new file mode 100644 index 000000000..14221ea56 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownNodeRendererFactory.java @@ -0,0 +1,25 @@ +package org.commonmark.renderer.markdown; + +import org.commonmark.renderer.NodeRenderer; + +import java.util.Set; + +/** + * Factory for instantiating new node renderers for rendering custom nodes. + */ +public interface MarkdownNodeRendererFactory { + + /** + * Create a new node renderer for the specified rendering context. + * + * @param context the context for rendering (normally passed on to the node renderer) + * @return a node renderer + */ + NodeRenderer create(MarkdownNodeRendererContext context); + + /** + * @return the additional special characters that this factory would like to have escaped in normal text; currently + * only ASCII characters are allowed + */ + Set getSpecialCharacters(); +} diff --git a/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownRenderer.java b/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownRenderer.java new file mode 100644 index 000000000..e4996fb08 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownRenderer.java @@ -0,0 +1,161 @@ +package org.commonmark.renderer.markdown; + +import org.commonmark.Extension; +import org.commonmark.internal.renderer.NodeRendererMap; +import org.commonmark.node.Node; +import org.commonmark.renderer.NodeRenderer; +import org.commonmark.renderer.Renderer; + +import java.util.*; + +/** + * Renders nodes to 
Markdown (CommonMark syntax); use {@link #builder()} to create a renderer. + *
    <p>
    + * Note that it doesn't currently preserve the exact syntax of the original input Markdown (if any): + *
    <ul>
    + * <li>Headings are output as ATX headings if possible (multi-line headings need Setext headings)</li>
    + * <li>Links are always rendered as inline links (no support for reference links yet)</li>
    + * <li>Escaping might be over-eager, e.g. a plain {@code *} might be escaped
    + * even though it doesn't need to be in that particular context</li>
    + * <li>Leading whitespace in paragraphs is not preserved</li>
    + * </ul>
    + * However, it should produce Markdown that is semantically equivalent to the input, i.e. if the Markdown was parsed + * again and compared against the original AST, it should be the same (minus bugs). + */ +public class MarkdownRenderer implements Renderer { + + private final List nodeRendererFactories; + + private MarkdownRenderer(Builder builder) { + this.nodeRendererFactories = new ArrayList<>(builder.nodeRendererFactories.size() + 1); + this.nodeRendererFactories.addAll(builder.nodeRendererFactories); + // Add as last. This means clients can override the rendering of core nodes if they want. + this.nodeRendererFactories.add(new MarkdownNodeRendererFactory() { + @Override + public NodeRenderer create(MarkdownNodeRendererContext context) { + return new CoreMarkdownNodeRenderer(context); + } + + @Override + public Set getSpecialCharacters() { + return Set.of(); + } + }); + } + + /** + * Create a new builder for configuring a {@link MarkdownRenderer}. + * + * @return a builder + */ + public static Builder builder() { + return new Builder(); + } + + @Override + public void render(Node node, Appendable output) { + RendererContext context = new RendererContext(new MarkdownWriter(output)); + context.render(node); + } + + @Override + public String render(Node node) { + StringBuilder sb = new StringBuilder(); + render(node, sb); + return sb.toString(); + } + + /** + * Builder for configuring a {@link MarkdownRenderer}. See methods for default configuration. + */ + public static class Builder { + + private final List nodeRendererFactories = new ArrayList<>(); + + /** + * @return the configured {@link MarkdownRenderer} + */ + public MarkdownRenderer build() { + return new MarkdownRenderer(this); + } + + /** + * Add a factory for instantiating a node renderer (done when rendering). This allows to override the rendering + * of node types or define rendering for custom node types. + *

    + * If multiple node renderers for the same node type are created, the one from the factory that was added first + * "wins". (This is how the rendering for core node types can be overridden; the default rendering comes last.) + * + * @param nodeRendererFactory the factory for creating a node renderer + * @return {@code this} + */ + public Builder nodeRendererFactory(MarkdownNodeRendererFactory nodeRendererFactory) { + this.nodeRendererFactories.add(nodeRendererFactory); + return this; + } + + /** + * @param extensions extensions to use on this renderer + * @return {@code this} + */ + public Builder extensions(Iterable extensions) { + for (Extension extension : extensions) { + if (extension instanceof MarkdownRendererExtension) { + MarkdownRendererExtension markdownRendererExtension = (MarkdownRendererExtension) extension; + markdownRendererExtension.extend(this); + } + } + return this; + } + } + + /** + * Extension for {@link MarkdownRenderer} for rendering custom nodes. + */ + public interface MarkdownRendererExtension extends Extension { + + /** + * Extend Markdown rendering, usually by registering custom node renderers using {@link Builder#nodeRendererFactory}. 
+ * + * @param rendererBuilder the renderer builder to extend + */ + void extend(Builder rendererBuilder); + } + + private class RendererContext implements MarkdownNodeRendererContext { + private final MarkdownWriter writer; + private final NodeRendererMap nodeRendererMap = new NodeRendererMap(); + private final Set additionalTextEscapes; + + private RendererContext(MarkdownWriter writer) { + // Set fields that are used by interface + this.writer = writer; + Set escapes = new HashSet<>(); + for (MarkdownNodeRendererFactory factory : nodeRendererFactories) { + escapes.addAll(factory.getSpecialCharacters()); + } + additionalTextEscapes = Collections.unmodifiableSet(escapes); + + for (var factory : nodeRendererFactories) { + // Pass in this as context here, which uses the fields set above + var renderer = factory.create(this); + nodeRendererMap.add(renderer); + } + } + + @Override + public MarkdownWriter getWriter() { + return writer; + } + + @Override + public void render(Node node) { + nodeRendererMap.render(node); + } + + @Override + public Set getSpecialCharacters() { + return additionalTextEscapes; + } + } +} diff --git a/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownWriter.java b/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownWriter.java new file mode 100644 index 000000000..c9f427021 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownWriter.java @@ -0,0 +1,246 @@ +package org.commonmark.renderer.markdown; + +import org.commonmark.text.CharMatcher; + +import java.io.IOException; +import java.util.LinkedList; + +/** + * Writer for Markdown (CommonMark) text. + */ +public class MarkdownWriter { + + private final Appendable buffer; + + private int blockSeparator = 0; + private char lastChar; + private boolean atLineStart = true; + + // Stacks of settings that affect various rendering behaviors. 
The common pattern here is that callers use "push" to + // change a setting, render some nodes, and then "pop" the setting off the stack again to restore previous state. + private final LinkedList prefixes = new LinkedList<>(); + private final LinkedList tight = new LinkedList<>(); + private final LinkedList rawEscapes = new LinkedList<>(); + + public MarkdownWriter(Appendable out) { + buffer = out; + } + + /** + * Write the supplied string (raw/unescaped except if {@link #pushRawEscape} was used). + */ + public void raw(String s) { + flushBlockSeparator(); + write(s, null); + } + + /** + * Write the supplied character (raw/unescaped except if {@link #pushRawEscape} was used). + */ + public void raw(char c) { + flushBlockSeparator(); + write(c); + } + + /** + * Write the supplied string with escaping. + * + * @param s the string to write + * @param escape which characters to escape + */ + public void text(String s, CharMatcher escape) { + if (s.isEmpty()) { + return; + } + flushBlockSeparator(); + write(s, escape); + + lastChar = s.charAt(s.length() - 1); + atLineStart = false; + } + + /** + * Write a newline (line terminator). + */ + public void line() { + write('\n'); + writePrefixes(); + atLineStart = true; + } + + /** + * Enqueue a block separator to be written before the next text is written. Block separators are not written + * straight away because if there are no more blocks to write we don't want a separator (at the end of the document). + */ + public void block() { + // Remember whether this should be a tight or loose separator now because tight could get changed in between + // this and the next flush. + blockSeparator = isTight() ? 1 : 2; + atLineStart = true; + } + + /** + * Push a prefix onto the top of the stack. All prefixes are written at the beginning of each line, until the + * prefix is popped again. 
+ * + * @param prefix the raw prefix string + */ + public void pushPrefix(String prefix) { + prefixes.addLast(prefix); + } + + /** + * Write a prefix. + * + * @param prefix the raw prefix string to write + */ + public void writePrefix(String prefix) { + boolean tmp = atLineStart; + raw(prefix); + atLineStart = tmp; + } + + /** + * Remove the last prefix from the top of the stack. + */ + public void popPrefix() { + prefixes.removeLast(); + } + + /** + * Change whether blocks are tight or loose. Loose is the default where blocks are separated by a blank line. Tight + * is where blocks are not separated by a blank line. Tight blocks are used in lists, if there are no blank lines + * within the list. + *

    + * Note that changing this does not affect block separators that have already been enqueued with {@link #block()}, + * only future ones. + */ + public void pushTight(boolean tight) { + this.tight.addLast(tight); + } + + /** + * Remove the last "tight" setting from the top of the stack. + */ + public void popTight() { + this.tight.removeLast(); + } + + /** + * Escape the characters matching the supplied matcher, in all text (text and raw). This might be useful to + * extensions that add another layer of syntax, e.g. the tables extension that uses `|` to separate cells and needs + * all `|` characters to be escaped (even in code spans). + * + * @param rawEscape the characters to escape in raw text + */ + public void pushRawEscape(CharMatcher rawEscape) { + rawEscapes.add(rawEscape); + } + + /** + * Remove the last raw escape from the top of the stack. + */ + public void popRawEscape() { + rawEscapes.removeLast(); + } + + /** + * @return the last character that was written + */ + public char getLastChar() { + return lastChar; + } + + /** + * @return whether we're at the line start (not counting any prefixes), i.e. after a {@link #line} or {@link #block}. 
+ */ + public boolean isAtLineStart() { + return atLineStart; + } + + private void write(String s, CharMatcher escape) { + try { + if (rawEscapes.isEmpty() && escape == null) { + // Normal fast path + buffer.append(s); + } else { + for (int i = 0; i < s.length(); i++) { + append(s.charAt(i), escape); + } + } + } catch (IOException e) { + throw new RuntimeException(e); + } + + int length = s.length(); + if (length != 0) { + lastChar = s.charAt(length - 1); + } + atLineStart = false; + } + + private void write(char c) { + try { + append(c, null); + } catch (IOException e) { + throw new RuntimeException(e); + } + + lastChar = c; + atLineStart = false; + } + + private void writePrefixes() { + if (!prefixes.isEmpty()) { + for (String prefix : prefixes) { + write(prefix, null); + } + } + } + + /** + * If a block separator has been enqueued with {@link #block()} but not yet written, write it now. + */ + private void flushBlockSeparator() { + if (blockSeparator != 0) { + write('\n'); + writePrefixes(); + if (blockSeparator > 1) { + write('\n'); + writePrefixes(); + } + blockSeparator = 0; + } + } + + private void append(char c, CharMatcher escape) throws IOException { + if (needsEscaping(c, escape)) { + if (c == '\n') { + // Can't escape this with \, use numeric character reference + buffer.append("&#10;"); + } else { + buffer.append('\\'); + buffer.append(c); + } + } else { + buffer.append(c); + } + } + + private boolean isTight() { + return !tight.isEmpty() && tight.getLast(); + } + + private boolean needsEscaping(char c, CharMatcher escape) { + return (escape != null && escape.matches(c)) || rawNeedsEscaping(c); + } + + private boolean rawNeedsEscaping(char c) { + for (CharMatcher rawEscape : rawEscapes) { + if (rawEscape.matches(c)) { + return true; + } + } + return false; + } +} diff --git a/commonmark/src/main/java/org/commonmark/renderer/markdown/package-info.java b/commonmark/src/main/java/org/commonmark/renderer/markdown/package-info.java new file mode 100644 index
000000000..f707671d5 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/renderer/markdown/package-info.java @@ -0,0 +1,4 @@ +/** + * Markdown rendering (see {@link org.commonmark.renderer.markdown.MarkdownRenderer}) + */ +package org.commonmark.renderer.markdown; diff --git a/commonmark/src/main/java/org/commonmark/renderer/text/CoreTextContentNodeRenderer.java b/commonmark/src/main/java/org/commonmark/renderer/text/CoreTextContentNodeRenderer.java index a5f9db518..ee564cbdb 100644 --- a/commonmark/src/main/java/org/commonmark/renderer/text/CoreTextContentNodeRenderer.java +++ b/commonmark/src/main/java/org/commonmark/renderer/text/CoreTextContentNodeRenderer.java @@ -2,12 +2,7 @@ import org.commonmark.node.*; import org.commonmark.renderer.NodeRenderer; -import org.commonmark.internal.renderer.text.BulletListHolder; -import org.commonmark.internal.renderer.text.ListHolder; -import org.commonmark.internal.renderer.text.OrderedListHolder; -import java.util.Arrays; -import java.util.HashSet; import java.util.Set; /** @@ -27,7 +22,7 @@ public CoreTextContentNodeRenderer(TextContentNodeRendererContext context) { @Override public Set> getNodeTypes() { - return new HashSet<>(Arrays.asList( + return Set.of( Document.class, Heading.class, Paragraph.class, @@ -48,7 +43,7 @@ public Set> getNodeTypes() { HtmlInline.class, SoftLineBreak.class, HardLineBreak.class - )); + ); } @Override @@ -64,26 +59,24 @@ public void visit(Document document) { @Override public void visit(BlockQuote blockQuote) { - textContent.write('«'); + // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + textContent.write('\u00AB'); visitChildren(blockQuote); - textContent.write('»'); + textContent.resetBlock(); + // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + textContent.write('\u00BB'); - writeEndOfLineIfNeeded(blockQuote, null); + textContent.block(); } @Override public void visit(BulletList bulletList) { - if (listHolder != null) { - writeEndOfLine(); - } + 
textContent.pushTight(bulletList.isTight()); listHolder = new BulletListHolder(listHolder, bulletList); visitChildren(bulletList); - writeEndOfLineIfNeeded(bulletList, null); - if (listHolder.getParent() != null) { - listHolder = listHolder.getParent(); - } else { - listHolder = null; - } + textContent.popTight(); + textContent.block(); + listHolder = listHolder.getParent(); } @Override @@ -95,31 +88,40 @@ public void visit(Code code) { @Override public void visit(FencedCodeBlock fencedCodeBlock) { - if (context.stripNewlines()) { - textContent.writeStripped(fencedCodeBlock.getLiteral()); - writeEndOfLineIfNeeded(fencedCodeBlock, null); + var literal = stripTrailingNewline(fencedCodeBlock.getLiteral()); + if (stripNewlines()) { + textContent.writeStripped(literal); } else { - textContent.write(fencedCodeBlock.getLiteral()); + textContent.write(literal); } + textContent.block(); } @Override public void visit(HardLineBreak hardLineBreak) { - writeEndOfLineIfNeeded(hardLineBreak, null); + if (stripNewlines()) { + textContent.whitespace(); + } else { + textContent.line(); + } } @Override public void visit(Heading heading) { visitChildren(heading); - writeEndOfLineIfNeeded(heading, ':'); + if (stripNewlines()) { + textContent.write(": "); + } else { + textContent.block(); + } } @Override public void visit(ThematicBreak thematicBreak) { - if (!context.stripNewlines()) { + if (!stripNewlines()) { textContent.write("***"); } - writeEndOfLineIfNeeded(thematicBreak, null); + textContent.block(); } @Override @@ -139,12 +141,13 @@ public void visit(Image image) { @Override public void visit(IndentedCodeBlock indentedCodeBlock) { - if (context.stripNewlines()) { - textContent.writeStripped(indentedCodeBlock.getLiteral()); - writeEndOfLineIfNeeded(indentedCodeBlock, null); + var literal = stripTrailingNewline(indentedCodeBlock.getLiteral()); + if (stripNewlines()) { + textContent.writeStripped(literal); } else { - textContent.write(indentedCodeBlock.getLiteral()); + 
textContent.write(literal); } + textContent.block(); } @Override @@ -155,49 +158,56 @@ public void visit(Link link) { @Override public void visit(ListItem listItem) { if (listHolder != null && listHolder instanceof OrderedListHolder) { - OrderedListHolder orderedListHolder = (OrderedListHolder) listHolder; - String indent = context.stripNewlines() ? "" : orderedListHolder.getIndent(); - textContent.write(indent + orderedListHolder.getCounter() + orderedListHolder.getDelimiter() + " "); + var orderedListHolder = (OrderedListHolder) listHolder; + var marker = orderedListHolder.getCounter() + orderedListHolder.getDelimiter(); + var spaces = " "; + textContent.write(marker); + textContent.write(spaces); + textContent.pushPrefix(repeat(" ", marker.length() + spaces.length())); visitChildren(listItem); - writeEndOfLineIfNeeded(listItem, null); + textContent.block(); + textContent.popPrefix(); orderedListHolder.increaseCounter(); } else if (listHolder != null && listHolder instanceof BulletListHolder) { BulletListHolder bulletListHolder = (BulletListHolder) listHolder; - if (!context.stripNewlines()) { - textContent.write(bulletListHolder.getIndent() + bulletListHolder.getMarker() + " "); + if (!stripNewlines()) { + var marker = bulletListHolder.getMarker(); + var spaces = " "; + textContent.write(marker); + textContent.write(spaces); + textContent.pushPrefix(repeat(" ", marker.length() + spaces.length())); } visitChildren(listItem); - writeEndOfLineIfNeeded(listItem, null); + textContent.block(); + if (!stripNewlines()) { + textContent.popPrefix(); + } } } @Override public void visit(OrderedList orderedList) { - if (listHolder != null) { - writeEndOfLine(); - } + textContent.pushTight(orderedList.isTight()); listHolder = new OrderedListHolder(listHolder, orderedList); visitChildren(orderedList); - writeEndOfLineIfNeeded(orderedList, null); - if (listHolder.getParent() != null) { - listHolder = listHolder.getParent(); - } else { - listHolder = null; - } + 
textContent.popTight(); + textContent.block(); + listHolder = listHolder.getParent(); } @Override public void visit(Paragraph paragraph) { visitChildren(paragraph); - // Add "end of line" only if its "root paragraph. - if (paragraph.getParent() == null || paragraph.getParent() instanceof Document) { - writeEndOfLineIfNeeded(paragraph, null); - } + textContent.block(); } @Override public void visit(SoftLineBreak softLineBreak) { - writeEndOfLineIfNeeded(softLineBreak, null); + if (stripNewlines()) { + textContent.whitespace(); + } else { + textContent.line(); + } } @Override @@ -216,7 +226,7 @@ protected void visitChildren(Node parent) { } private void writeText(String text) { - if (context.stripNewlines()) { + if (stripNewlines()) { textContent.writeStripped(text); } else { textContent.write(text); @@ -255,26 +265,72 @@ private void writeLink(Node node, String title, String destination) { } } - private void writeEndOfLineIfNeeded(Node node, Character c) { - if (context.stripNewlines()) { - if (c != null) { - textContent.write(c); - } - if (node.getNext() != null) { - textContent.whitespace(); - } + private boolean stripNewlines() { + return context.lineBreakRendering() == LineBreakRendering.STRIP; + } + + private static String stripTrailingNewline(String s) { + if (s.endsWith("\n")) { + return s.substring(0, s.length() - 1); } else { - if (node.getNext() != null) { - textContent.line(); - } + return s; } } - private void writeEndOfLine() { - if (context.stripNewlines()) { - textContent.whitespace(); - } else { - textContent.line(); + // Keep for Android compat (String.repeat only available on Android 12 and later) + private static String repeat(String s, int count) { + var sb = new StringBuilder(s.length() * count); + for (int i = 0; i < count; i++) { + sb.append(s); + } + return sb.toString(); + } + + private static class BulletListHolder extends ListHolder { + private final String marker; + + public BulletListHolder(ListHolder parent, BulletList list) { + 
super(parent); + marker = list.getMarker(); + } + + public String getMarker() { + return marker; + } + } + + private abstract static class ListHolder { + private final ListHolder parent; + + ListHolder(ListHolder parent) { + this.parent = parent; + } + + public ListHolder getParent() { + return parent; + } + } + + private static class OrderedListHolder extends ListHolder { + private final String delimiter; + private int counter; + + public OrderedListHolder(ListHolder parent, OrderedList list) { + super(parent); + delimiter = list.getMarkerDelimiter() != null ? list.getMarkerDelimiter() : "."; + counter = list.getMarkerStartNumber() != null ? list.getMarkerStartNumber() : 1; + } + + public String getDelimiter() { + return delimiter; + } + + public int getCounter() { + return counter; + } + + public void increaseCounter() { + counter++; } } } diff --git a/commonmark/src/main/java/org/commonmark/renderer/text/LineBreakRendering.java b/commonmark/src/main/java/org/commonmark/renderer/text/LineBreakRendering.java new file mode 100644 index 000000000..27eeaf0da --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/renderer/text/LineBreakRendering.java @@ -0,0 +1,19 @@ +package org.commonmark.renderer.text; + +/** + * Control how line breaks are rendered. + */ +public enum LineBreakRendering { + /** + * Strip all line breaks within blocks and between blocks, resulting in all the text in a single line. + */ + STRIP, + /** + * Use single line breaks between blocks, not a blank line (also render all lists as tight). + */ + COMPACT, + /** + * Separate blocks by a blank line (and respect tight vs loose lists). 
+ */ + SEPARATE_BLOCKS, +} diff --git a/commonmark/src/main/java/org/commonmark/renderer/text/TextContentNodeRendererContext.java b/commonmark/src/main/java/org/commonmark/renderer/text/TextContentNodeRendererContext.java index 1b1cf327c..d6fcb8d77 100644 --- a/commonmark/src/main/java/org/commonmark/renderer/text/TextContentNodeRendererContext.java +++ b/commonmark/src/main/java/org/commonmark/renderer/text/TextContentNodeRendererContext.java @@ -4,10 +4,17 @@ public interface TextContentNodeRendererContext { + /** + * Controls how line breaks should be rendered, see {@link LineBreakRendering}. + */ + LineBreakRendering lineBreakRendering(); + /** * @return true for stripping new lines and render text as "single line", * false for keeping all line breaks. + * @deprecated Use {@link #lineBreakRendering()} instead */ + @Deprecated boolean stripNewlines(); /** diff --git a/commonmark/src/main/java/org/commonmark/renderer/text/TextContentRenderer.java b/commonmark/src/main/java/org/commonmark/renderer/text/TextContentRenderer.java index d38f99972..d64d0c7ef 100644 --- a/commonmark/src/main/java/org/commonmark/renderer/text/TextContentRenderer.java +++ b/commonmark/src/main/java/org/commonmark/renderer/text/TextContentRenderer.java @@ -9,14 +9,17 @@ import java.util.ArrayList; import java.util.List; +/** + * Renders nodes to plain text content with minimal markup-like additions. 
+ */ public class TextContentRenderer implements Renderer { - private final boolean stripNewlines; + private final LineBreakRendering lineBreakRendering; private final List nodeRendererFactories; private TextContentRenderer(Builder builder) { - this.stripNewlines = builder.stripNewlines; + this.lineBreakRendering = builder.lineBreakRendering; this.nodeRendererFactories = new ArrayList<>(builder.nodeRendererFactories.size() + 1); this.nodeRendererFactories.addAll(builder.nodeRendererFactories); @@ -30,7 +33,7 @@ public NodeRenderer create(TextContentNodeRendererContext context) { } /** - * Create a new builder for configuring an {@link TextContentRenderer}. + * Create a new builder for configuring a {@link TextContentRenderer}. * * @return a builder */ @@ -40,7 +43,7 @@ public static Builder builder() { @Override public void render(Node node, Appendable output) { - RendererContext context = new RendererContext(new TextContentWriter(output)); + RendererContext context = new RendererContext(new TextContentWriter(output, lineBreakRendering)); context.render(node); } @@ -52,12 +55,12 @@ public String render(Node node) { } /** - * Builder for configuring an {@link TextContentRenderer}. See methods for default configuration. + * Builder for configuring a {@link TextContentRenderer}. See methods for default configuration. */ public static class Builder { - private boolean stripNewlines = false; private List nodeRendererFactories = new ArrayList<>(); + private LineBreakRendering lineBreakRendering = LineBreakRendering.COMPACT; /** * @return the configured {@link TextContentRenderer} @@ -66,15 +69,29 @@ public TextContentRenderer build() { return new TextContentRenderer(this); } + /** + * Configure how line breaks (newlines) are rendered, see {@link LineBreakRendering}. + * The default is {@link LineBreakRendering#COMPACT}. 
+ * + * @param lineBreakRendering the mode to use + * @return {@code this} + */ + public Builder lineBreakRendering(LineBreakRendering lineBreakRendering) { + this.lineBreakRendering = lineBreakRendering; + return this; + } + /** * Set the value of flag for stripping new lines. * * @param stripNewlines true for stripping new lines and render text as "single line", * false for keeping all line breaks * @return {@code this} + * @deprecated Use {@link #lineBreakRendering(LineBreakRendering)} with {@link LineBreakRendering#STRIP} instead */ + @Deprecated public Builder stripNewlines(boolean stripNewlines) { - this.stripNewlines = stripNewlines; + this.lineBreakRendering = stripNewlines ? LineBreakRendering.STRIP : LineBreakRendering.COMPACT; return this; } @@ -100,9 +117,9 @@ public Builder nodeRendererFactory(TextContentNodeRendererFactory nodeRendererFa public Builder extensions(Iterable extensions) { for (Extension extension : extensions) { if (extension instanceof TextContentRenderer.TextContentRendererExtension) { - TextContentRenderer.TextContentRendererExtension htmlRendererExtension = + TextContentRenderer.TextContentRendererExtension textContentRendererExtension = (TextContentRenderer.TextContentRendererExtension) extension; - htmlRendererExtension.extend(this); + textContentRendererExtension.extend(this); } } return this; @@ -123,17 +140,20 @@ private class RendererContext implements TextContentNodeRendererContext { private RendererContext(TextContentWriter textContentWriter) { this.textContentWriter = textContentWriter; - // The first node renderer for a node type "wins". 
- for (int i = nodeRendererFactories.size() - 1; i >= 0; i--) { - TextContentNodeRendererFactory nodeRendererFactory = nodeRendererFactories.get(i); - NodeRenderer nodeRenderer = nodeRendererFactory.create(this); - nodeRendererMap.add(nodeRenderer); + for (var factory : nodeRendererFactories) { + var renderer = factory.create(this); + nodeRendererMap.add(renderer); } } + @Override + public LineBreakRendering lineBreakRendering() { + return lineBreakRendering; + } + @Override public boolean stripNewlines() { - return stripNewlines; + return lineBreakRendering == LineBreakRendering.STRIP; } @Override diff --git a/commonmark/src/main/java/org/commonmark/renderer/text/TextContentWriter.java b/commonmark/src/main/java/org/commonmark/renderer/text/TextContentWriter.java index 0ea56e621..1fb482785 100644 --- a/commonmark/src/main/java/org/commonmark/renderer/text/TextContentWriter.java +++ b/commonmark/src/main/java/org/commonmark/renderer/text/TextContentWriter.java @@ -1,47 +1,141 @@ package org.commonmark.renderer.text; import java.io.IOException; +import java.util.LinkedList; public class TextContentWriter { private final Appendable buffer; + private final LineBreakRendering lineBreakRendering; + private final LinkedList prefixes = new LinkedList<>(); + private final LinkedList tight = new LinkedList<>(); + + private String blockSeparator = null; private char lastChar; public TextContentWriter(Appendable out) { - buffer = out; + this(out, LineBreakRendering.COMPACT); + } + + public TextContentWriter(Appendable out, LineBreakRendering lineBreakRendering) { + this.buffer = out; + this.lineBreakRendering = lineBreakRendering; } public void whitespace() { if (lastChar != 0 && lastChar != ' ') { - append(' '); + write(' '); } } public void colon() { if (lastChar != 0 && lastChar != ':') { - append(':'); + write(':'); } } public void line() { - if (lastChar != 0 && lastChar != '\n') { - append('\n'); - } + append('\n'); + writePrefixes(); + } + + public void block() { + 
blockSeparator = lineBreakRendering == LineBreakRendering.STRIP ? " " : // + lineBreakRendering == LineBreakRendering.COMPACT || isTight() ? "\n" : "\n\n"; + } + + public void resetBlock() { + blockSeparator = null; } public void writeStripped(String s) { - append(s.replaceAll("[\\r\\n\\s]+", " ")); + write(s.replaceAll("[\\r\\n\\s]+", " ")); } public void write(String s) { + flushBlockSeparator(); append(s); } public void write(char c) { + flushBlockSeparator(); append(c); } + /** + * Push a prefix onto the top of the stack. All prefixes are written at the beginning of each line, until the + * prefix is popped again. + * + * @param prefix the raw prefix string + */ + public void pushPrefix(String prefix) { + prefixes.addLast(prefix); + } + + /** + * Write a prefix. + * + * @param prefix the raw prefix string to write + */ + public void writePrefix(String prefix) { + write(prefix); + } + + /** + * Remove the last prefix from the top of the stack. + */ + public void popPrefix() { + prefixes.removeLast(); + } + + /** + * Change whether blocks are tight or loose. Loose is the default where blocks are separated by a blank line. Tight + * is where blocks are not separated by a blank line. Tight blocks are used in lists, if there are no blank lines + * within the list. + *

    + * Note that changing this does not affect block separators that have already been enqueued with {@link #block()}, + * only future ones. + */ + public void pushTight(boolean tight) { + this.tight.addLast(tight); + } + + /** + * Remove the last "tight" setting from the top of the stack. + */ + public void popTight() { + this.tight.removeLast(); + } + + private boolean isTight() { + return !tight.isEmpty() && tight.getLast(); + } + + private void writePrefixes() { + for (String prefix : prefixes) { + append(prefix); + } + } + + /** + * If a block separator has been enqueued with {@link #block()} but not yet written, write it now. + */ + private void flushBlockSeparator() { + if (blockSeparator != null) { + if (blockSeparator.equals("\n") || blockSeparator.equals("\n\n")) { + for (int i = 0; i < blockSeparator.length(); i++) { + var sep = blockSeparator.charAt(i); + append(sep); + writePrefixes(); + } + } else { + append(blockSeparator); + } + blockSeparator = null; + } + } + private void append(String s) { try { buffer.append(s); diff --git a/commonmark/src/main/java/org/commonmark/renderer/text/package-info.java b/commonmark/src/main/java/org/commonmark/renderer/text/package-info.java index 07a558091..8309f4bd6 100644 --- a/commonmark/src/main/java/org/commonmark/renderer/text/package-info.java +++ b/commonmark/src/main/java/org/commonmark/renderer/text/package-info.java @@ -1,4 +1,4 @@ /** - * Text content rendering (see {@link org.commonmark.renderer.text.TextContentRenderer}) + * Plain text rendering with minimal markup (see {@link org.commonmark.renderer.text.TextContentRenderer}) */ package org.commonmark.renderer.text; diff --git a/commonmark/src/main/java/org/commonmark/text/AsciiMatcher.java b/commonmark/src/main/java/org/commonmark/text/AsciiMatcher.java new file mode 100644 index 000000000..0d9cea458 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/text/AsciiMatcher.java @@ -0,0 +1,73 @@ +package org.commonmark.text; + +import 
java.util.BitSet; +import java.util.Set; + +/** + * Char matcher that can match ASCII characters efficiently. + */ +public class AsciiMatcher implements CharMatcher { + private final BitSet set; + + private AsciiMatcher(Builder builder) { + this.set = builder.set; + } + + @Override + public boolean matches(char c) { + return set.get(c); + } + + public Builder newBuilder() { + return new Builder((BitSet) set.clone()); + } + + public static Builder builder() { + return new Builder(new BitSet()); + } + + public static Builder builder(AsciiMatcher matcher) { + return new Builder((BitSet) matcher.set.clone()); + } + + public static class Builder { + private final BitSet set; + + private Builder(BitSet set) { + this.set = set; + } + + public Builder c(char c) { + if (c > 127) { + throw new IllegalArgumentException("Can only match ASCII characters"); + } + set.set(c); + return this; + } + + public Builder anyOf(String s) { + for (int i = 0; i < s.length(); i++) { + c(s.charAt(i)); + } + return this; + } + + public Builder anyOf(Set<Character> characters) { + for (Character c : characters) { + c(c); + } + return this; + } + + public Builder range(char from, char toInclusive) { + for (char c = from; c <= toInclusive; c++) { + c(c); + } + return this; + } + + public AsciiMatcher build() { + return new AsciiMatcher(this); + } + } +} diff --git a/commonmark/src/main/java/org/commonmark/text/CharMatcher.java b/commonmark/src/main/java/org/commonmark/text/CharMatcher.java new file mode 100644 index 000000000..2833e65c3 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/text/CharMatcher.java @@ -0,0 +1,13 @@ +package org.commonmark.text; + +/** + * Matcher interface for {@code char} values. + *

    + * Note that because this matches on {@code char} values only (as opposed to {@code int} code points), + * this only operates on the level of code units and doesn't support supplementary characters + * (see {@link Character#isSupplementaryCodePoint(int)}). + */ +public interface CharMatcher { + + boolean matches(char c); +} diff --git a/commonmark/src/main/java/org/commonmark/text/Characters.java b/commonmark/src/main/java/org/commonmark/text/Characters.java new file mode 100644 index 000000000..ee56ca67e --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/text/Characters.java @@ -0,0 +1,157 @@ +package org.commonmark.text; + +/** + * Functions for finding characters in strings or checking characters. + */ +public class Characters { + + public static int find(char c, CharSequence s, int startIndex) { + int length = s.length(); + for (int i = startIndex; i < length; i++) { + if (s.charAt(i) == c) { + return i; + } + } + return -1; + } + + public static int findLineBreak(CharSequence s, int startIndex) { + int length = s.length(); + for (int i = startIndex; i < length; i++) { + switch (s.charAt(i)) { + case '\n': + case '\r': + return i; + } + } + return -1; + } + + /** + * @see blank line + */ + public static boolean isBlank(CharSequence s) { + return skipSpaceTab(s, 0, s.length()) == s.length(); + } + + public static boolean hasNonSpace(CharSequence s) { + int length = s.length(); + int skipped = skip(' ', s, 0, length); + return skipped != length; + } + + public static boolean isLetter(CharSequence s, int index) { + int codePoint = Character.codePointAt(s, index); + return Character.isLetter(codePoint); + } + + public static boolean isSpaceOrTab(CharSequence s, int index) { + if (index < s.length()) { + switch (s.charAt(index)) { + case ' ': + case '\t': + return true; + } + } + return false; + } + + /** + * @see Unicode punctuation character + */ + public static boolean isPunctuationCodePoint(int codePoint) { + switch (Character.getType(codePoint)) { 
+ // General category "P" (punctuation) + case Character.DASH_PUNCTUATION: + case Character.START_PUNCTUATION: + case Character.END_PUNCTUATION: + case Character.CONNECTOR_PUNCTUATION: + case Character.OTHER_PUNCTUATION: + case Character.INITIAL_QUOTE_PUNCTUATION: + case Character.FINAL_QUOTE_PUNCTUATION: + // General category "S" (symbol) + case Character.MATH_SYMBOL: + case Character.CURRENCY_SYMBOL: + case Character.MODIFIER_SYMBOL: + case Character.OTHER_SYMBOL: + return true; + default: + switch (codePoint) { + case '$': + case '+': + case '<': + case '=': + case '>': + case '^': + case '`': + case '|': + case '~': + return true; + default: + return false; + } + } + } + + /** + * Check whether the provided code point is a Unicode whitespace character as defined in the spec. + * + * @see Unicode whitespace character + */ + public static boolean isWhitespaceCodePoint(int codePoint) { + switch (codePoint) { + case ' ': + case '\t': + case '\n': + case '\f': + case '\r': + return true; + default: + return Character.getType(codePoint) == Character.SPACE_SEPARATOR; + } + } + + public static int skip(char skip, CharSequence s, int startIndex, int endIndex) { + for (int i = startIndex; i < endIndex; i++) { + if (s.charAt(i) != skip) { + return i; + } + } + return endIndex; + } + + public static int skipBackwards(char skip, CharSequence s, int startIndex, int lastIndex) { + for (int i = startIndex; i >= lastIndex; i--) { + if (s.charAt(i) != skip) { + return i; + } + } + return lastIndex - 1; + } + + public static int skipSpaceTab(CharSequence s, int startIndex, int endIndex) { + for (int i = startIndex; i < endIndex; i++) { + switch (s.charAt(i)) { + case ' ': + case '\t': + break; + default: + return i; + } + } + return endIndex; + } + + public static int skipSpaceTabBackwards(CharSequence s, int startIndex, int lastIndex) { + for (int i = startIndex; i >= lastIndex; i--) { + switch (s.charAt(i)) { + case ' ': + case '\t': + break; + default: + return i; + } + } + 
return lastIndex - 1; + } +} diff --git a/commonmark/src/main/java/org/commonmark/text/package-info.java b/commonmark/src/main/java/org/commonmark/text/package-info.java new file mode 100644 index 000000000..ab9eec6f1 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/text/package-info.java @@ -0,0 +1,4 @@ +/** + * Text processing utilities for parsing and rendering, exported for use by extensions + */ +package org.commonmark.text; diff --git a/commonmark/src/main/resources/META-INF/LICENSE.txt b/commonmark/src/main/resources/META-INF/LICENSE.txt new file mode 100644 index 000000000..b09e367ce --- /dev/null +++ b/commonmark/src/main/resources/META-INF/LICENSE.txt @@ -0,0 +1,23 @@ +Copyright (c) 2015, Atlassian Pty Ltd +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/commonmark/src/main/resources/org/commonmark/internal/util/entities.properties b/commonmark/src/main/resources/org/commonmark/internal/util/entities.txt similarity index 100% rename from commonmark/src/main/resources/org/commonmark/internal/util/entities.properties rename to commonmark/src/main/resources/org/commonmark/internal/util/entities.txt diff --git a/commonmark/src/test/java/org/commonmark/ProfilingMain.java b/commonmark/src/test/java/org/commonmark/ProfilingMain.java index 0f0c08153..83b1bdaff 100644 --- a/commonmark/src/test/java/org/commonmark/ProfilingMain.java +++ b/commonmark/src/test/java/org/commonmark/ProfilingMain.java @@ -1,10 +1,11 @@ package org.commonmark; +import org.commonmark.node.Node; import org.commonmark.parser.Parser; import org.commonmark.renderer.html.HtmlRenderer; import org.commonmark.testutil.TestResources; -import java.util.Collections; +import java.util.ArrayList; import java.util.List; public class ProfilingMain { @@ -15,17 +16,29 @@ public class ProfilingMain { private static final HtmlRenderer RENDERER = HtmlRenderer.builder().build(); public static void main(String[] args) throws Exception { - System.out.println("Started up, attach profiler now"); - Thread.sleep(10_000); - System.out.println("Parsing and rendering"); - parseAndRender(Collections.singletonList(SPEC)); - System.out.println("Finished parsing"); + System.out.println("Attach profiler, then press enter to start parsing."); + 
System.in.read(); + System.out.println("Parsing"); + List nodes = parse(List.of(SPEC)); + System.out.println("Finished parsing, press enter to start rendering"); + System.in.read(); + System.out.println(render(nodes)); + System.out.println("Finished rendering"); } - private static long parseAndRender(List examples) { - long length = 0; + private static List parse(List examples) { + List nodes = new ArrayList<>(); for (String example : examples) { - String result = RENDERER.render(PARSER.parse(example)); + Node doc = PARSER.parse(example); + nodes.add(doc); + } + return nodes; + } + + private static long render(List examples) { + long length = 0; + for (Node example : examples) { + String result = RENDERER.render(example); length += result.length(); } return length; diff --git a/commonmark/src/test/java/org/commonmark/internal/DocumentParserTest.java b/commonmark/src/test/java/org/commonmark/internal/DocumentParserTest.java index c4d848362..a834665ff 100644 --- a/commonmark/src/test/java/org/commonmark/internal/DocumentParserTest.java +++ b/commonmark/src/test/java/org/commonmark/internal/DocumentParserTest.java @@ -2,20 +2,16 @@ import org.commonmark.node.*; import org.commonmark.parser.block.BlockParserFactory; -import org.junit.Test; +import org.junit.jupiter.api.Test; -import java.util.Arrays; -import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.Set; -import java.util.HashSet; -import static org.hamcrest.CoreMatchers.is; -import static org.junit.Assert.assertThat; -import static org.junit.Assert.assertTrue; +import static org.assertj.core.api.Assertions.assertThat; -public class DocumentParserTest { - private static List CORE_FACTORIES = Arrays.asList( +class DocumentParserTest { + private static final List CORE_FACTORIES = List.of( new BlockQuoteParser.Factory(), new HeadingParser.Factory(), new FencedCodeBlockParser.Factory(), @@ -25,28 +21,28 @@ public class DocumentParserTest { new 
IndentedCodeBlockParser.Factory()); @Test - public void calculateBlockParserFactories_givenAFullListOfAllowedNodes_includesAllCoreFactories() { - List customParserFactories = Collections.emptyList(); - Set> nodes = new HashSet<>(Arrays.asList(BlockQuote.class, Heading.class, FencedCodeBlock.class, HtmlBlock.class, ThematicBreak.class, ListBlock.class, IndentedCodeBlock.class)); + void calculateBlockParserFactories_givenAFullListOfAllowedNodes_includesAllCoreFactories() { + List customParserFactories = List.of(); + var enabledBlockTypes = Set.of(BlockQuote.class, Heading.class, FencedCodeBlock.class, HtmlBlock.class, ThematicBreak.class, ListBlock.class, IndentedCodeBlock.class); - List blockParserFactories = DocumentParser.calculateBlockParserFactories(customParserFactories, nodes); - assertThat(blockParserFactories.size(), is(CORE_FACTORIES.size())); + List blockParserFactories = DocumentParser.calculateBlockParserFactories(customParserFactories, enabledBlockTypes); + assertThat(blockParserFactories).hasSameSizeAs(CORE_FACTORIES); for (BlockParserFactory factory : CORE_FACTORIES) { - assertTrue(hasInstance(blockParserFactories, factory.getClass())); + assertThat(hasInstance(blockParserFactories, factory.getClass())).isTrue(); } } @Test - public void calculateBlockParserFactories_givenAListOfAllowedNodes_includesAssociatedFactories() { - List customParserFactories = Collections.emptyList(); + void calculateBlockParserFactories_givenAListOfAllowedNodes_includesAssociatedFactories() { + List customParserFactories = List.of(); Set> nodes = new HashSet<>(); nodes.add(IndentedCodeBlock.class); List blockParserFactories = DocumentParser.calculateBlockParserFactories(customParserFactories, nodes); - assertThat(blockParserFactories.size(), is(1)); - assertTrue(hasInstance(blockParserFactories, IndentedCodeBlockParser.Factory.class)); + assertThat(blockParserFactories).hasSize(1); + assertThat(hasInstance(blockParserFactories, 
IndentedCodeBlockParser.Factory.class)).isTrue(); } private boolean hasInstance(List blockParserFactories, Class factoryClass) { diff --git a/commonmark/src/test/java/org/commonmark/internal/LinkReferenceDefinitionParserTest.java b/commonmark/src/test/java/org/commonmark/internal/LinkReferenceDefinitionParserTest.java index f0bdef492..b69ada0e9 100644 --- a/commonmark/src/test/java/org/commonmark/internal/LinkReferenceDefinitionParserTest.java +++ b/commonmark/src/test/java/org/commonmark/internal/LinkReferenceDefinitionParserTest.java @@ -2,178 +2,204 @@ import org.commonmark.internal.LinkReferenceDefinitionParser.State; import org.commonmark.node.LinkReferenceDefinition; -import org.junit.Test; +import org.commonmark.parser.SourceLine; +import org.junit.jupiter.api.Test; -import static org.junit.Assert.assertEquals; +import static org.assertj.core.api.Assertions.assertThat; -public class LinkReferenceDefinitionParserTest { +class LinkReferenceDefinitionParserTest { - private LinkReferenceDefinitionParser parser = new LinkReferenceDefinitionParser(); + private final LinkReferenceDefinitionParser parser = new LinkReferenceDefinitionParser(); @Test - public void testStartLabel() { - parser.parse("["); - assertEquals(State.LABEL, parser.getState()); - assertEquals("[", parser.getParagraphContent().toString()); + void testStartLabel() { + assertState("[", State.LABEL, "["); } @Test - public void testStartNoLabel() { + void testStartNoLabel() { // Not a label assertParagraph("a"); // Can not go back to parsing link reference definitions - parser.parse("a"); - parser.parse("["); - assertEquals(State.PARAGRAPH, parser.getState()); - assertEquals("a\n[", parser.getParagraphContent().toString()); + parse("a"); + parse("["); + assertThat(parser.getState()).isEqualTo(State.PARAGRAPH); + assertParagraphLines("a\n[", parser); } @Test - public void testEmptyLabel() { + void testEmptyLabel() { assertParagraph("[]: /"); assertParagraph("[ ]: /"); assertParagraph("[ \t\n\u000B\f\r 
]: /"); } @Test - public void testLabelColon() { + void testLabelColon() { assertParagraph("[foo] : /"); } @Test - public void testLabel() { + void testLabel() { assertState("[foo]:", State.DESTINATION, "[foo]:"); assertState("[ foo ]:", State.DESTINATION, "[ foo ]:"); } @Test - public void testLabelInvalid() { + void testLabelInvalid() { assertParagraph("[foo[]:"); } @Test - public void testLabelMultiline() { - LinkReferenceDefinitionParser parser = new LinkReferenceDefinitionParser(); - parser.parse("[two"); - assertEquals(State.LABEL, parser.getState()); - parser.parse("lines]:"); - assertEquals(State.DESTINATION, parser.getState()); - parser.parse("/url"); - assertEquals(State.START_TITLE, parser.getState()); - assertDef(parser.getDefinitions().get(0), "two lines", "/url", null); + void testLabelMultiline() { + parse("[two"); + assertThat(parser.getState()).isEqualTo(State.LABEL); + parse("lines]:"); + assertThat(parser.getState()).isEqualTo(State.DESTINATION); + parse("/url"); + assertThat(parser.getState()).isEqualTo(State.START_TITLE); + assertDef(parser.getDefinitions().get(0), "two\nlines", "/url", null); } @Test - public void testLabelStartsWithNewline() { - LinkReferenceDefinitionParser parser = new LinkReferenceDefinitionParser(); - parser.parse("["); - assertEquals(State.LABEL, parser.getState()); - parser.parse("weird]:"); - assertEquals(State.DESTINATION, parser.getState()); - parser.parse("/url"); - assertEquals(State.START_TITLE, parser.getState()); - assertDef(parser.getDefinitions().get(0), "weird", "/url", null); + void testLabelStartsWithNewline() { + parse("["); + assertThat(parser.getState()).isEqualTo(State.LABEL); + parse("weird]:"); + assertThat(parser.getState()).isEqualTo(State.DESTINATION); + parse("/url"); + assertThat(parser.getState()).isEqualTo(State.START_TITLE); + assertDef(parser.getDefinitions().get(0), "\nweird", "/url", null); } @Test - public void testDestination() { - LinkReferenceDefinitionParser parser = new 
LinkReferenceDefinitionParser(); - parser.parse("[foo]: /url"); - assertEquals(State.START_TITLE, parser.getState()); - assertEquals("", parser.getParagraphContent().toString()); + void testDestination() { + parse("[foo]: /url"); + assertThat(parser.getState()).isEqualTo(State.START_TITLE); + assertParagraphLines("", parser); - assertEquals(1, parser.getDefinitions().size()); + assertThat(parser.getDefinitions()).hasSize(1); assertDef(parser.getDefinitions().get(0), "foo", "/url", null); - parser.parse("[bar]: "); + parse("[bar]: "); assertDef(parser.getDefinitions().get(1), "bar", "/url2", null); } @Test - public void testDestinationInvalid() { + void testDestinationInvalid() { assertParagraph("[foo]: "); } @Test - public void testTitle() { - LinkReferenceDefinitionParser parser = new LinkReferenceDefinitionParser(); - parser.parse("[foo]: /url 'title'"); - assertEquals(State.START_DEFINITION, parser.getState()); - assertEquals("", parser.getParagraphContent().toString()); + void testTitle() { + parse("[foo]: /url 'title'"); + assertThat(parser.getState()).isEqualTo(State.START_DEFINITION); + assertParagraphLines("", parser); - assertEquals(1, parser.getDefinitions().size()); + assertThat(parser.getDefinitions()).hasSize(1); assertDef(parser.getDefinitions().get(0), "foo", "/url", "title"); } @Test - public void testTitleStartWhitespace() { - LinkReferenceDefinitionParser parser = new LinkReferenceDefinitionParser(); - parser.parse("[foo]: /url"); - assertEquals(State.START_TITLE, parser.getState()); - assertEquals("", parser.getParagraphContent().toString()); + void testTitleStartWhitespace() { + parse("[foo]: /url"); + assertThat(parser.getState()).isEqualTo(State.START_TITLE); + assertParagraphLines("", parser); - parser.parse(" "); + parse(" "); - assertEquals(State.START_DEFINITION, parser.getState()); - assertEquals(" ", parser.getParagraphContent().toString()); + assertThat(parser.getState()).isEqualTo(State.START_DEFINITION); + assertParagraphLines(" ", 
parser); - assertEquals(1, parser.getDefinitions().size()); + assertThat(parser.getDefinitions()).hasSize(1); assertDef(parser.getDefinitions().get(0), "foo", "/url", null); } @Test - public void testTitleMultiline() { - LinkReferenceDefinitionParser parser = new LinkReferenceDefinitionParser(); - parser.parse("[foo]: /url 'two"); - assertEquals(State.TITLE, parser.getState()); - assertEquals("[foo]: /url 'two", parser.getParagraphContent().toString()); - assertEquals(0, parser.getDefinitions().size()); + void testTitleMultiline() { + parse("[foo]: /url 'two"); + assertThat(parser.getState()).isEqualTo(State.TITLE); + assertParagraphLines("[foo]: /url 'two", parser); + assertThat(parser.getDefinitions()).isEmpty(); + + parse("lines"); + assertThat(parser.getState()).isEqualTo(State.TITLE); + assertParagraphLines("[foo]: /url 'two\nlines", parser); + assertThat(parser.getDefinitions()).isEmpty(); + + parse("'"); + assertThat(parser.getState()).isEqualTo(State.START_DEFINITION); + assertParagraphLines("", parser); + + assertThat(parser.getDefinitions()).hasSize(1); + assertDef(parser.getDefinitions().get(0), "foo", "/url", "two\nlines\n"); + } - parser.parse("lines"); - assertEquals(State.TITLE, parser.getState()); - assertEquals("[foo]: /url 'two\nlines", parser.getParagraphContent().toString()); - assertEquals(0, parser.getDefinitions().size()); + @Test + void testTitleMultiline2() { + parse("[foo]: /url '"); + assertThat(parser.getState()).isEqualTo(State.TITLE); + parse("title'"); + assertThat(parser.getState()).isEqualTo(State.START_DEFINITION); + + assertDef(parser.getDefinitions().get(0), "foo", "/url", "\ntitle"); + } - parser.parse("'"); - assertEquals(State.START_DEFINITION, parser.getState()); - assertEquals("", parser.getParagraphContent().toString()); + @Test + void testTitleMultiline3() { + parse("[foo]: /url"); + assertThat(parser.getState()).isEqualTo(State.START_TITLE); + // Note that this looks like a valid title until we parse "bad", at which point 
we need to treat the whole line + // as a paragraph line and discard any already parsed title. + parse("\"title\" bad"); + assertThat(parser.getState()).isEqualTo(State.PARAGRAPH); - assertEquals(1, parser.getDefinitions().size()); - assertDef(parser.getDefinitions().get(0), "foo", "/url", "two\nlines\n"); + assertDef(parser.getDefinitions().get(0), "foo", "/url", null); } @Test - public void testTitleMultiline2() { - LinkReferenceDefinitionParser parser = new LinkReferenceDefinitionParser(); - parser.parse("[foo]: /url '"); - assertEquals(State.TITLE, parser.getState()); - parser.parse("title'"); - assertEquals(State.START_DEFINITION, parser.getState()); + void testTitleMultiline4() { + parse("[foo]: /url"); + assertThat(parser.getState()).isEqualTo(State.START_TITLE); + parse("(title"); + assertThat(parser.getState()).isEqualTo(State.TITLE); + parse("foo("); + assertThat(parser.getState()).isEqualTo(State.PARAGRAPH); - assertDef(parser.getDefinitions().get(0), "foo", "/url", "\ntitle"); + assertDef(parser.getDefinitions().get(0), "foo", "/url", null); } @Test - public void testTitleInvalid() { + void testTitleInvalid() { assertParagraph("[foo]: /url (invalid("); assertParagraph("[foo]: 'title'"); assertParagraph("[foo]: /url 'title' INVALID"); } + private void parse(String content) { + parser.parse(SourceLine.of(content, null)); + } + private static void assertParagraph(String input) { assertState(input, State.PARAGRAPH, input); } private static void assertState(String input, State state, String paragraphContent) { LinkReferenceDefinitionParser parser = new LinkReferenceDefinitionParser(); - parser.parse(input); - assertEquals(state, parser.getState()); - assertEquals(paragraphContent, parser.getParagraphContent().toString()); + // TODO: Should we check things with source spans here? 
+ parser.parse(SourceLine.of(input, null)); + assertThat(parser.getState()).isEqualTo(state); + assertParagraphLines(paragraphContent, parser); } private static void assertDef(LinkReferenceDefinition def, String label, String destination, String title) { - assertEquals(label, def.getLabel()); - assertEquals(destination, def.getDestination()); - assertEquals(title, def.getTitle()); + assertThat(def.getLabel()).isEqualTo(label); + assertThat(def.getDestination()).isEqualTo(destination); + assertThat(def.getTitle()).isEqualTo(title); + } + + private static void assertParagraphLines(String expectedContent, LinkReferenceDefinitionParser parser) { + String actual = parser.getParagraphLines().getContent(); + assertThat(actual).isEqualTo(expectedContent); } } diff --git a/commonmark/src/test/java/org/commonmark/internal/util/EscapingTest.java b/commonmark/src/test/java/org/commonmark/internal/util/EscapingTest.java index 9433eb7d0..eb2f1a801 100644 --- a/commonmark/src/test/java/org/commonmark/internal/util/EscapingTest.java +++ b/commonmark/src/test/java/org/commonmark/internal/util/EscapingTest.java @@ -1,21 +1,21 @@ package org.commonmark.internal.util; -import org.junit.Test; +import org.junit.jupiter.api.Test; -import static org.junit.Assert.assertEquals; +import static org.assertj.core.api.Assertions.assertThat; -public class EscapingTest { +class EscapingTest { @Test - public void testEscapeHtml() { - assertEquals("nothing to escape", Escaping.escapeHtml("nothing to escape")); - assertEquals("&", Escaping.escapeHtml("&")); - assertEquals("<", Escaping.escapeHtml("<")); - assertEquals(">", Escaping.escapeHtml(">")); - assertEquals(""", Escaping.escapeHtml("\"")); - assertEquals("< start", Escaping.escapeHtml("< start")); - assertEquals("end >", Escaping.escapeHtml("end >")); - assertEquals("< both >", Escaping.escapeHtml("< both >")); - assertEquals("< middle & too >", Escaping.escapeHtml("< middle & too >")); + void testEscapeHtml() { + 
assertThat(Escaping.escapeHtml("nothing to escape")).isEqualTo("nothing to escape"); + assertThat(Escaping.escapeHtml("&")).isEqualTo("&"); + assertThat(Escaping.escapeHtml("<")).isEqualTo("<"); + assertThat(Escaping.escapeHtml(">")).isEqualTo(">"); + assertThat(Escaping.escapeHtml("\"")).isEqualTo("""); + assertThat(Escaping.escapeHtml("< start")).isEqualTo("< start"); + assertThat(Escaping.escapeHtml("end >")).isEqualTo("end >"); + assertThat(Escaping.escapeHtml("< both >")).isEqualTo("< both >"); + assertThat(Escaping.escapeHtml("< middle & too >")).isEqualTo("< middle & too >"); } } diff --git a/commonmark/src/test/java/org/commonmark/internal/util/LineReaderTest.java b/commonmark/src/test/java/org/commonmark/internal/util/LineReaderTest.java new file mode 100644 index 000000000..b52713846 --- /dev/null +++ b/commonmark/src/test/java/org/commonmark/internal/util/LineReaderTest.java @@ -0,0 +1,124 @@ +package org.commonmark.internal.util; + +import org.junit.jupiter.api.Test; + +import java.io.*; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Objects; + +import static java.util.stream.Collectors.joining; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.commonmark.internal.util.LineReader.CHAR_BUFFER_SIZE; + +class LineReaderTest { + + @Test + void testReadLine() throws IOException { + assertLines(); + + assertLines("", "\n"); + assertLines("foo", "\n", "bar", "\n"); + assertLines("foo", "\n", "bar", null); + assertLines("", "\n", "", "\n"); + assertLines(repeat("a", CHAR_BUFFER_SIZE - 1), "\n"); + assertLines(repeat("a", CHAR_BUFFER_SIZE), "\n"); + assertLines(repeat("a", CHAR_BUFFER_SIZE) + "b", "\n"); + + assertLines("", "\r\n"); + assertLines("foo", "\r\n", "bar", "\r\n"); + assertLines("foo", "\r\n", "bar", null); + assertLines("", "\r\n", "", "\r\n"); + assertLines(repeat("a", CHAR_BUFFER_SIZE - 
2), "\r\n"); + assertLines(repeat("a", CHAR_BUFFER_SIZE - 1), "\r\n"); + assertLines(repeat("a", CHAR_BUFFER_SIZE), "\r\n"); + assertLines(repeat("a", CHAR_BUFFER_SIZE) + "b", "\r\n"); + + assertLines("", "\r"); + assertLines("foo", "\r", "bar", "\r"); + assertLines("foo", "\r", "bar", null); + assertLines("", "\r", "", "\r"); + assertLines(repeat("a", CHAR_BUFFER_SIZE - 1), "\r"); + assertLines(repeat("a", CHAR_BUFFER_SIZE), "\r"); + assertLines(repeat("a", CHAR_BUFFER_SIZE) + "b", "\r"); + + assertLines("", "\n", "", "\r", "", "\r\n", "", "\n"); + assertLines("what", "\r", "are", "\r", "", "\r", "you", "\r\n", "", "\r\n", "even", "\n", "doing", null); + } + + @Test + void testClose() throws IOException { + var reader = new InputStreamReader(new ByteArrayInputStream("test".getBytes(StandardCharsets.UTF_8))); + var lineReader = new LineReader(reader); + lineReader.close(); + lineReader.close(); + assertThatThrownBy(reader::read).isInstanceOf(IOException.class); + } + + private void assertLines(String... s) throws IOException { + assertThat(s.length).as("Expected parts needs to be even (pairs of content and terminator)").isEven(); + var input = Arrays.stream(s).filter(Objects::nonNull).collect(joining("")); + + assertLines(new StringReader(input), s); + assertLines(new SlowStringReader(input), s); + } + + private static void assertLines(Reader reader, String... 
expectedParts) throws IOException { + try (var lineReader = new LineReader(reader)) { + var lines = new ArrayList<>(); + String line; + while ((line = lineReader.readLine()) != null) { + lines.add(line); + lines.add(lineReader.getLineTerminator()); + } + assertThat(lineReader.getLineTerminator()).isNull(); + assertThat(lines).containsExactly(expectedParts); + } + } + + private static String repeat(String s, int count) { + StringBuilder sb = new StringBuilder(s.length() * count); + for (int i = 0; i < count; i++) { + sb.append(s); + } + return sb.toString(); + } + + /** + * Reader that only reads 0 or 1 chars at a time to test the corner cases. + */ + private static class SlowStringReader extends Reader { + + private final String s; + private int position = 0; + private boolean empty = false; + + private SlowStringReader(String s) { + this.s = s; + } + + @Override + public int read(char[] cbuf, int off, int len) throws IOException { + Objects.checkFromIndexSize(off, len, cbuf.length); + if (len == 0) { + return 0; + } + empty = !empty; + if (empty) { + // Return 0 every other time to test handling of 0. 
+ return 0; + } + if (position >= s.length()) { + return -1; + } + cbuf[off] = s.charAt(position++); + return 1; + } + + @Override + public void close() throws IOException { + } + } +} diff --git a/commonmark/src/test/java/org/commonmark/parser/InlineContentParserTest.java b/commonmark/src/test/java/org/commonmark/parser/InlineContentParserTest.java new file mode 100644 index 000000000..d0f45a6bc --- /dev/null +++ b/commonmark/src/test/java/org/commonmark/parser/InlineContentParserTest.java @@ -0,0 +1,125 @@ +package org.commonmark.parser; + +import org.commonmark.node.*; +import org.commonmark.parser.beta.InlineContentParser; +import org.commonmark.parser.beta.InlineContentParserFactory; +import org.commonmark.parser.beta.InlineParserState; +import org.commonmark.parser.beta.ParsedInline; +import org.commonmark.test.Nodes; +import org.junit.jupiter.api.Test; + +import java.util.Set; + +import static org.assertj.core.api.Assertions.assertThat; + +class InlineContentParserTest { + + @Test + void customInlineContentParser() { + var parser = Parser.builder().customInlineContentParserFactory(new DollarInlineParser.Factory()).build(); + var doc = parser.parse("Test: $hey *there*$ $you$\n\n# Heading $heading$\n"); + var inline1 = Nodes.find(doc, DollarInline.class); + assertThat(inline1.getLiteral()).isEqualTo("hey *there*"); + + var inline2 = (DollarInline) doc.getFirstChild().getLastChild(); + assertThat(inline2.getLiteral()).isEqualTo("you"); + + var heading = Nodes.find(doc, Heading.class); + var inline3 = (DollarInline) heading.getLastChild(); + assertThat(inline3.getLiteral()).isEqualTo("heading"); + + // Parser is created for each inline snippet, which is why the index resets for the second snippet. + assertThat(inline1.getIndex()).isEqualTo(0); + assertThat(inline2.getIndex()).isEqualTo(1); + assertThat(inline3.getIndex()).isEqualTo(0); + } + + @Test + void bangInlineContentParser() { + // See if using ! for a custom inline content parser works. 
+ // ![] is used for images, but if it's not followed by a [, it should be possible to parse it differently. + var parser = Parser.builder().customInlineContentParserFactory(new BangInlineParser.Factory()).build(); + var doc = parser.parse("![image](url) !notimage"); + var image = Nodes.find(doc, Image.class); + assertThat(image.getDestination()).isEqualTo("url"); + assertThat(((Text) image.getNext()).getLiteral()).isEqualTo(" "); + // Class + assertThat(image.getNext().getNext()).isInstanceOf(BangInline.class); + assertThat(((Text) image.getNext().getNext().getNext()).getLiteral()).isEqualTo("notimage"); + } + + private static class DollarInline extends CustomNode { + private final String literal; + private final int index; + + public DollarInline(String literal, int index) { + this.literal = literal; + this.index = index; + } + + public String getLiteral() { + return literal; + } + + public int getIndex() { + return index; + } + } + + private static class DollarInlineParser implements InlineContentParser { + + private int index = 0; + + @Override + public ParsedInline tryParse(InlineParserState inlineParserState) { + var scanner = inlineParserState.scanner(); + scanner.next(); + var pos = scanner.position(); + + var end = scanner.find('$'); + if (end == -1) { + return ParsedInline.none(); + } + var content = scanner.getSource(pos, scanner.position()).getContent(); + scanner.next(); + return ParsedInline.of(new DollarInline(content, index++), scanner.position()); + } + + static class Factory implements InlineContentParserFactory { + @Override + public Set getTriggerCharacters() { + return Set.of('$'); + } + + @Override + public InlineContentParser create() { + return new DollarInlineParser(); + } + } + } + + private static class BangInline extends CustomNode { + } + + private static class BangInlineParser implements InlineContentParser { + + @Override + public ParsedInline tryParse(InlineParserState inlineParserState) { + var scanner = inlineParserState.scanner(); 
+ scanner.next(); + return ParsedInline.of(new BangInline(), scanner.position()); + } + + static class Factory implements InlineContentParserFactory { + @Override + public Set getTriggerCharacters() { + return Set.of('!'); + } + + @Override + public InlineContentParser create() { + return new BangInlineParser(); + } + } + } +} diff --git a/commonmark/src/test/java/org/commonmark/parser/beta/LinkProcessorTest.java b/commonmark/src/test/java/org/commonmark/parser/beta/LinkProcessorTest.java new file mode 100644 index 000000000..ef8739128 --- /dev/null +++ b/commonmark/src/test/java/org/commonmark/parser/beta/LinkProcessorTest.java @@ -0,0 +1,26 @@ +package org.commonmark.parser.beta; + +import org.commonmark.node.Link; +import org.commonmark.node.Text; +import org.commonmark.parser.Parser; +import org.commonmark.test.Nodes; +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; + +class LinkProcessorTest { + + @Test + void testLinkMarkerShouldNotBeIncludedByDefault() { + // If a link marker is registered but is not processed, the built-in link processor shouldn't consume it. + // And I think by default, other processors shouldn't consume it either (by accident). + // So requiring processors to opt into including the marker is better than requiring them to opt out, + // because processors that look for a marker already need to write some code to deal with the marker anyway, + // and will have tests ensuring that the marker is part of the parsed node, not the text. 
+ var parser = Parser.builder().linkMarker('^').build(); + var doc = parser.parse("^[test](url)"); + var link = Nodes.find(doc, Link.class); + assertThat(link.getDestination()).isEqualTo("url"); + assertThat(((Text) link.getPrevious()).getLiteral()).isEqualTo("^"); + } +} diff --git a/commonmark/src/test/java/org/commonmark/parser/beta/ScannerTest.java b/commonmark/src/test/java/org/commonmark/parser/beta/ScannerTest.java new file mode 100644 index 000000000..bd74cab0e --- /dev/null +++ b/commonmark/src/test/java/org/commonmark/parser/beta/ScannerTest.java @@ -0,0 +1,158 @@ +package org.commonmark.parser.beta; + +import org.commonmark.node.SourceSpan; +import org.commonmark.parser.SourceLine; +import org.commonmark.parser.SourceLines; +import org.junit.jupiter.api.Test; + +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; + +class ScannerTest { + + @Test + void testNext() { + Scanner scanner = new Scanner(List.of( + SourceLine.of("foo bar", null)), + 0, 4); + assertThat(scanner.peek()).isEqualTo('b'); + scanner.next(); + assertThat(scanner.peek()).isEqualTo('a'); + scanner.next(); + assertThat(scanner.peek()).isEqualTo('r'); + scanner.next(); + assertThat(scanner.peek()).isEqualTo('\0'); + } + + @Test + void testMultipleLines() { + Scanner scanner = new Scanner(List.of( + SourceLine.of("ab", null), + SourceLine.of("cde", null)), + 0, 0); + assertThat(scanner.hasNext()).isTrue(); + assertThat(scanner.peekPreviousCodePoint()).isEqualTo('\0'); + assertThat(scanner.peek()).isEqualTo('a'); + scanner.next(); + + assertThat(scanner.hasNext()).isTrue(); + assertThat(scanner.peekPreviousCodePoint()).isEqualTo('a'); + assertThat(scanner.peek()).isEqualTo('b'); + scanner.next(); + + assertThat(scanner.hasNext()).isTrue(); + assertThat(scanner.peekPreviousCodePoint()).isEqualTo('b'); + assertThat(scanner.peek()).isEqualTo('\n'); + scanner.next(); + + assertThat(scanner.hasNext()).isTrue(); + 
assertThat(scanner.peekPreviousCodePoint()).isEqualTo('\n'); + assertThat(scanner.peek()).isEqualTo('c'); + scanner.next(); + + assertThat(scanner.hasNext()).isTrue(); + assertThat(scanner.peekPreviousCodePoint()).isEqualTo('c'); + assertThat(scanner.peek()).isEqualTo('d'); + scanner.next(); + + assertThat(scanner.hasNext()).isTrue(); + assertThat(scanner.peekPreviousCodePoint()).isEqualTo('d'); + assertThat(scanner.peek()).isEqualTo('e'); + scanner.next(); + + assertThat(scanner.hasNext()).isFalse(); + assertThat(scanner.peekPreviousCodePoint()).isEqualTo('e'); + assertThat(scanner.peek()).isEqualTo('\0'); + } + + @Test + void testCodePoints() { + Scanner scanner = new Scanner(List.of(SourceLine.of("\uD83D\uDE0A", null)), 0, 0); + + assertThat(scanner.hasNext()).isTrue(); + assertThat(scanner.peekPreviousCodePoint()).isEqualTo('\0'); + assertThat(scanner.peekCodePoint()).isEqualTo(128522); + scanner.next(); + // This jumps chars, not code points. So jump two here + scanner.next(); + + assertThat(scanner.hasNext()).isFalse(); + assertThat(scanner.peekPreviousCodePoint()).isEqualTo(128522); + assertThat(scanner.peekCodePoint()).isEqualTo('\0'); + } + + @Test + void testTextBetween() { + Scanner scanner = new Scanner(List.of( + SourceLine.of("ab", SourceSpan.of(10, 3, 13, 2)), + SourceLine.of("cde", SourceSpan.of(11, 4, 20, 3))), + 0, 0); + + Position start = scanner.position(); + + scanner.next(); + assertSourceLines(scanner.getSource(start, scanner.position()), + "a", + SourceSpan.of(10, 3, 13, 1)); + + Position afterA = scanner.position(); + + scanner.next(); + assertSourceLines(scanner.getSource(start, scanner.position()), + "ab", + SourceSpan.of(10, 3, 13, 2)); + + Position afterB = scanner.position(); + + scanner.next(); + assertSourceLines(scanner.getSource(start, scanner.position()), + "ab\n", + SourceSpan.of(10, 3, 13, 2)); + + scanner.next(); + assertSourceLines(scanner.getSource(start, scanner.position()), + "ab\nc", + SourceSpan.of(10, 3, 13, 2), + 
SourceSpan.of(11, 4, 20, 1)); + + scanner.next(); + assertSourceLines(scanner.getSource(start, scanner.position()), + "ab\ncd", + SourceSpan.of(10, 3, 13, 2), + SourceSpan.of(11, 4, 20, 2)); + + scanner.next(); + assertSourceLines(scanner.getSource(start, scanner.position()), + "ab\ncde", + SourceSpan.of(10, 3, 13, 2), + SourceSpan.of(11, 4, 20, 3)); + + assertSourceLines(scanner.getSource(afterA, scanner.position()), + "b\ncde", + SourceSpan.of(10, 4, 14, 1), + SourceSpan.of(11, 4, 20, 3)); + + assertSourceLines(scanner.getSource(afterB, scanner.position()), + "\ncde", + SourceSpan.of(11, 4, 20, 3)); + } + + private void assertSourceLines(SourceLines sourceLines, String expectedContent, SourceSpan... expectedSourceSpans) { + assertThat(sourceLines.getContent()).isEqualTo(expectedContent); + assertThat(sourceLines.getSourceSpans()).isEqualTo(List.of(expectedSourceSpans)); + } + + @Test + void nextString() { + Scanner scanner = Scanner.of(SourceLines.of(List.of( + SourceLine.of("hey ya", null), + SourceLine.of("hi", null)))); + assertThat(scanner.next("hoy")).isFalse(); + assertThat(scanner.next("hey")).isTrue(); + assertThat(scanner.next(' ')).isTrue(); + assertThat(scanner.next("yo")).isFalse(); + assertThat(scanner.next("ya")).isTrue(); + assertThat(scanner.next(" ")).isFalse(); + } +} diff --git a/commonmark/src/test/java/org/commonmark/renderer/markdown/MarkdownRendererTest.java b/commonmark/src/test/java/org/commonmark/renderer/markdown/MarkdownRendererTest.java new file mode 100644 index 000000000..6a468a08e --- /dev/null +++ b/commonmark/src/test/java/org/commonmark/renderer/markdown/MarkdownRendererTest.java @@ -0,0 +1,359 @@ +package org.commonmark.renderer.markdown; + +import org.commonmark.node.*; +import org.commonmark.parser.Parser; +import org.commonmark.renderer.NodeRenderer; +import org.junit.jupiter.api.Test; + +import java.util.Set; + +import static org.assertj.core.api.Assertions.assertThat; +import static 
org.commonmark.testutil.Asserts.assertRendering; + +public class MarkdownRendererTest { + + // Leaf blocks + + @Test + public void testThematicBreaks() { + assertRoundTrip("___\n"); + assertRoundTrip("___\n\nfoo\n"); + // List item with hr -> hr needs to not use the same as the marker + assertRoundTrip("* ___\n"); + assertRoundTrip("- ___\n"); + + // Preserve the literal + assertRoundTrip("----\n"); + assertRoundTrip("*****\n"); + + // Apply fallback for null literal + ThematicBreak node = new ThematicBreak(); + assertThat(render(node)).isEqualTo("___"); + } + + @Test + public void testHeadings() { + // Type of heading is currently not preserved + assertRoundTrip("# foo\n"); + assertRoundTrip("## foo\n"); + assertRoundTrip("### foo\n"); + assertRoundTrip("#### foo\n"); + assertRoundTrip("##### foo\n"); + assertRoundTrip("###### foo\n"); + + assertRoundTrip("Foo\nbar\n===\n"); + assertRoundTrip("Foo \nbar\n===\n"); + assertRoundTrip("[foo\nbar](/url)\n===\n"); + + assertRoundTrip("# foo\n\nbar\n"); + } + + @Test + public void testIndentedCodeBlocks() { + assertRoundTrip(" hi\n"); + assertRoundTrip(" hi\n code\n"); + assertRoundTrip("> hi\n> code\n"); + } + + @Test + public void testFencedCodeBlocks() { + assertRoundTrip("```\ntest\n```\n"); + assertRoundTrip("~~~~\ntest\n~~~~\n"); + assertRoundTrip("```info\ntest\n```\n"); + assertRoundTrip(" ```\n test\n ```\n"); + assertRoundTrip("```\n```\n"); + + // Preserve the length + assertRoundTrip("````\ntest\n````\n"); + assertRoundTrip("~~~\ntest\n~~~~~~\n"); + } + + @Test + public void testFencedCodeBlocksFromAst() { + var doc = new Document(); + var codeBlock = new FencedCodeBlock(); + codeBlock.setLiteral("hi code"); + doc.appendChild(codeBlock); + + assertRendering("", "```\nhi code\n```\n", render(doc)); + + codeBlock.setLiteral("hi`\n```\n``test"); + assertRendering("", "````\nhi`\n```\n``test\n````\n", render(doc)); + } + + @Test + public void testHtmlBlocks() { + assertRoundTrip("

    test
    \n"); + assertRoundTrip(">
    \n> test\n>
    \n"); + } + + @Test + public void testParagraphs() { + assertRoundTrip("foo\n"); + assertRoundTrip("foo\n\nbar\n"); + } + + // Container blocks + + @Test + public void testBlockQuotes() { + assertRoundTrip("> test\n"); + assertRoundTrip("> foo\n> bar\n"); + assertRoundTrip("> > foo\n> > bar\n"); + assertRoundTrip("> # Foo\n> \n> bar\n> baz\n"); + } + + @Test + public void testBulletListItems() { + assertRoundTrip("* foo\n"); + assertRoundTrip("- foo\n"); + assertRoundTrip("+ foo\n"); + assertRoundTrip("* foo\n bar\n"); + assertRoundTrip("* ```\n code\n ```\n"); + assertRoundTrip("* foo\n\n* bar\n"); + // Note that the " " in the second line is not necessary, but it's not wrong either. + // We could try to avoid it in a future change, but not sure if necessary. + assertRoundTrip("* foo\n \n bar\n"); + + // Tight list + assertRoundTrip("* foo\n* bar\n"); + // Tight list where the second item contains a loose list + assertRoundTrip("- Foo\n - Bar\n \n - Baz\n"); + + // List item indent. This is a tricky one, but here the amount of space between the list marker and "one" + // determines whether "two" is part of the list item or an indented code block. + // In this case, it's an indented code block because it's not indented enough to be part of the list item. + // If the renderer would just use "- one", then "two" would change from being an indented code block to being + // a paragraph in the list item! So it is important for the renderer to preserve the content indent of the list + // item. 
+ assertRoundTrip(" - one\n\n two\n"); + + // Empty list + assertRoundTrip("- \n\nFoo\n"); + } + + @Test + public void testBulletListItemsFromAst() { + var doc = new Document(); + var list = new BulletList(); + var item = new ListItem(); + item.appendChild(new Text("Test")); + list.appendChild(item); + doc.appendChild(list); + + assertRendering("", "- Test\n", render(doc)); + + list.setMarker("*"); + assertRendering("", "* Test\n", render(doc)); + } + + @Test + public void testOrderedListItems() { + assertRoundTrip("1. foo\n"); + assertRoundTrip("2. foo\n\n3. bar\n"); + + // Tight list + assertRoundTrip("1. foo\n2. bar\n"); + // Tight list where the second item contains a loose list + assertRoundTrip("1. Foo\n 1. Bar\n \n 2. Baz\n"); + + assertRoundTrip(" 1. one\n\n two\n"); + } + + @Test + public void testOrderedListItemsFromAst() { + var doc = new Document(); + var list = new OrderedList(); + var item = new ListItem(); + item.appendChild(new Text("Test")); + list.appendChild(item); + doc.appendChild(list); + + assertRendering("", "1. Test\n", render(doc)); + + list.setMarkerStartNumber(2); + list.setMarkerDelimiter(")"); + assertRendering("", "2) Test\n", render(doc)); + } + + @Test + public void testOrderedListItemsWithStartNumberLongerThanLaterNumber() { + var source = "10001.\n20.\n"; + var doc = parse(source); + assertRendering(source, "10001. \n10002. \n", render(doc)); + } + + // Inlines + + @Test + public void testTabs() { + assertRoundTrip("a\tb\n"); + } + + @Test + public void testEscaping() { + // These are a bit tricky. We always escape some characters, even though they only need escaping if they would + // otherwise result in a different parse result (e.g. 
a link): + assertRoundTrip("\\[a\\](/uri)\n"); + assertRoundTrip("\\`abc\\`\n"); + + // Some characters only need to be escaped at the beginning of the line + assertRoundTrip("\\- Test\n"); + assertRoundTrip("\\-\n"); + assertRoundTrip("Test -\n"); + assertRoundTrip("Abc\n\n\\- Test\n"); + assertRoundTrip("\\# Test\n"); + assertRoundTrip("\\## Test\n"); + assertRoundTrip("\\#\n"); + assertRoundTrip("Foo\n\\===\n"); + // Only needs to be escaped after some text, not at beginning of paragraph + assertRoundTrip("===\n"); + assertRoundTrip("a\n\n===\n"); + // The beginning of the line within the block, so disregarding prefixes + assertRoundTrip("> \\- Test\n"); + assertRoundTrip("- \\- Test\n"); + // That's not the beginning of the line + assertRoundTrip("`a`- foo\n"); + + // This is a bit more tricky as we need to check for a list start + assertRoundTrip("1\\. Foo\n"); + assertRoundTrip("999\\. Foo\n"); + assertRoundTrip("1\\.\n"); + assertRoundTrip("1\\) Foo\n"); + + // Escaped whitespace, wow + assertRoundTrip(" foo\n"); + assertRoundTrip(" foo\n"); + assertRoundTrip("foo bar\n"); + } + + @Test + public void testCodeSpans() { + assertRoundTrip("`foo`\n"); + assertRoundTrip("``foo ` bar``\n"); + assertRoundTrip("```foo `` ` bar```\n"); + + assertRoundTrip("`` `foo ``\n"); + assertRoundTrip("`` ` ``\n"); + assertRoundTrip("` `\n"); + } + + @Test + public void testEmphasis() { + assertRoundTrip("*foo*\n"); + assertRoundTrip("foo*bar*\n"); + // When nesting, a different delimiter needs to be used + assertRoundTrip("*_foo_*\n"); + assertRoundTrip("*_*foo*_*\n"); + assertRoundTrip("_*foo*_\n"); + + // Not emphasis (needs * inside words) + assertRoundTrip("foo\\_bar\\_\n"); + + // Even when rendering a manually constructed tree, the emphasis delimiter needs to be chosen correctly. 
+ Document doc = new Document(); + Paragraph p = new Paragraph(); + doc.appendChild(p); + Emphasis e1 = new Emphasis(); + p.appendChild(e1); + Emphasis e2 = new Emphasis(); + e1.appendChild(e2); + e2.appendChild(new Text("hi")); + assertThat(render(doc)).isEqualTo("*_hi_*\n"); + } + + @Test + public void testStrongEmphasis() { + assertRoundTrip("**foo**\n"); + assertRoundTrip("foo**bar**\n"); + } + + @Test + public void testLinks() { + assertRoundTrip("[link](/uri)\n"); + assertRoundTrip("[link](/uri \"title\")\n"); + assertRoundTrip("[link]()\n"); + assertRoundTrip("[a]()\n"); + assertRoundTrip("[a]()\n"); + assertRoundTrip("[a](c>)\n"); + assertRoundTrip("[a](c>)\n"); + assertRoundTrip("[a](/uri \"foo \\\" bar\")\n"); + assertRoundTrip("[link](/uri \"tes\\\\\")\n"); + assertRoundTrip("[link](/url \"test \")\n"); + assertRoundTrip("[link]()\n"); + } + + @Test + public void testImages() { + assertRoundTrip("![link](/uri)\n"); + assertRoundTrip("![link](/uri \"title\")\n"); + assertRoundTrip("![link]()\n"); + assertRoundTrip("![a]()\n"); + assertRoundTrip("![a]()\n"); + assertRoundTrip("![a](c>)\n"); + assertRoundTrip("![a](c>)\n"); + assertRoundTrip("![a](/uri \"foo \\\" bar\")\n"); + } + + @Test + public void testHtmlInline() { + assertRoundTrip("*foo*\n"); + } + + @Test + public void testHardLineBreaks() { + assertRoundTrip("foo \nbar\n"); + } + + @Test + public void testSoftLineBreaks() { + assertRoundTrip("foo\nbar\n"); + } + + @Test + public void overrideNodeRender() { + var nodeRendererFactory = new MarkdownNodeRendererFactory() { + @Override + public NodeRenderer create(MarkdownNodeRendererContext context) { + return new NodeRenderer() { + @Override + public Set> getNodeTypes() { + return Set.of(Heading.class); + } + + @Override + public void render(Node node) { + context.getWriter().raw("# Custom heading"); + } + }; + } + + @Override + public Set getSpecialCharacters() { + return Set.of(); + } + }; + + MarkdownRenderer renderer = 
MarkdownRenderer.builder().nodeRendererFactory(nodeRendererFactory).build(); + String rendered = renderer.render(parse("# Hello")); + assertThat(rendered).isEqualTo("# Custom heading\n"); + } + + private void assertRoundTrip(String input) { + String rendered = parseAndRender(input); + assertThat(rendered).isEqualTo(input); + } + + private String parseAndRender(String source) { + Node parsed = parse(source); + return render(parsed); + } + + private Node parse(String source) { + return Parser.builder().build().parse(source); + } + + private String render(Node node) { + return MarkdownRenderer.builder().build().render(node); + } +} diff --git a/commonmark/src/test/java/org/commonmark/renderer/markdown/SpecMarkdownRendererTest.java b/commonmark/src/test/java/org/commonmark/renderer/markdown/SpecMarkdownRendererTest.java new file mode 100644 index 000000000..3b88df55d --- /dev/null +++ b/commonmark/src/test/java/org/commonmark/renderer/markdown/SpecMarkdownRendererTest.java @@ -0,0 +1,95 @@ +package org.commonmark.renderer.markdown; + +import org.commonmark.node.Node; +import org.commonmark.parser.Parser; +import org.commonmark.renderer.html.HtmlRenderer; +import org.commonmark.testutil.TestResources; +import org.commonmark.testutil.example.Example; +import org.commonmark.testutil.example.ExampleReader; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Tests Markdown rendering using the examples in the spec like this: + *
      + *
    1. Parses the source to an AST and then renders it back to Markdown
    2. + *
    3. Parses that to an AST and then renders it to HTML
    4. + *
    5. Compares that HTML to the expected HTML of the example: + * If it's the same, then the expected elements were preserved in the Markdown rendering
    6. + *
    + */ +public class SpecMarkdownRendererTest { + + public static final MarkdownRenderer MARKDOWN_RENDERER = MarkdownRenderer.builder().build(); + // The spec says URL-escaping is optional, but the examples assume that it's enabled. + public static final HtmlRenderer HTML_RENDERER = HtmlRenderer.builder().percentEncodeUrls(true).build(); + + @Test + public void testCoverage() { + List examples = ExampleReader.readExamples(TestResources.getSpec()); + List passes = new ArrayList<>(); + List fails = new ArrayList<>(); + for (Example example : examples) { + String markdown = renderMarkdown(example.getSource()); + String rendered = renderHtml(markdown); + if (rendered.equals(example.getHtml())) { + passes.add(example); + } else { + fails.add(example); + } + } + + System.out.println("Passed examples by section (total " + passes.size() + "):"); + printCountsBySection(passes); + System.out.println(); + + System.out.println("Failed examples by section (total " + fails.size() + "):"); + printCountsBySection(fails); + System.out.println(); + + System.out.println("Failed examples:"); + for (Example fail : fails) { + System.out.println("Failed: " + fail); + System.out.println("````````````````````````````````"); + System.out.print(fail.getSource()); + System.out.println("````````````````````````````````"); + System.out.println(); + } + + assertThat(passes).hasSizeGreaterThanOrEqualTo(652); + assertThat(fails).isEmpty(); + } + + private static void printCountsBySection(List examples) { + Map bySection = new LinkedHashMap<>(); + for (Example example : examples) { + Integer count = bySection.get(example.getSection()); + if (count == null) { + count = 0; + } + bySection.put(example.getSection(), count + 1); + } + for (Map.Entry entry : bySection.entrySet()) { + System.out.println(entry.getValue() + ": " + entry.getKey()); + } + } + + private Node parse(String source) { + return Parser.builder().build().parse(source); + } + + private String renderMarkdown(String source) { + return 
MARKDOWN_RENDERER.render(parse(source)); + } + + private String renderHtml(String source) { + // The spec uses "rightwards arrow" to show tabs + return HTML_RENDERER.render(parse(source)).replace("\t", "\u2192"); + } +} diff --git a/commonmark/src/test/java/org/commonmark/test/AbstractVisitorTest.java b/commonmark/src/test/java/org/commonmark/test/AbstractVisitorTest.java index b3b60fa3b..edb6936f4 100644 --- a/commonmark/src/test/java/org/commonmark/test/AbstractVisitorTest.java +++ b/commonmark/src/test/java/org/commonmark/test/AbstractVisitorTest.java @@ -1,10 +1,9 @@ package org.commonmark.test; import org.commonmark.node.*; -import org.junit.Test; +import org.junit.jupiter.api.Test; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNull; +import static org.assertj.core.api.Assertions.assertThat; public class AbstractVisitorTest { @@ -26,13 +25,13 @@ public void visit(Text text) { assertCode("foo", paragraph.getFirstChild()); assertCode("bar", paragraph.getFirstChild().getNext()); - assertNull(paragraph.getFirstChild().getNext().getNext()); + assertThat(paragraph.getFirstChild().getNext().getNext()).isNull(); assertCode("bar", paragraph.getLastChild()); } private static void assertCode(String expectedLiteral, Node node) { - assertEquals("Expected node to be a Code node: " + node, Code.class, node.getClass()); + assertThat(node).isInstanceOf(Code.class); Code code = (Code) node; - assertEquals(expectedLiteral, code.getLiteral()); + assertThat(code.getLiteral()).isEqualTo(expectedLiteral); } } diff --git a/commonmark/src/test/java/org/commonmark/test/BlockParserFactoryTest.java b/commonmark/src/test/java/org/commonmark/test/BlockParserFactoryTest.java new file mode 100644 index 000000000..b733d7970 --- /dev/null +++ b/commonmark/src/test/java/org/commonmark/test/BlockParserFactoryTest.java @@ -0,0 +1,127 @@ +package org.commonmark.test; + +import org.commonmark.node.*; +import org.commonmark.parser.IncludeSourceSpans; +import 
org.commonmark.parser.InlineParser; +import org.commonmark.parser.Parser; +import org.commonmark.parser.SourceLines; +import org.commonmark.parser.block.*; +import org.junit.jupiter.api.Test; + +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; + +public class BlockParserFactoryTest { + + @Test + public void customBlockParserFactory() { + var parser = Parser.builder().customBlockParserFactory(new DashBlockParser.Factory()).build(); + + // The dashes would normally be a ThematicBreak + var doc = parser.parse("hey\n\n---\n"); + + assertThat(doc.getFirstChild()).isInstanceOf(Paragraph.class); + assertThat(((Text) doc.getFirstChild().getFirstChild()).getLiteral()).isEqualTo("hey"); + assertThat(doc.getLastChild()).isInstanceOf(DashBlock.class); + } + + @Test + public void replaceActiveBlockParser() { + var parser = Parser.builder() + .customBlockParserFactory(new StarHeadingBlockParser.Factory()) + .includeSourceSpans(IncludeSourceSpans.BLOCKS_AND_INLINES) + .build(); + + var doc = parser.parse("a\nbc\n***\n"); + + var heading = doc.getFirstChild(); + assertThat(heading).isInstanceOf(StarHeading.class); + assertThat(heading.getNext()).isNull(); + var a = heading.getFirstChild(); + assertThat(a).isInstanceOf(Text.class); + assertThat(((Text) a).getLiteral()).isEqualTo("a"); + var bc = a.getNext().getNext(); + assertThat(bc).isInstanceOf(Text.class); + assertThat(((Text) bc).getLiteral()).isEqualTo("bc"); + assertThat(bc.getNext()).isNull(); + + assertThat(heading.getSourceSpans()).isEqualTo(List.of( + SourceSpan.of(0, 0, 0, 1), + SourceSpan.of(1, 0, 2, 2), + SourceSpan.of(2, 0, 5, 3))); + assertThat(a.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 0, 0, 1))); + assertThat(bc.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(1, 0, 2, 2))); + } + + private static class DashBlock extends CustomBlock { + } + + private static class DashBlockParser extends AbstractBlockParser { + + private DashBlock dash = new DashBlock(); + + @Override 
+ public Block getBlock() { + return dash; + } + + @Override + public BlockContinue tryContinue(ParserState parserState) { + return BlockContinue.none(); + } + + static class Factory extends AbstractBlockParserFactory { + + @Override + public BlockStart tryStart(ParserState state, MatchedBlockParser matchedBlockParser) { + if (state.getLine().getContent().equals("---")) { + return BlockStart.of(new DashBlockParser()); + } + return BlockStart.none(); + } + } + } + + private static class StarHeading extends CustomBlock { + } + + private static class StarHeadingBlockParser extends AbstractBlockParser { + + private final SourceLines content; + private final StarHeading heading = new StarHeading(); + + StarHeadingBlockParser(SourceLines content) { + this.content = content; + } + + @Override + public Block getBlock() { + return heading; + } + + @Override + public BlockContinue tryContinue(ParserState parserState) { + return BlockContinue.none(); + } + + @Override + public void parseInlines(InlineParser inlineParser) { + inlineParser.parse(content, heading); + } + + static class Factory extends AbstractBlockParserFactory { + + @Override + public BlockStart tryStart(ParserState state, MatchedBlockParser matchedBlockParser) { + var lines = matchedBlockParser.getParagraphLines(); + if (state.getLine().getContent().toString().startsWith("***")) { + return BlockStart.of(new StarHeadingBlockParser(lines)) + .replaceActiveBlockParser(); + } else { + return BlockStart.none(); + } + } + } + } +} diff --git a/commonmark/src/test/java/org/commonmark/test/CoreRenderingTestCase.java b/commonmark/src/test/java/org/commonmark/test/CoreRenderingTestCase.java index 38f319e1c..2303d2617 100644 --- a/commonmark/src/test/java/org/commonmark/test/CoreRenderingTestCase.java +++ b/commonmark/src/test/java/org/commonmark/test/CoreRenderingTestCase.java @@ -11,6 +11,7 @@ public class CoreRenderingTestCase extends RenderingTestCase { @Override protected String render(String source) { - return 
RENDERER.render(PARSER.parse(source)); + var node = PARSER.parse(source); + return RENDERER.render(node); } } diff --git a/commonmark/src/test/java/org/commonmark/test/DelimitedTest.java b/commonmark/src/test/java/org/commonmark/test/DelimitedTest.java index a34a32c44..3f2f0d611 100644 --- a/commonmark/src/test/java/org/commonmark/test/DelimitedTest.java +++ b/commonmark/src/test/java/org/commonmark/test/DelimitedTest.java @@ -2,12 +2,12 @@ import org.commonmark.node.*; import org.commonmark.parser.Parser; -import org.junit.Test; +import org.junit.jupiter.api.Test; import java.util.ArrayList; import java.util.List; -import static org.junit.Assert.assertEquals; +import static org.assertj.core.api.Assertions.assertThat; public class DelimitedTest { @@ -35,20 +35,20 @@ public void visit(StrongEmphasis node) { }; document.accept(visitor); - assertEquals(4, list.size()); + assertThat(list).hasSize(4); Delimited emphasis = list.get(0); Delimited strong = list.get(1); Delimited important = list.get(2); Delimited critical = list.get(3); - assertEquals("*", emphasis.getOpeningDelimiter()); - assertEquals("*", emphasis.getClosingDelimiter()); - assertEquals("**", strong.getOpeningDelimiter()); - assertEquals("**", strong.getClosingDelimiter()); - assertEquals("_", important.getOpeningDelimiter()); - assertEquals("_", important.getClosingDelimiter()); - assertEquals("__", critical.getOpeningDelimiter()); - assertEquals("__", critical.getClosingDelimiter()); + assertThat(emphasis.getOpeningDelimiter()).isEqualTo("*"); + assertThat(emphasis.getClosingDelimiter()).isEqualTo("*"); + assertThat(strong.getOpeningDelimiter()).isEqualTo("**"); + assertThat(strong.getClosingDelimiter()).isEqualTo("**"); + assertThat(important.getOpeningDelimiter()).isEqualTo("_"); + assertThat(important.getClosingDelimiter()).isEqualTo("_"); + assertThat(critical.getOpeningDelimiter()).isEqualTo("__"); + assertThat(critical.getClosingDelimiter()).isEqualTo("__"); } } diff --git 
a/commonmark/src/test/java/org/commonmark/test/DelimiterProcessorTest.java b/commonmark/src/test/java/org/commonmark/test/DelimiterProcessorTest.java index 948c484cd..e4920120d 100644 --- a/commonmark/src/test/java/org/commonmark/test/DelimiterProcessorTest.java +++ b/commonmark/src/test/java/org/commonmark/test/DelimiterProcessorTest.java @@ -11,13 +11,13 @@ import org.commonmark.renderer.html.HtmlNodeRendererFactory; import org.commonmark.renderer.html.HtmlRenderer; import org.commonmark.testutil.RenderingTestCase; -import org.junit.Test; +import org.junit.jupiter.api.Test; -import java.util.Collections; import java.util.Locale; import java.util.Set; -import static org.junit.Assert.assertEquals; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; public class DelimiterProcessorTest extends RenderingTestCase { @@ -30,8 +30,8 @@ public void delimiterProcessorWithInvalidDelimiterUse() { .customDelimiterProcessor(new CustomDelimiterProcessor(':', 0)) .customDelimiterProcessor(new CustomDelimiterProcessor(';', -1)) .build(); - assertEquals("

    :test:

    \n", RENDERER.render(parser.parse(":test:"))); - assertEquals("

    ;test;

    \n", RENDERER.render(parser.parse(";test;"))); + assertThat(RENDERER.render(parser.parse(":test:"))).isEqualTo("

    :test:

    \n"); + assertThat(RENDERER.render(parser.parse(";test;"))).isEqualTo("

    ;test;

    \n"); } @Test @@ -55,16 +55,17 @@ public void multipleDelimitersWithDifferentLengths() { .customDelimiterProcessor(new OneDelimiterProcessor()) .customDelimiterProcessor(new TwoDelimiterProcessor()) .build(); - assertEquals("

    (1)one(/1) (2)two(/2)

    \n", RENDERER.render(parser.parse("+one+ ++two++"))); - assertEquals("

    (1)(2)both(/2)(/1)

    \n", RENDERER.render(parser.parse("+++both+++"))); + assertThat(RENDERER.render(parser.parse("+one+ ++two++"))).isEqualTo("

    (1)one(/1) (2)two(/2)

    \n"); + assertThat(RENDERER.render(parser.parse("+++both+++"))).isEqualTo("

    (1)(2)both(/2)(/1)

    \n"); } - @Test(expected = IllegalArgumentException.class) - public void multipleDelimitersWithSameLength() { - Parser.builder() - .customDelimiterProcessor(new OneDelimiterProcessor()) - .customDelimiterProcessor(new OneDelimiterProcessor()) - .build(); + @Test + public void multipleDelimitersWithSameLengthConflict() { + assertThatThrownBy(() -> + Parser.builder() + .customDelimiterProcessor(new OneDelimiterProcessor()) + .customDelimiterProcessor(new OneDelimiterProcessor()) + .build()).isInstanceOf(IllegalArgumentException.class); } @Override @@ -99,13 +100,9 @@ public int getMinLength() { } @Override - public int getDelimiterUse(DelimiterRun opener, DelimiterRun closer) { + public int process(DelimiterRun openingRun, DelimiterRun closingRun) { return delimiterUse; } - - @Override - public void process(Text opener, Text closer, int delimiterUse) { - } } private static class AsymmetricDelimiterProcessor implements DelimiterProcessor { @@ -126,20 +123,19 @@ public int getMinLength() { } @Override - public int getDelimiterUse(DelimiterRun opener, DelimiterRun closer) { - return 1; - } - - @Override - public void process(Text opener, Text closer, int delimiterUse) { + public int process(DelimiterRun openingRun, DelimiterRun closingRun) { UpperCaseNode content = new UpperCaseNode(); - Node tmp = opener.getNext(); - while (tmp != null && tmp != closer) { + Text start = openingRun.getOpener(); + Text end = closingRun.getCloser(); + Node tmp = start.getNext(); + while (tmp != null && tmp != end) { Node next = tmp.getNext(); content.appendChild(tmp); tmp = next; } - opener.insertAfter(content); + start.insertAfter(content); + + return 1; } } @@ -164,7 +160,7 @@ private UpperCaseNodeRenderer(HtmlNodeRendererContext context) { @Override public Set> getNodeTypes() { - return Collections.>singleton(UpperCaseNode.class); + return Set.of(UpperCaseNode.class); } @Override @@ -198,15 +194,11 @@ public int getMinLength() { } @Override - public int getDelimiterUse(DelimiterRun 
opener, DelimiterRun closer) { + public int process(DelimiterRun openingRun, DelimiterRun closingRun) { + openingRun.getOpener().insertAfter(new Text("(1)")); + closingRun.getCloser().insertBefore(new Text("(/1)")); return 1; } - - @Override - public void process(Text opener, Text closer, int delimiterUse) { - opener.insertAfter(new Text("(1)")); - closer.insertBefore(new Text("(/1)")); - } } private static class TwoDelimiterProcessor implements DelimiterProcessor { @@ -227,14 +219,10 @@ public int getMinLength() { } @Override - public int getDelimiterUse(DelimiterRun opener, DelimiterRun closer) { + public int process(DelimiterRun openingRun, DelimiterRun closingRun) { + openingRun.getOpener().insertAfter(new Text("(2)")); + closingRun.getCloser().insertBefore(new Text("(/2)")); return 2; } - - @Override - public void process(Text opener, Text closer, int delimiterUse) { - opener.insertAfter(new Text("(2)")); - closer.insertBefore(new Text("(/2)")); - } } } diff --git a/commonmark/src/test/java/org/commonmark/test/FencedCodeBlockParserTest.java b/commonmark/src/test/java/org/commonmark/test/FencedCodeBlockParserTest.java index 774c6ff0e..443b0fa51 100644 --- a/commonmark/src/test/java/org/commonmark/test/FencedCodeBlockParserTest.java +++ b/commonmark/src/test/java/org/commonmark/test/FencedCodeBlockParserTest.java @@ -5,9 +5,9 @@ import org.commonmark.parser.Parser; import org.commonmark.renderer.html.HtmlRenderer; import org.commonmark.testutil.RenderingTestCase; -import org.junit.Test; +import org.junit.jupiter.api.Test; -import static org.junit.Assert.assertEquals; +import static org.assertj.core.api.Assertions.assertThat; public class FencedCodeBlockParserTest extends RenderingTestCase { @@ -18,8 +18,8 @@ public class FencedCodeBlockParserTest extends RenderingTestCase { public void backtickInfo() { Node document = PARSER.parse("```info ~ test\ncode\n```"); FencedCodeBlock codeBlock = (FencedCodeBlock) document.getFirstChild(); - assertEquals("info ~ test", 
codeBlock.getInfo()); - assertEquals("code\n", codeBlock.getLiteral()); + assertThat(codeBlock.getInfo()).isEqualTo("info ~ test"); + assertThat(codeBlock.getLiteral()).isEqualTo("code\n"); } @Test diff --git a/commonmark/src/test/java/org/commonmark/test/HeadingParserTest.java b/commonmark/src/test/java/org/commonmark/test/HeadingParserTest.java index a5b179a81..f7bf35a4c 100644 --- a/commonmark/src/test/java/org/commonmark/test/HeadingParserTest.java +++ b/commonmark/src/test/java/org/commonmark/test/HeadingParserTest.java @@ -3,7 +3,7 @@ import org.commonmark.parser.Parser; import org.commonmark.renderer.html.HtmlRenderer; import org.commonmark.testutil.RenderingTestCase; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class HeadingParserTest extends RenderingTestCase { diff --git a/commonmark/src/test/java/org/commonmark/test/HtmlInlineParserTest.java b/commonmark/src/test/java/org/commonmark/test/HtmlInlineParserTest.java new file mode 100644 index 000000000..8e1fd9790 --- /dev/null +++ b/commonmark/src/test/java/org/commonmark/test/HtmlInlineParserTest.java @@ -0,0 +1,34 @@ +package org.commonmark.test; + +import org.junit.jupiter.api.Test; + +public class HtmlInlineParserTest extends CoreRenderingTestCase { + + @Test + public void comment() { + assertRendering("inline ", "

    inline

    \n"); + assertRendering("inline ", "

    inline

    \n"); + assertRendering("inline ", "

    inline

    \n"); + assertRendering("inline ", "

    inline

    \n"); + assertRendering("inline ", "

    inline

    \n"); + assertRendering("inline -->", "

    inline -->

    \n"); + assertRendering("inline -->", "

    inline -->

    \n"); + } + + @Test + public void cdata() { + assertRendering("inline ", "

    inline

    \n"); + assertRendering("inline ", "

    inline

    \n"); + } + + @Test + public void declaration() { + // Whitespace is mandatory + assertRendering("inline ", "

    inline <!FOO>

    \n"); + assertRendering("inline ", "

    inline

    \n"); + assertRendering("inline ", "

    inline

    \n"); + + // Lowercase + assertRendering("inline ", "

    inline

    \n"); + } +} diff --git a/commonmark/src/test/java/org/commonmark/test/HtmlRendererTest.java b/commonmark/src/test/java/org/commonmark/test/HtmlRendererTest.java index 04b493cba..02d970949 100644 --- a/commonmark/src/test/java/org/commonmark/test/HtmlRendererTest.java +++ b/commonmark/src/test/java/org/commonmark/test/HtmlRendererTest.java @@ -5,49 +5,54 @@ import org.commonmark.renderer.NodeRenderer; import org.commonmark.renderer.html.*; import org.commonmark.testutil.TestResources; -import org.junit.Test; +import org.junit.jupiter.api.Test; -import java.util.*; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutorService; +import java.util.ArrayList; +import java.util.Map; +import java.util.Set; import java.util.concurrent.Executors; import java.util.concurrent.Future; -import static org.hamcrest.CoreMatchers.is; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertThat; +import static org.assertj.core.api.Assertions.assertThat; public class HtmlRendererTest { @Test public void htmlAllowingShouldNotEscapeInlineHtml() { String rendered = htmlAllowingRenderer().render(parse("paragraph with inline & html")); - assertEquals("

    paragraph with inline & html

    \n", rendered); + assertThat(rendered).isEqualTo("

    paragraph with inline & html

    \n"); } @Test public void htmlAllowingShouldNotEscapeBlockHtml() { String rendered = htmlAllowingRenderer().render(parse("
    block &
    ")); - assertEquals("
    block &
    \n", rendered); + assertThat(rendered).isEqualTo("
    block &
    \n"); } @Test public void htmlEscapingShouldEscapeInlineHtml() { String rendered = htmlEscapingRenderer().render(parse("paragraph with inline & html")); // Note that & is not escaped, as it's a normal text node, not part of the inline HTML. - assertEquals("

    paragraph with <span id='foo' class="bar">inline & html</span>

    \n", rendered); + assertThat(rendered).isEqualTo("

    paragraph with <span id='foo' class="bar">inline & html</span>

    \n"); } @Test public void htmlEscapingShouldEscapeHtmlBlocks() { String rendered = htmlEscapingRenderer().render(parse("
    block &
    ")); - assertEquals("

    <div id='foo' class="bar">block &amp;</div>

    \n", rendered); + assertThat(rendered).isEqualTo("

    <div id='foo' class="bar">block &amp;</div>

    \n"); } @Test public void textEscaping() { String rendered = defaultRenderer().render(parse("escaping: & < > \" '")); - assertEquals("

    escaping: & < > " '

    \n", rendered); + assertThat(rendered).isEqualTo("

    escaping: & < > " '

    \n"); + } + + @Test + public void characterReferencesWithoutSemicolonsShouldNotBeParsedShouldBeEscaped() { + String input = "[example](javascript:alert('XSS'))"; + String rendered = defaultRenderer().render(parse(input)); + assertThat(rendered).isEqualTo("

    example

    \n"); } @Test @@ -56,7 +61,7 @@ public void attributeEscaping() { Link link = new Link(); link.setDestination(":"); paragraph.appendChild(link); - assertEquals("

    \n", defaultRenderer().render(paragraph)); + assertThat(defaultRenderer().render(paragraph)).isEqualTo("

    \n"); } @Test @@ -65,7 +70,7 @@ public void rawUrlsShouldNotFilterDangerousProtocols() { Link link = new Link(); link.setDestination("javascript:alert(5);"); paragraph.appendChild(link); - assertEquals("

    \n", rawUrlsRenderer().render(paragraph)); + assertThat(rawUrlsRenderer().render(paragraph)).isEqualTo("

    \n"); } @Test @@ -74,13 +79,41 @@ public void sanitizedUrlsShouldSetRelNoFollow() { Link link = new Link(); link.setDestination("/exampleUrl"); paragraph.appendChild(link); - assertEquals("

    \n", sanitizeUrlsRenderer().render(paragraph)); + assertThat(sanitizeUrlsRenderer().render(paragraph)).isEqualTo("

    \n"); paragraph = new Paragraph(); link = new Link(); link.setDestination("https://google.com"); paragraph.appendChild(link); - assertEquals("

    \n", sanitizeUrlsRenderer().render(paragraph)); + assertThat(sanitizeUrlsRenderer().render(paragraph)).isEqualTo("

    \n"); + } + + @Test + public void sanitizedUrlsShouldAllowSafeProtocols() { + Paragraph paragraph = new Paragraph(); + Link link = new Link(); + link.setDestination("http://google.com"); + paragraph.appendChild(link); + assertThat(sanitizeUrlsRenderer().render(paragraph)).isEqualTo("

    \n"); + + paragraph = new Paragraph(); + link = new Link(); + link.setDestination("https://google.com"); + paragraph.appendChild(link); + assertThat(sanitizeUrlsRenderer().render(paragraph)).isEqualTo("

    \n"); + + paragraph = new Paragraph(); + link = new Link(); + link.setDestination("mailto:foo@bar.example.com"); + paragraph.appendChild(link); + assertThat(sanitizeUrlsRenderer().render(paragraph)).isEqualTo("

    \n"); + + String image = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAAFiUAABYlAUlSJPAAAAAQSURBVBhXY/iPBVBf8P9/AG8TY51nJdgkAAAAAElFTkSuQmCC"; + paragraph = new Paragraph(); + link = new Link(); + link.setDestination(image); + paragraph.appendChild(link); + assertThat(sanitizeUrlsRenderer().render(paragraph)).isEqualTo("

    \n"); } @Test @@ -89,39 +122,42 @@ public void sanitizedUrlsShouldFilterDangerousProtocols() { Link link = new Link(); link.setDestination("javascript:alert(5);"); paragraph.appendChild(link); - assertEquals("

    \n", sanitizeUrlsRenderer().render(paragraph)); + assertThat(sanitizeUrlsRenderer().render(paragraph)).isEqualTo("

    \n"); + + paragraph = new Paragraph(); + link = new Link(); + link.setDestination("ftp://google.com"); + paragraph.appendChild(link); + assertThat(sanitizeUrlsRenderer().render(paragraph)).isEqualTo("

    \n"); } @Test public void percentEncodeUrlDisabled() { - assertEquals("

    a

    \n", defaultRenderer().render(parse("[a](foo&bar)"))); - assertEquals("

    a

    \n", defaultRenderer().render(parse("[a](ä)"))); - assertEquals("

    a

    \n", defaultRenderer().render(parse("[a](foo%20bar)"))); + assertThat(defaultRenderer().render(parse("[a](foo&bar)"))).isEqualTo("

    a

    \n"); + assertThat(defaultRenderer().render(parse("[a](ä)"))).isEqualTo("

    a

    \n"); + assertThat(defaultRenderer().render(parse("[a](foo%20bar)"))).isEqualTo("

    a

    \n"); } @Test public void percentEncodeUrl() { // Entities are escaped anyway - assertEquals("

    a

    \n", percentEncodingRenderer().render(parse("[a](foo&bar)"))); + assertThat(percentEncodingRenderer().render(parse("[a](foo&bar)"))).isEqualTo("

    a

    \n"); // Existing encoding is preserved - assertEquals("

    a

    \n", percentEncodingRenderer().render(parse("[a](foo%20bar)"))); - assertEquals("

    a

    \n", percentEncodingRenderer().render(parse("[a](foo%61)"))); + assertThat(percentEncodingRenderer().render(parse("[a](foo%20bar)"))).isEqualTo("

    a

    \n"); + assertThat(percentEncodingRenderer().render(parse("[a](foo%61)"))).isEqualTo("

    a

    \n"); // Invalid encoding is escaped - assertEquals("

    a

    \n", percentEncodingRenderer().render(parse("[a](foo%)"))); - assertEquals("

    a

    \n", percentEncodingRenderer().render(parse("[a](foo%a)"))); - assertEquals("

    a

    \n", percentEncodingRenderer().render(parse("[a](foo%a_)"))); - assertEquals("

    a

    \n", percentEncodingRenderer().render(parse("[a](foo%xx)"))); + assertThat(percentEncodingRenderer().render(parse("[a](foo%)"))).isEqualTo("

    a

    \n"); + assertThat(percentEncodingRenderer().render(parse("[a](foo%a)"))).isEqualTo("

    a

    \n"); + assertThat(percentEncodingRenderer().render(parse("[a](foo%a_)"))).isEqualTo("

    a

    \n"); + assertThat(percentEncodingRenderer().render(parse("[a](foo%xx)"))).isEqualTo("

    a

    \n"); // Reserved characters are preserved, except for '[' and ']' - assertEquals("

    a

    \n", percentEncodingRenderer().render(parse("[a](!*'();:@&=+$,/?#[])"))); + assertThat(percentEncodingRenderer().render(parse("[a](!*'();:@&=+$,/?#[])"))).isEqualTo("

    a

    \n"); // Unreserved characters are preserved - assertEquals("

    a

    \n", - percentEncodingRenderer().render(parse("[a](ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~)"))); + assertThat(percentEncodingRenderer().render(parse("[a](ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~)"))).isEqualTo("

    a

    \n"); // Other characters are percent-encoded (LATIN SMALL LETTER A WITH DIAERESIS) - assertEquals("

    a

    \n", - percentEncodingRenderer().render(parse("[a](ä)"))); + assertThat(percentEncodingRenderer().render(parse("[a](ä)"))).isEqualTo("

    a

    \n"); // Other characters are percent-encoded (MUSICAL SYMBOL G CLEF, surrogate pair in UTF-16) - assertEquals("

    a

    \n", - percentEncodingRenderer().render(parse("[a](\uD834\uDD1E)"))); + assertThat(percentEncodingRenderer().render(parse("[a](\uD834\uDD1E)"))).isEqualTo("

    a

    \n"); } @Test @@ -148,10 +184,10 @@ public void setAttributes(Node node, String tagName, Map attribu HtmlRenderer renderer = HtmlRenderer.builder().attributeProviderFactory(custom).build(); String rendered = renderer.render(parse("```info\ncontent\n```")); - assertEquals("
    content\n
    \n", rendered); + assertThat(rendered).isEqualTo("
    content\n
    \n"); String rendered2 = renderer.render(parse("```evil\"\ncontent\n```")); - assertEquals("
    content\n
    \n", rendered2); + assertThat(rendered2).isEqualTo("
    content\n
    \n"); } @Test @@ -173,7 +209,7 @@ public void setAttributes(Node node, String tagName, Map attribu HtmlRenderer renderer = HtmlRenderer.builder().attributeProviderFactory(custom).build(); String rendered = renderer.render(parse("![foo](/url)\n")); - assertEquals("

    \n", rendered); + assertThat(rendered).isEqualTo("

    \n"); } @Test @@ -196,7 +232,7 @@ public void setAttributes(Node node, String tagName, Map attribu HtmlRenderer renderer = HtmlRenderer.builder().attributeProviderFactory(factory).build(); String rendered = renderer.render(parse("text node")); String secondPass = renderer.render(parse("text node")); - assertEquals(rendered, secondPass); + assertThat(secondPass).isEqualTo(rendered); } @Test @@ -207,7 +243,7 @@ public NodeRenderer create(final HtmlNodeRendererContext context) { return new NodeRenderer() { @Override public Set> getNodeTypes() { - return Collections.>singleton(Link.class); + return Set.of(Link.class); } @Override @@ -220,30 +256,37 @@ public void render(Node node) { HtmlRenderer renderer = HtmlRenderer.builder().nodeRendererFactory(nodeRendererFactory).build(); String rendered = renderer.render(parse("foo [bar](/url)")); - assertEquals("

    foo test

    \n", rendered); + assertThat(rendered).isEqualTo("

    foo test

    \n"); } @Test public void orderedListStartZero() { - assertEquals("
      \n
    1. Test
    2. \n
    \n", defaultRenderer().render(parse("0. Test\n"))); + assertThat(defaultRenderer().render(parse("0. Test\n"))).isEqualTo("
      \n
    1. Test
    2. \n
    \n"); } @Test public void imageAltTextWithSoftLineBreak() { - assertEquals("

    \"foo\nbar\"

    \n", - defaultRenderer().render(parse("![foo\nbar](/url)\n"))); + assertThat(defaultRenderer().render(parse("![foo\nbar](/url)\n"))).isEqualTo("

    \"foo\nbar\"

    \n"); } @Test public void imageAltTextWithHardLineBreak() { - assertEquals("

    \"foo\nbar\"

    \n", - defaultRenderer().render(parse("![foo \nbar](/url)\n"))); + assertThat(defaultRenderer().render(parse("![foo \nbar](/url)\n"))).isEqualTo("

    \"foo\nbar\"

    \n"); } @Test public void imageAltTextWithEntities() { - assertEquals("

    \"foo

    \n", - defaultRenderer().render(parse("![foo ä](/url)\n"))); + assertThat(defaultRenderer().render(parse("![foo ä](/url)\n"))).isEqualTo("

    \"foo

    \n"); + } + + @Test + public void imageAltTextWithInlines() { + assertThat(defaultRenderer().render(parse("![_foo_ **bar** [link](/url)](/url)\n"))).isEqualTo("

    \"foo

    \n"); + } + + @Test + public void imageAltTextWithCode() { + assertThat(defaultRenderer().render(parse("![`foo` bar](/url)\n"))).isEqualTo("

    \"foo

    \n"); } @Test @@ -260,35 +303,35 @@ public void canRenderContentsOfSingleParagraph() { document.appendChild(current); } - assertEquals("Here I have a test link", - defaultRenderer().render(document)); + assertThat(defaultRenderer().render(document)).isEqualTo("Here I have a test link"); + } + + @Test + public void omitSingleParagraphP() { + var renderer = HtmlRenderer.builder().omitSingleParagraphP(true).build(); + assertThat(renderer.render(parse("hi *there*"))).isEqualTo("hi there"); } @Test public void threading() throws Exception { - Parser parser = Parser.builder().build(); - String spec = TestResources.readAsString(TestResources.getSpec()); - final Node document = parser.parse(spec); + var parser = Parser.builder().build(); + var spec = TestResources.readAsString(TestResources.getSpec()); + var document = parser.parse(spec); - final HtmlRenderer htmlRenderer = HtmlRenderer.builder().build(); - String expectedRendering = htmlRenderer.render(document); + var htmlRenderer = HtmlRenderer.builder().build(); + var expectedRendering = htmlRenderer.render(document); // Render in parallel using the same HtmlRenderer instance. 
- List> futures = new ArrayList<>(); - ExecutorService executorService = Executors.newFixedThreadPool(4); + var futures = new ArrayList>(); + var executorService = Executors.newFixedThreadPool(4); for (int i = 0; i < 40; i++) { - Future future = executorService.submit(new Callable() { - @Override - public String call() throws Exception { - return htmlRenderer.render(document); - } - }); + var future = executorService.submit(() -> htmlRenderer.render(document)); futures.add(future); } - for (Future future : futures) { - String rendering = future.get(); - assertThat(rendering, is(expectedRendering)); + for (var future : futures) { + var rendering = future.get(); + assertThat(rendering).isEqualTo(expectedRendering); } } diff --git a/commonmark/src/test/java/org/commonmark/test/InlineParserContextTest.java b/commonmark/src/test/java/org/commonmark/test/InlineParserContextTest.java new file mode 100644 index 000000000..c05cac2d2 --- /dev/null +++ b/commonmark/src/test/java/org/commonmark/test/InlineParserContextTest.java @@ -0,0 +1,81 @@ +package org.commonmark.test; + +import org.commonmark.internal.InlineParserImpl; +import org.commonmark.parser.beta.LinkProcessor; +import org.commonmark.parser.beta.InlineContentParserFactory; +import org.commonmark.node.LinkReferenceDefinition; +import org.commonmark.parser.InlineParser; +import org.commonmark.parser.InlineParserContext; +import org.commonmark.parser.InlineParserFactory; +import org.commonmark.parser.Parser; +import org.commonmark.parser.delimiter.DelimiterProcessor; +import org.commonmark.renderer.html.HtmlRenderer; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +import static org.assertj.core.api.Assertions.assertThat; + +public class InlineParserContextTest { + + @Test + public void labelShouldBeOriginalNotNormalized() { + CapturingInlineParserFactory inlineParserFactory = new CapturingInlineParserFactory(); + + Parser parser = 
Parser.builder().inlineParserFactory(inlineParserFactory).build(); + String input = "[link with special label][FooBarBaz]\n\n[foobarbaz]: /url"; + + String rendered = HtmlRenderer.builder().build().render(parser.parse(input)); + + // Lookup should pass original label to context + assertThat(inlineParserFactory.lookups).isEqualTo(List.of("FooBarBaz")); + + // Context should normalize label for finding reference + assertThat(rendered).isEqualTo("

    link with special label

    \n"); + } + + static class CapturingInlineParserFactory implements InlineParserFactory { + + private List lookups = new ArrayList<>(); + + @Override + public InlineParser create(final InlineParserContext inlineParserContext) { + InlineParserContext wrappedContext = new InlineParserContext() { + @Override + public List getCustomInlineContentParserFactories() { + return inlineParserContext.getCustomInlineContentParserFactories(); + } + + @Override + public List getCustomDelimiterProcessors() { + return inlineParserContext.getCustomDelimiterProcessors(); + } + + @Override + public List getCustomLinkProcessors() { + return inlineParserContext.getCustomLinkProcessors(); + } + + @Override + public Set getCustomLinkMarkers() { + return inlineParserContext.getCustomLinkMarkers(); + } + + @Override + public LinkReferenceDefinition getLinkReferenceDefinition(String label) { + return getDefinition(LinkReferenceDefinition.class, label); + } + + @Override + public D getDefinition(Class type, String label) { + lookups.add(label); + return inlineParserContext.getDefinition(type, label); + } + }; + + return new InlineParserImpl(wrappedContext); + } + } +} diff --git a/commonmark/src/test/java/org/commonmark/test/LinkReferenceDefinitionNodeTest.java b/commonmark/src/test/java/org/commonmark/test/LinkReferenceDefinitionNodeTest.java index 37b3d5dcd..8410ff028 100644 --- a/commonmark/src/test/java/org/commonmark/test/LinkReferenceDefinitionNodeTest.java +++ b/commonmark/src/test/java/org/commonmark/test/LinkReferenceDefinitionNodeTest.java @@ -2,13 +2,11 @@ import org.commonmark.node.*; import org.commonmark.parser.Parser; -import org.junit.Test; +import org.junit.jupiter.api.Test; import java.util.List; -import static org.hamcrest.CoreMatchers.instanceOf; -import static org.hamcrest.CoreMatchers.is; -import static org.junit.Assert.assertThat; +import static org.assertj.core.api.Assertions.assertThat; public class LinkReferenceDefinitionNodeTest { @@ -17,12 +15,12 @@ public void 
testDefinitionWithoutParagraph() { Node document = parse("This is a paragraph with a [foo] link.\n\n[foo]: /url 'title'"); List nodes = Nodes.getChildren(document); - assertThat(nodes.size(), is(2)); - assertThat(nodes.get(0), instanceOf(Paragraph.class)); + assertThat(nodes).hasSize(2); + assertThat(nodes.get(0)).isInstanceOf(Paragraph.class); LinkReferenceDefinition definition = assertDef(nodes.get(1), "foo"); - assertThat(definition.getDestination(), is("/url")); - assertThat(definition.getTitle(), is("title")); + assertThat(definition.getDestination()).isEqualTo("/url"); + assertThat(definition.getTitle()).isEqualTo("title"); } @Test @@ -30,10 +28,10 @@ public void testDefinitionWithParagraph() { Node document = parse("[foo]: /url\nThis is a paragraph with a [foo] link."); List nodes = Nodes.getChildren(document); - assertThat(nodes.size(), is(2)); + assertThat(nodes).hasSize(2); // Note that definition is not part of the paragraph, it's a sibling - assertThat(nodes.get(0), instanceOf(LinkReferenceDefinition.class)); - assertThat(nodes.get(1), instanceOf(Paragraph.class)); + assertThat(nodes.get(0)).isInstanceOf(LinkReferenceDefinition.class); + assertThat(nodes.get(1)).isInstanceOf(Paragraph.class); } @Test @@ -41,8 +39,8 @@ public void testMultipleDefinitions() { Node document = parse("This is a paragraph with a [foo] link.\n\n[foo]: /url\n[bar]: /url"); List nodes = Nodes.getChildren(document); - assertThat(nodes.size(), is(3)); - assertThat(nodes.get(0), instanceOf(Paragraph.class)); + assertThat(nodes).hasSize(3); + assertThat(nodes.get(0)).isInstanceOf(Paragraph.class); assertDef(nodes.get(1), "foo"); assertDef(nodes.get(2), "bar"); } @@ -52,14 +50,14 @@ public void testMultipleDefinitionsWithSameLabel() { Node document = parse("This is a paragraph with a [foo] link.\n\n[foo]: /url1\n[foo]: /url2"); List nodes = Nodes.getChildren(document); - assertThat(nodes.size(), is(3)); - assertThat(nodes.get(0), instanceOf(Paragraph.class)); + 
assertThat(nodes).hasSize(3); + assertThat(nodes.get(0)).isInstanceOf(Paragraph.class); LinkReferenceDefinition def1 = assertDef(nodes.get(1), "foo"); - assertThat(def1.getDestination(), is("/url1")); + assertThat(def1.getDestination()).isEqualTo("/url1"); // When there's multiple definitions with the same label, the first one "wins", as in reference links will use // that. But we still want to preserve the original definitions in the document. LinkReferenceDefinition def2 = assertDef(nodes.get(2), "foo"); - assertThat(def2.getDestination(), is("/url2")); + assertThat(def2.getDestination()).isEqualTo("/url2"); } @Test @@ -67,42 +65,52 @@ public void testDefinitionOfReplacedBlock() { Node document = parse("[foo]: /url\nHeading\n======="); List nodes = Nodes.getChildren(document); - assertThat(nodes.size(), is(2)); + assertThat(nodes).hasSize(2); assertDef(nodes.get(0), "foo"); - assertThat(nodes.get(1), instanceOf(Heading.class)); + assertThat(nodes.get(1)).isInstanceOf(Heading.class); } @Test public void testDefinitionInListItem() { Node document = parse("* [foo]: /url\n [foo]\n"); - assertThat(document.getFirstChild(), instanceOf(BulletList.class)); + assertThat(document.getFirstChild()).isInstanceOf(BulletList.class); Node item = document.getFirstChild().getFirstChild(); - assertThat(item, instanceOf(ListItem.class)); + assertThat(item).isInstanceOf(ListItem.class); List nodes = Nodes.getChildren(item); - assertThat(nodes.size(), is(2)); + assertThat(nodes).hasSize(2); assertDef(nodes.get(0), "foo"); - assertThat(nodes.get(1), instanceOf(Paragraph.class)); + assertThat(nodes.get(1)).isInstanceOf(Paragraph.class); } @Test public void testDefinitionInListItem2() { Node document = parse("* [foo]: /url\n* [foo]\n"); - assertThat(document.getFirstChild(), instanceOf(BulletList.class)); + assertThat(document.getFirstChild()).isInstanceOf(BulletList.class); List items = Nodes.getChildren(document.getFirstChild()); - assertThat(items.size(), is(2)); + 
assertThat(items).hasSize(2); Node item1 = items.get(0); Node item2 = items.get(1); - assertThat(item1, instanceOf(ListItem.class)); - assertThat(item2, instanceOf(ListItem.class)); + assertThat(item1).isInstanceOf(ListItem.class); + assertThat(item2).isInstanceOf(ListItem.class); - assertThat(Nodes.getChildren(item1).size(), is(1)); + assertThat(Nodes.getChildren(item1)).hasSize(1); assertDef(item1.getFirstChild(), "foo"); - assertThat(Nodes.getChildren(item2).size(), is(1)); - assertThat(item2.getFirstChild(), instanceOf(Paragraph.class)); + assertThat(Nodes.getChildren(item2)).hasSize(1); + assertThat(item2.getFirstChild()).isInstanceOf(Paragraph.class); + } + + @Test + public void testDefinitionLabelCaseIsPreserved() { + Node document = parse("This is a paragraph with a [foo] link.\n\n[fOo]: /url 'title'"); + List nodes = Nodes.getChildren(document); + + assertThat(nodes).hasSize(2); + assertThat(nodes.get(0)).isInstanceOf(Paragraph.class); + assertDef(nodes.get(1), "fOo"); } private static Node parse(String input) { @@ -111,9 +119,9 @@ private static Node parse(String input) { } private static LinkReferenceDefinition assertDef(Node node, String label) { - assertThat(node, instanceOf(LinkReferenceDefinition.class)); + assertThat(node).isInstanceOf(LinkReferenceDefinition.class); LinkReferenceDefinition def = (LinkReferenceDefinition) node; - assertThat(def.getLabel(), is(label)); + assertThat(def.getLabel()).isEqualTo(label); return def; } } diff --git a/commonmark/src/test/java/org/commonmark/test/ListBlockParserTest.java b/commonmark/src/test/java/org/commonmark/test/ListBlockParserTest.java new file mode 100644 index 000000000..02ac3abff --- /dev/null +++ b/commonmark/src/test/java/org/commonmark/test/ListBlockParserTest.java @@ -0,0 +1,65 @@ +package org.commonmark.test; + +import org.commonmark.node.ListItem; +import org.commonmark.node.Node; +import org.commonmark.parser.Parser; +import org.junit.jupiter.api.Test; + +import static 
org.assertj.core.api.Assertions.assertThat; + +public class ListBlockParserTest { + + private static final Parser PARSER = Parser.builder().build(); + + @Test + public void testBulletListIndents() { + assertListItemIndents("* foo", 0, 2); + assertListItemIndents(" * foo", 1, 3); + assertListItemIndents(" * foo", 2, 4); + assertListItemIndents(" * foo", 3, 5); + + assertListItemIndents("* foo", 0, 3); + assertListItemIndents("* foo", 0, 4); + assertListItemIndents("* foo", 0, 5); + assertListItemIndents(" * foo", 1, 4); + assertListItemIndents(" * foo", 3, 8); + + // The indent is relative to any containing blocks + assertListItemIndents("> * foo", 0, 2); + assertListItemIndents("> * foo", 1, 3); + assertListItemIndents("> * foo", 1, 4); + + // Tab counts as 3 spaces here (to the next tab stop column of 4) -> content indent is 1+3 + assertListItemIndents("*\tfoo", 0, 4); + + // Empty list, content indent is expected to be 2 + assertListItemIndents("-\n", 0, 2); + } + + @Test + public void testOrderedListIndents() { + assertListItemIndents("1. foo", 0, 3); + assertListItemIndents(" 1. foo", 1, 4); + assertListItemIndents(" 1. foo", 2, 5); + assertListItemIndents(" 1. foo", 3, 6); + + assertListItemIndents("1. foo", 0, 4); + assertListItemIndents("1. foo", 0, 5); + assertListItemIndents("1. foo", 0, 6); + assertListItemIndents(" 1. foo", 1, 5); + assertListItemIndents(" 1. foo", 2, 8); + + assertListItemIndents("> 1. foo", 0, 3); + assertListItemIndents("> 1. foo", 1, 4); + assertListItemIndents("> 1. 
foo", 1, 5); + + assertListItemIndents("1.\tfoo", 0, 4); + } + + private void assertListItemIndents(String input, int expectedMarkerIndent, int expectedContentIndent) { + Node doc = PARSER.parse(input); + ListItem listItem = Nodes.find(doc, ListItem.class); + assertThat((int) listItem.getMarkerIndent()).isEqualTo(expectedMarkerIndent); + assertThat((int) listItem.getContentIndent()).isEqualTo(expectedContentIndent); + } +} diff --git a/commonmark/src/test/java/org/commonmark/test/ListTightLooseTest.java b/commonmark/src/test/java/org/commonmark/test/ListTightLooseTest.java index 4889bb9ab..c6bda31ed 100644 --- a/commonmark/src/test/java/org/commonmark/test/ListTightLooseTest.java +++ b/commonmark/src/test/java/org/commonmark/test/ListTightLooseTest.java @@ -1,6 +1,6 @@ package org.commonmark.test; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class ListTightLooseTest extends CoreRenderingTestCase { diff --git a/commonmark/src/test/java/org/commonmark/test/Nodes.java b/commonmark/src/test/java/org/commonmark/test/Nodes.java index bbc019a6a..06d04fde6 100644 --- a/commonmark/src/test/java/org/commonmark/test/Nodes.java +++ b/commonmark/src/test/java/org/commonmark/test/Nodes.java @@ -4,6 +4,7 @@ import java.util.ArrayList; import java.util.List; +import java.util.Objects; public class Nodes { @@ -14,4 +15,36 @@ public static List getChildren(Node parent) { } return children; } + + /** + * Recursively try to find a node with the given type within the children of the specified node. 
+ * + * @param parent The node to get children from (node itself will not be checked) + * @param nodeClass The type of node to find + */ + public static T tryFind(Node parent, Class nodeClass) { + Node node = parent.getFirstChild(); + while (node != null) { + Node next = node.getNext(); + if (nodeClass.isInstance(node)) { + //noinspection unchecked + return (T) node; + } + T result = tryFind(node, nodeClass); + if (result != null) { + return result; + } + node = next; + } + return null; + } + + /** + * Recursively try to find a node with the given type within the children of the specified node. Throw if node + * could not be found. + */ + public static T find(Node parent, Class nodeClass) { + return Objects.requireNonNull(tryFind(parent, nodeClass), + "Could not find a " + nodeClass.getSimpleName() + " node in " + parent); + } } diff --git a/commonmark/src/test/java/org/commonmark/test/ParserTest.java b/commonmark/src/test/java/org/commonmark/test/ParserTest.java index e058de378..337196c56 100644 --- a/commonmark/src/test/java/org/commonmark/test/ParserTest.java +++ b/commonmark/src/test/java/org/commonmark/test/ParserTest.java @@ -1,31 +1,25 @@ package org.commonmark.test; import org.commonmark.node.*; -import org.commonmark.parser.InlineParser; -import org.commonmark.parser.InlineParserContext; -import org.commonmark.parser.InlineParserFactory; -import org.commonmark.parser.Parser; -import org.commonmark.parser.block.*; +import org.commonmark.parser.*; import org.commonmark.renderer.html.HtmlRenderer; +import org.commonmark.renderer.markdown.MarkdownRenderer; import org.commonmark.testutil.TestResources; -import org.junit.Test; +import org.junit.jupiter.api.Test; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; -import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; -import java.util.concurrent.Callable; 
-import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; -import static org.hamcrest.CoreMatchers.*; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertThat; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; public class ParserTest { @@ -35,7 +29,7 @@ public void ioReaderTest() throws IOException { InputStream input1 = TestResources.getSpec().openStream(); Node document1; - try (InputStreamReader reader = new InputStreamReader(input1, Charset.forName("UTF-8"))) { + try (InputStreamReader reader = new InputStreamReader(input1, StandardCharsets.UTF_8)) { document1 = parser.parseReader(reader); } @@ -43,19 +37,7 @@ public void ioReaderTest() throws IOException { Node document2 = parser.parse(spec); HtmlRenderer renderer = HtmlRenderer.builder().escapeHtml(true).build(); - assertEquals(renderer.render(document2), renderer.render(document1)); - } - - @Test - public void customBlockParserFactory() { - Parser parser = Parser.builder().customBlockParserFactory(new DashBlockParserFactory()).build(); - - // The dashes would normally be a ThematicBreak - Node document = parser.parse("hey\n\n---\n"); - - assertThat(document.getFirstChild(), instanceOf(Paragraph.class)); - assertEquals("hey", ((Text) document.getFirstChild().getFirstChild()).getLiteral()); - assertThat(document.getLastChild(), instanceOf(DashBlock.class)); + assertThat(renderer.render(document1)).isEqualTo(renderer.render(document2)); } @Test @@ -64,18 +46,25 @@ public void enabledBlockTypes() { Parser parser = Parser.builder().build(); // all core parsers by default Node document = parser.parse(given); - assertThat(document.getFirstChild(), instanceOf(Heading.class)); + assertThat(document.getFirstChild()).isInstanceOf(Heading.class); Set> headersOnly = new HashSet<>(); headersOnly.add(Heading.class); parser = 
Parser.builder().enabledBlockTypes(headersOnly).build(); document = parser.parse(given); - assertThat(document.getFirstChild(), instanceOf(Heading.class)); + assertThat(document.getFirstChild()).isInstanceOf(Heading.class); Set> noCoreTypes = new HashSet<>(); parser = Parser.builder().enabledBlockTypes(noCoreTypes).build(); document = parser.parse(given); - assertThat(document.getFirstChild(), not(instanceOf(Heading.class))); + assertThat(document.getFirstChild()).isNotInstanceOf(Heading.class); + } + + @Test + public void enabledBlockTypesThrowsWhenGivenUnknownClass() { + // BulletList can't be enabled separately at the moment, only all ListBlock types + assertThatThrownBy(() -> + Parser.builder().enabledBlockTypes(Set.of(Heading.class, BulletList.class)).build()).isInstanceOf(IllegalArgumentException.class); } @Test @@ -84,26 +73,26 @@ public void indentation() { Parser parser = Parser.builder().build(); Node document = parser.parse(given); - assertThat(document.getFirstChild(), instanceOf(BulletList.class)); + assertThat(document.getFirstChild()).isInstanceOf(BulletList.class); Node list = document.getFirstChild(); // first level list - assertEquals("expect one child", list.getFirstChild(), list.getLastChild()); - assertEquals("1 space", firstText(list.getFirstChild())); + assertThat(list.getLastChild()).as("expect one child").isEqualTo(list.getFirstChild()); + assertThat(firstText(list.getFirstChild())).isEqualTo("1 space"); list = list.getFirstChild().getLastChild(); // second level list - assertEquals("expect one child", list.getFirstChild(), list.getLastChild()); - assertEquals("3 spaces", firstText(list.getFirstChild())); + assertThat(list.getLastChild()).as("expect one child").isEqualTo(list.getFirstChild()); + assertThat(firstText(list.getFirstChild())).isEqualTo("3 spaces"); list = list.getFirstChild().getLastChild(); // third level list - assertEquals("5 spaces", firstText(list.getFirstChild())); - assertEquals("tab + space", 
firstText(list.getFirstChild().getNext())); + assertThat(firstText(list.getFirstChild())).isEqualTo("5 spaces"); + assertThat(firstText(list.getFirstChild().getNext())).isEqualTo("tab + space"); } @Test public void inlineParser() { final InlineParser fakeInlineParser = new InlineParser() { @Override - public void parse(String input, Node node) { + public void parse(SourceLines lines, Node node) { node.appendChild(new ThematicBreak()); } }; @@ -119,70 +108,146 @@ public InlineParser create(InlineParserContext inlineParserContext) { Parser parser = Parser.builder().inlineParserFactory(fakeInlineParserFactory).build(); String input = "**bold** **bold** ~~strikethrough~~"; - assertThat(parser.parse(input).getFirstChild().getFirstChild(), instanceOf(ThematicBreak.class)); + assertThat(parser.parse(input).getFirstChild().getFirstChild()).isInstanceOf(ThematicBreak.class); } @Test public void threading() throws Exception { - final Parser parser = Parser.builder().build(); - final String spec = TestResources.readAsString(TestResources.getSpec()); + var parser = Parser.builder().build(); + var spec = TestResources.readAsString(TestResources.getSpec()); - HtmlRenderer renderer = HtmlRenderer.builder().build(); - String expectedRendering = renderer.render(parser.parse(spec)); + var renderer = HtmlRenderer.builder().build(); + var expectedRendering = renderer.render(parser.parse(spec)); // Parse in parallel using the same Parser instance. 
- List> futures = new ArrayList<>(); - ExecutorService executorService = Executors.newFixedThreadPool(4); + var futures = new ArrayList>(); + var executorService = Executors.newFixedThreadPool(4); for (int i = 0; i < 40; i++) { - Future future = executorService.submit(new Callable() { - @Override - public Node call() throws Exception { - return parser.parse(spec); - } - }); + var future = executorService.submit(() -> parser.parse(spec)); futures.add(future); } - for (Future future : futures) { - Node node = future.get(); - assertThat(renderer.render(node), is(expectedRendering)); + for (var future : futures) { + var node = future.get(); + assertThat(renderer.render(node)).isEqualTo(expectedRendering); } } + @Test + public void maxOpenBlockParsersMustBeZeroOrGreater() { + assertThatThrownBy(() -> + Parser.builder().maxOpenBlockParsers(-1)).isInstanceOf(IllegalArgumentException.class); + } + + @Test + public void maxOpenBlockParsersIsOptIn() { + var parser = Parser.builder().build(); + + var document = parser.parse(alternatingNestedList(9)); + + assertThat(renderText(deepestStructuredParagraph(document, 9))).isEqualTo("level9"); + } + + @Test + public void maxOpenBlockParsersPreservesSevenLogicalListLevelsAtSeventeenBlocks() { + var parser = Parser.builder().maxOpenBlockParsers(17).build(); + + var document = parser.parse(alternatingNestedList(7)); + + assertThat(renderText(deepestStructuredParagraph(document, 7))).isEqualTo("level7"); + } + + @Test + public void maxOpenBlockParsersPreservesEightLogicalListLevelsAtSeventeenBlocks() { + var parser = Parser.builder().maxOpenBlockParsers(17).build(); + + var document = parser.parse(alternatingNestedList(8)); + + assertThat(renderText(deepestStructuredParagraph(document, 8))).isEqualTo("level8"); + } + + @Test + public void maxOpenBlockParsersDegradesTheNinthLogicalListLevelToPlainText() { + var parser = Parser.builder().maxOpenBlockParsers(17).build(); + + var document = parser.parse(alternatingNestedList(9)); + var 
deepestParagraph = deepestStructuredParagraph(document, 8); + + assertThat(renderText(deepestParagraph)).isEqualTo("level8\n\\- level9"); + assertThat(deepestParagraph.getNext()).isNull(); + } + + @Test + public void maxOpenBlockParsersAlsoLimitsMixedListAndBlockQuoteNesting() { + var parser = Parser.builder().maxOpenBlockParsers(5).build(); + + var document = parser.parse(String.join("\n", + "- level1", + " > level2", + " > > level3", + " > > > level4")); + + var listBlock = document.getFirstChild(); + assertThat(listBlock).isInstanceOf(BulletList.class); + + var listItem = listBlock.getFirstChild(); + var blockQuote1 = listItem.getLastChild(); + assertThat(blockQuote1).isInstanceOf(BlockQuote.class); + + var blockQuote2 = blockQuote1.getLastChild(); + assertThat(blockQuote2).isInstanceOf(BlockQuote.class); + + var deepestParagraph = blockQuote2.getLastChild(); + assertThat(deepestParagraph).isInstanceOf(Paragraph.class); + assertThat(renderText(deepestParagraph)).isEqualTo("level3\n\\> level4"); + assertThat(deepestParagraph.getNext()).isNull(); + } + private String firstText(Node n) { while (!(n instanceof Text)) { - assertThat(n, notNullValue()); + assertThat(n).isNotNull(); n = n.getFirstChild(); } return ((Text) n).getLiteral(); } - private static class DashBlock extends CustomBlock { + private Paragraph deepestStructuredParagraph(Node document, int levels) { + Node node = document.getFirstChild(); + for (int level = 1; level <= levels; level++) { + assertThat(node).isInstanceOf(ListBlock.class); + var listItem = node.getFirstChild(); + assertThat(listItem).isNotNull(); + if (level == levels) { + assertThat(listItem.getFirstChild()).isInstanceOf(Paragraph.class); + return (Paragraph) listItem.getFirstChild(); + } + node = listItem.getLastChild(); + } + throw new AssertionError("unreachable"); } - private static class DashBlockParser extends AbstractBlockParser { - - private DashBlock dash = new DashBlock(); - - @Override - public Block getBlock() { - return 
dash; - } + private String renderText(Node node) { + return MarkdownRenderer.builder().build().render(node).trim(); + } - @Override - public BlockContinue tryContinue(ParserState parserState) { - return BlockContinue.none(); + private String alternatingNestedList(int levels) { + int indent = 0; + var lines = new ArrayList(); + for (int level = 1; level <= levels; level++) { + var ordered = level % 2 == 0; + var marker = ordered ? "1. " : "- "; + lines.add(" ".repeat(indent) + marker + "level" + level); + indent += marker.length(); } + return String.join("\n", lines); } - private static class DashBlockParserFactory extends AbstractBlockParserFactory { - - @Override - public BlockStart tryStart(ParserState state, MatchedBlockParser matchedBlockParser) { - if (state.getLine().equals("---")) { - return BlockStart.of(new DashBlockParser()); - } - return BlockStart.none(); + private int depth(Node node) { + int depth = 0; + while (node.getParent() != null) { + node = node.getParent(); + depth++; } + return depth; } } diff --git a/commonmark/src/test/java/org/commonmark/test/PathologicalTest.java b/commonmark/src/test/java/org/commonmark/test/PathologicalTest.java index a853b1b11..66d39de23 100644 --- a/commonmark/src/test/java/org/commonmark/test/PathologicalTest.java +++ b/commonmark/src/test/java/org/commonmark/test/PathologicalTest.java @@ -1,93 +1,78 @@ package org.commonmark.test; -import org.junit.FixMethodOrder; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.Stopwatch; -import org.junit.rules.Timeout; -import org.junit.runner.Description; -import org.junit.runners.MethodSorters; +import org.junit.jupiter.api.MethodOrderer; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestMethodOrder; +import org.junit.jupiter.api.Timeout; import java.util.concurrent.TimeUnit; -import static org.commonmark.testutil.Strings.repeat; - /** * Pathological input cases (from commonmark.js). 
*/ -@FixMethodOrder(MethodSorters.NAME_ASCENDING) +@Timeout(value = 3, unit = TimeUnit.SECONDS) +@TestMethodOrder(MethodOrderer.MethodName.class) public class PathologicalTest extends CoreRenderingTestCase { private int x = 100_000; - @Rule - public Timeout timeout = new Timeout(3, TimeUnit.SECONDS); - - @Rule - public Stopwatch stopwatch = new Stopwatch() { - @Override - protected void finished(long nanos, Description description) { - System.err.println(description.getDisplayName() + " took " + (nanos / 1000000) + " ms"); - } - }; - @Test public void nestedStrongEmphasis() { // this is limited by the stack size because visitor is recursive x = 500; assertRendering( - repeat("*a **a ", x) + "b" + repeat(" a** a*", x), - "

    " + repeat("a a ", x) + "b" + - repeat(" a a", x) + "

    \n"); + "*a **a ".repeat(x) + "b" + " a** a*".repeat(x), + "

    " + "a a ".repeat(x) + "b" + + " a a".repeat(x) + "

    \n"); } @Test public void emphasisClosersWithNoOpeners() { assertRendering( - repeat("a_ ", x), - "

    " + repeat("a_ ", x - 1) + "a_

    \n"); + "a_ ".repeat(x), + "

    " + "a_ ".repeat(x - 1) + "a_

    \n"); } @Test public void emphasisOpenersWithNoClosers() { assertRendering( - repeat("_a ", x), - "

    " + repeat("_a ", x - 1) + "_a

    \n"); + "_a ".repeat(x), + "

    " + "_a ".repeat(x - 1) + "_a

    \n"); } @Test public void linkClosersWithNoOpeners() { assertRendering( - repeat("a] ", x), - "

    " + repeat("a] ", x - 1) + "a]

    \n"); + "a] ".repeat(x), + "

    " + "a] ".repeat(x - 1) + "a]

    \n"); } @Test public void linkOpenersWithNoClosers() { assertRendering( - repeat("[a ", x), - "

    " + repeat("[a ", x - 1) + "[a

    \n"); + "[a ".repeat(x), + "

    " + "[a ".repeat(x - 1) + "[a

    \n"); } @Test public void linkOpenersAndEmphasisClosers() { assertRendering( - repeat("[ a_ ", x), - "

    " + repeat("[ a_ ", x - 1) + "[ a_

    \n"); + "[ a_ ".repeat(x), + "

    " + "[ a_ ".repeat(x - 1) + "[ a_

    \n"); } @Test public void mismatchedOpenersAndClosers() { assertRendering( - repeat("*a_ ", x), - "

    " + repeat("*a_ ", x - 1) + "*a_

    \n"); + "*a_ ".repeat(x), + "

    " + "*a_ ".repeat(x - 1) + "*a_

    \n"); } @Test public void nestedBrackets() { assertRendering( - repeat("[", x) + "a" + repeat("]", x), - "

    " + repeat("[", x) + "a" + repeat("]", x) + "

    \n"); + "[".repeat(x) + "a" + "]".repeat(x), + "

    " + "[".repeat(x) + "a" + "]".repeat(x) + "

    \n"); } @Test @@ -95,29 +80,29 @@ public void nestedBlockQuotes() { // this is limited by the stack size because visitor is recursive x = 1000; assertRendering( - repeat("> ", x) + "a\n", - repeat("
    \n", x) + "

    a

    \n" + - repeat("
    \n", x)); + "> ".repeat(x) + "a\n", + "
    \n".repeat(x) + "

    a

    \n" + + "
    \n".repeat(x)); } @Test public void hugeHorizontalRule() { assertRendering( - repeat("*", 10000) + "\n", + "*".repeat(10000) + "\n", "
    \n"); } @Test public void backslashInLink() { // See https://github.com/commonmark/commonmark.js/issues/157 - assertRendering("[" + repeat("\\", x) + "\n", - "

    " + "[" + repeat("\\", x / 2) + "

    \n"); + assertRendering("[" + "\\".repeat(x) + "\n", + "

    " + "[" + "\\".repeat(x / 2) + "

    \n"); } @Test public void unclosedInlineLinks() { // See https://github.com/commonmark/commonmark.js/issues/129 - assertRendering(repeat("[](", x) + "\n", - "

    " + repeat("[](", x) + "

    \n"); + assertRendering("[](".repeat(x) + "\n", + "

    " + "[](".repeat(x) + "

    \n"); } } diff --git a/commonmark/src/test/java/org/commonmark/test/RegressionTest.java b/commonmark/src/test/java/org/commonmark/test/RegressionTest.java index c4a0d3be5..900a6518c 100644 --- a/commonmark/src/test/java/org/commonmark/test/RegressionTest.java +++ b/commonmark/src/test/java/org/commonmark/test/RegressionTest.java @@ -6,18 +6,18 @@ import org.commonmark.testutil.TestResources; import org.commonmark.testutil.example.Example; import org.commonmark.testutil.example.ExampleReader; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.Parameter; +import org.junit.jupiter.params.ParameterizedClass; +import org.junit.jupiter.params.provider.MethodSource; -import java.net.URL; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; -@RunWith(Parameterized.class) +@ParameterizedClass +@MethodSource("data") public class RegressionTest extends RenderingTestCase { private static final Parser PARSER = Parser.builder().build(); @@ -26,20 +26,13 @@ public class RegressionTest extends RenderingTestCase { private static final Map OVERRIDDEN_EXAMPLES = getOverriddenExamples(); - private final Example example; + @Parameter + Example example; - public RegressionTest(Example example) { - this.example = example; - } - - @Parameters(name = "{0}") - public static List data() { - List data = new ArrayList<>(); - for (URL regressionResource : TestResources.getRegressions()) { - List examples = ExampleReader.readExamples(regressionResource); - for (Example example : examples) { - data.add(new Object[]{example}); - } + static List data() { + var data = new ArrayList(); + for (var regressionResource : TestResources.getRegressions()) { + data.addAll(ExampleReader.readExamples(regressionResource)); } return data; } @@ -64,6 +57,8 @@ private static Map 
getOverriddenExamples() { // The only difference is that we don't change `%28` and `%29` to `(` and `)` (percent encoding is preserved) m.put("[XSS](javascript&colon;alert%28'XSS'%29)\n", "

    XSS

    \n"); + // Callers should handle BOMs + m.put("\uFEFF# Hi\n", "

    \uFEFF# Hi

    \n"); return m; } diff --git a/commonmark/src/test/java/org/commonmark/test/SourceLineTest.java b/commonmark/src/test/java/org/commonmark/test/SourceLineTest.java new file mode 100644 index 000000000..5d34bf410 --- /dev/null +++ b/commonmark/src/test/java/org/commonmark/test/SourceLineTest.java @@ -0,0 +1,45 @@ +package org.commonmark.test; + +import org.commonmark.node.SourceSpan; +import org.commonmark.parser.SourceLine; +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +public class SourceLineTest { + + @Test + public void testSubstring() { + SourceLine line = SourceLine.of("abcd", SourceSpan.of(3, 10, 13, 4)); + + assertSourceLine(line.substring(0, 4), "abcd", SourceSpan.of(3, 10, 13, 4)); + assertSourceLine(line.substring(0, 3), "abc", SourceSpan.of(3, 10, 13, 3)); + assertSourceLine(line.substring(0, 2), "ab", SourceSpan.of(3, 10, 13, 2)); + assertSourceLine(line.substring(0, 1), "a", SourceSpan.of(3, 10, 13, 1)); + assertSourceLine(line.substring(0, 0), "", null); + + assertSourceLine(line.substring(1, 4), "bcd", SourceSpan.of(3, 11, 14, 3)); + assertSourceLine(line.substring(1, 3), "bc", SourceSpan.of(3, 11, 14, 2)); + + assertSourceLine(line.substring(3, 4), "d", SourceSpan.of(3, 13, 16, 1)); + assertSourceLine(line.substring(4, 4), "", null); + } + + @Test + public void testSubstringBeginOutOfBounds() { + var sourceLine = SourceLine.of("abcd", SourceSpan.of(3, 10, 13, 4)); + assertThatThrownBy(() -> sourceLine.substring(3, 2)).isInstanceOf(StringIndexOutOfBoundsException.class); + } + + @Test + public void testSubstringEndOutOfBounds() { + var sourceLine = SourceLine.of("abcd", SourceSpan.of(3, 10, 13, 4)); + assertThatThrownBy(() -> sourceLine.substring(0, 5)).isInstanceOf(StringIndexOutOfBoundsException.class); + } + + private static void assertSourceLine(SourceLine sourceLine, String expectedContent, SourceSpan expectedSourceSpan) { + 
assertThat(sourceLine.getContent()).isEqualTo(expectedContent); + assertThat(sourceLine.getSourceSpan()).isEqualTo(expectedSourceSpan); + } +} diff --git a/commonmark/src/test/java/org/commonmark/test/SourceSpanRenderer.java b/commonmark/src/test/java/org/commonmark/test/SourceSpanRenderer.java new file mode 100644 index 000000000..c29aac61e --- /dev/null +++ b/commonmark/src/test/java/org/commonmark/test/SourceSpanRenderer.java @@ -0,0 +1,108 @@ +package org.commonmark.test; + +import org.commonmark.node.AbstractVisitor; +import org.commonmark.node.Node; + +import java.util.*; + +public class SourceSpanRenderer { + + /** + * Render source spans in the document using source position's line and column index. + */ + public static String renderWithLineColumn(Node document, String source) { + SourceSpanMarkersVisitor visitor = new SourceSpanMarkersVisitor(); + document.accept(visitor); + var lineColumnMarkers = visitor.getLineColumnMarkers(); + + StringBuilder sb = new StringBuilder(); + + String[] lines = source.split("\n"); + + for (int lineIndex = 0; lineIndex < lines.length; lineIndex++) { + String line = lines[lineIndex]; + Map> lineMarkers = lineColumnMarkers.get(lineIndex); + for (int i = 0; i < line.length(); i++) { + appendMarkers(lineMarkers, i, sb); + sb.append(line.charAt(i)); + } + appendMarkers(lineMarkers, line.length(), sb); + sb.append("\n"); + } + + return sb.toString(); + } + + /** + * Render source spans in the document using source position's input index. 
+ */ + public static String renderWithInputIndex(Node document, String source) { + SourceSpanMarkersVisitor visitor = new SourceSpanMarkersVisitor(); + document.accept(visitor); + var markers = visitor.getInputIndexMarkers(); + + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < source.length(); i++) { + markers.getOrDefault(i, List.of()).forEach(marker -> sb.append(marker)); + sb.append(source.charAt(i)); + } + return sb.toString(); + } + + private static void appendMarkers(Map> lineMarkers, int columnIndex, StringBuilder sb) { + if (lineMarkers != null) { + List columnMarkers = lineMarkers.get(columnIndex); + if (columnMarkers != null) { + for (String marker : columnMarkers) { + sb.append(marker); + } + } + } + } + + private static class SourceSpanMarkersVisitor extends AbstractVisitor { + + private static final String OPENING = "({[<⸢⸤"; + private static final String CLOSING = ")}]>⸣⸥"; + + private final Map>> lineColumnMarkers = new HashMap<>(); + private final Map> inputIndexMarkers = new HashMap<>(); + + private int markerIndex; + + public Map>> getLineColumnMarkers() { + return lineColumnMarkers; + } + + public Map> getInputIndexMarkers() { + return inputIndexMarkers; + } + + @Override + protected void visitChildren(Node parent) { + if (!parent.getSourceSpans().isEmpty()) { + for (var span : parent.getSourceSpans()) { + String opener = String.valueOf(OPENING.charAt(markerIndex % OPENING.length())); + String closer = String.valueOf(CLOSING.charAt(markerIndex % CLOSING.length())); + + int line = span.getLineIndex(); + int col = span.getColumnIndex(); + var input = span.getInputIndex(); + int length = span.getLength(); + getMarkers(line, col).add(opener); + getMarkers(line, col + length).add(0, closer); + + inputIndexMarkers.computeIfAbsent(input, k -> new LinkedList<>()).add(opener); + inputIndexMarkers.computeIfAbsent(input + length, k -> new LinkedList<>()).add(0, closer); + } + markerIndex++; + } + super.visitChildren(parent); + } + + private 
List getMarkers(int lineIndex, int columnIndex) { + var columnMap = lineColumnMarkers.computeIfAbsent(lineIndex, k -> new HashMap<>()); + return columnMap.computeIfAbsent(columnIndex, k -> new LinkedList<>()); + } + } +} diff --git a/commonmark/src/test/java/org/commonmark/test/SourceSpanTest.java b/commonmark/src/test/java/org/commonmark/test/SourceSpanTest.java new file mode 100644 index 000000000..f1bb231f4 --- /dev/null +++ b/commonmark/src/test/java/org/commonmark/test/SourceSpanTest.java @@ -0,0 +1,68 @@ +package org.commonmark.test; + +import org.commonmark.node.SourceSpan; +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +public class SourceSpanTest { + + @Test + public void testSubSpan() { + var span = SourceSpan.of(1, 2, 3, 5); + + assertThat(span.subSpan(0)).isSameAs(span); + assertThat(span.subSpan(0, 5)).isSameAs(span); + + assertThat(span.subSpan(1)).isEqualTo(SourceSpan.of(1, 3, 4, 4)); + assertThat(span.subSpan(2)).isEqualTo(SourceSpan.of(1, 4, 5, 3)); + assertThat(span.subSpan(3)).isEqualTo(SourceSpan.of(1, 5, 6, 2)); + assertThat(span.subSpan(4)).isEqualTo(SourceSpan.of(1, 6, 7, 1)); + // Not sure if empty spans are useful, but it probably makes sense to mirror how substrings work + assertThat(span.subSpan(5)).isEqualTo(SourceSpan.of(1, 7, 8, 0)); + assertThat("abcde".substring(5)).isEqualTo(""); + + assertThat(span.subSpan(0, 5)).isEqualTo(SourceSpan.of(1, 2, 3, 5)); + assertThat(span.subSpan(0, 4)).isEqualTo(SourceSpan.of(1, 2, 3, 4)); + assertThat(span.subSpan(0, 3)).isEqualTo(SourceSpan.of(1, 2, 3, 3)); + assertThat(span.subSpan(0, 2)).isEqualTo(SourceSpan.of(1, 2, 3, 2)); + assertThat(span.subSpan(0, 1)).isEqualTo(SourceSpan.of(1, 2, 3, 1)); + assertThat(span.subSpan(0, 0)).isEqualTo(SourceSpan.of(1, 2, 3, 0)); + assertThat("abcde".substring(0, 1)).isEqualTo("a"); + assertThat("abcde".substring(0, 0)).isEqualTo(""); + + 
assertThat(span.subSpan(1, 4)).isEqualTo(SourceSpan.of(1, 3, 4, 3)); + assertThat(span.subSpan(2, 3)).isEqualTo(SourceSpan.of(1, 4, 5, 1)); + } + + @Test + public void testSubSpanBeginIndexNegative() { + var sourceSpan = SourceSpan.of(1, 2, 3, 5); + assertThatThrownBy(() -> sourceSpan.subSpan(-1)).isInstanceOf(IndexOutOfBoundsException.class); + } + + @Test + public void testSubSpanBeginIndexOutOfBounds() { + var sourceSpan = SourceSpan.of(1, 2, 3, 5); + assertThatThrownBy(() -> sourceSpan.subSpan(6)).isInstanceOf(IndexOutOfBoundsException.class); + } + + @Test + public void testSubSpanEndIndexNegative() { + var sourceSpan = SourceSpan.of(1, 2, 3, 5); + assertThatThrownBy(() -> sourceSpan.subSpan(0, -1)).isInstanceOf(IndexOutOfBoundsException.class); + } + + @Test + public void testSubSpanEndIndexOutOfBounds() { + var sourceSpan = SourceSpan.of(1, 2, 3, 5); + assertThatThrownBy(() -> sourceSpan.subSpan(0, 6)).isInstanceOf(IndexOutOfBoundsException.class); + } + + @Test + public void testSubSpanBeginIndexGreaterThanEndIndex() { + var sourceSpan = SourceSpan.of(1, 2, 3, 5); + assertThatThrownBy(() -> sourceSpan.subSpan(2, 1)).isInstanceOf(IndexOutOfBoundsException.class); + } +} diff --git a/commonmark/src/test/java/org/commonmark/test/SourceSpansTest.java b/commonmark/src/test/java/org/commonmark/test/SourceSpansTest.java new file mode 100644 index 000000000..f4e9d0a17 --- /dev/null +++ b/commonmark/src/test/java/org/commonmark/test/SourceSpansTest.java @@ -0,0 +1,428 @@ +package org.commonmark.test; + +import org.commonmark.node.*; +import org.commonmark.parser.IncludeSourceSpans; +import org.commonmark.parser.Parser; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.io.StringReader; +import java.util.ArrayDeque; +import java.util.Deque; +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; + +public class SourceSpansTest { + + private static final Parser PARSER = 
Parser.builder().includeSourceSpans(IncludeSourceSpans.BLOCKS).build(); + private static final Parser INLINES_PARSER = Parser.builder().includeSourceSpans(IncludeSourceSpans.BLOCKS_AND_INLINES).build(); + + @Test + public void paragraph() { + assertSpans("foo\n", Paragraph.class, SourceSpan.of(0, 0, 0, 3)); + assertSpans("foo\nbar\n", Paragraph.class, SourceSpan.of(0, 0, 0, 3), SourceSpan.of(1, 0, 4, 3)); + assertSpans(" foo\n bar\n", Paragraph.class, SourceSpan.of(0, 0, 0, 5), SourceSpan.of(1, 0, 6, 5)); + assertSpans("> foo\n> bar\n", Paragraph.class, SourceSpan.of(0, 2, 2, 3), SourceSpan.of(1, 2, 8, 3)); + assertSpans("* foo\n bar\n", Paragraph.class, SourceSpan.of(0, 2, 2, 3), SourceSpan.of(1, 2, 8, 3)); + assertSpans("* foo\nbar\n", Paragraph.class, SourceSpan.of(0, 2, 2, 3), SourceSpan.of(1, 0, 6, 3)); + } + + @Test + public void thematicBreak() { + assertSpans("---\n", ThematicBreak.class, SourceSpan.of(0, 0, 0, 3)); + assertSpans(" ---\n", ThematicBreak.class, SourceSpan.of(0, 0, 0, 5)); + assertSpans("> ---\n", ThematicBreak.class, SourceSpan.of(0, 2, 2, 3)); + } + + @Test + public void atxHeading() { + assertSpans("# foo", Heading.class, SourceSpan.of(0, 0, 0, 5)); + assertSpans(" # foo", Heading.class, SourceSpan.of(0, 0, 0, 6)); + assertSpans("## foo ##", Heading.class, SourceSpan.of(0, 0, 0, 9)); + assertSpans("> # foo", Heading.class, SourceSpan.of(0, 2, 2, 5)); + } + + @Test + public void setextHeading() { + assertSpans("foo\n===\n", Heading.class, SourceSpan.of(0, 0, 0, 3), SourceSpan.of(1, 0, 4, 3)); + assertSpans("foo\nbar\n====\n", Heading.class, SourceSpan.of(0, 0, 0, 3), SourceSpan.of(1, 0, 4, 3), SourceSpan.of(2, 0, 8, 4)); + assertSpans(" foo\n ===\n", Heading.class, SourceSpan.of(0, 0, 0, 5), SourceSpan.of(1, 0, 6, 5)); + assertSpans("> foo\n> ===\n", Heading.class, SourceSpan.of(0, 2, 2, 3), SourceSpan.of(1, 2, 8, 3)); + } + + @Test + public void indentedCodeBlock() { + assertSpans(" foo\n", IndentedCodeBlock.class, SourceSpan.of(0, 0, 0, 
7)); + assertSpans(" foo\n", IndentedCodeBlock.class, SourceSpan.of(0, 0, 0, 8)); + assertSpans("\tfoo\n", IndentedCodeBlock.class, SourceSpan.of(0, 0, 0, 4)); + assertSpans(" \tfoo\n", IndentedCodeBlock.class, SourceSpan.of(0, 0, 0, 5)); + assertSpans(" \tfoo\n", IndentedCodeBlock.class, SourceSpan.of(0, 0, 0, 6)); + assertSpans(" \tfoo\n", IndentedCodeBlock.class, SourceSpan.of(0, 0, 0, 7)); + assertSpans(" \tfoo\n", IndentedCodeBlock.class, SourceSpan.of(0, 0, 0, 8)); + assertSpans(" \t foo\n", IndentedCodeBlock.class, SourceSpan.of(0, 0, 0, 9)); + assertSpans("\t foo\n", IndentedCodeBlock.class, SourceSpan.of(0, 0, 0, 5)); + assertSpans("\t foo\n", IndentedCodeBlock.class, SourceSpan.of(0, 0, 0, 6)); + assertSpans(" foo\n bar\n", IndentedCodeBlock.class, SourceSpan.of(0, 0, 0, 7), SourceSpan.of(1, 0, 8, 8)); + assertSpans(" foo\n\tbar\n", IndentedCodeBlock.class, SourceSpan.of(0, 0, 0, 7), SourceSpan.of(1, 0, 8, 4)); + assertSpans(" foo\n \n \n", IndentedCodeBlock.class, SourceSpan.of(0, 0, 0, 7), SourceSpan.of(1, 0, 8, 4), SourceSpan.of(2, 0, 13, 5)); + assertSpans("> foo\n", IndentedCodeBlock.class, SourceSpan.of(0, 2, 2, 7)); + } + + @Test + public void fencedCodeBlock() { + assertSpans("```\nfoo\n```\n", FencedCodeBlock.class, + SourceSpan.of(0, 0, 0, 3), SourceSpan.of(1, 0, 4, 3), SourceSpan.of(2, 0, 8, 3)); + assertSpans("```\n foo\n```\n", FencedCodeBlock.class, + SourceSpan.of(0, 0, 0, 3), SourceSpan.of(1, 0, 4, 4), SourceSpan.of(2, 0, 9, 3)); + assertSpans("```\nfoo\nbar\n```\n", FencedCodeBlock.class, + SourceSpan.of(0, 0, 0, 3), SourceSpan.of(1, 0, 4, 3), SourceSpan.of(2, 0, 8, 3), SourceSpan.of(3, 0, 12, 3)); + assertSpans(" ```\n foo\n ```\n", FencedCodeBlock.class, + SourceSpan.of(0, 0, 0, 6), SourceSpan.of(1, 0, 7, 6), SourceSpan.of(2, 0, 14, 6)); + assertSpans(" ```\n foo\nfoo\n```\n", FencedCodeBlock.class, + SourceSpan.of(0, 0, 0, 4), SourceSpan.of(1, 0, 5, 4), SourceSpan.of(2, 0, 10, 3), SourceSpan.of(3, 0, 14, 3)); + 
assertSpans("```info\nfoo\n```\n", FencedCodeBlock.class, + SourceSpan.of(0, 0, 0, 7), SourceSpan.of(1, 0, 8, 3), SourceSpan.of(2, 0, 12, 3)); + assertSpans("* ```\n foo\n ```\n", FencedCodeBlock.class, + SourceSpan.of(0, 2, 2, 3), SourceSpan.of(1, 2, 8, 3), SourceSpan.of(2, 2, 14, 3)); + assertSpans("> ```\n> foo\n> ```\n", FencedCodeBlock.class, + SourceSpan.of(0, 2, 2, 3), SourceSpan.of(1, 2, 8, 3), SourceSpan.of(2, 2, 14, 3)); + + Node document = PARSER.parse("```\nfoo\n```\nbar\n"); + Paragraph paragraph = (Paragraph) document.getLastChild(); + assertThat(paragraph.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(3, 0, 12, 3))); + } + + @Test + public void htmlBlock() { + assertSpans("
    \n", HtmlBlock.class, SourceSpan.of(0, 0, 0, 5)); + assertSpans("
    \n foo\n
    \n", HtmlBlock.class, + SourceSpan.of(0, 0, 0, 6), + SourceSpan.of(1, 0, 7, 4), + SourceSpan.of(2, 0, 12, 7)); + assertSpans("*
    \n", HtmlBlock.class, SourceSpan.of(0, 2, 2, 5)); + } + + @Test + public void blockQuote() { + assertSpans(">foo\n", BlockQuote.class, SourceSpan.of(0, 0, 0, 4)); + assertSpans("> foo\n", BlockQuote.class, SourceSpan.of(0, 0, 0, 5)); + assertSpans("> foo\n", BlockQuote.class, SourceSpan.of(0, 0, 0, 6)); + assertSpans(" > foo\n", BlockQuote.class, SourceSpan.of(0, 0, 0, 6)); + assertSpans(" > foo\n > bar\n", BlockQuote.class, SourceSpan.of(0, 0, 0, 8), SourceSpan.of(1, 0, 9, 7)); + // Lazy continuations + assertSpans("> foo\nbar\n", BlockQuote.class, SourceSpan.of(0, 0, 0, 5), SourceSpan.of(1, 0, 6, 3)); + assertSpans("> foo\nbar\n> baz\n", BlockQuote.class, SourceSpan.of(0, 0, 0, 5), SourceSpan.of(1, 0, 6, 3), SourceSpan.of(2, 0, 10, 5)); + assertSpans("> > foo\nbar\n", BlockQuote.class, SourceSpan.of(0, 0, 0, 7), SourceSpan.of(1, 0, 8, 3)); + } + + @Test + public void listBlock() { + assertSpans("* foo\n", ListBlock.class, SourceSpan.of(0, 0, 0, 5)); + assertSpans("* foo\n bar\n", ListBlock.class, SourceSpan.of(0, 0, 0, 5), SourceSpan.of(1, 0, 6, 5)); + assertSpans("* foo\n* bar\n", ListBlock.class, SourceSpan.of(0, 0, 0, 5), SourceSpan.of(1, 0, 6, 5)); + assertSpans("* foo\n # bar\n", ListBlock.class, SourceSpan.of(0, 0, 0, 5), SourceSpan.of(1, 0, 6, 7)); + assertSpans("* foo\n * bar\n", ListBlock.class, SourceSpan.of(0, 0, 0, 5), SourceSpan.of(1, 0, 6, 7)); + assertSpans("* foo\n> bar\n", ListBlock.class, SourceSpan.of(0, 0, 0, 5)); + assertSpans("> * foo\n", ListBlock.class, SourceSpan.of(0, 2, 2, 5)); + + // Lazy continuations + assertSpans("* foo\nbar\nbaz", ListBlock.class, SourceSpan.of(0, 0, 0, 5), SourceSpan.of(1, 0, 6, 3), SourceSpan.of(2, 0, 10, 3)); + assertSpans("* foo\nbar\n* baz", ListBlock.class, SourceSpan.of(0, 0, 0, 5), SourceSpan.of(1, 0, 6, 3), SourceSpan.of(2, 0, 10, 5)); + assertSpans("* foo\n * bar\nbaz", ListBlock.class, SourceSpan.of(0, 0, 0, 5), SourceSpan.of(1, 0, 6, 7), SourceSpan.of(2, 0, 14, 3)); + + Node document = 
PARSER.parse("* foo\n * bar\n"); + ListBlock listBlock = (ListBlock) document.getFirstChild().getFirstChild().getLastChild(); + assertThat(listBlock.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(1, 2, 8, 5))); + } + + @Test + public void listItem() { + assertSpans("* foo\n", ListItem.class, SourceSpan.of(0, 0, 0, 5)); + assertSpans(" * foo\n", ListItem.class, SourceSpan.of(0, 0, 0, 6)); + assertSpans(" * foo\n", ListItem.class, SourceSpan.of(0, 0, 0, 7)); + assertSpans(" * foo\n", ListItem.class, SourceSpan.of(0, 0, 0, 8)); + assertSpans("*\n foo\n", ListItem.class, SourceSpan.of(0, 0, 0, 1), SourceSpan.of(1, 0, 2, 5)); + assertSpans("*\n foo\n bar\n", ListItem.class, SourceSpan.of(0, 0, 0, 1), SourceSpan.of(1, 0, 2, 5), SourceSpan.of(2, 0, 8, 5)); + assertSpans("> * foo\n", ListItem.class, SourceSpan.of(0, 2, 2, 5)); + + // Lazy continuations + assertSpans("* foo\nbar\n", ListItem.class, SourceSpan.of(0, 0, 0, 5), SourceSpan.of(1, 0, 6, 3)); + assertSpans("* foo\nbar\nbaz\n", ListItem.class, SourceSpan.of(0, 0, 0, 5), SourceSpan.of(1, 0, 6, 3), SourceSpan.of(2, 0, 10, 3)); + } + + @Test + public void linkReferenceDefinition() { + // This is tricky due to how link reference definition parsing works. It is stripped from the paragraph if it's + // successfully parsed, otherwise it stays part of the paragraph. 
+ Node document = PARSER.parse("[foo]: /url\ntext\n"); + + LinkReferenceDefinition linkReferenceDefinition = (LinkReferenceDefinition) document.getFirstChild(); + assertThat(linkReferenceDefinition.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 0, 0, 11))); + + Paragraph paragraph = (Paragraph) document.getLastChild(); + assertThat(paragraph.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(1, 0, 12, 4))); + } + + @Test + public void linkReferenceDefinitionMultiple() { + var doc = PARSER.parse("[foo]: /foo\n[bar]: /bar\n"); + var def1 = (LinkReferenceDefinition) doc.getFirstChild(); + var def2 = (LinkReferenceDefinition) doc.getLastChild(); + assertThat(def1.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 0, 0, 11))); + assertThat(def2.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(1, 0, 12, 11))); + } + + @Test + public void linkReferenceDefinitionWithTitle() { + var doc = PARSER.parse("[1]: #not-code \"Text\"\n[foo]: /foo\n"); + var def1 = (LinkReferenceDefinition) doc.getFirstChild(); + var def2 = (LinkReferenceDefinition) doc.getLastChild(); + assertThat(def1.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 0, 0, 21))); + assertThat(def2.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(1, 0, 22, 11))); + } + + @Test + public void linkReferenceDefinitionWithTitleInvalid() { + var doc = PARSER.parse("[foo]: /url\n\"title\" ok\n"); + var def = Nodes.find(doc, LinkReferenceDefinition.class); + var paragraph = Nodes.find(doc, Paragraph.class); + assertThat(def.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 0, 0, 11))); + assertThat(paragraph.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(1, 0, 12, 10))); + } + + @Test + public void linkReferenceDefinitionHeading() { + // This is probably the trickiest because we have a link reference definition at the start of a paragraph + // that gets replaced because of a heading. Phew. 
+ Node document = PARSER.parse("[foo]: /url\nHeading\n===\n"); + + LinkReferenceDefinition linkReferenceDefinition = (LinkReferenceDefinition) document.getFirstChild(); + assertThat(linkReferenceDefinition.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 0, 0, 11))); + + Heading heading = (Heading) document.getLastChild(); + assertThat(heading.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(1, 0, 12, 7), SourceSpan.of(2, 0, 20, 3))); + } + + @Test + public void lazyContinuationLines() { + { + // From https://spec.commonmark.org/0.31.2/#example-250 + // Wrong source span for the inner block quote for the second line. + var doc = PARSER.parse("> > > foo\nbar\n"); + + var bq1 = (BlockQuote) doc.getLastChild(); + assertThat(bq1.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 0, 0, 9), SourceSpan.of(1, 0, 10, 3))); + var bq2 = (BlockQuote) bq1.getLastChild(); + assertThat(bq2.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 2, 2, 7), SourceSpan.of(1, 0, 10, 3))); + var bq3 = (BlockQuote) bq2.getLastChild(); + assertThat(bq3.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 4, 4, 5), SourceSpan.of(1, 0, 10, 3))); + var paragraph = (Paragraph) bq3.getLastChild(); + assertThat(paragraph.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 6, 6, 3), SourceSpan.of(1, 0, 10, 3))); + } + + { + // Adding one character to the last line remove blockQuote3 source for the second line + var doc = PARSER.parse("> > > foo\nbars\n"); + + var bq1 = (BlockQuote) doc.getLastChild(); + assertThat(bq1.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 0, 0, 9), SourceSpan.of(1, 0, 10, 4))); + var bq2 = (BlockQuote) bq1.getLastChild(); + assertThat(bq2.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 2, 2, 7), SourceSpan.of(1, 0, 10, 4))); + var bq3 = (BlockQuote) bq2.getLastChild(); + assertThat(bq3.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 4, 4, 5), SourceSpan.of(1, 0, 10, 4))); + var paragraph = (Paragraph) bq3.getLastChild(); + 
assertThat(paragraph.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 6, 6, 3), SourceSpan.of(1, 0, 10, 4))); + } + + { + // From https://spec.commonmark.org/0.31.2/#example-292 + var doc = PARSER.parse("> 1. > Blockquote\ncontinued here."); + + var bq1 = (BlockQuote) doc.getLastChild(); + assertThat(bq1.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 0, 0, 17), SourceSpan.of(1, 0, 18, 15))); + var orderedList = (OrderedList) bq1.getLastChild(); + assertThat(orderedList.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 2, 2, 15), SourceSpan.of(1, 0, 18, 15))); + var listItem = (ListItem) orderedList.getLastChild(); + assertThat(listItem.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 2, 2, 15), SourceSpan.of(1, 0, 18, 15))); + var bq2 = (BlockQuote) listItem.getLastChild(); + assertThat(bq2.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 5, 5, 12), SourceSpan.of(1, 0, 18, 15))); + var paragraph = (Paragraph) bq2.getLastChild(); + assertThat(paragraph.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 7, 7, 10), SourceSpan.of(1, 0, 18, 15))); + } + + { + // Lazy continuation line for nested blockquote + var doc = PARSER.parse("> > foo\n> bar\n"); + + var bq1 = (BlockQuote) doc.getLastChild(); + assertThat(bq1.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 0, 0, 7), SourceSpan.of(1, 0, 8, 5))); + var bq2 = (BlockQuote) bq1.getLastChild(); + assertThat(bq2.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 2, 2, 5), SourceSpan.of(1, 2, 10, 3))); + var paragraph = (Paragraph) bq2.getLastChild(); + assertThat(paragraph.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 4, 4, 3), SourceSpan.of(1, 2, 10, 3))); + } + } + + @Test + public void visualCheck() { + assertVisualize("> * foo\n> bar\n> * baz\n", "(> {[* ]})\n(> {[ ]})\n(> {⸢* ⸤baz⸥⸣})\n"); + assertVisualize("> * ```\n> foo\n> ```\n", "(> {[* <```>]})\n(> {[ ]})\n(> {[ <```>]})\n"); + } + + @Test + public void inlineText() { + assertInlineSpans("foo", Text.class, SourceSpan.of(0, 
0, 0, 3)); + assertInlineSpans("> foo", Text.class, SourceSpan.of(0, 2, 2, 3)); + assertInlineSpans("* foo", Text.class, SourceSpan.of(0, 2, 2, 3)); + + // SourceSpans should be merged: ` is a separate Text node while inline parsing and gets merged at the end + assertInlineSpans("foo`bar", Text.class, SourceSpan.of(0, 0, 0, 7)); + assertInlineSpans("foo[bar", Text.class, SourceSpan.of(0, 0, 0, 7)); + assertInlineSpans("> foo`bar", Text.class, SourceSpan.of(0, 2, 2, 7)); + + assertInlineSpans("[foo](/url)", Text.class, SourceSpan.of(0, 1, 1, 3)); + assertInlineSpans("*foo*", Text.class, SourceSpan.of(0, 1, 1, 3)); + } + + @Test + public void inlineHeading() { + assertInlineSpans("# foo", Text.class, SourceSpan.of(0, 2, 2, 3)); + assertInlineSpans(" # foo", Text.class, SourceSpan.of(0, 3, 3, 3)); + assertInlineSpans("> # foo", Text.class, SourceSpan.of(0, 4, 4, 3)); + } + + @Test + public void inlineAutolink() { + assertInlineSpans("see ", Link.class, SourceSpan.of(0, 4, 4, 21)); + } + + @Test + public void inlineBackslash() { + assertInlineSpans("\\!", Text.class, SourceSpan.of(0, 0, 0, 2)); + } + + @Test + public void inlineBackticks() { + assertInlineSpans("see `code`", Code.class, SourceSpan.of(0, 4, 4, 6)); + assertInlineSpans("`multi\nline`", Code.class, + SourceSpan.of(0, 0, 0, 6), + SourceSpan.of(1, 0, 7, 5)); + assertInlineSpans("text ```", Text.class, SourceSpan.of(0, 0, 0, 8)); + } + + @Test + public void inlineEntity() { + assertInlineSpans("&", Text.class, SourceSpan.of(0, 0, 0, 5)); + } + + @Test + public void inlineHtml() { + assertInlineSpans("hi there", HtmlInline.class, SourceSpan.of(0, 3, 3, 8)); + } + + @Test + public void links() { + assertInlineSpans("\n[text](/url)", Link.class, SourceSpan.of(1, 0, 1, 12)); + assertInlineSpans("\n[text](/url)", Text.class, SourceSpan.of(1, 1, 2, 4)); + + assertInlineSpans("\n[text]\n\n[text]: /url", Link.class, SourceSpan.of(1, 0, 1, 6)); + assertInlineSpans("\n[text]\n\n[text]: /url", Text.class, 
SourceSpan.of(1, 1, 2, 4)); + assertInlineSpans("\n[text][]\n\n[text]: /url", Link.class, SourceSpan.of(1, 0, 1, 8)); + assertInlineSpans("\n[text][]\n\n[text]: /url", Text.class, SourceSpan.of(1, 1, 2, 4)); + assertInlineSpans("\n[text][ref]\n\n[ref]: /url", Link.class, SourceSpan.of(1, 0, 1, 11)); + assertInlineSpans("\n[text][ref]\n\n[ref]: /url", Text.class, SourceSpan.of(1, 1, 2, 4)); + assertInlineSpans("\n[notalink]", Text.class, SourceSpan.of(1, 0, 1, 10)); + } + + @Test + public void inlineEmphasis() { + assertInlineSpans("\n*hey*", Emphasis.class, SourceSpan.of(1, 0, 1, 5)); + assertInlineSpans("\n*hey*", Text.class, SourceSpan.of(1, 1, 2, 3)); + assertInlineSpans("\n**hey**", StrongEmphasis.class, SourceSpan.of(1, 0, 1, 7)); + assertInlineSpans("\n**hey**", Text.class, SourceSpan.of(1, 2, 3, 3)); + + // This is an interesting one. It renders like this: + //

    *hey

    + // The delimiter processor only uses one of the asterisks. + // So the first Text node should be the `*` at the beginning with the correct span. + assertInlineSpans("\n**hey*", Text.class, SourceSpan.of(1, 0, 1, 1)); + assertInlineSpans("\n**hey*", Emphasis.class, SourceSpan.of(1, 1, 2, 5)); + + assertInlineSpans("\n***hey**", Text.class, SourceSpan.of(1, 0, 1, 1)); + assertInlineSpans("\n***hey**", StrongEmphasis.class, SourceSpan.of(1, 1, 2, 7)); + + Node document = INLINES_PARSER.parse("*hey**"); + Node lastText = document.getFirstChild().getLastChild(); + assertThat(lastText.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 5, 5, 1))); + } + + @Test + public void tabExpansion() { + assertInlineSpans(">\tfoo", BlockQuote.class, SourceSpan.of(0, 0, 0, 5)); + assertInlineSpans(">\tfoo", Text.class, SourceSpan.of(0, 2, 2, 3)); + + assertInlineSpans("a\tb", Text.class, SourceSpan.of(0, 0, 0, 3)); + } + + @Test + public void differentLineTerminators() { + var input = "foo\nbar\rbaz\r\nqux\r\n\r\n> *hi*"; + assertSpans(input, Paragraph.class, + SourceSpan.of(0, 0, 0, 3), + SourceSpan.of(1, 0, 4, 3), + SourceSpan.of(2, 0, 8, 3), + SourceSpan.of(3, 0, 13, 3)); + assertSpans(input, BlockQuote.class, + SourceSpan.of(5, 0, 20, 6)); + + assertInlineSpans(input, Emphasis.class, SourceSpan.of(5, 2, 22, 4)); + } + + private void assertVisualize(String source, String expected) { + var doc = PARSER.parse(source); + assertThat(SourceSpanRenderer.renderWithLineColumn(doc, source)).isEqualTo(expected); + assertThat(SourceSpanRenderer.renderWithInputIndex(doc, source)).isEqualTo(expected); + } + + private static void assertSpans(String input, Class nodeClass, SourceSpan... 
expectedSourceSpans) { + assertSpans(PARSER.parse(input), nodeClass, expectedSourceSpans); + try { + assertSpans(PARSER.parseReader(new StringReader(input)), nodeClass, expectedSourceSpans); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private static void assertInlineSpans(String input, Class nodeClass, SourceSpan... expectedSourceSpans) { + assertSpans(INLINES_PARSER.parse(input), nodeClass, expectedSourceSpans); + try { + assertSpans(INLINES_PARSER.parseReader(new StringReader(input)), nodeClass, expectedSourceSpans); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private static void assertSpans(Node rootNode, Class nodeClass, SourceSpan... expectedSourceSpans) { + Node node = findNode(rootNode, nodeClass); + assertThat(node.getSourceSpans()).isEqualTo(List.of(expectedSourceSpans)); + } + + private static Node findNode(Node rootNode, Class nodeClass) { + Deque nodes = new ArrayDeque<>(); + nodes.add(rootNode); + while (!nodes.isEmpty()) { + Node node = nodes.removeFirst(); + if (nodeClass.isInstance(node)) { + return node; + } + if (node.getFirstChild() != null) { + nodes.addFirst(node.getFirstChild()); + } + if (node.getNext() != null) { + nodes.addLast(node.getNext()); + } + } + throw new AssertionError("Expected to find " + nodeClass + " node"); + } +} diff --git a/commonmark/src/test/java/org/commonmark/test/SpecBenchmark.java b/commonmark/src/test/java/org/commonmark/test/SpecBenchmark.java index 99da7aa25..e7bb080a8 100644 --- a/commonmark/src/test/java/org/commonmark/test/SpecBenchmark.java +++ b/commonmark/src/test/java/org/commonmark/test/SpecBenchmark.java @@ -11,7 +11,6 @@ import org.openjdk.jmh.runner.options.Options; import org.openjdk.jmh.runner.options.OptionsBuilder; -import java.util.Collections; import java.util.List; @State(Scope.Benchmark) @@ -37,7 +36,7 @@ public static void main(String[] args) throws Exception { @Benchmark public long parseWholeSpec() { - return 
parse(Collections.singletonList(SPEC)); + return parse(List.of(SPEC)); } @Benchmark @@ -47,7 +46,7 @@ public long parseExamples() { @Benchmark public long parseAndRenderWholeSpec() { - return parseAndRender(Collections.singletonList(SPEC)); + return parseAndRender(List.of(SPEC)); } @Benchmark diff --git a/commonmark/src/test/java/org/commonmark/test/SpecCoreTest.java b/commonmark/src/test/java/org/commonmark/test/SpecCoreTest.java index 4e416264f..fefd8fb30 100644 --- a/commonmark/src/test/java/org/commonmark/test/SpecCoreTest.java +++ b/commonmark/src/test/java/org/commonmark/test/SpecCoreTest.java @@ -1,15 +1,16 @@ package org.commonmark.test; -import org.commonmark.renderer.html.HtmlRenderer; import org.commonmark.node.AbstractVisitor; import org.commonmark.node.Node; import org.commonmark.node.Text; import org.commonmark.parser.Parser; +import org.commonmark.renderer.html.HtmlRenderer; import org.commonmark.testutil.SpecTestCase; import org.commonmark.testutil.example.Example; -import org.junit.Test; +import org.junit.jupiter.api.Test; -import static org.junit.Assert.fail; +import static org.assertj.core.api.Assertions.fail; +import static org.commonmark.testutil.Asserts.assertRendering; public class SpecCoreTest extends SpecTestCase { @@ -17,10 +18,6 @@ public class SpecCoreTest extends SpecTestCase { // The spec says URL-escaping is optional, but the examples assume that it's enabled. 
private static final HtmlRenderer RENDERER = HtmlRenderer.builder().percentEncodeUrls(true).build(); - public SpecCoreTest(Example example) { - super(example); - } - @Test public void testTextNodesContiguous() { final String source = example.getSource(); @@ -49,8 +46,12 @@ protected void visitChildren(Node parent) { }); } - @Override - protected String render(String source) { + @Test + public void testHtmlRendering() { + assertRendering(example.getSource(), example.getHtml(), render(example.getSource())); + } + + private String render(String source) { return RENDERER.render(PARSER.parse(source)); } } diff --git a/commonmark/src/test/java/org/commonmark/test/SpecCrLfCoreTest.java b/commonmark/src/test/java/org/commonmark/test/SpecCrLfCoreTest.java index 6424ab659..47ca3da4e 100644 --- a/commonmark/src/test/java/org/commonmark/test/SpecCrLfCoreTest.java +++ b/commonmark/src/test/java/org/commonmark/test/SpecCrLfCoreTest.java @@ -4,6 +4,9 @@ import org.commonmark.renderer.html.HtmlRenderer; import org.commonmark.testutil.SpecTestCase; import org.commonmark.testutil.example.Example; +import org.junit.jupiter.api.Test; + +import static org.commonmark.testutil.Asserts.assertRendering; /** * Same as {@link SpecCoreTest} but converts line endings to Windows-style CR+LF endings before parsing. @@ -14,12 +17,12 @@ public class SpecCrLfCoreTest extends SpecTestCase { // The spec says URL-escaping is optional, but the examples assume that it's enabled. 
private static final HtmlRenderer RENDERER = HtmlRenderer.builder().percentEncodeUrls(true).build(); - public SpecCrLfCoreTest(Example example) { - super(example); + @Test + public void testHtmlRendering() { + assertRendering(example.getSource(), example.getHtml(), render(example.getSource())); } - @Override - protected String render(String source) { + private String render(String source) { String windowsStyle = source.replace("\n", "\r\n"); return RENDERER.render(PARSER.parse(windowsStyle)); } diff --git a/commonmark/src/test/java/org/commonmark/test/SpecialInputTest.java b/commonmark/src/test/java/org/commonmark/test/SpecialInputTest.java index a70127a72..45cd3aea2 100644 --- a/commonmark/src/test/java/org/commonmark/test/SpecialInputTest.java +++ b/commonmark/src/test/java/org/commonmark/test/SpecialInputTest.java @@ -1,7 +1,6 @@ package org.commonmark.test; -import org.commonmark.testutil.Strings; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class SpecialInputTest extends CoreRenderingTestCase { @@ -95,14 +94,14 @@ public void linkLabelWithBracket() { @Test public void linkLabelLength() { - String label1 = Strings.repeat("a", 999); + String label1 = "a".repeat(999); assertRendering("[foo][" + label1 + "]\n\n[" + label1 + "]: /", "

    foo

    \n"); assertRendering("[foo][x" + label1 + "]\n\n[x" + label1 + "]: /", "

    [foo][x" + label1 + "]

    \n

    [x" + label1 + "]: /

    \n"); assertRendering("[foo][\n" + label1 + "]\n\n[\n" + label1 + "]: /", "

    [foo][\n" + label1 + "]

    \n

    [\n" + label1 + "]: /

    \n"); - String label2 = Strings.repeat("a\n", 499); + String label2 = "a\n".repeat(499); assertRendering("[foo][" + label2 + "]\n\n[" + label2 + "]: /", "

    foo

    \n"); assertRendering("[foo][12" + label2 + "]\n\n[12" + label2 + "]: /", "

    [foo][12" + label2 + "]

    \n

    [12" + label2 + "]: /

    \n"); @@ -167,4 +166,63 @@ public void deeplyIndentedList() { "\n" + "\n"); } + + @Test + public void trailingTabs() { + // The tab is not treated as 4 spaces here and so does not result in a hard line break, but is just preserved. + // This matches what commonmark.js did at the time of writing. + assertRendering("a\t\nb\n", "

    a\t\nb

    \n"); + } + + @Test + public void unicodePunctuationEmphasis() { + // The character here is: U+12470 CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER + // Which is in Unicode category "Po" and needs 2 code units in UTF-16. That means to implement + // it correctly, we need to check code points, not Java chars. + // Note that currently the reference implementation doesn't implement this correctly (resulting in no ). + assertRendering("foo\uD809\uDC70_(bar)_", "

    foo\uD809\uDC70(bar)

    \n"); + } + + @Test + public void htmlBlockInterruptingList() { + assertRendering("- \n", "
      \n" + + "
    • \n" + + "
    • \n" + + "
    \n"); + + assertRendering("- \n", "
      \n" + + "
    • \n" + + "\n"); + } + + @Test + public void emphasisAfterHardLineBreak() { + assertRendering("Hello \n" + + "**Bar**\n" + + "Foo\n", "

      Hello
      \n" + + "Bar\n" + + "Foo

      \n"); + + assertRendering("Hello \n" + + "**Bar** \n" + + "Foo\n", "

      Hello
      \n" + + "Bar
      \n" + + "Foo

      \n"); + } } diff --git a/commonmark/src/test/java/org/commonmark/test/TextContentRendererTest.java b/commonmark/src/test/java/org/commonmark/test/TextContentRendererTest.java index 7a873b19d..46757e0c3 100644 --- a/commonmark/src/test/java/org/commonmark/test/TextContentRendererTest.java +++ b/commonmark/src/test/java/org/commonmark/test/TextContentRendererTest.java @@ -1,252 +1,187 @@ package org.commonmark.test; -import org.commonmark.renderer.text.TextContentRenderer; +import org.commonmark.node.Link; import org.commonmark.node.Node; +import org.commonmark.renderer.NodeRenderer; +import org.commonmark.renderer.text.LineBreakRendering; +import org.commonmark.renderer.text.TextContentNodeRendererContext; +import org.commonmark.renderer.text.TextContentNodeRendererFactory; +import org.commonmark.renderer.text.TextContentRenderer; import org.commonmark.parser.Parser; -import org.junit.Test; +import org.commonmark.testutil.Asserts; +import org.junit.jupiter.api.Test; -import static org.junit.Assert.assertEquals; +import java.util.Set; public class TextContentRendererTest { + private static final Parser PARSER = Parser.builder().build(); + private static final TextContentRenderer COMPACT_RENDERER = TextContentRenderer.builder().build(); + private static final TextContentRenderer SEPARATE_RENDERER = TextContentRenderer.builder() + .lineBreakRendering(LineBreakRendering.SEPARATE_BLOCKS).build(); + private static final TextContentRenderer STRIPPED_RENDERER = TextContentRenderer.builder() + .lineBreakRendering(LineBreakRendering.STRIP).build(); + @Test public void textContentText() { - String source; - String rendered; - - source = "foo bar"; - rendered = defaultRenderer().render(parse(source)); - assertEquals("foo bar", rendered); - rendered = strippedRenderer().render(parse(source)); - assertEquals("foo bar", rendered); - - source = "foo foo\n\nbar\nbar"; - rendered = defaultRenderer().render(parse(source)); - assertEquals("foo foo\nbar\nbar", rendered); - 
rendered = strippedRenderer().render(parse(source)); - assertEquals("foo foo bar bar", rendered); + String s; + + s = "foo bar"; + assertCompact(s, "foo bar"); + assertStripped(s, "foo bar"); + + s = "foo foo\n\nbar\nbar"; + assertCompact(s, "foo foo\nbar\nbar"); + assertSeparate(s, "foo foo\n\nbar\nbar"); + assertStripped(s, "foo foo bar bar"); + } + + @Test + public void textContentHeading() { + assertCompact("# Heading\n\nFoo", "Heading\nFoo"); + assertSeparate("# Heading\n\nFoo", "Heading\n\nFoo"); + assertStripped("# Heading\n\nFoo", "Heading: Foo"); } @Test public void textContentEmphasis() { - String source; - String rendered; - - source = "***foo***"; - rendered = defaultRenderer().render(parse(source)); - assertEquals("foo", rendered); - rendered = strippedRenderer().render(parse(source)); - assertEquals("foo", rendered); - - source = "foo ***foo*** bar ***bar***"; - rendered = defaultRenderer().render(parse(source)); - assertEquals("foo foo bar bar", rendered); - rendered = strippedRenderer().render(parse(source)); - assertEquals("foo foo bar bar", rendered); - - source = "foo\n***foo***\nbar\n\n***bar***"; - rendered = defaultRenderer().render(parse(source)); - assertEquals("foo\nfoo\nbar\nbar", rendered); - rendered = strippedRenderer().render(parse(source)); - assertEquals("foo foo bar bar", rendered); + String s; + + s = "***foo***"; + assertCompact(s, "foo"); + assertStripped(s, "foo"); + + s = "foo ***foo*** bar ***bar***"; + assertCompact(s, "foo foo bar bar"); + assertStripped(s, "foo foo bar bar"); + + s = "foo\n***foo***\nbar\n\n***bar***"; + assertCompact(s, "foo\nfoo\nbar\nbar"); + assertSeparate(s, "foo\nfoo\nbar\n\nbar"); + assertStripped(s, "foo foo bar bar"); } @Test public void textContentQuotes() { - String source; - String rendered; - - source = "foo\n>foo\nbar\n\nbar"; - rendered = defaultRenderer().render(parse(source)); - assertEquals("foo\n«foo\nbar»\nbar", rendered); - rendered = strippedRenderer().render(parse(source)); - 
assertEquals("foo «foo bar» bar", rendered); + String s; + + s = "foo\n>foo\nbar\n\nbar"; + assertCompact(s, "foo\n«foo\nbar»\nbar"); + assertSeparate(s, "foo\n\n«foo\nbar»\n\nbar"); + assertStripped(s, "foo «foo bar» bar"); } @Test public void textContentLinks() { - String source; - String expected; - String rendered; - - source = "foo [text](http://link \"title\") bar"; - expected = "foo \"text\" (title: http://link) bar"; - rendered = defaultRenderer().render(parse(source)); - assertEquals(expected, rendered); - rendered = strippedRenderer().render(parse(source)); - assertEquals(expected, rendered); - - source = "foo [text](http://link \"http://link\") bar"; - expected = "foo \"text\" (http://link) bar"; - rendered = defaultRenderer().render(parse(source)); - assertEquals(expected, rendered); - rendered = strippedRenderer().render(parse(source)); - assertEquals(expected, rendered); - - source = "foo [text](http://link) bar"; - expected = "foo \"text\" (http://link) bar"; - rendered = defaultRenderer().render(parse(source)); - assertEquals(expected, rendered); - rendered = strippedRenderer().render(parse(source)); - assertEquals(expected, rendered); - - source = "foo [text]() bar"; - expected = "foo \"text\" bar"; - rendered = defaultRenderer().render(parse(source)); - assertEquals(expected, rendered); - rendered = strippedRenderer().render(parse(source)); - assertEquals(expected, rendered); - - source = "foo http://link bar"; - expected = "foo http://link bar"; - rendered = defaultRenderer().render(parse(source)); - assertEquals(expected, rendered); - rendered = strippedRenderer().render(parse(source)); - assertEquals(expected, rendered); + assertAll("foo [text](http://link \"title\") bar", "foo \"text\" (title: http://link) bar"); + assertAll("foo [text](http://link \"http://link\") bar", "foo \"text\" (http://link) bar"); + assertAll("foo [text](http://link) bar", "foo \"text\" (http://link) bar"); + assertAll("foo [text]() bar", "foo \"text\" bar"); + 
assertAll("foo http://link bar", "foo http://link bar"); } @Test public void textContentImages() { - String source; - String expected; - String rendered; - - source = "foo ![text](http://link \"title\") bar"; - expected = "foo \"text\" (title: http://link) bar"; - rendered = defaultRenderer().render(parse(source)); - assertEquals(expected, rendered); - rendered = strippedRenderer().render(parse(source)); - assertEquals(expected, rendered); - - source = "foo ![text](http://link) bar"; - expected = "foo \"text\" (http://link) bar"; - rendered = defaultRenderer().render(parse(source)); - assertEquals(expected, rendered); - rendered = strippedRenderer().render(parse(source)); - assertEquals(expected, rendered); - - source = "foo ![text]() bar"; - expected = "foo \"text\" bar"; - rendered = defaultRenderer().render(parse(source)); - assertEquals(expected, rendered); - rendered = strippedRenderer().render(parse(source)); - assertEquals(expected, rendered); + assertAll("foo ![text](http://link \"title\") bar", "foo \"text\" (title: http://link) bar"); + assertAll("foo ![text](http://link) bar", "foo \"text\" (http://link) bar"); + assertAll("foo ![text]() bar", "foo \"text\" bar"); } @Test public void textContentLists() { - String source; - String rendered; - - source = "foo\n* foo\n* bar\n\nbar"; - rendered = defaultRenderer().render(parse(source)); - assertEquals("foo\n* foo\n* bar\nbar", rendered); - rendered = strippedRenderer().render(parse(source)); - assertEquals("foo foo bar bar", rendered); - - source = "foo\n- foo\n- bar\n\nbar"; - rendered = defaultRenderer().render(parse(source)); - assertEquals("foo\n- foo\n- bar\nbar", rendered); - rendered = strippedRenderer().render(parse(source)); - assertEquals("foo foo bar bar", rendered); - - source = "foo\n1. foo\n2. bar\n\nbar"; - rendered = defaultRenderer().render(parse(source)); - assertEquals("foo\n1. foo\n2. bar\nbar", rendered); - rendered = strippedRenderer().render(parse(source)); - assertEquals("foo 1. 
foo 2. bar bar", rendered); - - source = "foo\n0) foo\n1) bar\n\nbar"; - rendered = defaultRenderer().render(parse(source)); - assertEquals("foo\n0) foo\n1) bar\nbar", rendered); - rendered = strippedRenderer().render(parse(source)); - assertEquals("foo 0) foo 1) bar bar", rendered); - - source = "bar\n1. foo\n 1. bar\n2. foo"; - rendered = defaultRenderer().render(parse(source)); - assertEquals("bar\n1. foo\n 1. bar\n2. foo", rendered); - rendered = strippedRenderer().render(parse(source)); - assertEquals("bar 1. foo 1. bar 2. foo", rendered); - - source = "bar\n* foo\n - bar\n* foo"; - rendered = defaultRenderer().render(parse(source)); - assertEquals("bar\n* foo\n - bar\n* foo", rendered); - rendered = strippedRenderer().render(parse(source)); - assertEquals("bar foo bar foo", rendered); - - source = "bar\n* foo\n 1. bar\n 2. bar\n* foo"; - rendered = defaultRenderer().render(parse(source)); - assertEquals("bar\n* foo\n 1. bar\n 2. bar\n* foo", rendered); - rendered = strippedRenderer().render(parse(source)); - assertEquals("bar foo 1. bar 2. bar foo", rendered); - - source = "bar\n1. foo\n * bar\n * bar\n2. foo"; - rendered = defaultRenderer().render(parse(source)); - assertEquals("bar\n1. foo\n * bar\n * bar\n2. foo", rendered); - rendered = strippedRenderer().render(parse(source)); - assertEquals("bar 1. foo bar bar 2. foo", rendered); + String s; + + s = "foo\n* foo\n* bar\n\nbar"; + assertCompact(s, "foo\n* foo\n* bar\nbar"); + assertSeparate(s, "foo\n\n* foo\n* bar\n\nbar"); + assertStripped(s, "foo foo bar bar"); + + s = "foo\n- foo\n- bar\n\nbar"; + assertCompact(s, "foo\n- foo\n- bar\nbar"); + assertSeparate(s, "foo\n\n- foo\n- bar\n\nbar"); + assertStripped(s, "foo foo bar bar"); + + s = "foo\n1. foo\n2. bar\n\nbar"; + assertCompact(s, "foo\n1. foo\n2. bar\nbar"); + assertSeparate(s, "foo\n\n1. foo\n2. bar\n\nbar"); + assertStripped(s, "foo 1. foo 2. 
bar bar"); + + s = "foo\n0) foo\n1) bar\n\nbar"; + assertCompact(s, "foo\n0) foo\n1) bar\nbar"); + assertSeparate(s, "foo\n0) foo\n\n1) bar\n\nbar"); + assertStripped(s, "foo 0) foo 1) bar bar"); + + s = "bar\n1. foo\n 1. bar\n2. foo"; + assertCompact(s, "bar\n1. foo\n 1. bar\n2. foo"); + assertSeparate(s, "bar\n\n1. foo\n 1. bar\n2. foo"); + assertStripped(s, "bar 1. foo 1. bar 2. foo"); + + s = "bar\n* foo\n - bar\n* foo"; + assertCompact(s, "bar\n* foo\n - bar\n* foo"); + assertSeparate(s, "bar\n\n* foo\n - bar\n* foo"); + assertStripped(s, "bar foo bar foo"); + + s = "bar\n* foo\n 1. bar\n 2. bar\n* foo"; + assertCompact(s, "bar\n* foo\n 1. bar\n 2. bar\n* foo"); + assertSeparate(s, "bar\n\n* foo\n 1. bar\n 2. bar\n* foo"); + assertStripped(s, "bar foo 1. bar 2. bar foo"); + + s = "bar\n1. foo\n * bar\n * bar\n2. foo"; + assertCompact(s, "bar\n1. foo\n * bar\n * bar\n2. foo"); + assertSeparate(s, "bar\n\n1. foo\n * bar\n * bar\n2. foo"); + assertStripped(s, "bar 1. foo bar bar 2. foo"); + + // For a loose list (not tight) + s = "foo\n\n* bar\n\n* baz"; + // Compact ignores loose + assertCompact(s, "foo\n* bar\n* baz"); + // Separate preserves it + assertSeparate(s, "foo\n\n* bar\n\n* baz"); + assertStripped(s, "foo bar baz"); + } @Test public void textContentCode() { - String source; - String expected; - String rendered; - - source = "foo `code` bar"; - expected = "foo \"code\" bar"; - rendered = defaultRenderer().render(parse(source)); - assertEquals(expected, rendered); - rendered = strippedRenderer().render(parse(source)); - assertEquals(expected, rendered); + assertAll("foo `code` bar", "foo \"code\" bar"); } @Test public void textContentCodeBlock() { - String source; - String rendered; - - source = "foo\n```\nfoo\nbar\n```\nbar"; - rendered = defaultRenderer().render(parse(source)); - assertEquals("foo\nfoo\nbar\nbar", rendered); - rendered = strippedRenderer().render(parse(source)); - assertEquals("foo foo bar bar", rendered); - - source = "foo\n\n foo\n 
bar\nbar"; - rendered = defaultRenderer().render(parse(source)); - assertEquals("foo\nfoo\n bar\nbar", rendered); - rendered = strippedRenderer().render(parse(source)); - assertEquals("foo foo bar bar", rendered); + String s; + s = "foo\n```\nfoo\nbar\n```\nbar"; + assertCompact(s, "foo\nfoo\nbar\nbar"); + assertSeparate(s, "foo\n\nfoo\nbar\n\nbar"); + assertStripped(s, "foo foo bar bar"); + + s = "foo\n\n foo\n bar\nbar"; + assertCompact(s, "foo\nfoo\n bar\nbar"); + assertSeparate(s, "foo\n\nfoo\n bar\n\nbar"); + assertStripped(s, "foo foo bar bar"); } @Test - public void textContentBrakes() { - String source; - String rendered; - - source = "foo\nbar"; - rendered = defaultRenderer().render(parse(source)); - assertEquals("foo\nbar", rendered); - rendered = strippedRenderer().render(parse(source)); - assertEquals("foo bar", rendered); - - source = "foo \nbar"; - rendered = defaultRenderer().render(parse(source)); - assertEquals("foo\nbar", rendered); - rendered = strippedRenderer().render(parse(source)); - assertEquals("foo bar", rendered); - - source = "foo\n___\nbar"; - rendered = defaultRenderer().render(parse(source)); - assertEquals("foo\n***\nbar", rendered); - rendered = strippedRenderer().render(parse(source)); - assertEquals("foo bar", rendered); + public void textContentBreaks() { + String s; + + s = "foo\nbar"; + assertCompact(s, "foo\nbar"); + assertSeparate(s, "foo\nbar"); + assertStripped(s, "foo bar"); + + s = "foo \nbar"; + assertCompact(s, "foo\nbar"); + assertSeparate(s, "foo\nbar"); + assertStripped(s, "foo bar"); + + s = "foo\n___\nbar"; + assertCompact(s, "foo\n***\nbar"); + assertSeparate(s, "foo\n\n***\n\nbar"); + assertStripped(s, "foo bar"); } @Test public void textContentHtml() { - String rendered; - String html = "\n" + " \n" + " \n" + " \n" + "
      \n" + @@ -254,23 +189,80 @@ public void textContentHtml() { "
      "; - rendered = defaultRenderer().render(parse(html)); - assertEquals(html, rendered); + assertCompact(html, html); + assertSeparate(html, html); html = "foo foobar bar"; - rendered = defaultRenderer().render(parse(html)); - assertEquals(html, rendered); + assertAll(html, html); + } + + @Test + public void testContentNestedLists() { + var s = "List:\n" + + "1. 2) 3. \n" + + "end"; + assertCompact(s, s); + + var s2 = "1. A\n 1) B\n 1. Test"; + assertCompact(s2, s2); + } + + @Test + public void testOverrideNodeRendering() { + var nodeRendererFactory = new TextContentNodeRendererFactory() { + @Override + public NodeRenderer create(TextContentNodeRendererContext context) { + return new NodeRenderer() { + + @Override + public Set> getNodeTypes() { + return Set.of(Link.class); + } + + @Override + public void render(Node node) { + context.getWriter().write('"'); + renderChildren(node); + context.getWriter().write('"'); + } + + private void renderChildren(Node parent) { + Node node = parent.getFirstChild(); + while (node != null) { + Node next = node.getNext(); + context.render(node); + node = next; + } + } + }; + } + }; + var renderer = TextContentRenderer.builder().nodeRendererFactory(nodeRendererFactory).build(); + var source = "Hi [Example](https://example.com)"; + Asserts.assertRendering(source, "Hi \"Example\"", renderer.render(PARSER.parse(source))); + } + + private void assertCompact(String source, String expected) { + var doc = PARSER.parse(source); + var actualRendering = COMPACT_RENDERER.render(doc); + Asserts.assertRendering(source, expected, actualRendering); } - private TextContentRenderer defaultRenderer() { - return TextContentRenderer.builder().build(); + private void assertSeparate(String source, String expected) { + var doc = PARSER.parse(source); + var actualRendering = SEPARATE_RENDERER.render(doc); + Asserts.assertRendering(source, expected, actualRendering); } - private TextContentRenderer strippedRenderer() { - return 
TextContentRenderer.builder().stripNewlines(true).build(); + private void assertStripped(String source, String expected) { + var doc = PARSER.parse(source); + var actualRendering = STRIPPED_RENDERER.render(doc); + Asserts.assertRendering(source, expected, actualRendering); } - private Node parse(String source) { - return Parser.builder().build().parse(source); + private void assertAll(String source, String expected) { + assertCompact(source, expected); + assertSeparate(source, expected); + assertStripped(source, expected); } } diff --git a/commonmark/src/test/java/org/commonmark/test/TextContentWriterTest.java b/commonmark/src/test/java/org/commonmark/test/TextContentWriterTest.java index 0be668a70..a9f37792e 100644 --- a/commonmark/src/test/java/org/commonmark/test/TextContentWriterTest.java +++ b/commonmark/src/test/java/org/commonmark/test/TextContentWriterTest.java @@ -1,9 +1,9 @@ package org.commonmark.test; import org.commonmark.renderer.text.TextContentWriter; -import org.junit.Test; +import org.junit.jupiter.api.Test; -import static org.junit.Assert.*; +import static org.assertj.core.api.Assertions.assertThat; public class TextContentWriterTest { @@ -14,7 +14,7 @@ public void whitespace() throws Exception { writer.write("foo"); writer.whitespace(); writer.write("bar"); - assertEquals("foo bar", stringBuilder.toString()); + assertThat(stringBuilder.toString()).isEqualTo("foo bar"); } @Test @@ -24,7 +24,7 @@ public void colon() throws Exception { writer.write("foo"); writer.colon(); writer.write("bar"); - assertEquals("foo:bar", stringBuilder.toString()); + assertThat(stringBuilder.toString()).isEqualTo("foo:bar"); } @Test @@ -34,7 +34,7 @@ public void line() throws Exception { writer.write("foo"); writer.line(); writer.write("bar"); - assertEquals("foo\nbar", stringBuilder.toString()); + assertThat(stringBuilder.toString()).isEqualTo("foo\nbar"); } @Test @@ -42,7 +42,7 @@ public void writeStripped() throws Exception { StringBuilder stringBuilder = new 
StringBuilder(); TextContentWriter writer = new TextContentWriter(stringBuilder); writer.writeStripped("foo\n bar"); - assertEquals("foo bar", stringBuilder.toString()); + assertThat(stringBuilder.toString()).isEqualTo("foo bar"); } @Test @@ -50,6 +50,6 @@ public void write() throws Exception { StringBuilder stringBuilder = new StringBuilder(); TextContentWriter writer = new TextContentWriter(stringBuilder); writer.writeStripped("foo bar"); - assertEquals("foo bar", stringBuilder.toString()); + assertThat(stringBuilder.toString()).isEqualTo("foo bar"); } } diff --git a/commonmark/src/test/java/org/commonmark/test/ThematicBreakParserTest.java b/commonmark/src/test/java/org/commonmark/test/ThematicBreakParserTest.java new file mode 100644 index 000000000..1d564cca2 --- /dev/null +++ b/commonmark/src/test/java/org/commonmark/test/ThematicBreakParserTest.java @@ -0,0 +1,25 @@ +package org.commonmark.test; + +import org.commonmark.node.ThematicBreak; +import org.commonmark.parser.Parser; +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; + +public class ThematicBreakParserTest { + + private static final Parser PARSER = Parser.builder().build(); + + @Test + public void testLiteral() { + assertLiteral("***", "***"); + assertLiteral("-- -", "-- -"); + assertLiteral(" __ __ __ ", " __ __ __ "); + assertLiteral("***", "> ***"); + } + + private static void assertLiteral(String expected, String input) { + var tb = Nodes.find(PARSER.parse(input), ThematicBreak.class); + assertThat(tb.getLiteral()).isEqualTo(expected); + } +} diff --git a/commonmark/src/test/java/org/commonmark/test/UsageExampleTest.java b/commonmark/src/test/java/org/commonmark/test/UsageExampleTest.java index 9ff646630..20cd9f5ab 100644 --- a/commonmark/src/test/java/org/commonmark/test/UsageExampleTest.java +++ b/commonmark/src/test/java/org/commonmark/test/UsageExampleTest.java @@ -1,34 +1,47 @@ package org.commonmark.test; import org.commonmark.node.*; +import 
org.commonmark.parser.IncludeSourceSpans; import org.commonmark.parser.Parser; import org.commonmark.renderer.NodeRenderer; import org.commonmark.renderer.html.*; -import org.junit.Ignore; -import org.junit.Test; +import org.commonmark.renderer.markdown.MarkdownRenderer; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.nio.charset.StandardCharsets; -import java.util.Collections; import java.util.Map; import java.util.Set; -import static org.junit.Assert.assertEquals; +import static org.assertj.core.api.Assertions.assertThat; public class UsageExampleTest { @Test public void parseAndRender() { Parser parser = Parser.builder().build(); - Node document = parser.parse("This is *Sparta*"); + Node document = parser.parse("This is *Markdown*"); HtmlRenderer renderer = HtmlRenderer.builder().escapeHtml(true).build(); - assertEquals("

      This is Sparta

      \n", renderer.render(document)); + assertThat(renderer.render(document)).isEqualTo("

      This is Markdown

      \n"); } @Test - @Ignore + public void renderToMarkdown() { + MarkdownRenderer renderer = MarkdownRenderer.builder().build(); + Node document = new Document(); + Heading heading = new Heading(); + heading.setLevel(2); + heading.appendChild(new Text("My title")); + document.appendChild(heading); + + assertThat(renderer.render(document)).isEqualTo("## My title\n"); + } + + @Test + @Disabled public void parseReaderRender() throws IOException { Parser parser = Parser.builder().build(); try (InputStreamReader reader = new InputStreamReader(new FileInputStream("file.md"), StandardCharsets.UTF_8)) { @@ -43,7 +56,22 @@ public void visitor() { Node node = parser.parse("Example\n=======\n\nSome more text"); WordCountVisitor visitor = new WordCountVisitor(); node.accept(visitor); - assertEquals(4, visitor.wordCount); + assertThat(visitor.wordCount).isEqualTo(4); + } + + @Test + public void sourcePositions() { + var parser = Parser.builder().includeSourceSpans(IncludeSourceSpans.BLOCKS_AND_INLINES).build(); + + var source = "foo\n\nbar *baz*"; + var doc = parser.parse(source); + var emphasis = doc.getLastChild().getLastChild(); + var s = emphasis.getSourceSpans().get(0); + assertThat(s.getLineIndex()).isEqualTo(2); + assertThat(s.getColumnIndex()).isEqualTo(4); + assertThat(s.getInputIndex()).isEqualTo(9); + assertThat(s.getLength()).isEqualTo(5); + assertThat(source.substring(s.getInputIndex(), s.getInputIndex() + s.getLength())).isEqualTo("*baz*"); } @Test @@ -51,6 +79,7 @@ public void addAttributes() { Parser parser = Parser.builder().build(); HtmlRenderer renderer = HtmlRenderer.builder() .attributeProviderFactory(new AttributeProviderFactory() { + @Override public AttributeProvider create(AttributeProviderContext context) { return new ImageAttributeProvider(); } @@ -58,8 +87,7 @@ public AttributeProvider create(AttributeProviderContext context) { .build(); Node document = parser.parse("![text](/url.png)"); - assertEquals("

      \"text\"

      \n", - renderer.render(document)); + assertThat(renderer.render(document)).isEqualTo("

      \"text\"

      \n"); } @Test @@ -67,6 +95,7 @@ public void customizeRendering() { Parser parser = Parser.builder().build(); HtmlRenderer renderer = HtmlRenderer.builder() .nodeRendererFactory(new HtmlNodeRendererFactory() { + @Override public NodeRenderer create(HtmlNodeRendererContext context) { return new IndentedCodeBlockNodeRenderer(context); } @@ -74,7 +103,7 @@ public NodeRenderer create(HtmlNodeRendererContext context) { .build(); Node document = parser.parse("Example:\n\n code"); - assertEquals("

      Example:

      \n
      code\n
      \n", renderer.render(document)); + assertThat(renderer.render(document)).isEqualTo("

      Example:

      \n
      code\n
      \n"); } class WordCountVisitor extends AbstractVisitor { @@ -113,7 +142,7 @@ class IndentedCodeBlockNodeRenderer implements NodeRenderer { @Override public Set> getNodeTypes() { // Return the node types we want to use this renderer for. - return Collections.>singleton(IndentedCodeBlock.class); + return Set.of(IndentedCodeBlock.class); } @Override diff --git a/commonmark/src/test/java/org/commonmark/text/CharactersTest.java b/commonmark/src/test/java/org/commonmark/text/CharactersTest.java new file mode 100644 index 000000000..99f510cb7 --- /dev/null +++ b/commonmark/src/test/java/org/commonmark/text/CharactersTest.java @@ -0,0 +1,33 @@ +package org.commonmark.text; + +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; + +public class CharactersTest { + + @Test + public void isPunctuation() { + // From https://spec.commonmark.org/0.29/#ascii-punctuation-character + char[] chars = { + '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/', // (U+0021–2F) + ':', ';', '<', '=', '>', '?', '@', // (U+003A–0040) + '[', '\\', ']', '^', '_', '`', // (U+005B–0060) + '{', '|', '}', '~' // (U+007B–007E) + }; + + for (char c : chars) { + assertThat(Characters.isPunctuationCodePoint(c)).as("Expected to be punctuation: " + c).isTrue(); + } + } + + @Test + public void isBlank() { + assertThat(Characters.isBlank("")).isTrue(); + assertThat(Characters.isBlank(" ")).isTrue(); + assertThat(Characters.isBlank("\t")).isTrue(); + assertThat(Characters.isBlank(" \t")).isTrue(); + assertThat(Characters.isBlank("a")).isFalse(); + assertThat(Characters.isBlank("\f")).isFalse(); + } +} diff --git a/etc/update-spec.sh b/etc/update-spec.sh index c31613e5d..0f9def8b3 100755 --- a/etc/update-spec.sh +++ b/etc/update-spec.sh @@ -6,8 +6,8 @@ if [ "$#" -ne 1 ]; then fi version=$1 -curl -L "https://raw.githubusercontent.com/commonmark/CommonMark/$version/spec.txt" -o commonmark-test-util/src/main/resources/spec.txt -curl -L 
"https://raw.githubusercontent.com/github/cmark-gfm/master/test/spec.txt" -o commonmark-ext-gfm-tables/src/test/resources/gfm-spec.txt +curl -L "https://raw.githubusercontent.com/commonmark/commonmark-spec/$version/spec.txt" -o commonmark-test-util/src/main/resources/spec.txt +curl -L "https://raw.githubusercontent.com/github/cmark-gfm/master/test/spec.txt" -o commonmark-test-util/src/main/resources/gfm-spec.txt echo "Check cmark and commonmark.js regression.txt:" echo "https://github.com/commonmark/cmark/blob/master/test/regression.txt" diff --git a/mvnw b/mvnw new file mode 100755 index 000000000..19529ddf8 --- /dev/null +++ b/mvnw @@ -0,0 +1,259 @@ +#!/bin/sh +# ---------------------------------------------------------------------------- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# ---------------------------------------------------------------------------- + +# ---------------------------------------------------------------------------- +# Apache Maven Wrapper startup batch script, version 3.3.2 +# +# Optional ENV vars +# ----------------- +# JAVA_HOME - location of a JDK home dir, required when download maven via java source +# MVNW_REPOURL - repo url base for downloading maven distribution +# MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven +# MVNW_VERBOSE - true: enable verbose log; debug: trace the mvnw script; others: silence the output +# ---------------------------------------------------------------------------- + +set -euf +[ "${MVNW_VERBOSE-}" != debug ] || set -x + +# OS specific support. +native_path() { printf %s\\n "$1"; } +case "$(uname)" in +CYGWIN* | MINGW*) + [ -z "${JAVA_HOME-}" ] || JAVA_HOME="$(cygpath --unix "$JAVA_HOME")" + native_path() { cygpath --path --windows "$1"; } + ;; +esac + +# set JAVACMD and JAVACCMD +set_java_home() { + # For Cygwin and MinGW, ensure paths are in Unix format before anything is touched + if [ -n "${JAVA_HOME-}" ]; then + if [ -x "$JAVA_HOME/jre/sh/java" ]; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + JAVACCMD="$JAVA_HOME/jre/sh/javac" + else + JAVACMD="$JAVA_HOME/bin/java" + JAVACCMD="$JAVA_HOME/bin/javac" + + if [ ! -x "$JAVACMD" ] || [ ! -x "$JAVACCMD" ]; then + echo "The JAVA_HOME environment variable is not defined correctly, so mvnw cannot run." >&2 + echo "JAVA_HOME is set to \"$JAVA_HOME\", but \"\$JAVA_HOME/bin/java\" or \"\$JAVA_HOME/bin/javac\" does not exist." >&2 + return 1 + fi + fi + else + JAVACMD="$( + 'set' +e + 'unset' -f command 2>/dev/null + 'command' -v java + )" || : + JAVACCMD="$( + 'set' +e + 'unset' -f command 2>/dev/null + 'command' -v javac + )" || : + + if [ ! -x "${JAVACMD-}" ] || [ ! 
-x "${JAVACCMD-}" ]; then + echo "The java/javac command does not exist in PATH nor is JAVA_HOME set, so mvnw cannot run." >&2 + return 1 + fi + fi +} + +# hash string like Java String::hashCode +hash_string() { + str="${1:-}" h=0 + while [ -n "$str" ]; do + char="${str%"${str#?}"}" + h=$(((h * 31 + $(LC_CTYPE=C printf %d "'$char")) % 4294967296)) + str="${str#?}" + done + printf %x\\n $h +} + +verbose() { :; } +[ "${MVNW_VERBOSE-}" != true ] || verbose() { printf %s\\n "${1-}"; } + +die() { + printf %s\\n "$1" >&2 + exit 1 +} + +trim() { + # MWRAPPER-139: + # Trims trailing and leading whitespace, carriage returns, tabs, and linefeeds. + # Needed for removing poorly interpreted newline sequences when running in more + # exotic environments such as mingw bash on Windows. + printf "%s" "${1}" | tr -d '[:space:]' +} + +# parse distributionUrl and optional distributionSha256Sum, requires .mvn/wrapper/maven-wrapper.properties +while IFS="=" read -r key value; do + case "${key-}" in + distributionUrl) distributionUrl=$(trim "${value-}") ;; + distributionSha256Sum) distributionSha256Sum=$(trim "${value-}") ;; + esac +done <"${0%/*}/.mvn/wrapper/maven-wrapper.properties" +[ -n "${distributionUrl-}" ] || die "cannot read distributionUrl property in ${0%/*}/.mvn/wrapper/maven-wrapper.properties" + +case "${distributionUrl##*/}" in +maven-mvnd-*bin.*) + MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/ + case "${PROCESSOR_ARCHITECTURE-}${PROCESSOR_ARCHITEW6432-}:$(uname -a)" in + *AMD64:CYGWIN* | *AMD64:MINGW*) distributionPlatform=windows-amd64 ;; + :Darwin*x86_64) distributionPlatform=darwin-amd64 ;; + :Darwin*arm64) distributionPlatform=darwin-aarch64 ;; + :Linux*x86_64*) distributionPlatform=linux-amd64 ;; + *) + echo "Cannot detect native platform for mvnd on $(uname)-$(uname -m), use pure java version" >&2 + distributionPlatform=linux-amd64 + ;; + esac + distributionUrl="${distributionUrl%-bin.*}-$distributionPlatform.zip" + ;; +maven-mvnd-*) MVN_CMD=mvnd.sh 
_MVNW_REPO_PATTERN=/maven/mvnd/ ;; +*) MVN_CMD="mvn${0##*/mvnw}" _MVNW_REPO_PATTERN=/org/apache/maven/ ;; +esac + +# apply MVNW_REPOURL and calculate MAVEN_HOME +# maven home pattern: ~/.m2/wrapper/dists/{apache-maven-,maven-mvnd--}/ +[ -z "${MVNW_REPOURL-}" ] || distributionUrl="$MVNW_REPOURL$_MVNW_REPO_PATTERN${distributionUrl#*"$_MVNW_REPO_PATTERN"}" +distributionUrlName="${distributionUrl##*/}" +distributionUrlNameMain="${distributionUrlName%.*}" +distributionUrlNameMain="${distributionUrlNameMain%-bin}" +MAVEN_USER_HOME="${MAVEN_USER_HOME:-${HOME}/.m2}" +MAVEN_HOME="${MAVEN_USER_HOME}/wrapper/dists/${distributionUrlNameMain-}/$(hash_string "$distributionUrl")" + +exec_maven() { + unset MVNW_VERBOSE MVNW_USERNAME MVNW_PASSWORD MVNW_REPOURL || : + exec "$MAVEN_HOME/bin/$MVN_CMD" "$@" || die "cannot exec $MAVEN_HOME/bin/$MVN_CMD" +} + +if [ -d "$MAVEN_HOME" ]; then + verbose "found existing MAVEN_HOME at $MAVEN_HOME" + exec_maven "$@" +fi + +case "${distributionUrl-}" in +*?-bin.zip | *?maven-mvnd-?*-?*.zip) ;; +*) die "distributionUrl is not valid, must match *-bin.zip or maven-mvnd-*.zip, but found '${distributionUrl-}'" ;; +esac + +# prepare tmp dir +if TMP_DOWNLOAD_DIR="$(mktemp -d)" && [ -d "$TMP_DOWNLOAD_DIR" ]; then + clean() { rm -rf -- "$TMP_DOWNLOAD_DIR"; } + trap clean HUP INT TERM EXIT +else + die "cannot create temp dir" +fi + +mkdir -p -- "${MAVEN_HOME%/*}" + +# Download and Install Apache Maven +verbose "Couldn't find MAVEN_HOME, downloading and installing it ..." +verbose "Downloading from: $distributionUrl" +verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName" + +# select .zip or .tar.gz +if ! 
command -v unzip >/dev/null; then + distributionUrl="${distributionUrl%.zip}.tar.gz" + distributionUrlName="${distributionUrl##*/}" +fi + +# verbose opt +__MVNW_QUIET_WGET=--quiet __MVNW_QUIET_CURL=--silent __MVNW_QUIET_UNZIP=-q __MVNW_QUIET_TAR='' +[ "${MVNW_VERBOSE-}" != true ] || __MVNW_QUIET_WGET='' __MVNW_QUIET_CURL='' __MVNW_QUIET_UNZIP='' __MVNW_QUIET_TAR=v + +# normalize http auth +case "${MVNW_PASSWORD:+has-password}" in +'') MVNW_USERNAME='' MVNW_PASSWORD='' ;; +has-password) [ -n "${MVNW_USERNAME-}" ] || MVNW_USERNAME='' MVNW_PASSWORD='' ;; +esac + +if [ -z "${MVNW_USERNAME-}" ] && command -v wget >/dev/null; then + verbose "Found wget ... using wget" + wget ${__MVNW_QUIET_WGET:+"$__MVNW_QUIET_WGET"} "$distributionUrl" -O "$TMP_DOWNLOAD_DIR/$distributionUrlName" || die "wget: Failed to fetch $distributionUrl" +elif [ -z "${MVNW_USERNAME-}" ] && command -v curl >/dev/null; then + verbose "Found curl ... using curl" + curl ${__MVNW_QUIET_CURL:+"$__MVNW_QUIET_CURL"} -f -L -o "$TMP_DOWNLOAD_DIR/$distributionUrlName" "$distributionUrl" || die "curl: Failed to fetch $distributionUrl" +elif set_java_home; then + verbose "Falling back to use Java to download" + javaSource="$TMP_DOWNLOAD_DIR/Downloader.java" + targetZip="$TMP_DOWNLOAD_DIR/$distributionUrlName" + cat >"$javaSource" <<-END + public class Downloader extends java.net.Authenticator + { + protected java.net.PasswordAuthentication getPasswordAuthentication() + { + return new java.net.PasswordAuthentication( System.getenv( "MVNW_USERNAME" ), System.getenv( "MVNW_PASSWORD" ).toCharArray() ); + } + public static void main( String[] args ) throws Exception + { + setDefault( new Downloader() ); + java.nio.file.Files.copy( java.net.URI.create( args[0] ).toURL().openStream(), java.nio.file.Paths.get( args[1] ).toAbsolutePath().normalize() ); + } + } + END + # For Cygwin/MinGW, switch paths to Windows format before running javac and java + verbose " - Compiling Downloader.java ..." 
+ "$(native_path "$JAVACCMD")" "$(native_path "$javaSource")" || die "Failed to compile Downloader.java" + verbose " - Running Downloader.java ..." + "$(native_path "$JAVACMD")" -cp "$(native_path "$TMP_DOWNLOAD_DIR")" Downloader "$distributionUrl" "$(native_path "$targetZip")" +fi + +# If specified, validate the SHA-256 sum of the Maven distribution zip file +if [ -n "${distributionSha256Sum-}" ]; then + distributionSha256Result=false + if [ "$MVN_CMD" = mvnd.sh ]; then + echo "Checksum validation is not supported for maven-mvnd." >&2 + echo "Please disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." >&2 + exit 1 + elif command -v sha256sum >/dev/null; then + if echo "$distributionSha256Sum $TMP_DOWNLOAD_DIR/$distributionUrlName" | sha256sum -c >/dev/null 2>&1; then + distributionSha256Result=true + fi + elif command -v shasum >/dev/null; then + if echo "$distributionSha256Sum $TMP_DOWNLOAD_DIR/$distributionUrlName" | shasum -a 256 -c >/dev/null 2>&1; then + distributionSha256Result=true + fi + else + echo "Checksum validation was requested but neither 'sha256sum' or 'shasum' are available." >&2 + echo "Please install either command, or disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." >&2 + exit 1 + fi + if [ $distributionSha256Result = false ]; then + echo "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised." >&2 + echo "If you updated your Maven version, you need to update the specified distributionSha256Sum property." 
>&2 + exit 1 + fi +fi + +# unzip and move +if command -v unzip >/dev/null; then + unzip ${__MVNW_QUIET_UNZIP:+"$__MVNW_QUIET_UNZIP"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -d "$TMP_DOWNLOAD_DIR" || die "failed to unzip" +else + tar xzf${__MVNW_QUIET_TAR:+"$__MVNW_QUIET_TAR"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -C "$TMP_DOWNLOAD_DIR" || die "failed to untar" +fi +printf %s\\n "$distributionUrl" >"$TMP_DOWNLOAD_DIR/$distributionUrlNameMain/mvnw.url" +mv -- "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" "$MAVEN_HOME" || [ -d "$MAVEN_HOME" ] || die "fail to move MAVEN_HOME" + +clean || : +exec_maven "$@" diff --git a/pom.xml b/pom.xml index ae01c90f1..f12805316 100644 --- a/pom.xml +++ b/pom.xml @@ -1,31 +1,29 @@ 4.0.0 - - com.atlassian.pom - central-pom - 5.0.13 - - pom - com.atlassian.commonmark + org.commonmark commonmark-parent - 0.14.1-SNAPSHOT + 0.28.1-SNAPSHOT commonmark-java parent Java implementation of CommonMark, a specification of the Markdown format for turning plain text into formatted text. 
- https://github.com/atlassian/commonmark-java + https://github.com/commonmark/commonmark-java commonmark commonmark-ext-autolink + commonmark-ext-footnotes + commonmark-ext-gfm-alerts commonmark-ext-gfm-strikethrough commonmark-ext-gfm-tables commonmark-ext-heading-anchor + commonmark-ext-image-attributes commonmark-ext-ins + commonmark-ext-task-list-items commonmark-ext-yaml-front-matter commonmark-integration-test commonmark-test-util @@ -42,109 +40,234 @@ org.apache.maven.plugins maven-compiler-plugin - 3.7.0 + 3.14.0 - 7 - 7 + 11 org.apache.maven.plugins maven-jar-plugin - 3.0.2 + 3.4.2 + + + ${project.build.outputDirectory}/META-INF/MANIFEST.MF + + + + + org.apache.maven.plugins + maven-install-plugin + 3.1.4 org.apache.maven.plugins maven-javadoc-plugin + 3.11.2 *.internal,*.internal.* false - http://static.javadoc.io/com.atlassian.commonmark/commonmark/${project.version}/ + https://static.javadoc.io/org.commonmark/commonmark/${project.version}/ + ${commonmark.javadoc.location} - - org.apache.maven.plugins - maven-release-plugin - org.apache.maven.plugins maven-surefire-plugin - 2.22.1 + 3.5.3 + + + + + org.sonatype.central + central-publishing-maven-plugin + 0.8.0 + true + + central + true + published + + + + org.apache.maven.plugins + maven-release-plugin + 3.1.1 + + true + false + release + deploy + + + + org.apache.felix + maven-bundle-plugin + + 5.1.9 + + + bundle-manifest + process-classes + + manifest + + + + + - com.atlassian.commonmark + org.commonmark commonmark - 0.14.1-SNAPSHOT + 0.28.1-SNAPSHOT - com.atlassian.commonmark + org.commonmark commonmark-ext-autolink - 0.14.1-SNAPSHOT + 0.28.1-SNAPSHOT - com.atlassian.commonmark + org.commonmark + commonmark-ext-footnotes + 0.28.1-SNAPSHOT + + + org.commonmark + commonmark-ext-image-attributes + 0.28.1-SNAPSHOT + + + org.commonmark commonmark-ext-ins - 0.14.1-SNAPSHOT + 0.28.1-SNAPSHOT + + + org.commonmark + commonmark-ext-gfm-alerts + 0.28.1-SNAPSHOT - com.atlassian.commonmark + org.commonmark 
commonmark-ext-gfm-strikethrough - 0.14.1-SNAPSHOT + 0.28.1-SNAPSHOT - com.atlassian.commonmark + org.commonmark commonmark-ext-gfm-tables - 0.14.1-SNAPSHOT + 0.28.1-SNAPSHOT - com.atlassian.commonmark + org.commonmark commonmark-ext-heading-anchor - 0.14.1-SNAPSHOT + 0.28.1-SNAPSHOT - com.atlassian.commonmark + org.commonmark + commonmark-ext-task-list-items + 0.28.1-SNAPSHOT + + + org.commonmark commonmark-ext-yaml-front-matter - 0.14.1-SNAPSHOT + 0.28.1-SNAPSHOT - com.atlassian.commonmark + org.commonmark commonmark-test-util - 0.14.1-SNAPSHOT + 0.28.1-SNAPSHOT - junit - junit - 4.12 + org.junit.jupiter + junit-jupiter + 5.13.1 + + + org.assertj + assertj-core + 3.27.7 org.openjdk.jmh jmh-core - 1.17.5 + 1.37 org.openjdk.jmh jmh-generator-annprocess - 1.17.5 + 1.37 + + release + + + + org.apache.maven.plugins + maven-source-plugin + 3.3.1 + + + attach-sources + + jar-no-fork + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + + + attach-javadocs + + jar + + + + + + org.apache.maven.plugins + maven-gpg-plugin + 3.2.7 + + + sign-artifacts + verify + + sign + + + + --pinentry-mode + loopback + + + + + + + + coverage @@ -152,7 +275,7 @@ org.jacoco jacoco-maven-plugin - 0.7.9 + 0.8.13 @@ -176,8 +299,8 @@ - BSD 2-Clause License - http://opensource.org/licenses/BSD-2-Clause + BSD-2-Clause + https://opensource.org/licenses/BSD-2-Clause repo @@ -185,16 +308,13 @@ Robin Stocker - rstocker@atlassian.com - Atlassian - https://www.atlassian.com/ - scm:git:git@github.com:atlassian/commonmark-java.git - scm:git:git@github.com:atlassian/commonmark-java.git - https://github.com/atlassian/commonmark-java + scm:git:https://github.com/commonmark/commonmark-java + scm:git:https://github.com/commonmark/commonmark-java + https://github.com/commonmark/commonmark-java HEAD diff --git a/renovate.json b/renovate.json new file mode 100644 index 000000000..f45d8f110 --- /dev/null +++ b/renovate.json @@ -0,0 +1,5 @@ +{ + "extends": [ + "config:base" + ] +}