Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Comply with CommonMark 0.31.2 #416

Merged
merged 15 commits into from
Apr 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Fixes:
intervals. (#408, #419)
- Do not misinterpret bracketed e-mails as citations. (#424, #426,
sponsored by @istqborg)
- Comply with CommonMark 0.31.2. (#416, contributed by @lostenderman)

Documentation:

Expand Down
105 changes: 81 additions & 24 deletions markdown.dtx
Original file line number Diff line number Diff line change
Expand Up @@ -22063,19 +22063,6 @@ end
% \par
% \begin{markdown}
%
% The \luamdef{util.lookup_files} method looks up files with filename `f`
% and returns their paths. Further options for the \pkg{Kpathsea} library
% can be specified in table `options`. [@luatex21, Section 10.7.4]
%
% \end{markdown}
% \begin{macrocode}
-- Thin wrapper around `kpse.lookup`: both arguments are passed through
-- untouched and every result of the call is returned to the caller as-is.
function util.lookup_files(f, options)
return kpse.lookup(f, options)
end
% \end{macrocode}
% \par
% \begin{markdown}
%
% The \luamdef{util.expand_tabs_in_line} expands tabs in string `s`. If
% `tabstop` is specified, it is used as the tab stop width. Otherwise,
% the tab stop width of 4 characters is used. The method is a copy of the tab
Expand Down Expand Up @@ -24523,6 +24510,38 @@ end
% \par
% \begin{markdown}
%
%### Unicode punctuation
% This section documents [the Unicode punctuation][unicode-punctuation]
% recognized by the markdown reader. The punctuation is organized in the
% \luamdef{punctuation} table according to the number of bytes occupied after
% conversion to \acro{utf}8.
%
% [unicode-punctuation]: https://spec.commonmark.org/0.31.2/#unicode-punctuation-character
% (CommonMark Spec, Version 0.31.2 (2024-01-28))
%
% \end{markdown}
% \begin{macrocode}
local punctuation = {}
(function()
  -- Resolve the path first and fail with a readable message if the lookup
  -- yields nothing; otherwise `io.open` would be handed a non-string and
  -- raise a generic bad-argument error before our own message is shown.
  local pathname = assert(kpse.lookup("UnicodeData.txt"),
                          [[Could not locate file "UnicodeData.txt"]])
  local file = assert(io.open(pathname, "r"),
                      [[Could not open file "UnicodeData.txt"]])
  for line in file:lines() do
    -- Capture the first field (hexadecimal code point) and the first
    -- letter of the third field (the major general category).
    local codepoint, major_category = line:match("^(%x+);[^;]*;(%a)")
    -- Keep the characters from the P (punctuation) and S (symbol)
    -- categories only.
    if major_category == "P" or major_category == "S" then
      local code = unicode.utf8.char(tonumber(codepoint, 16))
      -- Group the encoded characters by their length in bytes.
      local size = #code
      punctuation[size] = punctuation[size] or {}
      table.insert(punctuation[size], code)
    end
  end
  assert(file:close())
end)()
% \end{macrocode}
% \par
% \begin{markdown}
%
%### Plain \TeX{} Writer {#tex-writer}
%
% This section documents the \luamref{writer} object, which implements the
Expand Down Expand Up @@ -25778,7 +25797,6 @@ parsers.letter = R("AZ","az")
parsers.alphanumeric = R("AZ","az","09")
parsers.keyword = parsers.letter
* (parsers.alphanumeric + parsers.dash)^0
parsers.internal_punctuation = S(":;,.?")

parsers.doubleasterisks = P("**")
parsers.doubleunderscores = P("__")
Expand All @@ -25789,7 +25807,40 @@ parsers.any = P(1)
parsers.succeed = P(true)
parsers.fail = P(false)

parsers.escapable = S("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~")
parsers.internal_punctuation = S(":;,.?")
parsers.ascii_punctuation = S("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~")
parsers.punctuation = {}
(function()
  -- Build one parser per encoded length (1 to 4 bytes). Each parser is an
  -- ordered choice over all punctuation characters of that length.
  for size = 1, 4 do
    local codepoint_parser = parsers.fail
    if size == 1 then
      -- Single-byte punctuation always includes the ASCII punctuation set.
      codepoint_parser = codepoint_parser + parsers.ascii_punctuation
    end
    -- A length with no recorded characters contributes no alternatives.
    for _, code in ipairs(punctuation[size] or {}) do
      assert(#code == size)
      -- `P(code)` matches the byte string literally, which is the same
      -- language as concatenating one single-character pattern per byte.
      codepoint_parser = codepoint_parser + P(code)
    end
    parsers.punctuation[size] = codepoint_parser
  end
end)()
% \end{macrocode}
% \par
% \begin{markdown}
%
% Here, we garbage-collect the \luamref{punctuation} table, since we won't need it anymore.
%
% \end{markdown}
% \begin{macrocode}
punctuation = nil
collectgarbage("collect")

Witiko marked this conversation as resolved.
Show resolved Hide resolved
parsers.escapable = parsers.ascii_punctuation
parsers.anyescaped = parsers.backslash / "" * parsers.escapable
+ parsers.any

Expand Down Expand Up @@ -26982,7 +27033,7 @@ parsers.html_comment = Cs( parsers.html_comment_start

parsers.html_inline_comment = (parsers.html_comment_start / "")
* -P(">") * -P("->")
* Cs((V("NoSoftLineBreakEndline") + parsers.any - P("--")
* Cs((V("NoSoftLineBreakEndline") + parsers.any
- parsers.nested_breaking_blank - parsers.html_comment_end)^0)
* (parsers.html_comment_end / "")

Expand Down Expand Up @@ -28032,7 +28083,6 @@ function M.reader.new(writer, options)
return lpeg.R("\240\244") * cont * cont * cont
end
end

% \end{macrocode}
% \begin{markdown}
%
Expand All @@ -28050,23 +28100,30 @@ function M.reader.new(writer, options)
else
char_length = pos + 1
end
c = lpeg.match({ C(utf8_by_byte_count(char_length)) },s,i+pos)
if (c ~= nil) and (unicode.utf8.match(c, chartype)) then
return i

if (chartype == "punctuation") then
if lpeg.match(parsers.punctuation[char_length], s, i+pos) then
return i
end
else
c = lpeg.match({ C(utf8_by_byte_count(char_length)) },s,i+pos)
if (c ~= nil) and (unicode.utf8.match(c, chartype)) then
return i
end
end
end
end

local function check_preceding_unicode_punctuation(s, i)
return check_unicode_type(s, i, -4, -1, "%p")
return check_unicode_type(s, i, -4, -1, "punctuation")
Witiko marked this conversation as resolved.
Show resolved Hide resolved
end

local function check_preceding_unicode_whitespace(s, i)
return check_unicode_type(s, i, -4, -1, "%s")
end

local function check_following_unicode_punctuation(s, i)
return check_unicode_type(s, i, 0, 3, "%p")
return check_unicode_type(s, i, 0, 3, "punctuation")
end

local function check_following_unicode_whitespace(s, i)
Expand Down Expand Up @@ -30050,7 +30107,7 @@ M.extensions.content_blocks = function(language_map)
% \begin{macrocode}
local languages_json = (function()
local base, prev, curr
for _, pathname in ipairs{util.lookup_files(language_map, { all=true })} do
for _, pathname in ipairs{kpse.lookup(language_map, { all=true })} do
local file = io.open(pathname, "r")
if not file then goto continue end
local input = assert(file:read("*a"))
Expand Down Expand Up @@ -32327,7 +32384,7 @@ function M.new(options)
%
% \end{markdown}
% \begin{macrocode}
local pathname = util.lookup_files(filename)
local pathname = kpse.lookup(filename)
local input_file = assert(io.open(pathname, "r"),
[[Could not open user-defined syntax extension "]]
.. pathname .. [[" for reading]])
Expand Down
16 changes: 16 additions & 0 deletions tests/testfiles/CommonMark_0.31.2/autolinks/002.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
% ---RESULT--- "example": 594,
%
% <p><a href="https://foo.bar.baz/test?q=hello&amp;id=22&amp;boolean">https://foo.bar.baz/test?q=hello&amp;id=22&amp;boolean</a></p>
%
% ---\RESULT---

<<<
<https://foo.bar.baz/test?q=hello&id=22&boolean>
>>>
BEGIN document
BEGIN link
- label: https://foo.bar.baz/test?q=hello(ampersand)id=22(ampersand)boolean
- URI: https://foo.bar.baz/test?q=hello&id=22&boolean
- title:
END link
END document
11 changes: 11 additions & 0 deletions tests/testfiles/CommonMark_0.31.2/autolinks/009.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
% ---RESULT--- "example": 601,
%
% <p>&lt;https://foo.bar/baz bim&gt;</p>
%
% ---\RESULT---

<<<
<https://foo.bar/baz bim>
>>>
BEGIN document
END document
16 changes: 16 additions & 0 deletions tests/testfiles/CommonMark_0.31.2/autolinks/010.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
% ---RESULT--- "example": 602,
%
% <p><a href="https://example.com/%5C%5B%5C">https://example.com/\[\</a></p>
%
% ---\RESULT---

<<<
<https://example.com/\[\>
>>>
BEGIN document
BEGIN link
- label: https://example.com/(backslash)[(backslash)
- URI: https://example.com/(backslash)[(backslash)
- title:
END link
END document
11 changes: 11 additions & 0 deletions tests/testfiles/CommonMark_0.31.2/autolinks/015.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
% ---RESULT--- "example": 607,
%
% <p>&lt; https://foo.bar &gt;</p>
%
% ---\RESULT---

<<<
< https://foo.bar >
>>>
BEGIN document
END document
11 changes: 11 additions & 0 deletions tests/testfiles/CommonMark_0.31.2/autolinks/018.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
% ---RESULT--- "example": 610,
%
% <p>https://example.com</p>
%
% ---\RESULT---

<<<
https://example.com
>>>
BEGIN document
END document
16 changes: 16 additions & 0 deletions tests/testfiles/CommonMark_0.31.2/backslash_escapes/009.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
% ---RESULT--- "example": 20,
%
% <p><a href="https://example.com?find=%5C*">https://example.com?find=\*</a></p>
%
% ---\RESULT---

<<<
<https://example.com?find=\*>
>>>
BEGIN document
BEGIN link
- label: https://example.com?find=(backslash)*
- URI: https://example.com?find=(backslash)*
- title:
END link
END document
12 changes: 12 additions & 0 deletions tests/testfiles/CommonMark_0.31.2/code_spans/018.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
% ---RESULT--- "example": 345,
%
% <p><code>&lt;https://foo.bar.</code>baz&gt;`</p>
%
% ---\RESULT---

<<<
`<https://foo.bar.`baz>`
>>>
BEGIN document
codeSpan: <https://foo.bar.
END document
16 changes: 16 additions & 0 deletions tests/testfiles/CommonMark_0.31.2/code_spans/019.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
% ---RESULT--- "example": 346,
%
% <p><a href="https://foo.bar.%60baz">https://foo.bar.`baz</a>`</p>
%
% ---\RESULT---

<<<
<https://foo.bar.`baz>`
>>>
BEGIN document
BEGIN link
- label: https://foo.bar.`baz
- URI: https://foo.bar.`baz
- title:
END link
END document
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
% ---RESULT--- "example": 479,
%
% <p>**a<a href="https://foo.bar/?q=**">https://foo.bar/?q=**</a></p>
%
% ---\RESULT---

<<<
**a<https://foo.bar/?q=**>
>>>
BEGIN document
BEGIN link
- label: https://foo.bar/?q=**
- URI: https://foo.bar/?q=**
- title:
END link
END document
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
% ---RESULT--- "example": 480,
%
% <p>__a<a href="https://foo.bar/?q=__">https://foo.bar/?q=__</a></p>
%
% ---\RESULT---

<<<
__a<https://foo.bar/?q=__>
>>>
BEGIN document
underscore
underscore
BEGIN link
- label: https://foo.bar/?q=(underscore)(underscore)
- URI: https://foo.bar/?q=__
- title:
END link
END document
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
% ---RESULT--- "example": 354, (new)
%
% <p>*$*alpha.</p>
% <p>*£*bravo.</p>
% <p>*€*charlie.</p>
%
% ---\RESULT---

<<<
*$*alpha.

*£*bravo.

*€*charlie.
>>>
BEGIN document
dollarSign
paragraphSeparator
paragraphSeparator
END document
34 changes: 34 additions & 0 deletions tests/testfiles/CommonMark_0.31.2/links/020.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
% ---RESULT--- "example": 500,
%
% <p><a href="#fragment">link</a></p>
% <p><a href="https://example.com#fragment">link</a></p>
% <p><a href="https://example.com?foo=3#frag">link</a></p>
%
% ---\RESULT---

<<<
[link](#fragment)

[link](https://example.com#fragment)

[link](https://example.com?foo=3#frag)
>>>
BEGIN document
BEGIN link
- label: link
- URI: #fragment
- title:
END link
paragraphSeparator
BEGIN link
- label: link
- URI: https://example.com#fragment
- title:
END link
paragraphSeparator
BEGIN link
- label: link
- URI: https://example.com?foo=3#frag
- title:
END link
END document
Loading
Loading