Skip to content

Commit

Permalink
Merge pull request #416 from lostenderman/update-commonmark
Browse files Browse the repository at this point in the history
  • Loading branch information
github-actions[bot] authored Apr 1, 2024
2 parents bd7ac0a + fad4c65 commit d8a1d2f
Show file tree
Hide file tree
Showing 18 changed files with 342 additions and 24 deletions.
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Fixes:
intervals. (#408, #419)
- Do not misinterpret bracketed e-mails as citations. (#424, #426,
sponsored by @istqborg)
- Comply with CommonMark 0.31.2. (#416, contributed by @lostenderman)

Documentation:

Expand Down
105 changes: 81 additions & 24 deletions markdown.dtx
Original file line number Diff line number Diff line change
Expand Up @@ -22063,19 +22063,6 @@ end
% \par
% \begin{markdown}
%
% The \luamdef{util.lookup_files} method looks up files with filename `f`
% and returns their paths. Further options for the \pkg{Kpathsea} library
% can be specified in table `options`. [@luatex21, Section 10.7.4]
%
% \end{markdown}
% \begin{macrocode}
-- Thin wrapper that forwards `f` and `options` unchanged to kpse.lookup().
-- The call is in tail position, so every value kpse.lookup() returns is
-- passed through to the caller (callers may collect them into a table).
function util.lookup_files(f, options)
return kpse.lookup(f, options)
end
% \end{macrocode}
% \par
% \begin{markdown}
%
% The \luamdef{util.expand_tabs_in_line} expands tabs in string `s`. If
% `tabstop` is specified, it is used as the tab stop width. Otherwise,
% the tab stop width of 4 characters is used. The method is a copy of the tab
Expand Down Expand Up @@ -24523,6 +24510,38 @@ end
% \par
% \begin{markdown}
%
%### Unicode punctuation
% This section documents [the Unicode punctuation][unicode-punctuation]
% recognized by the markdown reader, i.e. the characters in the Unicode
% general categories P (punctuation) and S (symbol). The punctuation is
% organized in the \luamdef{punctuation} table according to the number of
% bytes occupied after conversion to \acro{utf}8.
%
% [unicode-punctuation]: https://spec.commonmark.org/0.31.2/#unicode-punctuation-character
% (CommonMark Spec, Version 0.31.2 (2024-01-28))
%
% \end{markdown}
% \begin{macrocode}
-- Collect every code point listed in UnicodeData.txt whose general category
-- starts with P (punctuation) or S (symbol). `punctuation[n]` is a list of
-- the UTF-8 encoded characters that occupy exactly `n` bytes.
local punctuation = {}
(function()
  -- kpse.lookup() may return nil when the file cannot be located. Check the
  -- result first: otherwise io.open(nil, ...) raises a generic
  -- "bad argument" error and the descriptive message below is never shown.
  local pathname = assert(kpse.lookup("UnicodeData.txt"),
                          [[Could not locate file "UnicodeData.txt"]])
  local file = assert(io.open(pathname, "r"),
                      [[Could not open file "UnicodeData.txt"]])
  for line in file:lines() do
    -- A record begins with "<hex code point>;<name>;<general category>";
    -- only the first letter (the major class) of the category is captured.
    local codepoint, major_category = line:match("^(%x+);[^;]*;(%a)")
    if major_category == "P" or major_category == "S" then
      local code = unicode.utf8.char(tonumber(codepoint, 16))
      -- Group the characters by the byte length of their UTF-8 encoding.
      local bucket = punctuation[#code]
      if bucket == nil then
        bucket = {}
        punctuation[#code] = bucket
      end
      bucket[#bucket + 1] = code
    end
  end
  assert(file:close())
end)()
% \end{macrocode}
% \par
% \begin{markdown}
%
%### Plain \TeX{} Writer {#tex-writer}
%
% This section documents the \luamref{writer} object, which implements the
Expand Down Expand Up @@ -25778,7 +25797,6 @@ parsers.letter = R("AZ","az")
parsers.alphanumeric = R("AZ","az","09")
parsers.keyword = parsers.letter
* (parsers.alphanumeric + parsers.dash)^0
parsers.internal_punctuation = S(":;,.?")

parsers.doubleasterisks = P("**")
parsers.doubleunderscores = P("__")
Expand All @@ -25789,7 +25807,40 @@ parsers.any = P(1)
parsers.succeed = P(true)
parsers.fail = P(false)

parsers.escapable = S("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~")
parsers.internal_punctuation = S(":;,.?")
parsers.ascii_punctuation = S("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~")

-- parsers.punctuation[n] (n = 1..4) matches a single punctuation character
-- whose UTF-8 encoding is exactly n bytes long.
parsers.punctuation = {}
(function()
  for width = 1, 4 do
    local matcher = parsers.fail
    -- ASCII punctuation is always among the single-byte alternatives.
    if width == 1 then
      matcher = matcher + parsers.ascii_punctuation
    end
    local codes = punctuation[width] or {}
    for index = 1, #codes do
      local code = codes[index]
      assert(#code == width)
      -- A literal pattern matches the same byte sequence as a chain of
      -- one-byte sets, one per byte of the encoded character.
      matcher = matcher + P(code)
    end
    parsers.punctuation[width] = matcher
  end
end)()
% \end{macrocode}
% \par
% \begin{markdown}
%
% Here, we garbage-collect the \luamref{punctuation} table, since we won't need it anymore.
%
% \end{markdown}
% \begin{macrocode}
-- Release this reference to the temporary table and force a full
-- garbage-collection cycle right away.
punctuation = nil
collectgarbage("collect")

-- Backslash escapes apply to exactly the ASCII punctuation characters.
parsers.escapable = parsers.ascii_punctuation
-- Either a backslash followed by an escapable character, or whatever
-- parsers.any matches. The `/ ""` string capture binds to the backslash
-- only, because `/` and `*` associate left to right before `+` is applied.
parsers.anyescaped = parsers.backslash / "" * parsers.escapable
+ parsers.any

Expand Down Expand Up @@ -26982,7 +27033,7 @@ parsers.html_comment = Cs( parsers.html_comment_start

parsers.html_inline_comment = (parsers.html_comment_start / "")
* -P(">") * -P("->")
* Cs((V("NoSoftLineBreakEndline") + parsers.any - P("--")
* Cs((V("NoSoftLineBreakEndline") + parsers.any
- parsers.nested_breaking_blank - parsers.html_comment_end)^0)
* (parsers.html_comment_end / "")

Expand Down Expand Up @@ -28032,7 +28083,6 @@ function M.reader.new(writer, options)
return lpeg.R("\240\244") * cont * cont * cont
end
end

% \end{macrocode}
% \begin{markdown}
%
Expand All @@ -28050,23 +28100,30 @@ function M.reader.new(writer, options)
else
char_length = pos + 1
end
c = lpeg.match({ C(utf8_by_byte_count(char_length)) },s,i+pos)
if (c ~= nil) and (unicode.utf8.match(c, chartype)) then
return i

if (chartype == "punctuation") then
if lpeg.match(parsers.punctuation[char_length], s, i+pos) then
return i
end
else
c = lpeg.match({ C(utf8_by_byte_count(char_length)) },s,i+pos)
if (c ~= nil) and (unicode.utf8.match(c, chartype)) then
return i
end
end
end
end

local function check_preceding_unicode_punctuation(s, i)
return check_unicode_type(s, i, -4, -1, "%p")
return check_unicode_type(s, i, -4, -1, "punctuation")
end

-- Delegates to check_unicode_type() with the offsets -4 and -1 and the "%s"
-- (whitespace) character class, and returns its result unchanged.
-- NOTE(review): presumably this tests whether the character that ends just
-- before position `i` of `s` is Unicode whitespace -- confirm against the
-- full definition of check_unicode_type().
local function check_preceding_unicode_whitespace(s, i)
return check_unicode_type(s, i, -4, -1, "%s")
end

local function check_following_unicode_punctuation(s, i)
return check_unicode_type(s, i, 0, 3, "%p")
return check_unicode_type(s, i, 0, 3, "punctuation")
end

local function check_following_unicode_whitespace(s, i)
Expand Down Expand Up @@ -30050,7 +30107,7 @@ M.extensions.content_blocks = function(language_map)
% \begin{macrocode}
local languages_json = (function()
local base, prev, curr
for _, pathname in ipairs{util.lookup_files(language_map, { all=true })} do
for _, pathname in ipairs{kpse.lookup(language_map, { all=true })} do
local file = io.open(pathname, "r")
if not file then goto continue end
local input = assert(file:read("*a"))
Expand Down Expand Up @@ -32327,7 +32384,7 @@ function M.new(options)
%
% \end{markdown}
% \begin{macrocode}
local pathname = util.lookup_files(filename)
local pathname = kpse.lookup(filename)
local input_file = assert(io.open(pathname, "r"),
[[Could not open user-defined syntax extension "]]
.. pathname .. [[" for reading]])
Expand Down
16 changes: 16 additions & 0 deletions tests/testfiles/CommonMark_0.31.2/autolinks/002.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
% ---RESULT--- "example": 594,
%
% <p><a href="https://foo.bar.baz/test?q=hello&amp;id=22&amp;boolean">https://foo.bar.baz/test?q=hello&amp;id=22&amp;boolean</a></p>
%
% ---\RESULT---

<<<
<https://foo.bar.baz/test?q=hello&id=22&boolean>
>>>
BEGIN document
BEGIN link
- label: https://foo.bar.baz/test?q=hello(ampersand)id=22(ampersand)boolean
- URI: https://foo.bar.baz/test?q=hello&id=22&boolean
- title:
END link
END document
11 changes: 11 additions & 0 deletions tests/testfiles/CommonMark_0.31.2/autolinks/009.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
% ---RESULT--- "example": 601,
%
% <p>&lt;https://foo.bar/baz bim&gt;</p>
%
% ---\RESULT---

<<<
<https://foo.bar/baz bim>
>>>
BEGIN document
END document
16 changes: 16 additions & 0 deletions tests/testfiles/CommonMark_0.31.2/autolinks/010.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
% ---RESULT--- "example": 602,
%
% <p><a href="https://example.com/%5C%5B%5C">https://example.com/\[\</a></p>
%
% ---\RESULT---

<<<
<https://example.com/\[\>
>>>
BEGIN document
BEGIN link
- label: https://example.com/(backslash)[(backslash)
- URI: https://example.com/(backslash)[(backslash)
- title:
END link
END document
11 changes: 11 additions & 0 deletions tests/testfiles/CommonMark_0.31.2/autolinks/015.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
% ---RESULT--- "example": 607,
%
% <p>&lt; https://foo.bar &gt;</p>
%
% ---\RESULT---

<<<
< https://foo.bar >
>>>
BEGIN document
END document
11 changes: 11 additions & 0 deletions tests/testfiles/CommonMark_0.31.2/autolinks/018.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
% ---RESULT--- "example": 610,
%
% <p>https://example.com</p>
%
% ---\RESULT---

<<<
https://example.com
>>>
BEGIN document
END document
16 changes: 16 additions & 0 deletions tests/testfiles/CommonMark_0.31.2/backslash_escapes/009.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
% ---RESULT--- "example": 20,
%
% <p><a href="https://example.com?find=%5C*">https://example.com?find=\*</a></p>
%
% ---\RESULT---

<<<
<https://example.com?find=\*>
>>>
BEGIN document
BEGIN link
- label: https://example.com?find=(backslash)*
- URI: https://example.com?find=(backslash)*
- title:
END link
END document
12 changes: 12 additions & 0 deletions tests/testfiles/CommonMark_0.31.2/code_spans/018.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
% ---RESULT--- "example": 345,
%
% <p><code>&lt;https://foo.bar.</code>baz&gt;`</p>
%
% ---\RESULT---

<<<
`<https://foo.bar.`baz>`
>>>
BEGIN document
codeSpan: <https://foo.bar.
END document
16 changes: 16 additions & 0 deletions tests/testfiles/CommonMark_0.31.2/code_spans/019.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
% ---RESULT--- "example": 346,
%
% <p><a href="https://foo.bar.%60baz">https://foo.bar.`baz</a>`</p>
%
% ---\RESULT---

<<<
<https://foo.bar.`baz>`
>>>
BEGIN document
BEGIN link
- label: https://foo.bar.`baz
- URI: https://foo.bar.`baz
- title:
END link
END document
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
% ---RESULT--- "example": 479,
%
% <p>**a<a href="https://foo.bar/?q=**">https://foo.bar/?q=**</a></p>
%
% ---\RESULT---

<<<
**a<https://foo.bar/?q=**>
>>>
BEGIN document
BEGIN link
- label: https://foo.bar/?q=**
- URI: https://foo.bar/?q=**
- title:
END link
END document
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
% ---RESULT--- "example": 480,
%
% <p>__a<a href="https://foo.bar/?q=__">https://foo.bar/?q=__</a></p>
%
% ---\RESULT---

<<<
__a<https://foo.bar/?q=__>
>>>
BEGIN document
underscore
underscore
BEGIN link
- label: https://foo.bar/?q=(underscore)(underscore)
- URI: https://foo.bar/?q=__
- title:
END link
END document
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
% ---RESULT--- "example": 354, (new)
%
% <p>*$*alpha.</p>
% <p>*£*bravo.</p>
% <p>*€*charlie.</p>
%
% ---\RESULT---

<<<
*$*alpha.

*£*bravo.

*€*charlie.
>>>
BEGIN document
dollarSign
paragraphSeparator
paragraphSeparator
END document
34 changes: 34 additions & 0 deletions tests/testfiles/CommonMark_0.31.2/links/020.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
% ---RESULT--- "example": 500,
%
% <p><a href="#fragment">link</a></p>
% <p><a href="https://example.com#fragment">link</a></p>
% <p><a href="https://example.com?foo=3#frag">link</a></p>
%
% ---\RESULT---

<<<
[link](#fragment)

[link](https://example.com#fragment)

[link](https://example.com?foo=3#frag)
>>>
BEGIN document
BEGIN link
- label: link
- URI: #fragment
- title:
END link
paragraphSeparator
BEGIN link
- label: link
- URI: https://example.com#fragment
- title:
END link
paragraphSeparator
BEGIN link
- label: link
- URI: https://example.com?foo=3#frag
- title:
END link
END document
Loading

0 comments on commit d8a1d2f

Please sign in to comment.