Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Preserve underscores in emphasis. #760

Merged
merged 3 commits into from
Aug 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
## 15.0.3
* Fix the Markdown parser mishandling multiple underscores inside italics. [#389](https://github.com/fsprojects/FSharp.Formatting/issues/389)

## 15.0.2
* Trim the `--fscoptions` before passing them as `otherflags`. ([comment #616](https://github.com/fsprojects/FSharp.Formatting/issues/616#issuecomment-1200877765))

Expand Down
20 changes: 18 additions & 2 deletions src/FSharp.Formatting.Markdown/MarkdownParser.fs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,18 @@ let inline (|EscapedLatexInlineMathChar|_|) input =
| '\\' :: (('$') as c) :: rest -> Some(c, rest)
| _ -> None

/// Succeeds when the specified character list starts with an ASCII letter or digit
/// (`a`-`z`, `A`-`Z`, `0`-`9`). Returns that character together with the remaining
/// characters; yields `None` for an empty list or any other leading character.
let inline (|AlphaNum|_|) input =
    // Only the head of the list decides the outcome, so inspect it directly
    // instead of materialising the whole list as a string for a regex match.
    match input with
    | c :: rest when (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') -> Some(c, rest)
    | _ -> None

/// Matches a list if it starts with a sub-list that is delimited
/// using the specified delimiters. Returns a wrapped list and the rest.
///
Expand All @@ -79,7 +91,12 @@ let (|DelimitedMarkdown|_|) bracket input =
let rec loop acc =
function
| EscapedChar (x, xs) -> loop (x :: '\\' :: acc) xs
| input when List.startsWith endl input -> Some(List.rev acc, input)
| input when List.startsWith endl input ->
let rest = List.skip bracket.Length input

match rest with
| AlphaNum (x, xs) -> loop (x :: endl @ acc) xs
| _ -> Some(List.rev acc, input)
| x :: xs -> loop (x :: acc) xs
| [] -> None
// If it starts with 'startl', let's search for 'endl'
Expand All @@ -90,7 +107,6 @@ let (|DelimitedMarkdown|_|) bracket input =
else
None


/// This is similar to `List.Delimited`, but it skips over Latex inline math characters.
let (|DelimitedLatexDisplayMath|_|) bracket input =
let _startl, endl = bracket, bracket
Expand Down
87 changes: 87 additions & 0 deletions tests/FSharp.Markdown.Tests/Markdown.fs
Original file line number Diff line number Diff line change
Expand Up @@ -790,3 +790,90 @@ let ``Parse blockquote with three leading spaces`` () =
) ]

(Markdown.Parse doc).Paragraphs |> shouldEqual expected

[<Test>]
let ``Underscore inside italic is preserved`` () =
    let markdown = "_fsharp_space_after_comma_"

    // Range on line 1 that starts at column 0 and ends at the given column.
    let rangeTo endColumn =
        Some(
            { StartLine = 1
              StartColumn = 0
              EndLine = 1
              EndColumn = endColumn }
        )

    // The literal drops the two delimiting underscores but keeps the inner ones;
    // the emphasis and the paragraph both span the full input.
    let literal = Literal("fsharp_space_after_comma", rangeTo 24)
    let emphasis = Emphasis([ literal ], rangeTo 26)
    let expected = [ Paragraph([ emphasis ], rangeTo 26) ]

    (Markdown.Parse markdown).Paragraphs |> shouldEqual expected

[<Test>]
let ``Underscores inside word in heading`` () =
    let markdown =
        """
### fsharp_bar_before_discriminated_union_declaration

Always use a bar before every case in the declaration of a discriminated union.
"""

    // Single-line range on `line` covering the given start and end columns.
    let span line startColumn endColumn =
        Some
            { StartLine = line
              StartColumn = startColumn
              EndLine = line
              EndColumn = endColumn }

    let headingText = "fsharp_bar_before_discriminated_union_declaration"

    let bodyText =
        "Always use a bar before every case in the declaration of a discriminated union."

    // The heading text must come through as one literal with its underscores intact.
    let expected =
        [ Heading(3, [ Literal(headingText, span 2 4 53) ], span 2 0 53)
          Paragraph([ Literal(bodyText, span 4 0 79) ], span 4 0 79) ]

    (Markdown.Parse markdown).Paragraphs |> shouldEqual expected

[<Test>]
let ``Underscore inside italic and bold near punctuation is preserved`` () =
    // Closing delimiters here are followed by ',', ';' and ':' rather than whitespace.
    let markdown =
        "This is **bold_bold**, and this _italic_; and _this_too_: again."

    let expectedHtml =
        properNewLines
            "<p>This is <strong>bold_bold</strong>, and this <em>italic</em>; and <em>this_too</em>: again.</p>\r\n"

    let actualHtml = Markdown.ToHtml markdown

    actualHtml |> shouldEqual expectedHtml