diff --git a/Changes b/Changes index 49cf9d0..10539c1 100644 --- a/Changes +++ b/Changes @@ -1,8 +1,10 @@ Revision history for pmarkdown and the Markdown::Perl module. -1.02 - ?? +1.02 - 2024-04-06 - - Improvement to the support of the original markdown syntax. + - Improvement to the support of the original markdown syntax. This is probably + the best that we will do as this is now quite faithful and some of the + remaining differences are bugs in the original implementation. - Bugfixes: - Do not make a list loose when it is followed by blank lines. - Tab stops after list markers are properly computed. diff --git a/README.md b/README.md index 85121a6..88e41aa 100644 --- a/README.md +++ b/README.md @@ -7,8 +7,9 @@ extensions. This software supports the entire [`CommonMark` spec](https://spec.commonmark.org/0.31.2/) syntax, as well as all -[GitHub Flavored Markdown (gfm) extensions](https://github.github.com/gfm/) and -some more custom extensions. +[GitHub Flavored Markdown (gfm) extensions](https://github.github.com/gfm/) +and some more custom extensions. It also has compatibility with the +[original Markdown syntax](https://daringfireball.net/projects/markdown/syntax). It is based on the [Markdown::Perl](https://metacpan.org/pod/Markdown::Perl) library that can be used in standalone Perl program. diff --git a/lib/App/pmarkdown.pm b/lib/App/pmarkdown.pm index 39cfb19..fcad0fe 100644 --- a/lib/App/pmarkdown.pm +++ b/lib/App/pmarkdown.pm @@ -5,7 +5,7 @@ use warnings; use Markdown::Perl; -our $VERSION = '1.01'; # Remember to also set the Markdown::Perl version. +our $VERSION = '1.02'; # Remember to also set the Markdown::Perl version. 1; diff --git a/lib/Markdown/Perl.pm b/lib/Markdown/Perl.pm index 743fe92..16a10e7 100644 --- a/lib/Markdown/Perl.pm +++ b/lib/Markdown/Perl.pm @@ -17,7 +17,7 @@ use Scalar::Util 'blessed'; use parent 'Markdown::Perl::Options'; -our $VERSION = '1.01'; # Remember to also set the App::pmarkdown version. 
+our $VERSION = '1.02'; # Remember to also set the App::pmarkdown version. our @EXPORT_OK = qw(convert set_options); our %EXPORT_TAGS = (all => \@EXPORT_OK); diff --git a/lib/Markdown/Perl/BlockParser.pm b/lib/Markdown/Perl/BlockParser.pm index 071adad..0dbe397 100644 --- a/lib/Markdown/Perl/BlockParser.pm +++ b/lib/Markdown/Perl/BlockParser.pm @@ -160,7 +160,7 @@ sub _finalize_paragraph { if ($this->{last_line_was_blank}) { if (@{$this->{blocks_stack}} && $this->{blocks_stack}[-1]{block}{type} eq 'list_item') { - $this->{blocks_stack}[-1]{block}{loose} = 1; + $this->{blocks_stack}[-1]{block}{loose} = 1; } } } @@ -268,7 +268,7 @@ sub _test_lazy_continuation { sub _count_matching_blocks { my ($this, $lr) = @_; # $lr is a scalar *reference* to the current line text. - $this->{matched_prefix_size} += 0; + $this->{matched_prefix_size} = 0; for my $i (0 .. $#{$this->{blocks_stack}}) { local *::_ = $lr; my $r = $this->{blocks_stack}[$i]{cond}(); @@ -430,8 +430,9 @@ sub _do_indented_code_block { if (@{$this->{paragraph}} || $l !~ m/${indented_code_re}/) { return; } - my $preserve_tabs = !$this->get_code_blocks_convert_tabs_to_spaces; - my @code_lines = scalar(remove_prefix_spaces(4, $l.$this->line_ending(), $preserve_tabs)); + my $convert_tabs = $this->get_code_blocks_convert_tabs_to_spaces; + tabs_to_space($l, $this->{matched_prefix_size}) if $convert_tabs; + my @code_lines = scalar(remove_prefix_spaces(4, $l.$this->line_ending())); my $count = 1; # The number of lines we have read my $valid_count = 1; # The number of lines we know are in the code block. 
my $valid_pos = $this->get_pos(); @@ -441,11 +442,10 @@ sub _do_indented_code_block { if ($nl =~ m/${indented_code_re}/) { $valid_pos = $this->get_pos(); $valid_count = $count; - push @code_lines, - scalar(remove_prefix_spaces(4, $nl.$this->line_ending(), $preserve_tabs)); + tabs_to_space($nl, $this->{matched_prefix_size}) if $convert_tabs; + push @code_lines, scalar(remove_prefix_spaces(4, $nl.$this->line_ending())); } elsif ($nl eq '') { - push @code_lines, - scalar(remove_prefix_spaces(4, $nl.$this->line_ending(), $preserve_tabs)); + push @code_lines, scalar(remove_prefix_spaces(4, $nl.$this->line_ending(), !$convert_tabs)); } else { last; } @@ -617,9 +617,10 @@ sub _do_list_item { my $mode = $this->get_lists_can_interrupt_paragraph; if (@{$this->{paragraph}}) { return if $mode eq 'never'; - if ($mode eq 'within_list' && !(@{$this->{blocks_stack}} && $this->{blocks_stack}[-1]{block}{type} eq 'list_item')) { + if ($mode eq 'within_list' + && !(@{$this->{blocks_stack}} && $this->{blocks_stack}[-1]{block}{type} eq 'list_item')) { return; - }; + } if ($mode eq 'strict' && ($text eq '' || ($type eq 'ol' && $digits != 1))) { return; } diff --git a/lib/Markdown/Perl/Options.pm b/lib/Markdown/Perl/Options.pm index 2614122..3b1e23a 100644 --- a/lib/Markdown/Perl/Options.pm +++ b/lib/Markdown/Perl/Options.pm @@ -292,7 +292,7 @@ A list can interrupt a paragraph only when we are already inside another list. =item B I<(default)> -A list can interrupt a paragraph but only with some non anbiguous list markers. +A list can interrupt a paragraph but only with some non ambiguous list markers. =item B @@ -424,7 +424,7 @@ _make_option(code_blocks_info => 'language', _enum(qw(ignored language))); =head3 B I<(boolean, default: false)> -By default, tabs are preserved inside codeblocks. With this option, all tabs (at +By default, tabs are preserved inside code blocks. 
With this option, all tabs (at the beginning of the lines or inside) are turned into spaces, aligned with the tab stops (currently always a multiple of 4). diff --git a/lib/Markdown/Perl/Util.pm b/lib/Markdown/Perl/Util.pm index 3f09d95..e80f464 100644 --- a/lib/Markdown/Perl/Util.pm +++ b/lib/Markdown/Perl/Util.pm @@ -15,7 +15,7 @@ use Unicode::CaseFold 'fc'; our $VERSION = 0.01; our @EXPORT_OK = - qw(split_while remove_prefix_spaces indent_size indented_one_tab horizontal_size normalize_label indented); + qw(split_while remove_prefix_spaces indent_size indented_one_tab horizontal_size normalize_label indented tabs_to_space); our %EXPORT_TAGS = (all => \@EXPORT_OK); # Partition a list into a continuous chunk for which the given code evaluates to @@ -130,4 +130,21 @@ sub normalize_label { return $label; } +# Convert tabs to space in the given string. Assuming $prefix horizontal spaces +# before the string. +sub tabs_to_space { ## no critic (RequireArgUnpacking) + my ($str, $prefix) = @_; + $prefix //= 0; + while ($str =~ m/\G[^\t]*\t/g) { + $prefix += $LAST_MATCH_END[0] - $LAST_MATCH_START[0] - 1; + my $nb_space = 4 - $prefix % 4; + substr $str, $LAST_MATCH_END[0] - 1, 1, ' ' x $nb_space; + pos($str) = $LAST_MATCH_END[0] - 1 + $nb_space; + $prefix = 0; # By definition we are now aligned with a tab stop. + } + return $str if defined wantarray; + $_[0] = $str; + return; +} + 1; diff --git a/script/pmarkdown b/script/pmarkdown index 6ee89c3..93c9ba5 100644 --- a/script/pmarkdown +++ b/script/pmarkdown @@ -143,7 +143,8 @@ CommonMark spec. This mode implements the L. -Note that this mode is not 100% similar to the original F script. +Note that this mode is not 100% similar to the original F script +as that script has some buggy behavior that we are not reproducing. 
=back diff --git a/t/100-util.t b/t/100-util.t index 313d193..92d80da 100644 --- a/t/100-util.t +++ b/t/100-util.t @@ -69,4 +69,15 @@ is(indented(5, "\t\tfoo"), T(), 'indented8'); is(indented(5, " foo"), T(), 'indented9'); is(indented(5, " foo"), F(), 'indented10'); +is(tabs_to_space("ab\tcd"), 'ab cd', 'tabs_to_space1'); +is(tabs_to_space("ab\tcd", 2), 'ab cd', 'tabs_to_space2'); +is(tabs_to_space("ab\t\tcd"), 'ab cd', 'tabs_to_space3'); +is(tabs_to_space("\t\tab"), ' ab', 'tabs_to_space4'); +{ + my $s = "ab\tcd"; + tabs_to_space($s); + is($s, 'ab cd', 'tabs_to_space5'); +} + + done_testing; diff --git a/t/902-markdown-test-suite.t b/t/902-markdown-test-suite.t index 3c64635..9cbddb8 100644 --- a/t/902-markdown-test-suite.t +++ b/t/902-markdown-test-suite.t @@ -9,9 +9,13 @@ use Markdown::Perl; use MmdTest; use Test2::V0; -# TODO: remove these todos. my %opt = ( - todo => [16, 18, 22], + todo => [ + # We will probably never support this test, which has quotes inside link + # titles that are in quotes. The "spec" does not say anything about that and + # it would be a nightmare to parse it. + 16, + ], # These are bugs in the Markdown "spec", not in our implementation. All of # these have been tested to be buggy in the real Markdown.pl implementation. bugs => [