Skip to content

Commit

Permalink
Parses task list markers.
Browse files Browse the repository at this point in the history
  • Loading branch information
mkende committed Mar 23, 2024
1 parent 83766ae commit 657cdf9
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 8 deletions.
31 changes: 25 additions & 6 deletions lib/Markdown/Perl.pm
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ sub convert { ## no critic (RequireArgUnpacking)
# TODO: introduce an HtmlRenderer object that carries the $linkrefs states
# around (instead of having to pass it in all the calls).
my ($linkrefs, $blocks) = $parser->process();
my $out = $this->_emit_html(0, $linkrefs, @{$blocks});
my $out = $this->_emit_html(0, 'root', $linkrefs, @{$blocks});
$this->{local_options} = {};
return $out;
}
Expand All @@ -90,9 +90,11 @@ sub _render_inlines {
}

sub _emit_html {
my ($this, $tight_block, $linkrefs, @blocks) = @_;
my ($this, $tight_block, $parent_type, $linkrefs, @blocks) = @_;
my $out = '';
my $block_index = 0;
for my $b (@blocks) {
$block_index++;
if ($b->{type} eq 'break') {
$out .= "<hr />\n";
} elsif ($b->{type} eq 'heading') {
Expand All @@ -115,13 +117,30 @@ sub _emit_html {
} elsif ($b->{type} eq 'html') {
$out .= $b->{content};
} elsif ($b->{type} eq 'paragraph') {
my $html = '';
if ((
$this->get_allow_task_list_markers eq 'list'
&& $parent_type eq 'list'
&& $block_index == 1)
|| $this->get_allow_task_list_markers eq 'always'
) {
if ($b->{content}[0] =~ m/ ^ \s* \[ (?<marker> [ xX] ) \] (?<space> \s | $ ) /x) {
$html =
'<input '
.($LAST_PAREN_MATCH{marker} eq ' ' ? '' : 'checked="" ')
.'disabled="" type="checkbox">'
.($LAST_PAREN_MATCH{space} eq ' ' ? ' ' : "\n");
substr $b->{content}[0], 0, $LAST_MATCH_END[0], '';
}
}
$html .= $this->_render_inlines($linkrefs, @{$b->{content}});
if ($tight_block) {
$out .= $this->_render_inlines($linkrefs, @{$b->{content}});
$out .= $html;
} else {
$out .= '<p>'.$this->_render_inlines($linkrefs, @{$b->{content}})."</p>\n";
$out .= "<p>${html}</p>\n";
}
} elsif ($b->{type} eq 'quotes') {
my $c = $this->_emit_html(0, $linkrefs, @{$b->{content}});
my $c = $this->_emit_html(0, 'quotes', $linkrefs, @{$b->{content}});
$out .= "<blockquote>\n${c}</blockquote>\n";
} elsif ($b->{type} eq 'list') {
my $type = $b->{style}; # 'ol' or 'ul'
Expand All @@ -131,7 +150,7 @@ sub _emit_html {
$start = " start=\"${num}\"" if $type eq 'ol' && $num != 1;
$out .= "<${type}${start}>\n<li>"
.join("</li>\n<li>",
map { $this->_emit_html(!$loose, $linkrefs, @{$_->{content}}) } @{$b->{items}})
map { $this->_emit_html(!$loose, 'list', $linkrefs, @{$_->{content}}) } @{$b->{items}})
."</li>\n</${type}>\n";
}
}
Expand Down
34 changes: 34 additions & 0 deletions lib/Markdown/Perl/Options.pm
Original file line number Diff line number Diff line change
Expand Up @@ -520,4 +520,38 @@ _make_option(
_enum(qw(http https)),
github => 'http');

=pod

=head2 B<allow_task_list_markers> I<(enum, default: list)>

Specify whether task list markers (rendered as check boxes) are recognised in
the input. The possible values are as follows:

=over 4

=item B<never>

Task list markers are never recognised.

=item B<list> I<(default)>

Task list markers are recognised only as the first element at the beginning of
a list item.

=item B<always>

Task list markers are recognised at the beginning of any paragraph, inside any
type of block.

=back

=cut

# Registers the allow_task_list_markers option. Its default value is 'list' and
# its accepted values are restricted to never/list/always by _enum. The
# trailing markdown/cmark pairs set 'never' for those keys (presumably per-mode
# overrides -- confirm against _make_option).
_make_option(
  allow_task_list_markers => 'list',
  _enum(qw(never list always)),
  markdown => 'never',
  cmark    => 'never');

1;
2 changes: 1 addition & 1 deletion t/901-github-test-suite.t
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use CmarkTest;
use Test2::V0;

# TODO: remove these todos.
my %opt = (todo => [198 .. 202, 204, 205, 279, 280, 398, 426, 434 .. 436,
my %opt = (todo => [198 .. 202, 204, 205, 398, 426, 434 .. 436,
473 .. 475, 477, 629 .. 631, 652],
# These are bugs in the GitHub spec, not in our implementation. All
# of these have been tested to be buggy in the real cmark-gfm
Expand Down
15 changes: 14 additions & 1 deletion t/lib/HtmlSanitizer.pm
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,19 @@ use Exporter 'import';

our @EXPORT = qw(sanitize_html);


# Building blocks for matching HTML tags. They are composed bottom-up: names and
# whitespace first, then a single attribute, then the inside of a whole tag.
# NOTE(review): none of these are referenced in the visible part of this file;
# confirm where they are meant to be used.

# A tag name: an ASCII letter followed by any number of letters, digits or
# hyphens.
my $html_tag_name_re = qr/[a-zA-Z][-a-zA-Z0-9]*/;
# An attribute name: a letter, underscore or colon, followed by any number of
# letters, digits, underscores, dots, colons or hyphens.
my $html_attribute_name_re = qr/[a-zA-Z_:][-a-zA-Z0-9_.:]*/;
# Mandatory whitespace: at least one space, tab or newline, with at most one
# newline in the whole run.
my $html_space_re = qr/\n[ \t]*|[ \t][ \t]*\n?[ \t]*/; # Spaces, tabs, and up to one line ending.
# Same as above but may also match the empty string.
my $opt_html_space_re = qr/[ \t]*\n?[ \t]*/; # Optional spaces.
# An attribute value in one of three forms: unquoted (no whitespace, quotes,
# '=', '<', '>' or backtick), single-quoted, or double-quoted.
my $html_attribute_value_re = qr/ [^ \t\n"'=<>`]+ | '[^']*' | "[^"]*" /x;
# A complete attribute: mandatory leading whitespace, a name, and an optional
# "= value" part that allows optional whitespace around the equal sign.
my $html_attribute_re =
qr/ ${html_space_re} ${html_attribute_name_re} (?: ${opt_html_space_re} = ${opt_html_space_re} ${html_attribute_value_re} )? /x;

# The inside of an opening tag (the surrounding '<' and '>' are not part of the
# pattern): a tag name, any number of attributes, optional whitespace and an
# optional self-closing '/'.
my $html_open_tag_re = qr/ ${html_tag_name_re} ${html_attribute_re}* ${opt_html_space_re} \/? /x;
# The inside of a closing tag (again without the angle brackets): a '/', a tag
# name and optional trailing whitespace.
my $html_close_tag_re = qr/ \/ ${html_tag_name_re} ${opt_html_space_re} /x;


# The sanitizing here is quite strict (it only removes new lines happening just
# before or after an HTML tag), so this forces our converter to match closely
# what the cmark spec has (I guess it’s not a bad thing).
Expand All @@ -27,7 +40,7 @@ sub sanitize_html {
$html =~ m/<\/code>|$/g;
}
}
$html =~ s/(<\/[a-z]+>)/$1\n/g;
$html =~ s/( < (?: \/[a-z]+ | input ) >)/$1\n/gx;
$html =~ s/\n\n+$/\n/;
return $html;
}

0 comments on commit 657cdf9

Please sign in to comment.