-
Notifications
You must be signed in to change notification settings - Fork 4
/
clean-content.pl
57 lines (48 loc) · 1.53 KB
/
clean-content.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/env perl
use v5.30;
use strict;
use warnings;
use autodie qw( :all );
use experimental qw( signatures );
use feature qw( postderef );
use DateTime::Format::CLDR;
use Path::Tiny::Rule;
# Clean up every Markdown post found under content/posts, rewriting each
# file in place.
#
# For each *.md file the content is read as UTF-8, normalised and written
# back:
#   * the "Uncategorized" category entry is removed;
#   * HTML anchors are turned into Markdown links (see md_link);
#   * numeric HTML entities for common punctuation become plain ASCII;
#   * the multiplication sign, right double quote and ellipsis characters
#     become ASCII;
#   * "### Comment by NAME on DATE" headings become a bold attribution
#     line (see format_date, which dies on a date it cannot parse).
#
# Takes no arguments and returns nothing useful.
sub main {
    STDOUT->binmode(':encoding(UTF-8)');

    # Decimal code point of a numeric HTML entity => ASCII replacement.
    # The patterns must name the entity (&#NN;), not the decoded character:
    # a bare "*" or "+" is not even a valid regex, and replacing "-" with
    # "-" does nothing.
    # NOTE(review): 42 (asterisk) maps to an apostrophe because that is the
    # replacement the previous code used -- confirm this is intended.
    # NOTE(review): 215, 8221 and 8230 are assumed to be wanted in entity
    # form as well as in literal form (handled further down) -- confirm.
    my %ascii_for = (
        39   => q{'},
        42   => q{'},
        43   => q{+},
        45   => q{-},
        95   => q{_},
        96   => q{`},
        215  => q{x},
        8210 => q{-},
        8211 => q{-},
        8212 => q{-},
        8216 => q{'},
        8217 => q{'},
        8220 => q{"},
        8221 => q{"},
        8243 => q{"},
        8230 => q{...},
    );

    my $iter = Path::Tiny::Rule->new->name(qr/\.md\z/)->iter('content/posts');
    while ( my $file = $iter->() ) {
        my $content = $file->slurp_utf8;

        $content =~ s/categories:\n - Uncategorized\n+//;

        # [^"]+ keeps the href from running past its closing quote, and
        # [^>]* (rather than [^>]+) also converts anchors that carry no
        # attribute other than href.
        $content =~ s{<a href="([^"]+)"[^>]*>([^<]+)</a>}{md_link($1, $2)}ge;

        # One pass over every decimal entity; leading zeros (&#039;) are
        # accepted, and entities missing from the table are left untouched.
        $content =~ s{&#0*([0-9]+);}{ $ascii_for{$1} // $& }ge;

        # Literal characters are written as escapes so that they match the
        # decoded text whether or not the file is compiled under "use utf8".
        $content =~ s/\x{D7}/x/g;        # MULTIPLICATION SIGN
        $content =~ s/\x{201D}/"/g;      # RIGHT DOUBLE QUOTATION MARK
        $content =~ s/\x{2026}/.../g;    # HORIZONTAL ELLIPSIS

        $content
            =~ s/\#\#\# Comment by (.+?) on (20.+)/"**$1, on " . format_date($2) . ', said:** '/eg;

        $file->spew_utf8($content);
    }
}
# File-level parser for comment timestamps; format_date() reads it to turn
# the date text of a comment heading into a DateTime object.
my $p = DateTime::Format::CLDR->new(
    locale  => 'en-US',
    pattern => 'yyyy-MM-dd HH:mm:ss ZZ',
);
# Build the Markdown form of a hyperlink.
#
#   $href - link target
#   $text - visible link text
#
# Returns an autolink "<href>" when the text is just the target itself,
# otherwise an inline link "[text](href)".
sub md_link ( $href, $text ) {
    if ( $href ne $text ) {
        return "[$text]($href)";
    }

    return "<$href>";
}
# Reformat a comment timestamp for display.
#
#   $date - timestamp text, parsed with the file-level $p parser
#           (pattern 'yyyy-MM-dd HH:mm:ss ZZ')
#
# Returns the moment shifted to America/Chicago and formatted as
# 'yyyy-MM-dd HH:mm'. Dies with "Could not parse ..." when the parser
# returns a false value.
sub format_date ($date) {
    my $dt = $p->parse_datetime($date)
        or die "Could not parse $date";
    $dt->set_time_zone('America/Chicago');

    # Lower-case 'yyyy' is the calendar year. Upper-case 'YYYY' is the
    # week-numbering year, which differs from the calendar year for some
    # dates in late December and early January.
    return $dt->format_cldr('yyyy-MM-dd HH:mm');
}
# Script entry point: run the clean-up over content/posts.
main();