Convert Kwiki to MediaWiki

From xoa

Jump to: navigation, search

Here's a little program I wrote to convert all the Kwiki pages from an internal-to-work wiki to a MediaWiki import format.

It's certainly not as robust as you'd want for a large conversion, and there are some FLR-specific items in there, but it should be good for someone else wanting to start somewhere.

#!/usr/bin/perl

# Convert Kwiki to MediaWiki

use strict;
use warnings;

use File::Slurp qw( read_file );
use CGI qw( escapeHTML );

# Source pages that are left out of the export.  They are still part of
# @sources, so references to them in other pages are still rewritten.
my @skips = qw( AIM AndyLester );
my %skips;
@skips{@skips} = (1) x @skips;

my @sources = get_sources();

# One [ pattern, new title ] pair per source page, longest page name first.
# massaged() reads this list when it rewrites page references.
my @maps =
    map { [ qr/\b\Q$_\E\b/, convert_name( $_ ) ] }
    reverse sort { length $a <=> length $b } @sources;

# Convert every page that is not skipped, in sorted order, reporting
# progress on STDERR as each one is read.
my @pages;
SOURCE:
for my $source ( sort @sources ) {
    next SOURCE if exists $skips{$source};

    my $title = convert_name( $source );
    my @lines = read_file( "database/$source" );
    warn ">>> $source\n";

    push @pages, make_page_xml( massaged( @lines ), $title );
}

# The finished import document goes to STDOUT.
print make_file_xml( @pages );


# convert_name( $wiki_word ) - derive a page title from a Kwiki page name.
#
# The CamelCase name is split into space-separated words, a handful of
# site-specific names are patched back up, and a run of digits is set off
# from the character before it.  Returns the new title; the argument is
# not modified.
sub convert_name {
    my ($title) = @_;

    # Every substitution below works on $title through $_.  The order of
    # the rules is significant: the special cases repair what the two
    # splitting rules produce.
    for ($title) {
        s/([a-z])([A-Z])/$1 $2/g;           # FooBar => Foo Bar
        s/([A-Z]+)([A-Z][a-z])/$1 $2/g;     # ABCFoo => ABC Foo

        # Special cases
        s/Dns/DNS/g;
        s/V Ms/VMs/g;
        s/VM Ware/VMWare/g;
        s/T Wfiles/TW files/g;
        s/Title (Wave|Wise|Check)/Title$1/g;
        s/Class DBI/Class::DBI/g;
        s/Devel Cover/Devel::Cover/g;
        s/Cadetail/CADETAIL/g;

        s/(\S)(\d+)/$1 $2/g;                # Foo12 => Foo 12
    }

    return $title;
}

# get_sources() - list the Kwiki pages available for conversion.
#
# Returns the names (without the directory part) of the entries in the
# "database" directory that are plain, non-empty files and whose names do
# not end in ".save".  The names come back in readdir order, i.e. unsorted.
# Dies if the directory cannot be opened.
sub get_sources {
    my $dir = 'database';

    opendir my $dh, $dir or die "Can't open $dir: $!";

    # Collect into a lexical of our own rather than writing to a variable
    # that belongs to the caller's scope.  The name test comes first so
    # that .save files are rejected without touching the filesystem; -s _
    # reuses the stat performed by -f.
    my @found = grep { !/\.save$/ && -f "$dir/$_" && -s _ } readdir $dh;

    closedir $dh;

    return @found;
}

# massaged( @lines ) - convert the lines of one Kwiki page to MediaWiki
# markup.
#
# Each line has its trailing whitespace (including the newline) removed.
# Lines that start with whitespace are otherwise left alone.  On every
# other line:
#   * [= text]          becomes <tt>text</tt>
#   * [label URL]       becomes [URL label]
#   * !WikiWord         loses its leading "!"
#   * = Heading         gets matching closing "=" marks
#   * known page names  (from the file-level @maps list) become
#                       [[New Title]] links; a warning is printed for each
#                       map that fired on a line
# Finally every line is passed through escapeHTML so the result can be
# placed inside an XML element, and the lines are joined with "\n".
#
# Returns the converted page as a single string.
sub massaged {
    my @lines = @_;

    # Spans in which page names must not be linked: links that are already
    # in place (internal or external) and bare URLs.  Without this, a short
    # page name would be linked a second time inside the title of a longer
    # one ("[[[[AIM]] Setup]]"), and names inside URLs would be corrupted.
    my $protected = qr/(\[\[.*?\]\]|\[https?:[^\]]*\]|https?:[^\s\[\]]+)/;

    for ( @lines ) {
        s/\s+$//;
        next if /^\s+/;

        # Change to monospaced
        s{\[=\s*(.+?)\s*\]}{<tt>$1</tt>}g;

        # Reverse links.  The label and the URL are not allowed to contain
        # brackets, so two links on the same line stay two links.
        s/\[([^\[\]]+?)\s+(https?:[^\s\[\]]+)\]/[$2 $1]/g;

        # Remove !links
        s/!([A-Z]\w+)/$1/g;

        # Headings, levels 1 to 6.  Closing "=" marks already present in
        # the source (optional in Kwiki, as far as its formatting rules
        # go) are dropped so they are not doubled.  Lines whose text would
        # itself start with "=" are not treated as headings.
        s/^(={1,6})\s*([^=\s].*?)(?:\s+=+)?$/$1 $2 $1/;

        # Link page names.  @maps is ordered longest name first.
        for my $map ( @maps ) {
            my ($s,$t) = @{$map};
            next unless /$s/;

            # split returns text and protected spans alternately, text
            # first; only the text pieces (even positions) are rewritten.
            my @pieces = split $protected, $_;
            my $linked = 0;
            my $i      = 0;
            for my $piece ( @pieces ) {
                next if $i++ % 2;
                $linked = 1 if $piece =~ s/$s/[[$t]]/g;
            }
            $_ = join '', @pieces;

            warn "$s -> $t\n" if $linked;
        }
    }

    # Escaping comes last on purpose: the markup produced above (<tt> and
    # friends) has to be escaped too in order to survive inside the XML.
    return join( "\n", map { escapeHTML( $_ ) } @lines );
}

# make_page_xml( $text, $title ) - build the <page> element for one page.
#
# $text and $title are inserted exactly as given; nothing is escaped here.
# Dies if $title is missing or false.  Returns the element as a string of
# newline-terminated lines, indented for use inside the import document.
sub make_page_xml {
    my ( $text, $title ) = @_;
    $title or die "I have to have a page title";

    my @xml = (
        '  <page>',
        "    <title>$title</title>",
        '    <revision>',
        qq{      <text xml:space="preserve">$text</text>},
        '    </revision>',
        '  </page>',
    );

    return join( '', map { "$_\n" } @xml );
}

# make_file_xml( @pages ) - wrap the page elements in the <mediawiki>
# root element and return the complete document as one string.
#
# The pages are joined with the list separator $" (a single space unless
# it has been changed), which is what interpolating the array into a
# string does.
sub make_file_xml {
    my @page_chunks = @_;

    my $root_open = '<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.3/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.3/ http://www.mediawiki.org/xml/export-0.3.xsd" version="0.3" xml:lang="en">';
    my $body      = join( $", @page_chunks );

    return "$root_open\n  $body\n</mediawiki>\n";
}
Personal tools