#! /usr/bin/perl -w
# Try to detect markup errors in translations.

# Author: Peter Moulder <pmoulder@mail.csse.monash.edu.au>
# Copyright (C) 2004 Monash University
# License: GNU GPL v2 or (at your option) any later version.

# Initial egrep version:
#mydir=`dirname "$0"`
#egrep '<b>[^<>]*(>|<([^/]|/([^b"]|b[^>])))' "$mydir"/*.po
# Somewhat simplified by use of negative lookahead in perl.
# (The egrep version as written can't detect problems that span a line,
# e.g. unterminated `<b>'.  One way of doing the s/"\n"//g thing would be with
# tr and sed, but that requires a sed that allows arbitrary line lengths, which
# many non-GNU seds don't.)

use strict;

# Building blocks for the checks in the main loop.  Note that the patterns are
# applied to the raw text of the .po file, so a double quote inside a
# translation string appears there as backslash + quote.

# One comment line: `#', the rest of the line, and its terminating newline.
my $com = qr/(?:\#[^\n]*\n)/;
# One double-quoted string followed by a newline.  Inside the quotes, either
# an ordinary character (neither quote nor backslash) or a backslash escape.
my $str = qr/(?:"(?:[^"\\]|\\.)*"\n)/;
# Optional tag attributes: one or more spaces, then anything up to (but not
# including) the next `<' or `>'.
my $attrs = qr/(?: +[^<>]*)?/;
# One recognised <span> attribute: spaces, one of the listed attribute names,
# `=', and a value enclosed in escaped quotes (\"...\") that itself contains
# neither a backslash nor a quote.
my $span_attr = qr/(?: +(?:font_(?:desc|family)|face|size|style|weight|variant|stretch|(?:fore|back)ground|underline|rise|strikethrough|fallback|lang)\=\\\"[^\\\"]*\\\")/;

# Exit status of the script: stays 0 unless po_error() reports a problem.
my $rc = 0;

# Report a problem with the entry currently held in $_.
#
# Prints "<file>: <message>:" followed by the text of the entry, where <file>
# is the name of the input file being read ($ARGV) with any leading directory
# part removed.  Also records the failure in $rc so that the script exits
# with a non-zero status.
sub po_error ($) {
    my $msg = shift;

    # Copy $ARGV and strip everything up to and including the last `/'.
    (my $name = $ARGV) =~ s,.*/,,;

    print "$name: $msg:\n$_";
    $rc = 1;
}

# Paragraph mode: every read returns one blank-line-separated block of the
# input, i.e. one entry together with its comments.
$/ = '';

# Reference for the markup language:
# http://developer.gnome.org/doc/API/2.0/pango/PangoMarkupFormat.html
# (though not all translation strings will be pango markup strings).
ENTRY: while (<>) {
    # A block made up solely of comment lines has nothing to check.
    next ENTRY if m{\A${com}*\Z};

    # Join continuation lines, so that msgid and msgstr each become one
    # quoted string on one line.
    s/"\n"//g;

    # The checks below run in order and at most one problem is reported per
    # entry: the first check that trips ends processing of that entry.

    # Overall shape: a msgid line followed by a msgstr line, with comment
    # lines allowed before, between and after them.
    unless (m{\A${com}*msgid[^\n]*\n${com}*msgstr[^\n]*\n${com}*\Z}) {
        po_error('Not in msg format');
        next ENTRY;
    }

    # Same shape, but each keyword must be followed by exactly one
    # well-formed quoted string.
    unless (m{\A${com}*msgid ${str}${com}*msgstr ${str}${com}*\Z}) {
        po_error('Mismatched quotes');
        next ENTRY;
    }

    # An opening tag whose matching closing tag does not follow before the
    # next `<' or `>'.
    if (m{<([bisu]|big|su[bp]|small|span|tt)${attrs}>(?![^<>]*</\1>)}) {
        po_error("unclosed <$1>");
        next ENTRY;
    }

    # Every tag checked here must be closed right after its name, with at
    # most some spaces before the `>'.
    if (m{<([bisu]|big|su[bp]|small|tt)\b(?! *>)}) {
        po_error("Unexpected characters in <$1> tag");
        next ENTRY;
    }

    # <span> may carry attributes, but only the ones $span_attr recognises.
    if (m{<span(?!${span_attr}* *>)}) {
        po_error("Unexpected <span> attribute");
        #m{<span(?>${span_attr}*)([^<>]*)};
        #po_error("Unexpected <span> attribute `$1'");
        next ENTRY;
    }

    # Should also check that we don't have a </ELEM> without a corresponding <ELEM>.
}

# Some makefiles (currently the top-level Makefile.am) expect this script to
# exit 1 if any problems found.
exit $rc;
