View ¡m../msn-2-html/make-well-formed.pl¡n
#!/usr/local/bin/perl
#
# Name:
#	make-well-formed.pl
#
# Purpose:
#	Read an XML file to see if it is well-formed.
#	Outputs reformatted version of input file.
#
# Note:
#	o Input file created by MSN Explorer 7.00.0021.1900
#	o Output file will be read by msn-2-html.pl
#	o For the current version of Perl, 5.6.1, the string 'ISO-8859-1', in the first
#		line of the output file must be changed to 'ASCII' before running msn-2-html.pl
#	o tab = 4 spaces or die
#
# V 1.00 11-Mar-2002
# ------------------
#	o Initial version
#
# Programs in distribution:
#	MSN2HTML.pm
#	msn-2-html.pl
#	make-well-formed.pl
#
# Author:
#	Ron Savage
# http://savage.net.au/index.html
#
# Licence:
#	Australian Copyright (c) 1999-2002 Ron Savage.
#
#	All Programs of mine are 'OSI Certified Open Source Software';
#	you can redistribute them and/or modify them under the terms of
#	The Artistic License, a copy of which is available at:
#	http://www.opensource.org/licenses/index.html

use strict;
use warnings;

use XML::DOM;

# -----------------------------------------------------------------
# File-scoped state shared by the parser handlers below.

my(@context);										# Stack of attribute hash refs, 1 per open element.
my($indentLevel, $indentPrefix) = (0, '');			# Current depth, and the matching run of tabs.
my(%name, @name);									# Element declarations seen by elementHandler.
my($optionEchoAttributes) = 1;	# 0 or 1.
my($optionOutputStyle) = 0;		# 0 or 1. 1 => text and end tag on their own lines.
my($text) = '';										# Raw character data collected since the last end tag.
my($xml) = [];										# Output lines, joined with "\n" by the caller.

# -----------------------------------------------------------------
# Escape a string for output as XML.
#
# Parameters:
#	$string:		The (already decoded) text to escape.
#	$isAttribute:	True if the result goes inside a single-quoted
#					attribute value, in which case ' is escaped too.
#
# Returns:
#	The escaped copy. The caller's string is not modified.

sub escapeXML
{
	my($string, $isAttribute) = @_;

	# '&' must be done first, otherwise the '&' which starts each of
	# the entities generated below would itself be escaped.

	$string =~ s/&/&amp;/g;
	$string =~ s/</&lt;/g;
	$string =~ s/>/&gt;/g;
	$string =~ s/'/&apos;/g if ($isAttribute);

	return $string;

}	# End of escapeXML.

# -----------------------------------------------------------------
# Character data handler.
#
# The parser may deliver one run of text in several calls (e.g. either
# side of an entity reference), so the chunks are collected untouched
# here and only trimmed in endHandler. Trimming each chunk would glue
# 'A &amp; B' together as 'A&B'.

sub charHandler
{
	my($expat, $string) = @_;

	$text .= $string;

}	# End of charHandler.

# -----------------------------------------------------------------
# Element declaration handler.
#
# Records the content model of each declared element in %name, and
# the order of declaration in @name.

sub elementHandler
{
	my($parser, $name, $model) = @_;

	# Strip off leading & trailing ().

	$model = $1 if ($model =~ /^\((.+)\)$/);

	$name{$name} = $model;

	push(@name, $name);

}	# End of elementHandler.

# -----------------------------------------------------------------
# End tag handler.
#
# Appends the element's text (if any) and its closing tag to @$xml,
# and resets the collected text.

sub endHandler
{
	my($expat, $element) = @_;

	# Recover the attributes stacked in the start handler.
	# $attributeRef is the current element's attributes, and the top of the
	# stack, $context[$#context], are the attributes of the parent, if any.

	my($attributeRef) = pop(@context);

	# Trim whitespace from the complete text, then escape it.
	# length() is tested rather than truth, so that '0' is not lost.

	my($content) = $text;
	$text = '';
	$content =~ s/^\s+//;
	$content =~ s/\s+$//;
	$content = escapeXML($content);

	# For output style 1 the text sits on its own line, at the
	# indent of the element's content, i.e. before popIndent().

	my($textLine) = length($content) ? "$indentPrefix$content" : '';

	popIndent();

	if ($optionOutputStyle)
	{
		push(@$xml, $textLine);
		push(@$xml, "$indentPrefix</$element>");
	}
	elsif (length($content) )
	{
		# Put the text and the end tag on the same line as the start tag.

		my($prefix) = pop(@$xml);

		push(@$xml, "$prefix$content</$element>");
	}
	else
	{
		push(@$xml, '');
		push(@$xml, "$indentPrefix</$element>");
	}

	if ($optionEchoAttributes)
	{
		my($attribute) = join(' ', map{"$_ => '$attributeRef->{$_}'"} sort(keys(%$attributeRef) ) );

		# NOTE(review): Echoing is disabled. The text which was to be
		# output after the prefix is missing from this line - confirm
		# against the distributed version before re-enabling.
		# push(@$xml, "$indentPrefix") if ($attribute);
	}

}	# End of endHandler.

# -----------------------------------------------------------------
# Step the indent back 1 level.

sub popIndent
{
	$indentLevel--;

	$indentPrefix = "\t" x $indentLevel;

}	# End of popIndent.

# -----------------------------------------------------------------
# Step the indent forward 1 level.

sub pushIndent
{
	$indentLevel++;

	$indentPrefix = "\t" x $indentLevel;

}	# End of pushIndent;

# -----------------------------------------------------------------
# Start tag handler.
#
# Appends the start tag to @$xml:
#	o No attributes:		<element>
#	o 1 attribute:			<element name='value'>
#	o Several attributes:	<element, then 1 attribute per line
#							indented 1 level, then > on its own line.
# Attributes are output in sorted order so the output is repeatable.

sub startHandler
{
	my($expat, $element, %attribute) = @_;

	# Save the attributes so they are available in the end handler.
	# The values are escaped into copies below, so the saved hash
	# keeps the values as delivered by the parser.

	push(@context, \%attribute);

	# Save the tag and the attribute list (if any).
	# Nested elements are preceded by a blank line.

	push(@$xml, '') if ($indentLevel);

	my(@key)	= sort(keys(%attribute) );
	my($open)	= "$indentPrefix<$element";

	if (scalar(@key) > 1)
	{
		# If there are several attributes, save 1 per line.

		push(@$xml, $open);

		pushIndent();

		for my $key (@key)
		{
			push(@$xml, "$indentPrefix$key='" . escapeXML($attribute{$key}, 1) . "'");
		}

		popIndent();

		push(@$xml, "$indentPrefix>");
	}
	elsif (scalar(@key) == 1)
	{
		push(@$xml, "$open $key[0]='" . escapeXML($attribute{$key[0]}, 1) . "'>");
	}
	else
	{
		push(@$xml, "$open>");
	}

	pushIndent();

}	# End of startHandler.
# -----------------------------------------------------------------
# Main program.
#
# Parses the input file, which fills @$xml via the handlers, and then
# writes the collected lines to the output file. Both files live in
# $work_dir_name.

my($work_dir_name)	= '/temp/msn';
my($inputFileName)	= 'Favorites.xml';
my($outputFileName)	= 'Favorites-nice.xml';

chdir($work_dir_name) || die("Can't chdir($work_dir_name): $!");

die("Cannot find file: $inputFileName") if (! -f $inputFileName);

# Declare the parser.

my($parser) = XML::DOM::Parser -> new();

$parser -> setHandlers
(
	Element	=> \&elementHandler,
	Start	=> \&startHandler,
	End		=> \&endHandler,
	Char	=> \&charHandler,
);

# IE4 can't cope with ''
# NOTE(review): The text inside the quotes above is missing, and the
# first string pushed below was empty, although the notes at the top of
# this file say the first line of the output contains 'ISO-8859-1'.
# The XML declaration is assumed - confirm its exact wording.

push(@$xml, "<?xml version='1.0' encoding='ISO-8859-1'?>");
push(@$xml, '');

# Parse the document and call the handlers.

my($doc) = $parser -> parsefile($inputFileName);

# 3-argument open so the file name cannot be mistaken for a mode.

open(my $out, '>', $outputFileName) || die("Can't open(> $outputFileName): $!");
print $out join("\n", @$xml), "\n";

# Errors in buffered writes only show up when the file is closed.

close($out) || die("Can't close($outputFileName): $!");

print "Wrote $outputFileName\n";