diff --git a/scripts/broken.php b/scripts/broken.php
new file mode 100644
index 000000000..82b0d1cda
--- /dev/null
+++ b/scripts/broken.php
@@ -0,0 +1,154 @@
+<?php /*
+# Description
+This command-line utility tests whether a file is a valid standalone XML
+file, tolerating references to undefined entities. If a directory is given,
+the test is applied to all .xml files in that directory and its
+subdirectories. The tool also honors directories marked with a
+.xmlfragmentdir file: files in such directories are tested with the relaxed
+semantics used for XML fragments. */
+// Surface every PHP diagnostic: this is a developer-facing command line tool.
+ini_set( 'display_errors' , 1 );
+ini_set( 'display_startup_errors' , 1 );
+error_reporting( E_ALL );
+
+// At least one path is required in addition to the script name.
+if ( count( $argv ) < 2 )
+    print_usage_exit( $argv[0] );
+
+// Drop the script name so that only the paths to test remain.
+array_shift( $argv );
+
+// Test each argument in turn. The braces matter: the "does not exist"
+// message must be reported per argument, inside the loop.
+foreach( $argv as $arg )
+{
+    if ( ! file_exists( $arg ) )
+    {
+        echo "Path does not exist: $arg\n";
+        continue;
+    }
+    if ( is_file( $arg ) )
+        testFile( $arg );
+    if ( is_dir( $arg ) )
+        testDir( $arg );
+}
+/**
+ * Writes the usage message to STDERR and terminates the script.
+ *
+ * @param string $cmd Script name as invoked; shown in the usage line.
+ */
+function print_usage_exit( $cmd )
+{
+    fwrite( STDERR , " Wrong parameter count. Usage:\n" );
+    fwrite( STDERR , " {$cmd} path:\n" );
+    exit( 1 ); // Non-zero status so calling scripts can detect the misuse.
+}
+/**
+ * Probes libxml for the wording of its undefined-entity error messages.
+ *
+ * Two tiny documents referencing the undefined entity &ZZZ; are parsed: one
+ * with the reference inside an element, one with the reference after the
+ * root element. The messages libxml reports are handed back through the
+ * reference parameters so the same errors can be recognised in real files.
+ *
+ * @param string $prefix Receives the message text that precedes the entity name.
+ * @param string $suffix Receives the message text that follows the entity name.
+ * @param string $extra  Receives the message reported for a reference placed
+ *                       outside the root element.
+ *
+ * @throws RuntimeException If libxml does not report the expected errors.
+ */
+function setup( string & $prefix , string & $suffix , string & $extra )
+{
+    // Undefined entities generate TWO different error messages on libxml
+    // - "Entity '?' not defined" (for entity inside elements)
+    // - "Extra content at the end of the document" (entity outside elements)
+    $inside  = "<x>&ZZZ;</x>";
+    $outside = "<x/>&ZZZ;";
+
+    $doc = new DOMDocument();
+    $doc->recover = true;
+    $doc->resolveExternals = false;
+    $doc->substituteEntities = false;
+
+    libxml_use_internal_errors( true );
+    libxml_clear_errors(); // Start from an empty buffer so index 0 is ours.
+
+    $doc->loadXML( $inside );
+    $insideMessage = trim( libxml_get_errors()[0]->message ?? "" );
+    libxml_clear_errors();
+
+    $doc->loadXML( $outside );
+    $outsideMessage = trim( libxml_get_errors()[0]->message ?? "" );
+    libxml_clear_errors();
+
+    // An empty prefix/suffix pair would match EVERY error message later on,
+    // silently hiding real problems, so refuse to continue on a failed probe.
+    if ( ! str_contains( $insideMessage , "ZZZ" ) || $outsideMessage === "" )
+        throw new RuntimeException( "Unable to detect libxml messages for undefined entities." );
+
+    // Split around the entity name; only its first occurrence is the separator.
+    [ $prefix , $suffix ] = explode( "ZZZ" , $insideMessage , 2 );
+    $extra = $outsideMessage;
+}
+/**
+ * Tests one file for XML well-formedness and prints the first real error.
+ *
+ * Errors caused by references to undefined entities inside elements are
+ * ignored. Any other error is printed and ends the test of this file, so at
+ * most one error is reported per file.
+ *
+ * @param string $filename Path of the file to test.
+ * @param bool   $fragment When true, the contents are wrapped in a synthetic
+ *                         root element so that XML fragments can be parsed.
+ */
+function testFile( string $filename , bool $fragment = false )
+{
+    // libxml message patterns, detected once and cached between calls.
+    static $prefix = "", $suffix = "", $extra = "";
+    if ( $extra === "" )
+        setup( $prefix , $suffix , $extra );
+
+    $contents = file_get_contents( $filename );
+
+    // loadXML() cannot be given a missing or empty source; report it as a
+    // broken file instead of letting the whole run abort.
+    if ( $contents === false || ( $contents === "" && ! $fragment ) )
+    {
+        echo "Broken XML file:\n";
+        echo " Path: $filename\n";
+        echo " Error: File is empty or cannot be read.\n";
+        echo "\n";
+        return;
+    }
+
+    // Fragments may hold several top-level nodes or bare text, so give them
+    // a single root element. Note this shifts columns on the first line by 3.
+    if ( $fragment )
+        $contents = "<f>{$contents}</f>";
+
+    $doc = new DOMDocument();
+    $doc->recover = true;
+    $doc->resolveExternals = false;
+    $doc->substituteEntities = false;
+
+    libxml_use_internal_errors( true );
+    $doc->loadXML( $contents );
+    $errors = libxml_get_errors();
+    libxml_clear_errors();
+
+    foreach( $errors as $error )
+    {
+        $message = trim( $error->message );
+
+        // Undefined entity references inside elements are expected here.
+        if ( str_starts_with( $message , $prefix ) && str_ends_with( $message , $suffix ) )
+            continue;
+
+        // "Extra content" errors are deliberately NOT skipped: they indicate
+        // an entity reference at an unusual position, or a fragment file
+        // living in a directory that is not marked as such.
+        $hintFragDir = ( $message === $extra );
+
+        $lin = $error->line;
+        $col = $error->column;
+        echo "Broken XML file:\n";
+        echo " Path: $filename [$lin,$col]\n";
+        echo " Error: $message\n";
+        if ( $hintFragDir )
+            echo " Hint: Dir is marked with .xmlfragmentdir on doc-en? If not, check entity references.\n";
+        echo "\n";
+        return; // Only the first relevant error of a file is reported.
+    }
+}
+/**
+ * Tests every .xml file in a directory, then recurses into subdirectories.
+ *
+ * Entries whose name starts with a dot are skipped. When the directory
+ * contains a .xmlfragmentdir marker, its .xml files are tested as XML
+ * fragments; the marker is evaluated per directory and is not passed on to
+ * subdirectories.
+ *
+ * @param string $dir Directory to scan.
+ */
+function testDir( string $dir )
+{
+    $path = realpath( $dir );
+    $files = ( $path === false ) ? false : scandir( $path );
+    if ( $files === false )
+    {
+        echo "Cannot read directory: $dir\n";
+        return;
+    }
+
+    // Decide fragment mode BEFORE testing any file: scandir() sorts by name,
+    // so files starting with characters such as "-" or "+" would otherwise
+    // be tested before the marker is seen.
+    $fragment = in_array( ".xmlfragmentdir" , $files , true );
+    $subdirs = [];
+
+    foreach( $files as $file )
+    {
+        // Skips ".", "..", the marker itself and any other hidden entry.
+        if ( str_starts_with( $file , "." ) )
+            continue;
+
+        $fullpath = realpath( "$path/$file" );
+        if ( $fullpath === false ) // e.g. a dangling symbolic link
+            continue;
+
+        if ( is_dir( $fullpath ) )
+        {
+            $subdirs[] = $fullpath;
+            continue;
+        }
+
+        if ( str_ends_with( $fullpath , ".xml" ) )
+            testFile( $fullpath , $fragment );
+    }
+
+    // Subdirectories are visited only after all files of this directory.
+    foreach( $subdirs as $subdir )
+        testDir( $subdir );
+}