Validateeach.sh
From DLXS Documentation
validateeach.sh
1 #!/bin/sh
2 # Validate every *.xml file in DATADIR with onsgmls; errors for FILE go to FILE.err, and empty reports are removed at the end.
3 #DATADIR=$DLXSROOT/prep/s/samplefa/data/
4 #XMLDECL=$DLXSROOT/misc/sgml/xml.dcl
5 #SINGLEDOCTYPE=$DLXSROOT/prep/s/samplefa/samplefa.text.inp
6 BINDIR=$DLXSROOT/bin/s/samplefa/
7 # BINDIR must end in "/" because it is prepended directly to fixdoctype.pl below.
8 # Argument processing
9 # -d data directory, -x SGML declaration file, -t file holding the replacement DOCTYPE; each path is checked before use.
10 USAGE="USAGE: $0 -d DATA_DIRECTORY -x XML_DECLARATION -t DOCTYPE_DECLARATION"
11
12 while getopts d:x:t: OPT
13 do
14 case "$OPT" in
15 d) if [ -d "${OPTARG}" ]
16 then
17 DATADIR=${OPTARG}
18 else
19 echo "$0: Data directory: $OPTARG is not a directory." >&2
20 exit 1
21 fi
22 ;;
23 x) if [ -e "${OPTARG}" ]
24 then
25 XMLDECL=${OPTARG}
26 else
27 echo "$0: xmldecl file: $OPTARG does not exist." >&2
28 exit 1
29 fi
30 ;;
31 t) if [ -e "${OPTARG}" ]
32 then
33 SINGLEDOCTYPE=${OPTARG}
34 else
35 echo "$0: Doctype file $OPTARG does not exist." >&2
36 exit 1
37 fi
38 ;;
39
40 \?) echo "$USAGE" 1>&2
41 exit 1
42 ;;
43 esac
44 done
45 #---------------------------------------------------------------------
46 # Refuse to run without a data directory: an empty DATADIR would make the loop and the final find operate on the current directory.
47 : "${DATADIR:?$USAGE}"
48 for file in "${DATADIR%/}"/*.xml
49 do
50 [ -e "$file" ] || continue # nothing matched: skip the unexpanded pattern
51 echo "working on $file"
52 "${BINDIR}fixdoctype.pl" "$file" > "$file.tmp"
53 onsgmls -wxml -w no-explicit-sgml-decl -s -f "$file.err" ${XMLDECL:+"${XMLDECL}"} ${SINGLEDOCTYPE:+"${SINGLEDOCTYPE}"} "$file.tmp"
54 rm "$file.tmp"
55 done
56
57 # Only the empty .err reports are removed; other empty files under DATADIR are left alone.
58 find "$DATADIR" -type f -name '*.err' -size 0 -exec rm {} \;
59
- Lines 12-44 process the command line arguments passed to the script
- Lines 48-55 process each file in $DATADIR whose name ends with ".xml"
- Line 52 creates a copy of the EAD with a ".tmp" extension after removing the doctype declaration by running the Perl script "fixdoctype.pl". This program correctly deals with multiline document type declarations.
- Line 53 runs onsgmls to validate the copy using the doctype declaration in $SINGLEDOCTYPE
- -wxml tells onsgmls to warn about constructs that are not allowed in xml
- -w no-explicit-sgml-decl tells onsgmls not to warn us that the SGML declaration was not implied
- -s suppresses output. That means instead of getting a parse tree, we just get any errors
- -f $file.err tells onsgmls to write errors to a file with the ".err" extension
- $XMLDECL is an SGML Declaration for valid XML documents.
- $SINGLEDOCTYPE is a replacement for the deleted document type declaration. It points to a copy of the EAD2002 DTD. If you have custom entities such as a logo in your finding aids, you may want to declare them in your customized version of samplefa.text.inp and samplefa.xml.inp.
- Line 54 removes the temporary file
- Line 58 removes 0-byte files (any *.err files that are completely empty)
samplefa.text.inp
<!DOCTYPE ead SYSTEM "/l1/misc/sgml/ead.dtd" []>
See http://openjade.sourceforge.net/doc/nsgmls.htm for more info on onsgmls
