[OS X TeX] OT: Tool for Comparing PDF files ?

Michael Sternberg sternberg at anl.gov
Fri Mar 2 10:41:41 EST 2007


Hello,

On Mar 1, 2007, at 9:34 , Steffen Wolfrum wrote:
> Does someone know a tool for comparing PDF documents?
>
> Now and then I make small changes in the source files and would  
> feel safer if I had a tool that would show me whether a resulting  
> PDF has / has not differences (to a PDF that was made before I made  
> the changes) …
>

Try the script tacked-on below.  It does a graphical diff:

	diffps -h
	diffps fileA.pdf fileB.pdf

You need the netpbm[plus] package and Ghostscript.  By default, it
uses "xv" ("Preview" on MacOS) to display differing pages.  Use
"-x foo" to specify another viewer, which must read pgm and png files.

For repeated uses, it uses a page-cache, which you can override with
-f and clean with -c.


Regards, Michael
------------------------------------------------------------
#!/bin/bash
# compare pages in two similar ps-files by highlighting their differences
# (uses grayscale pixmaps for comparing)
#
# Usage:  (see -h)
#
# Created by Michael Sternberg, 2001-2007. Use at your own risk.

# Name the script was invoked under; shown in the usage text.
# (Parameter expansion copes with blanks or a leading "-" in $0.)
PROGRAM=${0##*/}

# Defaults -- each one can be replaced by a command-line option.
CACHE=.diffps			# -d: directory for rendered pages and results
PAGES="*"			# -p: shell glob selecting the page numbers
RES=72				# -r: resolution handed to gs
VIEWER="xv -nolimit -24"	# -x: viewer command; word-split when it is run
PAIR_FILE=pairs			# list of differing pages, written inside $CACHE
VIEWS=1				# -0 .. -3: which images to display
HIST_THRESHOLD=1		# -t: differing pixels needed to report a page

# Flags set by -f and -c.  Cleared here so that variables of the same name
# inherited from the environment cannot switch them on.
FORCE=
CLEAN=

case $(uname) in
   Darwin)	VIEWER="open -a Preview" ;;
esac

# Usage [STATUS]
#   Print the help text on stdout and terminate the shell it runs in.
#   STATUS is the exit status to use (default: 0).
#   Reads PROGRAM, CACHE, PAGES, HIST_THRESHOLD, RES and VIEWER so that the
#   text shows the values currently in effect.
Usage () {
     cat << EOT
Compare postscript/pdf files visually.
Usage: $PROGRAM [options] file1 [file2 | dir]

   If file2 is not given, the latest  version from CVS is used.

Options:
   Page rendering:
       -d directory
       		directory for page cache (default: "$CACHE")

       -p pages
       		view only the given pages (quoted shell glob pattern)
		(default: "$PAGES")

       -t threshold
       		minimum number of pixels to differ (default: $HIST_THRESHOLD)

       -r res	Resolution for pixmap rendering (default: $RES)

       -f	re-do comparison (force; discard cache)

   Viewing:
       -0	report only
       -1	view differing pages in diff-mode (red = recent; default)
       -2	view differing pages pairwise
       -3	both of the above
       -x viewer	specify image viewer for above (default: $VIEWER)

   General:
       -h	This help.
       -c	clean cache

Created by Michael Sternberg, 2001-2007. Use at your own risk.
EOT
     exit "${1:-0}"
}

# Clean_Cache
#   Remove the page-cache directory named by $CACHE, with everything in it.
#   A name containing a slash is refused: a message goes to stderr and the
#   shell exits with status 1, leaving the removal to the user.
Clean_Cache () {
     case "$CACHE" in
       */*)	echo "$CACHE: not a subdirectory -- please clean manually." 1>&2
		exit 1 ;;
     esac
     # quoted, so that a name with blanks is removed as one directory
     rm -rf -- "$CACHE"		# better know what you're doing
}


     # parse options

# Parse_Options ARG...
#   Consume the leading option words of ARG..., storing their values in the
#   globals CACHE, PAGES, RES, HIST_THRESHOLD, VIEWS, VIEWER, FORCE and
#   CLEAN.  Parsing stops at the first word that does not start with "-",
#   or after a literal "--".
#   Sets N_OPT_ARGS to the number of words consumed, for the caller to
#   shift away.
#   -h hands over to Usage.  An unknown option, or an option that lacks its
#   value, prints a message and the help text on stderr and exits with 1.
Parse_Options () {
     local n_words=$#

     while [ $# -gt 0 ]
     do
         case "$1" in
           -d|-p|-r|-t|-x)
                 # "shift 2" with only one word left fails without
                 # shifting, which would keep this loop running forever
                 if [ $# -lt 2 ]; then
                     echo "$0: option $1 requires an argument" 1>&2
                     (Usage) 1>&2
                     exit 1
                 fi
                 case "$1" in
                   -d)   CACHE=$2 ;;
                   -p)   PAGES=$2 ;;
                   -r)   RES=$2 ;;
                   -t)   HIST_THRESHOLD=$2 ;;
                   -x)   VIEWER=$2 ;;
                 esac
                 shift 2 ;;

           -f)   FORCE=1; shift ;;

           -0)   VIEWS=0; shift ;;
           -1)   VIEWS=1; shift ;;
           -2)   VIEWS=2; shift ;;
           -3)   VIEWS=3; shift ;;

           -c)   CLEAN=1; shift ;;
           -h)   Usage ;;

           --)   shift; break ;;
           -*)   echo "$0: unknown option $1" 1>&2
                 # Usage ends the shell it runs in; inside a subshell it
                 # returns here, so the failure status below is reached
                 (Usage) 1>&2
                 exit 1 ;;
           *)    break ;;
         esac
     done

     N_OPT_ARGS=$((n_words - $#))
}

Parse_Options "$@"
shift "$N_OPT_ARGS"

     # clean cache.  Exit if this is the only task.
if [ -n "$CLEAN" ]; then
     Clean_Cache
     case $# in
       0)	exit ;;
     esac
fi

     # Reject a wrong number of file arguments before anything is derived
     # from them.  Usage ends the shell it runs in, so it is run in a
     # subshell to let the failure status below take effect.
case $# in
   1|2)	;;
   *)	echo Invalid input. 1>&2
	(Usage) 1>&2
	exit 1
	;;
esac

     # attempt to create cache dir; the error of an already existing
     # directory is deliberately discarded
mkdir "$CACHE" 2> /dev/null

     # A_PS is the first file.  B_PS is the second one; when that is a
     # directory (or absent, which selects the cache directory), the file
     # of the same name inside it.
A_PS="$1"
B_PS="${2-$CACHE}"
[ -d "$B_PS" ] && B_PS="$B_PS/$A_PS"

if [ $# -eq 1 ]; then
     # get older copy from CVS.  A_PS may lie in a subdirectory; the
     # matching directory below the cache has to exist for the redirection.
     mkdir -p -- "$(dirname -- "$B_PS")" || exit
     cvs up -p "$A_PS" > "$B_PS" || exit
     # swap A and B to have named file as B, i.e., newer copy
     X="$B_PS"; B_PS="$A_PS"; A_PS="$X"
fi

     # flat names for the cache: every "/" of the path becomes "_"
A_BASE="${A_PS//\//_}"
B_BASE="${B_PS//\//_}"

     # convert to pixmap format; use cache when available and not outdated

# Render_Pages FILE BASE
#   Render the pages of FILE with Ghostscript into the graymaps
#   $CACHE/BASE-001.pgm, $CACHE/BASE-002.pgm, ... at $RES dpi.
#   Nothing is done when the first page exists, FILE is not newer than it
#   and $FORCE is empty.
#   Returns the status of gs, or 0 when the cache was used.
Render_Pages () {
     local src=$1 base=$2
     local first_page="$CACHE/$base-001.pgm"

     if [[ -f $first_page && ! $src -nt $first_page && -z $FORCE ]]; then
         return 0
     fi

     # drop the pages of an earlier rendering, so that a document which
     # became shorter leaves no stale pages behind
     rm -f -- "$CACHE/$base"-[0-9][0-9][0-9].pgm

     # -dBATCH makes gs exit after the last file instead of prompting
     gs -dNOPAUSE -dBATCH -sDEVICE=pgmraw -r"$RES" \
         -sOutputFile="$CACHE/$base-%03d.pgm" \
         "$src"
}

Render_Pages "$A_PS" "$A_BASE" || exit
Render_Pages "$B_PS" "$B_BASE" || exit

     # compare pages
OWD=$(pwd)
     # All names below are relative to the cache directory.  Stop if it
     # cannot be entered, rather than create and remove files elsewhere.
cd "$CACHE" || exit
rm -f "$PAIR_FILE" 2> /dev/null

     # $PAGES is left unquoted on purpose: it is a glob over page numbers
for A_PGM in "$A_BASE"-${PAGES}.pgm
do
     # a pattern that matches nothing is handed over literally; skip it
     [ -f "$A_PGM" ] || continue

     # page number = text between the last "-" and ".pgm"
     SUFFIX="${A_PGM##*-}"
     N=${SUFFIX%.pgm}

     B_PGM="$B_BASE-${SUFFIX}"
     if [ ! -f "$B_PGM" ]; then
         # without a counterpart there is nothing to subtract
         echo "$N only in $A_PS" 1>&2
         continue
     fi

     H_DAT="$A_BASE-$B_BASE-${N}-hist.dat"
     V="$A_BASE-$B_BASE-${N}-view.png"
     D="$A_BASE-$B_BASE-${N}-diff.png"

     # The cached histogram is only valid while neither page has been
     # rendered anew since it was written.
     FRESH=
     if [[ -n $FORCE || ! -f $H_DAT \
           || $A_PGM -nt $H_DAT || $B_PGM -nt $H_DAT ]]; then
         FRESH=1
         # get histogram of diffs
         pnmarith -diff "$A_PGM" "$B_PGM" | tee "$D".pgm | pgmhist > "$H_DAT"
     fi

     ## Sample histogram:
     # value   count   b%      w%
     # -----   -----   --      --
     # 0       484690    100%    100%
     # 255     14        100%  0.00289%

     # count non-black pixels: sum the counts of all rows whose gray value
     # is a number above 0 (the heading rows do not start with a number)
     H_COUNT=$(awk '$1 ~ /^[0-9]+$/ && $1 > 0 { sum += $2 }
                    END { print 1*sum }' "$H_DAT")

     # assemble views of differing pages (only)
     if [ "$H_COUNT" -ge "$HIST_THRESHOLD" ]; then
         echo "$N differ" 1>&2
         if [[ -n $FRESH || ! -f $V || ! -f $D ]]; then
             # the raw difference image is only on disk when the
             # histogram was computed in this run
             [ -f "$D".pgm ] || pnmarith -diff "$A_PGM" "$B_PGM" > "$D".pgm
             rgb3toppm "$A_PGM" "$B_PGM" "$B_PGM" \
                 | pnmtopng -transparent white -background grey50 > "$V"
             pnmtopng "$D".pgm > "$D"
         fi
         # one line per differing page: diff view, page of A, page of B
         echo "$V" "$A_PGM" "$B_PGM" >> "$PAIR_FILE"
     fi
     rm -f "$D".pgm 2> /dev/null

     ## When memory is tight -- This renders options "-2" and "-3" useless.
     #if [ -z "$VIEWS" ]; then
     #	rm "$A_PGM" "$B_PGM"
     #fi
done

     # pages that exist in the second file only
for B_PGM in "$B_BASE"-${PAGES}.pgm
do
     [ -f "$B_PGM" ] || continue
     SUFFIX="${B_PGM##*-}"
     [ -f "$A_BASE-${SUFFIX}" ] || echo "${SUFFIX%.pgm} only in $B_PS" 1>&2
done

# Translate the view mode into the columns of the pair file to show.
# Each line of that file holds: diff view, page of A, page of B.
# Any other mode (e.g. 0, "report only") ends the script here.
case "$VIEWS" in
   3)  COLS=1-3 ;;	# diff-view and page pairs
   2)  COLS=2-3 ;;	# page pairs only
   1)  COLS=1 ;;	# diff-view only
   *)  exit ;;
esac

# Use -r (--no-run-if-empty) when this xargs understands it, so that the
# viewer is not started without any file.
if xargs -r < /dev/null 2> /dev/null
then
     XARGS_ARGS="-r"
fi

# $VIEWER is expanded unquoted so that it may carry its own options.
if [ -f $PAIR_FILE ]
then
     cut -d' ' -f$COLS $PAIR_FILE \
	 | xargs $XARGS_ARGS $VIEWER
fi

# EOF


------------------------- Helpful Info -------------------------
Mac-TeX Website: http://www.esm.psu.edu/mac-tex/
TeX FAQ: http://www.tex.ac.uk/faq
List Archive: http://tug.org/pipermail/macostex-archives/
List Reminders & Etiquette: http://www.esm.psu.edu/mac-tex/list/





More information about the MacOSX-TeX mailing list