[OS X TeX] OT: Tool for Comparing PDF files ?
Michael Sternberg
sternberg at anl.gov
Fri Mar 2 10:41:41 EST 2007
Hello,
On Mar 1, 2007, at 9:34 , Steffen Wolfrum wrote:
> Does someone know a tool for comparing PDF documents?
>
> Now and then I make small changes in the source files and would
> feel safer if I had a tool that would show me whether a resulting
> PDF does or does not differ (from a PDF that was made before I made
> the changes) …
>
Try the script tacked-on below. It does a graphical diff:
diffps -h
diffps fileA.pdf fileB.pdf
You need the netpbm[plus] package and Ghostscript. By default, it
uses "xv" ("Preview" on MacOS) to display differing pages. Use
"-x foo" to specify another viewer, which must read ppm and png files.
For repeated use, it keeps a page cache, which you can override with
-f and clean with -c.
Regards, Michael
------------------------------------------------------------
#!/bin/bash
# compare pages in two similar ps-files by highlighting their differences
# (uses grayscale pixmaps for comparing)
#
# Usage: (see -h)
#
# Created by Michael Sternberg, 2001-2007. Use at your own risk.
# ---- Defaults; most can be overridden by a command-line option ----
# Script name for the usage text.  Parameter expansion replaces
# `basename $0`, which broke when the script path contained blanks.
PROGRAM=${0##*/}
CACHE=.diffps             # page-cache directory (-d)
PAGES="*"                 # shell glob selecting the page numbers (-p)
RES=72                    # resolution handed to gs -r (-r)
VIEWER="xv -nolimit -24"  # image viewer command line (-x)
PAIR_FILE=pairs           # list of differing pages, written in the cache
VIEWS=1                   # which images to display (-0 ... -3)
HIST_THRESHOLD=1          # minimum number of differing pixels (-t)
# These two are only ever tested with -n.  Clear them so that variables
# of the same name inherited from the environment cannot switch on the
# behavior of -f or -c (the latter runs rm -rf on the cache).
FORCE=
CLEAN=
# xv is rarely present on MacOS; use Preview there.
case $(uname) in
  Darwin) VIEWER="open -a Preview" ;;
esac
# Usage [status]
#   Print the help text on stdout, then terminate the script.
#   status  exit status to terminate with (default: 0), so that error
#           paths can show the help and still report failure.
#   The defaults shown are the current values of CACHE, PAGES,
#   HIST_THRESHOLD, RES and VIEWER.
Usage () {
  cat << EOT
Compare postscript/pdf files visually.
Usage: $PROGRAM [options] file1 [file2 | dir]
If file2 is not given, the latest version from CVS is used.
Options:
Page rendering:
-d directory
directory for page cache (default: "$CACHE")
-p pages
view only the given pages (quoted shell glob pattern)
(default: "$PAGES")
-t threshold
minimum number of pixels to differ (default: $HIST_THRESHOLD)
-r res Resolution for pixmap rendering (default: $RES)
-f re-do comparison (force; discard cache)
Viewing:
-0 report only
-1 view differing pages in diff-mode (red = recent; default)
-2 view differing pages pairwise
-3 both of the above
-x viewer specify image viewer for above (default: $VIEWER)
General:
-h This help.
-c clean cache
Created by Michael Sternberg, 2001-2007. Use at your own risk.
EOT
  exit "${1:-0}"
}
# Clean_Cache
#   Remove the page-cache directory named by $CACHE, recursively.
#   As a safety net only a plain name is accepted: an empty value, ".",
#   ".." or anything containing a "/" is refused with a message on
#   stderr, and the script terminates with status 1.
Clean_Cache () {
  case "$CACHE" in
    "" | . | .. | */*)
      echo "$CACHE: not a subdirectory -- please clean manually." 1>&2
      exit 1 ;;
  esac
  # Quoted, and "--" so a name with blanks or a leading "-" is taken
  # as exactly one operand.
  rm -rf -- "$CACHE"  # better know what you're doing
}
# parse options
#
# Parse_Options "$@"
#   Read the leading option words and set CACHE, PAGES, RES, FORCE,
#   HIST_THRESHOLD, VIEWS, VIEWER and CLEAN from them.
#   The number of words consumed is left in N_OPTION_WORDS so the
#   caller can shift them away.  -h shows the help via Usage, which ends
#   the script.  An unknown option, or an option lacking its argument,
#   prints a message and the help on stderr and exits with status 1.
#   (The earlier hand-written loop never terminated on e.g. a trailing
#   "-d", because "shift 2" fails when only one word is left.)
Parse_Options () {
  local opt
  OPTIND=1
  # leading ":" = silent mode; getopts then reports problems through
  # the pseudo-options ":" (missing argument) and "?" (unknown option)
  while getopts ':d:p:r:t:x:fc0123h' opt; do
    case "$opt" in
      d) CACHE=$OPTARG ;;
      p) PAGES=$OPTARG ;;
      r) RES=$OPTARG ;;
      t) HIST_THRESHOLD=$OPTARG ;;
      x) VIEWER=$OPTARG ;;
      f) FORCE=1 ;;
      c) CLEAN=1 ;;
      [0-3]) VIEWS=$opt ;;
      h) Usage ;;
      :) echo "$0: option -$OPTARG requires an argument" 1>&2
         # Usage ends with its own exit; the subshell keeps that from
         # pre-empting the failure status below.
         ( Usage ) 1>&2
         exit 1 ;;
      \?) echo "$0: unknown option -$OPTARG" 1>&2
          ( Usage ) 1>&2
          exit 1 ;;
    esac
  done
  N_OPTION_WORDS=$(( OPTIND - 1 ))
}
Parse_Options "$@"
shift "$N_OPTION_WORDS"
# Honor -c: wipe the page cache first.  When no file arguments are
# left there is nothing further to do, so the script ends here with
# the status that Clean_Cache returned.
if test -n "$CLEAN"
then
  Clean_Cache
  clean_status=$?
  if [ $# -eq 0 ]
  then
    exit $clean_status
  fi
fi
# Work out which two files to compare.  Results:
#   A_PS, B_PS      the two documents (in the one-argument case A is
#                   the copy fetched from CVS, B the named file)
#   A_BASE, B_BASE  the same names with every "/" turned into "_";
#                   they serve as file-name prefixes inside the cache
#
# Validate the argument count first, before anything is created.
case $# in
  1 | 2) ;;
  *) echo Invalid input. 1>&2
     # Usage ends with its own exit; run it in a subshell so that the
     # failure status below is actually reached.
     ( Usage ) 1>&2
     exit 1 ;;
esac
# create the cache dir unless it is already there
if [ ! -d "$CACHE" ]; then
  mkdir -p -- "$CACHE" || exit
fi
A_PS="$1"
B_PS="${2-$CACHE}"
# a directory as second operand stands for "same file name in there"
[ -d "$B_PS" ] && B_PS="$B_PS/$A_PS"
if [ $# -eq 1 ]; then
  # get older copy from CVS (it lands in the cache directory)
  cvs up -p "$A_PS" > "$B_PS" || exit
  # swap A and B to have named file as B, i.e., newer copy
  X="$B_PS"; B_PS="$A_PS"; A_PS="$X"
fi
A_BASE="${A_PS//\//_}"
B_BASE="${B_PS//\//_}"
# convert to pixmap format; use cache when available and not outdated
#
# Render_Pages file base
#   Render each page of "file" with Ghostscript into the grayscale
#   pixmaps $CACHE/<base>-001.pgm, -002.pgm, ...
#   Nothing is done when page 001 is already cached, is not older than
#   "file", and -f (FORCE) was not given.
#   Returns the status of gs, or 0 when the cache was used.
Render_Pages () {
  local src=$1 base=$2
  local first="$CACHE/$base-001.pgm"
  if [ -f "$first" ] && [ ! "$src" -nt "$first" ] && [ -z "$FORCE" ]; then
    return 0
  fi
  # Drop pages of an earlier rendering: if the document got shorter,
  # its former trailing pages would otherwise still be compared.
  rm -f -- "$CACHE/$base"-[0-9][0-9][0-9].pgm
  # -dBATCH makes gs exit after the last file instead of entering its
  # interactive prompt.
  gs -dBATCH -dNOPAUSE -sDEVICE=pgmraw -r"$RES" \
    -sOutputFile="$CACHE/$base-%03d.pgm" \
    "$src"
}
Render_Pages "$A_PS" "$A_BASE" || exit
Render_Pages "$B_PS" "$B_BASE" || exit
# compare pages
#
# Count_Differing_Pixels histfile
#   Print the number of non-black pixels listed in a pgmhist table.
#   Rows look like "value count b% w%"; every row whose value is a
#   number above 0 is summed, header rows are skipped by the numeric
#   test.  Prints 0 for an empty table.
## Sample histogram:
# value count b% w%
# ----- ----- -- --
# 0 484690 100% 100%
# 255 14 100% 0.00289%
Count_Differing_Pixels () {
  awk '$1 ~ /^[0-9]+$/ && $1 > 0 { sum += $2 } END { print 1*sum }' "$1"
}
OWD=$(pwd)
cd "$CACHE" || exit
rm -f -- "$PAIR_FILE"
for A_PGM in "$A_BASE"-${PAGES}.pgm
do
  # an unmatched glob stays literal -- nothing to compare then
  [ -f "$A_PGM" ] || continue
  SUFFIX="${A_PGM##*-}"
  # skip pixmaps of another document whose name merely starts alike
  [ "$A_PGM" = "$A_BASE-$SUFFIX" ] || continue
  N=${SUFFIX%.pgm}
  B_PGM="$B_BASE-${SUFFIX}"
  H_DAT="$A_BASE-$B_BASE-${N}-hist.dat"
  V="$A_BASE-$B_BASE-${N}-view.png"
  D="$A_BASE-$B_BASE-${N}-diff.png"
  # a page without counterpart is a difference, not a match
  if [ ! -f "$B_PGM" ]; then
    echo "$N missing from $B_PS" 1>&2
    continue
  fi
  # get histogram of diffs; redo it when either page was re-rendered
  if [ ! -f "$H_DAT" ] || [ -n "$FORCE" ] \
    || [ "$A_PGM" -nt "$H_DAT" ] || [ "$B_PGM" -nt "$H_DAT" ]; then
    pnmarith -diff "$A_PGM" "$B_PGM" | pgmhist > "$H_DAT"
    # images made from the previous pixmaps are stale now
    rm -f -- "$V" "$D"
  fi
  # count non-black pixels
  H_COUNT=$(Count_Differing_Pixels "$H_DAT")
  # assemble views of differing pages (only)
  if [ "$H_COUNT" -ge "$HIST_THRESHOLD" ]; then
    echo "$N differ" 1>&2
    if [ ! -f "$V" ] || [ ! -f "$D" ] || [ -n "$FORCE" ]; then
      rgb3toppm "$A_PGM" "$B_PGM" "$B_PGM" \
        | pnmtopng -transparent white -background grey50 > "$V"
      # Computed afresh rather than saved from the histogram step: the
      # histogram may come from the cache (e.g. after a change of -t)
      # while these images were never made.
      pnmarith -diff "$A_PGM" "$B_PGM" | pnmtopng > "$D"
    fi
    echo "$V" "$A_PGM" "$B_PGM" >> "$PAIR_FILE"
  fi
  ## When memory is tight -- This renders options "-2" and "-3" useless.
  #if [ -z "$VIEWS" ]; then
  # rm "$A_PGM" "$B_PGM"
  #fi
done
# report pages that exist only in the second file
for B_PGM in "$B_BASE"-${PAGES}.pgm
do
  [ -f "$B_PGM" ] || continue
  SUFFIX="${B_PGM##*-}"
  [ "$B_PGM" = "$B_BASE-$SUFFIX" ] || continue
  [ -f "$A_BASE-$SUFFIX" ] || echo "${SUFFIX%.pgm} missing from $A_PS" 1>&2
done
# Select the columns of the pair list to hand to the viewer.  Each line
# of that list holds the combined view image followed by the two page
# pixmaps.  Any view mode other than 1, 2 or 3 ends the script here.
case $VIEWS in
  3) COLS=1-3 ;; # all
  2) COLS=2-3 ;; # page pairs only
  1) COLS=1 ;; # diff-view only
  *) exit ;;
esac
# Probe whether this xargs knows -r (--no-run-if-empty); use it if so.
if xargs -r < /dev/null 2> /dev/null
then
  XARGS_ARGS="-r"
fi
# $VIEWER is left unquoted on purpose: it holds a command plus options.
if test -f $PAIR_FILE
then
  cut -d' ' -f$COLS < $PAIR_FILE | xargs $XARGS_ARGS $VIEWER
fi
# EOF
------------------------- Helpful Info -------------------------
Mac-TeX Website: http://www.esm.psu.edu/mac-tex/
TeX FAQ: http://www.tex.ac.uk/faq
List Archive: http://tug.org/pipermail/macostex-archives/
List Reminders & Etiquette: http://www.esm.psu.edu/mac-tex/list/
More information about the MacOSX-TeX
mailing list