PDF Creation and Manipulation

# Render the pages of input.pdf (-r300) into numbered raw PGM files,
# page-001.pgm, page-002.pgm, ...
gs -dNOPAUSE -dBATCH \
  -r300 -sDEVICE=pgmraw \
  -sOutputFile=page-%03d.pgm \
  input.pdf

(For PNG output instead, use -sDEVICE=png16m and a .png output file name.)

# Clean up the rasterised pages with unpaper, treating each input image as a
# double-page sheet (one input page in, two output pages out).
# This is ONE command: every line except the last must end in a backslash,
# otherwise the shell tries to run each line as a command of its own.
unpaper --layout double --input-pages 1 --output-pages 2 --no-mask-scan \
  --no-border-scan --border 30,30,30,30 --deskew-scan-deviation 2 \
  --middle-wipe 80 --sheet-size 3312,2562 page-%03d.pgm unpaper-%03d.pgm

# Convert every unpaper output page to TIFF (unpaper-NNN.pgm.tiff), printing
# each name as progress, then bundle the pages into one zip-compressed TIFF.
for i in unpaper-*.pgm; do
  [ -e "$i" ] || continue   # glob matched nothing: skip the literal pattern
  pnmtotiff "$i" > "$i.tiff"
  printf '%s\n' "$i"
done
# Combine only the pages produced above; a bare *.tiff would also match
# big.tiff itself when these commands are run a second time.
tiffcp -c zip unpaper-*.tiff big.tiff

# Wrap the multi-page TIFF in a PDF with document metadata
# (-t title, -a author, -s subject, -k keywords; -z selects zip compression).
# The -o option and its file name must stay on one logical line, hence the
# trailing backslash.
tiff2pdf -z -t"Title goes here" -a"Authors" -s"Subject" -k"Keywords" \
  -o big.pdf big.tiff

# Add an outline to the PDF; presumably outline.txt holds the outline entries
# and big-with-outline.pdf is the result (confirm against pdfoutline's usage).
pdfoutline big.pdf outline.txt big-with-outline.pdf

Scan Post-Processing

Current workflow:

# Rasterise the pages of input.pdf (-r300) to numbered PNG files.
gs -r300 -dBATCH -dNOPAUSE -sDEVICE=png16m -sOutputFile=page-%03d.png input.pdf

# convert does not create missing output directories, so make them first.
mkdir -p bw thresh-90

# Greyscale copy of every page with the white point lowered to 80%.
# The input image is named before the operators that act on it.
for f in page-*.png; do
  [ -e "$f" ] || continue   # glob matched nothing: skip the literal pattern
  convert "$f" -colorspace gray -level 0,80% "bw/$f"
  printf '%s\n' "$f"
done

# Two-tone copy of every page: greyscale, stretch the levels, threshold at 90%.
for f in page-*.png; do
  [ -e "$f" ] || continue
  convert "$f" -colorspace gray -auto-level -threshold "90%" "thresh-90/$f"
  printf '%s\n' "$f"
done

# Reducing to B&W and tweaking the brightness and contrast

# Variant 1: greyscale with the white point lowered to 80%, written next to
# the original as 1-<name>.
for f in page*.png; do
  [ -e "$f" ] || continue   # glob matched nothing: skip the literal pattern
  convert "$f" -colorspace gray -level 0,80% "1-$f"
  printf '%s\n' "$f"
done

# Variant 2: brightness -15 / contrast +25, then the same level adjustment,
# written into _scans/ (created here because convert will not create it).
mkdir -p _scans
for f in page-*.png; do
  [ -e "$f" ] || continue
  printf '%s\n' "$f"
  convert "$f" -brightness-contrast -15x25 -level 0,80% "_scans/$f"
done

# convert input images to colour-corrected greyscale images with an alpha
# channel (levels better for text: white point 90%, gamma 0.25)
for f in ALC-???.png; do
  [ -e "$f" ] || continue   # glob matched nothing: skip the literal pattern
  convert "$f" -colorspace gray -level 0,90%,0.25 -alpha Set "gscc-0-90-0.25-$f"
done

# convert input images to colour-corrected greyscale images with an alpha
# channel (levels better for photos: white point 90%, default gamma)
for f in ALC-???.png; do
  [ -e "$f" ] || continue
  convert "$f" -colorspace gray -level 0,90% -alpha Set "gscc-0-90-$f"
done

# manual step: edit the text files to create transparent mattes where the
# too-dark photos are

# merge them: the text-levelled image is composited over the photo-levelled
# one, so the photo version shows through wherever a matte was cut
for f in ALC-???.png; do
  [ -e "$f" ] || continue
  convert "gscc-0-90-$f" "gscc-0-90-0.25-$f" -composite "output-$f"
done



# subtract common backgrounds from images
# The images are listed first, followed by -compose and then -composite, which
# is the operator order ImageMagick documents.  With the operators placed ahead
# of the inputs, -negate appeared to have no effect (presumably it was applied
# to both inputs before the difference was taken, where it cancels out -
# TODO confirm), so the inversion is kept as its own step below.
for f in ??.png; do
  [ -e "$f" ] || continue   # glob matched nothing: skip the literal pattern
  convert "$f" background.png -compose difference -composite "neg-$f"
done

# invert the difference images to produce the final output
for f in ??.png; do
  [ -e "neg-$f" ] || continue
  convert "neg-$f" -negate "output-$f"
done


# Compositing in a mask: combine page.png and mask.png with the Dst_Over
# operator and write the result to output.png.
composite -compose Dst_Over \
  page.png mask.png output.png


# apply mask: same operation for pages 0 through 9; the file names contain
# spaces, so they are quoted.
for (( p = 0; p < 10; p++ )); do
  composite -compose Dst_Over \
    "../PADM-1 $p.png" "../PADM Mask.png" "PADM 00$p-1.png"
done

# increase contrast: brightness -15 / contrast +25, white point lowered to
# 80%, saved as done/<name>.tiff
mkdir -p done   # convert does not create missing output directories
for f in page-*.png; do
  [ -e "$f" ] || continue   # glob matched nothing: skip the literal pattern
  printf '%s\n' "$f"
  convert "$f" -brightness-contrast -15x25 -level 0,80% "done/${f%.png}.tiff"
done

# composite greyscale / colour masked images: each TIFF in the current
# directory is combined with the file of the same name one directory up.
mkdir -p composited   # composite does not create missing output directories
for f in *.tiff; do
  [ -e "$f" ] || continue   # glob matched nothing: skip the literal pattern
  composite -compose Dst_Over "../$f" "$f" "composited/$f"
done


# Converting / remapping colours to an input palette file:
# map input.png onto the colours found in palette.png with dithering switched
# off (+dither must come before -remap), writing out.png.
convert input.png \
  +dither -remap palette.png \
  out.png

Then run the PDF file through something like Acrobat Professional to add a non-destructive OCR text layer beneath the bitmap image. (Apparently, pdfocr or gscan2pdf can also be used to embed searchable text layers into scanned PDF files.)

PDF Creation and Manipulation

Ghostscript

unpaper

Scan Post-Processing

OCR