#!/usr/bin/pike
/*
 * jpgscanclean.pike
 *
 * Utility to clean the background of scanned pages (req. RGB page)
 *
 * Author: Dario Rodriguez dario@softhome.net
 * The program is licensed under the terms of the MIT/X license.
 */

// Upper bound (exclusive) on the value read from the distance-to-white
// image for a pixel to be considered a background candidate.
int bgthreshold=64; // from 0 to 255

// Half-width, in hue bins, of the window summed around each candidate hue
// when searching for the dominant background hue.
int amplitude=15;   // from 1 to 255

/*
 * Distance between two hues on the 256-step colour wheel.
 *
 * a, b: hue values in the range 0..255.
 * Returns the shorter way round the wheel, i.e. a value in 0..128.
 */
private int hue_distance(int a, int b)
{
  int d=abs(a-b);
  return (d>128) ? 256-d : d;
}

/*
 * Guess the most common background hue of a page.
 *
 * inhsv:      HSV version of the page; channel 0 is read as the hue.
 * indistance: distance-to-white version of the page; channel 0 is compared
 *             against bgthreshold to select background candidates.
 *
 * Builds a 256-bin hue histogram of the candidate pixels, then returns the
 * hue whose surrounding window of +/- amplitude bins (wrapping around the
 * colour wheel) holds the most pixels. Returns 0 when there are no
 * candidates. Prints a progress dot every 1000 rows.
 */
private int guess_bg_hue(Image.Image inhsv, Image.Image indistance)
{
  array(int) hues=allocate(256);
  int xs=inhsv.xsize();
  int ys=inhsv.ysize();

  for(int y=0;y<ys;y++) {
    for(int x=0;x<xs;x++) {
      if(indistance.getpixel(x,y)[0]<bgthreshold) // possible bg
        hues[inhsv.getpixel(x,y)[0]]++;
    }
    if(!(y%1000))
      write(".");
  }

  int best_hue=0;
  int best_count=0;
  for(int h=0;h<256;h++) {
    int n=0;
    // Inclusive upper bound: the window must be symmetric around h,
    // otherwise the guess is biased towards lower hues.
    for(int j=-amplitude;j<=amplitude;j++)
      n+=hues[(h+256+j)%256];
    if(n>best_count) {
      best_count=n;
      best_hue=h;
    }
  }
  return best_hue;
}

/*
 * Entry point: jpgscanclean.pike infile.jpg outfile.png
 *
 * Loads the input image, guesses the dominant background hue among the
 * near-white pixels, paints every pixel that is both near-white and close
 * to that hue pure white, and writes the result as a PNG.
 *
 * Returns 1 on a usage error or when the input cannot be loaded,
 * 0 on success.
 */
int main(int argc, array(string) argv)
{
  if(argc!=3 || argv[argc-1]=="--help") {
    write("Syntax: "+argv[0]+" infile.jpg outfile.png\n");
    return(1);
  }

  string infile=argv[1];
  string outfile=argv[2];

  write(infile+" -> "+outfile+"\n");
  write("* Generating intermediate representations...");

  // Report an unreadable or undecodable input instead of dying with a
  // backtrace.
  Image.Image src;
  mixed err=catch { src=Image.load(infile); };
  if(err || !src) {
    werror("\nError: unable to load image "+infile+"\n");
    return(1);
  }

  Image.Image inhsv=src->copy()->rgb_to_hsv();
  Image.Image indistance=src->copy()->distancesq(255,255,255);
  // Start from a copy of the input so only background pixels need to be
  // overwritten below.
  Image.Image out=src->copy();

  // Guess the most used bg color
  write("done.\n* Guessing most used bg hue...");
  int selh=guess_bg_hue(inhsv,indistance);
  write(""+selh);

  // Clean image
  write(".\n* Cleaning image...");
  int xs=src.xsize();
  int ys=src.ysize();
  for(int y=0;y<ys;y++) {
    for(int x=0;x<xs;x++) {
      int c=indistance.getpixel(x,y)[0];
      int d=hue_distance(inhsv.getpixel(x,y)[0],selh);
      // heuristics
      // NOTE(review): the hue distance is compared against bgthreshold,
      // not amplitude; kept exactly as the original heuristic -- confirm
      // this is intended.
      if(c<bgthreshold && d<bgthreshold)
        out.setpixel(x,y,255,255,255);
    }
    if(!(y%1000))
      write(".");
  }

  // Write result
  write("done.\n* Writing result to disk...");
  Stdio.write_file(outfile,Image.PNG.encode(out));
  write("done.\n* Process finished.\n");
  return(0);
}