Python/noteshrink: enhace and beatify scanned images.

#!/usr/bin/env python '''Converts sequence of images to compact PDF while removing speckles, bleedthrough, etc. ''' # for some reason pylint complains about members being undefined :( # pylint: disable=E1101 from __future__ import print_function import sys import os import re import subprocess import shlex from argparse import ArgumentParser import numpy as np from PIL import Image from scipy.cluster.vq import kmeans, vq ###################################################################### def quantize(image, bits_per_channel=None): '''Reduces the number of bits per channel in the given image.''' if bits_per_channel is None: bits_per_channel = 6 assert image.dtype == np.uint8 shift = 8-bits_per_channel halfbin = (1 << shift) >> 1 return ((image.astype(int) >> shift) << shift) + halfbin ###################################################################### def pack_rgb(rgb): '''Packs a 24-bit RGB triples into a single integer, works on both arrays and tuples.''' orig_shape = None if isinstance(rgb, np.ndarray): assert rgb.shape[-1] == 3 orig_shape = rgb.shape[:-1] else: assert len(rgb) == 3 rgb = np.array(rgb) rgb = rgb.astype(int).reshape((-1, 3)) packed = (rgb[:, 0] << 16 | rgb[:, 1] << 8 | rgb[:, 2]) if orig_shape is None: return packed else: return packed.reshape(orig_shape) ###################################################################### def unpack_rgb(packed): '''Unpacks a single integer or array of integers into one or more 24-bit RGB values. ''' orig_shape = None if isinstance(packed, np.ndarray): assert packed.dtype == int orig_shape = packed.shape packed = packed.reshape((-1, 1)) rgb = ((packed >> 16) & 0xff, (packed >> 8) & 0xff, (packed) & 0xff) if orig_shape is None: return rgb else: return np.hstack(rgb).reshape(orig_shape + (3,)) ###################################################################### def get_bg_color(image, bits_per_channel=None): '''Obtains the background color from an image or array of RGB colors by grouping similar colors into bins and finding the most frequent one. ''' assert image.shape[-1] == 3 quantized = quantize(image, bits_per_channel).astype(int) packed = pack_rgb(quantized) unique, counts = np.unique(packed, return_counts=True) packed_mode = unique[counts.argmax()] return unpack_rgb(packed_mode) ###################################################################### def rgb_to_sv(rgb): '''Convert an RGB image or array of RGB colors to saturation and value, returning each one as a separate 32-bit floating point array or value. ''' if not isinstance(rgb, np.ndarray): rgb = np.array(rgb) axis = len(rgb.shape)-1 cmax = rgb.max(axis=axis).astype(np.float32) cmin = rgb.min(axis=axis).astype(np.float32) delta = cmax - cmin saturation = delta.astype(np.float32) / cmax.astype(np.float32) saturation = np.where(cmax == 0, 0, saturation) value = cmax/255.0 return saturation, value ###################################################################### def postprocess(output_filename, options): '''Runs the postprocessing command on the file provided.''' assert options.postprocess_cmd base, _ = os.path.splitext(output_filename) post_filename = base + options.postprocess_ext cmd = options.postprocess_cmd cmd = cmd.replace('%i', output_filename) cmd = cmd.replace('%o', post_filename) cmd = cmd.replace('%e', options.postprocess_ext) subprocess_args = shlex.split(cmd) if os.path.exists(post_filename): os.unlink(post_filename) if not options.quiet: print(' running "{}"...'.format(cmd), end=' ') sys.stdout.flush() try: result = subprocess.call(subprocess_args) before = os.stat(output_filename).st_size after = os.stat(post_filename).st_size except OSError: result = -1 if result == 0: if not options.quiet: print('{:.1f}% reduction'.format( 100*(1.0-float(after)/before))) return post_filename else: sys.stderr.write('warning: postprocessing failed!\n') return None ###################################################################### def percent(string): '''Convert a string (i.e. 85) to a fraction (i.e. .85).''' return float(string)/100.0 ###################################################################### def get_argument_parser(): '''Parse the command-line arguments for this program.''' parser = ArgumentParser( description='convert scanned, hand-written notes to PDF') show_default = ' (default %(default)s)' parser.add_argument('filenames', metavar='IMAGE', nargs='+', help='files to convert') parser.add_argument('-q', dest='quiet', action='store_true', default=False, help='reduce program output') parser.add_argument('-b', dest='basename', metavar='BASENAME', default='page', help='output PNG filename base' + show_default) parser.add_argument('-o', dest='pdfname', metavar='PDF', default='output.pdf', help='output PDF filename' + show_default) parser.add_argument('-v', dest='value_threshold', metavar='PERCENT', type=percent, default='25', help='background value threshold %%'+show_default) parser.add_argument('-s', dest='sat_threshold', metavar='PERCENT', type=percent, default='20', help='background saturation ' 'threshold %%'+show_default) parser.add_argument('-n', dest='num_colors', type=int, default='8', help='number of output colors '+show_default) parser.add_argument('-p', dest='sample_fraction', metavar='PERCENT', type=percent, default='5', help='%% of pixels to sample' + show_default) parser.add_argument('-w', dest='white_bg', action='store_true', default=False, help='make background white') parser.add_argument('-g', dest='global_palette', action='store_true', default=False, help='use one global palette for all pages') parser.add_argument('-S', dest='saturate', action='store_false', default=True, help='do not saturate colors') parser.add_argument('-K', dest='sort_numerically', action='store_false', default=True, help='keep filenames ordered as specified; ' 'use if you *really* want IMG_10.png to ' 'precede IMG_2.png') parser.add_argument('-P', dest='postprocess_cmd', default=None, help='set postprocessing command (see -O, -C, -Q)') parser.add_argument('-e', dest='postprocess_ext', default='_post.png', help='filename suffix/extension for ' 'postprocessing command') parser.add_argument('-O', dest='postprocess_cmd', action='store_const', const='optipng -silent %i -out %o', help='same as -P "%(const)s"') parser.add_argument('-C', dest='postprocess_cmd', action='store_const', const='pngcrush -q %i %o', help='same as -P "%(const)s"') parser.add_argument('-Q', dest='postprocess_cmd', action='store_const', const='pngquant --ext %e %i', help='same as -P "%(const)s"') parser.add_argument('-c', dest='pdf_cmd', metavar="COMMAND", default='convert %i %o', help='PDF command (default "%(default)s")') return parser ###################################################################### def get_filenames(options): '''Get the filenames from the command line, optionally sorted by number, so that IMG_10.png is re-arranged to come after IMG_9.png. This is a nice feature because some scanner programs (like Image Capture on Mac OS X) automatically number files without leading zeros, and this way you can supply files using a wildcard and still have the pages ordered correctly. ''' if not options.sort_numerically: return options.filenames filenames = [] for filename in options.filenames: basename = os.path.basename(filename) root, _ = os.path.splitext(basename) matches = re.findall(r'[0-9]+', root) if matches: num = int(matches[-1]) else: num = -1 filenames.append((num, filename)) return [fn for (_, fn) in sorted(filenames)] ###################################################################### def load(input_filename): '''Load an image with Pillow and convert it to numpy array. Also returns the image DPI in x and y as a tuple.''' try: pil_img = Image.open(input_filename) except IOError: sys.stderr.write('warning: error opening {}\n'.format( input_filename)) return None, None if pil_img.mode != 'RGB': pil_img = pil_img.convert('RGB') if 'dpi' in pil_img.info: dpi = pil_img.info['dpi'] else: dpi = (300, 300) img = np.array(pil_img) return img, dpi ###################################################################### def sample_pixels(img, options): '''Pick a fixed percentage of pixels in the image, returned in random order.''' pixels = img.reshape((-1, 3)) num_pixels = pixels.shape[0] num_samples = int(num_pixels*options.sample_fraction) idx = np.arange(num_pixels) np.random.shuffle(idx) return pixels[idx[:num_samples]] ###################################################################### def get_fg_mask(bg_color, samples, options): '''Determine whether each pixel in a set of samples is foreground by comparing it to the background color. A pixel is classified as a foreground pixel if either its value or saturation differs from the background by a threshold.''' s_bg, v_bg = rgb_to_sv(bg_color) s_samples, v_samples = rgb_to_sv(samples) s_diff = np.abs(s_bg - s_samples) v_diff = np.abs(v_bg - v_samples) return ((v_diff >= options.value_threshold) | (s_diff >= options.sat_threshold)) ###################################################################### def get_palette(samples, options, return_mask=False, kmeans_iter=40): '''Extract the palette for the set of sampled RGB values. The first palette entry is always the background color; the rest are determined from foreground pixels by running K-means clustering. Returns the palette, as well as a mask corresponding to the foreground pixels. ''' if not options.quiet: print(' getting palette...') bg_color = get_bg_color(samples, 6) fg_mask = get_fg_mask(bg_color, samples, options) centers, _ = kmeans(samples[fg_mask].astype(np.float32), options.num_colors-1, iter=kmeans_iter) palette = np.vstack((bg_color, centers)).astype(np.uint8) if not return_mask: return palette else: return palette, fg_mask ###################################################################### def apply_palette(img, palette, options): '''Apply the pallete to the given image. The first step is to set all background pixels to the background color; then, nearest-neighbor matching is used to map each foreground color to the closest one in the palette. ''' if not options.quiet: print(' applying palette...') bg_color = palette[0] fg_mask = get_fg_mask(bg_color, img, options) orig_shape = img.shape pixels = img.reshape((-1, 3)) fg_mask = fg_mask.flatten() num_pixels = pixels.shape[0] labels = np.zeros(num_pixels, dtype=np.uint8) labels[fg_mask], _ = vq(pixels[fg_mask], palette) return labels.reshape(orig_shape[:-1]) ###################################################################### def save(output_filename, labels, palette, dpi, options): '''Save the label/palette pair out as an indexed PNG image. This optionally saturates the pallete by mapping the smallest color component to zero and the largest one to 255, and also optionally sets the background color to pure white. ''' if not options.quiet: print(' saving {}...'.format(output_filename)) if options.saturate: palette = palette.astype(np.float32) pmin = palette.min() pmax = palette.max() palette = 255 * (palette - pmin)/(pmax-pmin) palette = palette.astype(np.uint8) if options.white_bg: palette = palette.copy() palette[0] = (255, 255, 255) output_img = Image.fromarray(labels, 'P') output_img.putpalette(palette.flatten()) output_img.save(output_filename, dpi=dpi) ###################################################################### def get_global_palette(filenames, options): '''Fetch the global palette for a series of input files by merging their samples together into one large array. ''' input_filenames = [] all_samples = [] if not options.quiet: print('building global palette...') for input_filename in filenames: img, _ = load(input_filename) if img is None: continue if not options.quiet: print(' processing {}...'.format(input_filename)) samples = sample_pixels(img, options) input_filenames.append(input_filename) all_samples.append(samples) num_inputs = len(input_filenames) all_samples = [s[:int(round(float(s.shape[0])/num_inputs))] for s in all_samples] all_samples = np.vstack(tuple(all_samples)) global_palette = get_palette(all_samples, options) if not options.quiet: print(' done\n') return input_filenames, global_palette ###################################################################### def emit_pdf(outputs, options): '''Runs the PDF conversion command to generate the PDF.''' cmd = options.pdf_cmd cmd = cmd.replace('%o', options.pdfname) if len(outputs) > 2: cmd_print = cmd.replace('%i', ' '.join(outputs[:2] + ['...'])) else: cmd_print = cmd.replace('%i', ' '.join(outputs)) cmd = cmd.replace('%i', ' '.join(outputs)) if not options.quiet: print('running PDF command "{}"...'.format(cmd_print)) try: result = subprocess.call(shlex.split(cmd)) except OSError: result = -1 if result == 0: if not options.quiet: print(' wrote', options.pdfname) else: sys.stderr.write('warning: PDF command failed\n') ###################################################################### def notescan_main(options): '''Main function for this program when run as script.''' filenames = get_filenames(options) outputs = [] do_global = options.global_palette and len(filenames) > 1 if do_global: filenames, palette = get_global_palette(filenames, options) do_postprocess = bool(options.postprocess_cmd) for input_filename in filenames: img, dpi = load(input_filename) if img is None: continue output_filename = '{}{:04d}.png'.format( options.basename, len(outputs)) if not options.quiet: print('opened', input_filename) if not do_global: samples = sample_pixels(img, options) palette = get_palette(samples, options) labels = apply_palette(img, palette, options) save(output_filename, labels, palette, dpi, options) if do_postprocess: post_filename = postprocess(output_filename, options) if post_filename: output_filename = post_filename else: do_postprocess = False outputs.append(output_filename) if not options.quiet: print(' done\n') emit_pdf(outputs, options) ###################################################################### def main(): '''Parse args and call notescan_main().''' notescan_main(options=get_argument_parser().parse_args()) if __name__ == '__main__': main()
Enhace quality for hand writing images / scanned images.

Use:

./noteshrink.py IMAGE1 [IMAGE2 ...]

Be the first to comment

You can use [html][/html], [css][/css], [php][/php] and more to embed the code. Urls are automatically hyperlinked. Line breaks and paragraphs are automatically generated.