/*
 *  SprToMap
 *  Compresses an image by producing a mosaic of smaller images
 *  Copyright (C) 2006  Chris Bazley
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public Licence as published by
 *  the Free Software Foundation; either version 2 of the Licence, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public Licence for more details.
 *
 *  You should have received a copy of the GNU General Public Licence
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/* ISO headers */
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>
#include <math.h>
#include <time.h>
#include <assert.h>
#include <stdint.h>

/* RISC OS headers */
#ifdef RISC_OS
#include "kernel.h"
#include "swis.h"
#endif

/* Local headers */
#include "SprFormats.h"

/* Compile-time options */
#define FAST_DIFF_TABLE
//#define ALLOW_UNCROPPED

typedef struct
{
   uint8_t red;
   uint8_t green;
   uint8_t blue;
   uint8_t reserved;
} pixel32;

#ifdef ALLOW_UNCROPPED
static bool crop = false;
#endif
static bool verbose = false, blend = false, vflip = false;
static unsigned int tile_size = 8, bpp, quality = UINT_MAX;
static unsigned int output_width, output_height;
static unsigned long max_tiles = UINT16_MAX + 1;
static char *input_file = NULL, *output_map = NULL, *output_tiles = NULL;
static int max_tile_size;
#ifdef FAST_DIFF_TABLE
static unsigned long *grid_to_diff_table;
#endif
static unsigned short **list_ptr_table;
static unsigned int last_group, num_groups;
static size_t grid_size, diff_table_size;
static unsigned char *diff_table;

/* These are the weights for the least-squares function used
   to determine how similar two pixels are */
#define RED_WEIGHT   2
#define GREEN_WEIGHT 4
#define BLUE_WEIGHT  1
#define SQUARE(n) ((n)*(n))

static void *safe_calloc(size_t nmemb, size_t size)
{
  void *mem = calloc(nmemb, size);
  if (mem == NULL) {
    fprintf(stderr, "Memory allocation of %u bytes failed\n", nmemb * size);
    exit(EXIT_FAILURE);
  }
  return mem;
}

static long int grid_to_diff_index(unsigned int group_1, unsigned int group_2)
{
  long int diff_table_index;
#ifndef FAST_DIFF_TABLE
  size_t table_row_length;
#endif

  assert(group_1 != group_2);
  if (group_2 < group_1) {
    unsigned int temp = group_2;
    group_2 = group_1;
    group_1 = temp;
  }

#ifdef FAST_DIFF_TABLE
  assert(grid_to_diff_table != NULL);
  diff_table_index = grid_to_diff_table[group_1];
#else
  diff_table_index = 0;
  table_row_length = grid_size;
  for(unsigned int row = 0; row < group_1; row++)
    diff_table_index += --table_row_length;
#endif

  return diff_table_index + ((long)group_2 - group_1) - 1;
}

static void *create_map(bool wide_map)
{
  /* Create the output map, which contains references to output tile sprites
     in the right order to reproduce an approximation of the input image */
  size_t num_filled;
  unsigned long next_perc, one_perc;
  void *map;
  unsigned int tile_number;
  clock_t start_time;

  if (verbose)
    printf("Allocating output map of %u bytes.\n", grid_size * (wide_map ?
           sizeof(uint16_t) : sizeof(uint8_t)));
  map = safe_calloc(grid_size, wide_map ? sizeof(uint16_t) : sizeof(uint8_t));

  if (verbose) {
    printf("Creating %s output map...\n", wide_map ? "wide" : "byte");
    next_perc = 0;
    one_perc = ((unsigned long)grid_size << 14) / 100;
    start_time = clock();
  } else {
    /* These initialisations are just to suppress compiler warnings */
    start_time = 0;
    one_perc = 0;
  }

  next_perc = num_filled = tile_number = 0;
  for (unsigned int group = 0; group <= last_group; group++) {
    unsigned int num_members;

    assert(list_ptr_table != NULL);
    if(list_ptr_table[group] == NULL)
      continue;

    num_members = list_ptr_table[group][0];
    assert(num_members <= grid_size);

    for (unsigned int member = 1; member <= num_members; member++) {
      uint8_t *map_8_bit;
      uint16_t *map_16_bit;

      if(verbose && (unsigned long)num_filled << 14 >= next_perc) {
        printf("%u%% done\n", (num_filled * 100) / grid_size);
        next_perc += one_perc;
      }

      assert(list_ptr_table[group][member] < grid_size);
      assert(tile_number <= (wide_map ? UINT16_MAX : UINT8_MAX));
      if (wide_map) {
        map_16_bit = (uint16_t *)map;
        map_16_bit[list_ptr_table[group][member]] = tile_number;
      } else {
        map_8_bit = (uint8_t *)map;
        map_8_bit[list_ptr_table[group][member]] = tile_number;
      }
      num_filled++;
    } /* next member */
    tile_number++;
  } /* next group */

  if(verbose) {
    double elapsed = (double)(clock() - start_time) / CLOCKS_PER_SEC;
    printf("Finished in %.2f seconds", elapsed);
    if (elapsed)
      printf(" (%.2f locations filled per second)\n", num_filled / elapsed);
    else
      putchar('\n');
  }
  assert(num_filled == grid_size);

  return map;
}

static void show_list(unsigned int group)
{
  printf("Group %u:", group);
  assert(list_ptr_table != NULL);
  if(list_ptr_table[group] != NULL) {
    for(unsigned int member = 1; member <= list_ptr_table[group][0]; member++)
      printf(" %u", list_ptr_table[group][member]);
  } else {
    printf(" empty");
  }
  putchar('\n');
}

#ifdef RISC_OS

static void verify_sprite_area(spriteareaheader *sprite_area)
{
  /* Verify a sprite area (requires SpriteExtend 0.99 or later) */
  _kernel_oserror *e;
  e = _swix(OS_SpriteOp, _INR(0,1), SPRITEOP_USERAREA_SPRNAME +
            SPRITEOP_VERIFY_AREA, sprite_area);
  if (e != NULL) {
    fprintf(stderr, "Verification failed: %s\n", e->errmess);
    exit(EXIT_FAILURE);
  }
}

static void set_file_type(const char *file, int file_type)
{
  _kernel_osfile_block params;

  if(verbose) {
    char sys_var_name[32], file_type_string[16];
    const char *var;

    sprintf(sys_var_name, "File$Type_%X", file_type);
    var = getenv(sys_var_name);
    if(var == NULL)
      sprintf(file_type_string, "&%X", file_type);

    printf("Setting type of output file '%s' to %s.\n", file, var != NULL ?
           var : file_type_string);
  }

  /* Set type of named object */
  params.load = file_type;
  if (_kernel_osfile(18, file, &params) == _kernel_ERROR) {
    fprintf(stderr, "Failed to set file type: %s\n",
            _kernel_last_oserror()->errmess);
    exit(EXIT_FAILURE);
  }
}

static void check_file_type(const char *file, int req_file_type)
{
  _kernel_osfile_block params;
  int object_type, file_type;

  if(verbose)
    printf("Checking type of input file '%s'.\n", file);

  /* Read catalogue info for object without path */
  object_type = _kernel_osfile(17, file, &params);
  if(object_type == _kernel_ERROR) {
    fprintf(stderr, "Failed to read catalogue info: %s\n",
            _kernel_last_oserror()->errmess);
    exit(EXIT_FAILURE);
  }

  /* Generate a standard format error message if input file not found,
     or it is a directory */
  if(!object_type || object_type == 2) {
    params.load = object_type;
    if(_kernel_osfile(19, file, &params) == _kernel_ERROR)
      fprintf(stderr, "%s\n", _kernel_last_oserror()->errmess);
    exit(EXIT_FAILURE);
  }

  if((params.load & 0xfff00000) != 0xfff00000) {
    fprintf(stderr, "Input file is untyped (should be &%X)\n", req_file_type);
    exit(EXIT_FAILURE);
  }

  /* Extract file type (0-4095) from file's load address */
  file_type = params.load >> 8 & 0xfff;

  if(req_file_type != file_type) {
    char sys_var_name[32], req_type_string[16], file_type_string[16];
    const char *var;

    sprintf(sys_var_name, "File$Type_%X", req_file_type);
    var = getenv(sys_var_name);
    if(var != NULL) {
      strncpy(req_type_string, var, sizeof(req_type_string));
      req_type_string[sizeof(req_type_string) - 1] = '\0';
    } else {
      sprintf(req_type_string, "&%X", req_file_type);
    }

    sprintf(sys_var_name, "File$Type_%X", file_type);
    var = getenv(sys_var_name);
    if(var != NULL) {
      strncpy(file_type_string, var, sizeof(file_type_string));
      file_type_string[sizeof(file_type_string) - 1] = '\0';
    } else {
      sprintf(file_type_string, "&%X", file_type);
    }

    fprintf(stderr, "Input file has type %s (should be %s)\n",
            file_type_string, req_type_string);
    exit(EXIT_FAILURE);
  }
}
#endif

static void create_list_ptr_table(void)
{
  unsigned int group;

  if(verbose)
    printf("Allocating list pointer table of %u bytes.\n", grid_size *
           sizeof(list_ptr_table[0]));
  list_ptr_table = (unsigned short **)safe_calloc(grid_size,
                   sizeof(list_ptr_table[0]));

  if(verbose)
    puts("Creating group lists...");

  num_groups = grid_size;
  last_group = grid_size - 1;
  assert(last_group <= USHRT_MAX);

  for (group = 0; group <= last_group; group++)
  {
    /* Initially, each group corresponds to a unique grid location */
    list_ptr_table[group] = (unsigned short *)safe_calloc(2,
                            sizeof(list_ptr_table[0][0]));
    list_ptr_table[group][0] = 1;
    list_ptr_table[group][1] = group;
  }
}

static void create_diff_table(spriteareaheader *input_spr_area, size_t grid_width, size_t input_spr_width, uint32_t *palette)
{
  unsigned int input_spr_type;
  unsigned long max_total_diff, next_perc, interesting_range;
  long int loc_1, diff_table_index;
  clock_t start_time;
  double next_perc_flt, one_perc;
  size_t input_spr_row_len, input_spr_height, src_pixel_size;
  char *input_image;
  spriteheader *input_sprite;

  assert(input_spr_area != NULL);
  input_sprite = (spriteheader *)((char *)input_spr_area +
                 input_spr_area->first);

  input_spr_type = (input_sprite->type & SPRITE_INFO_TYPE_MASK) >>
                   SPRITE_INFO_TYPE_SHIFT;

  switch (input_spr_type) {
  case SPRITE_TYPE_32BPP:
    src_pixel_size = sizeof(uint32_t);
    break;

  case SPRITE_TYPE_16BPP:
    src_pixel_size = sizeof(uint16_t);
    break;

  case SPRITE_TYPE_8BPP:
    src_pixel_size = sizeof(uint8_t);
    break;

  default:
    assert(false);
    return;
  }

  /* Each row of the difference table contains one less value, because each
     group is compared only with higher-numbered groups (to save memory). */
  diff_table_size = (grid_size * (grid_size - 1)) / 2;
  if(verbose)
    printf("Allocating difference table of %u bytes.\n", diff_table_size *
           sizeof(diff_table[0]));
  diff_table = (unsigned char *)safe_calloc(diff_table_size,
               sizeof(diff_table[0]));

#ifdef FAST_DIFF_TABLE
  /* It speeds up usage of the table of differences if we have another table
     from which we can obtain the offset (within the difference table) of the
     values for a given grid location. */
  if(verbose)
    printf("Allocating %u bytes for offsets into difference table.\n",
           grid_size * sizeof(grid_to_diff_table[0]));
  grid_to_diff_table = (unsigned long *)safe_calloc(grid_size,
                       sizeof(grid_to_diff_table[0]));
#endif

  /* Calculate the maximum total difference value that can be generated by
     a comparison between two different grid locations. */
  max_total_diff = (unsigned long)tile_size * tile_size * SQUARE(255) *
                   (RED_WEIGHT + GREEN_WEIGHT + BLUE_WEIGHT);
#ifndef NDEBUG
  printf("Maximum total of pixel differences is %lu\n", max_total_diff);
#endif
  /* We can't afford to differentiate between differences greater than 5% */
  interesting_range = max_total_diff / 20;

  input_image = (char *)input_sprite + input_sprite->image;
  input_spr_row_len = (input_sprite->width + 1) * 4;
  input_spr_height = input_sprite->height + 1;
  diff_table_index = 0;
  next_perc = 0;
  next_perc_flt = 0;

  if (verbose) {
    puts("Populating difference table...");
    /* We can't use fixed-point arithmetic here because of the huge numbers
       involved (grid_size can be anything up to &10000, so diff_table_size can
       be up to &7FFF8000). */
    one_perc = (double)diff_table_size / 100;
    start_time = clock();
  } else {
    /* These initialisations are just to suppress compiler warnings */
    one_perc = 0;
    start_time = 0;
  }

  for (loc_1 = 0; loc_1 < (long)grid_size - 1; loc_1++) {
    unsigned long tile_1_top = (loc_1 / grid_width) * tile_size,
                  tile_1_lhs = (loc_1 % grid_width) * tile_size, loc_2;
    char *top_left_loc_1 = input_image + tile_1_top * input_spr_row_len +
                           (tile_1_lhs * src_pixel_size);

#ifdef FAST_DIFF_TABLE
    grid_to_diff_table[loc_1] = diff_table_index;
#endif

    for (loc_2 = loc_1 + 1; loc_2 < grid_size; loc_2++) {
      unsigned long tile_2_top, tile_2_lhs, total_diff;
      char *lhs_of_loc_1, *lhs_of_loc_2;

      if(verbose && diff_table_index >= next_perc) {
        int perc_done = (int)(((double)diff_table_index * 100) /
                        diff_table_size);
        printf("%d%% done\n", perc_done);
        next_perc_flt += one_perc;
        next_perc = (long int)ceil(next_perc_flt);
      }

      tile_2_top = (loc_2 / grid_width) * tile_size;
      tile_2_lhs = (loc_2 % grid_width) * tile_size;
#ifndef NDEBUG
      printf("Comparing grid location %lu (%lu,%lu) with %lu (%lu,%lu)\n",
             loc_1, tile_1_lhs, tile_1_top, loc_2, tile_2_lhs, tile_2_top);
#endif
      lhs_of_loc_1 = top_left_loc_1;
      lhs_of_loc_2 = input_image + tile_2_top * input_spr_row_len +
                     (tile_2_lhs * src_pixel_size);
      total_diff = 0;

#ifdef ALLOW_UNCROPPED
      if(crop || tile_1_top + tile_size <= input_spr_height && tile_2_top +
      tile_size <= input_spr_height && tile_1_lhs + tile_size <=
      input_spr_width && tile_2_lhs + tile_size <= input_spr_width)
      {
#ifndef NDEBUG
        puts("Neither grid location overlaps edge of image");
#endif
#else
        /* This code is optimised for the simple case (where neither grid
           location overlaps the edge of the source image) */
        assert(tile_1_top + tile_size <= input_spr_height);
        assert(tile_2_top + tile_size <= input_spr_height);
        assert(tile_1_lhs + tile_size <= input_spr_width);
        assert(tile_2_lhs + tile_size <= input_spr_width);
#endif

        for (unsigned int row = 0; row < tile_size; row++) {

          for (unsigned int col = 0; col < tile_size; col++) {
            int red_diff, green_diff, blue_diff;

            switch (input_spr_type) {
            case SPRITE_TYPE_32BPP:
              {
                pixel32 *row_1 = (pixel32 *)lhs_of_loc_1,
                        *row_2 = (pixel32 *)lhs_of_loc_2;

                red_diff = row_2[col].red - row_1[col].red,
                green_diff = row_2[col].green - row_1[col].green,
                blue_diff = row_2[col].blue - row_1[col].blue; 
              }
              break;
    
            case SPRITE_TYPE_16BPP:
              {
                uint16_t *row_1 = (uint16_t *)lhs_of_loc_1,
                         *row_2 = (uint16_t *)lhs_of_loc_2;

                red_diff = ((int)(row_2[col] & 31) * 255 + 15) / 31;
                red_diff -= ((int)(row_1[col] & 31) * 255 + 15) / 31;

                green_diff = ((int)(row_2[col] >> 5 & 31) * 255 + 15) / 31;
                green_diff -= ((int)(row_1[col] >> 5 & 31) * 255 + 15) / 31;

                blue_diff = ((int)(row_2[col] >> 10 & 31) * 255 + 15) / 31;
                blue_diff -= ((int)(row_1[col] >> 10 & 31) * 255 + 15) / 31;
              }
              break;
  
#ifdef RISC_OS
            case SPRITE_TYPE_8BPP:
              {
                /* Note that the format of a palette entry is subtly different
                   from that of a pixel in a 32 bpp image */
                uint8_t *row_1 = (uint8_t *)lhs_of_loc_1,
                        *row_2 = (uint8_t *)lhs_of_loc_2;
                unsigned long palette_entry_1 = palette[row_1[col]],
                              palette_entry_2 = palette[row_2[col]];

                red_diff = (int)(palette_entry_2 >> 8 & 0xff);
                red_diff -= (int)(palette_entry_1 >> 8 & 0xff);

                green_diff = (int)(palette_entry_2 >> 16 & 0xff);
                green_diff -= (int)(palette_entry_1 >> 16 & 0xff);

                blue_diff = (int)(palette_entry_2 >> 24 & 0xff);
                blue_diff -= (int)(palette_entry_1 >> 24 & 0xff);
              }
              break;
#endif
            default:
              red_diff = green_diff = blue_diff = 0; /* prevent warning */
              break;
            }
            total_diff += SQUARE(red_diff) * RED_WEIGHT +
                          SQUARE(green_diff) * GREEN_WEIGHT +
                          SQUARE(blue_diff) * BLUE_WEIGHT;
          } /* next col */

          lhs_of_loc_1 += input_spr_row_len;
          lhs_of_loc_2 += input_spr_row_len;
        } /* new row */

#ifdef ALLOW_UNCROPPED
      } else {
        fprintf(stderr, "Irregular images not yet supported (use -crop)\n");
        exit(EXIT_FAILURE);
      }
#endif
      { /* Scale the total difference to fit within the range of values that
           can be represented by an unsigned character. */
        double scaled_difference;
        if (total_diff >= interesting_range) {
          scaled_difference = UCHAR_MAX + 0.5f;
        } else {
          scaled_difference = ((double)total_diff * UCHAR_MAX) /
                              interesting_range + 0.5f;
        }
        assert(floor(scaled_difference) <= UCHAR_MAX);
        diff_table[diff_table_index++] = (unsigned char)scaled_difference;
        /* (We add 0.5 before conversion to an integral to ensure that the
           value is rounded to the nearest integer.) */
      }
#ifndef NDEBUG
      printf("Total of pixel differences is %lu (byte %u, %.2f%%)\n",
             total_diff, diff_table[diff_table_index - 1], ((double)total_diff
             * 100) / max_total_diff);
#endif
    } /* next loc_2 */
  } /* next loc_1 */

  if(verbose) {
    double elapsed = (double)(clock() - start_time) / CLOCKS_PER_SEC;
    printf("Finished in %.2f seconds", elapsed);
    if (elapsed)
      printf(" (%.2f grid locations compared per second)\n", diff_table_index /
             elapsed);
    else
      putchar('\n');
  }
}

static void destroy_list_ptr_table(void)
{
  assert(list_ptr_table != NULL);

  for (unsigned int group = 0; group <= last_group; group++) {
    if(list_ptr_table[group] != NULL)
      free(list_ptr_table[group]);
  }
  free(list_ptr_table);
}

static void save_tile_sprites(spriteareaheader *input_spr_area, size_t grid_width, size_t input_spr_width, uint32_t *palette)
{
  unsigned int num_merged, tile_num, input_spr_type;
  spriteareaheader sprite_area_header;
  spriteheader sprite_header;
  char *output_image;
  clock_t start_time;
  FILE *output;
  unsigned long next_perc, one_perc;
  unsigned long *red_totals, *green_totals, *blue_totals;
  size_t pixels_per_tile, tile_row_length, tile_image_size, input_spr_height,
         input_spr_row_len, src_pixel_size;
  char *input_image;
  spriteheader *input_sprite;

  if (verbose)
    printf("Will create %u output tiles\n", num_groups);

  pixels_per_tile = tile_size * tile_size;
#ifndef NDEBUG
  printf("Each tile will contain %u pixels\n", pixels_per_tile);
#endif

  assert(input_spr_area != NULL);
  input_sprite = (spriteheader *)((char *)input_spr_area +
                 input_spr_area->first);

  input_spr_type = (input_sprite->type & SPRITE_INFO_TYPE_MASK) >>
                   SPRITE_INFO_TYPE_SHIFT;

  switch (input_spr_type) {
  case SPRITE_TYPE_32BPP:
    src_pixel_size = sizeof(uint32_t);
    break;

  case SPRITE_TYPE_16BPP:
    src_pixel_size = sizeof(uint16_t);
    break;

  case SPRITE_TYPE_8BPP:
    src_pixel_size = sizeof(uint8_t);
    break;

  default:
    assert(false);
    return;
  }

  /* Allocate memory to hold the total of each colour component for each
     pixel of an output sprite. 32 bit integers should be big enough, because
     the total for any pixel should not exceed &10000 * &FF. */
  if(verbose)
    printf("Allocating %u bytes for R/G/B totals.\n", 3 * pixels_per_tile *
           sizeof(red_totals[0]));
  red_totals = (unsigned long *)safe_calloc(3 * pixels_per_tile,
               sizeof(red_totals[0]));
  green_totals = red_totals + pixels_per_tile;
  blue_totals = green_totals + pixels_per_tile;

  /* Allocate memory for an output image */
  tile_row_length = tile_size * bpp / 8 + 3 & ~3;
#ifndef NDEBUG
  printf("Each row of pixels in a tile sprite will be %u bytes\n",
         tile_row_length);
#endif
  tile_image_size = tile_row_length * tile_size;
  if(verbose)
    printf("Allocating %u bytes for sprite bitmap.\n", tile_image_size);
  output_image = safe_calloc(1, tile_image_size);

  /* Initialise the header of the sprite area. We don't bother setting the
     area size, because this is not included in the output file anyway. */
  sprite_area_header.sprite_count = num_groups;
  sprite_area_header.first = sizeof(spriteareaheader);
  sprite_area_header.used = sizeof(spriteareaheader) + num_groups *
                            (sizeof(spriteheader) + tile_image_size);

  /* Create the output sprite file */
  if (verbose)
    printf("Opening tiles output file '%s'.\n", output_tiles);
  output = fopen(output_tiles, "wb");
  if (output == NULL) {
    fprintf(stderr, "Could not open tiles output file '%s'\n", output_tiles);
    exit(EXIT_FAILURE);
  }

  /* Write the area header to the output sprite file. */
#ifndef NDEBUG
  printf("Writing sprite area header to file\n");
#endif
  if (fwrite(&sprite_area_header.sprite_count, sizeof(spriteareaheader) -
  offsetof(spriteareaheader, sprite_count), 1, output) < 1)
  {
    fprintf(stderr, "Failed writing sprite area header to file\n");
    fclose(output);
    exit(EXIT_FAILURE);
  }

  /* Most of the header is identical for every sprite in the output
     file, so we initialise these attributes separately. */
  sprite_header.size = sizeof(spriteheader) + tile_image_size;
  sprite_header.width = tile_row_length / 4 - 1;
  sprite_header.height = tile_size - 1;
  sprite_header.left_bit = 0;
  sprite_header.right_bit = SPRITE_RIGHT_BIT(tile_size, bpp);
#ifndef NDEBUG
  printf("Rightmost bit used in last word of each pixel row will be %u\n",
         sprite_header.right_bit);
#endif
  sprite_header.image = sizeof(spriteheader);
  sprite_header.mask = sizeof(spriteheader);
  /* Keep the same no. of dots per inch as for the input image */
  sprite_header.type = SPRITE_INFO_NOT_MODE_SEL | input_sprite->type &
                       (SPRITE_INFO_HOZ_DPI_MASK | SPRITE_INFO_VER_DPI_MASK);
  switch (bpp) {
#ifdef RISC_OS
  case 8:
    sprite_header.type |= (SPRITE_TYPE_8BPP << SPRITE_INFO_TYPE_SHIFT);
    break;
#endif

  case 16:
    sprite_header.type |= (SPRITE_TYPE_16BPP << SPRITE_INFO_TYPE_SHIFT);
    break;

  case 32:
    sprite_header.type |= (SPRITE_TYPE_32BPP << SPRITE_INFO_TYPE_SHIFT);
    break;

  default:
    assert(false);
    break;
  }

  input_image = (char *)input_sprite + input_sprite->image;
  input_spr_row_len = (input_sprite->width + 1) * 4;
  input_spr_height = input_sprite->height + 1;
  next_perc = 0;
  num_merged = 0;
  tile_num = 0;

  if(verbose) {
    puts("Creating tile sprites...");
    if (blend)
      one_perc = ((unsigned long)grid_size << 14) / 100;
    else
      one_perc = ((unsigned long)(last_group + 1) << 14) / 100;
    start_time = clock();
  } else {
    /* These initialisations are just to suppress compiler warnings */
    start_time = 0;
    one_perc = 0;
  }

  for (unsigned int group = 0; group <= last_group; group++) {
    unsigned int num_members;

    if (verbose && !blend && (unsigned long)group << 14 >= next_perc) {
      printf("%u%% done\n", (group * 100) / (last_group + 1));
      next_perc += one_perc;
    }

    assert(list_ptr_table != NULL);
    if(list_ptr_table[group] == NULL)
      continue;

#ifndef NDEBUG
    printf("Zeroing the R/G/B totals\n");
#endif
    for(unsigned int pixel = 0; pixel < pixels_per_tile; pixel++)
      red_totals[pixel] = green_totals[pixel] = blue_totals[pixel] = 0;

    if (blend)
      num_members = list_ptr_table[group][0];
    else
      num_members = 1;

    assert(num_members <= grid_size);
#ifndef NDEBUG
    printf("About to merge the %u locations in group %u\n", num_members, group);
#endif

    for (unsigned int member = 1; member <= num_members; member++) {
      unsigned int tile_top, tile_lhs, row_limit, col_limit, pixel_count;
      char *lhs_of_grid_loc;

      if (verbose && blend && (unsigned long)num_merged << 14 >= next_perc) {
        printf("%u%% done\n", (num_merged * 100) / grid_size);
        next_perc += one_perc;
      }

      tile_top = (list_ptr_table[group][member] / grid_width) * tile_size;
      tile_lhs = (list_ptr_table[group][member] % grid_width) * tile_size;
#ifndef NDEBUG
      printf("Adding grid location %u to aggregate (top left is %u,%u)\n",
             list_ptr_table[group][member], tile_lhs, tile_top);
#endif

      row_limit = tile_size;
      col_limit = tile_size;
#ifdef ALLOW_UNCROPPED
      /* Unless we cropped the grid to fit entirely within the input image,
         we must guard against reading outside its right or bottom edge */
      if (!crop) {
        if (tile_lhs + col_limit > input_spr_width) {
#ifndef NDEBUG
          printf("Cropping pixel column limit from %u to %u\n", col_limit,
                 input_spr_width - tile_lhs);
#endif
          col_limit = input_spr_width - tile_lhs;
        }

        if (tile_top + row_limit > input_spr_height) {
#ifndef NDEBUG
          printf("Cropping pixel row limit from %u to %u\n", row_limit,
                 input_spr_height - tile_top);
#endif
          row_limit = input_spr_height - tile_top;
        }
#ifndef NDEBUG
        if (col_limit != tile_size || row_limit != tile_size)
          printf("Edge of image: limits are %u,%u\n", col_limit, row_limit);
#endif
      }
#endif /* ALLOW_UNCROPPED */

      assert(tile_lhs + col_limit <= input_spr_width);
      assert(tile_top + row_limit <= input_spr_height);
      pixel_count = 0;
      lhs_of_grid_loc = input_image + tile_top * input_spr_row_len +
                        (tile_lhs * src_pixel_size);

      for (unsigned int row = 0; row < row_limit; row++) {

        for (unsigned int col = 0; col < col_limit; col++) {
          assert(pixel_count < pixels_per_tile);

          switch (input_spr_type) {
          case SPRITE_TYPE_32BPP:
            {
              pixel32 *start_of_row = (pixel32 *)lhs_of_grid_loc;

              red_totals[pixel_count] += start_of_row[col].red;
              green_totals[pixel_count] += start_of_row[col].green;
              blue_totals[pixel_count] += start_of_row[col].blue;
            }
            break;
  
          case SPRITE_TYPE_16BPP:
            {
              uint16_t *start_of_row = (uint16_t *)lhs_of_grid_loc;

              red_totals[pixel_count] += (((unsigned long)start_of_row[col] &
                                         31) * 255 + 15) / 31;
              green_totals[pixel_count] += (((unsigned long)start_of_row[col] >>
                                           5 & 31) * 255 + 15) / 31;
              blue_totals[pixel_count] += (((unsigned long)start_of_row[col] >>
                                          10 & 31) * 255 + 15) / 31;
            }
            break;

#ifdef RISC_OS
          case SPRITE_TYPE_8BPP:
            {
              /* Note that the format of a palette entry is subtly different
                 from that of a pixel in a 32 bpp image */
              uint8_t *start_of_row = (uint8_t *)lhs_of_grid_loc;
              unsigned long palette_entry = palette[start_of_row[col]];

              red_totals[pixel_count] += palette_entry >> 8 & 0xff;
              green_totals[pixel_count] += palette_entry >> 16 & 0xff;
              blue_totals[pixel_count] += palette_entry >> 24 & 0xff;
            }
            break;
#endif
          }

          pixel_count ++;
        } /* next col */

#ifdef ALLOW_UNCROPPED
        if (col_limit < tile_size) {
          /* The rest of this grid location is off the righthand
             side of the input sprite */
          pixel_count += tile_size - col_limit;
        }
#else
        assert(col_limit == tile_size);
#endif
        lhs_of_grid_loc += input_spr_row_len;
      } /* next row */

      num_merged++;
    } /* next member */

    /* Write the sprite header to the output file. */
#ifndef NDEBUG
    printf("Writing sprite header %u to file\n", tile_num);
#endif
    memset(sprite_header.name, 0, sizeof(sprite_header.name));
    sprintf(sprite_header.name, "tile_%u", tile_num);

    if (fwrite(&sprite_header, sizeof(spriteheader), 1, output) < 1) {
      fprintf(stderr, "Failed writing sprite header to file\n");
      fclose(output);
      exit(EXIT_FAILURE);
    }

    /* Create output sprite by dividing the three colour component totals for
       each pixel by the number of tiles in the group, to get the averages. */
#ifndef NDEBUG
    printf("Dividing the R/G/B totals by no. of pixels\n");
#endif
    unsigned int pixel_count = 0;
    char *output_row = output_image;

    for (unsigned int row = 0; row < tile_size; row++) {
      for (unsigned int col = 0; col < tile_size; col++) {
        uint8_t av_red, av_green, av_blue;

        assert(pixel_count < pixels_per_tile);
        av_red = (uint8_t)(red_totals[pixel_count] / num_members);
        av_green = (uint8_t)(green_totals[pixel_count] / num_members);
        av_blue = (uint8_t)(blue_totals[pixel_count] / num_members);

        switch(bpp) {
        case 32:
          {
            pixel32 *start_of_row = (pixel32 *)output_row;
            start_of_row[col].red = av_red;
            start_of_row[col].green = av_green;
            start_of_row[col].blue = av_blue;
            start_of_row[col].reserved = 0;
          }
          break;

        case 16:
          {
            uint16_t *start_of_row = (uint16_t *)output_row;
            start_of_row[col] = (av_red >> 3 & 0x1f) |
                                (av_green >> 3 & 0x1f) << 5 |
                                (av_blue >> 3 & 0x1f) << 10;
          }
          break;

#ifdef RISC_OS
        case 8:
          {
            /* Note that the format of a palette entry is subtly different from
               that of a pixel in a 32 bpp image */
            uint32_t palette_entry = av_red << 8 | av_green << 16 |
                                     av_blue << 24;
            uint8_t *start_of_row = (uint8_t *)output_row;
            int colour_number;
            _kernel_oserror *e;

            e = _swix(ColourTrans_ReturnColourNumberForMode, _INR(0,2)|_OUT(0),
                      palette_entry,
                      10 /* old-style numbered screen mode with 8bbp */,
                      0 /* use default palette for mode */,
                      &colour_number);
            if (e != NULL) {
              fprintf(stderr, "Colour translation failed: %s\n", e->errmess);
              fclose(output);
              exit(EXIT_FAILURE);
            }
            start_of_row[col] = colour_number;
          }
          break;
#endif
        }
        pixel_count++;
      } /* next col */
      output_row += tile_row_length;
    } /* next row */

    /* Write the sprite image bitmap to the output file */
#ifndef NDEBUG
    printf("Writing image bitmap to file\n");
#endif
    if (fwrite(output_image, tile_image_size, 1, output) < 1) {
      fprintf(stderr, "Failed writing tile image to file\n");
      fclose(output);
      exit(EXIT_FAILURE);
    }
    tile_num++;
  } /* next group */

  if(verbose) {
    double elapsed = (double)(clock() - start_time) / CLOCKS_PER_SEC;
    printf("Finished in %.2f seconds", elapsed);
    if (elapsed)
      printf(" (%.2f tiles merged per second)\n", num_merged / elapsed);
    else
      putchar('\n');
  }

  if (verbose)
    printf("Closing tiles output file.\n");
  fclose(output);

#ifdef RISC_OS
  set_file_type(output_tiles, FILETYPE_SPRITE);
#endif
}

static void merge_two_groups(unsigned int target_group, unsigned int source_group)
{
  size_t new_size;
  unsigned short num_g1, num_g2;
  assert(source_group > target_group);

  assert(list_ptr_table != NULL);
  assert(list_ptr_table[target_group] != NULL);
  num_g1 = list_ptr_table[target_group][0];

  assert(list_ptr_table[source_group] != NULL);
  num_g2 = list_ptr_table[source_group][0];

#ifndef NDEBUG
  printf("About to merge group %u (%u members) with group %u (%u members)\n",
         target_group, num_g1, source_group, num_g2);
#endif
  /* Recalculate all the difference values relating the super-group to all
     other groups. We average the difference values relating each group to
     the two sub-groups that we are about to merge, weighted according to
     how many members were in each sub-group. */
  for (unsigned int group = 0; group <= last_group; group++)
  {
    long int target_diff_index, source_diff_index;
    unsigned long new_diff;
    unsigned short total_members;

    if (list_ptr_table[group] == NULL || group == source_group ||
    group == target_group)
      continue; /* group no longer exists, or it is the one of the groups
                   that we are about to merge */

    target_diff_index = grid_to_diff_index(target_group, group);
    source_diff_index = grid_to_diff_index(source_group, group);

#ifndef NDEBUG
    /*printf("Difference between group %u & %u is %u\n", group, target_group,
           diff_table[target_diff_index]);
    printf("Difference between group %u & %u is %u\n", group, source_group,
           diff_table[source_diff_index]);*/
#endif

    assert(num_g1 <= grid_size);
    assert(num_g2 <= grid_size);
    total_members = num_g1 + num_g2;
    assert(total_members <= grid_size);
    assert(diff_table != NULL);
    new_diff = ((unsigned long)diff_table[target_diff_index] * num_g1 +
               (unsigned long)diff_table[source_diff_index] * num_g2 +
               total_members / 2) / total_members;
    /* (We add half the divisor to the dividend before the division, to
       ensure that the quotient is accurate to the nearest integer.) */
    assert(new_diff <= UCHAR_MAX);
    diff_table[target_diff_index] = (unsigned short)new_diff;
    /*printf("Recalculated difference between group %u & %u is %u\n", group,
          target_group, diff_table[target_diff_index]);*/

  } /* next group */

  /* Extend the heap block holding the target list to accomodate both lists */
  new_size = sizeof(list_ptr_table[target_group][0]) * (1 + num_g1 + num_g2);

  list_ptr_table[target_group] = realloc(list_ptr_table[target_group],
                                 new_size);
  if (list_ptr_table[target_group] == NULL) {
    fprintf(stderr, "Memory allocation of %u bytes failed\n", new_size);
    exit(EXIT_FAILURE);
  }

  /* Update record of no. of members in the target list */
  list_ptr_table[target_group][0] = num_g1 + num_g2;

  /* Append contents of source list to the target list */
  for (unsigned int i = 1; i <= num_g2; i++)
    list_ptr_table[target_group][num_g1 + i] = list_ptr_table[source_group][i];

#ifndef NDEBUG
  show_list(target_group);
#endif

  /* Discard the source list */
  free(list_ptr_table[source_group]);
  list_ptr_table[source_group] = NULL; /* mark source group slot as empty */

  /* If we have destroyed the last group, then find the last slot filled in
     the group list pointers table (a lame attempt at optimisation) */
  while (list_ptr_table[last_group] == NULL && last_group > 0)
    last_group--;
#ifndef NDEBUG
  printf("Last group remaining is now %u\n", last_group);
#endif
  num_groups--;
}

static void merge_all_similar_groups(void)
{
  /* Reduce the number of groups by merging similar ones, until we have
     less than or equal to the target number of tiles. */
  clock_t start_time;
  unsigned int merges_done, pass;
  unsigned char threshold, quality_threshold;

  merges_done = pass = 0;
  threshold = 0;

  if (quality == UINT_MAX)
    quality_threshold = (2 * UCHAR_MAX + 50) / 100; /* 0.1% error */
  else
    /* A difference byte value only represents 5% of the maximum possible
       difference between two grid locations. The following expression maps
       quality 0 - 100 to this range. */
    quality_threshold = ((100 - quality) * UCHAR_MAX + 50) / 100;

  printf("Quality threshold is %.2f%%\n", ((double)quality_threshold * 5) /
         UCHAR_MAX);

  if(verbose) {
    puts("Combining similar groups...");
    start_time = clock();
  } else {
    start_time = 0; /* suppress compiler warning */
  }

  do {
    unsigned char min_diff = UCHAR_MAX;

    if(verbose) {
      printf("Start of pass %u (%u groups combined so far)\n", pass,
             merges_done);

      printf("Will combine groups that have difference <= %.2f%%\n", 
             ((double)threshold * 5) / UCHAR_MAX);
    }

    /* Examine each pair of groups in turn, and merge those pairs that fall
       below the similarity threshold. */
    for (unsigned int group_1 = 0; group_1 < last_group; group_1++)
    {
      if (list_ptr_table[group_1] == NULL)
        continue; /* ignore empty group */

      for (unsigned int group_2 = group_1 + 1; group_2 <= last_group; group_2++)
      {
        unsigned short diff;

        if (list_ptr_table[group_2] == NULL)
          continue; /* ignore empty group */

        assert(diff_table != NULL);
        diff = diff_table[grid_to_diff_index(group_1, group_2)];
        if (diff <= threshold) {
          /* We have found two groups similar enough to be merged */
          merge_two_groups(group_1, group_2);
          merges_done ++;
          if (num_groups <= max_tiles && threshold >= quality_threshold)
            goto finished;
        } else {
          if (diff < min_diff) {
            min_diff = diff;
#ifndef NDEBUG
            printf("Next threshold lowered to %u (group %u and %u)\n",
                   min_diff, group_1, group_2);
#endif
          }
        }
      } /* next group_2 */
    } /* next group_1 */

    threshold = min_diff;
    pass++;
    /* Go back for another pass if the quality was not specified and we still
       have too many groups. */
  } while (threshold < quality_threshold || (num_groups > max_tiles &&
  quality == UINT_MAX && threshold < UCHAR_MAX));

  if (num_groups > max_tiles) {
    fprintf(stderr, "Image too complex (would require %u output tiles "
                    "at the specified quality)\n", num_groups);
    exit(EXIT_FAILURE);
  }

finished:
  if (verbose) {
    double elapsed = (double)(clock() - start_time) / CLOCKS_PER_SEC;
    printf("Finished in %.2f seconds", elapsed);
    if (elapsed)
      printf(" (%.2f groups combined per second)\n", (double)merges_done /
             elapsed);
    else
      putchar('\n');
    printf("Total no. of groups eliminated is %u \n", merges_done);

    if (merges_done > 0) {
      puts("Final groups are:");
      for (unsigned int group = 0; group <= last_group; group++) {
        if(list_ptr_table[group] != NULL)
          show_list(group);
      }
    }
  }
}

static void syntax_msg(void)
{
  printf("Syntax: *SprToMap [-verbose] [-blend] [-quality 0..100] [-tilesout <outputfile>] [-tilesize 1..%d] [-maxtiles 1..%u] [-bpp 8|16|32] [-mapout <outputfile>] [-vflip] [-width 1..N] [-height 1..N] <inputfile>\n", max_tile_size, UINT16_MAX + 1);
}

static void process_arguments(int argc, char *argv[])
{
  int arg;

  if (argc < 1) {
    fprintf(stderr, "Too few arguments\n");
    syntax_msg();
    exit(EXIT_FAILURE);
  }

  for (arg = 1; arg < argc && *argv[arg] == '-'; arg++) {
    if (!strcmp(argv[arg], "-tilesize")) {
      arg++;
      if (arg >= argc) {
        syntax_msg();
        exit(EXIT_FAILURE);
      }
      sscanf(argv[arg], "%u", &tile_size);
      if (tile_size < 1 || tile_size > max_tile_size) {
        fprintf(stderr, "Bad tile size\n");
        exit(EXIT_FAILURE);
      }
      continue;
    }

    if (!strcmp(argv[arg], "-maxtiles")) {
      arg++;
      if (arg >= argc) {
        syntax_msg();
        exit(EXIT_FAILURE);
      }
      sscanf(argv[arg], "%u", &max_tiles);
      if (max_tiles < 1 || max_tiles > UINT16_MAX + 1) {
        fprintf(stderr, "Bad maximum no. of tiles\n");
        exit(EXIT_FAILURE);
      }
      continue;
    }

    if (!strcmp(argv[arg], "-width")) {
      arg++;
      if (arg >= argc) {
        syntax_msg();
        exit(EXIT_FAILURE);
      }
      sscanf(argv[arg], "%u", &output_width);
      if (output_width < 1) {
        fprintf(stderr, "Bad output map width\n");
        exit(EXIT_FAILURE);
      }
      continue;
    }

    if (!strcmp(argv[arg], "-height")) {
      arg++;
      if (arg >= argc) {
        syntax_msg();
        exit(EXIT_FAILURE);
      }
      sscanf(argv[arg], "%u", &output_height);
      if (output_height < 1) {
        fprintf(stderr, "Bad output map height\n");
        exit(EXIT_FAILURE);
      }
      continue;
    }

    if (!strcmp(argv[arg], "-quality")) {
      arg++;
      if (arg >= argc) {
        syntax_msg();
        exit(EXIT_FAILURE);
      }
      sscanf(argv[arg], "%u", &quality);
      if (quality > 100) {
        fprintf(stderr, "Bad quality value\n");
        exit(EXIT_FAILURE);
      }
      continue;
    }

    if (!strcmp(argv[arg], "-bpp")) {
      arg++;
      if (arg >= argc) {
        syntax_msg();
        exit(EXIT_FAILURE);
      }
      for (int c = 0; c < strlen(argv[arg]); c++) {
        if (!isdigit(argv[arg][c])) {
          fprintf(stderr, "Bits per pixel must be numeric\n");
          exit(EXIT_FAILURE);
        }
      }
      sscanf(argv[arg], "%u", &bpp);
#ifdef RISC_OS
      if (bpp == 8 || bpp == 16 || bpp == 32)
        continue;

      fprintf(stderr, "Bits per pixel must be 8, 16 or 32\n");
#else
      if (bpp == 16 || bpp == 32)
        continue;

      fprintf(stderr, "Bits per pixel must be 16 or 32\n");
#endif
      exit(EXIT_FAILURE);
    }

    if (!strcmp(argv[arg], "-mapout")) {
      arg++;
      if(arg >= argc) {
        syntax_msg();
        exit(EXIT_FAILURE);
      }
      output_map = argv[arg];
      continue;
    }

    if (!strcmp(argv[arg], "-tilesout")) {
      arg++;
      if(arg >= argc) {
        syntax_msg();
        exit(EXIT_FAILURE);
      }
      output_tiles = argv[arg];
      continue;
    }

    if (!strcmp(argv[arg], "-verbose")) {
      verbose = true;
      continue;
    }

#ifdef ALLOW_UNCROPPED
    if (!strcmp(argv[arg], "-crop")) {
      crop = true;
      continue;
    }
#endif
    
    if (!strcmp(argv[arg], "-vflip")) {
      vflip = true;
      continue;
    }

    if (!strcmp(argv[arg], "-blend")) {
      blend = true;
      continue;
    }

    fprintf(stderr, "Unknown switch '%s'\n", argv[arg]);
    syntax_msg();
    exit(EXIT_FAILURE);
  }

  if (arg >= argc) {
    fprintf(stderr, "No input file specified\n");
    syntax_msg();
    exit(EXIT_FAILURE);
  }
  input_file = argv[arg++];

  if (arg < argc) {
    syntax_msg();
    exit(EXIT_FAILURE);
  }

  if (verbose) {
    printf("Will create up to %lu tiles, each of size %u.\n", max_tiles,
           tile_size);
    if (bpp)
      printf("Tile sprites will have %u bits per pixel.\n", bpp);
    else
      puts("Tile sprites will have same colour depth as source image.");

#ifdef ALLOW_UNCROPPED
    printf("Will %scrop output image to a multiple of the tile size.\n",
           crop ? "" : "not ");
#endif
    printf("Input file is '%s'.\n", input_file);

    if (output_map != NULL)
      printf("Map output file is '%s'.\n", output_map);
    else
      puts("No map output file.");
    if (vflip)
      puts("Will flip output map vertically.");

    if (output_tiles != NULL)
      printf("Tiles output file is '%s'.\n", output_tiles);
    else
      puts("No tiles output file.");
  }
}

static spriteareaheader *load_sprite(void)
{
  spriteareaheader *input_buffer;
  size_t input_buffer_size;
  FILE *input;
  long int end_of_input;

#ifdef RISC_OS
  check_file_type(input_file, FILETYPE_SPRITE);
#endif

  if (verbose)
    printf("Opening input file '%s'.\n", input_file);
  input = fopen(input_file, "rb");
  if (input == NULL) {
    fprintf(stderr, "Could not open input file '%s'\n", input_file);
    exit(EXIT_FAILURE);
  }

  fseek(input, 0, SEEK_END);
  end_of_input = ftell(input);
  input_buffer_size = offsetof(spriteareaheader, sprite_count) +
                      (size_t)end_of_input;
  if (verbose)
    printf("Allocating input buffer of %u bytes.\n", input_buffer_size);
  input_buffer = (spriteareaheader *)malloc(input_buffer_size);
  if (input_buffer == NULL) {
    fprintf(stderr, "Memory allocation of %u bytes failed\n",
            input_buffer_size);
    fclose(input);
    exit(EXIT_FAILURE);
  }
  input_buffer->size = input_buffer_size;

  if (verbose)
    printf("About to read input file into buffer.\n");

  fseek(input, 0, SEEK_SET);
  if (!fread(&input_buffer->sprite_count, (size_t)end_of_input, 1, input)) {
    fprintf(stderr, "Failed to read %ld bytes from input file\n", end_of_input);
    fclose(input);
    exit(EXIT_FAILURE);
  }

  if (verbose)
    printf("Closing input file.\n");
  fclose(input);

#ifdef RISC_OS
  verify_sprite_area(input_buffer);
#endif

  return input_buffer;
}

static void save_map(void *map, bool wide_map, size_t grid_width, size_t grid_height)
{
  FILE *output;

  if(verbose)
    puts("Opening map output file.");

  output = fopen(output_map, "wb");
  if (output == NULL) {
    fprintf(stderr, "Could not open map output file '%s'\n", output_map);
    exit(EXIT_FAILURE);
  }

  if (output_width || output_height || vflip) {
    /* We have complex options for generating the output map file */
    unsigned int row_limit = (output_height ? output_height : grid_height),
                 col_limit = (output_width ? output_width : grid_width);
    if (verbose)
      printf("Output map dimensions are %u by %u\n", col_limit, row_limit);

    /* Write one line at a time to the output file */
    for (unsigned int output_row = 0; output_row < row_limit; output_row ++) {
      uint8_t *map_8_bit;
      uint16_t *map_16_bit;
      unsigned int grid_row = output_row % grid_height;

      if (vflip)
        grid_row = grid_height - 1 - grid_row;

      map_16_bit = (uint16_t *)map + grid_width * grid_row;
      map_8_bit = (uint8_t *)map + grid_width * grid_row;

      if (output_width) {
        /* Write each tile reference separately to the output file */
        for (unsigned int output_col = 0; output_col < col_limit;
        output_col++) {
          unsigned int grid_col = output_col % grid_width;
          if (wide_map) {
            if (fwrite(map_16_bit + grid_col, sizeof(uint16_t), 1, output) == 1)
              continue;
          } else {
            if (fwrite(map_8_bit + grid_col, sizeof(uint8_t), 1, output) == 1)
              continue;
          }

          fprintf(stderr, "Failed writing a tile reference to the map "
                          "output file\n");
          fclose(output);
          exit(EXIT_FAILURE);
        } /* next col */
      } else {
        /* Write a complete line to the output file */
        if (wide_map) {
          if (fwrite(map_16_bit, sizeof(uint16_t), grid_width, output) >=
          grid_width)
            continue;
        } else {
          if (fwrite(map_8_bit, sizeof(uint8_t), grid_width, output) >=
          grid_width)
            continue;
        }
        fprintf(stderr, "Failed writing a line to the map output file\n");
        fclose(output);
        exit(EXIT_FAILURE);
      }
    } /* next row */
  } else {
    /* Write the whole map to the output file at once
       (can only use this code in the simplest case) */
    if (verbose)
      printf("Saving output map with native dimensions %u by %u\n", grid_width,
             grid_height);

    if (fwrite(map, wide_map ? sizeof(uint16_t) : sizeof(uint8_t), grid_size,
    output) < grid_size) {
      fprintf(stderr, "Failed writing map to output file\n");
      fclose(output);
      exit(EXIT_FAILURE);
    }
  }

  if(verbose)
    printf("Closing map output file.\n");

  fclose(output);
}

int main(int argc, char *argv[])
{
  spriteareaheader *sprite_area;
  void *map;
  size_t grid_width, grid_height, input_spr_width, input_spr_height;
  unsigned int input_spr_bpp;
  spriteheader *input_sprite;
  bool wide_map;
  uint32_t palette[256];
  _kernel_oserror *e;

  max_tile_size = (int)sqrt(ULONG_MAX / (SQUARE(255) * (RED_WEIGHT +
                  GREEN_WEIGHT + BLUE_WEIGHT)));

  process_arguments(argc, argv);

  sprite_area = load_sprite();

  assert(sprite_area != NULL);
  input_sprite = (spriteheader *)((char *)sprite_area + sprite_area->first);

  if(!(input_sprite->type & SPRITE_INFO_NOT_MODE_SEL))
  {
#ifdef RISC_OS
    fprintf(stderr, "Input sprite must be new format with 8, 16 or 32 bpp\n");
#else
    fprintf(stderr, "Input sprite must be new format with 16 or 32 bpp\n");
#endif
    return EXIT_FAILURE;
  }

  switch ((input_sprite->type & SPRITE_INFO_TYPE_MASK) >>
  SPRITE_INFO_TYPE_SHIFT)
  {
#ifdef RISC_OS
    case SPRITE_TYPE_8BPP:
      /* Read palette of input sprite */
      e = _swix(ColourTrans_ReadPalette, _INR(0,4), sprite_area,
          input_sprite, palette, sizeof(palette), 1);
      if (e != NULL) {
        fprintf(stderr, "Couldn't read sprite palette: %s\n", e->errmess);
        exit(EXIT_FAILURE);
      }
      input_spr_bpp = 8;
      break;
#endif
    case SPRITE_TYPE_16BPP:
      input_spr_bpp = 16;
      break;

    case SPRITE_TYPE_32BPP:
      input_spr_bpp = 32;
      break;

    default:
#ifdef RISC_OS
      fprintf(stderr, "Input sprite must be new format with 8, 16 or 32 bpp\n");
#else
      fprintf(stderr, "Input sprite must be new format with 16 or 32 bpp\n");
#endif
      return EXIT_FAILURE;
  }
  if (verbose)
    printf("Input sprite has %u bits per pixel.\n", input_spr_bpp);

  /* If colour depth for output sprites not specified then use the same
     as for the input sprite. */
  if (!bpp)
    bpp = input_spr_bpp;

  input_spr_width = ((input_sprite->width + 1) * 4) / (input_spr_bpp / 8);
  if (input_sprite->right_bit < 31) {
    unsigned int wasted_bits = 31 - input_sprite->right_bit;
#ifndef NDEBUG
    printf("Righthand wastage is %u bits\n", wasted_bits);
#endif
    input_spr_width -= wasted_bits / input_spr_bpp;
  }
  input_spr_height = input_sprite->height + 1;
  if (verbose)
    printf("Dimensions of input sprite are %u by %u pixels.\n",
           input_spr_width, input_spr_height);

#ifdef ALLOW_UNCROPPED
  grid_width = (input_spr_width + (crop ? 0 : tile_size - 1)) / tile_size;
  grid_height = (input_spr_height + (crop ? 0 : tile_size - 1)) / tile_size;
#else
  grid_width = input_spr_width / tile_size;
  grid_height = input_spr_height / tile_size;
#endif

  /* If a fixed width or height was specified for the output map and it is
     smaller than the grid covering the input image then reduce the grid area
     to avoid generating unused tiles. */

  if (output_width && grid_width > output_width)
    grid_width = output_width;

  if (output_height && grid_height > output_height)
    grid_height = output_height;

  if(verbose)
    printf("Dimensions of (unwrapped) map will be %u by %u.\n", grid_width,
           grid_height);

  if (!grid_width || !grid_height) {
    fprintf(stderr, "Input sprite is too small (decrease tile size)\n");
    exit(EXIT_FAILURE);
  }

  grid_size = grid_width * grid_height;
  if (grid_size - 1 > USHRT_MAX) {
    fprintf(stderr, "Input sprite is too big (increase tile size)\n");
    exit(EXIT_FAILURE);
  }

  create_diff_table(sprite_area, grid_width, input_spr_width, palette);

  create_list_ptr_table();

  merge_all_similar_groups();

  free(diff_table);

  wide_map = (num_groups > UINT8_MAX + 1);
  map = create_map(wide_map);

  if (output_tiles != NULL)
    save_tile_sprites(sprite_area, grid_width, input_spr_width, palette);

  destroy_list_ptr_table();

  free(sprite_area);

  if (output_map != NULL)
    save_map(map, wide_map, grid_width, grid_height);

  free(map);

  return EXIT_SUCCESS;
}
