/* xdelta 3 - delta compression tools and library
 * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007.  Joshua P. MacDonald
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#ifndef _XDELTA3_DECODE_H_
#define _XDELTA3_DECODE_H_


/* Report whether the window whose indicator byte was most recently decoded
 * copies from a source.  The result is the VCD_SOURCE bit of dec_win_ind
 * (non-zero when set, zero otherwise).  Because the bit belongs to a single
 * window, the answer can differ from one window to the next and has to be
 * re-checked for each of them. */
int xd3_decoder_needs_source (xd3_stream *stream)
{
  const int source_bit = stream->dec_win_ind & VCD_SOURCE;

  return source_bit;
}

/* Reset the per-window decoder state: the copy-window length and offset, the
 * number of checksum bytes read so far, and the address cache.
 *
 * dec_tgtlen and dec_winstart are intentionally not touched here.  The previous
 * window's target length is still needed by the caller, which advances
 * dec_winstart only once the next window indicator has actually been read, so
 * that an offset overflow is reported as late as possible (this is what allows
 * exactly 4GB to pass through a 32-bit build).
 *
 * Always returns 0. */
static int
xd3_decode_init_window (xd3_stream *stream)
{
  stream->dec_cksumbytes = 0;
  stream->dec_cpyoff     = 0;
  stream->dec_cpylen     = 0;

  xd3_init_cache (& stream->acache);

  return 0;
}

/* Make sure an output buffer of at least dec_tgtlen bytes is available for the
 * target window and compute the base pointers of the two copy segments
 * (dec_cpyaddrbase for a VCD_TARGET copy window, dec_tgtaddrbase for copies
 * within the target window itself).
 *
 * Returns 0 on success, XD3_INVALID_INPUT when a VCD_TARGET copy window starts
 * before the last decoded window, or ENOMEM when the buffer cannot be
 * allocated. */
static int
xd3_decode_setup_buffers (xd3_stream *stream)
{
  const int copies_from_target = (stream->dec_win_ind & VCD_TARGET) != 0;

  if (copies_from_target)
    {
      usize_t rel_off;

      /* Only the most recent target window is retained, so a copy window that
       * begins before it cannot be served. */
      if (stream->dec_cpyoff < stream->dec_laststart)
        {
          stream->msg = "unsupported VCD_TARGET offset";
          return XD3_INVALID_INPUT;
        }

      /* When the output buffer still is the last window (first use of
       * VCD_TARGET), forget it here so that the size test below allocates a
       * second buffer; the two are exchanged later in the DEC_FINISH state.
       * NOTE(review): the allocation path below frees dec_buffer; if that is
       * the same memory as dec_lastwin the copy base computed here would
       * dangle -- confirm before relying on VCD_TARGET decoding. */
      if (stream->next_out == stream->dec_lastwin)
        {
          stream->space_out = 0;
          stream->next_out  = NULL;
        }

      rel_off = (usize_t) (stream->dec_cpyoff - stream->dec_laststart);
      stream->dec_cpyaddrbase = stream->dec_lastwin + rel_off;
    }

  /* Grow the output buffer when the current one is too small. */
  if (stream->dec_tgtlen > stream->space_out)
    {
      xd3_free (stream, stream->dec_buffer);

      stream->space_out  = xd3_round_blksize (stream->dec_tgtlen, XD3_ALLOCSIZE);
      stream->dec_buffer = xd3_alloc (stream, stream->space_out, 1);

      if (stream->dec_buffer == NULL)
        {
          return ENOMEM;
        }

      stream->next_out = stream->dec_buffer;
    }

  /* This base is not itself a valid address: it is shifted back by the copy
   * window length because target-window addresses start at dec_cpylen.  It is
   * only ever combined with an address at or beyond dec_cpylen, which
   * xd3_decode_output_halfinst guarantees by testing the address first. */
  stream->dec_tgtaddrbase = stream->next_out - stream->dec_cpylen;

  return 0;
}

/* Ensure *copied1 points at a buffer of at least SIZE bytes.
 *
 * An existing buffer that is large enough is reused as-is.  Otherwise any
 * existing buffer is released and a new one, rounded up to a multiple of
 * XD3_ALLOCSIZE, is allocated; *alloc1 records the new capacity.
 *
 * copied2 and alloc2 are accepted but not used by this function.
 *
 * Returns 0 on success or ENOMEM if the allocation fails (in which case
 * *copied1 is NULL). */
static int
xd3_decode_allocate (xd3_stream  *stream,
		     usize_t       size,
		     uint8_t    **copied1,
		     usize_t      *alloc1,
		     uint8_t    **copied2,
		     usize_t      *alloc2)
{
  /* Fast path: the current buffer already fits the request. */
  if (*copied1 != NULL && *alloc1 >= size)
    {
      return 0;
    }

  /* Too small: drop it before allocating a replacement. */
  if (*copied1 != NULL)
    {
      xd3_free (stream, *copied1);
      *copied1 = NULL;
    }

  *alloc1  = xd3_round_blksize (size, XD3_ALLOCSIZE);
  *copied1 = xd3_alloc (stream, *alloc1, 1);

  return (*copied1 == NULL) ? ENOMEM : 0;
}

/* Collect the bytes of one window section (data, instructions or addresses).
 *
 * When COPY is zero and nothing of the section has been consumed yet, the
 * section simply points into the caller's input buffer.  Otherwise the bytes
 * are accumulated, possibly across several calls, in a private buffer obtained
 * from xd3_decode_allocate.
 *
 * Returns 0 once the whole section is available; the decoder state is then
 * advanced to NSTATE, buf_max is set to the end of the section and pos is
 * reset.  Returns XD3_INPUT while more input is needed, or the error from
 * xd3_decode_allocate. */
static int
xd3_decode_section (xd3_stream *stream,
		    xd3_desect *section,
		    xd3_decode_state nstate,
		    int copy)
{
  XD3_ASSERT (section->pos <= section->size);
  XD3_ASSERT (stream->dec_state != nstate);

  if (section->pos < section->size)
    {
      usize_t consumed;

      if (stream->avail_in == 0)
        {
          return XD3_INPUT;
        }

      if (copy != 0 || section->pos != 0)
        {
          /* Buffered path: append whatever input is available, up to the
           * number of bytes the section still lacks. */
          const usize_t missing = section->size - section->pos;

          consumed = min (missing, stream->avail_in);

          if (section->pos == 0)
            {
              /* First chunk of this section: make sure the private buffer
               * can hold all of it. */
              int ret = xd3_decode_allocate (stream,
                                             section->size,
                                             & section->copied1,
                                             & section->alloc1,
                                             & section->copied2,
                                             & section->alloc2);

              if (ret != 0)
                {
                  return ret;
                }

              section->buf = section->copied1;
            }

          memcpy (section->copied1 + section->pos,
                  stream->next_in,
                  consumed);
        }
      else
        {
          /* Zero-copy path: the complete section is in the input buffer. */
          section->buf = stream->next_in;
          consumed     = section->size;
        }

      section->pos         += consumed;
      stream->dec_winbytes += consumed;

      DECODE_INPUT (consumed);
    }

  if (section->pos < section->size)
    {
      stream->msg = "further input required";
      return XD3_INPUT;
    }

  XD3_ASSERT (section->pos == section->size);

  section->pos      = 0;
  section->buf_max  = section->buf + section->size;
  stream->dec_state = nstate;

  return 0;
}

/* Decode the size and, for copies, the address of one half-instruction (a
 * single opcode), then validate both against the current decoder position.
 *
 * stream->dec_position counts the bytes logically produced before this
 * instruction (copy window included) and is advanced by the instruction size
 * on success.  The bounds checks live here because they depend on that
 * position.
 *
 * All range checks are written as subtractions from a known-larger value
 * rather than as additions: the sizes come from untrusted input, and an
 * unsigned sum such as addr + size can wrap around and slip past an
 * "exceeds the limit" comparison.
 *
 * Returns 0 on success, XD3_INVALID_INPUT for a malformed size or an
 * out-of-range size/address, or the error reported by xd3_decode_address. */
static int
xd3_decode_parse_halfinst (xd3_stream *stream, xd3_hinst *inst)
{
  int ret;

  /* If the size from the instruction table is zero then read a size value. */
  if ((inst->size == 0) &&
      (ret = xd3_read_size (stream,
                            & stream->inst_sect.buf,
                              stream->inst_sect.buf_max,
                            & inst->size)))
    {
      return XD3_INVALID_INPUT;
    }

  /* For copy instructions, read address. */
  if (inst->type >= XD3_CPY)
    {
      IF_DEBUG1 ({
        static int cnt = 0;
        DP(RINT "DECODE:%u: COPY at %"Q"u (winoffset %u) size %u winaddr %u\n",
                 cnt++,
                 stream->total_out + (stream->dec_position - stream->dec_cpylen),
                 (stream->dec_position - stream->dec_cpylen),
                 inst->size,
                 inst->addr);
      });

      if ((ret = xd3_decode_address (stream,
                                     stream->dec_position,
                                     inst->type - XD3_CPY,
                                     & stream->addr_sect.buf,
                                     stream->addr_sect.buf_max,
                                     & inst->addr)))
        {
          return ret;
        }

      /* Cannot copy an address before it is filled-in. */
      if (inst->addr >= stream->dec_position)
        {
          stream->msg = "address too large";
          return XD3_INVALID_INPUT;
        }

      /* Check: a VCD_TARGET or VCD_SOURCE copy cannot exceed the remaining
       * buffer space in its own segment.  addr < dec_cpylen is established
       * first, so the subtraction cannot wrap. */
      if (inst->addr < stream->dec_cpylen &&
          inst->size > stream->dec_cpylen - inst->addr)
        {
          stream->msg = "size too large";
          return XD3_INVALID_INPUT;
        }
    }
  else
    {
      IF_DEBUG1 ({
        if (inst->type == XD3_ADD)
          {
            static int cnt;
            DP(RINT "DECODE:%d: ADD at %"Q"u (winoffset %u) size %u\n",
                     cnt++,
                     stream->total_out + stream->dec_position - stream->dec_cpylen,
                     stream->dec_position - stream->dec_cpylen,
                     inst->size);
          }
        else
          {
            static int cnt;
            XD3_ASSERT (inst->type == XD3_RUN);
            DP(RINT "DECODE:%d: RUN at %"Q"u (winoffset %u) size %u\n",
                     cnt++,
                     stream->total_out + stream->dec_position - stream->dec_cpylen,
                     stream->dec_position - stream->dec_cpylen,
                     inst->size);
          }
      });
    }

  /* Check: The instruction will not overflow the output buffer.  The first
   * clause keeps the subtraction in the second from wrapping. */
  if (stream->dec_position > stream->dec_maxpos ||
      inst->size > stream->dec_maxpos - stream->dec_position)
    {
      stream->msg = "size too large";
      return XD3_INVALID_INPUT;
    }

  stream->dec_position += inst->size;
  return 0;
}

/* Consume one opcode byte from the instruction section, look it up in the code
 * table and load the resulting pair of half-instructions into dec_current1 and
 * dec_current2.  Each half that is a real operation then has its size (and
 * address, for copies) decoded by xd3_decode_parse_halfinst.
 *
 * Returns 0 on success, XD3_INVALID_INPUT if the instruction section is
 * already exhausted, or the error from xd3_decode_parse_halfinst. */
static int
xd3_decode_instruction (xd3_stream *stream)
{
  xd3_hinst *first  = & stream->dec_current1;
  xd3_hinst *second = & stream->dec_current2;
  const xd3_dinst *entry;
  int ret;

  if (stream->inst_sect.buf == stream->inst_sect.buf_max)
    {
      stream->msg = "instruction underflow";
      return XD3_INVALID_INPUT;
    }

  entry = & stream->code_table[*stream->inst_sect.buf];
  stream->inst_sect.buf += 1;

  first->type  = entry->type1;
  second->type = entry->type2;
  first->size  = entry->size1;
  second->size = entry->size2;

  /* A code table may place a NOOP in either slot (unlikely, but allowed), so
   * each half is examined independently. */
  if (entry->type1 != XD3_NOOP)
    {
      ret = xd3_decode_parse_halfinst (stream, first);

      if (ret != 0)
        {
          return ret;
        }
    }

  if (entry->type2 != XD3_NOOP)
    {
      ret = xd3_decode_parse_halfinst (stream, second);

      if (ret != 0)
        {
          return ret;
        }
    }

  return 0;
}

/* Output the result of a single half-instruction.  OPT: this is the decoder
 * hotspot.
 *
 * RUN and ADD take their bytes from the data section.  A copy reads from the
 * VCD_TARGET window, from source blocks fetched with xd3_getblk, or from the
 * part of the target window already produced.  Output is appended at
 * next_out + avail_out and avail_out is advanced.
 *
 * A source copy may span several blocks and xd3_getblk may hand control back
 * to the caller; to allow the call to be repeated, the instruction is updated
 * in place (size reduced, address advanced) and its type becomes XD3_NOOP only
 * once it has been emitted completely.
 *
 * Returns 0 when the half-instruction is complete, XD3_INVALID_INPUT when the
 * data section or the source is too short or the output would not fit in the
 * output buffer, or the non-zero value returned by xd3_getblk. */
static int
xd3_decode_output_halfinst (xd3_stream *stream, xd3_hinst *inst)
{
  /* To make this reentrant, set take = min (inst->size, available space)... */
  usize_t take = inst->size;

  XD3_ASSERT (inst->type != XD3_NOOP);

  /* Never write past the output buffer, whatever the instruction stream
   * claims.  Written as a subtraction so that the test itself cannot wrap.
   * Everything emitted below during this call is part of the original
   * inst->size, so checking once up front covers the multi-block copy loop. */
  if (stream->avail_out > stream->space_out ||
      take > stream->space_out - stream->avail_out)
    {
      stream->msg = "overflow while decoding";
      return XD3_INVALID_INPUT;
    }

  switch (inst->type)
    {
    case XD3_RUN:
      {
        /* Only require a single data byte. */
        if (stream->data_sect.buf == stream->data_sect.buf_max)
          {
            stream->msg = "data underflow";
            return XD3_INVALID_INPUT;
          }

        /* TUNE: Probably want to eliminate memset/memcpy here */
        memset (stream->next_out + stream->avail_out,
                stream->data_sect.buf[0],
                take);

        stream->data_sect.buf += 1;
        stream->avail_out += take;
        inst->type = XD3_NOOP;
        break;
      }
    case XD3_ADD:
      {
        /* Require at least TAKE data bytes.  Compare lengths instead of
         * forming buf + take, which would be an out-of-range pointer (and
         * could wrap) for an oversized TAKE. */
        if (take > (usize_t) (stream->data_sect.buf_max - stream->data_sect.buf))
          {
            stream->msg = "data underflow";
            return XD3_INVALID_INPUT;
          }

        memcpy (stream->next_out + stream->avail_out,
                stream->data_sect.buf,
                take);

        stream->data_sect.buf += take;
        stream->avail_out += take;
        inst->type = XD3_NOOP;
        break;
      }
    default:
      {
        usize_t i;
        const uint8_t *src;
        uint8_t *dst;

        /* See if it copies from the VCD_TARGET/VCD_SOURCE window or the target
         * window.  Out-of-bounds checks for the addresses and sizes are
         * performed in xd3_decode_parse_halfinst. */
        if (inst->addr < stream->dec_cpylen)
          {
            if (stream->dec_win_ind & VCD_TARGET)
              {
                /* For VCD_TARGET we know the entire range is in-memory, as
                 * established by decode_setup_buffers. */
                src = stream->dec_cpyaddrbase + inst->addr;
                inst->type = XD3_NOOP;
                inst->size = 0;
              }
            else
              {
                /* In this case we have to read a source block, which could
                 * return control to the caller.  We need to know the first
                 * block number needed for this copy. */
                xd3_source *source;
                xoff_t block;
                usize_t blkoff;
                usize_t blksize;
                int ret;

              more:

                /* Re-entered (via the goto below) once per source block until
                 * the copy is complete. */
                source  = stream->src;
                block   = source->cpyoff_blocks;
                blkoff  = source->cpyoff_blkoff + inst->addr;
                blksize = source->blksize;

                /* Normalize (block, blkoff) so that blkoff < blksize. */
                while (blkoff >= blksize)
                  {
                    block  += 1;
                    blkoff -= blksize;
                  }

                if ((ret = xd3_getblk (stream, block)))
                  {
                    /* could be a XD3_GETSRCBLK failure. */
                    XD3_ASSERT(ret != XD3_TOOFARBACK);
                    return ret;
                  }

                src = source->curblk + blkoff;

                /* This block either contains enough data or the source file is
                 * short. */
                if ((source->onblk != blksize) && (blkoff + take > source->onblk))
                  {
                    stream->msg = "source file too short";
                    return XD3_INVALID_INPUT;

                  }

                XD3_ASSERT (blkoff != blksize);

                if (blkoff + take <= blksize)
                  {
                    inst->type = XD3_NOOP;
                    inst->size = 0;
                  }
                else
                  {
                    /* This block doesn't contain all the data, modify the
                     * instruction, do not set to XD3_NOOP. */
                    take = blksize - blkoff;
                    inst->size -= take;
                    inst->addr += take;
                  }
              }
          }
        else
          {
            /* For a target-window copy, we know the entire range is in-memory.
             * The dec_tgtaddrbase is negatively offset by dec_cpylen because
             * the addresses start beyond that point. */
            src = stream->dec_tgtaddrbase + inst->addr;
            inst->type = XD3_NOOP;
            inst->size = 0;
          }

        dst = stream->next_out + stream->avail_out;

        stream->avail_out += take;

        /* Can't just memcpy here due to possible overlap: a target-window copy
         * may read bytes that this very loop has just written. */
        for (i = take; i != 0; i -= 1)
          {
            *dst++ = *src++;
          }

        take = inst->size;

        /* If there is more to copy, call getblk again. */
        if (inst->type != XD3_NOOP)
          {
            XD3_ASSERT (take > 0);
            goto more;
          }
        else
          {
            XD3_ASSERT (take == 0);
          }
      }
    }

  return 0;
}

/* Mark the current window as done: rewind the three section cursors, clear the
 * count of window bytes consumed and move the decoder to DEC_FINISH.
 *
 * Always returns XD3_OUTPUT. */
static int
xd3_decode_finish_window (xd3_stream *stream)
{
  stream->addr_sect.pos = 0;
  stream->inst_sect.pos = 0;
  stream->data_sect.pos = 0;

  stream->dec_state     = DEC_FINISH;
  stream->dec_winbytes  = 0;

  return XD3_OUTPUT;
}

/* Read the DATA, INST and ADDR sections of the current window, in that order,
 * resuming at whichever one stream->dec_state names.
 *
 * With XD3_JUST_HDR the window is finished immediately; with XD3_SKIP_WINDOW
 * the section bytes are consumed without being stored.  Otherwise each section
 * is gathered by xd3_decode_section, optionally run through secondary
 * decompression, and finally the output buffers are prepared unless
 * XD3_SKIP_EMIT is set.
 *
 * Returns 0 when the sections are ready for emitting, XD3_INPUT when more
 * input is needed, the result of xd3_decode_finish_window when the window is
 * finished without emitting, or an error code. */
static int
xd3_decode_sections (xd3_stream *stream)
{
  usize_t total, remaining, avail;
  xd3_decode_state entry_state;
  int must_copy, ret;

  if (stream->flags & XD3_JUST_HDR)
    {
      /* Header-only decoding: nothing left to do. */
      return xd3_decode_finish_window (stream);
    }

  /* Size of the three sections together; having all of it in the input
   * buffer at once is what makes the zero-copy path possible. */
  total = (stream->inst_sect.size +
           stream->addr_sect.size +
           stream->data_sect.size);

  /* The window may already be entirely processed. */
  XD3_ASSERT (stream->dec_winbytes <= total);

  remaining = total - stream->dec_winbytes;       /* still to be read     */
  avail     = min (remaining, stream->avail_in);  /* readable right now   */
  must_copy = (avail != remaining);               /* input is incomplete  */

  if (stream->flags & XD3_SKIP_WINDOW)
    {
      /* Discard whatever part of the window is available. */
      DECODE_INPUT (avail);

      stream->dec_winbytes += avail;

      if (! must_copy)
        {
          return xd3_decode_finish_window (stream);
        }

      stream->msg = "further input required";
      return XD3_INPUT;
    }

  /* Gather the sections, starting from the state we were entered in.  Each
   * successful xd3_decode_section call advances dec_state to the next
   * section, so the remaining ones follow unconditionally. */
  entry_state = stream->dec_state;

  if (entry_state != DEC_DATA &&
      entry_state != DEC_INST &&
      entry_state != DEC_ADDR)
    {
      stream->msg = "internal error";
      return XD3_INVALID_INPUT;
    }

  if (entry_state == DEC_DATA)
    {
      ret = xd3_decode_section (stream, & stream->data_sect, DEC_INST, must_copy);

      if (ret != 0)
        {
          return ret;
        }
    }

  if (entry_state != DEC_ADDR)
    {
      ret = xd3_decode_section (stream, & stream->inst_sect, DEC_ADDR, must_copy);

      if (ret != 0)
        {
          return ret;
        }
    }

  ret = xd3_decode_section (stream, & stream->addr_sect, DEC_EMIT, must_copy);

  if (ret != 0)
    {
      return ret;
    }

  XD3_ASSERT (stream->dec_winbytes == total);

#if SECONDARY_ANY
#define DECODE_SECONDARY_SECTION(UPPER,LOWER) \
  ((stream->dec_del_ind & VCD_ ## UPPER ## COMP) && \
   (ret = xd3_decode_secondary (stream, & stream-> LOWER ## _sect, \
					& xd3_sec_ ## LOWER (stream))))

  if (DECODE_SECONDARY_SECTION (DATA, data) ||
      DECODE_SECONDARY_SECTION (INST, inst) ||
      DECODE_SECONDARY_SECTION (ADDR, addr))
    {
      return ret;
    }
#endif

  if (stream->flags & XD3_SKIP_EMIT)
    {
      return xd3_decode_finish_window (stream);
    }

  /* OPT: A possible optimization is to avoid allocating memory in
   * decode_setup_buffers and to avoid a large memcpy when the window consists
   * of a single VCD_SOURCE copy instruction.  The only potential problem is if
   * the following window is a VCD_TARGET, then you need to remember... */
  return xd3_decode_setup_buffers (stream);
}

/* Execute the instruction stream of the current window, writing the target
 * window to next_out, then verify the result: the produced length must equal
 * dec_tgtlen, the data and address sections must be fully consumed, and the
 * Adler-32 checksum (when present and not disabled) must match.
 *
 * The output buffer is sized for the whole target window beforehand because
 * VCDIFF allows copies from anywhere in it, so output space never runs out
 * mid-window.  The function must nevertheless be re-enterable: a source copy
 * can return control to the caller (see xd3_decode_output_halfinst), leaving a
 * partially executed half-instruction in dec_current1/dec_current2 and a
 * non-zero avail_out.  Processed half-instructions are marked XD3_NOOP, and
 * the loop below keeps going while either one is still pending.
 *
 * Returns the value of xd3_decode_finish_window on success,
 * XD3_INVALID_INPUT on a malformed window, or whatever non-zero code the
 * instruction decoding/output helpers return. */
static int
xd3_decode_emit (xd3_stream *stream)
{
  int ret;

  XD3_ASSERT (! (stream->flags & XD3_SKIP_EMIT));
  XD3_ASSERT (stream->dec_tgtlen <= stream->space_out);

  /* Continue while there are undecoded opcodes OR a half-instruction is still
   * pending.  Testing only the instruction cursor would drop the tail of the
   * final copy when it was interrupted and this function is entered again. */
  while (stream->inst_sect.buf != stream->inst_sect.buf_max ||
         stream->dec_current1.type != XD3_NOOP ||
         stream->dec_current2.type != XD3_NOOP)
    {
      /* Decode next instruction pair, unless one is still in progress. */
      if ((stream->dec_current1.type == XD3_NOOP) &&
          (stream->dec_current2.type == XD3_NOOP) &&
          (ret = xd3_decode_instruction (stream))) { return ret; }

      /* Output for each instruction. */
      if ((stream->dec_current1.type != XD3_NOOP) &&
          (ret = xd3_decode_output_halfinst (stream, & stream->dec_current1))) { return ret; }

      if ((stream->dec_current2.type != XD3_NOOP) &&
          (ret = xd3_decode_output_halfinst (stream, & stream->dec_current2))) { return ret; }
    }

  if (stream->avail_out != stream->dec_tgtlen)
    {
      IF_DEBUG1 (DP(RINT "AVAIL_OUT(%d) != DEC_TGTLEN(%d)\n", stream->avail_out, stream->dec_tgtlen));
      stream->msg = "wrong window length";
      return XD3_INVALID_INPUT;
    }

  if (stream->data_sect.buf != stream->data_sect.buf_max)
    {
      stream->msg = "extra data section";
      return XD3_INVALID_INPUT;
    }

  if (stream->addr_sect.buf != stream->addr_sect.buf_max)
    {
      stream->msg = "extra address section";
      return XD3_INVALID_INPUT;
    }

  /* OPT: Should cksum computation be combined with the above loop? */
  if ((stream->dec_win_ind & VCD_ADLER32) != 0 &&
      (stream->flags & XD3_ADLER32_NOVER) == 0)
    {
      uint32_t a32 = adler32 (1L, stream->next_out, stream->avail_out);

      if (a32 != stream->dec_adler32)
        {
          stream->msg = "target window checksum mismatch";
          return XD3_INVALID_INPUT;
        }
    }

  /* Finished with a window. */
  return xd3_decode_finish_window (stream);
}

/* Decoder entry point: a resumable state machine over stream->dec_state.
 *
 * Each case decodes one element of the VCDIFF file header or of a window
 * header and then deliberately falls through to the next one.  Whenever a
 * helper returns non-zero (for instance because more input is needed) that
 * value is returned immediately with dec_state left at the element in
 * progress, so the next call resumes exactly there.
 *
 * Return values produced here or passed through from the helpers above:
 *   XD3_GOTHEADER / XD3_WINSTART  a window header was fully parsed (first
 *                                 window / later windows);
 *   XD3_OUTPUT                    a window was finished; total_out has been
 *                                 advanced by avail_out;
 *   XD3_WINFINISH                 post-window bookkeeping done, ready for the
 *                                 next window indicator;
 *   XD3_INPUT                     more input is required;
 *   XD3_INVALID_INPUT, ENOMEM     errors (stream->msg is set for most of
 *                                 them);
 *   any other non-zero value returned by a helper. */
int
xd3_decode_input (xd3_stream *stream)
{
  int ret;

  if (stream->enc_state != 0)
    {
      stream->msg = "encoder/decoder transition";
      return XD3_INVALID_INPUT;
    }

  /* The three *_CASE helpers decode one optional field: when EXPR holds, the
   * field is read into X (returning early on any non-zero result); in either
   * case the state then advances to NSTATE. */
#define BYTE_CASE(expr,x,nstate)                                               \
      do {                                                                     \
      if ( (expr) &&                                                           \
           ((ret = xd3_decode_byte (stream, & (x))) != 0) ) { return ret; }    \
      stream->dec_state = (nstate);                                            \
      } while (0)

#define OFFSET_CASE(expr,x,nstate)                                             \
      do {                                                                     \
      if ( (expr) &&                                                           \
           ((ret = xd3_decode_offset (stream, & (x))) != 0) ) { return ret; }  \
      stream->dec_state = (nstate);                                            \
      } while (0)

#define SIZE_CASE(expr,x,nstate)                                               \
      do {                                                                     \
      if ( (expr) &&                                                           \
           ((ret = xd3_decode_size (stream, & (x))) != 0) ) { return ret; }    \
      stream->dec_state = (nstate);                                            \
      } while (0)

  /* True when exactly one of VCD_SOURCE / VCD_TARGET is set. */
#define SRCORTGT(x) (((x) & VCD_SRCORTGT) == VCD_SOURCE ||                     \
		     ((x) & VCD_SRCORTGT) == VCD_TARGET)

  switch (stream->dec_state)
    {
    case DEC_VCHEAD:
      {
        /* File magic: three signature bytes followed by a version byte. */
        if ((ret = xd3_decode_bytes (stream, stream->dec_magic, & stream->dec_magicbytes, 4))) { return ret; }

        if (stream->dec_magic[0] != VCDIFF_MAGIC1 ||
            stream->dec_magic[1] != VCDIFF_MAGIC2 ||
            stream->dec_magic[2] != VCDIFF_MAGIC3)
          {
            stream->msg = "not a VCDIFF input";
            return XD3_INVALID_INPUT;
          }

        if (stream->dec_magic[3] != 0)
          {
            stream->msg = "VCDIFF input version > 0 is not supported";
            return XD3_INVALID_INPUT;
          }

        stream->dec_state = DEC_HDRIND;
      }
      /* fallthrough */
    case DEC_HDRIND:
      {
        if ((ret = xd3_decode_byte (stream, & stream->dec_hdr_ind))) { return ret; }

        if ((stream->dec_hdr_ind & VCD_INVHDR) != 0)
          {
            stream->msg = "unrecognized header indicator bits set";
            return XD3_INVALID_INPUT;
          }

        stream->dec_state = DEC_SECONDID;
      }
      /* fallthrough */

    case DEC_SECONDID:
      /* Secondary compressor ID: only if VCD_SECONDARY is set */
      if ((stream->dec_hdr_ind & VCD_SECONDARY) != 0)
        {
          BYTE_CASE (1, stream->dec_secondid, DEC_TABLEN);

          /* FGK_CASE / DJW_CASE are macros defined outside this section.
           * NOTE(review): presumably each either selects the compressor and
           * leaves this switch or reports it as unavailable -- confirm. */
          switch (stream->dec_secondid)
            {
            case VCD_FGK_ID:
              FGK_CASE (stream);
            case VCD_DJW_ID:
              DJW_CASE (stream);
            default:
              stream->msg = "unknown secondary compressor ID";
              return XD3_INVALID_INPUT;
            }
        }
      /* fallthrough */

    case DEC_TABLEN:
      /* Length of code table data: only if VCD_CODETABLE is set */
      SIZE_CASE ((stream->dec_hdr_ind & VCD_CODETABLE) != 0, stream->dec_codetblsz, DEC_NEAR);

      /* The codetblsz counts the two NEAR/SAME bytes */
      if ((stream->dec_hdr_ind & VCD_CODETABLE) != 0)
        {
          if (stream->dec_codetblsz <= 2)
            {
              /* A malformed header, not an allocation failure: report it the
               * same way as every other invalid-input condition. */
              stream->msg = "invalid code table size";
              return XD3_INVALID_INPUT;
            }
          stream->dec_codetblsz -= 2;
        }
      /* fallthrough */
    case DEC_NEAR:
      /* Near modes: only if VCD_CODETABLE is set */
      BYTE_CASE((stream->dec_hdr_ind & VCD_CODETABLE) != 0, stream->acache.s_near, DEC_SAME);
      /* fallthrough */
    case DEC_SAME:
      /* Same modes: only if VCD_CODETABLE is set */
      BYTE_CASE((stream->dec_hdr_ind & VCD_CODETABLE) != 0, stream->acache.s_same, DEC_TABDAT);
      /* fallthrough */
    case DEC_TABDAT:
      /* Compressed code table data */

      if ((stream->dec_hdr_ind & VCD_CODETABLE) != 0)
        {
          /* Get the code table data.  The buffer is allocated only once so
           * that a resumed call keeps the bytes gathered so far. */
          if ((stream->dec_codetbl == NULL) &&
              (stream->dec_codetbl = xd3_alloc (stream, stream->dec_codetblsz, 1)) == NULL) { return ENOMEM; }

          if ((ret = xd3_decode_bytes (stream, stream->dec_codetbl, & stream->dec_codetblbytes, stream->dec_codetblsz)))
            {
              return ret;
            }

          if ((ret = xd3_apply_table_encoding (stream, stream->dec_codetbl, stream->dec_codetblbytes)))
            {
              return ret;
            }
        }
      else
        {
          /* Use the default table. */
          stream->acache.s_near = __rfc3284_code_table_desc.near_modes;
          stream->acache.s_same = __rfc3284_code_table_desc.same_modes;
          stream->code_table    = xd3_rfc3284_code_table ();
        }

      if ((ret = xd3_alloc_cache (stream))) { return ret; }

      stream->dec_state = DEC_APPLEN;
      /* fallthrough */

    case DEC_APPLEN:
      /* Length of application data */
      SIZE_CASE((stream->dec_hdr_ind & VCD_APPHEADER) != 0, stream->dec_appheadsz, DEC_APPDAT);
      /* fallthrough */

    case DEC_APPDAT:
      /* Application data */
      if (stream->dec_hdr_ind & VCD_APPHEADER)
        {
          /* Note: we add an additional byte for padding, to allow 0-termination. */
          if ((stream->dec_appheader == NULL) &&
              (stream->dec_appheader = xd3_alloc (stream, stream->dec_appheadsz+1, 1)) == NULL) { return ENOMEM; }

          stream->dec_appheader[stream->dec_appheadsz] = 0;

          if ((ret = xd3_decode_bytes (stream, stream->dec_appheader, & stream->dec_appheadbytes, stream->dec_appheadsz)))
            {
              return ret;
            }
        }

      /* Everything consumed up to here is the file header. */
      stream->dec_hdrsize = stream->total_in;
      stream->dec_state = DEC_WININD;
      /* fallthrough */

    case DEC_WININD:
      {
        /* Start of a window: the window indicator */

        if ((ret = xd3_decode_byte (stream, & stream->dec_win_ind))) { return ret; }

        stream->current_window = stream->dec_window_count;

        /* Now that another window really exists, account for the previous
         * window's target length in the window start offset. */
        if (XOFF_T_OVERFLOW (stream->dec_winstart, stream->dec_tgtlen))
          {
            stream->msg = "decoder file offset overflow";
            return XD3_INVALID_INPUT;
          }

        stream->dec_winstart += stream->dec_tgtlen;

        if ((stream->dec_win_ind & VCD_INVWIN) != 0)
          {
            stream->msg = "unrecognized window indicator bits set";
            return XD3_INVALID_INPUT;
          }

        if ((ret = xd3_decode_init_window (stream))) { return ret; }

        stream->dec_state = DEC_CPYLEN;

        IF_DEBUG1 (DP(RINT "--------- TARGET WINDOW %"Q"u ------------------\n", stream->current_window));
      }
      /* fallthrough */

    case DEC_CPYLEN:
      /* Copy window length: only if VCD_SOURCE or VCD_TARGET is set */
      SIZE_CASE(SRCORTGT (stream->dec_win_ind), stream->dec_cpylen, DEC_CPYOFF);

      /* Set the initial, logical decoder position (HERE address) in
       * dec_position.  This is set to just after the source/copy window, as we
       * are just about to output the first byte of target window. */
      stream->dec_position = stream->dec_cpylen;
      /* fallthrough */

    case DEC_CPYOFF:
      /* Copy window offset: only if VCD_SOURCE or VCD_TARGET is set */
      OFFSET_CASE(SRCORTGT (stream->dec_win_ind), stream->dec_cpyoff, DEC_ENCLEN);

      /* Copy offset and copy length may not overflow. */
      if (XOFF_T_OVERFLOW (stream->dec_cpyoff, stream->dec_cpylen))
        {
          stream->msg = "decoder copy window overflows a file offset";
          return XD3_INVALID_INPUT;
        }

      /* Check copy window bounds: VCD_TARGET window may not exceed current position. */
      if ((stream->dec_win_ind & VCD_TARGET) &&
          (stream->dec_cpyoff + (xoff_t) stream->dec_cpylen > stream->dec_winstart))
        {
          stream->msg = "VCD_TARGET window out of bounds";
          return XD3_INVALID_INPUT;
        }
      /* fallthrough */

    case DEC_ENCLEN:
      /* Length of the delta encoding */
      SIZE_CASE(1, stream->dec_enclen, DEC_TGTLEN);
      /* fallthrough */
    case DEC_TGTLEN:
      /* Length of target window */
      SIZE_CASE(1, stream->dec_tgtlen, DEC_DELIND);

      /* Set the maximum decoder position, beyond which we should not decode
       * any data.  This is the maximum value for dec_position.  This may not
       * exceed the size of a usize_t. */
      if (USIZE_T_OVERFLOW (stream->dec_cpylen, stream->dec_tgtlen))
        {
          stream->msg = "decoder target window overflows a usize_t";
          return XD3_INVALID_INPUT;
        }

      /* Check for malicious files. */
      if (stream->dec_tgtlen > XD3_HARDMAXWINSIZE)
        {
          stream->msg = "hard window size exceeded";
          return XD3_INVALID_INPUT;
        }

      stream->dec_maxpos = stream->dec_cpylen + stream->dec_tgtlen;
      /* fallthrough */

    case DEC_DELIND:
      /* Delta indicator */
      BYTE_CASE(1, stream->dec_del_ind, DEC_DATALEN);

      if ((stream->dec_del_ind & VCD_INVDEL) != 0)
        {
          stream->msg = "unrecognized delta indicator bits set";
          return XD3_INVALID_INPUT;
        }

      /* Delta indicator is only used with secondary compression. */
      if ((stream->dec_del_ind != 0) && (stream->sec_type == NULL))
        {
          stream->msg = "invalid delta indicator bits set";
          return XD3_INVALID_INPUT;
        }
      /* fallthrough */

      /* Section lengths */
    case DEC_DATALEN:
      SIZE_CASE(1, stream->data_sect.size, DEC_INSTLEN);
      /* fallthrough */
    case DEC_INSTLEN:
      SIZE_CASE(1, stream->inst_sect.size, DEC_ADDRLEN);
      /* fallthrough */
    case DEC_ADDRLEN:
      SIZE_CASE(1, stream->addr_sect.size, DEC_CKSUM);
      /* fallthrough */

    case DEC_CKSUM:
      /* Window checksum. */
      if ((stream->dec_win_ind & VCD_ADLER32) != 0)
        {
          int i;

          if ((ret = xd3_decode_bytes (stream, stream->dec_cksum, & stream->dec_cksumbytes, 4))) { return ret; }

          /* Assemble the four checksum bytes, most significant first. */
          for (i = 0; i < 4; i += 1)
            {
              stream->dec_adler32 = (stream->dec_adler32 << 8) | stream->dec_cksum[i];
            }
        }

      stream->dec_state = DEC_DATA;

      /* Check dec_enclen for redundancy, otherwise it is not really used: it
       * must equal the delta indicator byte, plus the encoded sizes of the
       * four length fields, plus the three sections, plus the optional
       * checksum. */
      {
        usize_t enclen_check = (1 + (xd3_sizeof_size (stream->dec_tgtlen) +
                                     xd3_sizeof_size (stream->data_sect.size) +
                                     xd3_sizeof_size (stream->inst_sect.size) +
                                     xd3_sizeof_size (stream->addr_sect.size)) +
                                stream->data_sect.size +
                                stream->inst_sect.size +
                                stream->addr_sect.size +
                                ((stream->dec_win_ind & VCD_ADLER32) ? 4 : 0));

        if (stream->dec_enclen != enclen_check)
          {
            stream->msg = "incorrect encoding length (redundent)";
            return XD3_INVALID_INPUT;
          }
      }

      /* Returning here gives the application a chance to inspect the header,
       * skip the window, etc. */
      if (stream->current_window == 0) { return XD3_GOTHEADER; }
      else                             { return XD3_WINSTART; }

    case DEC_DATA:
    case DEC_INST:
    case DEC_ADDR:
      /* Next read the three sections. */
      if ((ret = xd3_decode_sections (stream))) { return ret; }
      /* fallthrough */

    case DEC_EMIT:

      /* To speed VCD_SOURCE block-address calculations, the source
       * cpyoff_blocks and cpyoff_blkoff are pre-computed. */
      if (stream->dec_win_ind & VCD_SOURCE)
        {
          xd3_source *src = stream->src;

          if (src == NULL)
            {
              stream->msg = "source input required";
              return XD3_INVALID_INPUT;
            }

          src->cpyoff_blocks = stream->dec_cpyoff / src->blksize;
          src->cpyoff_blkoff = stream->dec_cpyoff % src->blksize;
        }

      /* xd3_decode_emit returns XD3_OUTPUT on every success. */
      if ((ret = xd3_decode_emit (stream)) == XD3_OUTPUT)
        {
          stream->total_out += (xoff_t) stream->avail_out;
        }

      return ret;

    case DEC_FINISH:
      {
        /* With VCD_TARGET, keep the window just produced as the "last window"
         * for the next one to copy from: either remember the output buffer
         * (first time) or exchange it with the previously kept one. */
        if (stream->dec_win_ind & VCD_TARGET)
          {
            if (stream->dec_lastwin == NULL)
              {
                stream->dec_lastwin   = stream->next_out;
                stream->dec_lastspace = stream->space_out;
              }
            else
              {
                xd3_swap_uint8p (& stream->dec_lastwin,   & stream->next_out);
                xd3_swap_usize_t (& stream->dec_lastspace, & stream->space_out);
              }
          }

        stream->dec_lastlen   = stream->dec_tgtlen;
        stream->dec_laststart = stream->dec_winstart;
        stream->dec_window_count += 1;

        /* Note: the updates to dec_winstart & current_window are deferred
         * until after the next DEC_WININD byte is read. */
        stream->dec_state = DEC_WININD;
        return XD3_WINFINISH;
      }

    default:
      stream->msg = "invalid state";
      return XD3_INVALID_INPUT;
    }
}

#endif // _XDELTA3_DECODE_H_
