/*====================================================================*
 -  Copyright (C) 2001 Leptonica.  All rights reserved.
 -  This software is distributed in the hope that it will be
 -  useful, but with NO WARRANTY OF ANY KIND.
 -  No author or distributor accepts responsibility to anyone for the
 -  consequences of using this software, or for whether it serves any
 -  particular purpose or works at all, unless he or she says so in
 -  writing.  Everyone is granted permission to copy, modify and
 -  redistribute this source code, for commercial or non-commercial
 -  purposes, with the following restrictions: (1) the origin of this
 -  source code must not be misrepresented; (2) modified versions must
 -  be plainly marked as such; and (3) this notice may not be removed
 -  or altered from any source or modified source distribution.
 *====================================================================*/


/*
 *  fmorphlow.c
 *
 *      Dispatcher:
 *            l_int32   morphop_low()
 *
 *      Static Low-level:
 *            void      fdilate_h_3()
 *            void      fdilate_h_5()
 *            void      fdilate_h_7()
 *            void      fdilate_h_9()
 *            void      fdilate_v_3()
 *            void      fdilate_v_5()
 *            void      fdilate_v_7()
 *            void      fdilate_v_9()
 *            void      fdilate_dp_5()
 *            void      fdilate_dm_5()
 *            void      fdilate_sq_3()
 *            void      fdilate_sq_5()
 *
 *            void      ferode_h_3()
 *            void      ferode_h_5()
 *            void      ferode_h_7()
 *            void      ferode_h_9()
 *            void      ferode_v_3()
 *            void      ferode_v_5()
 *            void      ferode_v_7()
 *            void      ferode_v_9()
 *            void      ferode_dp_5()
 *            void      ferode_dm_5()
 *            void      ferode_sq_3()
 *            void      ferode_sq_5()
 *      
 */

#include <stdio.h>

#include "allheaders.h"



    /* static protos */
static void    fdilate_h_3(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32);
static void    fdilate_h_5(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32);
static void    fdilate_h_7(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32);
static void    fdilate_h_9(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32);
static void    fdilate_v_3(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32);
static void    fdilate_v_5(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32);
static void    fdilate_v_7(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32);
static void    fdilate_v_9(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32);
static void    fdilate_dp_5(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32);
static void    fdilate_dm_5(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32);
static void    fdilate_sq_3(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32);
static void    fdilate_sq_5(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32);
static void    ferode_h_3(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32);
static void    ferode_h_5(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32);
static void    ferode_h_7(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32);
static void    ferode_h_9(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32);
static void    ferode_v_3(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32);
static void    ferode_v_5(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32);
static void    ferode_v_7(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32);
static void    ferode_v_9(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32);
static void    ferode_dp_5(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32);
static void    ferode_dm_5(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32);
static void    ferode_sq_3(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32);
static void    ferode_sq_5(l_uint32 *, l_int32, l_int32, l_int32, l_uint32 *, l_int32);




/*---------------------------------------------------------------------*
 *                          Fast morph dispatcher                      *
 *---------------------------------------------------------------------*/
/*!
 *  morphop_low()
 *
 *      Input:  datad, w, h, wpld  (passed unchanged to the low-level routine)
 *              datas, wpls        (passed unchanged to the low-level routine)
 *              morphtype  (MORPH_DILATION or MORPH_EROSION)
 *              setype     (MORPH_HORIZ, MORPH_VERT, MORPH_DIAGONAL
 *                          or MORPH_SQUARE)
 *              size       (3, 5, 7 or 9 for horiz and vert;
 *                          5 or -5 for diagonal, selecting the
 *                          dp or dm routine respectively;
 *                          3 or 5 for square)
 *      Return: 0 if an operation was dispatched; otherwise the value
 *              of ERROR_INT invoked with error code 1
 *
 *       A dispatcher to appropriate low-level code, which was
 *       all written by hand.
 *
 *       This function is now deprecated, in favor of the auto-gen'd
 *       morphology functions built by code in fmorphauto.c and
 *       fhmtauto.c.
 *
 *       Every failure path returns the ERROR_INT value directly, so
 *       that an unsupported request is never reported as success.
 *       An unrecognized setype is treated as an error, in the same
 *       way as an unrecognized morphtype.
 */
l_int32
morphop_low(l_uint32  *datad,
	    l_int32    w,
	    l_int32    h,
	    l_int32    wpld,
	    l_uint32  *datas,
	    l_int32    wpls,
	    l_int32    morphtype,
	    l_int32    setype,
	    l_int32    size)
{
    PROCNAME("morphop_low");

    if (morphtype == MORPH_DILATION) {
        if (setype == MORPH_HORIZ) {
            switch (size) {
            case 3:
                fdilate_h_3(datad, w, h, wpld, datas, wpls);
                break;
            case 5:
                fdilate_h_5(datad, w, h, wpld, datas, wpls);
                break;
            case 7:
                fdilate_h_7(datad, w, h, wpld, datas, wpls);
                break;
            case 9:
                fdilate_h_9(datad, w, h, wpld, datas, wpls);
                break;
            default:
                fprintf(stderr, " No fast horiz dilation of size %d\n", size);
                return ERROR_INT("horiz dilation not performed", procName, 1);
            }
        }
        else if (setype == MORPH_VERT) {
            switch (size) {
            case 3:
                fdilate_v_3(datad, w, h, wpld, datas, wpls);
                break;
            case 5:
                fdilate_v_5(datad, w, h, wpld, datas, wpls);
                break;
            case 7:
                fdilate_v_7(datad, w, h, wpld, datas, wpls);
                break;
            case 9:
                fdilate_v_9(datad, w, h, wpld, datas, wpls);
                break;
            default:
                fprintf(stderr, " No fast vert dilation of size %d\n", size);
                return ERROR_INT("vert dilation not performed", procName, 1);
            }
        }
        else if (setype == MORPH_DIAGONAL) {
            switch (size) {
            case 5:
                fdilate_dp_5(datad, w, h, wpld, datas, wpls);
                break;
            case -5:  /* negative size selects the negative-slope SE */
                fdilate_dm_5(datad, w, h, wpld, datas, wpls);
                break;
            default:
                fprintf(stderr, " No fast diag. dilation of size %d\n", size);
                return ERROR_INT("diagonal dilation not performed",
                                 procName, 1);
            }
        }
        else if (setype == MORPH_SQUARE) {
            switch (size) {
            case 3:
                fdilate_sq_3(datad, w, h, wpld, datas, wpls);
                break;
            case 5:
                fdilate_sq_5(datad, w, h, wpld, datas, wpls);
                break;
            default:
                fprintf(stderr, " No fast square dilation of size %d\n", size);
                return ERROR_INT("square dilation not performed",
                                 procName, 1);
            }
        }
        else
            return ERROR_INT("unknown setype", procName, 1);
    }
    else if (morphtype == MORPH_EROSION) {
        if (setype == MORPH_HORIZ) {
            switch (size) {
            case 3:
                ferode_h_3(datad, w, h, wpld, datas, wpls);
                break;
            case 5:
                ferode_h_5(datad, w, h, wpld, datas, wpls);
                break;
            case 7:
                ferode_h_7(datad, w, h, wpld, datas, wpls);
                break;
            case 9:
                ferode_h_9(datad, w, h, wpld, datas, wpls);
                break;
            default:
                fprintf(stderr, " No fast horiz erosion of size %d\n", size);
                return ERROR_INT("horiz erosion not performed", procName, 1);
            }
        }
        else if (setype == MORPH_VERT) {
            switch (size) {
            case 3:
                ferode_v_3(datad, w, h, wpld, datas, wpls);
                break;
            case 5:
                ferode_v_5(datad, w, h, wpld, datas, wpls);
                break;
            case 7:
                ferode_v_7(datad, w, h, wpld, datas, wpls);
                break;
            case 9:
                ferode_v_9(datad, w, h, wpld, datas, wpls);
                break;
            default:
                fprintf(stderr, " No fast vert erosion of size %d\n", size);
                return ERROR_INT("vert erosion not performed", procName, 1);
            }
        }
        else if (setype == MORPH_DIAGONAL) {
            switch (size) {
            case 5:
                ferode_dp_5(datad, w, h, wpld, datas, wpls);
                break;
            case -5:  /* negative size selects the negative-slope SE */
                ferode_dm_5(datad, w, h, wpld, datas, wpls);
                break;
            default:
                fprintf(stderr, " No fast diag. erosion of size %d\n", size);
                return ERROR_INT("diagonal erosion not performed",
                                 procName, 1);
            }
        }
        else if (setype == MORPH_SQUARE) {
            switch (size) {
            case 3:
                ferode_sq_3(datad, w, h, wpld, datas, wpls);
                break;
            case 5:
                ferode_sq_5(datad, w, h, wpld, datas, wpls);
                break;
            default:
                fprintf(stderr, " No fast square erosion of size %d\n", size);
                return ERROR_INT("square erosion not performed",
                                 procName, 1);
            }
        }
        else
            return ERROR_INT("unknown setype", procName, 1);
    }
    else
        return ERROR_INT("unknown morphtype", procName, 1);

    return 0;
}



/*--------------------------------------------------------------------------*
 *                            Low-level static routines                     *
 *--------------------------------------------------------------------------*/
/*
 *  Note: in all the low-level routines, the part of the image
 *        that is accessed has been clipped by ADDED_BORDER pixels
 *        on all four sides.  This is done in fmorph.c by 
 *        redefining w and h smaller and by moving the start-of-image
 *        pointers up to the beginning of this interior rectangle.
 */

	/*-----------------------------------------------------------*
	 *                    Horizontal dilations                   *
	 *-----------------------------------------------------------*/
/*
 *  fdilate_h_3()
 *
 *      Each dest word is the OR of the src word with itself shifted
 *      left and right by one bit.  The bit shifted in at each end of
 *      the word is taken from the adjacent src word, so one word
 *      before and one word after the processed span of every row is
 *      read.  (w + 31) / 32 words are processed in each of h rows.
 */
static void
fdilate_h_3(l_uint32  *datad,
	    l_int32    w,
	    l_int32    h,
	    l_int32    wpld,
	    l_uint32  *datas,
	    l_int32    wpls)
{
l_int32    row, k, nwords;
l_uint32   left, cur, right;
l_uint32  *srow, *drow;

    nwords = (l_uint32)(w + 31) / 32;  /* words actually covered by w bits */

    for (row = 0; row < h; row++) {
        srow = datas + row * wpls;
        drow = datad + row * wpld;
        for (k = 0; k < nwords; k++) {
            left = srow[k - 1];
            cur = srow[k];
            right = srow[k + 1];
            drow[k] = cur |
                      (cur << 1) | (right >> 31) |
                      (cur >> 1) | (left << 31);
        }
    }
}


/*
 *  fdilate_h_5()
 *
 *      Each dest word is the OR of the src word with itself shifted
 *      left and right by one and by two bits.  The bits shifted in at
 *      each end of the word are taken from the adjacent src words, so
 *      one word before and one word after the processed span of every
 *      row is read.
 */
static void
fdilate_h_5(l_uint32  *datad,
	    l_int32    w,
	    l_int32    h,
	    l_int32    wpld,
	    l_uint32  *datas,
	    l_int32    wpls)
{
l_int32    row, k, nwords;
l_uint32   left, cur, right;
l_uint32  *srow, *drow;

    nwords = (l_uint32)(w + 31) / 32;  /* words actually covered by w bits */

    for (row = 0; row < h; row++) {
        srow = datas + row * wpls;
        drow = datad + row * wpld;
        for (k = 0; k < nwords; k++) {
            left = srow[k - 1];
            cur = srow[k];
            right = srow[k + 1];
            drow[k] = cur |
                      (cur << 1) | (right >> 31) |
                      (cur << 2) | (right >> 30) |
                      (cur >> 1) | (left << 31) |
                      (cur >> 2) | (left << 30);
        }
    }
}


/*
 *  fdilate_h_7()
 *
 *      Each dest word is the OR of the src word with itself shifted
 *      left and right by 1, 2 and 3 bits.  For a shift of s bits, the
 *      s bits entering the word come from the adjacent src word
 *      (shifted the other way by 32 - s), so one word before and one
 *      word after the processed span of every row is read.
 */
static void
fdilate_h_7(l_uint32  *datad,
	    l_int32    w,
	    l_int32    h,
	    l_int32    wpld,
	    l_uint32  *datas,
	    l_int32    wpls)
{
l_int32    row, k, s, nwords;
l_uint32   left, cur, right, acc;
l_uint32  *srow, *drow;

    nwords = (l_uint32)(w + 31) / 32;  /* words actually covered by w bits */

    for (row = 0; row < h; row++) {
        srow = datas + row * wpls;
        drow = datad + row * wpld;
        for (k = 0; k < nwords; k++) {
            left = srow[k - 1];
            cur = srow[k];
            right = srow[k + 1];
            acc = cur;
            for (s = 1; s <= 3; s++) {
                acc |= (cur << s) | (right >> (32 - s));
                acc |= (cur >> s) | (left << (32 - s));
            }
            drow[k] = acc;
        }
    }
}


/*
 *  fdilate_h_9()
 *
 *      Each dest word is the OR of the src word with itself shifted
 *      left and right by 1, 2, 3 and 4 bits.  For a shift of s bits,
 *      the s bits entering the word come from the adjacent src word
 *      (shifted the other way by 32 - s), so one word before and one
 *      word after the processed span of every row is read.
 */
static void
fdilate_h_9(l_uint32  *datad,
	    l_int32    w,
	    l_int32    h,
	    l_int32    wpld,
	    l_uint32  *datas,
	    l_int32    wpls)
{
l_int32    row, k, s, nwords;
l_uint32   left, cur, right, acc;
l_uint32  *srow, *drow;

    nwords = (l_uint32)(w + 31) / 32;  /* words actually covered by w bits */

    for (row = 0; row < h; row++) {
        srow = datas + row * wpls;
        drow = datad + row * wpld;
        for (k = 0; k < nwords; k++) {
            left = srow[k - 1];
            cur = srow[k];
            right = srow[k + 1];
            acc = cur;
            for (s = 1; s <= 4; s++) {
                acc |= (cur << s) | (right >> (32 - s));
                acc |= (cur >> s) | (left << (32 - s));
            }
            drow[k] = acc;
        }
    }
}


	/*-----------------------------------------------------------*
	 *                     Vertical dilations                    *
	 *-----------------------------------------------------------*/
/*
 *  fdilate_v_3()
 *
 *      Each dest word is the OR of the src word at the same position
 *      with the src words one row (wpls words) before and after it.
 *      One row above the first and one row below the last processed
 *      row is therefore read.
 */
static void
fdilate_v_3(l_uint32         *datad,
	    l_int32           w,
	    l_int32           h,
	    l_int32           wpld,
	    l_uint32         *datas,
	    register l_int32  wpls)
{
l_int32    row, k, nwords;
l_uint32  *srow, *drow, *above, *below;

    nwords = (l_uint32)(w + 31) / 32;  /* words actually covered by w bits */

    for (row = 0; row < h; row++) {
        srow = datas + row * wpls;
        drow = datad + row * wpld;
        above = srow - wpls;
        below = srow + wpls;
        for (k = 0; k < nwords; k++) {
            drow[k] = srow[k] | below[k] | above[k];
        }
    }
}


/*
 *  fdilate_v_5()
 *
 *      Each dest word is the OR of the src word at the same position
 *      with the src words one and two rows before and after it, where
 *      a row is wpls words.  Two rows above the first and two rows
 *      below the last processed row are therefore read.
 */
static void
fdilate_v_5(l_uint32         *datad,
	    l_int32           w,
	    l_int32           h,
	    l_int32           wpld,
	    l_uint32         *datas,
	    register l_int32  wpls)
{
l_int32    row, k, nwords, twowpl;
l_uint32  *srow, *drow;
l_uint32  *above1, *above2, *below1, *below2;

    nwords = (l_uint32)(w + 31) / 32;  /* words actually covered by w bits */
    twowpl = 2 * wpls;

    for (row = 0; row < h; row++) {
        srow = datas + row * wpls;
        drow = datad + row * wpld;
        above2 = srow - twowpl;
        above1 = srow - wpls;
        below1 = srow + wpls;
        below2 = srow + twowpl;
        for (k = 0; k < nwords; k++) {
            drow[k] = srow[k] | above2[k] | above1[k] |
                      below1[k] | below2[k];
        }
    }
}


/*
 *  fdilate_v_7()
 *
 *      Each dest word is the OR of the src word at the same position
 *      with the src words 1, 2 and 3 rows before and after it, where
 *      a row is wpls words.  Three rows above the first and three
 *      rows below the last processed row are therefore read.
 */
static void
fdilate_v_7(l_uint32         *datad,
	    l_int32           w,
	    l_int32           h,
	    l_int32           wpld,
	    l_uint32         *datas,
	    register l_int32  wpls)
{
l_int32    row, k, d, step, nwords;
l_uint32   acc;
l_uint32  *srow, *drow, *center;

    nwords = (l_uint32)(w + 31) / 32;  /* words actually covered by w bits */

    for (row = 0; row < h; row++) {
        srow = datas + row * wpls;
        drow = datad + row * wpld;
        for (k = 0; k < nwords; k++) {
            center = srow + k;
            acc = *center;
            for (d = 1; d <= 3; d++) {
                step = d * wpls;  /* word offset of the row d lines away */
                acc |= *(center - step) | *(center + step);
            }
            drow[k] = acc;
        }
    }
}


/*
 *  fdilate_v_9()
 *
 *      Each dest word is the OR of the src word at the same position
 *      with the src words 1, 2, 3 and 4 rows before and after it,
 *      where a row is wpls words.  Four rows above the first and four
 *      rows below the last processed row are therefore read.
 */
static void
fdilate_v_9(l_uint32         *datad,
	    l_int32           w,
	    l_int32           h,
	    l_int32           wpld,
	    l_uint32         *datas,
	    register l_int32  wpls)
{
l_int32    row, k, d, step, nwords;
l_uint32   acc;
l_uint32  *srow, *drow, *center;

    nwords = (l_uint32)(w + 31) / 32;  /* words actually covered by w bits */

    for (row = 0; row < h; row++) {
        srow = datas + row * wpls;
        drow = datad + row * wpld;
        for (k = 0; k < nwords; k++) {
            center = srow + k;
            acc = *center;
            for (d = 1; d <= 4; d++) {
                step = d * wpls;  /* word offset of the row d lines away */
                acc |= *(center - step) | *(center + step);
            }
            drow[k] = acc;
        }
    }
}



	/*-----------------------------------------------------------*
	 *                     Diagonal dilations                    *
	 *-----------------------------------------------------------*/
/*
 *         +          45 degree, positive slope
 *        +
 *       +(c)
 *      +
 *     +
 */
/*
 *  fdilate_dp_5()
 *
 *      Each dest word is the OR of five contributions:
 *        - the src word two rows before, shifted left by 2 bits
 *        - the src word one row before, shifted left by 1 bit
 *        - the src word itself
 *        - the src word one row after, shifted right by 1 bit
 *        - the src word two rows after, shifted right by 2 bits
 *      Bits entering a shifted word are taken from the adjacent word
 *      of that same row.  Two rows before and after, and one word
 *      before and after, the processed region are therefore read.
 */
static void
fdilate_dp_5(l_uint32  *datad,
	     l_int32    w,
	     l_int32    h,
	     l_int32    wpld,
	     l_uint32  *datas,
	     l_int32    wpls)
{
l_int32    row, k, nwords, twowpl;
l_uint32  *srow, *drow;
l_uint32  *up1, *up2, *dn1, *dn2;

    nwords = (l_uint32)(w + 31) / 32;  /* words actually covered by w bits */
    twowpl = 2 * wpls;

    for (row = 0; row < h; row++) {
        srow = datas + row * wpls;
        drow = datad + row * wpld;
        up2 = srow - twowpl;
        up1 = srow - wpls;
        dn1 = srow + wpls;
        dn2 = srow + twowpl;
        for (k = 0; k < nwords; k++) {
            drow[k] = ((up2[k] << 2) | (up2[k + 1] >> 30)) |
                      ((up1[k] << 1) | (up1[k + 1] >> 31)) |
                      srow[k] |
                      ((dn1[k] >> 1) | (dn1[k - 1] << 31)) |
                      ((dn2[k] >> 2) | (dn2[k - 1] << 30));
        }
    }
}


/*
 *     +              45 degree, negative slope
 *      +
 *       +(c)
 *        +
 *         +
 */
/*
 *  fdilate_dm_5()
 *
 *      Each dest word is the OR of five contributions:
 *        - the src word two rows after, shifted left by 2 bits
 *        - the src word one row after, shifted left by 1 bit
 *        - the src word itself
 *        - the src word one row before, shifted right by 1 bit
 *        - the src word two rows before, shifted right by 2 bits
 *      Bits entering a shifted word are taken from the adjacent word
 *      of that same row.  Two rows before and after, and one word
 *      before and after, the processed region are therefore read.
 */
static void
fdilate_dm_5(l_uint32  *datad,
	     l_int32    w,
	     l_int32    h,
	     l_int32    wpld,
	     l_uint32  *datas,
	     l_int32    wpls)
{
l_int32    row, k, nwords, twowpl;
l_uint32  *srow, *drow;
l_uint32  *up1, *up2, *dn1, *dn2;

    nwords = (l_uint32)(w + 31) / 32;  /* words actually covered by w bits */
    twowpl = 2 * wpls;

    for (row = 0; row < h; row++) {
        srow = datas + row * wpls;
        drow = datad + row * wpld;
        up2 = srow - twowpl;
        up1 = srow - wpls;
        dn1 = srow + wpls;
        dn2 = srow + twowpl;
        for (k = 0; k < nwords; k++) {
            drow[k] = ((dn2[k] << 2) | (dn2[k + 1] >> 30)) |
                      ((dn1[k] << 1) | (dn1[k + 1] >> 31)) |
                      srow[k] |
                      ((up1[k] >> 1) | (up1[k - 1] << 31)) |
                      ((up2[k] >> 2) | (up2[k - 1] << 30));
        }
    }
}




	/*-----------------------------------------------------------*
	 *                     2d square dilations                   *
	 *-----------------------------------------------------------*/
/*
 *  fdilate_sq_3()
 *
 *      For each of three rows (the current one and the rows wpls
 *      words before and after it), the src word is OR'd with itself
 *      shifted left and right by one bit, with the entering bit taken
 *      from the adjacent word of that row.  The dest word is the OR
 *      of the three row results.  One row before and after, and one
 *      word before and after, the processed region are therefore read.
 */
static void
fdilate_sq_3(l_uint32         *datad,
	     l_int32           w,
	     l_int32           h,
	     l_int32           wpld,
	     l_uint32         *datas,
	     register l_int32  wpls)
{
l_int32    row, k, dy, nwords;
l_uint32   left, cur, right, acc;
l_uint32  *srow, *drow, *line;

    nwords = (l_uint32)(w + 31) / 32;  /* words actually covered by w bits */

    for (row = 0; row < h; row++) {
        srow = datas + row * wpls;
        drow = datad + row * wpld;
        for (k = 0; k < nwords; k++) {
            acc = 0;  /* identity for OR */
            for (dy = -1; dy <= 1; dy++) {
                line = srow + k + dy * wpls;
                left = *(line - 1);
                cur = *line;
                right = *(line + 1);
                acc |= cur | (cur << 1) | (cur >> 1) |
                       (left << 31) | (right >> 31);
            }
            drow[k] = acc;
        }
    }
}


/*
 *  fdilate_sq_5()
 *
 *      For each of five rows (the current one and the rows one and
 *      two times wpls words before and after it), the src word is
 *      OR'd with itself shifted left and right by one and two bits,
 *      with the entering bits taken from the adjacent words of that
 *      row.  The dest word is the OR of the five row results.  Two
 *      rows before and after, and one word before and after, the
 *      processed region are therefore read.
 */
static void
fdilate_sq_5(l_uint32         *datad,
	     l_int32           w,
	     l_int32           h,
	     l_int32           wpld,
	     l_uint32         *datas,
	     register l_int32  wpls)
{
l_int32    row, k, dy, nwords;
l_uint32   left, cur, right, acc;
l_uint32  *srow, *drow, *line;

    nwords = (l_uint32)(w + 31) / 32;  /* words actually covered by w bits */

    for (row = 0; row < h; row++) {
        srow = datas + row * wpls;
        drow = datad + row * wpld;
        for (k = 0; k < nwords; k++) {
            acc = 0;  /* identity for OR */
            for (dy = -2; dy <= 2; dy++) {
                line = srow + k + dy * wpls;
                left = *(line - 1);
                cur = *line;
                right = *(line + 1);
                acc |= cur |
                       (cur << 1) | (right >> 31) |
                       (cur << 2) | (right >> 30) |
                       (cur >> 1) | (left << 31) |
                       (cur >> 2) | (left << 30);
            }
            drow[k] = acc;
        }
    }
}


	/*-----------------------------------------------------------*
	 *                    Horizontal erosions                    *
	 *-----------------------------------------------------------*/
/*
 *  ferode_h_3()
 *
 *      Each dest word is the AND of the src word with the two words
 *      formed by shifting it right and left by one bit, where the
 *      bit entering each shifted word is taken from the adjacent src
 *      word.  One word before and one word after the processed span
 *      of every row is therefore read.
 */
static void
ferode_h_3(l_uint32  *datad,
	   l_int32    w,
	   l_int32    h,
	   l_int32    wpld,
	   l_uint32  *datas,
	   l_int32    wpls)
{
l_int32    row, k, nwords;
l_uint32   left, cur, right;
l_uint32  *srow, *drow;

    nwords = (l_uint32)(w + 31) / 32;  /* full & partial words/line of src */

    for (row = 0; row < h; row++) {
        srow = datas + row * wpls;
        drow = datad + row * wpld;
        for (k = 0; k < nwords; k++) {
            left = srow[k - 1];
            cur = srow[k];
            right = srow[k + 1];
            drow[k] = ((cur >> 1) | (left << 31)) &
                      cur &
                      ((cur << 1) | (right >> 31));
        }
    }
}


/*
 *  ferode_h_5()
 *
 *      Each dest word is the AND of the src word with the four words
 *      formed by shifting it right and left by one and two bits,
 *      where the bits entering each shifted word are taken from the
 *      adjacent src word.  One word before and one word after the
 *      processed span of every row is therefore read.
 */
static void
ferode_h_5(l_uint32  *datad,
	   l_int32    w,
	   l_int32    h,
	   l_int32    wpld,
	   l_uint32  *datas,
	   l_int32    wpls)
{
l_int32    row, k, nwords;
l_uint32   left, cur, right;
l_uint32  *srow, *drow;

    nwords = (l_uint32)(w + 31) / 32;  /* words actually covered by w bits */

    for (row = 0; row < h; row++) {
        srow = datas + row * wpls;
        drow = datad + row * wpld;
        for (k = 0; k < nwords; k++) {
            left = srow[k - 1];
            cur = srow[k];
            right = srow[k + 1];
            drow[k] = ((cur >> 2) | (left << 30)) &
                      ((cur >> 1) | (left << 31)) &
                      cur &
                      ((cur << 1) | (right >> 31)) &
                      ((cur << 2) | (right >> 30));
        }
    }
}


/*
 *  ferode_h_7()
 *
 *      Each dest word is the AND of the src word with the six words
 *      formed by shifting it right and left by 1, 2 and 3 bits.  For
 *      a shift of s bits, the s bits entering the word come from the
 *      adjacent src word (shifted the other way by 32 - s), so one
 *      word before and one word after the processed span of every
 *      row is read.
 */
static void
ferode_h_7(l_uint32  *datad,
	   l_int32    w,
	   l_int32    h,
	   l_int32    wpld,
	   l_uint32  *datas,
	   l_int32    wpls)
{
l_int32    row, k, s, nwords;
l_uint32   left, cur, right, acc;
l_uint32  *srow, *drow;

    nwords = (l_uint32)(w + 31) / 32;  /* words actually covered by w bits */

    for (row = 0; row < h; row++) {
        srow = datas + row * wpls;
        drow = datad + row * wpld;
        for (k = 0; k < nwords; k++) {
            left = srow[k - 1];
            cur = srow[k];
            right = srow[k + 1];
            acc = cur;
            for (s = 1; s <= 3; s++) {
                acc &= (cur >> s) | (left << (32 - s));
                acc &= (cur << s) | (right >> (32 - s));
            }
            drow[k] = acc;
        }
    }
}


/*
 *  ferode_h_9()
 *
 *      Each dest word is the AND of the src word with the eight words
 *      formed by shifting it right and left by 1, 2, 3 and 4 bits.
 *      For a shift of s bits, the s bits entering the word come from
 *      the adjacent src word (shifted the other way by 32 - s), so
 *      one word before and one word after the processed span of
 *      every row is read.
 */
static void
ferode_h_9(l_uint32  *datad,
	   l_int32    w,
	   l_int32    h,
	   l_int32    wpld,
	   l_uint32  *datas,
	   l_int32    wpls)
{
l_int32    row, k, s, nwords;
l_uint32   left, cur, right, acc;
l_uint32  *srow, *drow;

    nwords = (l_uint32)(w + 31) / 32;  /* words actually covered by w bits */

    for (row = 0; row < h; row++) {
        srow = datas + row * wpls;
        drow = datad + row * wpld;
        for (k = 0; k < nwords; k++) {
            left = srow[k - 1];
            cur = srow[k];
            right = srow[k + 1];
            acc = cur;
            for (s = 1; s <= 4; s++) {
                acc &= (cur >> s) | (left << (32 - s));
                acc &= (cur << s) | (right >> (32 - s));
            }
            drow[k] = acc;
        }
    }
}


	/*-----------------------------------------------------------*
	 *                     Vertical erosions                     *
	 *-----------------------------------------------------------*/
/*
 *  ferode_v_3()
 *
 *      Each dest word is the AND of the src word at the same position
 *      with the src words one row (wpls words) before and after it.
 *      One row above the first and one row below the last processed
 *      row is therefore read.
 */
static void
ferode_v_3(l_uint32         *datad,
	   l_int32           w,
	   l_int32           h,
	   l_int32           wpld,
	   l_uint32         *datas,
	   register l_int32  wpls)
{
l_int32    row, k, nwords;
l_uint32  *srow, *drow, *above, *below;

    nwords = (l_uint32)(w + 31) / 32;  /* words actually covered by w bits */

    for (row = 0; row < h; row++) {
        srow = datas + row * wpls;
        drow = datad + row * wpld;
        above = srow - wpls;
        below = srow + wpls;
        for (k = 0; k < nwords; k++) {
            drow[k] = srow[k] & below[k] & above[k];
        }
    }
}


/*
 *  ferode_v_5()
 *
 *      Each dest word is the AND of the src word at the same position
 *      with the src words one and two rows before and after it, where
 *      a row is wpls words.  Two rows above the first and two rows
 *      below the last processed row are therefore read.
 */
static void
ferode_v_5(l_uint32         *datad,
	   l_int32           w,
	   l_int32           h,
	   l_int32           wpld,
	   l_uint32         *datas,
	   register l_int32  wpls)
{
l_int32    row, k, nwords, twowpl;
l_uint32  *srow, *drow;
l_uint32  *above1, *above2, *below1, *below2;

    nwords = (l_uint32)(w + 31) / 32;  /* words actually covered by w bits */
    twowpl = 2 * wpls;

    for (row = 0; row < h; row++) {
        srow = datas + row * wpls;
        drow = datad + row * wpld;
        above2 = srow - twowpl;
        above1 = srow - wpls;
        below1 = srow + wpls;
        below2 = srow + twowpl;
        for (k = 0; k < nwords; k++) {
            drow[k] = srow[k] & above2[k] & above1[k] &
                      below1[k] & below2[k];
        }
    }
}


/*
 *  ferode_v_7()
 *
 *      Each dest word is the AND of the src word at the same position
 *      with the src words 1, 2 and 3 rows before and after it, where
 *      a row is wpls words.  Three rows above the first and three
 *      rows below the last processed row are therefore read.
 */
static void
ferode_v_7(l_uint32         *datad,
	   l_int32           w,
	   l_int32           h,
	   l_int32           wpld,
	   l_uint32         *datas,
	   register l_int32  wpls)
{
l_int32    row, k, d, step, nwords;
l_uint32   acc;
l_uint32  *srow, *drow, *center;

    nwords = (l_uint32)(w + 31) / 32;  /* words actually covered by w bits */

    for (row = 0; row < h; row++) {
        srow = datas + row * wpls;
        drow = datad + row * wpld;
        for (k = 0; k < nwords; k++) {
            center = srow + k;
            acc = *center;
            for (d = 1; d <= 3; d++) {
                step = d * wpls;  /* word offset of the row d lines away */
                acc &= *(center - step) & *(center + step);
            }
            drow[k] = acc;
        }
    }
}


/*
 *  ferode_v_9()
 *
 *      Each dest word is the AND of the src word at the same position
 *      with the src words 1, 2, 3 and 4 rows before and after it,
 *      where a row is wpls words.  Four rows above the first and four
 *      rows below the last processed row are therefore read.
 */
static void
ferode_v_9(l_uint32         *datad,
	   l_int32           w,
	   l_int32           h,
	   l_int32           wpld,
	   l_uint32         *datas,
	   register l_int32  wpls)
{
l_int32    row, k, d, step, nwords;
l_uint32   acc;
l_uint32  *srow, *drow, *center;

    nwords = (l_uint32)(w + 31) / 32;  /* words actually covered by w bits */

    for (row = 0; row < h; row++) {
        srow = datas + row * wpls;
        drow = datad + row * wpld;
        for (k = 0; k < nwords; k++) {
            center = srow + k;
            acc = *center;
            for (d = 1; d <= 4; d++) {
                step = d * wpls;  /* word offset of the row d lines away */
                acc &= *(center - step) & *(center + step);
            }
            drow[k] = acc;
        }
    }
}



	/*-----------------------------------------------------------*
	 *                     Diagonal erosions                     *
	 *-----------------------------------------------------------*/
/*
 *         +          45 degree, positive slope
 *        +
 *       +(c)
 *      +
 *     +
 */
/*
 *  ferode_dp_5()
 *
 *      Each dest word is the AND of five terms:
 *        - the src word two rows before, shifted left by 2 bits
 *        - the src word one row before, shifted left by 1 bit
 *        - the src word itself
 *        - the src word one row after, shifted right by 1 bit
 *        - the src word two rows after, shifted right by 2 bits
 *      Bits entering a shifted word are taken from the adjacent word
 *      of that same row.  Two rows before and after, and one word
 *      before and after, the processed region are therefore read.
 */
static void
ferode_dp_5(l_uint32  *datad,
	    l_int32    w,
	    l_int32    h,
	    l_int32    wpld,
	    l_uint32  *datas,
	    l_int32    wpls)
{
l_int32    row, k, nwords, twowpl;
l_uint32  *srow, *drow;
l_uint32  *up1, *up2, *dn1, *dn2;

    nwords = (l_uint32)(w + 31) / 32;  /* words actually covered by w bits */
    twowpl = 2 * wpls;

    for (row = 0; row < h; row++) {
        srow = datas + row * wpls;
        drow = datad + row * wpld;
        up2 = srow - twowpl;
        up1 = srow - wpls;
        dn1 = srow + wpls;
        dn2 = srow + twowpl;
        for (k = 0; k < nwords; k++) {
            drow[k] = ((up2[k] << 2) | (up2[k + 1] >> 30)) &
                      ((up1[k] << 1) | (up1[k + 1] >> 31)) &
                      srow[k] &
                      ((dn1[k] >> 1) | (dn1[k - 1] << 31)) &
                      ((dn2[k] >> 2) | (dn2[k - 1] << 30));
        }
    }
}


/*
 *     +              45 degree, negative slope
 *      +
 *       +(c)
 *        +
 *         +
 */
/*
 *  ferode_dm_5()
 *
 *      Each dest word is the AND of five terms:
 *        - the src word two rows after, shifted left by 2 bits
 *        - the src word one row after, shifted left by 1 bit
 *        - the src word itself
 *        - the src word one row before, shifted right by 1 bit
 *        - the src word two rows before, shifted right by 2 bits
 *      Bits entering a shifted word are taken from the adjacent word
 *      of that same row.  Two rows before and after, and one word
 *      before and after, the processed region are therefore read.
 */
static void
ferode_dm_5(l_uint32  *datad,
	    l_int32    w,
	    l_int32    h,
	    l_int32    wpld,
	    l_uint32  *datas,
	    l_int32    wpls)
{
l_int32    row, k, nwords, twowpl;
l_uint32  *srow, *drow;
l_uint32  *up1, *up2, *dn1, *dn2;

    nwords = (l_uint32)(w + 31) / 32;  /* words actually covered by w bits */
    twowpl = 2 * wpls;

    for (row = 0; row < h; row++) {
        srow = datas + row * wpls;
        drow = datad + row * wpld;
        up2 = srow - twowpl;
        up1 = srow - wpls;
        dn1 = srow + wpls;
        dn2 = srow + twowpl;
        for (k = 0; k < nwords; k++) {
            drow[k] = ((dn2[k] << 2) | (dn2[k + 1] >> 30)) &
                      ((dn1[k] << 1) | (dn1[k + 1] >> 31)) &
                      srow[k] &
                      ((up1[k] >> 1) | (up1[k - 1] << 31)) &
                      ((up2[k] >> 2) | (up2[k - 1] << 30));
        }
    }
}



	/*-----------------------------------------------------------*
	 *                     2d square erosions                    *
	 *-----------------------------------------------------------*/
/*
 *  ferode_sq_3()
 *
 *      For each of three rows (the current one and the rows wpls
 *      words before and after it), the src word is AND'd with the
 *      two words formed by shifting it right and left by one bit,
 *      with the entering bit taken from the adjacent word of that
 *      row.  The dest word is the AND of the three row results.  One
 *      row before and after, and one word before and after, the
 *      processed region are therefore read.
 */
static void
ferode_sq_3(l_uint32         *datad,
	    l_int32           w,
	    l_int32           h,
	    l_int32           wpld,
	    l_uint32         *datas,
	    register l_int32  wpls)
{
l_int32    row, k, dy, nwords;
l_uint32   left, cur, right, acc;
l_uint32  *srow, *drow, *line;

    nwords = (l_uint32)(w + 31) / 32;  /* words actually covered by w bits */

    for (row = 0; row < h; row++) {
        srow = datas + row * wpls;
        drow = datad + row * wpld;
        for (k = 0; k < nwords; k++) {
            acc = 0xffffffff;  /* all bits set: identity for AND */
            for (dy = -1; dy <= 1; dy++) {
                line = srow + k + dy * wpls;
                left = *(line - 1);
                cur = *line;
                right = *(line + 1);
                acc &= ((cur >> 1) | (left << 31)) &
                       cur &
                       ((cur << 1) | (right >> 31));
            }
            drow[k] = acc;
        }
    }
}


/*
 *  ferode_sq_5()
 *
 *      For each of five rows (the current one and the rows one and
 *      two times wpls words before and after it), the src word is
 *      AND'd with the four words formed by shifting it right and
 *      left by one and two bits, with the entering bits taken from
 *      the adjacent words of that row.  The dest word is the AND of
 *      the five row results.  Two rows before and after, and one
 *      word before and after, the processed region are therefore
 *      read.
 */
static void
ferode_sq_5(l_uint32         *datad,
	    l_int32           w,
	    l_int32           h,
	    l_int32           wpld,
	    l_uint32         *datas,
	    register l_int32  wpls)
{
l_int32    row, k, dy, nwords;
l_uint32   left, cur, right, acc;
l_uint32  *srow, *drow, *line;

    nwords = (l_uint32)(w + 31) / 32;  /* words actually covered by w bits */

    for (row = 0; row < h; row++) {
        srow = datas + row * wpls;
        drow = datad + row * wpld;
        for (k = 0; k < nwords; k++) {
            acc = 0xffffffff;  /* all bits set: identity for AND */
            for (dy = -2; dy <= 2; dy++) {
                line = srow + k + dy * wpls;
                left = *(line - 1);
                cur = *line;
                right = *(line + 1);
                acc &= ((cur >> 2) | (left << 30)) &
                       ((cur >> 1) | (left << 31)) &
                       cur &
                       ((cur << 1) | (right >> 31)) &
                       ((cur << 2) | (right >> 30));
            }
            drow[k] = acc;
        }
    }
}



