/*-
 * Copyright (C)2020..2021 @BABOLO http://www.babolo.ru/
 * punycode.c from RFC 3492 http://www.nicemice.net/idn/
 * Adam M. Costello http://www.nicemice.net/amc/
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ident "@(#) Copyright (C)2020..2021 @BABOLO http://www.babolo.ru/"
#ident "@(#) from RFC 3492 http://www.nicemice.net/idn/"
#ident "@(#) Adam M. Costello http://www.nicemice.net/amc/"
#ident "@(#) $Id: puny.c,v 1.5 2021/12/05 17:53:10 babolo Exp $"

/* Compatibility/feature selectors; they are defined ahead of the #include lines   */
/* that follow.                                                                    */
/* NOTE(review): presumably consumed by the babolo/recobe headers to pick an API   */
/* level and to expose internal declarations -- confirm against those headers.     */
/* VMAJOR is not defined anywhere in this file, so it has to be supplied from      */
/* outside (e.g. the build command line) -- TODO confirm.                          */
#define MIFE_COMPAT     5
#define BLIN_COMPAT     4
#define RECOBE_COMPAT   VMAJOR
#define RECOBE_INTERNAL 1

#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <sysexits.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <babolo/BLINflag.h>
#include "recobe.h"


/*** Bootstring parameters for Punycode ***/
/* Number of distinct digit values (a-z, 0-9); also the step of k in the digit loops */
#define RECOBE_PUNYBASE 36
/* Lower clamp of the per-digit threshold computed by threshold()                    */
#define RECOBE_PUNYTMIN 1
/* Upper clamp of the per-digit threshold computed by threshold()                    */
#define RECOBE_PUNYTMAX 26
/* Skew term in the final bias formula of adapt()                                    */
#define RECOBE_PUNYSKEW 38
/* Divisor applied to the very first delta in adapt() (later deltas are halved)      */
#define RECOBE_PUNYDAMP 700
/* Bias value both the encoder and the decoder start with                            */
#define RECOBE_INIBIAS  72
/* Starting value of n: the first code point that is not a basic (ASCII) one         */
#define RECOBE_ININ     0x80
/* Character that separates the literal basic code points from the encoded deltas    */
#define RECOBE_DELIM    '-'
/* Largest u_int32_t value; used as the limit in the overflow checks                 */
#define PUNY_MAXINT     ((u_int32_t)-1)

/* pudide(c) decodes one Punycode digit character.                                 */
/* Result: 0..25 for 'a'..'z', 26..35 for '0'..'9', and 0x80 | (0..25) for         */
/* 'A'..'Z'.  Bit 0x80 only marks an uppercase letter, so callers mask the result  */
/* with 0x7F to obtain the digit value.  Every other input, including anything     */
/* >= 128, yields RECOBE_PUNYBASE, i.e. "not a digit".                             */
/* The parameter is a full 32-bit value on purpose: the decoder also passes whole  */
/* code points here, and a 16-bit parameter would truncate values above 0xFFFF     */
/* (U+20061 would be mistaken for 'a').                                            */
static u_int32_t
pudide(u_int32_t c) {
#   define NO RECOBE_PUNYBASE
    static const u_char d[128] =
    {   NO,   NO,   NO,   NO,    NO,   NO,   NO,   NO,    NO,   NO,   NO,   NO,    NO,   NO,   NO,   NO
    ,   NO,   NO,   NO,   NO,    NO,   NO,   NO,   NO,    NO,   NO,   NO,   NO,    NO,   NO,   NO,   NO
    ,   NO,   NO,   NO,   NO,    NO,   NO,   NO,   NO,    NO,   NO,   NO,   NO,    NO,   NO,   NO,   NO
    , 0x1A, 0x1B, 0x1C, 0x1D,  0x1E, 0x1F, 0x20, 0x21,  0x22, 0x23,   NO,   NO,    NO,   NO,   NO,   NO
    ,   NO, 0x80, 0x81, 0x82,  0x83, 0x84, 0x85, 0x86,  0x87, 0x88, 0x89, 0x8A,  0x8B, 0x8C, 0x8D, 0x8E
    , 0x8F, 0x90, 0x91, 0x92,  0x93, 0x94, 0x95, 0x96,  0x97, 0x98, 0x99,   NO,    NO,   NO,   NO,   NO
    ,   NO, 0x00, 0x01, 0x02,  0x03, 0x04, 0x05, 0x06,  0x07, 0x08, 0x09, 0x0A,  0x0B, 0x0C, 0x0D, 0x0E
    , 0x0F, 0x10, 0x11, 0x12,  0x13, 0x14, 0x15, 0x16,  0x17, 0x18, 0x19,   NO,    NO,   NO,   NO,   NO
    };
    return((c < 128) ? d[c] : RECOBE_PUNYBASE);
#   undef NO
}

/* pudien(d, flag) turns a digit value into its basic code point.                  */
/* Values 0..25 map to letters and 26..35 to '0'..'9'.  Letters come out in        */
/* lowercase when flag is zero and in uppercase otherwise; decimal digits have no  */
/* case, so flag does not affect them.  A value outside 0 .. RECOBE_PUNYBASE - 1   */
/* produces 0 (NUL).                                                               */

static char
pudien(u_int32_t d, int flag) {
    static const char lower[] = "abcdefghijklmnopqrstuvwxyz0123456789";
    static const char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";

    if  (d >= RECOBE_PUNYBASE) {
        return(0);
    }
    return(flag ? upper[d] : lower[d]);
}

/*** Bias adaptation function ***/
/* Computes the next bias from the delta that was just coded.                      */
/*   delta     - the delta value just encoded or decoded                           */
/*   numpoints - number of code points handled so far, including this one;         */
/*               used as a divisor, so it must not be zero                         */
/*   firsttime - nonzero for the first delta, which is damped by RECOBE_PUNYDAMP   */
/*               instead of being halved                                           */
static u_int32_t
adapt(u_int32_t delta, u_int32_t numpoints, int firsttime) {
    const u_int32_t span  = RECOBE_PUNYBASE - RECOBE_PUNYTMIN;
    const u_int32_t limit = (span * RECOBE_PUNYTMAX) / 2;
    u_int32_t       k     = 0;

    if  (firsttime) {
        delta /= RECOBE_PUNYDAMP;
    } else {
        delta /= 2;
    }
    delta += delta / numpoints;
    while (delta > limit) {       /* scale delta down, counting one base step per division */
        delta /= span;
        k += RECOBE_PUNYBASE;
    }
    return(k + (span + 1) * delta / (delta + RECOBE_PUNYSKEW));
}

/* threshold(k, bias) gives the digit threshold for position k under the current   */
/* bias: k - bias clamped into RECOBE_PUNYTMIN .. RECOBE_PUNYTMAX.                 */
static u_int32_t
threshold(u_int32_t k, u_int32_t bias) {
    if  (k <= bias) {
        return(RECOBE_PUNYTMIN);
    }
    if  (k >= (bias + RECOBE_PUNYTMAX)) {
        return(RECOBE_PUNYTMAX);
    }
    return(k - bias);
}

/* inword(chain, idx) fetches input word number idx from chain->tail.              */
/* Words are chain->inwlen bytes wide and are counted from the chunk's current     */
/* position.  At most 8 bytes are handed to recobe_load(); the RECOBE_ORDER flag   */
/* selects whether the length is passed as positive or negative.                   */
/* Returns the value produced by recobe_load(), or a negative sysexits code with   */
/* errno set:                                                                      */
/*   -EX_DATAERR / EINVAL    chunk bookkeeping broken (position <= length <= size  */
/*                           does not hold) or a checked extra byte is nonzero     */
/*   -EX_UNAVAILABLE / EDOM  the word does not fit into the unread part            */
static int64_t
inword(recobe_chain *chain, u_int32_t idx) {
#   define blin_internal_flags (chain->flags)
    u_int32_t     l  = (chain->inwlen > 8) ? 8 : chain->inwlen;
    int64_t       ex = EX_OK;
    u_char       *c;

    ifBLIN_QX3("+ %u", idx);
    idx *= chain->inwlen;                              /* word index -> byte offset */
    if  (  (chain->tail->size < chain->tail->length)
        || (chain->tail->length < chain->tail->position)
        ) {
        ifBLIN_QX0( "Must position <= length <= size, Now %u %u %u"
                  , chain->tail->position
                  , chain->tail->length
                  , chain->tail->size
                  );
        ex = -EX_DATAERR;
        errno = EINVAL;
        goto out;
    }
    if  ((chain->tail->length - chain->tail->position) < (idx + chain->inwlen)) {
        ifBLIN_QX0("length < (idx + inwlen)");
        ex = -EX_UNAVAILABLE;
        errno = EDOM;
        goto out;
    }
    c = &chain->tail->chunk[chain->tail->position];
    if  (!(chain->flags & RECOBE_ORDER)) ex = recobe_load(&c[idx], l);
    /* Words wider than 8 bytes: the next one or two bytes must be zero; each check */
    /* also advances idx past the byte it examined.                                 */
    /* NOTE(review): on the !RECOBE_ORDER path the value has already been loaded    */
    /* from c[idx], so these checks look at the first bytes of the word rather than */
    /* the bytes beyond the 8 loaded ones -- confirm against recobe_load().         */
    if  ((8 < chain->inwlen) && !!c[idx++]) {
        ex = -EX_DATAERR;
        errno = EINVAL;
        goto out;
    }
    if  ((9 < chain->inwlen) && !!c[idx++]) {
        ex = -EX_DATAERR;
        errno = EINVAL;
        goto out;
    }
    if  (!!(chain->flags & RECOBE_ORDER)) ex = recobe_load(&c[idx], -l);
out:
    /* ex is 64 bits wide: print it with a matching conversion */
    ifBLIN_QX3("- %lld", (long long)ex);
    return(ex);
#   undef blin_internal_flags
}

/* puny_encode(chain, chunk, cs) Punycode-encodes the words of chain->tail and     */
/* appends the resulting bytes to chunk, starting at chunk->length.                */
/*   cs - when nonzero, every code point is folded through recobe_unicodelower()   */
/*        before it is encoded, and the value that call returned is passed as the  */
/*        uppercase flag for the last digit of that code point's delta.            */
/*        NOTE(review): recobe_unicodelower() appears to return an XOR mask        */
/*        (zero when nothing changes) -- confirm against its definition.           */
/* Input words may be 1..4 bytes wide (chain->inwlen).                             */
/* Returns EX_OK on success; then chunk->length has grown by the number of bytes   */
/* produced and the tail chunk is marked fully consumed.  On failure returns a     */
/* negative sysexits code with errno set and leaves chunk->length and the tail     */
/* position untouched:                                                             */
/*   -EX_UNAVAILABLE / EDOM    unsupported input word width                        */
/*   -EX_UNAVAILABLE / ERANGE  delta overflow                                      */
/*   -EX_CANTCREAT / ENOSPC    not enough room in chunk                            */
/*   any negative value reported by inword()                                       */
static int
puny_encode(recobe_chain *chain, recobe_chunk *chunk, int cs) {
#   define blin_internal_flags (chain->flags)
    u_char   *output = &chunk->chunk[chunk->length];
    u_int32_t outlen = recobe_chunkempty(chunk);
    u_int32_t inlen;                /* number of input words                        */
    int64_t   input;
    u_int32_t delta  = 0;
    u_int32_t bias   = RECOBE_INIBIAS;
    u_int32_t outp   = 0;           /* number of characters that have been output   */
    int       ex     = EX_OK;
    u_int32_t n      = RECOBE_ININ;
    u_int32_t h;                    /* number of code points that have been handled */
    u_int32_t b;                    /* number of basic code points                  */
    u_int32_t m;
    u_int32_t q;
    u_int32_t k;
    u_int32_t t;

    RECOBE_CHUNK(chunk);
    if  (chain->inwlen == 0) {      /* would otherwise divide by zero just below    */
        ifBLIN_QX0("inwlen == 0");
        errno = EDOM;
        ex = -EX_UNAVAILABLE;
        goto out;
    }
    if  (chain->inwlen > 4) {
        ifBLIN_QX0("inwlen=%u > 4", chain->inwlen);
        errno = EDOM;
        ex = -EX_UNAVAILABLE;
        goto out;
    }
    inlen = recobe_chunksize(chain->tail) / chain->inwlen;
    for (u_int32_t j = 0; j < inlen; ++j) {                          /* Handle the basic code points */
        if  (0 > (input = inword(chain, j))) {
            ifBLIN_QW0("input %u", j);
            ex = input;
            goto out;
        }
        if  (0x80 > input) {
            /* keep one spare byte so that the delimiter below always fits */
            if  (outlen - outp < 2) {
                ifBLIN_QX0("outlen=%u - outp=%u < 2", outlen, outp);
                errno = ENOSPC;
                ex = -EX_CANTCREAT;
                goto out;
            }
            output[outp++] = input;
    }   }
    h = b = outp;
    if  (b > 0) output[outp++] = RECOBE_DELIM;
    while (h < inlen) {                                                        /* Main encoding loop */
        /* All non-basic code points < n have been handled already. Find the next larger one         */
        m = PUNY_MAXINT;
        for (u_int32_t j = 0;  j < inlen;  ++j) {
            int64_t realchar;

            if  (0 > (realchar = inword(chain, j))) {
                ifBLIN_QW0("input %u", j);
                ex = realchar;
                goto out;
            }
            if  (!!cs) realchar ^= recobe_unicodelower(realchar);
            if  ((realchar >= n) && (realchar < m)) m = realchar;
        }
        {   u_int64_t dt = (u_int64_t)delta + (u_int64_t)(m - n) * (u_int64_t)(h + 1);
                            /* Increase delta enough to advance the decoder's <n, i> state to <m, 0> */
            if  (dt > PUNY_MAXINT) {
                /* both arguments are cast so that they really match %llu */
                ifBLIN_QX0( "dt=%llu - PUNY_MAXINT=%llu"
                          , (unsigned long long)dt
                          , (unsigned long long)PUNY_MAXINT
                          );
                errno = ERANGE;
                ex = -EX_UNAVAILABLE;
                goto out;
            }
            delta = dt;
        }
        n = m;
        for (u_int32_t j = 0; j < inlen; ++j) {
            int64_t realchar;
            u_int32_t caseup;

            if  (0 > (realchar = inword(chain, j))) {
                ifBLIN_QW0("input %u", j);
                ex = realchar;
                goto out;
            }
            caseup = 0;
            if  (!!cs) {
                caseup = recobe_unicodelower(realchar);
                realchar ^= caseup;
            }
            if  (realchar < n) {
                if  (++delta == 0) {                                  /* delta wrapped around */
                    ifBLIN_QX0("delta=%u == 0", delta);
                    errno = ERANGE;
                    ex = -EX_UNAVAILABLE;
                    goto out;
            }   }
            if  (realchar == n) {        /* Represent delta as a generalized variable-length integer */
                for (q = delta, k = RECOBE_PUNYBASE; ; k += RECOBE_PUNYBASE) {
                    /* checked on every pass, so the final digit written after the */
                    /* loop is covered as well                                      */
                    if  (outp >= outlen) {
                        ifBLIN_QX0("outp=%u >= outlen=%u", outp, outlen);
                        errno = ENOSPC;
                        ex = -EX_CANTCREAT;
                        goto out;
                    }
                    t = threshold(k, bias);
                    if  (q < t) break;
                    output[outp++] = pudien(t + (q - t) % (RECOBE_PUNYBASE - t), 0);
                    q = (q - t) / (RECOBE_PUNYBASE - t);
                }
                output[outp++] = pudien(q, caseup);         /* last digit carries the case flag */
                bias = adapt(delta, h + 1, h == b);
                delta = 0;
                ++h;
        }   }
        ++delta, ++n;
    }
    chunk->length += outp;                                  /* commit the produced bytes       */
    chain->tail->position = chain->tail->length;            /* input is fully consumed         */
out:
    return(ex);
#   undef blin_internal_flags
}

/* outword(chain, chunk, idx, wchr) stores wchr as output word number idx of       */
/* chunk.  Words are chain->outwlen bytes wide and are counted from                */
/* chunk->position; the RECOBE_ORDER flag makes the width be passed to             */
/* recobe_store() negated.  chunk->length is raised when the stored word ends      */
/* beyond it.                                                                      */
/* Returns EX_OK, or -EX_CANTCREAT with errno = ENOSPC when the word would end     */
/* past chunk->size.                                                               */
static int
outword(recobe_chain *chain, recobe_chunk *chunk, u_int32_t idx, u_int64_t wchr) {
#   define blin_internal_flags (chain->flags)
    if  (chunk->size < ((idx + 1) * chain->outwlen + chunk->position)) {
        /* no room for this word */
        ifBLIN_QX0("size=%u < (idx=%u + 1) * outwlen=%u", chunk->size, idx, chain->outwlen);
        errno = ENOSPC;
        return(-EX_CANTCREAT);
    }
    recobe_store( &chunk->chunk[idx * chain->outwlen + chunk->position]
                , (chain->flags & RECOBE_ORDER) ? -(chain->outwlen) : chain->outwlen
                , wchr
                )
    ;
    if  (chunk->length < ((idx + 1) * chain->outwlen + chunk->position)) {
        chunk->length = (idx + 1) * chain->outwlen + chunk->position;
    }
    return(EX_OK);
#   undef blin_internal_flags
}

/* puny_decode(chain, chunk, cs) decodes the Punycode text held in chain->tail     */
/* into code points, written as words of chain->outwlen bytes into chunk.          */
/*   cs - when nonzero, a code point whose last delta digit was an uppercase       */
/*        letter is XORed with recobe_unicodeupper() of itself.                    */
/*        NOTE(review): recobe_unicodeupper() appears to return an XOR mask --     */
/*        confirm against its definition.                                          */
/* Returns the number of code points produced, or a negative sysexits code with    */
/* errno set:                                                                      */
/*   -EX_USAGE / EINVAL, ERANGE, EPIPE   malformed input                           */
/*   -EX_UNAVAILABLE / EDOM, ERANGE      arithmetic overflow, or outwlen == 0      */
/*   any negative value reported by outword()                                      */
/* The tail chunk is marked fully consumed on every exit path, error or not.       */
/* NOTE(review): when chunk has no room left the function stops and returns 0      */
/* without reporting an error -- confirm that callers expect this.                 */
static int
puny_decode(recobe_chain *chain, recobe_chunk *chunk, int cs) {
#   define blin_internal_flags (chain->flags)
    int       ex    = 0;
    u_int32_t outlen;          /* room in chunk, counted in output words */
    u_int32_t digit;
    u_int32_t bias;
    u_int32_t oldi;
    u_int32_t outp = 0;        /* the number of code points in the output array */
    u_int32_t in;              /* the index of the next character to be consumed */
    u_int32_t n = RECOBE_ININ;
    u_int32_t i = 0;
    u_int32_t j;
    u_int32_t w;
    u_int32_t k;
    u_int32_t t;

    if  (chain->outwlen == 0) {                    /* would otherwise divide by zero just below */
        ifBLIN_QX0("outwlen == 0");
        ex = -EX_UNAVAILABLE;
        errno = EDOM;
        goto out;
    }
    outlen = recobe_chunkempty(chunk) / chain->outwlen;
    bias = RECOBE_INIBIAS;
    {   u_int32_t b;   /* Number of basic code points */

        for (b = j = 0; j < recobe_chunksize(chain->tail); ++j) {          /* Find last RECOBE_DELIM */
            if (RECOBE_DELIM == recobe_chunktext(chain->tail)[j]) b = j;
        }
        if  (b > outlen) goto out;                      /* no room for the literal part: returns 0  */
        for (j = 0; j < b; ++j) {                                    /* Handle the basic code points */
            if  (0x80 & recobe_chunktext(chain->tail)[j]) {
                ifBLIN_QX2("Ill char 0x%02X", recobe_chunktext(chain->tail)[j]);
                ex = -EX_USAGE;
                errno = EINVAL;
                goto out;
            }
            if  (0 > (ex = outword(chain, chunk, outp++, recobe_chunktext(chain->tail)[j]))) {
                ifBLIN_QW0("outword");
                goto out;
        }   }
        in = (b > 0) ? (b + 1) : 0;                     /* skip the delimiter, if there was one     */
    }
    for (; in < recobe_chunksize(chain->tail); ++outp) {                       /* Main decoding loop */
        /* Decode a generalized variable-length integer into delta, which gets added to i.           */
        /* The overflow checking is easier if we increase i as we go, then subtract off              */
        /* its starting value at the end to obtain delta.                                            */
        for (oldi = i, w = 1, k = RECOBE_PUNYBASE; ; k += RECOBE_PUNYBASE) {
            if  (in >= recobe_chunksize(chain->tail)) {
                ifBLIN_QX5("No space on input");
                ex = -EX_USAGE;
                errno = EPIPE;
                goto out;
            }
            digit = pudide(recobe_chunktext(chain->tail)[in++]) & 0x007F;   /* drop the case bit     */
            if  (digit >= RECOBE_PUNYBASE) {
                ifBLIN_QX2("Ill char 0x%02X", recobe_chunktext(chain->tail)[in - 1]);
                ex = -EX_USAGE;
                errno = ERANGE;
                goto out;
            }
            if  (digit > (PUNY_MAXINT - i) / w) {
                ifBLIN_QX2("Too big digit");
                ex = -EX_UNAVAILABLE;
                errno = EDOM;
                goto out;
            }
            i += digit * w;
            t = threshold(k, bias);
            if  (digit < t) break;
            {   u_int64_t ww = (u_int64_t)w * (RECOBE_PUNYBASE - t);

                if  (ww > PUNY_MAXINT) {
                    ifBLIN_QX2("Overflow");
                    ex = -EX_UNAVAILABLE;
                    errno = ERANGE;
                    goto out;
                }
                w = ww;
        }   }
        bias = adapt(i - oldi, outp + 1, oldi == 0);
        /* The sum is formed in 64 bits: a 32-bit sum would wrap around before the     */
        /* range check and the check could never fire.                                 */
        {   u_int64_t nn = (u_int64_t)n + (i / (outp + 1));
                                                /* i was supposed to wrap around from outp + 1 to 0, */
                                                /* incrementing n each time                          */
            if  (nn > PUNY_MAXINT) {
                ifBLIN_QX2("Overflow");
                ex = -EX_UNAVAILABLE;
                errno = ERANGE;
                goto out;
            }
            n = nn;
            i %= (outp + 1);
        }
        /* Fail if n is a basic code point that is itself a digit character.           */
        /* Only values below 0x80 can be basic, so larger ones are never looked up;    */
        /* this keeps code points above 0xFFFF from being misread as ASCII.            */
        if  ((n < 0x80) && ((pudide(n) & 0x007F) < RECOBE_PUNYBASE)) { /* not needed for Punycode */
            ifBLIN_QX2("Invalid value");
            ex = -EX_USAGE;
            errno = EINVAL;
            goto out;
        }
        if  (outp >= outlen) goto out;                  /* chunk is full: returns 0                 */
        if  (!!cs && (pudide(recobe_chunktext(chain->tail)[in - 1]) & 0x80)) {
            n ^= recobe_unicodeupper(n);
        }
        /* Insert n at position i of the output */
        memmove( &chunk->chunk[(i + 1) * chain->outwlen + chunk->position]
               , &chunk->chunk[i * chain->outwlen + chunk->position]
               , (outp - i) * chain->outwlen
               );
        chunk->length += chain->outwlen;
        if  (0 > (ex = outword(chain, chunk, i++, n))) {
            ifBLIN_QW0("outword");
            goto out;
    }   }
    ex = outp;
out:
    chain->tail->position = chain->tail->length;
    return(ex);
#   undef blin_internal_flags
}

/* Punycode-encode chain->tail into chunk with case handling switched on           */
/* (puny_encode() is called with cs = 1).  Passes puny_encode()'s result through   */
/* unchanged; a negative result is additionally reported via ifBLIN_QW0.           */
int
recobe_puny_encase(recobe_chain *chain, recobe_chunk *chunk) {
#   define blin_internal_flags (chain->flags)
    int ex = puny_encode(chain, chunk, 1);

    if  (ex < 0) {
        ifBLIN_QW0("puny_encode case");
    }
    return(ex);
#   undef blin_internal_flags
}

/* Punycode-encode chain->tail into chunk with case handling switched off          */
/* (puny_encode() is called with cs = 0).  Passes puny_encode()'s result through   */
/* unchanged; a negative result is additionally reported via ifBLIN_QW0.           */
int
recobe_puny_encode(recobe_chain *chain, recobe_chunk *chunk) {
#   define blin_internal_flags (chain->flags)
    int ex = puny_encode(chain, chunk, 0);

    if  (ex < 0) {
        ifBLIN_QW0("puny_encode");
    }
    return(ex);
#   undef blin_internal_flags
}

/* Decode the Punycode text in chain->tail into chunk with case handling switched  */
/* on (puny_decode() is called with cs = 1).  Passes puny_decode()'s result        */
/* through unchanged; a negative result is additionally reported via ifBLIN_QW0.   */
int
recobe_puny_decase(recobe_chain *chain, recobe_chunk *chunk) {
#   define blin_internal_flags (chain->flags)
    int ex;

    ex = puny_decode(chain, chunk, 1);
    if  (ex < 0) {
        ifBLIN_QW0("puny_decode case");
    }
    return(ex);
#   undef blin_internal_flags
}

/* Decode the Punycode text in chain->tail into chunk with case handling switched  */
/* off (puny_decode() is called with cs = 0).  Passes puny_decode()'s result       */
/* through unchanged; a negative result is additionally reported via ifBLIN_QW0.   */
int
recobe_puny_decode(recobe_chain *chain, recobe_chunk *chunk) {
#   define blin_internal_flags (chain->flags)
    int ex;

    ex = puny_decode(chain, chunk, 0);
    if  (ex < 0) {
        ifBLIN_QW0("puny_decode");
    }
    return(ex);
#   undef blin_internal_flags
}
