/*-
 * Copyright (C) @BABOLO  2002 Dec 23
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef lint
/* Embedded copyright and RCS identification strings; left out when lint is defined. */
static const char copyright[] =
    "@(#)Copyright (C) @BABOLO  2002 Dec 23\n"
    "@(#)All rights reserved.\n";
static const char rcsid[] = "$Id: parser.c,v 1.43 2009/11/17 07:04:23 babolo Exp $";
#endif /* not lint */

#define _PGOBLIN_CHAR_CLASSES_ 1
#include <sys/types.h>
#include <sysexits.h>
#include <unistd.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <mife.h>
#include <err.h>
#include <babolo/parser.h>
#include <babolo/BLINflag.h>
#include "pgoblin.h"
#include "pgob.h"

/*
 * Printable names of the automaton states, in the same order as the
 * members of enum states.  Only the verbose state trace in
 * pgoblin_parser() uses them.
 */
static const char *statenames[] = {
    "bg", "bs", "nl", "nq", "tc", "tb", "tp",
    "sc", "cl", "cq", "ml", "mc", "tq", "xx"
};

/*
 * States of the parser automaton.  The descriptions are read off the
 * transition table automa[][]; '#' stands for the character class Ld.
 */
enum states {
    bg,     /* start of the text; same transitions as nl                     */
    bs,     /* skipping the rest of a line that is not a command             */
    nl,     /* start of a line outside a command; '#' leads to nq            */
    nq,     /* right after the opening '#'; a second '#' skips the line      */
    tc,     /* inside the command word, characters go to the lexer           */
    tb,     /* blanks after the command word                                 */
    tp,     /* parameter characters                                          */
    sc,     /* skipping the rest of the command line                         */
    cl,     /* start of the first line after the command line                */
    cq,     /* '#' seen at the start of that first line                      */
    ml,     /* start of a later line of the literal                          */
    mc,     /* inside a line of the literal                                  */
    tq,     /* '#' seen at the start of a later literal line                 */
    xx      /* terminal state; also the number of rows in automa[][]         */
};
/*
 * Labels for the action bits of a control word, handed to the BLIN state
 * trace macros.  Each labelled entry sits at index (31 - bit number) of its
 * action bit: Cp (0x200000, bit 21) at 10, Cb at 12, Ce at 13, Cc at 14,
 * Ci at 15, Co at 16, Cl at 17, Cx at 20, Cy at 21.  Cj (0x001000, bit 12)
 * follows the same rule at index 19; it used to be left as "- ", so the
 * trace never showed it.
 * NOTE(review): the index-to-bit mapping is inferred from the existing
 * entries, not from the BLIN macros themselves - confirm against BLINflag.h.
 */
static const char *pn[] =
{ "- ", "- ", "- ", "- ", "- ", "- ", "- ", "- "
, "- ", "- ", "Cp", "- ", "Cb", "Ce", "Cc", "Ci"
, "Co", "Cl", "- ", "Cj", "Cx", "Cy", "- ", "- "
};

#define Cp  0x200000 /*             */
#define Cb  0x080000 /*       */
#define Ce  0x040000 /*        */
#define Cc  0x020000 /*               */
#define Ci  0x010000 /*     */
#define Co  0x008000 /*             */
#define Cl  0x004000 /*  lexor                   */
#define Cj  0x001000 /*     */
#define Cx  0x000800 /*                 */
#define Cy  0x000400 /*                    */
#define C_state 0xFF /*           */
#define Cio (Ci | Co)
#define Cd  (Cb | Ce)

/*
 * Transition table of the parser automaton.
 *
 * automa[state][class] is a control word: the low byte (C_state) is the
 * next state, the other bits are C* actions run by pgoblin_parser().
 * Columns are the character classes L0, Ll, Lb, Ls, Ld, Lo, declared
 * outside this file; the column header below keeps the characters the
 * original header showed for them ('\0', '\n', '\b', -, '#', '*').
 * Every row sends column L0 (end of text) to xx, which ends the parse.
 *
 * Row cq, column Ll now goes to cl, as the same column does in rows nq and
 * tq.  It used to go to ml: a line holding only '#' directly after a
 * command line then entered the literal states without Cb ever being
 * issued, so Cc copied through a literal pointer that was NULL or left
 * over from an earlier command.
 */
static u_int32_t automa[xx][Lx] =
/*    \0 L0        \n Ll      \b Lb   Ls       # Ld       * Lo       state                                    */
{{    Cx|xx,          nl,        bs,        bs,        nq,        bs}/* bg: start of the text                 */
,{    Cx|xx,          nl,        bs,        bs,        bs,        bs}/* bs: skipping a non-command line       */
,{    Cx|xx,          nl,        bs,        bs,        nq,        bs}/* nl: start of a line                   */
,{ Co|Cl|xx,  Co|Cl|  cl,  Co|Cl|tb,  Co|Cl|tc,        bs,  Co|Cl|sc}/* nq: right after the opening '#'       */
,{ Cl|Cj|xx,     Cl|  cl,     Cl|tb,     Cl|tc,     Cl|sc,     Cl|sc}/* tc: inside the command word           */
,{Ci |   xx,          cl,        tb, Cp|    tp,        sc,        sc}/* tb: blanks after the command word     */
,{Ci |   xx,          cl,        sc, Cp|    tp,        sc,        sc}/* tp: parameter characters              */
,{Ci |   xx,          cl,        sc,        sc,        sc,        sc}/* sc: rest of the command line          */
,{Ci |   xx, Cd|   Cc|ml, Cd|Cc| mc, Cd|Cc| mc,        cq, Cd|Cc| mc}/* cl: start of the first literal line   */
,{Cio|Cl|xx, Cio|Cl|  cl, Cio|Cl|tb, Cio|Cl|tc, Cd|Cc| mc, Cio|Cl|sc}/* cq: '#' opening the first literal line */
,{Ci |   xx,    Ce|Cc|ml,    Cc| mc,    Cc| mc,        tq,    Cc| mc}/* ml: start of a later literal line     */
,{Ce|Cc|Ci|xx,  Ce|Cc|ml,    Cc| mc,    Cc| mc, Cc|    mc,    Cc| mc}/* mc: inside a literal line             */
,{Cio|Cl|xx, Cio|Cl|  cl, Cio|Cl|tb, Cio|Cl|tc, Cc|    mc, Cio|Cl|sc}/* tq: '#' opening a later literal line  */
};

/*
 * placmd - store one parsed command in the program.
 *
 * options     source of the verbosity flags used for diagnostics.
 * pgm         program under construction; the tuple at pgm->curr is filled
 *             in and pgm->curr is advanced when a command is stored.
 * r_parm      parameter codes, PGOBLIN_BPARG bits per parameter slot,
 *             slot 0 in the lowest bits.
 * r_cmd       command number; 0 is reported as an unknown command.
 * r_beg/r_end start and end of the command's literal, or NULL.  When both
 *             are set, *r_end is overwritten with '\0'.
 * lfcount     input line number, used in messages only.
 * localflags  PGOBLIN_IGNORE is kept here between calls: a command flagged
 *             PGOBLIN_IGNORE sets it, and while it is set PGOBLIN_cont
 *             commands are not stored.
 *
 * Returns EX_OK, or the sysexits code handed to ERROUT (which presumably
 * also sets errno and jumps to the out label - confirm in the BLIN headers).
 * NOTE(review): pgm->curr == pgm->maxx is accepted as a valid index; whether
 * that slot exists depends on the layout of pgoblin_prog - confirm.
 */
static int
placmd(pgoblin_main *options, pgoblin_prog *pgm, u_int64_t r_parm, u_int32_t r_cmd, char *r_beg, u_char *r_end, int lfcount, u_int32_t *localflags) {
    int ex = EX_OK;
    int slot;

    ifBLIN_QV3(options->flags) {
        fprintf(stderr, "[%d]=%02X(%016llX)\n", (int)pgm->curr, r_cmd, (long long)r_parm);
    }
    if  (r_cmd == 0) {
        ifBLIN_QV1(options->flags) warnx("Unknown command at line %d", lfcount);
        ERROUT(EX_DATAERR, EINVAL);
    } else if (r_cmd > pgoblin.max) {
        ifBLIN_QV1(options->flags) warnx("Out of command range at line %d", lfcount);
        ERROUT(EX_SOFTWARE, EDOOFUS);
    }
    ifBLIN_QV4(options->flags) {
        fprintf(stderr, "[%08X]\n", pgoblin.a[r_cmd].flags);
    }
    if  ((~pgoblin.a[r_cmd].flags & PGOBLIN_ILLEGAL) == 0) {
        /* Every PGOBLIN_ILLEGAL bit is set for this command. */
        ifBLIN_QV1(options->flags) warnx("Illegal command at line %d", lfcount);
        ERROUT(EX_SOFTWARE, EDOOFUS);
    } else if (pgm->curr > pgm->maxx) {
        ifBLIN_QV1(options->flags) warnx("Programm too big");
        ERROUT(EX_SOFTWARE, EFBIG);
    } else if (pgoblin.a[r_cmd].flags & PGOBLIN_IGNORE) {
        /* Nothing is stored; only remember that an ignored command was seen. */
        *localflags |= PGOBLIN_IGNORE;
    } else if (!((*localflags & PGOBLIN_IGNORE) && (r_cmd == PGOBLIN_cont))) {
        u_int32_t packed = 0;

        *localflags &= ~PGOBLIN_IGNORE;
        /* Move each parameter this command accepts to its position in the command word. */
        for (slot = 0; slot < pgoblin.symparam; slot++, r_parm >>= PGOBLIN_BPARG) {
            u_int32_t shift;

            if  (!(pgoblin.a[r_cmd].flags & pgoblin.p[slot].sym)) continue;
            shift = pgoblin.binparams[PREG(r_cmd, slot)];
            packed &= ~(PGOBLIN_ARGMASK << shift);
            packed |= ((r_parm & PGOBLIN_ARGMASK) << shift);
        }
        if  (!(r_cmd & PGOBLIN_EXTENDED)) {
            pgm->code[pgm->curr].cmd = (r_cmd & PGOBLIN_COMMAND) | (packed & ~PGOBLIN_COMMAND);
            pgm->code[pgm->curr].e.lit = NULL;
            if  (r_beg != NULL && r_end != NULL) {
                *r_end = '\0';
                if  ((pgoblin.a[r_cmd].flags & PGOBLIN_NOLITER) == 0) {
                    pgm->code[pgm->curr].e.lit = r_beg;
                    ifBLIN_QV3(options->flags)
                        fprintf(stderr, ">%s~\n", pgm->code[pgm->curr].e.lit);
                }
            }
        } else {
            /* Extended command: the real command number goes into the e.cmd field. */
            pgm->code[pgm->curr].cmd = PGOBLIN_extended | (packed & ~PGOBLIN_COMMAND);
            pgm->code[pgm->curr].e.cmd = (r_cmd & PGOBLIN_COMMAND);
        }
        pgm->curr++;
    }
out:
    return(ex);
}

/*
 * pgoblin_parser - translate pgoblin program text into a command array.
 *
 * options  run-time options.  options->flags selects the diagnostic
 *          verbosity and is dereferenced unconditionally, so options must
 *          not be NULL.
 * text     program text of txlen bytes.  The buffer is modified in place:
 *          literal text is compacted and NUL-terminated inside it, and the
 *          stored commands keep pointers into it.  A literal that runs to
 *          the end of the input is terminated at text[txlen], so that byte
 *          has to be writable as well.
 * txlen    number of bytes to parse; with 0 no command is added.
 * ppgm     in/out.  *ppgm is either an existing program, whose id must
 *          match "#pGoblin-" VERS and to which commands are appended, or
 *          NULL, in which case a program of PGOBLIN_PROGSIZE tuples is
 *          allocated and returned through *ppgm (also when parsing fails
 *          later).
 *
 * Returns EX_OK or a sysexits code; the error paths go through ERROUT, which
 * presumably also sets errno - confirm in the BLIN headers.
 *
 * The text is run through the automa[][] table one character at a time;
 * one extra step with class L0 is made at the end of the text.  The action
 * bits of each control word are handled in the fixed order below.
 */
int
pgoblin_parser(pgoblin_main *options, u_char *text, size_t txlen, pgoblin_prog **ppgm) {
    const int parm_bits = (int)(sizeof(u_int64_t) * CHAR_BIT);
    int ex = EX_OK;
    size_t offset;
    enum states state;
    u_int32_t control, localflags;
    const u_char **qq, *qp;
    u_char s, *r_end = NULL, *literal = NULL;
    char p, *r_beg = NULL;
    u_int32_t r_lex, r_cmd;
    u_int64_t r_parm;
    int rs, lfcount;
    pgoblin_prog *pgm = NULL;

    ifBLIN_QV3(options->flags) fprintf( stderr
                                      , "+pgoblin_parser: options %s, ppgm %s txt %d\n"
                                      , options ? "OK" : "NULL"
                                      , ppgm ? "OK" : "NULL"
                                      , (int)txlen
                                      );
    if  (!text) {
        ifBLIN_QV1(options->flags) warnx("No pgoblin program text");
        ERROUT(EX_DATAERR, EINVAL);
    } else if (!ppgm) {
        ifBLIN_QV1(options->flags) warnx("No pgoblin program");
        ERROUT(EX_DATAERR, EINVAL);
    } else if ((pgm = *ppgm)) {
        ifBLIN_QV3(options->flags) fprintf( stderr, "pgm %s\n", pgm ? "OK" : "NULL");
        if  (strncmp(pgm->id, "#pGoblin-" VERS, PGOBLIN_STRING_ID_TST)) {
            ifBLIN_QV1(options->flags) warnx("Illegal pgoblin program");
            ERROUT(EX_DATAERR, EPROGMISMATCH);
        }
    } else {
        /* calloc() returns the header and all tuples already zeroed. */
        if  (!(pgm = calloc(1, sizeof(pgoblin_tuple) * PGOBLIN_PROGSIZE + sizeof(pgoblin_prog)))) {
            ifBLIN_QV1(options->flags) warnx("Malloc failed in parser #1");
            ERROUT(EX_OSERR, ENOMEM);
        }
        strncpy(pgm->id, "#pGoblin-" VERS, PGOBLIN_STRING_ID_LEN);
        pgm->maxx = PGOBLIN_PROGSIZE;
        pgm->curr = 0;
        pgm->debug = NULL;
        pgm->flags = options->flags;
    }
    if  (!txlen) {
        ifBLIN_QV2(options->flags) warnx("Empty pgoblin program text");
        goto out;
    }
    ifBLIN_QV4(options->flags) fprintf(stderr, "pgm@%d:%d\n", (int)pgm->curr, (int)pgm->maxx);
    ifBLIN_QV5(options->flags) BLIN_STATEHEADER(stderr, pn)
    lfcount = 0;
    qp = &s;
    qq = &qp;
    r_parm = 0;
    r_lex = 0;
    r_cmd = 0;
    rs = -PGOBLIN_BPARG;
    localflags = PGOBLIN_IGNORE;
    for (state = bg, offset = 0, control = 0; state < xx; offset++, state = control & C_state) {
        if  (offset < txlen) {
            s = text[offset];
            p = pgoblin.class[s];
            if  (p < 0) p = Ls;
            if  (s == '\n') lfcount++;
        } else {
            /* One step past the text: class L0 drives every state to xx. */
            p = L0;
            s = 0;
        }
        control = automa[state][(u_char)p];
        ifBLIN_QV5(options->flags)
            BLIN_STATEBODY(stderr, pn, rs & 0xFF, pgoblin.clasnames[(u_char)p], text, offset, r_lex, control, statenames, state, C_state, (text[offset + BLIN_STATEVAR_i]>=' '))
        if  (control & Cp) {
            /*
             * A shift by the width of r_parm or more is undefined, so
             * parameters that no longer fit are dropped; rs stops growing
             * once it has left the valid range.
             */
            if  (rs < parm_bits) rs += PGOBLIN_BPARG;
            if  (rs < parm_bits) r_parm |= (~(u_int64_t)(pgoblin.class[s])) << rs;
        }
        if  (control & Cb && !r_beg) r_beg = (char*)(literal = &text[offset]);
        if  (control & Ce && r_beg) r_end = literal;
        /* Without an open literal there is nowhere to copy to. */
        if  (control & Cc && literal) {
            /* s equals text[offset] inside the text and is 0 at the end, where text[offset] must not be read. */
            if  (literal != &text[offset]) *literal = s;
            literal++;
        }
        if  (control & Ci) {
            if  (r_lex >= pgoblin.cmds->szt) r_cmd = ~r_lex;
            if  ((ex = placmd(options, pgm, r_parm, r_cmd, r_beg, r_end, lfcount, &localflags)))
                goto out;
        }
        if  (control & Co) {
            r_lex = 0;
            r_cmd = 0;
            r_parm = 0;
            rs = -PGOBLIN_BPARG;
            r_beg = NULL;
            r_end = NULL;
            /* Drop the write position too, so a stale one is never reused by Cc. */
            literal = NULL;
        }
        if  (control & Cl) {
            /* qp is reset before each call because babolo_testchar() gets it by reference. */
            qp = &s;
            r_lex = babolo_testchar(pgoblin.cmds, r_lex, qq);
        }
        if  (control & Cj) {
            if  (r_lex >= pgoblin.cmds->szt) r_cmd = ~r_lex;
            if  ((ex = placmd(options, pgm, r_parm, r_cmd, r_beg, r_end, lfcount, &localflags)))
                goto out;
        }
        if  (control & Cx) {
            ifBLIN_QV2(options->flags) fprintf(stderr, " \n");
        }
        if  (control & Cy) {
            errno = EINVAL;
            ex = EX_DATAERR;
        }
    }
out:
    /*
     * ppgm is NULL on the "No pgoblin program" path, and pgm is still NULL
     * when text was NULL; in neither case may the caller's pointer be
     * touched.
     */
    if  (ppgm && pgm) *ppgm = pgm;
    ifBLIN_QV3(options->flags) fprintf(stderr, "-pgoblin_parser %d pgm %s\n", ex, pgm ? "OK" : "NULL");
    return(ex);
}
