Re: Pattern Matching

So, jetzt habe ich das Programm, ein bisschen mit define konstanten, statt direkt hingeschriebenen konstanten, und entsprechenden Fehlermeldungen und Nummern aufgewertet

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define     X                             1024
#define     END                           -1
#define     IF_BEGIN                      -2
#define     IF_ELSE                       -3
#define     IF_END                        -4
#define     REPEAT_BEGIN                  -5
#define     ROUND_BRACKET_OPEN            -6
#define     ROUND_BRACKET_CLOSE           -7
#define     STACKLASTWASLABEL             0
#define     STACKXREALLOOPLABEL           1
#define     STACKXREALIFBEGIN             2
#define     STACKXREALELSEBEGIN           3
#define     REALEND                       -8
#define     CONTROLSTATE                  -9
#define     ASCIIMIN                      0
#define     ASCIIMAX                      255
#define     LEX_ROUND_BRACKET_OPEN        -10
#define     LEX_ROUND_BRACKET_CLOSE       -11
#define     LEX_SQUARE_BRACKET_OPEN       -12
#define     LEX_SQUARE_BRACKET_CLOSE      -13
#define     LEX_COMMA                     -14
#define     LEX_STAR                      -15
#define     LEX_DOT                       -16
#define     LEX_END                       -17
#define     LEX_EMPTY                     -18
#define     LEX_A                         'a'
#define     LEX_B                         'b'
#define     LEX_F                         'f'
#define     LEX_N                         'n'
#define     LEX_R                         'r'
#define     LEX_T                         't'
#define     LEX_V                         'v'
#define     C_STRING_END                  0
#define     C_ROUND_BRACKET_OPEN          '('
#define     C_ROUND_BRACKET_CLOSE         ')'
#define     C_SQUARE_BRACKET_OPEN         '['
#define     C_SQUARE_BRACKET_CLOSE        ']'
#define     C_STAR                        '*'
#define     C_COMMA                       ','
#define     C_BACKSLASH                   '\'
#define     C_DOT                         '.'
#define     C_A                           'a'
#define     C_B                           'b'
#define     C_F                           'f'
#define     C_N                           'n'
#define     C_R                           'r'
#define     C_T                           't'
#define     C_V                           'v'
#define     ERR_NO_ERR_NORMAL             0
#define     ERR_USAGE                     1
#define     ERR_PARSE                     2
#define     ERR_PARSE_FORGOT_COMMA_IN_OR_INDEX                0
#define     ERR_PARSE_FORGOT_SQUARE_BRACKET_CLOSE_IN_OR_INDEX 1
#define     ERR_PARSE_FORGOT_ROUND_BRACKET_CLOSE_INDEX        2
#define     ERR_PROG_USAGE_WRONG_ARGS_INDEX                   3
#define     ERR_PARSE_FORGOT_COMMA_IN_OR_MSG                "%s Sie haben das Komma bei Alternative vergessenn"
#define     ERR_PARSE_FORGOT_SQUARE_BRACKET_CLOSE_IN_OR_MSG "%s Sie haben vergessen bei der Alternative die Klammer zu schliessenn"
#define     ERR_PARSE_FORGOT_ROUND_BRACKET_CLOSE_MSG        "%s fehler klammer vergessen %c %in"
#define     ERR_PROG_USAGE_WRONG_ARGS_MSG                       "%s Sie haben die falsche Anzahl an Parametern eingegebenn"
#define     ERR_TOP_NO_ERROR_NORMAL_MSG                     "Das Programm l"auft korrekt"
#define     ERR_TOP_USAGE_ERROR_MSG                         "Fehler bei der Verwendung des Programms: "
#define     ERR_TOP_PARSER_ERROR_MSG                        "Parser Error: "
#define     HLP_MSG_0_MSG                                   "%s %s: Zeigt diese hilfe ann"
#define     HLP_MSG_SEARCH_MSG                              "%s <PATTERN> Sucht nach dem Muster <PATTERN> im Textn"
#define     PARAMETER_0_HELP_STR            "--help"
#define     PARAMETER_SEARCH_STR            "--search"

#define     MIN_ARGC                2
#define     MAX_ARGC                3
#define     HLP_MSG_0_INDEX         0
#define     HLP_MSG_SEARCH_INDEX    1
#define     HLP_MSG_MAX_INDEX       1
#define     PARAMETER_0_HELP_INDEX  0
#define     PARAMETER_SEARCH_INDEX  1
#define     PARAMETER_MAX_INDEX     1



const char errtopmsg [][128] = {ERR_TOP_NO_ERROR_NORMAL_MSG, ERR_TOP_USAGE_ERROR_MSG , ERR_TOP_PARSER_ERROR_MSG};

const char errmsg [][256] = {ERR_PARSE_FORGOT_COMMA_IN_OR_MSG,
                    ERR_PARSE_FORGOT_SQUARE_BRACKET_CLOSE_IN_OR_MSG,
                    ERR_PARSE_FORGOT_ROUND_BRACKET_CLOSE_MSG,
                    ERR_PROG_USAGE_WRONG_ARGS_MSG };
const char helpmsg [][256] = {HLP_MSG_0_MSG, HLP_MSG_SEARCH_MSG};
const char parameterstr [][32] = {PARAMETER_0_HELP_STR, PARAMETER_SEARCH_STR};


int statechar [X];
int x;

int initstates () {
    int l;

    for (l = 0;  l < X;  l++) {
        statechar [l] = LEX_EMPTY;
    }

}

int jx = 0;

char text [] = "ab cdpkcdpkpkpkcdpkfhpkpkfhpkpkpkfhpkpkpkcdpkpkpkcdpkefghqqqqaber\nnnedb\nsuper";

//char expr [] = "abc*de[fasd,asddsr]qdsda*ghijk";
//char expr [] = "[*([[[([a,[[[p,q*],ca],[d,*(ead)]]]f*(mmm(nnn)mm)),a],asd],semdu]),*poller]";
//char expr [] = "[*([[[([a,[[[p,*(q)],ca],[d,*(ead)]]]f*(mmm(nnn)mm)),a],asd],semdu]),*poller]";
//char expr [] = "*(mmm(nnn)mm)";
//char expr [] = "a(b*(*(cd)aaa)(efg))";
char expr [] = "*ab *([cd,fh]*(pk))ef,gh*(.)*aber\\*nedb\\\nsuper";
//char expr [] = "abcd";
int i = 0;

int gettoken () {
    if ((expr [i] == C_STRING_END))
      return LEX_END;
    else if (expr [i] == C_ROUND_BRACKET_OPEN) {
      i++;
      return LEX_ROUND_BRACKET_OPEN;
    }
    else if (expr [i] == C_ROUND_BRACKET_CLOSE) {
      i++;
      return LEX_ROUND_BRACKET_CLOSE;
    }
    else if (expr [i] == C_SQUARE_BRACKET_OPEN) {
      i++;
      return LEX_SQUARE_BRACKET_OPEN;
    }
    else if (expr [i] == C_SQUARE_BRACKET_CLOSE) {
      i++;
      return LEX_SQUARE_BRACKET_CLOSE;
    }
    else if (expr [i] == C_STAR) {
      i++;
      return LEX_STAR;
    }
    else if (expr [i] == C_COMMA) {
      i++;
      return LEX_COMMA;
    }
    else if (expr [i] == C_DOT) {
      i++;
      return LEX_DOT;
    }
    else if (expr [i] == C_BACKSLASH) {
       i++;
       if (expr [i] == C_A) {
          i++;
          return LEX_A;
       }
       else if (expr [i] == C_B) {
          i++;
          return LEX_B;
       }
       else if (expr [i] == C_F) {
          i++;
          return LEX_F;
       }
       else if (expr [i] == C_N) {
          i++;
          return LEX_N;
       }
       else if (expr [i] == C_R) {
          i++;
          return LEX_R;
       }
       else if (expr [i] == C_T) {
          i++;
          return LEX_T;
        }
        else if (expr [i] == C_V) {
          i++;
          return LEX_V;
        }

       return expr [i++];
    }
    else if ((expr [i] >= ASCIIMIN) \&amp;\&amp; (expr [i] <= ASCIIMAX)) {
      return expr [i++];
    }
}

int tokenback_rec_mask1(int);
int tokenback_rec_mask2(int);

int tokenback_rec_mask1 (int j) {
  if (expr [j] == '\')
    return tokenback_rec_mask2 (j-1);
  else
    return 1;
}
int tokenback_rec_mask2 (int j) {
  if (expr [j] == '\')
    return tokenback_rec_mask1 (j-1);
  else
    return 0;
}

void tokenback () {
  i-=(2-tokenback_rec_mask1(i-2));
}

int stream ();
int followed ();
int compound ();
int or_operator ();
int repeat_operator ();

int or_operator () {
    int ch;
    if ((ch = gettoken ()) == LEX_SQUARE_BRACKET_OPEN) {
        statechar [x] = IF_BEGIN;
        x++;
        or_operator ();
        if (gettoken () != LEX_COMMA) {
            fprintf (stderr, errmsg [ERR_PARSE_FORGOT_COMMA_IN_OR_INDEX], errtopmsg [ERR_PARSE]);
            exit (ERR_PARSE);
        }
        statechar [x] = IF_ELSE;
        x++;
        or_operator ();
        if ((ch = gettoken ()) != LEX_SQUARE_BRACKET_CLOSE) {
            fprintf (stderr, errmsg [ERR_PARSE_FORGOT_SQUARE_BRACKET_CLOSE_IN_OR_INDEX], errtopmsg [ERR_PARSE]);
            exit (ERR_PARSE);
        }
        statechar [x] = IF_END;
        x++;
        repeat_operator ();
    }
    else if (ch == LEX_END)
      return 0;
    else {
        tokenback ();
        repeat_operator ();
    }
}


int repeat_operator () {
    int ch;
    if ((ch = gettoken ()) == LEX_STAR) {
        statechar [x] = REPEAT_BEGIN;
        x++;
        stream ();
    }
    else if (ch == LEX_END)
      return 0;
    else {
        tokenback ();
        stream ();
    }
}

int stream () {
    int r = 0;
    r = compound ();
    r |= followed ();
    if (r) {
        or_operator();
    }
}

int followed () {
    int ch = gettoken ();
    int st, xtmp;
    if ((ch >= ASCIIMIN) \&amp;\&amp; (ch <= ASCIIMAX)) {
        statechar [x] = ch;
        x = x+1;
        or_operator ();
        return 1;
    }
    else if (ch == LEX_DOT) {
        statechar [x] = ch;
        x = x+1;
        or_operator ();
        return 1;
    }
    else if (ch == LEX_END)
      return 0;
    else {
        tokenback ();
        return 0;
    }
}

int compound () {
    int ch;
    if ((ch = gettoken ()) == LEX_ROUND_BRACKET_OPEN) {
        statechar [x] = ROUND_BRACKET_OPEN;
        x++;
        or_operator ();
        if ((ch = gettoken ()) != LEX_ROUND_BRACKET_CLOSE) {
            fprintf (stderr, errmsg[ERR_PARSE_FORGOT_ROUND_BRACKET_CLOSE_INDEX], errtopmsg [ERR_PARSE], expr [i], i);
            exit (ERR_PARSE);
        }
        statechar [x] = ROUND_BRACKET_CLOSE;
        x++;
        return 1;
    }
    else if (ch == LEX_END)
      return 0;
    else {
        tokenback ();
        return 0;
    }
}

int realstates1 [1024];
int realstates2 [1024];
int realstateschar [1024];


int stacks [32][1024];
int stacksp [32];

void push (int stackid, int state) {
  stacks [stackid][stacksp[stackid]++] = state;
}

int pop (int stackid) {
  stacksp [stackid]--;
  return stacks [stackid][stacksp[stackid]];
}

int emptystack (int stackid) {
    return (stacksp[stackid] == 0);
}

void automatgen ( ) {
    int xi = 0;
    int xreal = 0;
    int itwas = 0;
    int tmp;

    for (xi = 0; xi <= x;  ) {

        if (((statechar [xi] >= ASCIIMIN) \&amp;\&amp; (statechar [xi] <= ASCIIMAX)) || (statechar [xi] == LEX_DOT)) {
            realstates1 [xreal] = xreal+1;
            realstates2 [xreal] = xreal+1;
            realstateschar [xreal] = statechar [xi];
            xreal++;
        }
        else if (statechar [xi] == ROUND_BRACKET_OPEN) {
            //push (STACKLASTWASLABEL, ROUND_BRACKET_OPEN);
        }
        else if (statechar [xi] == REPEAT_BEGIN) {
            xi++;
            if (statechar [xi] == ROUND_BRACKET_OPEN) {
                push (STACKXREALLOOPLABEL, xreal);
                push (STACKLASTWASLABEL, REPEAT_BEGIN);
            }
            else if ((statechar [xi] >= ASCIIMIN) \&amp;\&amp; (statechar [xi] <= ASCIIMAX)) {
                realstates1 [xreal] = xreal+1;
                realstates2 [xreal] = xreal+1;
                realstateschar [xreal] = statechar [xi];
                xreal++;
                realstates1 [xreal] = xreal+1;
                realstates2 [xreal] = xreal-1;
                realstateschar [xreal] = CONTROLSTATE;
                xreal++;
            }
        }
        else if (statechar [xi] == ROUND_BRACKET_CLOSE) {
            if ((itwas = pop (STACKLASTWASLABEL)) == REPEAT_BEGIN) {
                realstateschar [xreal] = CONTROLSTATE;
                realstates1 [xreal] = pop (STACKXREALLOOPLABEL);
                realstates2 [xreal] = xreal+1;
                xreal++;
            }
            else if (itwas = ROUND_BRACKET_OPEN);
        }
        else if (statechar [xi] == IF_BEGIN) {
            realstateschar [xreal] = CONTROLSTATE;
            realstates1 [xreal] = xreal+1;
            push (STACKXREALIFBEGIN, xreal);
            xreal++;
        }
        else if (statechar [xi] == IF_ELSE) {
            realstates2 [pop (STACKXREALIFBEGIN)] = xreal;
            push (STACKXREALELSEBEGIN, xreal-1);
        }
        else if (statechar [xi] == IF_END) {
            realstates2 [tmp = pop (STACKXREALELSEBEGIN)] = xreal;
            realstates1 [tmp] = xreal;
        }
        xi++;
    }

    realstateschar [xreal] = REALEND;
    realstates1 [xreal] = REALEND;
    realstates2 [xreal] = REALEND;


    int k;
    for (k = 0;  k <= xreal;  k++) {
        printf ("(%i, ", realstateschar [k]);
        printf ("%i, ", realstates1 [k]);
        printf ("%i)n", realstates2 [k]);
    }

}

int xtext = 0;

int automat (int xreal) {
    int xtext2;
    int flag = 0;


    for (;  (realstates1 [xreal] != REALEND) \&amp;\&amp; (realstates2 [xreal] != REALEND) \&amp;\&amp; (xtext < strlen(text));  xreal = realstates1 [xreal], xtext++) {
        if (realstateschar [xreal] == LEX_DOT)
            printf ("succeded: %c %sn", '.', text + xtext);
        else if (realstateschar [xreal] == text [xtext])
            printf ("succeded: %c %sn", text [xtext], text + xtext);
        else if ((realstateschar [xreal] == CONTROLSTATE) \&amp;\&amp; (realstates1 [xreal] != realstates2 [xreal])) {
            xtext2 = xtext;
            flag = (automat (realstates1 [xreal]) == 1);
            xtext = xtext2;
            flag |= (automat (realstates2 [xreal]) == 1);

            if (flag == 0) return -1;
            else return 1;
        }
        else if (realstates1 [xreal] == REALEND ) return 1;
        else {return -1;}
    }
    if (realstates1 [xreal] == REALEND) {
        return 1;
    }
    else {

        return -1;
    }
}

int main (int argc, char *argv[]) {
    int k, l;
    int h;
    if (!((argc >= MIN_ARGC) \&amp;\&amp; (argc <= MAX_ARGC))) {
      fprintf (stderr, errmsg [ERR_PROG_USAGE_WRONG_ARGS_INDEX], errtopmsg [ERR_USAGE]);
      fprintf (stderr, helpmsg [HLP_MSG_0_INDEX], argv [0], parameterstr [PARAMETER_0_HELP_INDEX] );
      exit (ERR_USAGE);
    }
    if (argc == 2) {
      if (strcmp (argv [1], parameterstr [PARAMETER_0_HELP_INDEX]) == 0) {
                  fprintf (stderr, helpmsg [HLP_MSG_0_INDEX], argv [0], parameterstr [PARAMETER_0_HELP_INDEX] );
      for (h = 1;  h <= PARAMETER_MAX_INDEX; h++)
            fprintf (stderr, helpmsg [h], parameterstr [h]);
        exit (ERR_NO_ERR_NORMAL );
      }
      exit (1);
    }

    initstates ();
    or_operator (0);

    for (l = 0;  l <= x;  l++) {
        if (statechar [l] == ROUND_BRACKET_OPEN)
            printf ("(ROUND_BRACKET_OPEN, %2i)n", l);
        else if (statechar [l] == ROUND_BRACKET_CLOSE)
            printf ("(ROUND_BRACKET_CLOSE, %2i)n", l);
        else if (statechar [l] == REPEAT_BEGIN)
            printf ("(REPEAT_BEGIN, %2i)n", l);
        else
            printf ("(%2i %2i)n", statechar [l], l);
    }

    automatgen ();
    printf ("%in", automat (0));
}