From 516e1dade5fe282ce591474aeaa94c305038a715 Mon Sep 17 00:00:00 2001
From: AntoHesse
Date: Sun, 12 Feb 2017 02:40:34 +0100
Subject: [PATCH] enforcement grammar part 1

Start replacing the three-entry placeholder parser tables with tables
derived from the shell grammar.

- parser.h: the symbol with value 1 is renamed from EMPTY to LINEBREAK
  (the separate LINEBREAK entry further down the enum is removed), ten
  token symbols are added starting at 50 (TK_ASSIGNEMENT_WORD .. TK_IN),
  and t_aggrematch / g_aggrematch[] are declared.
- aggregate_sym.c: define g_aggrematch[] (under, top, new_sym rows,
  terminated by an all-zero row).
- eval_sym.c: replace the content of g_stackmatch[].
- produce_sym.c: replace the content of g_prodmatch[]; produce_sym() now
  clears *new_sym before scanning the table and falls back to the raw
  token type when no row matched.
- ft_parse.c / read_stack.c: follow the EMPTY -> LINEBREAK rename.
- grammar/rules: add four io_redirect rules.
- sample/stack.sh: reduce the sample to a bare "ls".
---
Reviewer notes (changes relative to the patch as first posted):
 - aggregate_sym.c: the g_aggrematch[] initializer is now closed with "};".
   It previously ran straight into aggregate_sym() and could not compile.
 - aggregate_sym.c: the TK_DONE / TK_RBRACE rows use COMPOUND_LIST, like the
   other tables in this patch; TK_COMPOUND_LIST is not declared by any hunk.
 - produce_sym.c: *new_sym is reset with 0 instead of NULL, t_sym being an
   integer type.
 - The "index" lines of those two files still carry the original blob ids.

 42sh/grammar/rules              |   6 ++
 42sh/includes/parser.h          |  27 +++++-
 42sh/sample/stack.sh            |   2 +-
 42sh/src/parser/aggregate_sym.c |  16 ++-
 42sh/src/parser/eval_sym.c      | 160 +++++++++++++++++++++++++++++++-
 42sh/src/parser/ft_parse.c      |   2 +-
 42sh/src/parser/produce_sym.c   |  37 +++++++-
 42sh/src/parser/read_stack.c    |   4 +-
 8 files changed, 239 insertions(+), 15 deletions(-)

diff --git a/42sh/grammar/rules b/42sh/grammar/rules
index 75b22e2b..22b5afae 100644
--- a/42sh/grammar/rules
+++ b/42sh/grammar/rules
@@ -55,6 +55,12 @@ io_file - compound_command : VALID - AGGREGATION k = 0, new = io_redirect
 
 io_redirect - redirect_list : VALID - NOTHING
 io_redirect - compound_command : VALID - AGGREGATION k = 0, new = redirect_list
+io_redirect - cmd_suffix : VALID - NOTHING
+io_redirect - cmd_prefix : VALID - NOTHING
+io_redirect - cmd_word : VALID - NOTHING
+io_redirect - cmd_name : VALID - NOTHING
+
+
 
 // si transparence continuer remontee,
 // si dernier membre aurait pu aussi constitue membre actuel, ne rien fiare, ainsi que transparence
diff --git a/42sh/includes/parser.h b/42sh/includes/parser.h
index 3ebc25e6..ba7226e3 100644
--- a/42sh/includes/parser.h
+++ b/42sh/includes/parser.h
@@ -22,7 +22,7 @@
 
 enum e_sym
 {
-	EMPTY = 1,
+	LINEBREAK = 1,
 	TK_LESS,
 	TK_GREAT,
 	TK_DLESS,
@@ -51,6 +51,16 @@ enum e_sym
 	TK_N_WORD,
 	TK_Q_WORD,
 	TK_DQ_WORD,
+	TK_ASSIGNEMENT_WORD = 50,
+	TK_BANG,
+	TK_NAME,
+	TK_FOR,
+	TK_IO_NUMBER,
+	TK_DLESSDASH,
+	TK_LESSGREAT,
+	TK_CASE,
+	TK_LBRACE,
+	TK_IN,
 	PROGRAM = 100,
 	COMPLETE_COMMANDS,
 	LIST,
@@ -91,7 +101,6 @@ enum e_sym
 	IO_HERE,
 	HERE_END,
 	NEWLINE_LIST,
-	LINEBREAK,
 	SEPARATOR_OP,
 	SEPARATOR,
 	SEQUENTIAL_SEP,
@@ -106,7 +115,7 @@ enum e_sym
 	| WHILE_CLAUSE | UNTIL_CLAUSE | FUNCTION_DEFINITION | FUNCTION_BODY | FNAME\
 	| BRACE_GROUP | DO_GROUP | SIMPLE_COMMAND | CMD_NAME | CMD_WORD | CMD_PREFIX\
 	| CMD_SUFFIX | REDIRECT_LIST | IO_REDIRECT | IO_FILE | FILENAME | IO_HERE\
-	| HERE_END | NEWLINE_LIST | LINEBREAK | SEPARATOR_OP | SEPARATOR | SEQUENTIAL_SEP
+	| HERE_END | NEWLINE_LIST | SEPARATOR_OP | SEPARATOR | SEQUENTIAL_SEP
 };
 
 typedef unsigned long long int t_sym;
@@ -168,6 +177,17 @@ typedef unsigned long long int t_sym;
 //#define ALL_SYM -1UL
 */
 
+struct s_aggrematch
+{
+	t_sym under;
+	t_sym top;
+	t_sym new_sym;
+};
+
+typedef struct s_aggrematch t_aggrematch;
+
+extern t_aggrematch g_aggrematch[];
+
 struct s_prodmatch
 {
 	t_type token;
@@ -179,7 +199,6 @@ typedef struct s_prodmatch t_prodmatch;
 
 extern t_prodmatch g_prodmatch[];
 
-
 struct s_stackmatch
 {
 	t_sym under;
diff --git a/42sh/sample/stack.sh b/42sh/sample/stack.sh
index 4ec79147..9e2740c6 100644
--- a/42sh/sample/stack.sh
+++ b/42sh/sample/stack.sh
@@ -1 +1 @@
-ls > file1 > file2
+ls
diff --git a/42sh/src/parser/aggregate_sym.c b/42sh/src/parser/aggregate_sym.c
index 05632520..41cb395f 100644
--- a/42sh/src/parser/aggregate_sym.c
+++ b/42sh/src/parser/aggregate_sym.c
@@ -12,7 +12,21 @@
 
 #include "parser.h"
 
-//descente recursive pour multi aggregation
+t_aggrematch g_aggrematch[] =
+{
+	{CMD_SUFFIX, TK_WORD, CMD_SUFFIX},
+	{TK_PIPE, TK_WORD, PATTERN},
+	{WORDLIST, TK_WORD, WORDLIST},
+
+	{CMD_PREFIX, TK_ASSIGNEMENT_WORD, CMD_PREFIX},
+
+	{ELSE_PART, TK_FI, IF_CLAUSE},
+	{COMPOUND_LIST, TK_FI, IF_CLAUSE},
+
+	{COMPOUND_LIST, TK_DONE, DO_GROUP},
+	{COMPOUND_LIST, TK_RBRACE, BRACE_GROUP},
+	{0, 0, 0},
+};
 
 int aggregate_sym(t_sym **stack, t_sym *new_sym)
 {
diff --git a/42sh/src/parser/eval_sym.c b/42sh/src/parser/eval_sym.c
index 412488a7..ff6c7761 100644
--- a/42sh/src/parser/eval_sym.c
+++ b/42sh/src/parser/eval_sym.c
@@ -14,9 +14,163 @@
 
 t_stackmatch g_stackmatch[] =
 {
-	{EMPTY, CMD_NAME},
-	{TK_GREAT, CMD_NAME},
-	{CMD_NAME, TK_GREAT},
+	{CMD_SUFFIX, TK_WORD},
+	{TK_PIPE, TK_WORD},
+	{CASE, TK_WORD},
+	{WORDLIST, TK_WORD},
+
+	{CMD_PREFIX, TK_ASSIGNEMENT_WORD},
+
+	{REDIRECT_LIST, TK_IO_NUMBER},
+	{CMD_SUFFIX, TK_IO_NUMBER},
+	{CMD_PREFIX, TK_IO_NUMBER},
+	{CMD_NAME, TK_IO_NUMBER},
+	{CMD_WORD, TK_IO_NUMBER},
+	{LINEBREAK, TK_IO_NUMBER},
+	{TK_BANG, TK_IO_NUMBER},
+	{SEPARATOR_OP, TK_IO_NUMBER},
+	{NEWLINE_LIST, TK_IO_NUMBER},
+
+	{TK_AND_IF, AND_OR},
+
+	{TK_OR_IF, AND_OR},
+
+	{TK_DSEMI, LINEBREAK},
+	{TK_DSEMI, COMPOUND_LIST},
+
+	{TK_DLESS, TK_IO_NUMBER},
+	{TK_DLESS, REDIRECT_LIST},
+	{TK_DLESS, CMD_SUFFIX},
+	{TK_DLESS, CMD_PREFIX},
+	{TK_DLESS, CMD_WORD},
+	{TK_DLESS, CMD_NAME},
+	{TK_DLESS, LINEBREAK},
+	{TK_DLESS, TK_BANG},
+	{TK_DLESS, SEPARATOR_OP},
+	{TK_DLESS, NEWLINE_LIST},
+
+	{TK_DLESSDASH, TK_IO_NUMBER},
+	{TK_DLESSDASH, REDIRECT_LIST},
+	{TK_DLESSDASH, CMD_SUFFIX},
+	{TK_DLESSDASH, CMD_PREFIX},
+	{TK_DLESSDASH, CMD_WORD},
+	{TK_DLESSDASH, CMD_NAME},
+	{TK_DLESSDASH, LINEBREAK},
+	{TK_DLESSDASH, TK_BANG},
+	{TK_DLESSDASH, SEPARATOR_OP},
+	{TK_DLESSDASH, NEWLINE_LIST},
+
+	{TK_DGREAT, TK_IO_NUMBER},
+	{TK_DGREAT, REDIRECT_LIST},
+	{TK_DGREAT, CMD_SUFFIX},
+	{TK_DGREAT, CMD_PREFIX},
+	{TK_DGREAT, CMD_WORD},
+	{TK_DGREAT, CMD_NAME},
+	{TK_DGREAT, LINEBREAK},
+	{TK_DGREAT, TK_BANG},
+	{TK_DGREAT, SEPARATOR_OP},
+	{TK_DGREAT, NEWLINE_LIST},
+
+	{TK_LESSAND, TK_IO_NUMBER},
+	{TK_LESSAND, REDIRECT_LIST},
+	{TK_LESSAND, CMD_SUFFIX},
+	{TK_LESSAND, CMD_PREFIX},
+	{TK_LESSAND, CMD_WORD},
+	{TK_LESSAND, CMD_NAME},
+	{TK_LESSAND, LINEBREAK},
+	{TK_LESSAND, TK_BANG},
+	{TK_LESSAND, SEPARATOR_OP},
+	{TK_LESSAND, NEWLINE_LIST},
+
+	{TK_GREATAND, TK_IO_NUMBER},
+	{TK_GREATAND, REDIRECT_LIST},
+	{TK_GREATAND, CMD_SUFFIX},
+	{TK_GREATAND, CMD_PREFIX},
+	{TK_GREATAND, CMD_WORD},
+	{TK_GREATAND, CMD_NAME},
+	{TK_GREATAND, LINEBREAK},
+	{TK_GREATAND, TK_BANG},
+	{TK_GREATAND, SEPARATOR_OP},
+	{TK_GREATAND, NEWLINE_LIST},
+
+	{TK_LESSGREAT, TK_IO_NUMBER},
+	{TK_LESSGREAT, REDIRECT_LIST},
+	{TK_LESSGREAT, CMD_SUFFIX},
+	{TK_LESSGREAT, CMD_PREFIX},
+	{TK_LESSGREAT, CMD_WORD},
+	{TK_LESSGREAT, CMD_NAME},
+	{TK_LESSGREAT, LINEBREAK},
+	{TK_LESSGREAT, TK_BANG},
+	{TK_LESSGREAT, SEPARATOR_OP},
+	{TK_LESSGREAT, NEWLINE_LIST},
+
+	{TK_CLOBBER, TK_IO_NUMBER},
+	{TK_CLOBBER, REDIRECT_LIST},
+	{TK_CLOBBER, CMD_SUFFIX},
+	{TK_CLOBBER, CMD_PREFIX},
+	{TK_CLOBBER, CMD_WORD},
+	{TK_CLOBBER, CMD_NAME},
+	{TK_CLOBBER, LINEBREAK},
+	{TK_CLOBBER, TK_BANG},
+	{TK_CLOBBER, SEPARATOR_OP},
+	{TK_CLOBBER, NEWLINE_LIST},
+
+	{TK_IF, LINEBREAK},
+	{TK_IF, TK_BANG},
+	{TK_IF, SEPARATOR_OP},
+	{TK_IF, NEWLINE_LIST},
+
+	{TK_THEN, COMPOUND_LIST},
+
+	{TK_ELSE, COMPOUND_LIST},
+
+	{TK_ELIF, COMPOUND_LIST},
+
+	{TK_FI, ELSE_PART},
+	{TK_FI, COMPOUND_LIST},
+
+	{TK_DO, COMPOUND_LIST},
+	{TK_DO, NAME},
+	{TK_DO, SEQUENTIAL_SEP},
+
+	{TK_DONE, COMPOUND_LIST},
+
+	{TK_CASE, LINEBREAK},
+	{TK_CASE, TK_BANG},
+	{TK_CASE, NEWLINE_LIST},
+	{TK_CASE, SEPARATOR_OP},
+
+	{TK_ESAC, CASE_LIST},
+	{TK_ESAC, CASE_LIST_NS},
+	{TK_ESAC, LINEBREAK},
+
+	{TK_WHILE, LINEBREAK},
+	{TK_WHILE, TK_BANG},
+	{TK_WHILE, SEPARATOR_OP},
+	{TK_WHILE, NEWLINE_LIST},
+
+	{TK_UNTIL, LINEBREAK},
+	{TK_UNTIL, TK_BANG},
+	{TK_UNTIL, SEPARATOR_OP},
+	{TK_UNTIL, NEWLINE_LIST},
+
+	{TK_FOR, LINEBREAK},
+	{TK_FOR, TK_BANG},
+	{TK_FOR, SEPARATOR_OP},
+	{TK_FOR, NEWLINE_LIST},
+
+	{TK_LBRACE, LINEBREAK},
+	{TK_LBRACE, TK_BANG},
+	{TK_LBRACE, SEPARATOR_OP},
+	{TK_LBRACE, NEWLINE_LIST},
+
+	{TK_RBRACE, COMPOUND_LIST},
+
+	{TK_BANG, LINEBREAK},
+	{TK_BANG, SEPARATOR_OP},
+	{TK_BANG, NEWLINE_LIST},
+
+	{TK_IN, LINEBREAK},
 	{0, 0},
 };
 
diff --git a/42sh/src/parser/ft_parse.c b/42sh/src/parser/ft_parse.c
index dfcd3d4c..00f8036e 100644
--- a/42sh/src/parser/ft_parse.c
+++ b/42sh/src/parser/ft_parse.c
@@ -22,7 +22,7 @@ int ft_parse(t_btree **ast, t_list **token)
 	state = UNDEFINED;
 	new_sym = ft_memalloc(sizeof(t_sym));
 	stack = ft_memalloc(sizeof(t_sym) * 1000);
-	push_stack(stack, EMPTY);
+	push_stack(stack, LINEBREAK);
 	while (*token)
 	{
 		produce_sym(*stack, new_sym, token);
diff --git a/42sh/src/parser/produce_sym.c b/42sh/src/parser/produce_sym.c
index d66023df..ef44af73 100644
--- a/42sh/src/parser/produce_sym.c
+++ b/42sh/src/parser/produce_sym.c
@@ -14,9 +14,37 @@
 
 t_prodmatch g_prodmatch[] =
 {
-	{TK_N_WORD, EMPTY, CMD_NAME},
-	{TK_GREAT, ALL, TK_GREAT},
-	{TK_N_WORD, ALL, CMD_NAME},
+	{TK_WORD, TK_DLESS, HERE_END},
+	{TK_WORD, TK_DLESSDASH, HERE_END},
+	{TK_WORD, TK_LESS, FILENAME},
+	{TK_WORD, TK_LESSAND, FILENAME},
+	{TK_WORD, TK_GREAT, FILENAME},
+	{TK_WORD, TK_GREATAND, FILENAME},
+	{TK_WORD, TK_DGREAT, FILENAME},
+	{TK_WORD, TK_LESSGREAT, FILENAME},
+	{TK_WORD, TK_CLOBBER, FILENAME},
+	{TK_WORD, CMD_WORD, CMD_SUFFIX},
+	{TK_WORD, CMD_NAME, CMD_SUFFIX},
+	{TK_WORD, LINEBREAK, CMD_NAME},
+	{TK_WORD, NEWLINE_LIST, CMD_NAME},
+	{TK_WORD, IN, WORDLIST},
+//	{{TK_WORD, CASE_LIST, PATTERN}, rule 4 ?
+	{TK_WORD, TK_PAREN_OPEN, PATTERN},
+
+	{TK_ASSIGNEMENT_WORD, LINEBREAK, CMD_PREFIX},
+	{TK_ASSIGNEMENT_WORD, TK_BANG, CMD_PREFIX},
+	{TK_ASSIGNEMENT_WORD, SEPARATOR_OP, CMD_PREFIX},
+	{TK_ASSIGNEMENT_WORD, NEWLINE_LIST, CMD_PREFIX},
+
+	{TK_NAME, LINEBREAK, FNAME},
+	{TK_NAME, TK_BANG, FNAME},
+	{TK_NAME, SEPARATOR_OP, FNAME},
+	{TK_NAME, NEWLINE_LIST, FNAME},
+	{TK_NAME, TK_FOR, NAME},
+
+	{TK_NEWLINE, COMPLETE_COMMANDS, NEWLINE_LIST},
+	{TK_NEWLINE, LINEBREAK, NEWLINE_LIST},
+
 	{0, 0, 0},
 };
 
@@ -27,6 +55,7 @@ int produce_sym(t_sym stack, t_sym *new_sym, t_list **lst)
 
 	token = (*lst)->content;
 	i = 0;
+	*new_sym = 0;
 	while (g_prodmatch[i].new_sym)
 	{
 		if (token->type == g_prodmatch[i].token
@@ -34,5 +63,7 @@ int produce_sym(t_sym stack, t_sym *new_sym, t_list **lst)
 			*new_sym = g_prodmatch[i].new_sym;
 		i++;
 	}
+	if (!*new_sym)
+		*new_sym = token->type;
 	return (0);
 }
diff --git a/42sh/src/parser/read_stack.c b/42sh/src/parser/read_stack.c
index 4ffb8113..e7505232 100644
--- a/42sh/src/parser/read_stack.c
+++ b/42sh/src/parser/read_stack.c
@@ -36,8 +36,8 @@ char *read_state(t_sym current)
 		return ("SIMPLE_COMMAND");
 	if (current == PROGRAM)
 		return ("PROGRAM");
-*/	if (current == EMPTY)
-		return ("EMPTY");
+*/	if (current == LINEBREAK)
+		return ("LINEBREAK");
 	if (current != 0)
 		return ("NON-DEFINED");
 	if (current == 0)