From ebc57c0da8ac33a88beead6f51028962d27033f8 Mon Sep 17 00:00:00 2001 From: "ariard@student.42.fr" Date: Thu, 9 Feb 2017 16:06:05 +0100 Subject: [PATCH] parser as a pushdown automata, test --- 42sh/Makefile | 4 +- 42sh/includes/lexer.h | 26 +++++++-- 42sh/includes/parser.h | 86 ++++++++++++++++++++++++++++-- 42sh/sample/stack.sh | 1 + 42sh/src/lexer/ft_lexer.c | 2 +- 42sh/src/lexer/ft_tokenize.c | 2 +- 42sh/src/lexer/lexer_less.c | 2 +- 42sh/src/lexer/lexer_word.c | 2 +- 42sh/src/main/shell_script.c | 18 +++---- 42sh/src/parser/ft_parse.c | 68 ++++++++++------------- 42sh/src/parser/old_parse.c | 58 ++++++++++++++++++++ 42sh/src/parser/parse_word.c | 2 +- 42sh/src/parser/produce_prim_sym.c | 38 +++++++++++++ 42sh/src/parser/read_stack.c | 41 ++++++++++++++ 14 files changed, 285 insertions(+), 65 deletions(-) create mode 100644 42sh/sample/stack.sh create mode 100644 42sh/src/parser/old_parse.c create mode 100644 42sh/src/parser/produce_prim_sym.c create mode 100644 42sh/src/parser/read_stack.c diff --git a/42sh/Makefile b/42sh/Makefile index 147fe340..672d19c9 100644 --- a/42sh/Makefile +++ b/42sh/Makefile @@ -170,6 +170,7 @@ main/shell_script.c\ main/sig_handler.c\ parser/parse.c\ parser/ft_parse.c\ +parser/produce_prim_sym.c\ parser/get_instruction.c\ parser/get_sub_instruction.c\ parser/parse_dgreat.c\ @@ -186,7 +187,8 @@ parser/parse_while.c\ parser/parse_if.c\ parser/parse_elif.c\ parser/parse_else.c\ -parser/parse_word.c +parser/parse_word.c\ +parser/read_stack.c SRCS = $(addprefix $(SRC_DIR), $(SRC_BASE)) OBJS = $(addprefix $(OBJ_DIR), $(SRC_BASE:.c=.o)) diff --git a/42sh/includes/lexer.h b/42sh/includes/lexer.h index 98fd2c83..b614d632 100644 --- a/42sh/includes/lexer.h +++ b/42sh/includes/lexer.h @@ -6,7 +6,7 @@ /* By: jhalford +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2016/12/01 12:15:50 by jhalford #+# #+# */ -/* Updated: 2017/02/06 20:35:43 by ariard ### ########.fr */ +/* Updated: 2017/02/09 15:26:01 by ariard ### ########.fr */ /* */ /* 
************************************************************************** */ @@ -24,8 +24,27 @@ typedef struct s_nest t_nest; typedef long long t_type; -# define TK_LESS (1 << 0) -# define TK_GREAT (1 << 1) +/* + * Token need : + * DLESSDASH + * CLOBBER + * LESSGREAT + * IO_NUMBER + * NAME + * DSEMI + * ASSIGNEMENT_WORD + * Lbrace + * Rbrace + * Bang + * Case + * Esac + * for + * in + * +*/ + +# define TK_LESS (1 << 0) //transparent +# define TK_GREAT (1 << 1) //transparent # define TK_DLESS (1 << 2) # define TK_DGREAT (1 << 3) # define TK_LESSAND (1 << 4) @@ -83,7 +102,6 @@ enum e_lexstate IF, THEN, FI, - LIST, COMMENT, }; diff --git a/42sh/includes/parser.h b/42sh/includes/parser.h index 538825a3..eef07eaf 100644 --- a/42sh/includes/parser.h +++ b/42sh/includes/parser.h @@ -6,7 +6,7 @@ /* By: jhalford +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2016/12/01 12:15:54 by jhalford #+# #+# */ -/* Updated: 2017/02/06 20:35:28 by ariard ### ########.fr */ +/* Updated: 2017/02/09 15:58:13 by ariard ### ########.fr */ /* */ /* ************************************************************************** */ @@ -15,8 +15,87 @@ # include "minishell.h" -#define INSTRUCTION (TK_WHILE | TK_IF | TK_ELIF | TK_NEWLINE | TK_SEMI\ - | TK_ELSE | TK_UNTIL) +/* + * Parse POSIX grammar + * +*/ + +typedef unsigned long long int t_sym; + +#define PROGRAM (1UL << 0) +#define COMPLETE_COMMANDS (1UL << 1) +#define COMPLETE_COMMAND (1UL << 2) +#define LIST (1UL << 3) +#define AND_OR (1UL << 4) +#define PIPELINE (1UL << 5) +#define PIPE_SEQUENCE (1UL << 6) +#define COMMAND (1UL << 7) +#define COMPOUND_COMMAND (1UL << 8) +#define SUBSHELL (1UL << 9) +#define COMPOUND_LIST (1UL << 10) +#define TERM (1UL << 11) +#define FOR_CLAUSE (1UL << 12) +#define NAME (1UL << 13) +#define IN (1UL << 14) +#define WORDLIST (1UL << 15) +#define CASE_CLAUSE (1UL << 16) +#define CASE_LIST_NS (1UL << 17) +#define CASE_LIST (1UL << 18) +#define CASE_ITEM_NS (1UL << 19) +#define PATTERN (1UL << 20) +#define 
IF_CLAUSE (1UL << 21) +#define ELSE_PART (1UL << 22) +#define WHILE_CLAUSE (1UL << 23) +#define UNTIL_CLAUSE (1UL << 24) +#define FUNCTION_DEFINITION (1UL << 25) +#define FUNCTION_BODY (1UL << 26) +#define FNAME (1UL << 27) +#define BRACE_GROUP (1UL << 28) +#define DO_GROUP (1UL << 29) +#define SIMPLE_COMMAND (1UL << 30) +#define CMD_NAME (1UL << 31) +#define CMD_WORD (1ULL << 32) /* ULL from bit 32 up: unsigned long may be only 32 bits wide, t_sym (unsigned long long) is at least 64 */ +#define CMD_PREFIX (1ULL << 33) +#define CMD_SUFFIX (1ULL << 34) +#define REDIRECT_LIST (1ULL << 35) +#define IO_REDIRECT (1ULL << 36) +#define IO_FILE (1ULL << 37) +#define FILENAME (1ULL << 38) +#define IO_HERE (1ULL << 39) +#define HERE_END (1ULL << 40) +#define NEWLINE_LIST (1ULL << 41) +#define LINEBREAK (1ULL << 42) +#define SEPARATOR_OP (1ULL << 43) +#define SEPARATOR (1ULL << 44) +#define SEQUENTIAL_SEP (1ULL << 45) + +#define SYM_DLESS (1ULL << 46) +#define SYM_DGREAT (1ULL << 47) +#define SYM_GREATAND (1ULL << 48) +#define SYM_GREAT (1ULL << 49) +#define SYM_LESSAND (1ULL << 50) +#define SYM_LESS (1ULL << 51) + + +int ft_parse(t_btree **ast, t_list **token); +int produce_prim_sym(t_sym *new_sym, t_list **lst); + +int ft_read_stack(t_sym stack[], int size); +char *read_state(t_sym current); + +enum e_parstate +{ + UNDEFINED, + ERROR, + SUCCESS, +}; + +typedef enum e_parstate t_parstate; + +/* + * Build AST + * +*/ typedef struct s_parser t_parser; typedef struct s_ld t_ld; @@ -64,7 +143,6 @@ extern t_parser g_parser[]; int parse(t_btree **ast, t_list **token); -int ft_parse(t_btree **ast, t_list **token); int get_instruction(t_list **lst); int get_sub_instruction(t_btree **ast, t_list **start, t_list **lst); diff --git a/42sh/sample/stack.sh b/42sh/sample/stack.sh new file mode 100644 index 00000000..35665a6c --- /dev/null +++ b/42sh/sample/stack.sh @@ -0,0 +1 @@ +ls > file1 diff --git a/42sh/src/lexer/ft_lexer.c b/42sh/src/lexer/ft_lexer.c index 6682e759..48385360 100644 --- a/42sh/src/lexer/ft_lexer.c +++ b/42sh/src/lexer/ft_lexer.c @@ -6,7 +6,7 @@ /* By: jhalford +#+ +:+ +#+ */ /* +#+#+#+#+#+ 
+#+ */ /* Created: 2017/02/02 15:30:59 by jhalford #+# #+# */ -/* Updated: 2017/02/06 20:27:07 by ariard ### ########.fr */ +/* Updated: 2017/02/09 15:36:59 by ariard ### ########.fr */ /* */ /* ************************************************************************** */ diff --git a/42sh/src/lexer/ft_tokenize.c b/42sh/src/lexer/ft_tokenize.c index eabed451..2498ad2d 100644 --- a/42sh/src/lexer/ft_tokenize.c +++ b/42sh/src/lexer/ft_tokenize.c @@ -6,7 +6,7 @@ /* By: jhalford +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2016/11/10 13:37:11 by jhalford #+# #+# */ -/* Updated: 2017/02/03 20:04:33 by ariard ### ########.fr */ +/* Updated: 2017/02/09 15:37:13 by ariard ### ########.fr */ /* */ /* ************************************************************************** */ diff --git a/42sh/src/lexer/lexer_less.c b/42sh/src/lexer/lexer_less.c index 0b00c6a1..d9e8ea36 100644 --- a/42sh/src/lexer/lexer_less.c +++ b/42sh/src/lexer/lexer_less.c @@ -6,7 +6,7 @@ /* By: jhalford +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2016/12/03 12:06:53 by jhalford #+# #+# */ -/* Updated: 2017/02/03 14:50:00 by ariard ### ########.fr */ +/* Updated: 2017/02/09 15:00:13 by ariard ### ########.fr */ /* */ /* ************************************************************************** */ diff --git a/42sh/src/lexer/lexer_word.c b/42sh/src/lexer/lexer_word.c index 08ca6489..ef8b06e9 100644 --- a/42sh/src/lexer/lexer_word.c +++ b/42sh/src/lexer/lexer_word.c @@ -6,7 +6,7 @@ /* By: jhalford +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2016/12/03 12:07:11 by jhalford #+# #+# */ -/* Updated: 2017/02/03 19:52:48 by ariard ### ########.fr */ +/* Updated: 2017/02/09 15:37:21 by ariard ### ########.fr */ /* */ /* ************************************************************************** */ diff --git a/42sh/src/main/shell_script.c b/42sh/src/main/shell_script.c index 6fb3c788..e86194d7 100644 --- a/42sh/src/main/shell_script.c +++ b/42sh/src/main/shell_script.c @@ -6,7 +6,7 @@ /* By: ariard 
+#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2017/01/22 23:06:34 by ariard #+# #+# */ -/* Updated: 2017/02/06 22:05:34 by ariard ### ########.fr */ +/* Updated: 2017/02/09 15:26:55 by ariard ### ########.fr */ /* */ /* ************************************************************************** */ @@ -29,16 +29,12 @@ int shell_script() return (1); DG("after post_tokenize"); token_print(token); - - while (token) - { - if (parse(&ast, &token)) - return (1); - btree_print(STDBUG, ast, &ft_putast); - if (ft_exec(&ast)) - return (1); - ast = NULL; - } + if (ft_parse(&ast, &token)) + return (1); +// btree_print(STDBUG, ast, &ft_putast); +// if (ft_exec(&ast)) +// return (1); +// ast = NULL; script->size = 0; get_script_content(script); } diff --git a/42sh/src/parser/ft_parse.c b/42sh/src/parser/ft_parse.c index 5631a0a8..70a78875 100644 --- a/42sh/src/parser/ft_parse.c +++ b/42sh/src/parser/ft_parse.c @@ -3,56 +3,49 @@ /* ::: :::::::: */ /* ft_parse.c :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: jhalford +#+ +:+ +#+ */ +/* By: ariard +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ -/* Created: 2016/11/30 17:14:58 by jhalford #+# #+# */ -/* Updated: 2017/02/06 20:37:26 by ariard ### ########.fr */ +/* Created: 2017/02/09 14:30:22 by ariard #+# #+# */ +/* Updated: 2017/02/09 16:02:49 by ariard ### ########.fr */ /* */ /* ************************************************************************** */ #include "parser.h" -t_parser g_parser[] = +/* +** Work-in-progress pushdown parser: for every token left in *token, derive +** its primitive symbol with produce_prim_sym() and log the symbol name. +** ast is not built yet (unused). Always returns 0. +*/ +int ft_parse(t_btree **ast, t_list **token) { - {INSTRUCTION, &get_sub_instruction}, - {TK_AND_IF | TK_OR_IF, &parse_separator}, - {TK_AMP, &parse_separator}, - {TK_PIPE, &parse_separator}, - {TK_LESS, &parse_less}, - {TK_GREAT, &parse_great}, - {TK_DLESS, &parse_dless}, - {TK_DGREAT, &parse_dgreat}, - {TK_LESSAND, &parse_lessand}, - {TK_GREATAND, &parse_greatand}, - {TK_SUBSHELL, &parse_subshell}, - {TK_WORD, &parse_word}, - {0, 0}, -}; + t_sym new_sym; + t_parstate state; -int ft_parse(t_btree **ast, t_list **start) -{ - t_list *lst; - t_astnode 
item; - int i; - - i = 0; - if (!*start) - return (0); - if (!*ast) + (void)ast; + state = UNDEFINED; + new_sym = 0; + while (*token) { - *ast = btree_create_node(&item, sizeof(item)); - ((t_astnode *)(*ast)->item)->data.token = NULL; - ((t_astnode *)(*ast)->item)->type = 0; - } - while (g_parser[i].type) - { - if ((lst = ft_lst_find(*start, &g_parser[i].type, &token_cmp_type))) + produce_prim_sym(&new_sym, token); + DG("new sym : %s", read_state(new_sym)); +/* if (eval_sym(head_stack, new_sym)) + state = ERROR; + else { - if (g_parser[i].f) - (*g_parser[i].f)(ast, start, &lst); - return (0); + aggregate_sym(head_stack, new_sym, struct_sym); + if (struct_sym) + pop(struct_sym.sym); + else if (!same_sym(new_sym, head_stack)) + push(new_sym); } - i++; - } + build_tree(token, ast); + if (head_stack == PROGRAM) + state = PROGRAM; + if (state == ERROR) + return (error_syntax(token)); + if (state == PROGRAM) + return (0); +*/ } return (0); } diff --git a/42sh/src/parser/old_parse.c b/42sh/src/parser/old_parse.c new file mode 100644 index 00000000..929b2e4a --- /dev/null +++ b/42sh/src/parser/old_parse.c @@ -0,0 +1,58 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* ft_parse.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: jhalford +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2016/11/30 17:14:58 by jhalford #+# #+# */ +/* Updated: 2017/02/09 15:24:15 by ariard ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "parser.h" + +t_parser g_parser[] = +{ + {INSTRUCTION, &get_sub_instruction}, + {TK_AND_IF | TK_OR_IF, &parse_separator}, + {TK_AMP, &parse_separator}, + {TK_PIPE, &parse_separator}, + {TK_LESS, &parse_less}, + {TK_GREAT, &parse_great}, + {TK_DLESS, &parse_dless}, + {TK_DGREAT, &parse_dgreat}, + {TK_LESSAND, &parse_lessand}, + {TK_GREATAND, &parse_greatand}, + {TK_SUBSHELL, &parse_subshell}, + {TK_WORD, 
&parse_word}, + {0, 0}, +}; + +int ft_parse(t_btree **ast, t_list **start) +{ + t_list *lst; + t_astnode item; + int i; + + i = 0; + if (!*start) + return (0); + if (!*ast) + { + *ast = btree_create_node(&item, sizeof(item)); + ((t_astnode *)(*ast)->item)->data.token = NULL; + ((t_astnode *)(*ast)->item)->type = 0; + } + while (g_parser[i].type) + { + if ((lst = ft_lst_find(*start, &g_parser[i].type, &token_cmp_type))) + { + if (g_parser[i].f) + (*g_parser[i].f)(ast, start, &lst); + return (0); + } + i++; + } + return (0); +} diff --git a/42sh/src/parser/parse_word.c b/42sh/src/parser/parse_word.c index e265a6a0..7fa5fa7c 100644 --- a/42sh/src/parser/parse_word.c +++ b/42sh/src/parser/parse_word.c @@ -6,7 +6,7 @@ /* By: jhalford +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2016/11/14 12:49:45 by jhalford #+# #+# */ -/* Updated: 2017/02/03 19:37:29 by ariard ### ########.fr */ +/* Updated: 2017/02/09 15:24:49 by ariard ### ########.fr */ /* */ /* ************************************************************************** */ diff --git a/42sh/src/parser/produce_prim_sym.c b/42sh/src/parser/produce_prim_sym.c new file mode 100644 index 00000000..0aab7439 --- /dev/null +++ b/42sh/src/parser/produce_prim_sym.c @@ -0,0 +1,47 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* produce_prim_sym.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: ariard +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2017/02/09 14:55:10 by ariard #+# #+# */ +/* Updated: 2017/02/09 16:05:27 by ariard ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "parser.h" + +/* +** Translate the token at the head of *lst into a primitive symbol stored in +** *new_sym (0 when the token type has no primitive symbol), then pass that +** token to ft_lst_delif() with token_free, which presumably unlinks and frees it. +** Returns 0, or 1 without touching anything when there is no token to read. +*/ +int produce_prim_sym(t_sym *new_sym, t_list **lst) +{ + t_token *token; + + if (!new_sym || !lst || !*lst) + return (1); + token = (*lst)->content; + *new_sym = 0; + if (token->type == TK_N_WORD) + *new_sym = CMD_NAME; + else if (token->type == TK_NEWLINE) + *new_sym = NEWLINE_LIST; + else if (token->type == TK_DLESS) + *new_sym = SYM_DLESS; + else if (token->type 
== TK_DGREAT) + *new_sym = SYM_DGREAT; + else if (token->type == TK_GREATAND) + *new_sym = SYM_GREATAND; + else if (token->type == TK_GREAT) + *new_sym = SYM_GREAT; + else if (token->type == TK_LESSAND) + *new_sym = SYM_LESSAND; + else if (token->type == TK_LESS) + *new_sym = SYM_LESS; + ft_lst_delif(lst, (*lst)->content, &ft_addrcmp, &token_free); + return (0); +} diff --git a/42sh/src/parser/read_stack.c b/42sh/src/parser/read_stack.c new file mode 100644 index 00000000..cdf7a796 --- /dev/null +++ b/42sh/src/parser/read_stack.c @@ -0,0 +1,58 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* read_stack.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: ariard +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2017/02/09 15:32:10 by ariard #+# #+# */ +/* Updated: 2017/02/09 16:02:30 by ariard ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "parser.h" + +/* +** Return the printable name of a primitive symbol, or NULL when current is +** not one of the eight symbols listed below. +*/ +char *read_state(t_sym current) +{ + if (current == CMD_NAME) + return ("CMD_NAME"); + if (current == NEWLINE_LIST) + return ("NEWLINE_LIST"); + if (current == SYM_DLESS) + return ("DLESS"); + if (current == SYM_DGREAT) + return ("DGREAT"); + if (current == SYM_GREATAND) + return ("GREATAND"); + if (current == SYM_GREAT) + return ("GREAT"); + if (current == SYM_LESSAND) + return ("LESSAND"); + if (current == SYM_LESS) + return ("LESS"); + return (NULL); +} + +/* +** Print the name of each symbol from stack[size] down towards stack[0]. +** The walk ends at the first zero entry or once index 0 has been handled, so +** it never reads below the start of the array. Symbols read_state() cannot +** name are skipped. Always returns 0. +*/ +int ft_read_stack(t_sym stack[], int size) +{ + char *name; + + while (size >= 0 && stack[size]) + { + name = read_state(stack[size]); + if (name) + ft_putstr(name); + size--; + } + return (0); +}