From 6ec9f9295ea54a70e84cc375c3aaef245e79337e Mon Sep 17 00:00:00 2001 From: Jack Halford Date: Fri, 10 Feb 2017 05:19:26 +0100 Subject: [PATCH] initial implementation looks ok, parenthesis needs fix --- 42sh/Makefile | 3 +- 42sh/includes/lexer.h | 13 ++-- 42sh/src/lexer/ft_post_tokenize.c | 40 ----------- 42sh/src/lexer/get_state_global.c | 2 +- 42sh/src/lexer/lexer_bquote.c | 14 ++-- 42sh/src/lexer/lexer_default.c | 19 ++---- 42sh/src/lexer/lexer_delim.c | 6 +- 42sh/src/lexer/lexer_dquote.c | 20 +++--- 42sh/src/lexer/lexer_lex.c | 2 +- 42sh/src/lexer/lexer_paren.c | 31 +++++++++ 42sh/src/lexer/lexer_quote.c | 16 +++-- 42sh/src/lexer/lexer_subshell.c | 37 ---------- 42sh/src/lexer/lexer_word.c | 5 +- 42sh/src/lexer/stack_to_prompt.c | 12 ++-- 42sh/src/line-editing/get_touch.c | 4 +- 42sh/src/line-editing/reader.c | 108 +++++++++++++++--------------- 42sh/src/main/main.c | 7 +- 17 files changed, 150 insertions(+), 189 deletions(-) delete mode 100644 42sh/src/lexer/ft_post_tokenize.c create mode 100644 42sh/src/lexer/lexer_paren.c delete mode 100644 42sh/src/lexer/lexer_subshell.c diff --git a/42sh/Makefile b/42sh/Makefile index d09f5200..0a38131a 100644 --- a/42sh/Makefile +++ b/42sh/Makefile @@ -137,7 +137,6 @@ job-control/sigtstp_handler.c\ job-control/sigttin_handler.c\ job-control/sigttou_handler.c\ lexer/command_getoutput.c\ -lexer/ft_post_tokenize.c\ lexer/get_state_global.c\ lexer/get_state_redir.c\ lexer/lexer_backslash.c\ @@ -153,9 +152,9 @@ lexer/lexer_lessand.c\ lexer/lexer_lex.c\ lexer/lexer_newline.c\ lexer/lexer_number.c\ +lexer/lexer_paren.c\ lexer/lexer_quote.c\ lexer/lexer_sep.c\ -lexer/lexer_subshell.c\ lexer/lexer_word.c\ lexer/reduce_bquotes.c\ lexer/reduce_parens.c\ diff --git a/42sh/includes/lexer.h b/42sh/includes/lexer.h index b9f11bde..14d04d60 100644 --- a/42sh/includes/lexer.h +++ b/42sh/includes/lexer.h @@ -31,14 +31,15 @@ # define TK_PAREN_OPEN (1 << 11) # define TK_PAREN_CLOSE (1 << 12) # define TK_BQUOTE (1 << 13) -# define TK_N_WORD 
(1 << 14) -# define TK_Q_WORD (1 << 15) -# define TK_DQ_WORD (1 << 16) +# define TK_WORD (1 << 14) +/* # define TK_N_WORD (1 << 14) */ +/* # define TK_Q_WORD (1 << 15) */ +/* # define TK_DQ_WORD (1 << 16) */ # define TK_COMMAND (1 << 17) # define TK_SUBSHELL (1 << 18) # define TK_NEWLINE (1 << 19) -# define TK_WORD (TK_N_WORD | TK_Q_WORD | TK_DQ_WORD) +/* # define TK_WORD (TK_N_WORD | TK_Q_WORD | TK_DQ_WORD) */ # define TK_REDIR (0x1 | 0x2 | 0x4 | 0x8 | 0x10 | 0x20) # define TK_NON_FREEABLE (TK_PAREN_OPEN | TK_PAREN_CLOSE | TK_BQUOTE) @@ -58,7 +59,7 @@ enum e_lexstate DQUOTE, BQUOTE, BACKSLASH, - SUBSHELL, + PAREN, COMMENT, }; @@ -116,7 +117,7 @@ int lexer_quote(t_list **alst, t_lexer *lexer); int lexer_dquote(t_list **alst, t_lexer *lexer); int lexer_bquote(t_list **alst, t_lexer *lexer); int lexer_backslash(t_list **alst, t_lexer *lexer); -int lexer_subshell(t_list **alst, t_lexer *lexer); +int lexer_paren(t_list **alst, t_lexer *lexer); int lexer_comment(t_list **alst, t_lexer *lexer); #endif diff --git a/42sh/src/lexer/ft_post_tokenize.c b/42sh/src/lexer/ft_post_tokenize.c deleted file mode 100644 index a0b4941f..00000000 --- a/42sh/src/lexer/ft_post_tokenize.c +++ /dev/null @@ -1,40 +0,0 @@ -/* ************************************************************************** */ -/* */ -/* ::: :::::::: */ -/* ft_post_tokenize.c :+: :+: :+: */ -/* +:+ +:+ +:+ */ -/* By: jhalford +#+ +:+ +#+ */ -/* +#+#+#+#+#+ +#+ */ -/* Created: 2017/01/11 16:11:11 by jhalford #+# #+# */ -/* Updated: 2017/02/06 14:33:34 by jhalford ### ########.fr */ -/* */ -/* ************************************************************************** */ - -#include "lexer.h" - -int ft_post_tokenize(t_list **alst, char **str) -{ - int ret; - t_flag tk; - - while ((ret = reduce_parens(alst, *str))) - if (ret == -1) - { - ft_dprintf(2, "{red}%s: parse error near '('{eoc}\n", SHELL_NAME); - return (-1); - } - tk = TK_PAREN_CLOSE; - if (ft_lst_find(*alst, &tk, token_cmp_type)) - { - ft_dprintf(2, "{red}%s: 
parse error near ')'{eoc}\n", SHELL_NAME); - return (-1); - } - while ((ret = reduce_bquotes(alst, str))) - if (ret == -1) - { - ft_dprintf(2, "{red}%s: parse error near '`'{eoc}\n", SHELL_NAME); - return (-1); - } - DG("new command from bquotes: '%s'", *str); - return (0); -} diff --git a/42sh/src/lexer/get_state_global.c b/42sh/src/lexer/get_state_global.c index 3f59ab73..bed48d83 100644 --- a/42sh/src/lexer/get_state_global.c +++ b/42sh/src/lexer/get_state_global.c @@ -32,6 +32,6 @@ t_lexstate get_state_global(t_lexer *lexer) else if (c == '`') return (BQUOTE); else if (c == '(' || c == ')') - return (SUBSHELL); + return (PAREN); return (0); } diff --git a/42sh/src/lexer/lexer_bquote.c b/42sh/src/lexer/lexer_bquote.c index a9e32dda..af4020b7 100644 --- a/42sh/src/lexer/lexer_bquote.c +++ b/42sh/src/lexer/lexer_bquote.c @@ -17,19 +17,21 @@ int lexer_bquote(t_list **alst, t_lexer *lexer) t_token *token; token = (*alst)->content; - token->type = TK_Q_WORD; - lexer->pos++; - push(&lexer->stack, BQUOTE); + token->type = TK_WORD; if (lexer->str[lexer->pos] == '`') { - lexer->state = WORD; lexer->pos++; + if (!(lexer->stack && *(int*)lexer->stack->content == BQUOTE)) + { + push(&lexer->stack, BQUOTE); + return (lexer_lex(alst, lexer)); + } + lexer->state = WORD; pop(&lexer->stack); return (lexer_lex(alst, lexer)); } - else if (lexer->str[lexer->pos] == 0) - return (0); token_append(token, lexer, 0, 0); + lexer->pos++; return (lexer_quote(alst, lexer)); } diff --git a/42sh/src/lexer/lexer_default.c b/42sh/src/lexer/lexer_default.c index ed43b9d6..777d6f11 100644 --- a/42sh/src/lexer/lexer_default.c +++ b/42sh/src/lexer/lexer_default.c @@ -14,27 +14,16 @@ int lexer_default(t_list **alst, t_lexer *lexer) { - t_lexstate state; t_token *token; char c; c = lexer->str[lexer->pos]; - if ((state = get_state_global(lexer))) - { - lexer->state = state; + if ((lexer->state = get_state_global(lexer))) return (lexer_lex(alst, lexer)); - } - if ((state = get_state_redir(lexer))) - { 
- lexer->state = state; + if ((lexer->state = get_state_redir(lexer))) return (lexer_lex(alst, lexer)); - } - else if (ft_isdigit(c)) - lexer->state = NUMBER; - else - lexer->state = WORD; + lexer->state = ft_isdigit(c) ? NUMBER : WORD; token = (*alst)->content; - token_append(token, lexer, 0, 0); - token->type = TK_N_WORD; + token->type = TK_WORD; return (lexer_lex(alst, lexer)); } diff --git a/42sh/src/lexer/lexer_delim.c b/42sh/src/lexer/lexer_delim.c index 6fa29b29..a30dd955 100644 --- a/42sh/src/lexer/lexer_delim.c +++ b/42sh/src/lexer/lexer_delim.c @@ -17,13 +17,17 @@ int lexer_delim(t_list **alst, t_lexer *lexer) t_token *token; token = (*alst)->content; + DG("DELIM"); while (ft_is_delim(lexer->str[lexer->pos])) lexer->pos++; + lexer->state = DEFAULT; if (token->type) + { return (lexer_lex(&(*alst)->next, lexer)); + } else { - if (!lexer->str[lexer->pos]) + if (lexer->str[lexer->pos] == 0) ft_lst_delif(alst, (*alst)->content, &ft_addrcmp, &token_free); return (lexer_lex(alst, lexer)); } diff --git a/42sh/src/lexer/lexer_dquote.c b/42sh/src/lexer/lexer_dquote.c index 7077fd7b..68157512 100644 --- a/42sh/src/lexer/lexer_dquote.c +++ b/42sh/src/lexer/lexer_dquote.c @@ -17,18 +17,20 @@ int lexer_dquote(t_list **alst, t_lexer *lexer) t_token *token; token = (*alst)->content; - token->type = TK_DQ_WORD; - if (*(int*)lexer->stack->content != DQUOTE) - push(&lexer->stack, DQUOTE); - lexer->pos++; + token->type = TK_WORD; if (lexer->str[lexer->pos] == '"') { lexer->pos++; + if (!(lexer->stack && *(int*)lexer->stack->content == DQUOTE)) + { + push(&lexer->stack, DQUOTE); + return (lexer_lex(alst, lexer)); + } lexer->state = WORD; pop(&lexer->stack); return (lexer_lex(alst, lexer)); } - else if (lexer->str[lexer->pos] == '\\') + if (lexer->str[lexer->pos] == '\\') { if (lexer->str[lexer->pos + 1] == '"') token_append(token, lexer, 1, 0); @@ -42,9 +44,11 @@ int lexer_dquote(t_list **alst, t_lexer *lexer) return (lexer_dquote(alst,lexer)); } else if 
(lexer->str[lexer->pos] == '`') + { + lexer->state = BQUOTE; lexer_bquote(alst, lexer); - else if (lexer->str[lexer->pos] == 0) - return (0); + } token_append(token, lexer, 1, 0); - return (lexer_dquote(alst, lexer)); + lexer->pos++; + return (lexer_lex(alst, lexer)); } diff --git a/42sh/src/lexer/lexer_lex.c b/42sh/src/lexer/lexer_lex.c index a406fbc8..054b03d9 100644 --- a/42sh/src/lexer/lexer_lex.c +++ b/42sh/src/lexer/lexer_lex.c @@ -28,7 +28,7 @@ int (*g_lexer[])(t_list **alst, t_lexer *lexer) = &lexer_dquote, &lexer_bquote, &lexer_backslash, - &lexer_subshell, + &lexer_paren, &lexer_comment, }; diff --git a/42sh/src/lexer/lexer_paren.c b/42sh/src/lexer/lexer_paren.c new file mode 100644 index 00000000..dd795357 --- /dev/null +++ b/42sh/src/lexer/lexer_paren.c @@ -0,0 +1,31 @@ +#include "lexer.h" + +int lexer_paren(t_list **alst, t_lexer *lexer) +{ + t_token *token; + t_list **lst; + + lst = alst; + if (*alst) + { + token = (*alst)->content; + if (token->type) + lst = &(*alst)->next; + } + token = token_init(); + *lst = ft_lstnew(token, sizeof(*token)); + token = (*lst)->content; + if (lexer->str[lexer->pos] == '(') + { + token->type = TK_PAREN_OPEN; + push(&lexer->stack, PAREN); + } + else if (lexer->stack && *(int*)lexer->stack->content == PAREN) + { + token->type = TK_PAREN_CLOSE; + pop(&lexer->stack); + } + lexer->pos++; + lexer->state = DEFAULT; + return (lexer_lex(&(*lst)->next, lexer)); +} diff --git a/42sh/src/lexer/lexer_quote.c b/42sh/src/lexer/lexer_quote.c index b0764b7d..2d3a6432 100644 --- a/42sh/src/lexer/lexer_quote.c +++ b/42sh/src/lexer/lexer_quote.c @@ -17,18 +17,20 @@ int lexer_quote(t_list **alst, t_lexer *lexer) t_token *token; token = (*alst)->content; - token->type = TK_Q_WORD; - lexer->pos++; - push(&lexer->stack, QUOTE); + token->type = TK_WORD; if (lexer->str[lexer->pos] == '\'') { - lexer->state = WORD; lexer->pos++; + if (!(lexer->stack && *(int*)lexer->stack->content == QUOTE)) + { + push(&lexer->stack, QUOTE); + return 
(lexer_lex(alst, lexer)); + } + lexer->state = WORD; pop(&lexer->stack); return (lexer_lex(alst, lexer)); } - else if (lexer->str[lexer->pos] == 0) - return (0); token_append(token, lexer, 1, 1); - return (lexer_quote(alst, lexer)); + lexer->pos++; + return (lexer_lex(alst, lexer)); } diff --git a/42sh/src/lexer/lexer_subshell.c b/42sh/src/lexer/lexer_subshell.c deleted file mode 100644 index 278849bb..00000000 --- a/42sh/src/lexer/lexer_subshell.c +++ /dev/null @@ -1,37 +0,0 @@ -/* ************************************************************************** */ -/* */ -/* ::: :::::::: */ -/* lexer_special.c :+: :+: :+: */ -/* +:+ +:+ +:+ */ -/* By: jhalford +#+ +:+ +#+ */ -/* +#+#+#+#+#+ +#+ */ -/* Created: 2017/01/11 15:35:38 by jhalford #+# #+# */ -/* Updated: 2017/02/09 22:09:07 by jhalford ### ########.fr */ -/* */ -/* ************************************************************************** */ - -#include "lexer.h" - -int lexer_subshell(t_list **alst, t_lexer *lexer) -{ - t_token *token; - t_list **lst; - - lst = alst; - if (*alst) - { - token = (*alst)->content; - if (token->type) - lst = &(*alst)->next; - } - token = token_init(); - *lst = ft_lstnew(token, sizeof(*token)); - token = (*lst)->content; - if (lexer->str[lexer->pos] == '(') - token->type = TK_PAREN_OPEN; - else if (lexer->str[lexer->pos] == ')') - token->type = TK_PAREN_CLOSE; - token->data = lexer->str; - lexer->pos++; - return (lexer_lex(&(*lst)->next, lexer)); -} diff --git a/42sh/src/lexer/lexer_word.c b/42sh/src/lexer/lexer_word.c index b5fa0ae9..599b2ac7 100644 --- a/42sh/src/lexer/lexer_word.c +++ b/42sh/src/lexer/lexer_word.c @@ -18,9 +18,12 @@ int lexer_word(t_list **alst, t_lexer *lexer) t_lexstate state; token = (*alst)->content; - token->type = TK_N_WORD; + token->type = TK_WORD; if ((state = get_state_global(lexer))) + { + lexer->state = state; return (lexer_lex(alst, lexer)); + } if ((state = get_state_redir(lexer))) { lexer->state = state; diff --git 
a/42sh/src/lexer/stack_to_prompt.c b/42sh/src/lexer/stack_to_prompt.c index 0343dcb2..35d83a5c 100644 --- a/42sh/src/lexer/stack_to_prompt.c +++ b/42sh/src/lexer/stack_to_prompt.c @@ -22,13 +22,13 @@ char *stack_to_prompt(t_list *stack) if (top == BACKSLASH) return ("> "); else if (top == QUOTE) - return ("quote >"); + return ("quote> "); else if (top == DQUOTE) - return ("dquote >"); + return ("dquote> "); else if (top == BQUOTE) - return ("bquote >"); - else if (top == SUBSHELL) - return ("subsh >"); + return ("bquote> "); + else if (top == PAREN) + return ("subsh> "); else - return (" > "); + return ("error> "); } diff --git a/42sh/src/line-editing/get_touch.c b/42sh/src/line-editing/get_touch.c index 71c31e85..7c90314c 100644 --- a/42sh/src/line-editing/get_touch.c +++ b/42sh/src/line-editing/get_touch.c @@ -71,7 +71,7 @@ char *ft_read_stdin(void) ft_print(ret); else if (ret == 10) return (STR); - /* else if (ft_isascii(ret) == 0) */ - /* ft_read_it(ret, &POS, &STR); */ + else if (ft_isascii(ret) == 0) + ft_read_it(ret, &POS, &STR); } } diff --git a/42sh/src/line-editing/reader.c b/42sh/src/line-editing/reader.c index 3068e52d..6d58ce23 100644 --- a/42sh/src/line-editing/reader.c +++ b/42sh/src/line-editing/reader.c @@ -10,62 +10,62 @@ /* */ /* ************************************************************************** */ -/* #include "minishell.h" */ +#include "minishell.h" -/* static void ft_read_it_3(char **str, char t[5], size_t *pos, int *j) */ -/* { */ -/* int i; */ +static void ft_read_it_3(char **str, char t[5], size_t *pos, int *j) +{ + int i; -/* i = 0; */ -/* while (i < 4 && t[i] == '\0') */ -/* ++i; */ -/* while (i < 4) */ -/* { */ -/* if (t[i] && ft_isprint(t[i])) */ -/* { */ -/* *str = ft_realloc_imput(*str, t[i], *pos); */ -/* ++(*pos); */ -/* ++(*j); */ -/* } */ -/* ++i; */ -/* } */ -/* } */ + i = 0; + while (i < 4 && t[i] == '\0') + ++i; + while (i < 4) + { + if (t[i] && ft_isprint(t[i])) + { + *str = ft_realloc_imput(*str, t[i], *pos); + 
++(*pos); + ++(*j); + } + ++i; + } +} -/* static void ft_read_it_2(int input, char t[5]) */ -/* { */ -/* t[3] = (input / ft_pow(256, 3)) ? (input / ft_pow(256, 3)) : '\0'; */ -/* if (t[3]) */ -/* input = input % ft_pow(256, 3); */ -/* t[2] = (input / ft_pow(256, 2)) ? (input / ft_pow(256, 2)) : '\0'; */ -/* if (t[2]) */ -/* input = input % ft_pow(256, 2); */ -/* t[1] = (input / ft_pow(256, 1)) ? (input / ft_pow(256, 1)) : '\0'; */ -/* if (t[1]) */ -/* input = input % ft_pow(256, 1); */ -/* t[0] = (input / ft_pow(256, 0)) ? (input / ft_pow(256, 0)) : '\0'; */ -/* if (t[0]) */ -/* input = input % ft_pow(256, 0); */ -/* t[4] = '\0'; */ -/* } */ +static void ft_read_it_2(int input, char t[5]) +{ + t[3] = (input / ft_pow(256, 3)) ? (input / ft_pow(256, 3)) : '\0'; + if (t[3]) + input = input % ft_pow(256, 3); + t[2] = (input / ft_pow(256, 2)) ? (input / ft_pow(256, 2)) : '\0'; + if (t[2]) + input = input % ft_pow(256, 2); + t[1] = (input / ft_pow(256, 1)) ? (input / ft_pow(256, 1)) : '\0'; + if (t[1]) + input = input % ft_pow(256, 1); + t[0] = (input / ft_pow(256, 0)) ? 
(input / ft_pow(256, 0)) : '\0'; + if (t[0]) + input = input % ft_pow(256, 0); + t[4] = '\0'; +} -/* void ft_read_it(int input, size_t *pos, char **str) */ -/* { */ -/* int j; */ -/* char t[5]; */ -/* size_t pos_tmp; */ +void ft_read_it(int input, size_t *pos, char **str) +{ + int j; + char t[5]; + size_t pos_tmp; -/* j = 0; */ -/* pos_tmp = *pos; */ -/* if (input == TOUCHE_DELETE || input < 0 || input == 892427035 || */ -/* input == 126 || input == 993090331 || input == 925981467 || */ -/* input == 21298 || input == 892427035 || input == 8270395 || input == */ -/* 942758683 || input == 993090331 || input == 18489 || input == 17977) */ -/* return ; */ -/* ft_read_it_2(input, t); */ -/* ft_read_it_3(str, t, pos, &j); */ -/* *pos = pos_tmp; */ -/* ft_current_str((*str), *pos); */ -/* ft_get_next_str((*str), pos); */ -/* ft_putnc('\b', *pos - (pos_tmp + j)); */ -/* *pos = (pos_tmp + j); */ -/* } */ + j = 0; + pos_tmp = *pos; + if (input == TOUCHE_DELETE || input < 0 || input == 892427035 || + input == 126 || input == 993090331 || input == 925981467 || + input == 21298 || input == 892427035 || input == 8270395 || input == + 942758683 || input == 993090331 || input == 18489 || input == 17977) + return ; + ft_read_it_2(input, t); + ft_read_it_3(str, t, pos, &j); + *pos = pos_tmp; + ft_current_str((*str), *pos); + ft_get_next_str((*str), pos); + ft_putnc('\b', *pos - (pos_tmp + j)); + *pos = (pos_tmp + j); +} diff --git a/42sh/src/main/main.c b/42sh/src/main/main.c index 4cdf0625..a847c100 100644 --- a/42sh/src/main/main.c +++ b/42sh/src/main/main.c @@ -24,17 +24,20 @@ int interactive_shell() lexer.str = NULL; token = NULL; lexer.stack = NULL; + ast = NULL; do { if (lexer.stack && *(int*)lexer.stack->content == BACKSLASH) pop(&lexer.stack); ft_strappend(&lexer.str, readline(stack_to_prompt(lexer.stack))); + DG("[{mag}%s{eoc}]", lexer.str); ltoken = ft_lstlast(token); lexer_lex((token ? 
&ltoken : &token), &lexer); token_print(token); - } while (lexer.stack->content); + } while (lexer.stack); + DG("after lexing"); + token_print(token); if (ft_parse(&ast, &token)) return (1); - btree_print(STDBUG, ast, &ft_putast); if (ft_exec(&ast)) return (1);