From 0c99e8012c3f957f786071e51e06f93549241198 Mon Sep 17 00:00:00 2001 From: Jack Halford Date: Tue, 10 Jan 2017 14:09:18 +0100 Subject: [PATCH] lexer improvements --- 42sh/includes/lexer.h | 71 +++++++++++++++++--------------- 42sh/src/lexer/ft_tokenize.c | 12 +----- 42sh/src/lexer/get_lexer_state.c | 28 +++++++++++++ 42sh/src/lexer/lexer_default.c | 4 +- 42sh/src/lexer/lexer_delim.c | 2 +- 42sh/src/lexer/lexer_dquote.c | 9 ++-- 42sh/src/lexer/lexer_number.c | 5 ++- 42sh/src/lexer/lexer_quote.c | 7 ++-- 42sh/src/lexer/lexer_sep.c | 2 +- 42sh/src/lexer/lexer_word.c | 7 +++- 42sh/src/main/ft_putast.c | 19 +-------- 42sh/src/main/main.c | 4 +- 42sh/src/parser/ft_parse.c | 2 +- 13 files changed, 93 insertions(+), 79 deletions(-) create mode 100644 42sh/src/lexer/get_lexer_state.c diff --git a/42sh/includes/lexer.h b/42sh/includes/lexer.h index fbde230a..129b3cfe 100644 --- a/42sh/includes/lexer.h +++ b/42sh/includes/lexer.h @@ -6,7 +6,7 @@ /* By: jhalford +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2016/12/01 12:15:50 by jhalford #+# #+# */ -/* Updated: 2016/12/10 16:00:51 by jhalford ### ########.fr */ +/* Updated: 2017/01/10 13:59:06 by jhalford ### ########.fr */ /* */ /* ************************************************************************** */ @@ -17,20 +17,23 @@ typedef long long t_type; -# define TK_LESS 1 << 0 -# define TK_GREAT 1 << 1 -# define TK_DLESS 1 << 2 -# define TK_DGREAT 1 << 3 -# define TK_LESSAND 1 << 4 -# define TK_GREATAND 1 << 5 -# define TK_SEMI 1 << 6 -# define TK_PIPE 1 << 7 -# define TK_AND_IF 1 << 8 -# define TK_OR_IF 1 << 9 -# define TK_AMP 1 << 10 -# define TK_WORD 1 << 11 -# define TK_COMMAND 1 << 12 +# define TK_LESS (1 << 0) +# define TK_GREAT (1 << 1) +# define TK_DLESS (1 << 2) +# define TK_DGREAT (1 << 3) +# define TK_LESSAND (1 << 4) +# define TK_GREATAND (1 << 5) +# define TK_SEMI (1 << 6) +# define TK_PIPE (1 << 7) +# define TK_AND_IF (1 << 8) +# define TK_OR_IF (1 << 9) +# define TK_AMP (1 << 10) +# define TK_N_WORD (1 << 11) +# define TK_Q_WORD (1 << 12) +# define TK_DQ_WORD (1 << 13) +# define TK_COMMAND (1 << 14) +# define TK_WORD (TK_N_WORD | TK_Q_WORD | TK_DQ_WORD) # define TK_REDIR (0x1 | 0x2 | 0x4 | 0x8 | 0x10 | 0x20) enum e_lexstate @@ -62,27 +65,27 @@ typedef enum e_lexstate t_lexstate; extern int (*g_lexer[])(t_list **alst, char *str); -t_token *token_init(); -int ft_tokenize(t_list **alst, char *str, t_lexstate state); -int token_append(t_token *token, char c); -void token_free(void *data, size_t size); -int token_cmp_type(t_token *token, t_type *ref); -void token_print(t_list *lst); +t_token *token_init(); +int ft_tokenize(t_list **alst, char *str, t_lexstate state); +int token_append(t_token *token, char c); +void token_free(void *data, size_t size); +int token_cmp_type(t_token *token, t_type *ref); +void token_print(t_list *lst); -int ft_is_delim(char c); -void qstate_update(t_data *data, char c); +int ft_is_delim(char c); -int lexer_default(t_list **alst, char *str); -int lexer_delim(t_list **alst, char *str); -int lexer_sep(t_list **alst, char *str); -int lexer_word(t_list **alst, char *str); -int lexer_number(t_list **alst, char *str); -int lexer_less(t_list **alst, char *str); -int lexer_great(t_list **alst, char *str); -int lexer_lessand(t_list **alst, char *str); -int lexer_greatand(t_list **alst, char *str); -int lexer_quote(t_list **alst, char *str); -int lexer_dquote(t_list **alst, char *str); -int lexer_backslash(t_list **alst, char *str); +t_lexstate get_lexer_state(char *str); +int lexer_default(t_list **alst, char *str); +int lexer_delim(t_list **alst, char *str); +int lexer_sep(t_list **alst, char *str); +int lexer_word(t_list **alst, char *str); +int lexer_number(t_list **alst, char *str); +int lexer_less(t_list **alst, char *str); +int lexer_great(t_list **alst, char *str); +int lexer_lessand(t_list **alst, char *str); +int lexer_greatand(t_list **alst, char *str); +int lexer_quote(t_list **alst, char *str); +int lexer_dquote(t_list **alst, char *str); +int lexer_backslash(t_list **alst, char *str); #endif diff --git a/42sh/src/lexer/ft_tokenize.c b/42sh/src/lexer/ft_tokenize.c index 11b2f4bf..b1f95a2a 100644 --- a/42sh/src/lexer/ft_tokenize.c +++ b/42sh/src/lexer/ft_tokenize.c @@ -6,7 +6,7 @@ /* By: jhalford +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2016/11/10 13:37:11 by jhalford #+# #+# */ -/* Updated: 2016/12/05 14:15:23 by jhalford ### ########.fr */ +/* Updated: 2017/01/10 13:51:22 by jhalford ### ########.fr */ /* */ /* ************************************************************************** */ @@ -44,15 +44,5 @@ int ft_tokenize(t_list **alst, char *str, t_lexstate state) token = token_init(); *alst = ft_lstnew(token, sizeof(*token)); } - if (ft_is_delim(*str)) - state = DELIM; - else if (*str == '&' || *str == ';' || *str == '|') - state = SEP; - else if (*str == '\\') - state = BACKSLASH; - else if (*str == '\'') - return ((*g_lexer[QUOTE])(alst, str + 1)); - else if (*str == '\"') - return ((*g_lexer[DQUOTE])(alst, str + 1)); return ((*g_lexer[state])(alst, str)); } diff --git a/42sh/src/lexer/get_lexer_state.c b/42sh/src/lexer/get_lexer_state.c new file mode 100644 index 00000000..e58a31d6 --- /dev/null +++ b/42sh/src/lexer/get_lexer_state.c @@ -0,0 +1,28 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* get_lexer_state.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: jhalford +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2017/01/10 13:45:46 by jhalford #+# #+# */ +/* Updated: 2017/01/10 13:52:54 by jhalford ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "lexer.h" + +t_lexstate get_lexer_state(char *str) +{ + if (ft_is_delim(*str)) + return (DELIM); + else if (*str == '&' || *str == ';' || *str == '|') + return (SEP); + else if (*str == '\\') + return (BACKSLASH); + else if (*str == '\'') + return (QUOTE); + else if (*str == '\"') + return (DQUOTE); + return (0); +} diff --git a/42sh/src/lexer/lexer_default.c b/42sh/src/lexer/lexer_default.c index da00baee..e4fbb58d 100644 --- a/42sh/src/lexer/lexer_default.c +++ b/42sh/src/lexer/lexer_default.c @@ -6,7 +6,7 @@ /* By: jhalford +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2016/11/28 18:36:21 by jhalford #+# #+# */ -/* Updated: 2016/12/05 14:15:26 by jhalford ### ########.fr */ +/* Updated: 2017/01/10 13:54:17 by jhalford ### ########.fr */ /* */ /* ************************************************************************** */ @@ -18,6 +18,8 @@ int lexer_default(t_list **alst, char *str) t_token *token; state = DEFAULT; + if ((state = get_lexer_state(str))) + return (ft_tokenize(alst, str, state)); if (*str == '>') return (ft_tokenize(alst, str, GREAT)); else if (*str == '<') diff --git a/42sh/src/lexer/lexer_delim.c b/42sh/src/lexer/lexer_delim.c index 6c79b0a8..278210dd 100644 --- a/42sh/src/lexer/lexer_delim.c +++ b/42sh/src/lexer/lexer_delim.c @@ -6,7 +6,7 @@ /* By: jhalford +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2016/12/03 11:58:44 by jhalford #+# #+# */ -/* Updated: 2016/12/03 11:58:45 by jhalford ### ########.fr */ +/* Updated: 2017/01/10 13:49:23 by jhalford ### ########.fr */ /* */ /* ************************************************************************** */ diff --git a/42sh/src/lexer/lexer_dquote.c b/42sh/src/lexer/lexer_dquote.c index 0bf59e62..128648b4 100644 --- a/42sh/src/lexer/lexer_dquote.c +++ b/42sh/src/lexer/lexer_dquote.c @@ -6,7 +6,7 @@ /* By: jhalford +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2016/11/28 18:36:58 by jhalford #+# #+# */ -/* Updated: 2016/12/03 12:43:22 by jhalford ### ########.fr */ +/* Updated: 2017/01/10 13:58:17 by jhalford ### ########.fr */ /* */ /* ************************************************************************** */ @@ -17,14 +17,15 @@ int lexer_dquote(t_list **alst, char *str) t_token *token; token = (*alst)->content; - token->type = TK_WORD; + token->type = TK_DQ_WORD; + str++; if (*str == '\"') return (ft_tokenize(&(*alst)->next, str + 1, DEFAULT)); if (*str == '\\') { token_append(token, *(str + 1)); - return (lexer_dquote(alst, str + 2)); + return (lexer_dquote(alst, str + 1)); } token_append(token, *str); - return (lexer_dquote(alst, str + 1)); + return (lexer_dquote(alst, str)); } diff --git a/42sh/src/lexer/lexer_number.c b/42sh/src/lexer/lexer_number.c index 83816749..f2d3f558 100644 --- a/42sh/src/lexer/lexer_number.c +++ b/42sh/src/lexer/lexer_number.c @@ -6,7 +6,7 @@ /* By: jhalford +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2016/12/03 12:06:45 by jhalford #+# #+# */ -/* Updated: 2016/12/03 12:06:46 by jhalford ### ########.fr */ +/* Updated: 2017/01/10 13:54:33 by jhalford ### ########.fr */ /* */ /* ************************************************************************** */ @@ -15,8 +15,11 @@ int lexer_number(t_list **alst, char *str) { t_token *token; + t_lexstate state; token = (*alst)->content; + if ((state = get_lexer_state(str))) + return (ft_tokenize(alst, str, state)); if (*str == '>') return (ft_tokenize(alst, str, GREAT)); else if (*str == '<') diff --git a/42sh/src/lexer/lexer_quote.c b/42sh/src/lexer/lexer_quote.c index 46344785..458dd3aa 100644 --- a/42sh/src/lexer/lexer_quote.c +++ b/42sh/src/lexer/lexer_quote.c @@ -6,7 +6,7 @@ /* By: jhalford +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2016/12/03 12:07:08 by jhalford #+# #+# */ -/* Updated: 2016/12/03 12:07:08 by jhalford ### ########.fr */ +/* Updated: 2017/01/10 13:57:41 by jhalford ### ########.fr */ /* */ /* ************************************************************************** */ @@ -17,9 +17,10 @@ int lexer_quote(t_list **alst, char *str) t_token *token; token = (*alst)->content; - token->type = TK_WORD; + token->type = TK_Q_WORD; + str++; if (*str == '\'') return (ft_tokenize(&(*alst)->next, str + 1, WORD)); token_append(token, *str); - return (lexer_quote(alst, str + 1)); + return (lexer_quote(alst, str)); } diff --git a/42sh/src/lexer/lexer_sep.c b/42sh/src/lexer/lexer_sep.c index fa511fb5..d44a62df 100644 --- a/42sh/src/lexer/lexer_sep.c +++ b/42sh/src/lexer/lexer_sep.c @@ -6,7 +6,7 @@ /* By: jhalford +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2016/11/30 16:29:57 by jhalford #+# #+# */ -/* Updated: 2016/11/30 16:56:58 by jhalford ### ########.fr */ +/* Updated: 2017/01/10 13:49:57 by jhalford ### ########.fr */ /* */ /* ************************************************************************** */ diff --git a/42sh/src/lexer/lexer_word.c b/42sh/src/lexer/lexer_word.c index f6375e58..52c4c6e2 100644 --- a/42sh/src/lexer/lexer_word.c +++ b/42sh/src/lexer/lexer_word.c @@ -6,7 +6,7 @@ /* By: jhalford +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2016/12/03 12:07:11 by jhalford #+# #+# */ -/* Updated: 2016/12/03 12:07:12 by jhalford ### ########.fr */ +/* Updated: 2017/01/10 13:59:38 by jhalford ### ########.fr */ /* */ /* ************************************************************************** */ @@ -15,9 +15,12 @@ int lexer_word(t_list **alst, char *str) { t_token *token; + t_lexstate state; token = (*alst)->content; - token->type = TK_WORD; + token->type = TK_N_WORD; + if ((state = get_lexer_state(str))) + return (ft_tokenize(alst, str, state)); if (*str == '>') return (ft_tokenize(&(*alst)->next, str, GREAT)); else if (*str == '<') diff --git a/42sh/src/main/ft_putast.c b/42sh/src/main/ft_putast.c index 03607793..a720ea97 100644 --- a/42sh/src/main/ft_putast.c +++ b/42sh/src/main/ft_putast.c @@ -6,7 +6,7 @@ /* By: jhalford +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2016/11/14 18:18:04 by jhalford #+# #+# */ -/* Updated: 2016/12/06 20:09:27 by jhalford ### ########.fr */ +/* Updated: 2017/01/10 14:02:06 by jhalford ### ########.fr */ /* */ /* ************************************************************************** */ @@ -19,20 +19,6 @@ char *ft_putast(void *nodein) char str[5]; t_type type; }; - -/* struct s_tmp[] = */ -/* { */ -/* {TK_AMP, " & "}, */ -/* {TK_SEMI, " & "}, */ -/* {TK_AND_IF, " & "}, */ -/* {TK_OR_IF, " & "}, */ -/* {TK_PIPE, " & "}, */ -/* {TK_COMMAND, " & "}, */ -/* {TK_GREAT, " & "}, */ -/* {TK_AMP, " & "}, */ -/* {TK_AMP, " & "}, */ -/* } */ - node = nodein; if (node->type == TK_AMP) return (" & "); @@ -59,8 +45,5 @@ char *ft_putast(void *nodein) else if (node->type == TK_LESSAND) return (" <& "); else - { - ft_printf("type=%02i\n", node->type); return ("OTHER"); - } } diff --git a/42sh/src/main/main.c b/42sh/src/main/main.c index ef64df79..c8fc311b 100644 --- a/42sh/src/main/main.c +++ b/42sh/src/main/main.c @@ -6,7 +6,7 @@ /* By: jhalford +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2016/12/06 18:40:58 by jhalford #+# #+# */ -/* Updated: 2017/01/10 13:08:14 by jhalford ### ########.fr */ +/* Updated: 2017/01/10 14:00:17 by jhalford ### ########.fr */ /* */ /* ************************************************************************** */ @@ -31,7 +31,7 @@ int main(void) return (1); if (!token) continue ; - /* token_print(token); */ + token_print(token); if (ft_parse(&ast, &token)) return (1); btree_print(STDBUG, ast, &ft_putast); diff --git a/42sh/src/parser/ft_parse.c b/42sh/src/parser/ft_parse.c index cd669aa6..329566fe 100644 --- a/42sh/src/parser/ft_parse.c +++ b/42sh/src/parser/ft_parse.c @@ -6,7 +6,7 @@ /* By: jhalford +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2016/11/30 17:14:58 by jhalford #+# #+# */ -/* Updated: 2016/12/07 17:37:25 by jhalford ### ########.fr */ +/* Updated: 2017/01/10 14:00:09 by jhalford ### ########.fr */ /* */ /* ************************************************************************** */