Skip site navigation (1) Skip section navigation (2)
Date:      Sun, 3 Jun 2018 18:19:41 +0000 (UTC)
From:      Piotr Pawel Stefaniak <pstef@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r334576 - head/usr.bin/indent
Message-ID:  <201806031819.w53IJfII038354@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: pstef
Date: Sun Jun  3 18:19:41 2018
New Revision: 334576
URL: https://svnweb.freebsd.org/changeset/base/334576

Log:
  indent(1): improve CHECK_SIZE_ macros
  
  Rewrite the macros so that they take a parameter. Consumers use it to signal
  how much room in the buffer they need; this lets them do the check once,
  when the required space is known, instead of on every loop iteration.
  
  Also take the parameter value into consideration when resizing the buffer;
  the requested space may be larger than the constant 400 bytes that the
  previous version grew the buffer by - now the buffer grows by the sum of
  those two values.
  
  On the consumer side, don't copy strings byte by byte - use memcpy().
  
  Deduplicate code that copied base 2, base 8 and base 16 literals.
  
  Don't advance the e_token pointer once the token has been copied into
  s_token. This allows easy calculation of the token's length.

Modified:
  head/usr.bin/indent/indent.c
  head/usr.bin/indent/indent_globs.h
  head/usr.bin/indent/lexi.c
  head/usr.bin/indent/pr_comment.c

Modified: head/usr.bin/indent/indent.c
==============================================================================
--- head/usr.bin/indent/indent.c	Sun Jun  3 18:17:07 2018	(r334575)
+++ head/usr.bin/indent/indent.c	Sun Jun  3 18:19:41 2018	(r334576)
@@ -520,11 +520,12 @@ check_type:
 				 * '}' */
 	    if (s_com != e_com) {	/* the turkey has embedded a comment
 					 * in a line. fix it */
+		int len = e_com - s_com;
+
+		CHECK_SIZE_CODE(len + 3);
 		*e_code++ = ' ';
-		for (t_ptr = s_com; *t_ptr; ++t_ptr) {
-		    CHECK_SIZE_CODE;
-		    *e_code++ = *t_ptr;
-		}
+		memcpy(e_code, s_com, len);
+		e_code += len;
 		*e_code++ = ' ';
 		*e_code = '\0';	/* null terminate code sect */
 		ps.want_blank = false;
@@ -540,7 +541,10 @@ check_type:
 	/*-----------------------------------------------------*\
 	|	   do switch on type of token scanned		|
 	\*-----------------------------------------------------*/
-	CHECK_SIZE_CODE;
+	CHECK_SIZE_CODE(3);	/* maximum number of increments of e_code
+				 * before the next CHECK_SIZE_CODE or
+				 * dump_line() is 2. After that there's the
+				 * final increment for the null character. */
 	switch (type_code) {	/* now, decide what to do with the token */
 
 	case form_feed:	/* found a form feed in line */
@@ -651,19 +655,25 @@ check_type:
 	    else if (ps.want_blank)
 		*e_code++ = ' ';
 
-	    for (t_ptr = token; *t_ptr; ++t_ptr) {
-		CHECK_SIZE_CODE;
-		*e_code++ = *t_ptr;
+	    {
+		int len = e_token - s_token;
+
+		CHECK_SIZE_CODE(len);
+		memcpy(e_code, token, len);
+		e_code += len;
 	    }
 	    ps.want_blank = false;
 	    break;
 
 	case binary_op:	/* any binary operation */
-	    if (ps.want_blank)
-		*e_code++ = ' ';
-	    for (t_ptr = token; *t_ptr; ++t_ptr) {
-		CHECK_SIZE_CODE;
-		*e_code++ = *t_ptr;	/* move the operator */
+	    {
+		int len = e_token - s_token;
+
+		CHECK_SIZE_CODE(len + 1);
+		if (ps.want_blank)
+		    *e_code++ = ' ';
+		memcpy(e_code, token, len);
+		e_code += len;
 	    }
 	    ps.want_blank = true;
 	    break;
@@ -704,13 +714,20 @@ check_type:
 	    }
 	    ps.in_stmt = false;	/* seeing a label does not imply we are in a
 				 * stmt */
-	    for (t_ptr = s_code; *t_ptr; ++t_ptr)
-		*e_lab++ = *t_ptr;	/* turn everything so far into a label */
-	    e_code = s_code;
-	    *e_lab++ = ':';
-	    *e_lab++ = ' ';
-	    *e_lab = '\0';
+	    /*
+	     * turn everything so far into a label
+	     */
+	    {
+		int len = e_code - s_code;
 
+		CHECK_SIZE_LAB(len + 3);
+		memcpy(e_lab, s_code, len);
+		e_lab += len;
+		*e_lab++ = ':';
+		*e_lab++ = ' ';
+		*e_lab = '\0';
+		e_code = s_code;
+	    }
 	    force_nl = ps.pcase = scase;	/* ps.pcase will be used by
 						 * dump_line to decide how to
 						 * indent the label. force_nl
@@ -986,22 +1003,28 @@ check_type:
 		parse(hd_type);
 	    }
     copy_id:
-	    if (ps.want_blank)
-		*e_code++ = ' ';
-	    for (t_ptr = token; *t_ptr; ++t_ptr) {
-		CHECK_SIZE_CODE;
-		*e_code++ = *t_ptr;
+	    {
+		int len = e_token - s_token;
+
+		CHECK_SIZE_CODE(len + 1);
+		if (ps.want_blank)
+		    *e_code++ = ' ';
+		memcpy(e_code, s_token, len);
+		e_code += len;
 	    }
 	    if (type_code != funcname)
 		ps.want_blank = true;
 	    break;
 
 	case strpfx:
-	    if (ps.want_blank)
-		*e_code++ = ' ';
-	    for (t_ptr = token; *t_ptr; ++t_ptr) {
-		CHECK_SIZE_CODE;
-		*e_code++ = *t_ptr;
+	    {
+		int len = e_token - s_token;
+
+		CHECK_SIZE_CODE(len + 1);
+		if (ps.want_blank)
+		    *e_code++ = ' ';
+		memcpy(e_code, token, len);
+		e_code += len;
 	    }
 	    ps.want_blank = false;
 	    break;
@@ -1038,6 +1061,7 @@ check_type:
 		    (s_lab != e_lab) ||
 		    (s_code != e_code))
 		dump_line();
+	    CHECK_SIZE_LAB(1);
 	    *e_lab++ = '#';	/* move whole line to 'label' buffer */
 	    {
 		int         in_comment = 0;
@@ -1051,7 +1075,7 @@ check_type:
 			fill_buffer();
 		}
 		while (*buf_ptr != '\n' || (in_comment && !had_eof)) {
-		    CHECK_SIZE_LAB;
+		    CHECK_SIZE_LAB(2);
 		    *e_lab = *buf_ptr++;
 		    if (buf_ptr >= buf_end)
 			fill_buffer();
@@ -1119,6 +1143,7 @@ check_type:
 		    buf_end = sc_end;
 		    sc_end = NULL;
 		}
+		CHECK_SIZE_LAB(1);
 		*e_lab = '\0';	/* null terminate line */
 		ps.pcase = false;
 	    }
@@ -1249,14 +1274,14 @@ indent_declaration(int cur_dec_ind, int tabs_to_var)
     if (tabs_to_var) {
 	int tpos;
 
+	CHECK_SIZE_CODE(cur_dec_ind / tabsize);
 	while ((tpos = tabsize * (1 + pos / tabsize)) <= cur_dec_ind) {
-	    CHECK_SIZE_CODE;
 	    *e_code++ = '\t';
 	    pos = tpos;
 	}
     }
+    CHECK_SIZE_CODE(cur_dec_ind - pos + 1);
     while (pos < cur_dec_ind) {
-	CHECK_SIZE_CODE;
 	*e_code++ = ' ';
 	pos++;
     }

Modified: head/usr.bin/indent/indent_globs.h
==============================================================================
--- head/usr.bin/indent/indent_globs.h	Sun Jun  3 18:17:07 2018	(r334575)
+++ head/usr.bin/indent/indent_globs.h	Sun Jun  3 18:19:41 2018	(r334576)
@@ -52,9 +52,9 @@
 FILE       *input;		/* the fid for the input file */
 FILE       *output;		/* the output file */
 
-#define CHECK_SIZE_CODE \
-	if (e_code >= l_code) { \
-	    int nsize = l_code-s_code+400; \
+#define CHECK_SIZE_CODE(desired_size) \
+	if (e_code + (desired_size) >= l_code) { \
+	    int nsize = l_code-s_code + 400 + desired_size; \
 	    int code_len = e_code-s_code; \
 	    codebuf = (char *) realloc(codebuf, nsize); \
 	    if (codebuf == NULL) \
@@ -63,9 +63,9 @@ FILE       *output;		/* the output file */
 	    l_code = codebuf + nsize - 5; \
 	    s_code = codebuf + 1; \
 	}
-#define CHECK_SIZE_COM \
-	if (e_com >= l_com) { \
-	    int nsize = l_com-s_com+400; \
+#define CHECK_SIZE_COM(desired_size) \
+	if (e_com + (desired_size) >= l_com) { \
+	    int nsize = l_com-s_com + 400 + desired_size; \
 	    int com_len = e_com - s_com; \
 	    int blank_pos; \
 	    if (last_bl != NULL) \
@@ -81,9 +81,9 @@ FILE       *output;		/* the output file */
 	    l_com = combuf + nsize - 5; \
 	    s_com = combuf + 1; \
 	}
-#define CHECK_SIZE_LAB \
-	if (e_lab >= l_lab) { \
-	    int nsize = l_lab-s_lab+400; \
+#define CHECK_SIZE_LAB(desired_size) \
+	if (e_lab + (desired_size) >= l_lab) { \
+	    int nsize = l_lab-s_lab + 400 + desired_size; \
 	    int label_len = e_lab - s_lab; \
 	    labbuf = (char *) realloc(labbuf, nsize); \
 	    if (labbuf == NULL) \
@@ -92,9 +92,9 @@ FILE       *output;		/* the output file */
 	    l_lab = labbuf + nsize - 5; \
 	    s_lab = labbuf + 1; \
 	}
-#define CHECK_SIZE_TOKEN \
-	if (e_token >= l_token) { \
-	    int nsize = l_token-s_token+400; \
+#define CHECK_SIZE_TOKEN(desired_size) \
+	if (e_token + (desired_size) >= l_token) { \
+	    int nsize = l_token-s_token + 400 + desired_size; \
 	    int token_len = e_token - s_token; \
 	    tokenbuf = (char *) realloc(tokenbuf, nsize); \
 	    if (tokenbuf == NULL) \

Modified: head/usr.bin/indent/lexi.c
==============================================================================
--- head/usr.bin/indent/lexi.c	Sun Jun  3 18:17:07 2018	(r334575)
+++ head/usr.bin/indent/lexi.c	Sun Jun  3 18:19:41 2018	(r334576)
@@ -182,47 +182,32 @@ lexi(struct parser_state *state)
 
 	if (isdigit((unsigned char)*buf_ptr) ||
 	    (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
-	    enum base {
-		BASE_2, BASE_8, BASE_10, BASE_16
-	    };
 	    int         seendot = 0,
 	                seenexp = 0,
 			seensfx = 0;
-	    enum base	in_base = BASE_10;
 
-	    if (*buf_ptr == '0') {
+	    /*
+	     * base 2, base 8, base 16:
+	     */
+	    if (buf_ptr[0] == '0' && buf_ptr[1] != '.') {
+		int len;
+
 		if (buf_ptr[1] == 'b' || buf_ptr[1] == 'B')
-		    in_base = BASE_2;
+		    len = strspn(buf_ptr + 2, "01") + 2;
 		else if (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')
-		    in_base = BASE_16;
-		else if (isdigit((unsigned char)buf_ptr[1]))
-		    in_base = BASE_8;
-	    }
-	    switch (in_base) {
-	    case BASE_2:
-		*e_token++ = *buf_ptr++;
-		*e_token++ = *buf_ptr++;
-		while (*buf_ptr == '0' || *buf_ptr == '1') {
-		    CHECK_SIZE_TOKEN;
-		    *e_token++ = *buf_ptr++;
+		    len = strspn(buf_ptr + 2, "0123456789ABCDEFabcdef") + 2;
+		else
+		    len = strspn(buf_ptr + 1, "012345678") + 1;
+		if (len > 0) {
+		    CHECK_SIZE_TOKEN(len);
+		    memcpy(e_token, buf_ptr, len);
+		    e_token += len;
+		    buf_ptr += len;
 		}
-		break;
-	    case BASE_8:
-		*e_token++ = *buf_ptr++;
-		while (*buf_ptr >= '0' && *buf_ptr <= '8') {
-		    CHECK_SIZE_TOKEN;
-		    *e_token++ = *buf_ptr++;
-		}
-		break;
-	    case BASE_16:
-		*e_token++ = *buf_ptr++;
-		*e_token++ = *buf_ptr++;
-		while (isxdigit((unsigned char)*buf_ptr)) {
-		    CHECK_SIZE_TOKEN;
-		    *e_token++ = *buf_ptr++;
-		}
-		break;
-	    case BASE_10:
+		else
+		    diag2(1, "Unterminated literal");
+	    }
+	    else		/* base 10: */
 		while (1) {
 		    if (*buf_ptr == '.') {
 			if (seendot)
@@ -230,7 +215,7 @@ lexi(struct parser_state *state)
 			else
 			    seendot++;
 		    }
-		    CHECK_SIZE_TOKEN;
+		    CHECK_SIZE_TOKEN(3);
 		    *e_token++ = *buf_ptr++;
 		    if (!isdigit((unsigned char)*buf_ptr) && *buf_ptr != '.') {
 			if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
@@ -238,24 +223,21 @@ lexi(struct parser_state *state)
 			else {
 			    seenexp++;
 			    seendot++;
-			    CHECK_SIZE_TOKEN;
 			    *e_token++ = *buf_ptr++;
 			    if (*buf_ptr == '+' || *buf_ptr == '-')
 				*e_token++ = *buf_ptr++;
 			}
 		    }
 		}
-		break;
-	    }
+
 	    while (1) {
+		CHECK_SIZE_TOKEN(2);
 		if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) {
-		    CHECK_SIZE_TOKEN;
 		    *e_token++ = *buf_ptr++;
 		    seensfx |= 1;
 		    continue;
 		}
 		if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) {
-		    CHECK_SIZE_TOKEN;
 		    if (buf_ptr[1] == buf_ptr[0])
 		        *e_token++ = *buf_ptr++;
 		    *e_token++ = *buf_ptr++;
@@ -276,13 +258,13 @@ lexi(struct parser_state *state)
 			} else
 			    break;
 		}
-		CHECK_SIZE_TOKEN;
+		CHECK_SIZE_TOKEN(1);
 		/* copy it over */
 		*e_token++ = *buf_ptr++;
 		if (buf_ptr >= buf_end)
 		    fill_buffer();
 	    }
-	*e_token++ = '\0';
+	*e_token = '\0';
 
 	if (s_token[0] == 'L' && s_token[1] == '\0' &&
 	      (*buf_ptr == '"' || *buf_ptr == '\''))
@@ -397,6 +379,7 @@ lexi(struct parser_state *state)
 
     /* Scan a non-alphanumeric token */
 
+    CHECK_SIZE_TOKEN(3);		/* things like "<<=" */
     *e_token++ = *buf_ptr;		/* if it is only a one-character token, it is
 				 * moved here */
     *e_token = '\0';
@@ -424,9 +407,7 @@ lexi(struct parser_state *state)
 		    diag2(1, "Unterminated literal");
 		    goto stop_lit;
 		}
-		CHECK_SIZE_TOKEN;	/* Only have to do this once in this loop,
-					 * since CHECK_SIZE guarantees that there
-					 * are at least 5 entries left */
+		CHECK_SIZE_TOKEN(2);
 		*e_token = *buf_ptr++;
 		if (buf_ptr >= buf_end)
 		    fill_buffer();
@@ -585,8 +566,10 @@ stop_lit:
 	    break;
 	}
 	while (*buf_ptr == '*' || isspace((unsigned char)*buf_ptr)) {
-	    if (*buf_ptr == '*')
+	    if (*buf_ptr == '*') {
+		CHECK_SIZE_TOKEN(1);
 		*e_token++ = *buf_ptr;
+	    }
 	    if (++buf_ptr >= buf_end)
 		fill_buffer();
 	}
@@ -620,6 +603,7 @@ stop_lit:
 	    /*
 	     * handle ||, &&, etc, and also things as in int *****i
 	     */
+	    CHECK_SIZE_TOKEN(1);
 	    *e_token++ = *buf_ptr;
 	    if (++buf_ptr >= buf_end)
 		fill_buffer();
@@ -632,6 +616,7 @@ stop_lit:
     if (buf_ptr >= buf_end)	/* check for input buffer empty */
 	fill_buffer();
     state->last_u_d = unary_delim;
+    CHECK_SIZE_TOKEN(1);
     *e_token = '\0';		/* null terminate the token */
     return (code);
 }

Modified: head/usr.bin/indent/pr_comment.c
==============================================================================
--- head/usr.bin/indent/pr_comment.c	Sun Jun  3 18:17:07 2018	(r334575)
+++ head/usr.bin/indent/pr_comment.c	Sun Jun  3 18:19:41 2018	(r334576)
@@ -205,9 +205,9 @@ pr_comment(void)
 
     while (1) {			/* this loop will go until the comment is
 				 * copied */
-	CHECK_SIZE_COM;
 	switch (*buf_ptr) {	/* this checks for various spcl cases */
 	case 014:		/* check for a form feed */
+	    CHECK_SIZE_COM(3);
 	    if (!ps.box_com) {	/* in a text comment, break the line here */
 		ps.use_ff = true;
 		/* fix so dump_line uses a form feed */
@@ -232,6 +232,7 @@ pr_comment(void)
 		return;
 	    }
 	    last_bl = NULL;
+	    CHECK_SIZE_COM(4);
 	    if (ps.box_com || ps.last_nl) {	/* if this is a boxed comment,
 						 * we dont ignore the newline */
 		if (s_com == e_com)
@@ -255,7 +256,6 @@ pr_comment(void)
 		 */
 		else {		/* otherwise, insert one */
 		    last_bl = e_com;
-		    CHECK_SIZE_COM;
 		    *e_com++ = ' ';
 		}
 	    }
@@ -282,12 +282,11 @@ pr_comment(void)
 				 * of comment */
 	    if (++buf_ptr >= buf_end)	/* get to next char after * */
 		fill_buffer();
-
+	    CHECK_SIZE_COM(4);
 	    if (*buf_ptr == '/') {	/* it is the end!!! */
 	end_of_comment:
 		if (++buf_ptr >= buf_end)
 		    fill_buffer();
-		CHECK_SIZE_COM;
 		if (break_delim) {
 		    if (e_com > s_com + 3) {
 			dump_line();
@@ -308,6 +307,7 @@ pr_comment(void)
 	default:		/* we have a random char */
 	    now_col = count_spaces_until(ps.com_col, s_com, e_com);
 	    do {
+		CHECK_SIZE_COM(1);
 		*e_com = *buf_ptr++;
 		if (buf_ptr >= buf_end)
 		    fill_buffer();
@@ -337,11 +337,16 @@ pr_comment(void)
 		    t_ptr++)
 			;
 		last_bl = NULL;
+		/*
+		 * t_ptr will be somewhere between e_com (dump_line() reset)
+		 * and l_com. So it's safe to copy byte by byte from t_ptr
+		 * to e_com without any CHECK_SIZE_COM().
+		 */
 		while (*t_ptr != '\0') {
 		    if (*t_ptr == ' ' || *t_ptr == '\t')
 			last_bl = e_com;
 		    *e_com++ = *t_ptr++;
- 		}
+		}
 	    }
 	    break;
 	}



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201806031819.w53IJfII038354>