Skip site navigation (1) Skip section navigation (2)
Date:      Tue, 22 Apr 1997 14:53:33 PDT
From:      "Marty Leisner" <leisner@sdsp.mc.xerox.com>
To:        Michael Hancock <michaelh@cet.co.jp>
Cc:        Charles Henrich <henrich@crh.cl.msu.edu>, freebsd-hackers@freebsd.org
Subject:   Re: flex vs. lex 
Message-ID:  <9704222153.AA09010@gnu.sdsp.mc.xerox.com>
In-Reply-To: Your message of "Mon, 21 Apr 1997 19:59:41 PDT." <Pine.SV4.3.95.970422115413.29070B-100000@parkplace.cet.co.jp> 

next in thread | previous in thread | raw e-mail | index | archive | help

In message <Pine.SV4.3.95.970422115413.29070B-100000@parkplace.cet.co.jp>,   you write:
>On Mon, 21 Apr 1997, Charles Henrich wrote:
>
>> Does anyone know how to take an old program that depends on lex quirkiness
>> (such as rewriting input() ) and make it work with flex?  Any pointers?  I've
>> been searching for an hour now and haven't found a thing :(
>
>I was interested in getting AT&T's Software ToolChest which has CSCOPE and
>CSCOPE has the same problems.  The Bell Labs web pages now include BSD/OS
>support so I think they had to do something similar.
>
>Maybe you can find someone that has the old and new versions and get some
>insight.
>
>
>Mike

You happen to know where this is?

Here's Vern Paxson's changes to cscope:
(I may have done some stuff too)


I haven't seen a good explanation of lex internals...

--- scanner.l.~1~	Thu Sep 14 23:41:35 1995
+++ scanner.l	Mon Oct  7 22:01:21 1996
@@ -13,21 +13,6 @@
 
 #include "global.h"
 
-/* the line counting has been moved from character reading for speed */
-/* comments are discarded */
-#undef	input
-#ifdef __BORLANDC__
-static	int	msdosgetc(FILE *inputfile);
-
-#define input()  ((yytchar=((yytchar=(yytchar=yysptr>yysbuf?*--yysptr:getc(yyin))=='/'?comment():yytchar)==EOF?0:yytchar))=='\r'?msdosgetc(yyin):yytchar)
-#define noncommentinput()  ((yytchar=((yytchar=yysptr>yysbuf?*--yysptr:getc(yyin))==EOF?0:yytchar))=='\r'?msdosgetc(yyin):yytchar)
-#else
-#define input()  ((yytchar=(yytchar=yysptr>yysbuf?*--yysptr:getc(yyin))=='/'?comment():yytchar)==EOF?0:toascii(yytchar))
-#define noncommentinput()  ((yytchar=yysptr>yysbuf?*--yysptr:getc(yyin))==EOF?0:yytchar)
-#endif
-#undef	unput
-#define unput(c) (*yysptr++=(c))
-
 /* not a preprocessor line (allow Ingres(TM) "## char var;" lines) */
 #define	notpp()	(ppdefine == NO && (*yytext != '#' || yytext[1] == '#'))
 
@@ -73,11 +58,19 @@
 static	BOOL	typedefname;		/* typedef name use */
 static	int	token;			/* token found */
 
+static	int	strip_comments = 1;	/* if true, input() strips comments */
 void	multicharconstant();
+
+#define YY_INPUT(buf, result, max_size) result = cscope_input(buf, max_size);
+
+extern int yylineno;
+int yylineno;
+
 %}
 identifier	[a-zA-Z_][a-zA-Z_0-9]*
 number		\.?[0-9][.0-9a-fA-FlLuUxX]*
 %start SDL
+%array
 %a 6000
 %o 11000
 %p 3000
@@ -535,7 +528,7 @@
 \n		{	/* end of the line */
 			if (ppdefine == YES) {	/* end of a #define */
 				ppdefine = NO;
-				(void) yyless(yyleng - 1);	/* rescan \n */
+				yyless(yyleng - 1);	/* rescan \n */
 				last = first;
 				yymore();
 				return(DEFINEEND);
@@ -754,11 +747,13 @@
 {
 	register char	c;
 
+	strip_comments = 0;
+
 	/* scan until the terminator is found */
-	while ((c = yytext[yyleng++] = noncommentinput()) != terminator) {
+	while ((c = yytext[yyleng++] = input()) != terminator) {
 		switch (c) {
 		case '\\':	/* escape character */
-			if ((yytext[yyleng++] = noncommentinput()) == '\n') {
+			if ((yytext[yyleng++] = input()) == '\n') {
 				++yylineno;
 			}
 			break;
@@ -779,6 +774,7 @@
 			/* fall through */
 			
 		case LEXEOF:	/* end of file */
+			strip_comments = 1;
 			return;
 			
 		default:
@@ -792,7 +788,7 @@
 		if (yyleng >= STMTMAX) {
 			
 			/* truncate the token */
-			while ((c = noncommentinput()) != LEXEOF) {
+			while ((c = input()) != LEXEOF) {
 				if (c == terminator) {
 					unput(c);
 					break;
@@ -804,6 +800,8 @@
 		}
 	}
 	yytext[yyleng] = '\0';
+
+	strip_comments = 1;
 }
 #ifdef __BORLANDC__
 
@@ -823,3 +821,45 @@
 	return(c);
 }
 #endif
+
+static int
+cscope_input(buf, max_size)
+	char *buf;
+	int max_size;
+{
+	int	n, c;
+
+	for (n = 0; n < max_size; ++n) {
+		c = getc(yyin);
+		if ( c == '/' && strip_comments ) {
+			if (n == 0)
+				c = comment();
+			else {
+				/* Don't scan comments except at the beginning
+				 * of the buffer.  This is because comments
+				 * can contain embedded newlines, and these
+				 * lead to incrementing yylineno, but that
+				 * will happen before the lexer scans the
+				 * part of the buffer coming before the
+				 * comment, which will lead to incorrect
+				 * line numbers.
+				 */
+				(void) ungetc(c, yyin);
+				break;
+			}
+		}
+		if (c == EOF)
+			break;
+		buf[n] = toascii(c);
+		if ( c == '\'' || c == '"' ) {
+			/* These could lead to calls to multicharconstant()
+			 * which in turn will want any embedded comments,
+			 * so this is a good place to stop filling the input
+			 * buffer for now.
+			 */
+			++n;
+			break;
+		}
+	}
+	return n;
+}



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?9704222153.AA09010>