 /*
  Whygee's tiny HTML source cleaner/compressor/punctuation scanner
  (C) Yann Guidon mars 2000
 */

%{
  /* Pending-separator flag shared by the rules below.
     1: the previous token asked for a separator, so the next token that
        accepts one writes it (a blank, or &nbsp; before ':' and ';').
     0: nothing is written before the next token.
     It starts at 0 so that no separator is added at the very beginning
     of the output. */
  int space=0;
%}

/* Have flex supply a default main() that simply calls yylex(). */
%option main
/* Patterns are matched case-insensitively, so lower-case tags are
   recognised by the upper-case patterns below. */
%option caseless
/* No yywrap() is needed: scanning ends at the first end of input. */
%option noyywrap
/* Generate a scanner that never treats its input as interactive. */
%option never-interactive

/* Exclusive start condition, entered on <PRE> and left on </PRE>;
   while it is active the input is copied through unchanged. */
%x PRE

/* A run of blanks, carriage returns, newlines and tabs. */
SPACE  [ \x0D\n\t]+
/* Opening inline tags and the opening parenthesis: a pending separator is
   written before them and none is requested after them. */
SP     "<B>"|"<I>"|"<U>"|"<A"[^>]*">"|"("|"<FONT"[^>]*">"
/* Closing inline tags and closing punctuation: written without a leading
   separator, and a separator is requested after them. */
PS     ")"|"."|"</U>"|"</A>"|","|"</B>"|"</I>"|"</FONT>"
/* Colon and semicolon: a pending separator before them is written as
   &nbsp; instead of a blank. */
SPS    ":"|";"
/* Structural tags: copied as they are, and any pending separator is
   cancelled. */
BRK    "<P>"|"<BR>"|"<TT>"|"</TT>"|"<HR"[^>]*">"|"<TD"[^>]*">"|"</TD>"|"<TH>"|"</TH>"|"<TR"[^>]*">"|"<META"[^>]*">"|"<LI>"|"<UL>"|"</UL>"|"<H"[1-9]">"|"</H"[1-9]">"|"</TR>"|"<TABLE"[^>]*">"|"</TABLE>"|"<DIV"[^>]*">"|"</DIV>"|"<CENTER>"|"</CENTER>"|"<HTML>"|"</HTML>"|"</BODY>"|"<HEAD>"|"<TITLE>"|"</TITLE>"|"</HEAD>"|"<BODY"[^>]*">"
/* Either a character entity (&...;) or a run of characters that contains
   no whitespace, parenthesis, angle bracket or colon. */
WORD   (&[^ ;]+;)|([^ \x0D\n\t()<>:]+)
%%

&nbsp;&nbsp;  {
                /* Runs of 2 to 8 consecutive &nbsp; entities are rewritten
                   as entities alternating with plain blanks, keeping the
                   number of positions.  flex prefers the longest match, so
                   these rules only apply when the run is not glued to
                   further WORD characters (WORD would then be longer);
                   on a tie the earlier rule, i.e. one of these, wins.
                   The pending-separator flag is not modified here. */
                printf("&nbsp; ");
              }
&nbsp;&nbsp;&nbsp;  printf("&nbsp; &nbsp;");
&nbsp;&nbsp;&nbsp;&nbsp;  printf("&nbsp; &nbsp; ");
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;  printf("&nbsp; &nbsp; &nbsp;");
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;  printf("&nbsp; &nbsp; &nbsp; ");
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;  printf("&nbsp; &nbsp; &nbsp; &nbsp;");
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;  printf("&nbsp; &nbsp; &nbsp; &nbsp; ");

"<PRE"([ \t\r\n][^>]*)?">" {
                /* Opening <PRE>, with or without attributes: copy the tag
                   and switch to the PRE start condition so that the
                   preformatted text keeps its exact whitespace.  The
                   optional part must start with whitespace so that other
                   tag names beginning with "PRE" are not caught. */
                ECHO;
                BEGIN(PRE);
              }
<PRE>"</PRE>" {
                /* End of the preformatted text: back to normal scanning,
                   with no separator pending. */
                ECHO;
                space=0;
                BEGIN(INITIAL);
              }
<PRE>.|\n     ECHO;

{SPACE}   {
            /* Source whitespace is dropped; separators are re-created by
               the rules below from the `space` flag. */
          }
{SP}      {
            /* Opening inline tag or "(": separator before, none after. */
            if (space==1)
              putchar(' ');
            ECHO;
            space=0;
          }
{PS}      {
            /* Closing inline tag or punctuation: glued to what precedes,
               separator requested after. */
            ECHO;
            space=1;
          }
{SPS}     {
            /* ":" and ";": a pending separator becomes a non-breaking
               space so the sign cannot be wrapped onto the next line. */
            if (space==1)
              printf("&nbsp;");
            ECHO;
            space=1;
          }
{BRK}     {
            /* Structural tag: no separator before or after. */
            ECHO;
            space=0;
          }
"</"[A-Za-z][^<>]*">" {
            /* Any closing tag not listed in PS or BRK.  Without this rule
               the "<" is echoed by the default rule and the tag name is
               then scanned as a WORD, which can insert a blank inside the
               tag ("text</P>" became "text< /P>").  Treated like PS.
               This rule must stay after {PS} and {BRK}: on equal match
               length flex picks the earlier rule. */
            ECHO;
            space=1;
          }
"<"[A-Za-z!?][^<>]*">" {
            /* Any other tag, comment or declaration not listed in SP or
               BRK: copied in one piece so that neither a blank nor
               &nbsp; is inserted among its attributes.  Treated like a
               word for spacing purposes.  A "<" that is not followed by
               a letter, "!" or "?" is still handled as plain text. */
            if (space==1)
              putchar(' ');
            ECHO;
            space=1;
          }
{WORD}    {
            /* Ordinary word or entity: separator before if one is
               pending, and one is requested after. */
            if (space==1)
              putchar(' ');
            ECHO;
            space=1;
          }


%%
