/*********************************************************************//**
*	Lexical analyzer.
*	The lexical analyzer is used for the syntactic analysis of text.
*
*	author: Michal Jirous
*	date: 09.04.2009
*	file: lexan.cpp
**********************************************************************/

#include "lexan.h"
#include "parsing.h"


/* Running total of the byte counts reported through incBytes(). */
int g_iNumBytes = 0;

/* Adds `value` to the running byte total. */
void incBytes(int value)
{
	g_iNumBytes = g_iNumBytes + value;
}

/* Returns the byte total accumulated so far. */
int getBytes()
{
	const int iTotal = g_iNumBytes;
	return iTotal;
}


/* Builds an empty analyzer: no tree nodes, no input data, default flags,
 * and character tables in which every character is disallowed. */
CLexan::CLexan()
{
	m_nDefault = NULL;
	m_nCommentSlash = NULL;
	// The destructor passes m_nWrongChar to deleteNext(), so it has to hold a
	// defined value even though nothing in this file ever allocates it.
	m_nWrongChar = NULL;
	m_nRoot = NULL;
	m_sData = "";
	m_iDataIndex = 0;
	m_bSkipEmptyChars = false;
	m_uiCharacterTableSize = 0;
	bAllowingCharacters = true;
	bCaseSensitivity = true;
	bEmptyCharsSettings = true;
	intNode = NULL;
	doubleNode = NULL;

	// Clear every slot of both lookup tables. A fixed bound of 255 stops one
	// short of slot 255, which is where character code 127 lands after the
	// +128 shift used throughout this class.
	// NOTE(review): assumes both tables are fixed-size array members (they are
	// never allocated in this file) - confirm against lexan.h.
	const size_t nIndexSlots = sizeof(cCharacterTable) / sizeof(cCharacterTable[0]);
	for(size_t i = 0; i < nIndexSlots; i++)
		cCharacterTable[i] = 0;

	const size_t nFlagSlots = sizeof(bAllowedCharacters) / sizeof(bAllowedCharacters[0]);
	for(size_t i = 0; i < nFlagSlots; i++)
		bAllowedCharacters[i] = false;

	m_iLexSymbol = LEXAN_NO_SYMBOL;
}


/* Marks one character as allowed and gives it the next free column of the
 * character table. Does nothing once bAllowingCharacters has been cleared,
 * and skips upper-case letters when case sensitivity is switched off. */
void CLexan::setAllowedCharacter( char c )
{
	if( bAllowingCharacters )
	{
		const bool bUpperWhileInsensitive =
			!bCaseSensitivity && parsing::isAlpha(c) && parsing::isUpper(c);

		if( !bUpperWhileInsensitive )
		{
			// shift by 128 so the table index is never negative
			const unsigned char ucSlot = c + 128;
			bAllowedCharacters[ucSlot] = true;
			cCharacterTable[ucSlot] = m_uiCharacterTableSize++;
		}
	}
}

/* Marks every character of sCharacters as allowed.
 *
 * Each character goes through setAllowedCharacter(), so the rules are the
 * same as for the single-character call: nothing happens once
 * bAllowingCharacters has been cleared, and upper-case letters are skipped
 * when case sensitivity is off. An upper-case letter only skips itself; it
 * no longer discards the rest of the string. */
void CLexan::setAllowedCharacters( string sCharacters)
{
	if(!bAllowingCharacters)
		return;
	for(size_t i = 0; i < sCharacters.length(); i++)
	{
		setAllowedCharacter( sCharacters.at(i) );
	}
}

/* Tree deletion.
 *
 * First resets the transition slots of the default node, the two number
 * nodes and the comment node to NULL, then asks the root to delete its
 * successors and finally deletes the root itself.
 * NOTE(review): the resets presumably keep deleteNext() from following the
 * self-referencing transitions these nodes are given elsewhere in this file -
 * confirm against nodeL::deleteNext. */
CLexan::~CLexan()
{
	// default node: clear the first m_uiCharacterTableSize transitions
	if(m_nDefault != NULL )
		for(unsigned int i=0;i < m_uiCharacterTableSize; i++)
		{
			m_nDefault->next[i] = NULL;
			
		}
	// number nodes: only handled when both of them exist
	if(intNode != NULL && doubleNode != NULL)
		for(unsigned int i=0;i < m_uiCharacterTableSize; i++)
		{
			intNode->next[i] = NULL;
			doubleNode->next[i] = NULL;
			
		}

	// comment node: cleared over its own m_iSize rather than the table size
	if(m_nCommentSlash != NULL )
		for( int i=0;i < m_nCommentSlash->m_iSize; i++)
		{
			m_nCommentSlash->next[i] = NULL;
		}
			
	// nothing is deleted when no root was ever created
	if( m_nRoot )
	{
		m_nRoot->deleteNext(m_nDefault,m_nWrongChar);
		delete m_nRoot;
	}


}
/* Allocates the default node and closes character registration.
 * When bEmptyCharsSettings is set, every code in 0..127 that
 * parsing::isSpace() accepts is registered as an allowed character first. */
void CLexan::createDefaultNode()
{
	if(bEmptyCharsSettings)
	{
		int iCode = 0;
		while( iCode < 128 )
		{
			if( parsing::isSpace(iCode) )
				setAllowedCharacter(iCode);
			++iCode;
		}
	}

	// the node gets one transition slot per registered character
	m_nDefault = new nodeL( m_uiCharacterTableSize );
	incBytes(sizeof(nodeL));

	// from here on setAllowedCharacter(s) calls are ignored
	bAllowingCharacters = false;
}

/* Sets the symbol id stored in the default node.
 * Has no effect when the default node does not exist yet, i.e. when
 * createDefaultNode() has not been called; previously that case
 * dereferenced a NULL pointer. */
void CLexan::setDefaultReturnValue(int iSymbol)
{
	if( m_nDefault == NULL )
		return;
	m_nDefault->m_iReturnValue = iSymbol;
}

/* Nastaveni znaku vytvarejici identifikator */
void CLexan::setCycleForChar(char cChar)
{
	if( !bCaseSensitivity && parsing::isAlpha( cChar ) && parsing::isUpper( cChar ) )
		cChar = parsing::toLower( cChar );

	unsigned char cCharacter = cChar + 128;	//pouze kladna cisla
	
	if( !bAllowedCharacters[cCharacter] ) return;
	m_nDefault->next[ cCharacterTable[cCharacter] ] = m_nDefault;
}

/* Forbidden characters.
 * Currently a no-op: the only statement in the body is commented out, so
 * cCharacter is ignored and no state is changed. */
void CLexan::setWrongCharacter( char cCharacter )
{
	//m_nDefault->next[cCharacter] = m_nWrongChar;
}

void CLexan::setCommentDetection()
{
	int slash = '/' + 128;
	setAllowedCharacters("*/");
	nodeL *slashNode = NULL;
	if( m_nRoot->next[ cCharacterTable[slash] ] == NULL || m_nRoot->next[ cCharacterTable['/'] ] == m_nDefault)
	{
		if( m_nRoot->next[ cCharacterTable[slash ] ] == m_nDefault )
			m_nDefault->num_pointer_to_this--;
		slashNode = new nodeL(m_nDefault);
		m_nRoot->next[ cCharacterTable[slash] ] = slashNode;
		slashNode->num_pointer_to_this++;
	}
	slashNode = m_nRoot->next[ cCharacterTable[slash] ];
	m_nCommentSlash = new nodeL( m_nRoot->m_iSize );
	if(slashNode->next[ cCharacterTable[slash] ] != NULL )
		slashNode->next[ cCharacterTable[slash] ]->num_pointer_to_this--;
	slashNode->next[ cCharacterTable[slash] ] = m_nCommentSlash;
	m_nCommentSlash->num_pointer_to_this++;
	for( int i = -128; i < 128; i++ )
	{
		int cChar = i + 128;
		if( bAllowedCharacters[ cChar ])
			m_nCommentSlash->next[ cCharacterTable[cChar] ] = m_nCommentSlash;
	}
	m_nCommentSlash->m_iReturnValue = LEXAN_COMMENT;
	m_nCommentSlash->next[ cCharacterTable[13] ] = NULL;
}

/* Terminating characters: applies setEndCharacter() to every character of
 * sChars, i.e. clears the default node's transition for each allowed
 * character (after case folding when case sensitivity is off). */
void CLexan::setEndCharacters( string sChars)
{
	for( string::iterator it = sChars.begin(); it != sChars.end(); ++it )
		setEndCharacter( *it );
}

/* Ukoncovaci znaky */
void CLexan::setEndCharacter( char cChar)
{
	unsigned char cCharacter = 0;

	if( !bCaseSensitivity && parsing::isAlpha( cChar ) && parsing::isUpper( cChar ) )
		cCharacter = parsing::toLower( cChar )+128;
	else
		cCharacter = cChar+128;	//pouze kladna cisla

	if( !bAllowedCharacters[cCharacter] ) return;
	m_nDefault->next[ cCharacterTable[ cCharacter ] ] = NULL;
	

}

/* Creates the root node from the default node.
 * When bEmptyCharsSettings is set, every code in 0..127 accepted by
 * parsing::isSpace() is first made a terminating character of the default
 * node and, once the root exists, added as a one-character LEXAN_SPACE
 * subject. (Creation of a dedicated wrong-character node is disabled.) */
void CLexan::init()
{
	const bool bHandleWhitespace = bEmptyCharsSettings;

	// must happen before the root is built from the default node
	if( bHandleWhitespace )
	{
		for(int iCode = 0; iCode < 128; ++iCode)
		{
			if( parsing::isSpace(iCode) )
				setEndCharacter(iCode);
		}
	}

	m_nRoot = new nodeL(m_nDefault);
	incBytes(sizeof(nodeL));

	if( !bHandleWhitespace )
		return;

	for(int iCode = 0; iCode < 128; ++iCode)
	{
		if( parsing::isSpace(iCode) )
			addSubject( string(1, static_cast<char>(iCode)), LEXAN_SPACE, true);
	}
}


void CLexan::setNumberDetection()
{
	intNode = new nodeL( m_nRoot->m_iSize );		incBytes(sizeof(nodeL));
	doubleNode = new nodeL( m_nRoot->m_iSize );		incBytes(sizeof(nodeL));

	intNode->m_iReturnValue = LEXAN_INT;
	doubleNode->m_iReturnValue = LEXAN_DOUBLE;

	unsigned char cCharacter = 0;
	for(int i = '0'; i <= '9'; i++)
	{
		cCharacter = i + 128;
		if( !bAllowedCharacters[cCharacter] ) continue;
		intNode->next[ cCharacterTable[cCharacter] ] = intNode;

		doubleNode->next[ cCharacterTable[cCharacter] ] = doubleNode;
		
		if(m_nDefault != NULL && m_nRoot!= NULL && m_nRoot->next[ cCharacterTable[cCharacter] ] == m_nDefault)
			m_nDefault->num_pointer_to_this--;
		
		m_nRoot->next[ cCharacterTable[cCharacter] ] = intNode;
		intNode->num_pointer_to_this++;
	}

	cCharacter = '.'+128;
	if( !bAllowedCharacters[cCharacter] ) return;
	intNode->next[ cCharacterTable[cCharacter] ] = doubleNode;
	doubleNode->num_pointer_to_this++;
}


/* Vlozeni noveho objektu k analyze */
void CLexan::addSubject(std::string sName, int iReturnValue, bool bEndOne )
{
	unsigned char cZnak = 0;
	for( std::string::iterator i = sName.begin(); i != sName.end(); i++ )
	{
		
		if( !bCaseSensitivity && parsing::isAlpha( (*i) ) && parsing::isUpper( (*i) ) )
			cZnak = parsing::toLower( (*i) )+128;
		else
			cZnak = (*i)+128;
		if( !bAllowedCharacters[cZnak] ) return;
	}
	nodeL *tmpNode = m_nRoot;
        size_t i = 0;
        
        
        /*nacitame data po znaku a zkousime zda uz danny list ve stromu existuje,
        //pokud ne, tak ho vytvorime*/
        for( ; i < sName.length()-1; i++ )
        {
           	
			if( !bCaseSensitivity && parsing::isAlpha( sName.at(i) ) && parsing::isUpper( sName.at(i) ) )
				cZnak = parsing::toLower( sName.at(i) ) + 128;
			else
				cZnak = sName.at(i) + 128;
			//cZnak += 128;
			cZnak = cCharacterTable[ cZnak ];
			if( tmpNode->next[ cZnak ] == NULL || tmpNode->next[ cZnak ] == m_nDefault)
			{
				if(  tmpNode->next[ cZnak ] == m_nDefault )
					m_nDefault->num_pointer_to_this--;
               tmpNode->next[ cZnak ] = new nodeL(m_nDefault, iReturnValue, bEndOne);	incBytes(sizeof(nodeL));
			   tmpNode->next[ cZnak ]->num_pointer_to_this++;

			}
			
            tmpNode = tmpNode->next[ cZnak ];
			
        }
        
        
		if( !bCaseSensitivity && parsing::isAlpha( sName.at(i) ) && parsing::isUpper( sName.at(i) ) )
			cZnak = parsing::toLower( sName.at(i) ) + 128;
		else
			cZnak = sName.at(i) + 128;
		//cZnak += 128;
		cZnak = cCharacterTable[ cZnak ];
       //pro posledni znak identifikatoru provedeme test existence     
       if( tmpNode->next[ cZnak ] == NULL || tmpNode->next[ cZnak ] == m_nDefault )
	   {
		   if(  tmpNode->next[ cZnak ] == m_nDefault )
				m_nDefault->num_pointer_to_this--;
		   tmpNode->next[ cZnak ] = new nodeL(m_nDefault, iReturnValue, bEndOne);	incBytes(sizeof(nodeL));
		   tmpNode->next[ cZnak ]->num_pointer_to_this++;
	   }
       else
		   tmpNode->next[ cZnak ]->m_iReturnValue = iReturnValue;
}

/* Analyzes the data and returns the ID of the analyzed part of the data.
 *
 * Starting at m_iDataIndex, characters of m_sData are consumed while the
 * tree has a transition for them; the consumed text is collected in
 * m_sSymbolName. Returns:
 *  - WRONG_CHAR when a character is not allowed, or when the root has no
 *    transition for it (the character is skipped in both cases);
 *  - the m_iReturnValue of the node where the walk stopped;
 *  - END_OF_FILE when nothing was consumed and cZnak still holds 127.
 * When m_bSkipEmptyChars is set and the node reached reports LEXAN_SPACE, the
 * function calls itself again instead of returning that token. */
int CLexan::analyze()
{
        m_sSymbolName = "";
        nodeL *tmpNode = m_nRoot;
        // 127 doubles as the "nothing read / stop" marker checked below
        int cZnak = 127;
        
        /* read character by character and walk the tree. When we reach a leaf,
           we return its value, i.e. the type of the identifier */
        for(;m_iDataIndex < m_sData.length(); m_iDataIndex++)
        {
			
            
			// shifted (+128) code of the current character, lower-cased first
			// when case sensitivity is off
			if( !bCaseSensitivity && parsing::isAlpha( m_sData.at( m_iDataIndex ) ) && parsing::isUpper( m_sData.at( m_iDataIndex ) ) )
				cZnak = parsing::toLower( m_sData.at( m_iDataIndex ) ) + 128;
			else
				cZnak = m_sData.at( m_iDataIndex ) + 128;
			//cZnak += 128;
			// a shifted code of 127 (character value -1) stops the scan without
			// consuming the character
			if(cZnak == 127 )
				break;
			else if( !bAllowedCharacters[cZnak] )
			{
				// skip the disallowed character; it is not added to m_sSymbolName
				m_iDataIndex++;
				return WRONG_CHAR;
			}
			// from here on cZnak is the column in the nodes' transition tables
			cZnak = cCharacterTable[ cZnak ];
			if(tmpNode->next[ cZnak ] == NULL)	// nowhere left to jump
            {
                if( tmpNode == m_nRoot )   // if we stop right at the start, this identifier is not defined
                {
                    
                    m_sSymbolName += m_sData.at( m_iDataIndex );
					m_iDataIndex++;
                    return WRONG_CHAR;
                }
                else if(m_bSkipEmptyChars && tmpNode->m_iReturnValue == LEXAN_SPACE)
				{
					skipEmptyCharacters();
					return analyze();	// whitespace is ignored
				}
				else
                    return tmpNode->m_iReturnValue;
            }
            else	// otherwise continue to the node selected by the character
            {
				tmpNode = tmpNode->next[ cZnak ];
                m_sSymbolName += m_sData.at( m_iDataIndex );
            }
          }
		// the loop ended without returning: either the data ran out or the
		// stop marker was hit
		if( m_sSymbolName.empty() && cZnak == 127 )
            return END_OF_FILE;
		else if(m_bSkipEmptyChars && tmpNode->m_iReturnValue == LEXAN_SPACE)
			return analyze();
        else
            return tmpNode->m_iReturnValue;
}




/* Reads data up to the given character.
 * Returns the text between the current position and the next occurrence of
 * cCharacter (or the end of the data). The position is left on the matching
 * character, which is not consumed. */
string CLexan::readTillCharacter(char cCharacter)
{
	string sCollected;
	while( m_iDataIndex < m_sData.length() )
	{
		const char cCurrent = m_sData.at(m_iDataIndex);
		if( cCurrent == cCharacter )
			break;
		sCollected += cCurrent;
		m_iDataIndex++;
	}
	return sCollected;
}

/* Advances the read position to the first character for which
 * parsing::isSpace() is false, or to the end of the data. */
void CLexan::skipEmptyCharacters()
{
	while( m_iDataIndex < m_sData.length() )
	{
		const bool bIsSpace = parsing::isSpace( m_sData.at( m_iDataIndex ) );
		if( !bIsSpace )
			return;
		m_iDataIndex++;
	}
}

/* Returns the part of the data that has not been analyzed yet, i.e.
 * everything from the current read position to the end. */
string CLexan::getRemainingString()
{
	string sRemainder = m_sData.substr(m_iDataIndex);
	return sRemainder;
}
