#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <etc.h>
#include "lexc.h"
/*
	Copies a C identifier out of a string.

	The first character is copied unconditionally: the caller is expected
	to have checked that it starts an identifier. Letters, digits and '_'
	that follow are appended. A macro activation/deactivation marker found
	inside the identifier is handed to preproc_macroact() and scanning
	resumes at the position it returns (presumably just after the marker).

	Returns the position immediately after the end of the identifier.
*/
char *token_copyid (
	char *ptl,		/* points to the start of the identifier */
	char *text)		/* receives the identifier followed by '\0' */
{
	*text++ = *ptl++;
	for (;;){
		/* <ctype.h> functions require an unsigned char value: handing */
		/* them a negative plain char is undefined behaviour */
		while (*ptl == '_' || isalnum((unsigned char)*ptl)){
			*text++ = *ptl++;
		}
		if (*ptl != MACRO_DESACTIVE_CAR && *ptl != MACRO_ACTIVE_CAR){
			break;
		}
		// Deactivates/activates a macro, then keeps collecting characters
		ptl = preproc_macroact (ptl);
	}
	*text = '\0';
	return ptl;
}
/*
	Returns != 0 when carac is a valid first character for an
	identifier, i.e. a letter or an underscore.
*/

int token_idok(char carac)
{
	/* The cast matters: isalpha() is undefined for a negative plain char */
	return carac == '_' || isalpha((unsigned char)carac);
}

/*
	One entry of the keyword table: the spelling of a keyword and the
	KEYWORD code reported for it. Entries sharing a hash value are
	chained through `next` by token_inithash().
*/
typedef struct _tblkey{
	struct _tblkey *next;	/* Chain link used by the elementary hash lookup */
	char *word;
	KEYWORD type2;
} TBLKEY;
/*
	Keyword table. Several spellings may map to the same code (e.g.
	"public"/"PUBLIC", "static"/"STATIC"). The array has no terminating
	sentinel entry: its size is obtained with NB_ELM(tbl).
	`word` may be replaced at run time by token_chgkeyword().
*/
static TBLKEY tbl[]={
	{NULL,	"int", 		KEY_INT},
	{NULL,	"char",		KEY_CHAR},
	{NULL,	"unsigned",	KEY_UNSIGNED},
	{NULL,	"void",     KEY_VOID},
	{NULL,	"long",		KEY_LONG},
	{NULL,	"float",	KEY_FLOAT},
	{NULL,	"double",	KEY_DOUBLE},
	{NULL,	"struct",	KEY_STRUCT},
	{NULL,	"class",	KEY_CLASS},
	{NULL,	"typedef",	KEY_TYPEDEF},
	{NULL,	"union",	KEY_UNION},
	{NULL,	"enum",		KEY_ENUM},
	{NULL,	"extern",	KEY_EXTERN},
	{NULL,	"const",	KEY_CONST},
	{NULL,	"static",   KEY_STATIC},
	{NULL,	"export",   KEY_EXPORT},
	{NULL,	"_export",  KEY__EXPORT},
	{NULL,	"PRIVATE",	KEY_PRIVATE},
	{NULL,	"PROTECTED",KEY_PROTECTED},
	{NULL,	"PUBLIC",	KEY_PUBLIC},
	{NULL,	"VIRTUAL",	KEY_VIRTUAL},
	{NULL,	"private",	KEY_PRIVATE},
	{NULL,	"protected",KEY_PROTECTED},
	{NULL,	"public",	KEY_PUBLIC},
	{NULL,	"virtual",	KEY_VIRTUAL},
	{NULL,	"far",      KEY_FAR},
	{NULL,	"near",     KEY_NEAR},
	{NULL,	"for",		KEY_FOR},
	{NULL,	"while",	KEY_WHILE},
	{NULL,	"do",		KEY_DO},
	{NULL,	"if",		KEY_IF},
	{NULL,	"signed",	KEY_SIGNED},
	{NULL,	"volatile",	KEY_VOLATILE},
	{NULL,	"sizeof",	KEY_SIZEOF},
	{NULL,	"switch",	KEY_SWITCH},
	{NULL,	"break",	KEY_BREAK},
	{NULL,	"return",	KEY_RETURN},
	{NULL,	"else",		KEY_ELSE},
	{NULL,	"goto",		KEY_GOTO},
	{NULL,	"case",		KEY_CASE},
	{NULL,	"default",	KEY_DEFAULT},
	{NULL,	"short",	KEY_SHORT},
	{NULL,	"huge",		KEY_HUGE},
	{NULL,	"cdecl",	KEY_CDECL},
	{NULL,	"pascal",	KEY_PASCAL},
	{NULL,	"interrupt",KEY_INTERRUPT},
	{NULL,	"register",	KEY_REGISTER},
	{NULL,	"auto",		KEY_AUTO},
	{NULL,	"STATIC",	KEY_STATIC},
	{NULL,	"operator",	KEY_OPERATOR},
	{NULL,	"P_DEFVAL",	KEY_P_DEFVAL},
};
/* Hash buckets: head of the chain of keywords for each HASH_VAL() value */
static TBLKEY *tok_index[256];
/*
	Hash of a word, in the range 0..255, computed from its first two
	characters: second character XOR (first character shifted left by 1).
	For a one-character word the second "character" is the terminating
	'\0'. NOTE(review): for an empty string (pt)[1] lies past the
	terminator.
*/
#define HASH_VAL(pt)	(((pt)[1] ^ ((pt)[0] << 1)) & 0xff)
/*
	Initialises keyword recognition by hashed lookup.

	Empties every bucket of tok_index, then pushes each entry of tbl on
	the front of the chain selected by HASH_VAL() of its word (a hash of
	the first two characters).

	Entries whose word is the empty string (keywords cancelled through
	token_chgkeyword()) are left out of the index: HASH_VAL() would read
	the byte following their terminator, and an empty word can never
	match an identifier anyway.
*/
void token_inithash()
{
	for (unsigned i=0; i<NB_ELM(tok_index); i++) tok_index[i] = NULL;
	TBLKEY *pt = tbl;
	for (unsigned i=0; i<NB_ELM(tbl); i++, pt++){
		pt->next = NULL;
		if (pt->word[0] == '\0') continue;	/* cancelled keyword */
		int hash = HASH_VAL(pt->word);
		pt->next = tok_index[hash];
		tok_index[hash] = pt;
	}
#if 0
	/* Statistics: length of the longest chain */
	{
		int maxs = 0;
		for (unsigned i = 0; i<NB_ELM(tok_index); i++){
			TBLKEY *chain = tok_index[i];
			int compte = 0;
			while (chain != NULL){
				compte++;
				chain = chain->next;
			}
			if (compte > maxs) maxs = compte;
		}
		fprintf (stderr,"recherche maximum = %d\n",maxs);
	}
#endif
}
/*
	Réassigne la chaine associé à un mot clé
	str vaut "vieux=nouveau" ou "vieux"; dans ce dernier cas le mot
	clé est annulé
	Retourne -1 si chaine invalide ou mot-clé inconnue
*/
export int token_chgkeyword(const char *str)
{
	int ret = -1;
	char tmp[100];
	TBLKEY *pt=tbl;
	char *ptegal;
	char *repl = "";
	strcpy (tmp,str);
	ptegal = strchr(tmp,'=');
	if (ptegal != NULL){
		*ptegal++ = '\0';
		repl = ptegal;
	}else{
		ptegal = "";
	}
	while (pt->word != NULL){
		if (strcmp(pt->word,tmp)==0){
			pt->word = strdup_err(repl,1);
			ret = 0;
			break;
		}
		pt++;
	}
	token_inithash();
	return ret;
}

/*
	Stops recognising the C++ keywords.
	Each spelling listed below is passed on its own (no "=new" part) to
	token_chgkeyword().
*/
export void token_nocplus (void)
{
	static const char *cplus_words[]={
		"public","PUBLIC","protected","PROTECTED","private","PRIVATE",
		"class","virtual","VIRTUAL",
	};
	const char **end = cplus_words + NB_ELM(cplus_words);
	for (const char **word = cplus_words; word != end; word++){
		token_chgkeyword (*word);
	}
}
/* Non-zero when token_id() must look identifiers up in the keyword table */
static char token_chkkey=1;
/*
	Records whether keywords must be recognised or not.

	Preprocessors do not need keyword recognition; turning it off spares
	the lookup done in token_id() for every identifier.

	mode: 0 disables the recognition, any other value enables it.
*/
export void token_setchkkey (int mode)
{
	/* Normalise to 0/1: a plain (char) cast would turn a non-zero mode */
	/* such as 256 into 0 */
	token_chkkey = (char)(mode != 0);
}

/*
	Recognises a keyword or an identifier and fills in the token.

	The text is copied by token_copyid() into the area obtained from
	token_alloctxt(). The token is TOK_ID unless keyword checking is
	enabled and the text matches an entry of the keyword index, in which
	case it becomes TOK_KEYWORD with the matching keyword code.

	Returns the address in the string just after the token.
*/

char *token_id (
	TOKEN *tok,		/* token to fill in */
	char *ptl)		/* string to analyse */
					/* ptl points to the start of an identifier */
{
	char *const start = ptl;
	char *const name  = token_alloctxt();
	tok->longtxt = name;
	ptl = token_copyid (ptl,name);
	/* Reserves the source length + 1 (presumably marks the end of the */
	/* used text area -- confirm against token_settxt) */
	token_settxt (name + (int)(ptl-start)+1);
	tok->type = TOK_ID;
	tok->type2.other = 0;
	if (!token_chkkey) return ptl;
	/* Walk the chain of keywords sharing the hash of this identifier */
	for (TBLKEY *key = tok_index[HASH_VAL(name)]; key != NULL; key = key->next){
		if (strcmp(key->word,name) == 0){
			tok->type2.keyword = key->type2;
			tok->type = TOK_KEYWORD;
			break;
		}
	}
	return ptl;
}	
static char is_long;
static char is_hex;
static char is_octal;
static char is_float;

/*
	Copies a run of decimal digits from ptl to text, advancing both.
	Macro activation/deactivation markers met on the way are handed to
	preproc_macroact() and are not copied.
	Stops on the first character that is neither a digit nor a marker.
*/
inline void token_whiledig (char *&text, char *&ptl)
{
	for (;;){
		const char carac = *ptl;
		if (carac == MACRO_DESACTIVE_CAR
			|| carac == MACRO_ACTIVE_CAR){
			// Deactivates/activates a macro
			ptl = preproc_macroact (ptl);
		}else if (isdigit((unsigned char)carac)){
			/* cast: isdigit() is undefined for a negative plain char */
			*text++ = carac;
			ptl++;
		}else{
			break;
		}
	}
}

/*
	Copies a run of hexadecimal digits from ptl to text, advancing both.
	Macro activation/deactivation markers met on the way are handed to
	preproc_macroact() and are not copied.
	Stops on the first character that is neither a hex digit nor a marker.
*/
inline void token_whilexdig (char *&text, char *&ptl)
{
	for (;;){
		const char carac = *ptl;
		if (carac == MACRO_DESACTIVE_CAR
			|| carac == MACRO_ACTIVE_CAR){
			// Deactivates/activates a macro
			ptl = preproc_macroact (ptl);
		}else if (isxdigit((unsigned char)carac)){
			/* cast: isxdigit() is undefined for a negative plain char */
			*text++ = carac;
			ptl++;
		}else{
			break;
		}
	}
}

/*
	Extracts a number from a string.
	Copies the text of the number, followed by '\0', into text and
	returns the new value of ptl (just past the token).

	Integers (decimal, octal, "0x" hexadecimal and assembler-style
	hexadecimal with an 'h' suffix) and floating point forms (decimal
	point and/or exponent) are scanned. The kind found is left in the
	file-scope flags is_float, is_hex, is_octal and is_long, which are
	all reset on entry.
*/
char *token_copynumber(char *text, char *ptl)
{
	is_float = is_hex = is_octal = is_long = 0;
	/*
		The preprocessor also tolerates hexadecimal numbers written in
		assembler format. This is an extension, but it is caught by the
		compiler anyway, and it is not ambiguous: a number can never,
		under any circumstance, be immediately followed by the letter h
		(with no space in between).
	*/
	if (ptl[0] == '0'){
		*text++ = *ptl++;
		if (ptl[0] == 'x' || ptl[0] == 'X'){
			/* "0x" prefix: hex digits, then any run of L/U suffixes */
			char carac;
			*text++ = *ptl++;
			is_hex = 1;
			while (isxdigit(*ptl)) *text++ = *ptl++;
			carac = *ptl;
			if (isalpha(carac)){
				while (1){
					carac = *ptl;
					char ucarac = (char)toupper(carac);		
					if (ucarac == 'L'){
						is_long = 1;
						*text++ = carac;
						ptl++;
					}else if (ucarac == 'U'){
						*text++ = carac;
						ptl++;
					}else{
						break;
					}
				}
			}
		}else{
			/* Leading '0' alone: octal until proven otherwise below */
			is_octal = 1;
		}
	}
	/* #Specification: Lexical analysis / Integer number
		A suffix allows the type of integer literals to be stated.
		The suffix is a letter, either upper or lower case. The suffix
		is attached to the right of the last digit.

		L : The preceding number is of type "long"
		U : The preceding number is of type "unsigned"
		H : The preceding number is of type "unsigned" hexadecimal.
			This is probably an extension to ANSI C
	*/
	if (!is_hex){
		while (1) {
			/* ptl is already past carac here; branches that reject */
			/* carac push it back with ptl-- */
			char carac = *ptl++;
			if (carac == MACRO_DESACTIVE_CAR
				|| carac == MACRO_ACTIVE_CAR){
				// Deactivates/activates a macro
				ptl = preproc_macroact (ptl-1);
			}else if (isdigit(carac)){
				*text++ = carac;
			}else if (isxdigit(carac)){
				/* A letter a-f/A-F: exponent marker or hexadecimal digit */
				*text++ = carac;
				if (carac == 'e' || carac == 'E'){
					/* May be a floating point number */
					is_octal = 0;
					carac = *ptl;
					if (carac == '-' || carac == '+'){
						/* Definitely a floating point number */
						*text++ = carac;
						ptl++;
						is_float = 1;
						token_whiledig (text,ptl);
						if (*ptl == 'l' || *ptl == 'L'){
							*text++ = *ptl++;
						}
					}else{
						/* What follows is too tolerant */
						token_whilexdig (text,ptl);
						if(*ptl == 'h' || *ptl == 'H'){
							is_hex = 1;
							*text++ = *ptl++;
						}else{
							is_float = 1;
							if (*ptl == 'l' || *ptl == 'L'){
								*text++ = *ptl++;
							}
						}
					}
				}else{
					/* Any other hex letter: assembler-style hexadecimal, */
					/* with an optional 'h' suffix */
					is_hex   = 1;
					is_octal = 0;
					token_whilexdig (text,ptl);
					if (*ptl == 'h' || *ptl == 'H'){
						*text++ = *ptl++;
					}
				}
				break;
			}else if(carac == '.'){
				/* Definitely a floating point number */
				*text++ = carac;
				is_float = 1;
				is_octal = 0;
				token_whiledig (text,ptl);
				carac = *ptl;
				if (carac == 'e' || carac == 'E'){
					/* Optional exponent with optional sign */
					*text++ = carac;
					ptl++;
					carac = *ptl;
					if (carac == '-' || carac == '+'){
						*text++ = carac;
						ptl++;
					}
					token_whiledig (text,ptl);
				}
				if (*ptl == 'l' || *ptl == 'L'){
					*text++ = *ptl++;
				}
				break;
			}else if (isalpha(carac)){
				/* A non-hex letter right after the digits: suffix or end */
				char ucarac = (char)toupper(carac);		
				if (ucarac == 'H'){
					is_hex   = 1;
					is_octal = 0;
					*text++ = carac;
				}else{
					/* Run of L/U suffixes; any other character is */
					/* pushed back and ends the number */
					while (1){
						if (ucarac == 'L'){
							is_long = 1;
							*text++ = carac;
						}else if (ucarac == 'U'){
							*text++ = carac;
						}else{
							ptl--;
							break;
						}
						carac = *ptl++;
						ucarac = (char)toupper(carac);
					}
				}
				break;
			}else{
				/* Not part of the number: push it back */
				ptl--;
				break;
			}
		}
	}
	*text++='\0';
	return ptl;
}
/*
	Returns the numeric value of a token.
	If the token does not represent a number, returns 0.
	If the token represents a floating point number, returns 0.

	The value is accumulated in an unsigned long so that an oversized
	literal wraps around instead of causing a signed overflow, and decimal
	text is converted with strtol(), which is defined on overflow
	(unlike atol()).
*/
long token_evalnum (TOKEN *tok)
{
	unsigned long val = 0;
	const char *txt = tok->longtxt;
	switch (tok->type2.ctenum){
	case NUM_OCTAL:
	case NUM_OCTALL:
		/* Tolerant: any decimal digit is accepted, as in the scanner */
		while (isdigit((unsigned char)*txt)){
			val = val*8 + (unsigned long)(*txt - '0');
			txt++;
		}
		break;
	case NUM_HEXA:
	case NUM_HEXAL:
		/* The 'x' of a "0x" prefix is skipped; any other non-hex */
		/* character (suffix, terminator) ends the number */
		for ( ; ; txt++){
			const unsigned char c = (unsigned char)*txt;
			if (isxdigit(c)){
				/* For a-f and A-F the low nibble is 1..6, hence the + 9 */
				val = val*16 + (unsigned long)(isdigit(c) ? c - '0' : (c & 0xf) + 9);
			}else if (c != 'x' && c != 'X'){
				break;
			}
		}
		break;
	case NUM_INT:
	case NUM_INTL:
		return strtol(txt,NULL,10);
	case NUM_FLOAT:
	default:
		break;
	}
	return (long)val;
}
/*
	Recognises a number and fills in the token.
	Returns the new value of ptl (just past the token).
	Integers and floating point numbers are both recognised; the kind is
	derived from the flags left by token_copynumber(), octal taking
	precedence over hexadecimal, then floating point, then plain integer.
*/
export char *token_number (
	TOKEN *tok,		/* token to fill in */
	char *ptl)		/* string to analyse */
					/* ptl points to the start of a number */
{
	char *const text = token_alloctxt ();
	tok->longtxt = text;
	tok->type  = TOK_CTENUM;

	char *const after = token_copynumber(text,ptl);
	CTENUM kind;
	if (is_octal){
		kind = is_long ? NUM_OCTALL : NUM_OCTAL;
	}else if (is_hex){
		kind = is_long ? NUM_HEXAL : NUM_HEXA;
	}else if (is_float){
		kind = NUM_FLOAT;
	}else{
		kind = is_long ? NUM_INTL : NUM_INT;
	}
	tok->type2.ctenum = kind;
	token_settxt(text+(int)(after-ptl)+1);
	return after;
}

/*
	Recognises a character string and fills in the token.
	Returns the new value of ptl (just past the token).
	Does not understand the continuation syntax.
*/

char *token_string (
	TOKEN *tok,		/* token to fill in */
	char *ptl)		/* string to analyse */
					/* ptl points to the start of a string */
{
	char *const buf = token_alloctxt();
	tok->type = TOK_STRING;
	tok->longtxt = buf;
	/* 1000 is the size limit handed to token_copystring() */
	char *const after = token_copystring (buf,ptl,1000);
	token_settxt(buf + (int)(after-ptl)+1);
	return after;
}
/*
	Recognises a character between single quotes and fills in the token.
	Returns the new value of ptl (just past the token).
*/

char *token_quote (
	TOKEN *tok,		/* token to fill in */
	char *ptl)		/* string to analyse */
					/* ptl points to a ' */
{
	char *const buf = token_alloctxt();
	tok->type = TOK_QUOTE;
	tok->longtxt = buf;
	char *const after = token_copyquote (buf,ptl,100);	/* arbitrary length */
	token_settxt(buf+(int)(after-ptl)+1);
	return after;
}

/*
	Returns the numeric value represented by a character constant
	(text between single quotes).

	The first character of the token text is skipped (presumably the
	opening quote). Handled forms:
		plain character      'a'
		hexadecimal escape   '\x41'  (upper and lower case digits)
		numeric escape       '\101'
		simple escapes       '\n' '\t' '\r' '\a' '\b' '\f' '\v'
		any other escaped character stands for itself ('\\', '\'', '\"')
*/
export char token_evalquote(TOKEN *tok)
{
	const char *str = tok->longtxt+1;
	char ret = 0;
	if (*str != '\\') return *str;
	str++;
	switch (*str){
	case 'x':
		str++;
		while (isxdigit((unsigned char)*str)){
			const char carac = *str++;
			int dig;
			/* Lower case must be tested first: 'a'..'f' also */
			/* compare >= 'A' */
			if (carac >= 'a'){
				dig = carac - 'a' + 10;
			}else if (carac >= 'A'){
				dig = carac - 'A' + 10;
			}else{
				dig = carac - '0';
			}
			ret = (char)(ret * 16 + dig);
		}
		break;
	case 'a': ret = '\a'; break;
	case 'b': ret = '\b'; break;
	case 'f': ret = '\f'; break;
	case 'n': ret = '\n'; break;
	case 'r': ret = '\r'; break;
	case 't': ret = '\t'; break;
	case 'v': ret = '\v'; break;
	default:
		if (isdigit((unsigned char)*str)){
			/* Numeric escape, accumulated in base 8 */
			while (isdigit((unsigned char)*str)){
				ret = (char)(ret * 8 + *str - '0');
				str++;
			}
		}else{
			/* '\\', '\'', '\"', ... : the character itself */
			ret = *str;
		}
		break;
	}
	return ret;
}

#ifdef TEST

/*
	Sample numbers for the self-test, NULL terminated. main() feeds each
	of them to test() with a series of trailing texts appended.
*/
static char *tbn[]={
	"1","0x19","019","0ef",
	"0.1",".1e10","10.e-10","10.e10","10.10e10","10.10e+10",
	NULL
};

/*
	Self-test helper: scans str followed by ajout with token_number()
	and prints the input, the token text, the text left unscanned and
	the kind of number found. A row of stars flags a token text that
	differs from str.
*/
static void test (const char *str, const char *ajout)
{
	/* Indexed by the ctenum value; assumes the enumeration is declared */
	/* in this order -- TODO confirm against the CTENUM declaration */
	static char *noms[]={
		"NUM_INT", "NUM_INTL",
		"NUM_HEXA", "NUM_HEXAL",
		"NUM_OCTAL", "NUM_OCTALL",
		"NUM_FLOAT"
	};
	char ligne[100];
	TOKEN tok;
	sprintf (ligne,"%s%s",str,ajout);
	printf (":%s: -> ",ligne);
	char *reste = token_number (&tok,ligne);
	if (tok.type != TOK_CTENUM){
		printf ("Pas un nombre :%s:\n",reste);
		return;
	}
	printf (":%s: :%s: %s",tok.longtxt,reste
		,noms[tok.type2.ctenum]);
	if (strcmp(tok.longtxt,str)!=0) printf ("\t*******");
	printf ("\n");
}

/*
	Self-test driver.
	With exactly one argument: checks that every word of the keyword
	table is recognised as its keyword, and that the same word followed
	by '_' is not. Stars flag a failure.
	Otherwise: runs test() on every sample number with a series of
	trailing texts.
*/
int main (int argc, char *[])
{
	if (argc != 2){
		static const char *const suffixes[]={
			"","abcdef","   ","l","L","u","U","h","H"
		};
		for (char **nombre = tbn; *nombre != NULL; nombre++){
			for (unsigned s=0; s<NB_ELM(suffixes); s++){
				test (*nombre,suffixes[s]);
			}
			printf ("\n");
		}
		return 0;
	}
	token_inithash();
	for (TBLKEY *pt = tbl; pt != tbl + NB_ELM(tbl); pt++){
		TOKEN tok;
		char word[30];
		strcpy (word,pt->word);
		/* The bare word must come back as its own keyword */
		token_id (&tok,word);
		printf ("%s %d -> %d %d\n",word,pt->type2,tok.type,tok.type2.keyword);
		if (tok.type != TOK_KEYWORD || tok.type2.keyword != pt->type2){
			printf ("\t****\n");
		}
		/* The word with a trailing '_' must not be a keyword */
		strcat (word,"_");
		token_id (&tok,word);
		printf ("%s %d -> %d %d\n",word,pt->type2,tok.type,tok.type2.keyword);
		if (tok.type == TOK_KEYWORD){
			printf ("\t****\n");
		}
	}
	return 0;
}

#endif