next up previous
Next: About this document ... Up: a Previous: Bibliography

Example of Prolog scanner generated by Elex

The following listing shows the Prolog program that Elex generates for the example presented in section 2. In this case, Elex(Prolog) was instructed to produce compact code.
:- module( text, [ tokenize/2 ]).

% tokenize(+Codes, -Tokens)
%
% Entry point of the scanner: converts a list of character codes into a
% list of tokens.  For each position the automaton (tokenize/5) proposes
% a lexeme together with an action goal; the action is called to emit
% (or drop) the token and scanning resumes on the remaining input.
% When no lexeme can be recognised, error/3 handles the offending code.
tokenize([], []).
tokenize([Code|Codes], Tokens) :-
    Input = [Code|Codes],
    (   tokenize(Tokens, TokensTail, Input, Remaining, Action),
        % The look-ahead depth is 0 here, so this check succeeds
        % immediately; it only matters for a larger depth.
        look_ahead(Remaining, 0)
    ->  call(Action),
        tokenize(Remaining, TokensTail)
    ;   error(Code, Codes, Tokens)
    ).

% look_ahead(+Codes, +Depth)
%
% Succeeds when the input is exhausted, or when look_aheadX/2 accepts
% the non-empty input for the given look-ahead depth.
look_ahead([], _Depth).
look_ahead([Code|Codes], Depth) :-
    look_aheadX(Depth, [Code|Codes]).

% look_aheadX(+Depth, +Codes)
%
% A depth of 0 succeeds without inspecting the input.  Otherwise one
% more lexeme must be recognisable at the front of the (non-empty)
% input, after which the check continues on the rest with the depth
% decremented by one.
look_aheadX(Depth, Codes) :-
    (   Depth = 0
    ->  true
    ;   Codes = [_|_],
        tokenize(_, _, Codes, Rest, _),
        Remaining is Depth - 1,
        look_ahead(Rest, Remaining)
    ).

% tokenize(?Tokens0, ?Tokens, +Codes0, -Codes, -Action)
%
% Recognises one lexeme at the front of Codes0, leaving Codes as the
% remaining input.  The automaton is started in state 0.  The lexeme is
% collected as a difference list that is closed with [] and handed to
% the automaton as the token text, so Action receives the complete
% lexeme together with the token difference list Tokens0/Tokens.
tokenize(Tokens0, Tokens, Codes0, Codes, Action) :-
    tokenize(0, Tokens0, Tokens, Codes0, Codes, Lexeme, [], Lexeme, Action).

% tokenize(+State, ?Toks0, ?Toks, +Codes0, -Codes, ?Sym0, ?Sym, ?Lexeme, -Action)
%
% Transition table of the scanner automaton.  Each transition clause
% consumes one character code from Codes0, appends it to the lexeme
% difference list Sym0/Sym and moves on to the next state.  The accepting
% clauses at the end consume nothing and return the action goal for the
% lexeme recognised so far.  Because the transition clause of a state
% precedes its accepting clause, the longest match is tried first.
%
% Character codes used: 9-10 tab/newline, 32 space, 43 '+', 45 '-',
% 46 '.', 48-57 digits, 65-90 'A'-'Z', 69 'E', 97-122 'a'-'z', 101 'e'.

% State 0: start of a lexeme.
tokenize(0, Toks0, Toks, [Code|Codes0], Codes, [Code|Sym0], Sym, Lexeme, Action) :-
    (   ( Code >= 9, Code =< 10 ; Code =:= 32 )
    ->  tokenize(8, Toks0, Toks, Codes0, Codes, Sym0, Sym, Lexeme, Action)
    ;   Code >= 48, Code =< 57
    ->  tokenize(1, Toks0, Toks, Codes0, Codes, Sym0, Sym, Lexeme, Action)
    ;   ( Code >= 65, Code =< 90 ; Code >= 97, Code =< 122 )
    ->  tokenize(7, Toks0, Toks, Codes0, Codes, Sym0, Sym, Lexeme, Action)
    ).

% State 1: inside the integer part of a number.
tokenize(1, Toks0, Toks, [Code|Codes0], Codes, [Code|Sym0], Sym, Lexeme, Action) :-
    (   Code =:= 46
    ->  tokenize(2, Toks0, Toks, Codes0, Codes, Sym0, Sym, Lexeme, Action)
    ;   Code >= 48, Code =< 57
    ->  tokenize(1, Toks0, Toks, Codes0, Codes, Sym0, Sym, Lexeme, Action)
    ;   ( Code =:= 69 ; Code =:= 101 )
    ->  tokenize(4, Toks0, Toks, Codes0, Codes, Sym0, Sym, Lexeme, Action)
    ).

% State 2: just after the decimal point; a digit must follow.
tokenize(2, Toks0, Toks, [Code|Codes0], Codes, [Code|Sym0], Sym, Lexeme, Action) :-
    Code >= 48, Code =< 57,
    tokenize(3, Toks0, Toks, Codes0, Codes, Sym0, Sym, Lexeme, Action).

% State 3: inside the fractional part.
tokenize(3, Toks0, Toks, [Code|Codes0], Codes, [Code|Sym0], Sym, Lexeme, Action) :-
    (   Code >= 48, Code =< 57
    ->  tokenize(3, Toks0, Toks, Codes0, Codes, Sym0, Sym, Lexeme, Action)
    ;   ( Code =:= 69 ; Code =:= 101 )
    ->  tokenize(4, Toks0, Toks, Codes0, Codes, Sym0, Sym, Lexeme, Action)
    ).

% State 4: just after the exponent marker; a sign or a digit must follow.
tokenize(4, Toks0, Toks, [Code|Codes0], Codes, [Code|Sym0], Sym, Lexeme, Action) :-
    (   ( Code =:= 43 ; Code =:= 45 )
    ->  tokenize(5, Toks0, Toks, Codes0, Codes, Sym0, Sym, Lexeme, Action)
    ;   Code >= 48, Code =< 57
    ->  tokenize(6, Toks0, Toks, Codes0, Codes, Sym0, Sym, Lexeme, Action)
    ).

% State 5: just after the exponent sign; a digit must follow.
tokenize(5, Toks0, Toks, [Code|Codes0], Codes, [Code|Sym0], Sym, Lexeme, Action) :-
    Code >= 48, Code =< 57,
    tokenize(6, Toks0, Toks, Codes0, Codes, Sym0, Sym, Lexeme, Action).

% State 6: inside the exponent digits.
tokenize(6, Toks0, Toks, [Code|Codes0], Codes, [Code|Sym0], Sym, Lexeme, Action) :-
    Code >= 48, Code =< 57,
    tokenize(6, Toks0, Toks, Codes0, Codes, Sym0, Sym, Lexeme, Action).

% State 7: inside a word (letters and hyphens).
tokenize(7, Toks0, Toks, [Code|Codes0], Codes, [Code|Sym0], Sym, Lexeme, Action) :-
    (   Code =:= 45
    ;   Code >= 65, Code =< 90
    ;   Code >= 97, Code =< 122
    ),
    !,
    tokenize(7, Toks0, Toks, Codes0, Codes, Sym0, Sym, Lexeme, Action).

% State 8: inside a run of white space.
tokenize(8, Toks0, Toks, [Code|Codes0], Codes, [Code|Sym0], Sym, Lexeme, Action) :-
    (   ( Code >= 9, Code =< 10 ; Code =:= 32 )
    ->  tokenize(8, Toks0, Toks, Codes0, Codes, Sym0, Sym, Lexeme, Action)
    ).

% Accepting states: nothing is consumed, the lexeme difference list is
% closed, and the action goal for the recognised lexeme is returned.
tokenize(1, Toks0, Toks, Codes, Codes, Sym, Sym, Lexeme, 'Number'(Lexeme, Toks0, Toks)).
tokenize(3, Toks0, Toks, Codes, Codes, Sym, Sym, Lexeme, 'Number'(Lexeme, Toks0, Toks)).
tokenize(6, Toks0, Toks, Codes, Codes, Sym, Sym, Lexeme, 'Number'(Lexeme, Toks0, Toks)).
tokenize(7, Toks0, Toks, Codes, Codes, Sym, Sym, Lexeme, 'Word'(Lexeme, Toks0, Toks)).
tokenize(8, Toks0, Toks, Codes, Codes, Sym, Sym, Lexeme, 'WhiteSpace'(Lexeme, Toks0, Toks)).

% 'Number'(+Lexeme, ?Tokens0, ?Tokens)
%
% Action for a numeric lexeme: converts the lexeme to a number and emits
% the token number(Number) at the front of the token difference list
% Tokens0/Tokens.
%
% The scanner compares input elements numerically, so Lexeme is a list
% of character codes.  number_codes/2 is therefore the matching
% conversion; number_chars/2 is specified for lists of one-character
% atoms and rejects code lists on ISO-conforming systems.
'Number'(__Token,__Tokens0,__Tokens):-
    number_codes(Number,__Token),
    __Tokens0=[number(Number)|__Tokens].

% 'Word'(+Lexeme, ?Tokens0, ?Tokens)
%
% Action for a word lexeme: converts the lexeme to an atom and emits the
% token w(Word) at the front of the token difference list Tokens0/Tokens.
%
% The scanner compares input elements numerically, so Lexeme is a list
% of character codes.  atom_codes/2 is therefore the matching
% conversion; atom_chars/2 is specified for lists of one-character atoms
% and rejects code lists on ISO-conforming systems.
'Word'(__Token,__Tokens0,__Tokens):-
    atom_codes(Word,__Token),
    __Tokens0=[w(Word)|__Tokens].

% 'WhiteSpace'(+Lexeme, ?Tokens0, ?Tokens)
%
% Action for a white-space lexeme: emits no token, so the token
% difference list is passed through unchanged.
'WhiteSpace'(_Lexeme, Tokens, Tokens).

% error(+Code, +Codes, -Tokens)
%
% Recovery for input on which no lexeme can be recognised: the offending
% code is reported as a skip(Code) token and scanning resumes with
% tokenize/2 on the codes that follow it.
error(Code, Codes, [skip(Code)|TokensRest]) :-
    tokenize(Codes, TokensRest).


Noord G.J.M. van
1998-09-25