Next: About this document ...
Up: a
Previous: Bibliography
The following lists the resulting Prolog
program for the example presented in section 2. In this
case, Elex(Prolog) was instructed to produce compact
code.
:- module( text, [ tokenize/2 ]).
% tokenize(+Codes, -Tokens)
% Entry point of the tokenizer.  Codes is a list of character codes and
% Tokens is the resulting token list.
%
% For non-empty input the automaton (tokenize/5) is run from the current
% position.  If it recognises a lexeme and the look-ahead check (depth 0)
% succeeds, the action goal associated with the accepting state is called
% to extend the token list and tokenizing continues on the remaining
% input.  Otherwise error/3 is called with the offending code, the rest
% of the input and the token list.
tokenize([], []).
tokenize([Code|Codes], Tokens) :-
    (   tokenize(Tokens, MoreTokens, [Code|Codes], Rest, Action),
        look_ahead(Rest, 0)
    ->  call(Action),
        tokenize(Rest, MoreTokens)
    ;   error(Code, Codes, Tokens)
    ).
% look_ahead(+Codes, +Depth)
% Succeeds immediately when the input is exhausted; for non-empty input
% the check is delegated to look_aheadX/2 (note the swapped argument
% order: depth first, then the input).
look_ahead([], _).
look_ahead([Code|Codes], Depth) :-
    look_aheadX(Depth, [Code|Codes]).
% look_aheadX(+Depth, +Codes)
% With Depth 0 nothing has to be verified and the check succeeds
% (committing via the cut).  Otherwise one lexeme must be recognisable
% at the front of Codes by tokenize/5, after which the check continues
% on the remaining input with the depth decremented by one.
look_aheadX(0, _) :- !.
look_aheadX(Depth, [Code|Codes]) :-
    tokenize(_, _, [Code|Codes], Rest, _),
    Remaining is Depth - 1,
    look_ahead(Rest, Remaining).
% tokenize(?Tokens0, ?Tokens, +Codes0, -Codes, -Goal)
% Recognises one lexeme at the front of Codes0, leaving Codes as the
% unconsumed input.  The automaton (tokenize/9) is started in state 0
% with an open list Lexeme that collects the matched codes and is closed
% with [] on acceptance; the same Lexeme is also passed as the token
% argument that ends up inside Goal.
tokenize(Tokens0, Tokens, Codes0, Codes, Goal) :-
    tokenize(0, Tokens0, Tokens, Codes0, Codes, Lexeme, [], Lexeme, Goal).
% tokenize(+State, ?Out0, ?Out, +In0, -In, -Sym0, ?Sym, ?Tok, -Goal)
% Transition table of the generated automaton.
%
%   State     current automaton state (0 is the start state)
%   Out0/Out  token-list pair, only passed through into Goal
%   In0/In    input codes before / after the recognised lexeme
%   Sym0/Sym  open list collecting the consumed codes
%   Tok       passed through unchanged into Goal
%   Goal      action term built when an accepting state is left
%
% For every state the transition clause precedes the accepting clause,
% so the first solution consumes as many codes as possible; shorter
% matches are only found on backtracking.
%
% Codes used: 9-10 tab/newline, 32 space, 43 '+', 45 '-', 46 '.',
% 48-57 digits, 65-90 upper case, 69 'E', 97-122 lower case, 101 'e'.

% State 0: start.
tokenize(0, Out0, Out, [Code|In0], In, [Code|Sym0], Sym, Tok, Goal) :-
    (   ( Code >= 9, Code =< 10 ; Code =:= 32 )
    ->  tokenize(8, Out0, Out, In0, In, Sym0, Sym, Tok, Goal)
    ;   Code >= 48, Code =< 57
    ->  tokenize(1, Out0, Out, In0, In, Sym0, Sym, Tok, Goal)
    ;   ( Code >= 65, Code =< 90 ; Code >= 97, Code =< 122 )
    ->  tokenize(7, Out0, Out, In0, In, Sym0, Sym, Tok, Goal)
    ).
% State 1: digits before any '.' or exponent.
tokenize(1, Out0, Out, [Code|In0], In, [Code|Sym0], Sym, Tok, Goal) :-
    (   Code =:= 46
    ->  tokenize(2, Out0, Out, In0, In, Sym0, Sym, Tok, Goal)
    ;   Code >= 48, Code =< 57
    ->  tokenize(1, Out0, Out, In0, In, Sym0, Sym, Tok, Goal)
    ;   ( Code =:= 69 ; Code =:= 101 )
    ->  tokenize(4, Out0, Out, In0, In, Sym0, Sym, Tok, Goal)
    ).
% State 2: just after '.'; a digit is required.
tokenize(2, Out0, Out, [Code|In0], In, [Code|Sym0], Sym, Tok, Goal) :-
    Code >= 48, Code =< 57,
    tokenize(3, Out0, Out, In0, In, Sym0, Sym, Tok, Goal).
% State 3: digits after '.'.
tokenize(3, Out0, Out, [Code|In0], In, [Code|Sym0], Sym, Tok, Goal) :-
    (   Code >= 48, Code =< 57
    ->  tokenize(3, Out0, Out, In0, In, Sym0, Sym, Tok, Goal)
    ;   ( Code =:= 69 ; Code =:= 101 )
    ->  tokenize(4, Out0, Out, In0, In, Sym0, Sym, Tok, Goal)
    ).
% State 4: just after 'E'/'e'; a sign or a digit is required.
tokenize(4, Out0, Out, [Code|In0], In, [Code|Sym0], Sym, Tok, Goal) :-
    (   ( Code =:= 43 ; Code =:= 45 )
    ->  tokenize(5, Out0, Out, In0, In, Sym0, Sym, Tok, Goal)
    ;   Code >= 48, Code =< 57
    ->  tokenize(6, Out0, Out, In0, In, Sym0, Sym, Tok, Goal)
    ).
% State 5: just after the exponent sign; a digit is required.
tokenize(5, Out0, Out, [Code|In0], In, [Code|Sym0], Sym, Tok, Goal) :-
    Code >= 48, Code =< 57,
    tokenize(6, Out0, Out, In0, In, Sym0, Sym, Tok, Goal).
% State 6: exponent digits.
tokenize(6, Out0, Out, [Code|In0], In, [Code|Sym0], Sym, Tok, Goal) :-
    Code >= 48, Code =< 57,
    tokenize(6, Out0, Out, In0, In, Sym0, Sym, Tok, Goal).
% State 7: inside a word (letters and '-').
tokenize(7, Out0, Out, [Code|In0], In, [Code|Sym0], Sym, Tok, Goal) :-
    (   ( Code =:= 45
        ; Code >= 65, Code =< 90
        ; Code >= 97, Code =< 122
        )
    ->  tokenize(7, Out0, Out, In0, In, Sym0, Sym, Tok, Goal)
    ).
% State 8: inside white space.
tokenize(8, Out0, Out, [Code|In0], In, [Code|Sym0], Sym, Tok, Goal) :-
    (   ( Code >= 9, Code =< 10 ; Code =:= 32 )
    ->  tokenize(8, Out0, Out, In0, In, Sym0, Sym, Tok, Goal)
    ).
% Accepting states: consume nothing further, close the collected code
% list and build the action term for the recognised lexeme.
tokenize(1, Out0, Out, In, In, Sym, Sym, Tok, 'Number'(Tok, Out0, Out)).
tokenize(3, Out0, Out, In, In, Sym, Sym, Tok, 'Number'(Tok, Out0, Out)).
tokenize(6, Out0, Out, In, In, Sym, Sym, Tok, 'Number'(Tok, Out0, Out)).
tokenize(7, Out0, Out, In, In, Sym, Sym, Tok, 'Word'(Tok, Out0, Out)).
tokenize(8, Out0, Out, In, In, Sym, Sym, Tok, 'WhiteSpace'(Tok, Out0, Out)).
% 'Number'(+Token, -Tokens0, ?Tokens)
% Action for a number lexeme: converts the matched lexeme Token to a
% number and puts number(Number) in front of Tokens, giving Tokens0.
%
% The tokenizer compares and collects characters as integer codes
% (e.g. C >= 48, C =< 57), so Token is a code list.  number_codes/2 is
% therefore the matching conversion; ISO number_chars/2 expects a list
% of one-character atoms and rejects a code list on conforming systems.
'Number'(__Token,__Tokens0,__Tokens):-
    number_codes(Number,__Token),
    __Tokens0=[number(Number)|__Tokens].
% 'Word'(+Token, -Tokens0, ?Tokens)
% Action for a word lexeme: converts the matched lexeme Token to an atom
% and puts w(Word) in front of Tokens, giving Tokens0.
%
% The tokenizer compares and collects characters as integer codes
% (e.g. C >= 97, C =< 122), so Token is a code list.  atom_codes/2 is
% therefore the matching conversion; ISO atom_chars/2 expects a list of
% one-character atoms and rejects a code list on conforming systems.
'Word'(__Token,__Tokens0,__Tokens):-
    atom_codes(Word,__Token),
    __Tokens0=[w(Word)|__Tokens].
% 'WhiteSpace'(+Token, ?Tokens0, ?Tokens)
% Action for a white-space lexeme: the lexeme is ignored and no token is
% produced, so the token list before and after it is the same list.
'WhiteSpace'(_Lexeme, Tokens, Tokens).
% error(+Code, +Codes, ?Tokens)
% Recovery for input on which no lexeme could be recognised: the
% offending character code is recorded as skip(Code) at the front of the
% token list and tokenizing resumes with the remaining input Codes.
error(Code, Codes, [skip(Code)|MoreTokens]) :-
    tokenize(Codes, MoreTokens).
Noord G.J.M. van
1998-09-25