123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231(**************************************************************************)(* -*- tuareg -*- *)(* *)(* Copyright (C) 2017,2018,2019 Yann Régis-Gianas, Nicolas Jeannerod, *)(* Ralf Treinen. *)(* *)(* This is free software: you can redistribute it and/or modify it *)(* under the terms of the GNU General Public License, version 3. *)(* *)(* Additional terms apply, due to the reproduction of portions of *)(* the POSIX standard. Please refer to the file COPYING for details. *)(**************************************************************************)openCSTopenPrelexerStatemoduleLexer(U:sigend):sigvalpush_here_document_delimiter:string->word_cst->unitvalpush_here_document_operator:bool->(wordlocatedref)->unitvalstart_here_document_lexing:unit->unitvalnext_here_document:Lexing.lexbuf->PrelexerState.t->Pretoken.t*Lexing.position*Lexing.positionvalinside_here_document:unit->boolvalnext_word_is_here_document_delimiter:unit->boolvalnext_line_is_here_document:unit->boolend=struct(*specification:
If more than one "<<" or "<<-" operator is specified on a line, the
here-document associated with the first operator shall be supplied
first by the application and shall be read first by the shell.
*)typedelimiter_info={(** information about a delimiter of a here document: *)word:string;(** delimiting word, with quotes removed *)quoted:bool;(** parts of delimiting word quoted ? *)dashed:bool;(** here operator <<- ? *)contents_placeholder:CST.wordCST.locatedref(** placeholder for the contents of the here document *)}letdelimiters_queue=(Queue.create():delimiter_infoQueue.t)letdashed_tmp=ref(None:booloption)letword_ref_tmp=ref(None:wordlocatedrefoption)typestate=|NoHereDocuments(* we are currently not reading any here documents, nor have we seen
a here document operator on the current line. *)|GotHereOperator(* we have seen a here document operator but we haven't seen the
corresponding delimite word yet. *)|GotDelimiter(* we have seen a here document operator and its delimiter word. *)|InsideHereDocuments(* we are currently in the process of reading here documents. *)letstate=refNoHereDocumentsletpush_here_document_operatordashedword_ref=if!state=GotHereOperatorthen(* FIXME: we should raise an Error.DuringParsing here if we can
get the current lexing position. *)failwith"redirection operator found where a delimter word is expected";assert(!state=NoHereDocuments||!state=GotDelimiter);(* we accept a push of an operator only when the two variables
dashed_tmp and word_ref_tmp hold value None, that is either
- they have not been assigned a value (state NoHereDocuments),
- or they have been assigned a value which has been used up by
push_here_document_delimiter (state GotDelimiter).
*)assert(!dashed_tmp=None);dashed_tmp:=Somedashed;assert(!word_ref_tmp=None);word_ref_tmp:=Someword_ref;state:=GotHereOperatorletpush_here_document_delimiter_wcst=(* we accept a push of a delimiting word only if we have already received
information about an operator which has not yet been used.
*)assert(!state=GotHereOperator);letquoted_flag=reffalseinletdashed=match!dashed_tmpwith|Someb->dashed_tmp:=None;b|None->assertfalseandword_ref=match!word_ref_tmpwith|Somer->word_ref_tmp:=None;r|None->assertfalseandunquoted_w=letunword(Word(s,_))=sinletrecunquote=function|[]->""|WordDoubleQuoteds::w->quoted_flag:=true;QuoteRemoval.on_string(unwords)^unquotew|WordSingleQuoteds::w->quoted_flag:=true;unwords^unquotew|(WordLiterals|WordNames)::w->lets'=Str.(global_replace(regexp"\\")""s)inifs<>s'thenquoted_flag:=true;s'^unquotew|WordVariable(VariableAtom(s,NoAttribute))::w->"$"^s^unquotew|_->failwith"Unsupported expansion in here document delimiter"inunquotecstinletquoted=!quoted_flag||List.exists(functionWordSingleQuoted_->true|_->false)cstinQueue.add{(*specification:
If any part of word is quoted, the delimiter shall be formed by
performing quote removal on word, and the here-document lines shall
not be expanded. Otherwise, the delimiter shall be the word itself.
*)word=unquoted_w;quoted;dashed;contents_placeholder=word_ref}delimiters_queue;state:=GotDelimiterletnext_here_documentlexbufcurrent=(*specification:
The here-document shall be treated as a single word that begins after
the next <newline> and continues until there is a line containing only
the delimiter and a <newline>, with no <blank> characters in
between. Then the next here-document starts, if there is one.
*)assert(!state=InsideHereDocuments);letdelimiter_info=tryQueue.takedelimiters_queuewithQueue.Empty->failwith"here document problem"inletstore_here_documentend_markercstcontentsdoc_startdoc_end=(* store in the placeholder the here-document with contents [contents],
start position [doc_start], and end position [doc_end]. *)(*specification:
If no part of word is quoted ... the <backslash> in the
input behaves as the <backslash> inside double-quotes (see
Double-Quotes). However, the double-quote character ( ' )' shall
not be treated specially within a here-document, except when the
double-quote appears within "$()", "``", or "${}".
*)letcontents=ifdelimiter_info.quotedthenQuoteRemoval.backslash_as_in_doublequotescontentselsecontentsinletcontents,cst=remove_contents_suffixdoc_endend_markercontentscstinletcontents=(*specification:
If the redirection operator is "<<-", all leading <tab>
characters shall be stripped from input lines ...
*)ifdelimiter_info.dashedthenQuoteRemoval.remove_tabs_at_linestartcontentselsecontentsindelimiter_info.contents_placeholder:=CST.{value=Word(contents,cst);position={start_p=doc_start;end_p=doc_end}}inlet((Word(doc,cst)),doc_start,line_end)=letcurrent=enter_here_documentdelimiter_info.dasheddelimiter_info.wordcurrentinletresult=ifdelimiter_info.quotedthenletbuffer=Buffer.create13inletcurrent=Prelexer.single_quotesbuffercurrentlexbufinreturnlexbufcurrent[]elsePrelexer.tokencurrentlexbufinmatchresultwith|[Pretoken.NEWLINE,p1,p2]->(* Special case for empty here document or ended by EOF. *)(Word("",[]),p1,p2)|[Pretoken.EOF,_,pos]->raise(Errors.DuringParsingpos)|result->located_word_ofresultinstore_here_documentdelimiter_info.wordcstdocdoc_startline_end;ifQueue.is_emptydelimiters_queuethenstate:=NoHereDocuments;letbefore_stop=Lexing.({line_endwithpos_cnum=line_end.pos_cnum-1;pos_bol=line_end.pos_bol-1;})in(Pretoken.NEWLINE,before_stop,line_end)letstart_here_document_lexing()=assert(!state=GotDelimiter);state:=InsideHereDocumentsletnext_word_is_here_document_delimiter()=(* if we have a value in dashed_tmp this means that we have read
a here operator for which we have not yet seen the corresponding
delimiting word.
*)!dashed_tmp<>Noneletnext_line_is_here_document()=!state=GotDelimiterletinside_here_document()=!state=InsideHereDocumentsend