123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391(***********************************************************************)(* *)(* The CamlZip library *)(* *)(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)(* *)(* Copyright 2001 Institut National de Recherche en Informatique et *)(* en Automatique. All rights reserved. This file is distributed *)(* under the terms of the GNU Library General Public License, with *)(* the special exception on linking described in file LICENSE. *)(* *)(***********************************************************************)openCore(* Various parts of this module, as well as its global structure are
adapted from the gzip module in the camlzip library.
This module was then imported from biocaml 0.8 under LGPL *)letmax_block_size=0x10000letmax_isize=0xff00(* Justification for the above constants *)(* let compressBound = *)(* let open Ctypes in *)(* let open Foreign in *)(* foreign "compressBound" (int @-> returning int) *)(* # compressBound 0xff00;; *)(* - : int = 65311 *)(* The size of the header is 16, footer is 8, so data + header + footer < 0x10000 *)exceptionErrorofstringtypein_channel={ic:Stdlib.in_channel;(* Underlying channel *)in_bufz:bytes;(* Compressed block *)in_buf:bytes;(* Uncompressed block *)mutablein_block_offset:Int64.t;(* Offset of the current block *)mutablein_pos:int;(* Position in the current block *)mutablein_avail:int;(* Number of available characters in the current block, can be less than [max_block_size] *)mutablein_eof:bool;(* Flag indicating we reached the end of the file *)mutablein_stream:Zlib.stream;}letof_in_channelic={ic;in_bufz=Bytes.makemax_block_size'\000';in_buf=Bytes.makemax_block_size'\000';in_block_offset=Int64.zero;in_pos=0;in_avail=0;in_stream=Zlib.inflate_initfalse;in_eof=false}letopen_infn=of_in_channel(Stdlib.open_in_binfn)letdispose_iniz=iz.in_eof<-true;Zlib.inflate_endiz.in_streamletclose_iniz=dispose_iniz;In_channel.closeiz.icletinput_bytet=Caml.input_bytetletinput_u16ic=letb1=input_byteicinletb2=input_byteicinb1+b2lsl8letinput_s32ic=letb1=input_byteicinletb2=input_byteicinletb3=input_byteicinletb4=input_byteicinletopenInt32inbit_or(of_int_exnb1)(bit_or(shift_left(of_int_exnb2)8)(bit_or(shift_left(of_int_exnb3)16)(shift_left(of_int_exnb4)24)))(* Raises End_of_file iff there is no more block to read *)letread_headeriz=matchIn_channel.input_byteiz.icwith|None->iz.in_eof<-true;raiseEnd_of_file|Someid1->tryletid2=input_byteiz.icinifid1<>0x1F||id2<>0x8Bthenraise(Error"bad magic number, not a bgzf file");letcm=input_byteiz.icinifcm<>8thenraise(Error"unknown compression method");letflags=input_byteiz.icinifflags<>0x04thenraise(Error("bad flags, not a bgzf file"));for_=1to6doignore(input_byteiz.ic:int)done;letxlen=input_u16iz.icinletsi1=input_byteiz.icinletsi2=input_byteiz.icinletslen=input_u16iz.icinifsi1<>66||si2<>67||slen<>2thenraise(Error"bad extra subfield");letbsize=input_u16iz.icinfor_=1toxlen-6doignore(input_byteiz.ic:int)done;bsize-xlen-19withEnd_of_file->raise(Error"premature end of file, not a bgzf file")letread_blockiz=letrecloopposzlenzposlencrcsize=let(finished,used_in,used_out)=tryZlib.inflateiz.in_streamiz.in_bufzposzlenziz.in_bufposlenZlib.Z_SYNC_FLUSHwithZlib.Error(_,_)->raise(Error"error during decompression")inletposz=posz+used_ininletlenz=lenz-used_ininletcrc=Zlib.update_crccrciz.in_bufposused_outinletsize=size+used_outiniffinishedthencrc,sizeelseloopposzlenz(pos+used_out)(len-used_out)crcsizeintryiz.in_block_offset<-In_channel.posiz.ic;letcdata_size=read_headerizin(* read_header raises End_of_file iff there is no more block to read *)tryStdlib.really_inputiz.iciz.in_bufz0cdata_size;letref_crc=input_s32iz.icinletref_size=input_s32iz.ic|>Int32.to_int_exninZlib.inflate_endiz.in_stream;iz.in_stream<-Zlib.inflate_initfalse;letcrc,size=loop0cdata_size0max_block_sizeInt32.zero0inifInt32.(crc<>ref_crc)thenraise(Error"CRC mismatch, data corrupted");ifsize<>ref_sizethenraise(Error"size mismatch, data corrupted");iz.in_pos<-0;iz.in_avail<-sizewithEnd_of_file->raise(Error"premature end of file, not a bgzf file")withEnd_of_file->iz.in_eof<-trueletinputizbufposlen=letn=Bytes.lengthbufinifpos<0||len<0||pos+len>nthenraise(Invalid_argument"Bgzf.input");ifiz.in_eofthen0else(letrecloopposlenread=iflen=0thenreadelse(ifiz.in_pos=iz.in_availthenread_blockiz;ifiz.in_eofthenreadelse(letn=min(iz.in_avail-iz.in_pos)leninStdlib.Bytes.blitiz.in_bufiz.in_posbufposn;iz.in_pos<-iz.in_pos+n;loop(pos+n)(len-n)(read+n)))inloopposlen0)letrecreally_inputizbufposlen=iflen<=0then()else(letn=inputizbufposleninifn=0thenraiseEnd_of_fileelsereally_inputizbuf(pos+n)(len-n))letinput_stringizn=ifn<0thenraise(Invalid_argument"Bgzf.input_string iz n: n should be non negative");letr=Bytes.maken'@'inreally_inputizr0n;Bytes.unsafe_to_string~no_mutation_while_string_reachable:rletinput_char=letbuf=Bytes.create1infuniz->ifinputizbuf01=0thenraiseEnd_of_fileelseBytes.getbuf0letinput_u8iz=Char.to_int(input_chariz)(* input_s* functions adapted from Batteries BatIO module *)letinput_s8iz=letb=input_u8izinifbland128<>0thenb-256elsebletinput_u16iz=letb1=input_u8izinletb2=input_u8izinb1lor(b2lsl8)letinput_s16iz=leti=input_u16izinifiland32768<>0theni-65536elseiletinput_s32iz=letb1=input_u8izinletb2=input_u8izinletb3=input_u8izinletb4=input_u8izinInt32.bit_or(Int32.of_int_exnb1)(Int32.bit_or(Int32.shift_left(Int32.of_int_exnb2)8)(Int32.bit_or(Int32.shift_left(Int32.of_int_exnb3)16)(Int32.shift_left(Int32.of_int_exnb4)24)))letseek_inizi=letcoffset=Int64.shift_righti16inletuoffset=Int64.(to_int_exn(bit_and0xFFFFLi))inIn_channel.seekiz.iccoffset;iz.in_block_offset<-coffset;iz.in_eof<-false;ifuoffset=0then(iz.in_pos<-0;iz.in_avail<-0)else(read_blockiz;iz.in_pos<-iz.in_pos+uoffset)letvirtual_offsetiz=ifiz.in_pos=iz.in_availthenInt64.(shift_left(In_channel.posiz.ic)16)elseInt64.(shift_leftiz.in_block_offset16+of_int_exniz.in_pos)letwith_file_infn~f=letiz=open_infninletr=try`Ok(fiz)withe->`Erroreinclose_iniz;matchrwith|`Oky->y|`Errorexn->raiseexnexceptionUnparser_errorofstringtypeout_channel={out_chan:Stdlib.out_channel;out_ubuffer:bytes;out_cbuffer:bytes;mutableout_pos:int;(* position in out_ubuffer *)out_level:int;}letoutput_int16ocn=Out_channel.output_byteocn;Out_channel.output_byteoc(nlsr8)letoutput_int32ocn=letr=refninfor_=1to4doOut_channel.output_byteoc(Int32.to_int_exn!r);r:=Int32.shift_right_logical!r8doneletwrite_blockocbuflen~isize~crc32=letxlen=6inletbsize=20+xlen+leninassert(bsize<0x10000);Out_channel.output_byteoc0x1F;(* ID1 *)Out_channel.output_byteoc0x8B;(* ID2 *)Out_channel.output_byteoc8;(* compression method *)Out_channel.output_byteoc4;(* flags *)for_=1to4doOut_channel.output_byteoc0(* mtime *)done;Out_channel.output_byteoc0;(* xflags *)Out_channel.output_byteoc0xFF;(* OS (unknown) *)output_int16ocxlen;(* XLEN *)Out_channel.output_byteoc0x42;(* SI1 *)Out_channel.output_byteoc0x43;(* SI2 *)output_int16oc2;(* SLEN *)output_int16oc(bsize-1);(* BSIZE - 1*)Caml.outputocbuf0len;(* DATA *)output_int32occrc32;(* CRC32 *)output_int32ocisize(* ISIZE *)letof_out_channel?(level=6)oc=iflevel<1||level>9thenraise(invalid_arg"Bgzf: bad compression level");{out_chan=oc;out_ubuffer=Bytes.createmax_isize;out_cbuffer=Bytes.createmax_block_size;out_pos=0;out_level=level;}letopen_out?(level=6)filename=of_out_channel~level(Stdlib.open_out_binfilename)letpush_blockoz=letstream=Zlib.deflate_initoz.out_levelfalseinlet(_,used_in,used_out)=tryZlib.deflatestreamoz.out_ubuffer0oz.out_posoz.out_cbuffer0(Bytes.lengthoz.out_cbuffer)Zlib.Z_FINISHwithZlib.Error(_,_)->raise(Unparser_error("error during compression"))inassert(used_in=oz.out_pos);letcrc32=Zlib.update_crcInt32.zerooz.out_ubuffer0used_ininZlib.deflate_endstream;write_blockoz.out_chanoz.out_cbufferused_out~isize:(Int32.of_int_exnused_in)~crc32;oz.out_pos<-0letrecoutput~length~blitozbuf~pos~len=ifpos<0||len<0||pos+len>lengthbuftheninvalid_arg"Bgzf.output";(* If output buffer is full, flush it *)ifoz.out_pos=Bytes.lengthoz.out_ubufferthenpush_blockoz;letavailable=Bytes.lengthoz.out_ubuffer-oz.out_posinletncopy=minlenavailableinblitbufposoz.out_ubufferoz.out_posncopy;oz.out_pos<-oz.out_pos+ncopy;letremaining=len-ncopyinifremaining>0thenoutput~length~blitozbuf~pos:(pos+ncopy)~len:remainingletoutput_from_string=output~length:String.length~blit:Caml.Bytes.blit_stringletoutput=output~length:Bytes.length~blit:Caml.Bytes.blitletoutput_char=letbuf=Bytes.make1' 'infunozc->Bytes.setbuf0c;outputozbuf~pos:0~len:1(* output_* functions adapted from Batteries BatIO module *)letoutput_u8ozn=(* if n < 0 || n > 0xFF then raise (Invalid_argument "Bgzf.output_u8") ; *)output_charoz(Char.unsafe_of_int(nland0xFF))letoutput_s8ozn=ifn<-0x80||n>0x7Fthenraise(Invalid_argument"Bgzf.output_s8");ifn<0thenoutput_u8oz(n+256)elseoutput_u8oznletoutput_u16ozn=output_u8ozn;output_u8oz(nlsr8)letoutput_s16ozn=ifn<-0x8000||n>0x7FFFthenraise(Invalid_argument"Bgzf.output_s16");ifn<0thenoutput_u16oz(65536+n)elseoutput_u16oznletoutput_s32ozn=letbase=Int32.to_int_exnninletbig=Int32.to_int_exn(Int32.shift_right_logicaln24)inoutput_u8ozbase;output_u8oz(baselsr8);output_u8oz(baselsr16);output_u8ozbigletoutput_stringozs=output_from_stringozs~pos:0~len:(String.lengths)letbgzf_eof="\x1f\x8b\x08\x04\x00\x00\x00\x00\x00\xff\x06\x00BC\x02\x00\x1b\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00"letdispose_outoz=ifoz.out_pos>0thenpush_blockoz;Stdlib.output_stringoz.out_chanbgzf_eofletclose_outoz=dispose_outoz;Stdlib.close_outoz.out_chanletwith_file_out?levelfn~f=letoz=open_out?levelfninletr=try`Ok(foz)withe->`Erroreinclose_outoz;matchrwith|`Oky->y|`Errorexn->raiseexn