123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279openCoreopenBiocaml_baseopenPrintftype'aitem=[|`Commentofstring|`Recordof'a]typestrand=[|`Plus|`Minus|`Not_relevant|`Unknown]letparse_strand=function|"."->Ok`Not_relevant|"?"->Ok`Unknown|"+"->Ok`Plus|"-"->Ok`Minus|s->Errorsletunparse_strand=function|`Not_relevant->"."|`Unknown->"?"|`Plus->"+"|`Minus->"-"letparse_itemfline=match(line:Line.t:>string)with|""->`Comment""|line->ifChar.(line.[0]='#')then`Comment(String.sliceline10)elseletfields=String.split~on:'\t'linein`Record(ffields)letunparse_itemf=function|`Commentc->sprintf"#%s"c|`Recordr->String.concat~sep:"\t"(fr)moduletypeBase=sigtypetvalloc:t->GLoc.tvalfrom_fields:stringlist->tvalto_fields:t->stringlistendmoduletypeRecord=sigtypetvalloc:t->GLoc.tvalof_line:Line.t->tvalto_line:t->stringendmoduletypeS=sigtyperecordvalload:string->recorditemlistvalload_records:string->recordlistvalload_as_lmap:string->recordGAnnot.LMap.tvalsave:recorditemlist->string->unitvalsave_records:recordlist->string->unitend(* this trick is necessary because otherwise Record cannot be
extended (it should be possible in 4.08) *)moduleMake'(T:Base)=structmoduleInternal_record=structtypet=T.tletloc=T.locletof_lineline=line|>Line.split~on:'\t'|>T.from_fieldsletto_liner=T.to_fieldsr|>String.concat~sep:"\t"endmoduleItem=struct(* type t = T.t
* let loc = T.loc *)letof_line=parse_itemT.from_fieldsletto_line=unparse_itemT.to_fieldsendletloadfn=In_channel.read_linesfn|>List.map~f:(funl->Item.of_line(Line.of_string_unsafel))letsavebedfn=Out_channel.with_filefn~f:(funoc->List.iterbed~f:(funitem->Out_channel.output_stringoc(Item.to_lineitem);Out_channel.output_charoc'\n'))letload_recordsfn=loadfn|>List.filter_map~f:(function|`Comment_->None|`Recordr->Somer)letsave_recordsrsfn=save(List.maprs~f:(funr->`Recordr))fnletload_as_lmapfn=(* FIXME: could use stream to read bed file *)loadfn|>Stream.of_list|>CFStream.Stream.filter_map~f:(function|`Comment_->None|`Recordx->Some(T.locx,x))|>GAnnot.LMap.of_streamendmoduleMake(T:Base)=structincludeMake'(T)moduleRecord=Internal_recordendtypefields=stringlist[@@derivingshow]moduleBed3=structtyperecord={chrom:string;chromStart:int;chromEnd:int;}moduleBase=structtypet=recordletlocr=GLoc.{chr=r.chrom;lo=r.chromStart;hi=r.chromEnd}letfrom_fields=function|chrom::chromStart::chromEnd::_->{chrom;chromStart=Int.of_stringchromStart;chromEnd=Int.of_stringchromEnd;}|l->failwithf"Expected more fields, got %s"(show_fieldsl)()letto_fieldsr=[r.chrom;sprintf"%d"r.chromStart;sprintf"%d"r.chromEnd]endincludeMake'(Base)moduleRecord=structincludeInternal_recordletof_locl={chrom=l.GLoc.chr;chromStart=l.lo;chromEnd=l.hi;}endendmoduleBed4=structtyperecord={chrom:string;chromStart:int;chromEnd:int;name:string;}moduleBase=structtypet=recordletlocr=GLoc.{chr=r.chrom;lo=r.chromStart;hi=r.chromEnd}letfrom_fields=function|chrom::chromStart::chromEnd::name::_->{chrom;chromStart=Int.of_stringchromStart;chromEnd=Int.of_stringchromEnd;name}|l->failwithf"Expected more fields, got %s"(show_fieldsl)()letto_fieldsr=[r.chrom;sprintf"%d"r.chromStart;sprintf"%d"r.chromEnd;r.name]endincludeMake(Base)endmoduleBed5=structtyperecord={chrom:string;chromStart:int;chromEnd:int;name:string;score:int;}moduleBase=structtypet=recordletlocr=GLoc.{chr=r.chrom;lo=r.chromStart;hi=r.chromEnd}letfrom_fields=function|chrom::chromStart::chromEnd::name::score::_->{chrom;chromStart=Int.of_stringchromStart;chromEnd=Int.of_stringchromEnd;name;score=Int.of_stringscore}|l->failwithf"Expected more fields, got %s"(show_fieldsl)()letto_fieldsr=[r.chrom;sprintf"%d"r.chromStart;sprintf"%d"r.chromEnd;r.name;sprintf"%d"r.score]endincludeMake'(Base)moduleRecord=structincludeInternal_recordletto_bed4=function|`Commentc->`Commentc|`Recordr->`Record{Bed4.chrom=r.chrom;chromStart=r.chromStart;chromEnd=r.chromEnd;name=r.name}endendmoduleBed6=structtyperecord={chrom:string;chromStart:int;chromEnd:int;name:string;score:int;strand:strand;}moduleBase=structtypet=recordletlocr=GLoc.{chr=r.chrom;lo=r.chromStart;hi=r.chromEnd}letfrom_fields=function|chrom::chromStart::chromEnd::name::score::strand::_->{chrom;chromStart=Int.of_stringchromStart;chromEnd=Int.of_stringchromEnd;name;score=Int.of_stringscore;strand=(matchparse_strandstrandwith|Oks->s|Errormsg->failwithmsg);}|l->failwithf"Expected more fields, got %s"(show_fieldsl)()letto_fieldsr=[r.chrom;sprintf"%d"r.chromStart;sprintf"%d"r.chromEnd;r.name;sprintf"%d"r.score;(matchr.strandwith|`Not_relevant->"."|`Unknown->"?"|`Plus->"+"|`Minus->"-")]endincludeMake(Base)endtyperecord=GLoc.t*fieldsmoduleBase=structtypet=recordletloc=fstletfrom_fieldsxs=Bed3.Base.(from_fieldsxs|>loc),xsletto_fields=sndendincludeBaseincludeMake(Base)