open Core
open Printf

(** Strand column of a BED record. *)
type strand = [
  | `Plus
  | `Minus
  | `Not_relevant
  | `Unknown
]

(** [parse_strand s] decodes the textual strand column: ["."], ["?"], ["+"]
    and ["-"] map to [`Not_relevant], [`Unknown], [`Plus] and [`Minus]
    respectively. Any other string [s] yields [Error s], i.e. the error
    payload is the offending text itself. *)
let parse_strand = function
  | "." -> Ok `Not_relevant
  | "?" -> Ok `Unknown
  | "+" -> Ok `Plus
  | "-" -> Ok `Minus
  | s -> Error s

(** [unparse_strand s] is the inverse of {!parse_strand} on valid strands. *)
let unparse_strand = function
  | `Not_relevant -> "."
  | `Unknown -> "?"
  | `Plus -> "+"
  | `Minus -> "-"

(** Minimal description of a record type: how to locate it and how to convert
    it from/to the list of tab-separated fields of a line. *)
module type Base = sig
  type t
  val loc : t -> GLoc.t
  val from_fields : string list -> t
  val to_fields : t -> string list
end

(** A record type that can be read from and printed to a single line. *)
module type Item = sig
  type t
  val loc : t -> GLoc.t
  val of_line : Line.t -> t
  val to_line : t -> string
end

(** File-level operations for a given item type. *)
module type S = sig
  type item
  val load : string -> item list
  val load_as_lmap : string -> item GAnnot.LMap.t
  val save : item list -> string -> unit
end

(** [Make(T)] derives line-level ([Item]) and file-level ([load], [save],
    [load_as_lmap]) operations from the field-level conversions of [T]. *)
module Make(T : Base) = struct
  module Item = struct
    type t = T.t

    let loc = T.loc

    (** Split the line on tab characters and hand the fields to
        [T.from_fields]. *)
    let of_line line =
      line
      |> Line.split ~on:'\t'
      |> T.from_fields

    (** Join the fields produced by [T.to_fields] with tab characters. *)
    let to_line r =
      T.to_fields r
      |> String.concat ~sep:"\t"
  end

  (** [load fn] reads every line of file [fn] and parses each one with
      [Item.of_line]. Every line is parsed: no line is skipped, so any
      exception raised by [T.from_fields] propagates to the caller. *)
  let load fn =
    In_channel.read_lines fn
    |> List.map ~f:(fun l -> Item.of_line (Line.of_string_unsafe l))

  (** [save bed fn] writes the items of [bed] to file [fn], one per line,
      each followed by a newline character. *)
  let save bed fn =
    Out_channel.with_file fn ~f:(fun oc ->
        List.iter bed ~f:(fun item ->
            Out_channel.output_string oc (Item.to_line item) ;
            Out_channel.output_char oc '\n'
          )
      )

  (** [load_as_lmap fn] loads [fn] with {!load} and builds a [GAnnot.LMap.t]
      from the resulting [(location, item)] pairs. *)
  let load_as_lmap fn = (* FIXME: could use stream to read bed file *)
    load fn
    |> Stdlib.List.to_seq
    |> Seq.map (fun x -> T.loc x, x)
    |> GAnnot.LMap.of_seq
end

(** Raw list of fields of a line; [show_fields] is used in error messages. *)
type fields = string list
[@@deriving show]

(** Records made of the first three columns: chrom, chromStart, chromEnd. *)
module Bed3 = struct
  type item = {
    chrom : string ;
    chromStart : int ;
    chromEnd : int ;
  }

  module Base = struct
    type t = item

    let loc r = GLoc.{ chr = r.chrom ; lo = r.chromStart ; hi = r.chromEnd }

    (** Builds an item from the first three fields; additional fields are
        ignored. Fails with [Failure] when fewer than three fields are
        given; [Int.of_string] raises on a non-integer coordinate. *)
    let from_fields = function
      | chrom :: chromStart :: chromEnd :: _ ->
        { chrom ;
          chromStart = Int.of_string chromStart ;
          chromEnd = Int.of_string chromEnd ; }
      | l -> failwithf "Expected more fields, got %s" (show_fields l) ()

    let to_fields r = [
      r.chrom ;
      sprintf "%d" r.chromStart ;
      sprintf "%d" r.chromEnd ;
    ]
  end

  include Make(Base)

  module Item = struct
    include Item

    (** [of_loc l] is the item covering location [l]. *)
    let of_loc l = {
      chrom = l.GLoc.chr ;
      chromStart = l.lo ;
      chromEnd = l.hi ;
    }
  end
end

(** Records made of the first four columns: Bed3 columns plus name. *)
module Bed4 = struct
  type item = {
    chrom : string ;
    chromStart : int ;
    chromEnd : int ;
    name : string ;
  }

  module Base = struct
    type t = item

    let loc r = GLoc.{ chr = r.chrom ; lo = r.chromStart ; hi = r.chromEnd }

    (** Builds an item from the first four fields; additional fields are
        ignored. Fails with [Failure] when fewer than four fields are
        given; [Int.of_string] raises on a non-integer coordinate. *)
    let from_fields = function
      | chrom :: chromStart :: chromEnd :: name :: _ ->
        { chrom ;
          chromStart = Int.of_string chromStart ;
          chromEnd = Int.of_string chromEnd ;
          name }
      | l -> failwithf "Expected more fields, got %s" (show_fields l) ()

    let to_fields r = [
      r.chrom ;
      sprintf "%d" r.chromStart ;
      sprintf "%d" r.chromEnd ;
      r.name ;
    ]
  end

  include Make(Base)
end

(** Records made of the first five columns: Bed4 columns plus score. *)
module Bed5 = struct
  type item = {
    chrom : string ;
    chromStart : int ;
    chromEnd : int ;
    name : string ;
    score : int ;
  }

  module Base = struct
    type t = item

    let loc r = GLoc.{ chr = r.chrom ; lo = r.chromStart ; hi = r.chromEnd }

    (** Builds an item from the first five fields; additional fields are
        ignored. Fails with [Failure] when fewer than five fields are
        given; [Int.of_string] raises on a non-integer coordinate or
        score. *)
    let from_fields = function
      | chrom :: chromStart :: chromEnd :: name :: score :: _ ->
        { chrom ;
          chromStart = Int.of_string chromStart ;
          chromEnd = Int.of_string chromEnd ;
          name ;
          score = Int.of_string score }
      | l -> failwithf "Expected more fields, got %s" (show_fields l) ()

    let to_fields r = [
      r.chrom ;
      sprintf "%d" r.chromStart ;
      sprintf "%d" r.chromEnd ;
      r.name ;
      sprintf "%d" r.score ;
    ]
  end

  include Make(Base)

  module Item = struct
    include Item

    (** [to_bed4 it] drops the score column of [it]. *)
    let to_bed4 it = {
      Bed4.chrom = it.chrom ;
      chromStart = it.chromStart ;
      chromEnd = it.chromEnd ;
      name = it.name ;
    }
  end
end

(** Records made of the first six columns: Bed5 columns plus strand. *)
module Bed6 = struct
  type item = {
    chrom : string ;
    chromStart : int ;
    chromEnd : int ;
    name : string ;
    score : int ;
    strand : strand ;
  }

  module Base = struct
    type t = item

    let loc r = GLoc.{ chr = r.chrom ; lo = r.chromStart ; hi = r.chromEnd }

    (** Builds an item from the first six fields; additional fields are
        ignored. Fails with [Failure] when fewer than six fields are given
        or when the strand column is not accepted by {!parse_strand};
        [Int.of_string] raises on a non-integer coordinate or score. *)
    let from_fields = function
      | chrom :: chromStart :: chromEnd :: name :: score :: strand :: _ ->
        { chrom ;
          chromStart = Int.of_string chromStart ;
          chromEnd = Int.of_string chromEnd ;
          name ;
          score = Int.of_string score ;
          strand = (
            match parse_strand strand with
            | Ok s -> s
            (* [parse_strand] only returns the offending token, so wrap it
               in a message that says what went wrong. *)
            | Error bad -> failwithf "Invalid strand: %s" bad ()
          ) ; }
      | l -> failwithf "Expected more fields, got %s" (show_fields l) ()

    let to_fields r = [
      r.chrom ;
      sprintf "%d" r.chromStart ;
      sprintf "%d" r.chromEnd ;
      r.name ;
      sprintf "%d" r.score ;
      (* Single source of truth for the strand encoding. *)
      unparse_strand r.strand ;
    ]
  end

  include Make(Base)
end

(** Generic records: the location parsed from the first three fields (as in
    {!Bed3}), paired with the complete, untouched list of fields. *)
module Base = struct
  type t = GLoc.t * fields

  let loc = fst

  let from_fields xs = Bed3.Base.(from_fields xs |> loc), xs

  (* Printing gives back the original fields, not a re-rendered location. *)
  let to_fields = snd
end

include Make(Base)