open Core_kernel
open Bistro
open Bistro.Shell_dsl

(* Species handled by the helpers in this file. *)
type species = [
  | `homo_sapiens
  | `mus_musculus
]

(* Maps an Ensembl [release] number and a [species] to a UCSC genome tag
   (one of `mm9, `mm10, `hg19, `hg38).

   Only the release ranges listed in the guards below are recognised; any
   other combination raises [Failure]. *)
let ucsc_reference_genome ~release ~species =
  match species with
  | `mus_musculus when 63 <= release && release <= 65 -> `mm9
  (* NOTE(review): this is the only guard without an upper bound, so every
     later mouse release is reported as `mm10 -- confirm this is intended. *)
  | `mus_musculus when 81 <= release -> `mm10
  | `homo_sapiens when release = 71 -> `hg19
  | `homo_sapiens when 84 <= release && release <= 87 -> `hg38
  | _ -> failwith "Ensembl.ucsc_reference_genome: unknown release for this species"

(* Assembly label that is spliced into Ensembl file names for a given UCSC
   genome tag (see the URL templates further down). The leading letters are
   presumably the acronym of the group that produced the assembly. *)
let lab_label_of_genome = function
  | `hg19 -> "GRCh37"
  | `hg38 -> "GRCh38"
  | `mm9 -> "NCBIM37"
  | `mm10 -> "GRCm38"

(* Lower-case species name, used as a directory name in download URLs and,
   once capitalised, as the file-name prefix. *)
let string_of_species = function
  | `homo_sapiens -> "homo_sapiens"
  | `mus_musculus -> "mus_musculus"

(* Shell workflow that rewrites the sequence names of an annotation file:
   1. awk prepends chr to every line of [gff];
   2. the first sed turns chrMT into chrM;
   3. the second sed turns chr# back into #, undoing step 1 where the
      prefix landed in front of a # character.
   The result of the pipeline is written to the workflow destination. *)
let ucsc_chr_names_gtf gff =
  Workflow.shell ~descr:"ensembl.ucsc_chr_names_gtf" [
    pipe [
      cmd "awk" [ string "'{print \"chr\" $0}'" ; dep gff ] ;
      cmd "sed" [ string "'s/chrMT/chrM/g'" ] ;
      cmd "sed" [ string "'s/chr#/#/g'" ] ~stdout:dest
    ]
  ]

(* GFF3 annotation for [species] at Ensembl [release], downloaded with wget
   and decompressed with gunzip.

   [chr_name] (default `ensembl) selects the naming of sequences: `ensembl
   returns the file as downloaded, `ucsc passes it through
   [ucsc_chr_names_gtf].

   Raises [Failure] (from [ucsc_reference_genome]) when the release is not
   known for this species. *)
let gff ?(chr_name = `ensembl) ~release species =
  let url =
    sprintf "ftp://ftp.ensembl.org/pub/release-%d/gff3/%s/%s.%s.%d.gff3.gz"
      release
      (string_of_species species)
      (String.capitalize (string_of_species species))
      (lab_label_of_genome (ucsc_reference_genome ~release ~species))
      release
  in
  let gff = Bistro_unix.(gunzip (wget url)) in
  match chr_name with
  | `ensembl -> gff
  | `ucsc -> ucsc_chr_names_gtf gff

(* GTF annotation for [species] at Ensembl [release], downloaded with wget
   and decompressed with gunzip.

   [chr_name] (default `ensembl) selects the naming of sequences: `ensembl
   leaves the file untouched, `ucsc passes it through [ucsc_chr_names_gtf].

   Raises [Failure] (from [ucsc_reference_genome]) when the release is not
   known for this species. *)
let gtf ?(chr_name = `ensembl) ~release species =
  let url =
    sprintf "ftp://ftp.ensembl.org/pub/release-%d/gtf/%s/%s.%s.%d.gtf.gz"
      release
      (string_of_species species)
      (String.capitalize (string_of_species species))
      (lab_label_of_genome (ucsc_reference_genome ~release ~species))
      release
  in
  (* Post-processing step applied to the decompressed download. *)
  let f = match chr_name with
    | `ensembl -> Fn.id
    | `ucsc -> ucsc_chr_names_gtf
  in
  f @@ Bistro_unix.(gunzip (wget url))

(* cDNA FASTA archive for [species] at Ensembl [release]. The result is the
   wget workflow itself: no gunzip step is applied here.

   Raises [Failure] (from [ucsc_reference_genome]) when the release is not
   known for this species. *)
let cdna ~release ~species =
  let url =
    sprintf "ftp://ftp.ensembl.org/pub/release-%d/fasta/%s/cdna/%s.%s.cdna.all.fa.gz"
      release
      (string_of_species species)
      (String.capitalize (string_of_species species))
      (lab_label_of_genome (ucsc_reference_genome ~release ~species))
  in
  Bistro_unix.wget url

(* Primary-assembly DNA FASTA archive for [species] at Ensembl [release].
   The result is the wget workflow itself: no gunzip step is applied here.

   Raises [Failure] (from [ucsc_reference_genome]) when the release is not
   known for this species. *)
let dna ~release ~species =
  let url =
    sprintf "ftp://ftp.ensembl.org/pub/release-%d/fasta/%s/dna/%s.%s.dna.primary_assembly.fa.gz"
      release
      (string_of_species species)
      (String.capitalize (string_of_species species))
      (lab_label_of_genome (ucsc_reference_genome ~release ~species))
  in
  Bistro_unix.wget url