Source file cisa.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
open Core_kernel
open Bistro
open Bistro.Shell_dsl

(** Container images passed as [~img] to the shell workflows of this file:
    a single docker image with account [pveber], name [cisa] and tag
    [20140304]. *)
let img =
  let cisa_image =
    docker_image ~account:"pveber" ~name:"cisa" ~tag:"20140304" ()
  in
  [ cisa_image ]

(** [merge ?min_length xs] builds a shell workflow that runs [Merge.py] on a
    generated configuration file.

    [xs] is a list of [(label, fa)] pairs. The configuration file contains,
    one entry per line and in this order:
    - [count=N], where [N] is the length of [xs];
    - one [data=FA,title=LABEL] line per element of [xs];
    - [Master_file=DEST], where [DEST] is the workflow destination;
    - [min_length=M], where [M] is [min_length] (default [100]).

    No newline is emitted after the last line. *)
let merge ?(min_length = 100) xs =
  let line_break = [ string "\n" ] in
  let count_entry = [ string "count=" ; int (List.length xs) ] in
  let data_entries =
    List.map xs ~f:(fun (label, fa) ->
        [ string "data=" ; dep fa ; string ",title=" ; string label ])
  in
  let trailing_entries = [
    [ string "Master_file=" ; dest ] ;
    [ string "min_length=" ; int min_length ] ;
  ]
  in
  (* Each entry is a list of template fragments; entries are separated by a
     line break, flattened, and glued together without any extra separator. *)
  let config_file =
    List.concat [ [ count_entry ] ; data_entries ; trailing_entries ]
    |> List.intersperse ~sep:line_break
    |> List.concat
    |> seq ~sep:""
    |> file_dump
  in
  Workflow.shell ~descr:"cisa.Merge" ~img [
    mkdir_p tmp ;
    cmd "Merge.py" [ config_file ] ;
  ]

(** [cisa ~genome_size contigs] builds a shell workflow that runs a generated
    bash script.

    The script first defines four shell variables: [TMP] (the [output]
    entry under the workflow temporary directory), [GENOMESIZE] (from
    [genome_size]), [CONTIGS] (the [contigs] dependency) and [DEST] (the
    workflow destination). It then locates [nucmer], [makeblastdb], [blastn]
    and the directory of [CISA.py] on the PATH, writes a configuration file
    under [TMP], and invokes [CISA.py] on it with the output of [yes] piped
    to its standard input. *)
let cisa ~genome_size contigs =
  (* Renders a [NAME=value] shell assignment. *)
  let assign name value = seq ~sep:"" [ string name ; string "=" ; value ] in
  let output_dir = tmp // "output" in
  (* Fixed part of the script; it only refers to the variables assigned
     in the lines placed before it. *)
  let pipeline = string {|
NUCMER=`which nucmer`
CISA=$(dirname $(readlink -f $(which CISA.py)))
MAKEBLASTDB=`which makeblastdb`
BLASTN=`which blastn`
CONFIG=$TMP/cisa.config

mkdir -p $TMP
mkdir -p $TMP/CISA1
cd $TMP

cat > $CONFIG <<__HEREDOC__
genome=$GENOMESIZE
infile=$CONTIGS
outfile=$DEST
nucmer=$NUCMER
R2_Gap=0.95
CISA=$CISA
makeblastdb=$MAKEBLASTDB
blastn=$BLASTN
__HEREDOC__

yes | CISA.py $CONFIG
|}
  in
  let bash_script =
    file_dump (
      seq ~sep:"\n" [
        assign "TMP" output_dir ;
        assign "GENOMESIZE" (int genome_size) ;
        assign "CONTIGS" (dep contigs) ;
        assign "DEST" dest ;
        pipeline ;
      ]
    )
  in
  Workflow.shell ~descr:"cisa" ~img [
    mkdir_p tmp ;
    cmd "bash" [ bash_script ] ;
  ]