1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
open Core_kernel
open Bistro
open Bistro.Shell_dsl
(** Container image specification passed as [~img] to the workflows
    defined in this file: docker image [cisa] from account [pveber],
    tag [20140304]. *)
let img =
  [
    docker_image
      ~account:"pveber"
      ~name:"cisa"
      ~tag:"20140304"
      ();
  ]
(** [merge ?min_length xs] builds a shell workflow (described as
    [cisa.Merge]) that runs [Merge.py] on a generated configuration file.

    [xs] is a list of [(label, fa)] pairs: [label] is inserted verbatim
    as a string and [fa] is inserted as a workflow dependency
    (presumably a FASTA file of contigs -- confirm against callers).
    [min_length] defaults to [100].

    The configuration file is made of the following records, separated
    by newlines (no trailing newline):
    - [count=<number of elements in xs>]
    - one [data=<fa>,title=<label>] record per element of [xs], in order
    - [Master_file=<workflow destination>]
    - [min_length=<min_length>] *)
let merge ?(min_length = 100) xs =
  (* Each record is kept as a list of template fragments; fragments are
     only glued together once newline separators have been inserted. *)
  let header = [ string "count=" ; int (List.length xs) ] in
  let entries =
    List.map xs ~f:(fun (label, fa) ->
        [ string "data=" ; dep fa ; string ",title=" ; string label ])
  in
  let footer =
    [
      [ string "Master_file=" ; dest ] ;
      [ string "min_length=" ; int min_length ] ;
    ]
  in
  let config_file =
    List.concat [ [ header ] ; entries ; footer ]
    |> List.intersperse ~sep:[ string "\n" ]
    |> List.concat
    |> seq ~sep:""
    |> file_dump
  in
  Workflow.shell ~descr:"cisa.Merge" ~img [
    mkdir_p tmp ;
    cmd "Merge.py" [ config_file ] ;
  ]
(** [cisa ~genome_size contigs] builds a shell workflow (described as
    [cisa]) that runs a generated bash script.

    The script first defines four shell variables:
    - [TMP]: the [output] subdirectory of the workflow temporary directory
    - [GENOMESIZE]: the integer [genome_size]
    - [CONTIGS]: the path of the [contigs] dependency
    - [DEST]: the workflow destination

    It then locates [nucmer], [makeblastdb], [blastn] and the directory
    holding [CISA.py] through [which], writes a configuration file to
    [$TMP/cisa.config] and runs [CISA.py] on it with the output of [yes]
    piped to its standard input (presumably to auto-confirm interactive
    prompts -- confirm against the CISA documentation). *)
let cisa ~genome_size contigs =
  (* Render a [NAME=value] shell variable assignment. *)
  let assign name value = seq ~sep:"" [ string name ; string "=" ; value ] in
  let preamble =
    [
      assign "TMP" (tmp // "output") ;
      assign "GENOMESIZE" (int genome_size) ;
      assign "CONTIGS" (dep contigs) ;
      assign "DEST" dest ;
    ]
  in
  (* Static part of the script. It is a verbatim quoted string, so its
     lines must stay at column 0 to keep the emitted text unchanged. *)
  let body = string {|
NUCMER=`which nucmer`
CISA=$(dirname $(readlink -f $(which CISA.py)))
MAKEBLASTDB=`which makeblastdb`
BLASTN=`which blastn`
CONFIG=$TMP/cisa.config
mkdir -p $TMP
mkdir -p $TMP/CISA1
cd $TMP
cat > $CONFIG <<__HEREDOC__
genome=$GENOMESIZE
infile=$CONTIGS
outfile=$DEST
nucmer=$NUCMER
R2_Gap=0.95
CISA=$CISA
makeblastdb=$MAKEBLASTDB
blastn=$BLASTN
__HEREDOC__
yes | CISA.py $CONFIG
|}
  in
  let script = file_dump (seq ~sep:"\n" (preamble @ [ body ])) in
  Workflow.shell ~descr:"cisa" ~img [
    mkdir_p tmp ;
    cmd "bash" [ script ] ;
  ]