(* Source file: bistro_nlp.ml *)
open Bistro
open Shell_dsl
(** [wikipedia_summary q] builds a shell workflow that downloads the
    Wikipedia REST summary of page [q] with [curl] and pipes the response
    through [sed]. The sed script prints only what its pattern captures
    between the extract key and the following extract_html key, and that
    output is written to the workflow destination.

    [q] is appended to the endpoint URL as given, except that single quotes
    are percent-encoded as [%27]. The URL is wrapped in single quotes in the
    generated command, so a raw single quote in a page title would end the
    quoting early and produce a malformed (or injectable) command line.
    Titles without a single quote yield exactly the same command as before. *)
let wikipedia_summary q : text file =
  (* A single quote is the only character able to escape single-quoting. *)
  let shell_safe_q =
    Stdlib.String.concat "%27" (Stdlib.String.split_on_char '\'' q)
  in
  let url =
    "https://en.wikipedia.org/api/rest_v1/page/summary/" ^ shell_safe_q
  in
  Workflow.shell ~descr:"nlp.wikipedia_summary" [
    pipe [
      cmd "curl" [
        quote ~using:'\'' (string url) ;
      ] ;
      (* sed -n with the p flag: emit the captured group and nothing else. *)
      cmd "sed" ~stdout:dest [ string {|-n 's/.*"extract":"\(.*\)","extract_html.*/\1/p'|} ] ;
    ]
  ]
(** Shell workflows built on the tools found in the
    pveber/stanford-parser container image (tag 3.9.1). *)
module Stanford_parser = struct
  (** Container images handed to every workflow of this module. *)
  let img =
    let image =
      docker_image ~account:"pveber" ~name:"stanford-parser" ~tag:"3.9.1" ()
    in
    [ image ]

  (** A text file whose format tag is [`stanford_parser_deps]. *)
  class type deps = object
    inherit text
    method format : [`stanford_parser_deps]
  end

  (** [lexparser x] runs [lexparser.sh] with [x] as its only argument; the
      standard output of the script is stored as the resulting file. *)
  let lexparser (x : text file) : deps file =
    let parse = cmd "lexparser.sh" ~stdout:dest [ dep x ] in
    Workflow.shell ~descr:"stanford_parser" ~img [ parse ]

  (** [dependensee x] invokes the DependenSee main class through [java],
      passing [x] after the [-t] option and the workflow destination as the
      last argument. The result is typed as a PNG file. *)
  let dependensee (x : deps file) : png file =
    (* Jar locations are absolute paths, presumably inside the container
       image; confirm against the image contents. *)
    let classpath =
      "/usr/bin/DependenSee.2.0.5.jar:/usr/bin/stanford-parser.jar:/usr/bin/stanford-parser-3.3.0-models.jar"
    in
    let render =
      cmd "java" [
        opt "-cp" string classpath ;
        string "com.chaoticity.dependensee.Main" ;
        opt "-t" dep x ;
        dest ;
      ]
    in
    Workflow.shell ~descr:"stanford_dependensee" ~img [ render ]
end