-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathveld.yaml
41 lines (37 loc) · 1.16 KB
/
veld.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# VELD metadata describing this code unit: what it does, its topics,
# and the data it consumes and produces.
x-veld:
  code:
    # Folded block scalar: the line breaks below are folded into single
    # spaces, so the value is one long line of text.
    description: >-
      A statistical summary on conllu data, to count linguistic features of a conllu
      file:
      - count of total tokens
      - count of total lemma
      - count of lemma normalized by token (to put the lemma in relation with token)
      - count of occurrence of each (Universal Dependencies) part of speech tag
      Can be adapted to other use cases and made more flexible, but is primarily used in this chain
      veld: https://github.com/veldhub/veld_chain__eltec_udpipe_inference

    # Subject tags for this code unit.
    topic:
      - "NLP"
      - "Machine Learning"
      - "Tokenization"
      - "Lemmatization"
      - "Part Of Speech"
      - "Dependency Parsing"
      - "Universal Dependencies"
      - "Grammatical Annotation"

    # Declared input: conllu data at the container path /veld/input/.
    input:
      - volume: "/veld/input/"
        file_type: "conllu"

    # Declared output: json data at the container path /veld/output/.
    output:
      - volume: "/veld/output/"
        file_type: "json"
        content:
          - "statistics"
          - "NLP statistics"

# Compose service that runs the analysis code inside a Jupyter notebook server.
services:
  veld:
    # Build the image using the current directory as build context.
    build: .
    # Folded block scalar: the lines below are joined with single spaces into
    # one command string.
    # NOTE(review): the notebook runs as root, listens on all interfaces and
    # has both token and password set to empty, i.e. no authentication, while
    # port 8888 is published on the host. Confirm this is only ever run on a
    # trusted machine; otherwise publish the port as "127.0.0.1:8888:8888".
    command: >-
      jupyter notebook --allow-root --ip='*'
      --NotebookApp.token='' --NotebookApp.password=''
    ports:
      # Quoted so the HOST:CONTAINER mapping is always read as a string and
      # never re-typed by a YAML parser.
      - "8888:8888"
    working_dir: /veld/code/
    volumes:
      # Host ./src is mounted as the working directory of the container.
      - ./src:/veld/code/