-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathFormats_rearrangement.yaml
211 lines (208 loc) · 6.75 KB
/
Formats_rearrangement.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
#
# Schema definitions for AIRR Formats rearrangement objects
#
# The Formats rearrangements
# Note: This is an extension of the MiAIRR rearrangements and assumes that
# the MiAIRR schema has already defined the MiAIRR rearrangement objects.
# required:
# - sequence_id
# - sequence
# - sample_id
# - constant
# - functional
# - rev_comp
# - c_call
# - v_score
# - d_score
# - j_score
# - c_score
# - v_cigar
# - d_cigar
# - j_cigar
# - c_cigar
# Schema object describing one rearrangement record: one entry under
# `properties` per field, each with a JSON-schema `type` and a human-readable
# `description`.
Formats_Rearrangements:
  # NOTE(review): in OpenAPI 2.0 the `discriminator` value is expected to be
  # the name of a property declared in this schema (and listed as required).
  # No `AIRRFormats` property is declared below -- confirm the intent.
  discriminator: AIRRFormats
  type: object
  properties:
    # --- Sequence identity and sample provenance ---
    sequence_id:
      type: string
      description: Read/sequence identifier
    sequence:
      type: string
      description: >-
        Nucleotide sequence (e.g., the "read" sequence; revcomp'd if
        necessary)
    sample_id:
      type: string
      description: >-
        The biological sample this read derives from (e.g., from BioSample
        database)
    constant:
      type: string
      description: Constant region gene (e.g., IGHG4, IGHA2, IGHE, TRBC)
    functional:
      type: boolean
      description: VDJ sequence is predicted to be functional
    rev_comp:
      type: boolean
      description: Sequence is reverse complemented
    c_call:
      type: string
      description: C gene assignment

    # --- Per-segment alignment scores ---
    v_score:
      type: number
      description: V alignment score
    d_score:
      type: number
      description: D alignment score
    j_score:
      type: number
      description: J alignment score
    c_score:
      type: number
      description: C alignment score

    # --- Per-segment alignment CIGAR strings ---
    v_cigar:
      type: string
      description: V alignment CIGAR string
    d_cigar:
      type: string
      description: D alignment CIGAR string
    j_cigar:
      type: string
      description: J alignment CIGAR string
    c_cigar:
      type: string
      description: C alignment CIGAR string

    # --- Per-segment alignment E-values and identities ---
    v_evalue:
      type: number
      description: V alignment E-value (when applicable)
    d_evalue:
      type: number
      description: D alignment E-value (when applicable)
    j_evalue:
      type: number
      description: J alignment E-value (when applicable)
    v_identity:
      type: number
      description: V alignment identity
    d_identity:
      type: number
      description: D alignment identity
    j_identity:
      type: number
      description: J alignment identity

    # --- Whole-sequence (VDJ) alignment statistics ---
    vdj_score:
      type: number
      description: Score for aligners that consider the full sequence as a whole
    vdj_evalue:
      type: number
      description: E-value for aligners that consider the full sequence as a whole
    vdj_identity:
      type: number
      description: Identity for aligners that consider the full sequence as a whole
    vdj_cigar:
      type: string
      description: VDJ alignment CIGAR string

    # --- V segment start and FWR/CDR region coordinates ---
    v_start:
      type: integer
      description: Position of first V nucleotide in 'sequence' field
    v_germ_start:
      type: integer
      description: Position of 'v_start' field in IMGT numbered germline V(D)J sequence
    fwr1_start:
      type: integer
      description: FWR1 start coordinate in sequence (transferred from germline)
    fwr1_end:
      type: integer
      description: FWR1 end coordinate in sequence (transferred from germline)
    cdr1_start:
      type: integer
      description: CDR1 start coord in sequence (transferred from germline)
    cdr1_end:
      type: integer
      description: CDR1 end coord in sequence (transferred from germline)
    fwr2_start:
      type: integer
      description: FWR2 start coord in sequence (transferred from germline)
    fwr2_end:
      type: integer
      description: FWR2 end coord in sequence (transferred from germline)
    cdr2_start:
      type: integer
      description: CDR2 start coord in sequence (transferred from germline)
    cdr2_end:
      type: integer
      description: CDR2 end coord in sequence (transferred from germline)
    fwr3_start:
      type: integer
      description: FWR3 start coord in sequence (transferred from germline)
    fwr3_end:
      type: integer
      description: FWR3 end coord in sequence (transferred from germline)
    cdr3_start:
      type: integer
      description: CDR3 start coord in sequence (transferred from germline)
    cdr3_end:
      type: integer
      description: CDR3 end coord in sequence (transferred from germline)
    fwr4_start:
      type: integer
      # Previously read "FWR3 start", a copy-paste slip from fwr3_start.
      description: FWR4 start coord in sequence (transferred from germline)
    fwr4_end:
      type: integer
      description: FWR4 end coord in sequence (transferred from germline)

    # --- V end, D and J segment coordinates ---
    v_end:
      type: integer
      description: End coordinate of the V segment (generally inside the CDR3)
    d_start:
      type: integer
      description: Start coordinate of the D segment
    d_germ_start:
      type: integer
      description: Position of 'd_start' field in IMGT numbered germline V(D)J sequence
    d_end:
      type: integer
      description: End coordinate of the D segment
    j_start:
      type: integer
      description: Start coordinate of the J segment (generally inside the CDR3)
    j_germ_start:
      type: integer
      description: Position of 'j_start' field in IMGT numbered germline V(D)J sequence
    j_end:
      type: integer
      description: End coordinate of the J segment

    # --- Junction, N/P nucleotide lengths ---
    junction_length:
      type: integer
      description: Number of junction nucleotides in sequence_vdj
    np1_length:
      type: integer
      description: Number of nucleotides between sample V and D sequences
    np2_length:
      type: integer
      description: Number of nucleotides between sample D and J sequences
    n1_length:
      type: integer
      description: Nucleotides 5' of the D-segment
    n2_length:
      type: integer
      description: Nucleotides 3' of the D-segment
    p3v_length:
      type: integer
      description: Palindromic nucleotides 3' of the V-segment
    p5d_length:
      type: integer
      description: Palindromic nucleotides 5' of the D-segment
    p3d_length:
      type: integer
      description: Palindromic nucleotides 3' of the D-segment
    p5j_length:
      type: integer
      description: Palindromic nucleotides 5' of the J-segment

    # --- Read counts and clonal assignment ---
    duplicate_count:
      type: integer
      description: Number of duplicate reads for this sequence
    consensus_count:
      type: integer
      description: Number of reads contributing to the consensus for this sequence
    clone:
      type: string
      description: Clone assignment for this sequence