Skip to content

Commit e36130e

Browse files
authored
REVAI-4324: Multichannel transcript grouping
1 parent 172ffac commit e36130e

File tree

6 files changed

+190
-31
lines changed

6 files changed

+190
-31
lines changed

src/rev_ai/__init__.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
# -*- coding: utf-8 -*-
22
"""Top-level package for rev_ai"""
33

4-
__version__ = '2.20.0'
4+
__version__ = '2.21.0'
55

66
from .models import Job, JobStatus, Account, Transcript, Monologue, Element, MediaConfig, \
7-
CaptionType, CustomVocabulary, TopicExtractionJob, TopicExtractionResult, Topic, Informant, \
8-
SpeakerName, LanguageIdentificationJob, LanguageIdentificationResult, LanguageConfidence, \
9-
SentimentAnalysisResult, SentimentValue, SentimentMessage, SentimentAnalysisJob, \
10-
CustomerUrlData, RevAiApiDeploymentConfigMap, RevAiApiDeployment
7+
CaptionType, GroupChannelsType, CustomVocabulary, TopicExtractionJob, TopicExtractionResult, \
8+
Topic, Informant, SpeakerName, LanguageIdentificationJob, LanguageIdentificationResult, \
9+
LanguageConfidence, SentimentAnalysisResult, SentimentValue, SentimentMessage, \
10+
SentimentAnalysisJob, CustomerUrlData, RevAiApiDeploymentConfigMap, RevAiApiDeployment

src/rev_ai/apiclient.py

+88-10
Original file line numberDiff line numberDiff line change
@@ -337,95 +337,154 @@ def get_list_of_jobs(self, limit=None, starting_after=None):
337337

338338
return [Job.from_json(job) for job in response.json()]
339339

340-
def get_transcript_text(self, id_):
340+
def get_transcript_text(self, id_, group_channels_by=None, group_channels_threshold_ms=None):
341341
"""Get the transcript of a specific job as plain text.
342342
343343
:param id_: id of job to be requested
344+
:param group_channels_by: optional, GroupChannelsType grouping strategy for
345+
multichannel transcripts. None for default.
346+
:param group_channels_threshold_ms: optional, grouping threshold in milliseconds.
347+
None for default.
344348
:returns: transcript data as text
345349
:raises: HTTPError
346350
"""
347351
if not id_:
348352
raise ValueError('id_ must be provided')
349353

354+
url = self._build_transcript_url(
355+
id_,
356+
group_channels_by=group_channels_by,
357+
group_channels_threshold_ms=group_channels_threshold_ms
358+
)
359+
350360
response = self._make_http_request(
351361
"GET",
352-
urljoin(self.base_url, 'jobs/{}/transcript'.format(id_)),
362+
url,
353363
headers={'Accept': 'text/plain'}
354364
)
355365

356366
return response.text
357367

358-
def get_transcript_text_as_stream(self, id_):
368+
def get_transcript_text_as_stream(self,
369+
id_,
370+
group_channels_by=None,
371+
group_channels_threshold_ms=None):
359372
"""Get the transcript of a specific job as a plain text stream.
360373
361374
:param id_: id of job to be requested
375+
:param group_channels_by: optional, GroupChannelsType grouping strategy for
376+
multichannel transcripts. None for default.
377+
:param group_channels_threshold_ms: optional, grouping threshold in milliseconds.
378+
None for default.
362379
:returns: requests.models.Response HTTP response which can be used to stream
363380
the payload of the response
364381
:raises: HTTPError
365382
"""
366383
if not id_:
367384
raise ValueError('id_ must be provided')
368385

386+
url = self._build_transcript_url(
387+
id_,
388+
group_channels_by=group_channels_by,
389+
group_channels_threshold_ms=group_channels_threshold_ms
390+
)
391+
369392
response = self._make_http_request(
370393
"GET",
371-
urljoin(self.base_url, 'jobs/{}/transcript'.format(id_)),
394+
url,
372395
headers={'Accept': 'text/plain'},
373396
stream=True
374397
)
375398

376399
return response
377400

378-
def get_transcript_json(self, id_):
401+
def get_transcript_json(self,
402+
id_,
403+
group_channels_by=None,
404+
group_channels_threshold_ms=None):
379405
"""Get the transcript of a specific job as json.
380406
381407
:param id_: id of job to be requested
408+
:param group_channels_by: optional, GroupChannelsType grouping strategy for
409+
multichannel transcripts. None for default.
410+
:param group_channels_threshold_ms: optional, grouping threshold in milliseconds.
411+
None for default.
382412
:returns: transcript data as json
383413
:raises: HTTPError
384414
"""
385415
if not id_:
386416
raise ValueError('id_ must be provided')
387417

418+
url = self._build_transcript_url(
419+
id_,
420+
group_channels_by=group_channels_by,
421+
group_channels_threshold_ms=group_channels_threshold_ms
422+
)
423+
388424
response = self._make_http_request(
389425
"GET",
390-
urljoin(self.base_url, 'jobs/{}/transcript'.format(id_)),
426+
url,
391427
headers={'Accept': self.rev_json_content_type}
392428
)
393429

394430
return response.json()
395431

396-
def get_transcript_json_as_stream(self, id_):
432+
def get_transcript_json_as_stream(self,
433+
id_,
434+
group_channels_by=None,
435+
group_channels_threshold_ms=None):
397436
"""Get the transcript of a specific job as streamed json.
398437
399438
:param id_: id of job to be requested
439+
:param group_channels_by: optional, GroupChannelsType grouping strategy for
440+
multichannel transcripts. None for default.
441+
:param group_channels_threshold_ms: optional, grouping threshold in milliseconds.
442+
None for default.
400443
:returns: requests.models.Response HTTP response which can be used to stream
401444
the payload of the response
402445
:raises: HTTPError
403446
"""
404447
if not id_:
405448
raise ValueError('id_ must be provided')
406449

450+
url = self._build_transcript_url(
451+
id_,
452+
group_channels_by=group_channels_by,
453+
group_channels_threshold_ms=group_channels_threshold_ms
454+
)
455+
407456
response = self._make_http_request(
408457
"GET",
409-
urljoin(self.base_url, 'jobs/{}/transcript'.format(id_)),
458+
url,
410459
headers={'Accept': self.rev_json_content_type},
411460
stream=True
412461
)
413462

414463
return response
415464

416-
def get_transcript_object(self, id_):
465+
def get_transcript_object(self, id_, group_channels_by=None, group_channels_threshold_ms=None):
417466
"""Get the transcript of a specific job as a python object`.
418467
419468
:param id_: id of job to be requested
469+
:param group_channels_by: optional, GroupChannelsType grouping strategy for
470+
multichannel transcripts. None for default.
471+
:param group_channels_threshold_ms: optional, grouping threshold in milliseconds.
472+
None for default.
420473
:returns: transcript data as a python object
421474
:raises: HTTPError
422475
"""
423476
if not id_:
424477
raise ValueError('id_ must be provided')
425478

479+
url = self._build_transcript_url(
480+
id_,
481+
group_channels_by=group_channels_by,
482+
group_channels_threshold_ms=group_channels_threshold_ms
483+
)
484+
426485
response = self._make_http_request(
427486
"GET",
428-
urljoin(self.base_url, 'jobs/{}/transcript'.format(id_)),
487+
url,
429488
headers={'Accept': self.rev_json_content_type}
430489
)
431490

@@ -814,3 +873,22 @@ def _create_job_options_payload(
814873

815874
def _create_captions_query(self, speaker_channel):
816875
return '' if speaker_channel is None else '?speaker_channel={}'.format(speaker_channel)
876+
877+
def _build_transcript_url(self, id_, group_channels_by=None, group_channels_threshold_ms=None):
    """Build the get transcript url.

    :param id_: id of job to be requested
    :param group_channels_by: optional, GroupChannelsType grouping strategy for
        multichannel transcripts. A plain string is accepted as well.
        None for default.
    :param group_channels_threshold_ms: optional, grouping threshold in milliseconds.
        None for default.
    :returns: url for getting the transcript
    """
    params = []
    if group_channels_by is not None:
        # Send the enum member's value rather than its formatted form: since
        # Python 3.11, format() of a (str, Enum) member yields
        # 'GroupChannelsType.SPEAKER' instead of 'speaker'. Plain strings have
        # no `value` attribute and pass through unchanged.
        grouping = getattr(group_channels_by, 'value', group_channels_by)
        params.append('group_channels_by={}'.format(grouping))
    if group_channels_threshold_ms is not None:
        # `is not None` (not truthiness) so an explicit threshold of 0 is sent.
        params.append('group_channels_threshold_ms={}'.format(group_channels_threshold_ms))

    # Only add the '?' separator when there is a query string to send.
    query = '?{}'.format('&'.join(params)) if params else ''
    return urljoin(self.base_url, 'jobs/{}/transcript{}'.format(id_, query))

src/rev_ai/models/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from .customvocabulary import CustomVocabulary
55
from .streaming import MediaConfig
66
from .asynchronous import Job, JobStatus, Account, Transcript, Monologue, Element, CaptionType, \
7-
SpeakerName
7+
SpeakerName, GroupChannelsType
88
from .insights import TopicExtractionJob, TopicExtractionResult, Topic, Informant, \
99
SentimentAnalysisResult, SentimentValue, SentimentMessage, SentimentAnalysisJob
1010
from .language_id import LanguageIdentificationJob, LanguageIdentificationResult, LanguageConfidence

src/rev_ai/models/asynchronous/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@
77
from .account import Account
88
from .transcript import Transcript, Monologue, Element
99
from .speaker_name import SpeakerName
10+
from .group_channels_type import GroupChannelsType
src/rev_ai/models/asynchronous/group_channels_type.py

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# -*- coding: utf-8 -*-
2+
"""Enum for group_channels_by types"""
3+
4+
from enum import Enum
5+
6+
7+
class GroupChannelsType(str, Enum):
    """Values accepted by the ``group_channels_by`` transcript query parameter.

    Members are also ``str`` instances, so each compares equal to its
    lowercase wire value (e.g. ``GroupChannelsType.WORD == 'word'``).
    """

    SPEAKER = 'speaker'
    SENTENCE = 'sentence'
    WORD = 'word'

    def __str__(self):
        """Return the wire value.

        Keeps ``str()`` and ``format()`` output identical on every Python
        version; without this, Python 3.11+ formats a member as
        ``'GroupChannelsType.WORD'`` rather than ``'word'``.
        """
        return self.value

    @classmethod
    def from_string(cls, status):
        """Look up a member by name, ignoring case.

        :param status: member name such as 'speaker', 'Sentence' or 'WORD'
        :returns: the matching GroupChannelsType member
        :raises: KeyError if no member has that name
        """
        return cls[status.upper()]

0 commit comments

Comments
 (0)