@@ -337,95 +337,154 @@ def get_list_of_jobs(self, limit=None, starting_after=None):
337
337
338
338
return [Job .from_json (job ) for job in response .json ()]
339
339
340
def get_transcript_text(self, id_, group_channels_by=None, group_channels_threshold_ms=None):
    """Fetch the transcript of a single job rendered as plain text.

    :param id_: id of the job whose transcript is requested
    :param group_channels_by: optional, GroupChannelsType grouping strategy for
        multichannel transcripts. None for default.
    :param group_channels_threshold_ms: optional, grouping threshold in
        milliseconds. None for default.
    :returns: transcript data as text (the ``text`` of the HTTP response)
    :raises: ValueError when id_ is empty, HTTPError
    """
    if not id_:
        raise ValueError('id_ must be provided')

    # Both grouping options are forwarded untouched; the url builder decides
    # which of them end up in the query string.
    transcript_url = self._build_transcript_url(
        id_,
        group_channels_by=group_channels_by,
        group_channels_threshold_ms=group_channels_threshold_ms
    )
    plain_text_headers = {'Accept': 'text/plain'}

    return self._make_http_request(
        "GET",
        transcript_url,
        headers=plain_text_headers
    ).text
357
367
358
def get_transcript_text_as_stream(self,
                                  id_,
                                  group_channels_by=None,
                                  group_channels_threshold_ms=None):
    """Request the plain text transcript of a job as a streamed response.

    :param id_: id of the job whose transcript is requested
    :param group_channels_by: optional, GroupChannelsType grouping strategy for
        multichannel transcripts. None for default.
    :param group_channels_threshold_ms: optional, grouping threshold in
        milliseconds. None for default.
    :returns: requests.models.Response HTTP response which can be used to stream
        the payload of the response
    :raises: ValueError when id_ is empty, HTTPError
    """
    if not id_:
        raise ValueError('id_ must be provided')

    transcript_url = self._build_transcript_url(
        id_,
        group_channels_by=group_channels_by,
        group_channels_threshold_ms=group_channels_threshold_ms
    )
    plain_text_headers = {'Accept': 'text/plain'}

    # The response object itself is handed back (not its text) so the caller
    # can consume the payload incrementally.
    return self._make_http_request(
        "GET",
        transcript_url,
        headers=plain_text_headers,
        stream=True
    )
377
400
378
def get_transcript_json(self,
                        id_,
                        group_channels_by=None,
                        group_channels_threshold_ms=None):
    """Fetch the transcript of a single job and return it decoded from json.

    :param id_: id of the job whose transcript is requested
    :param group_channels_by: optional, GroupChannelsType grouping strategy for
        multichannel transcripts. None for default.
    :param group_channels_threshold_ms: optional, grouping threshold in
        milliseconds. None for default.
    :returns: transcript data as json (the result of ``response.json()``)
    :raises: ValueError when id_ is empty, HTTPError
    """
    if not id_:
        raise ValueError('id_ must be provided')

    transcript_url = self._build_transcript_url(
        id_,
        group_channels_by=group_channels_by,
        group_channels_threshold_ms=group_channels_threshold_ms
    )
    # The json representation is selected through the client's own
    # content type rather than a literal.
    json_headers = {'Accept': self.rev_json_content_type}

    reply = self._make_http_request(
        "GET",
        transcript_url,
        headers=json_headers
    )
    return reply.json()
395
431
396
def get_transcript_json_as_stream(self,
                                  id_,
                                  group_channels_by=None,
                                  group_channels_threshold_ms=None):
    """Request the json transcript of a job as a streamed response.

    :param id_: id of the job whose transcript is requested
    :param group_channels_by: optional, GroupChannelsType grouping strategy for
        multichannel transcripts. None for default.
    :param group_channels_threshold_ms: optional, grouping threshold in
        milliseconds. None for default.
    :returns: requests.models.Response HTTP response which can be used to stream
        the payload of the response
    :raises: ValueError when id_ is empty, HTTPError
    """
    if not id_:
        raise ValueError('id_ must be provided')

    transcript_url = self._build_transcript_url(
        id_,
        group_channels_by=group_channels_by,
        group_channels_threshold_ms=group_channels_threshold_ms
    )
    json_headers = {'Accept': self.rev_json_content_type}

    # Returned undecoded: the caller streams the json payload itself.
    return self._make_http_request(
        "GET",
        transcript_url,
        headers=json_headers,
        stream=True
    )
415
464
416
- def get_transcript_object (self , id_ ):
465
+ def get_transcript_object (self , id_ , group_channels_by = None , group_channels_threshold_ms = None ):
417
466
"""Get the transcript of a specific job as a python object`.
418
467
419
468
:param id_: id of job to be requested
469
+ :param group_channels_by: optional, GroupChannelsType grouping strategy for
470
+ multichannel transcripts. None for default.
471
+ :param group_channels_threshold_ms: optional, grouping threshold in milliseconds.
472
+ None for default.
420
473
:returns: transcript data as a python object
421
474
:raises: HTTPError
422
475
"""
423
476
if not id_ :
424
477
raise ValueError ('id_ must be provided' )
425
478
479
+ url = self ._build_transcript_url (
480
+ id_ ,
481
+ group_channels_by = group_channels_by ,
482
+ group_channels_threshold_ms = group_channels_threshold_ms
483
+ )
484
+
426
485
response = self ._make_http_request (
427
486
"GET" ,
428
- urljoin ( self . base_url , 'jobs/{}/transcript' . format ( id_ )) ,
487
+ url ,
429
488
headers = {'Accept' : self .rev_json_content_type }
430
489
)
431
490
@@ -814,3 +873,22 @@ def _create_job_options_payload(
814
873
815
874
def _create_captions_query(self, speaker_channel):
    """Build the query string suffix that selects a captions speaker channel.

    :param speaker_channel: channel value to put in the query, or None to
        send no query at all
    :returns: ``'?speaker_channel=<value>'``, or an empty string when
        speaker_channel is None
    """
    # Only None suppresses the query; a channel of 0 is still emitted.
    if speaker_channel is None:
        return ''
    return '?speaker_channel={}'.format(speaker_channel)
876
+
877
def _build_transcript_url(self, id_, group_channels_by=None, group_channels_threshold_ms=None):
    """Build the get transcript url.

    :param id_: id of job to be requested
    :param group_channels_by: optional, GroupChannelsType grouping strategy for
        multichannel transcripts; an enum member or its raw value is accepted.
        None for default.
    :param group_channels_threshold_ms: optional, grouping threshold in milliseconds.
        None for default.
    :returns: url for getting the transcript; it carries a query string only
        when at least one of the optional arguments is not None
    """
    options = (
        ('group_channels_by', group_channels_by),
        ('group_channels_threshold_ms', group_channels_threshold_ms),
    )
    # Unwrap enum members to their underlying value: formatting a member
    # directly can render as 'ClassName.MEMBER' instead of the value the
    # query string needs. Plain strings and ints have no ``value`` attribute
    # and pass through unchanged. Only None is skipped, so 0 is still sent.
    params = [
        '{}={}'.format(name, getattr(option, 'value', option))
        for name, option in options
        if option is not None
    ]

    # Add the '?' separator only when there is something to send, instead of
    # relying on urljoin to discard an empty query.
    query = '?{}'.format('&'.join(params)) if params else ''
    return urljoin(self.base_url, 'jobs/{}/transcript{}'.format(id_, query))
0 commit comments