Text-to-Audio Task (#11)
* adding text-to-audio task

* removed a console log

* misc styling updates

* Removed some commented out code
walkingtowork authored Apr 19, 2024
1 parent ffc5eb5 commit 56ab90c
Showing 17 changed files with 252 additions and 20 deletions.
6 changes: 6 additions & 0 deletions src/abstracts/_type.scss
@@ -154,3 +154,9 @@ h3 {
@include headline3;
color: $almaMater;
}

audio {
filter: sepia(20%) saturate(70%) grayscale(1) contrast(99%) invert(11%);
height: 42px;
margin-right: 20px;
}
9 changes: 5 additions & 4 deletions src/components/Experiment/QuickInput/QuickInput.js
@@ -7,12 +7,13 @@ import Task from "../../../helpers/Task";
import { TaskInputTypes } from "../../../helpers/TaskInputTypes";

export default function QuickInput(props) {
const inputType = Task.getStaticTask(props.model.output.type).inputType;
switch (inputType) {
const task = Task.getStaticTask(props.model.output.type)

switch (task.inputType) {
case TaskInputTypes.Text:
return <QuickTextInput {...props} />;
return <QuickTextInput hideUpload={task.hideUpload} {...props} />;
case TaskInputTypes.Audio:
return <QuickAudioInput {...props} />;
return <QuickAudioInput {...props} />;

case TaskInputTypes.Image:
default:
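With this change QuickInput resolves the static task once and forwards its hideUpload flag to QuickTextInput, so tab visibility comes from the task definition rather than per-story args. Below is a minimal standalone sketch of that lookup, with assumed task IDs and table shape (the real TaskIDs, TaskInputTypes, and Task.getStaticTask are defined elsewhere in the repo and are not part of this diff):

```js
// Sketch only — the task ID strings, the TaskInputTypes values, and the
// placement of hideUpload on the task config are assumptions for illustration.
const TaskInputTypes = { Text: "text", Audio: "audio", Image: "image" };
const textToAudio = "text_to_audio";
const textConversation = "text_conversation";

const staticTasks = {
  [textToAudio]: { inputType: TaskInputTypes.Text },
  [textConversation]: { inputType: TaskInputTypes.Text, hideUpload: true },
};

function getStaticTask(outputType) {
  // Unknown output types fall back to an image-style task, mirroring the
  // default branch of the switch in QuickInput.js.
  return staticTasks[outputType] ?? { inputType: TaskInputTypes.Image };
}

const task = getStaticTask(textConversation);
console.log(task.inputType);            // "text"  -> renders QuickTextInput
console.log(Boolean(task.hideUpload));  // true    -> upload tab is hidden
```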
14 changes: 13 additions & 1 deletion src/components/Experiment/QuickInput/QuickInput.stories.js
@@ -9,6 +9,7 @@ import {
textToCode,
audioToText,
textConversation,
textToAudio,
} from "../../../helpers/TaskIDs";
import {
SampleImageClassificationInputs,
@@ -115,14 +116,25 @@ AudioToText.args = {
},
};

export const TextToAudio = Template.bind({});
TextToAudio.args = {
sampleInputs: [
"a chill song with influences from lofi, chillstep and downtempo",
],
model: {
output: {
type: textToAudio,
},
},
};

export const TextConversation = Template.bind({});
TextConversation.args = {
sampleInputs: [
"Show me a recipe for pizza",
"What is the weather tomorrow?",
"What is the meaning of life?",
],
hideUpload: true,
model: {
output: {
type: textConversation,
2 changes: 1 addition & 1 deletion src/components/Experiment/QuickInput/QuickTextInput.js
@@ -17,7 +17,7 @@ export default function QuickTextInput(props) {
selectTab,
selectInput,
runModel,
hideUpload=false,
hideUpload,
} = useQuickInputControl(props);
const {getBlock, getElement} = useBEMNaming("quick-text-input");
const task = Task.getStaticTask(props.model.output.type);
@@ -24,7 +24,7 @@ export default function useQuickInputControl(props) {
const tabs = [];

if (!props.hideSample) tabs.push(sample);
if (!props.hideUpload) tabs.push(upload);
if (!props.hideUpload) tabs.push(upload); // Currently only hideUpload is being used
if (!props.hideUrl) tabs.push(...input);

return tabs;
@@ -0,0 +1,7 @@
.audio-to-text-output {
&__input-audio-content {
audio {
margin-top: 12px;
}
}
}
@@ -1,9 +1,7 @@
import React from "react";
import useBEMNaming from "../../../../../common/useBEMNaming";

import "./AudioToText.scss"
export default function AudioToTextOutputInputSection(props) {
console.log("AudioToTextOutputInputSection: ", props)

const { getElement } = useBEMNaming("audio-to-text-output");
const input = props.input;

@@ -1,6 +1,8 @@
@import "../../../../../App";

.text-output, .text-to-code-output, .audio-to-text-output, .text-conversation-output {
.text-output, .text-to-code-output,
.audio-to-text-output, .text-to-audio-output,
.text-conversation-output {
display: flex;
flex-direction: row;
gap: 72px;
@@ -43,12 +45,6 @@
&-audio-content {
padding-top: 12px;
font-size: 20px;
audio {
margin-top: 12px;
filter: sepia(20%) saturate(70%) grayscale(1) contrast(99%) invert(11%);
height: 42px;
margin-right: 20px;
}
}

&-submit-button {
@@ -101,6 +97,11 @@
background-color: $azulLightest;
padding: 16px 12px 12px;
}

.output-audio-content {
padding-top: 12px;
font-size: 20px;
}
}

.rating {
@@ -25,7 +25,8 @@ export default function useTextOutput(trial) {
return trial.results.responses[0].features[0].generated_tokens
.map((token) => token.token)
.join(" ");

case "AUDIO":
return trial?.results?.responses[0]?.features[0] ?? "";
case "TEXT":
default:
return trial?.results?.responses[0]?.features[0]?.text ?? "";
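The new AUDIO branch returns the whole feature object instead of a .text string, so the output component can read the clip's title and src. Here is a rough standalone sketch of that extraction, modeled on the test fixture added later in this commit (the function name and the switch variable are illustrative, not the hook's actual code):

```js
// Illustrative only: extractFeature stands in for the relevant part of
// useTextOutput; the trial shape follows testTextToAudioOutput.js below.
function extractFeature(trial) {
  const feature = trial?.results?.responses[0]?.features[0];
  switch (feature?.type) {
    case "AUDIO":
      // Return the whole feature ({ title, src, type }) so AudioOutputBox can
      // render <audio controls src={...}> plus a download link.
      return feature ?? "";
    case "TEXT":
    default:
      return feature?.text ?? "";
  }
}

const trial = {
  results: {
    responses: [
      {
        features: [
          {
            title: "text-to-audio-output.flac",
            src: "https://xlab1.netlify.app/text-to-audio-output.flac",
            type: "AUDIO",
          },
        ],
      },
    ],
  },
};

console.log(extractFeature(trial).src); // the playable/downloadable audio URL
```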
@@ -0,0 +1,40 @@
import useBEMNaming from "../../../../../common/useBEMNaming";
import OutputDuration from "../_Common/components/OutputDuration";
import Rating from "../Classification/Rating";
import React from "react";
import Task from "../../../../../helpers/Task";
import { textToText } from "../../../../../helpers/TaskIDs";
import DownloadIcon from "../../../../../resources/icons/icon-download.png"

export function AudioOutputBox(props) {
const { getElement } = useBEMNaming("text-to-audio-output");
const task = props.task ? Task.getStaticTask(props.task) : Task.getStaticTask(textToText);


console.log(props)
return (
<div className={getElement("results")}>
<div className={getElement("title-row")}>
<h3 className={getElement("title-row-title")}>
Output
</h3>
<OutputDuration duration={props.duration} />
</div>
<p className={getElement("subtitle")}>
{task.outputText}
</p>
<div
className={getElement("output-container output-container-background")}
>
<div className="output-audio-content audio-container">
<audio controls src={props.output.src} />
<a download={props.output.title} href={props.output.src}>
<img className="download-audio-icon" src={DownloadIcon} />
</a>
</div>
</div>

<Rating />
</div>
);
}
@@ -0,0 +1,31 @@
import React from "react";
import useBEMNaming from "../../../../../common/useBEMNaming";
import useTextOutput from "../Text/useTextOutput";

import { AudioOutputBox } from "./AudioOutputBox";
import TextOutputInputSection from "../Text/TextOutputInputSection";

import { textToAudio } from "../../../../../helpers/TaskIDs";
export default function TextToAudioOutput(props) {
const { getBlock } = useBEMNaming("text-to-audio-output");

// Note: This method could probably be renamed to a more generic 'useOutput' or similar?
const { output, inferenceDuration, input, setInput } = useTextOutput(
props.trial
);

const onSubmit = () => {
props.onSubmit(input);
};

return (
<div className={getBlock()}>
<TextOutputInputSection
input={input}
setInput={setInput}
onSubmit={onSubmit}
/>
<AudioOutputBox duration={inferenceDuration} output={output} task={textToAudio} />
</div>
);
}
@@ -0,0 +1,13 @@
import React from "react";
import TextToAudioOutput from "./TextToAudioOutput";
import { TestTextToAudioOutput } from "./testData/testTextToAudioOutput";

export default {
title: "Experiments/Quick Output/Text to Audio",
component: TextToAudioOutput,
};

const template = (args) => <TextToAudioOutput {...args} />;

export const Default = template.bind({});
Default.args = { trial: TestTextToAudioOutput };
@@ -0,0 +1,27 @@
export const TestTextToAudioOutputGeneratedToken = {
id: "sampleidhere"
};

export const TestTextToAudioOutput = {
id: "sampletesttexttoaudiooutputidhere",
inputs: [
'a chill song with influences from lofi, chillstep and downtempo',
],
completed_at: "2023-06-03T18:17:14.513854Z",
results: {
'duration': "9.216154124s",
'duration_for_inference': "9.193807904s",
'responses': [
{
'features': [
{
title: "text-to-audio-output.flac",
src: "https://xlab1.netlify.app/text-to-audio-output.flac",
type: 'AUDIO'
}
],
'id': "sampletesttexttoaudiooutputresponseidhere"
}
]
}
}
55 changes: 54 additions & 1 deletion src/helpers/DefaultModels.js
@@ -1,4 +1,4 @@
import { audioToText, textToText } from "./TaskIDs";
import { audioToText, textToAudio, textToText } from "./TaskIDs";

export const DefaultImageClassificationModel = {
id: 1,
@@ -331,6 +331,59 @@ export const DefaultAudioToTextModel = {
version: "1.0",
};

// Note, this is the same as Image Segmentation with minor changes to description etc
export const DefaultTextToAudioModel = {
id: 184,
created_at: "2022-04-29T20:48:47.370171Z",
updated_at: "2022-04-29T20:48:47.370171Z",
attributes: {
Top1: "",
Top5: "",
kind: "CNN",
manifest_author: "Jingning Tang",
training_dataset: "PASCAL VOC 2012",
},
description:
"Riffusion Text to Audio model, which is trained on the COCO (Common Objects in Context) dataset. Use deeplabv3_mnv2_dm05_pascal_train_aug(deeplabv3_mnv2_dm05_pascal_train_aug_2018_10_01) from TensorFlow DeepLab Model Zoo.\n",
short_description:
"DeepLabv3 is a deep convolutional neural networks for semantic audio transcrption. It employ atrous convolution in cascade or in parallel to capture multi-scale context by adopting multiple atrous rates.",
model: {
graph_checksum: "0336ceb67b378df8ada0efe9eadb5ac8",
graph_path:
"https://s3.amazonaws.com/store.carml.org/models/tensorflow/models/deeplabv3_mnv2_dm05_pascal_train_aug_2018_10_01/frozen_inference_graph.pb",
weights_checksum: "",
weights_path: "",
},
framework: {
id: 4,
name: "Riffusion",
version: "1.0",
architectures: [
{
name: "amd64",
},
],
},
input: {
description: "text to be converted to audio",
type: "text",
},
license: "Apache License, Version 2.0",
name: "DeepLabv3_MobileNet_v2_DM_05_PASCAL_VOC_Train_Aug",
output: {
description: "Audio version of the inputted text",
type: textToAudio,
},
url: {
github:
"https://github.com/rai-project/tensorflow/blob/master/builtin_models/DeepLabv3_MobileNet_v2_DM_05_PASCAL_VOC_Train_Aug.yml",
citation: "https://arxiv.org/pdf/1802.02611v3.pdf",
link1: "https://arxiv.org/pdf/1706.05587.pdf",
link2: "",
},
version: "1.0",
};

// Note, this is the same as Image Segmentation with minor changes to description etc
export const DefaultTextConversationModel = {
id: 184,