Added timestamp granularity
@@ -20,6 +20,7 @@ abstract class CreateInterface {
    OpenAIAudioResponseFormat? responseFormat,
    double? temperature,
    String? language,
    List<OpenAIAudioTimestampGranularity>? timestamp_granularities,
  });

  Future<OpenAIAudioModel> createTranslation({

@@ -12,6 +12,8 @@ enum OpenAIImageQuality { hd }

enum OpenAIImageResponseFormat { url, b64Json }

enum OpenAIAudioTimestampGranularity { word, segment }

enum OpenAIAudioResponseFormat { json, text, srt, verbose_json, vtt }

enum OpenAIAudioSpeechResponseFormat { mp3, opus, aac, flac }

@@ -8,6 +8,12 @@ final class OpenAIAudioModel {
  /// The text response from the audio requests.
  /// This is the only field that is returned from the API.
  final String text;
  final String? task;
  final String? language;
  final double? duration;

  final List<Word>? words;
  final List<Segment>? segments;

  @override
  int get hashCode => text.hashCode;
@@ -15,12 +21,26 @@ final class OpenAIAudioModel {
  /// {@macro openai_audio_model}
  const OpenAIAudioModel({
    required this.text,
    this.task,
    this.language,
    this.duration,
    this.words,
    this.segments,
  });

  /// This is used to convert a [Map<String, dynamic>] object to a [OpenAIAudioModel] object.
  factory OpenAIAudioModel.fromMap(Map<String, dynamic> json) {
    return OpenAIAudioModel(
      text: json['text'],
      task: json['task'],
      language: json['language'],
      duration: json['duration'],
      words: json['words'] != null
          ? List<Word>.from(json['words'].map((x) => Word.fromMap(x)))
          : null,
      segments: json['segments'] != null
          ? List<Segment>.from(json['segments'].map((x) => Segment.fromMap(x)))
          : null,
    );
  }

@@ -30,18 +50,147 @@ final class OpenAIAudioModel {
  Map<String, dynamic> toMap() {
    return {
      'text': text,
      if (task != null) 'task': task,
      if (language != null) 'language': language,
      if (duration != null) 'duration': duration,
      if (words != null) 'words': words,
      if (segments != null) 'segments': segments,
    };
  }

  @override
  String toString() {
-   return 'OpenAIAudioModel(text: $text)';
+   return 'OpenAIAudioModel(text: $text, task: $task, language: $language, duration: $duration, words: $words, segments: $segments)';
  }

  @override
  bool operator ==(Object other) {
    if (identical(this, other)) return true;

-   return other is OpenAIAudioModel && other.text == text;
+   return other is OpenAIAudioModel &&
+       other.text == text &&
+       other.task == task &&
+       other.language == language &&
+       other.duration == duration &&
+       other.words == words &&
+       other.segments == segments;
  }
}

final class Word {
  final String word;
  final double start;
  final double end;

  const Word({
    required this.word,
    required this.start,
    required this.end,
  });

  factory Word.fromMap(Map<String, dynamic> json) {
    return Word(
      word: json['word'],
      start: json['start'],
      end: json['end'],
    );
  }

  Map<String, dynamic> toMap() {
    return {
      'word': word,
      'start': start,
      'end': end,
    };
  }

  @override
  String toString() => 'Word(word: $word, start: $start, end: $end)';

  @override
  bool operator ==(Object other) {
    if (identical(this, other)) return true;

    return other is Word &&
        other.word == word &&
        other.start == start &&
        other.end == end;
  }
}

final class Segment {
  final int id;
  final int seek;
  final double start;
  final double end;
  final String text;
  final List<int> tokens;
  final double temperature;
  final double avg_logprob;
  final double compression_ratio;
  final double no_speech_prob;

  const Segment({
    required this.id,
    required this.seek,
    required this.start,
    required this.end,
    required this.text,
    required this.tokens,
    required this.temperature,
    required this.avg_logprob,
    required this.compression_ratio,
    required this.no_speech_prob,
  });

  factory Segment.fromMap(Map<String, dynamic> json) {
    return Segment(
      id: json['id'],
      seek: json['seek'],
      start: json['start'],
      end: json['end'],
      text: json['text'],
      tokens: List<int>.from(json['tokens']),
      temperature: json['temperature'],
      avg_logprob: json['avg_logprob'],
      compression_ratio: json['compression_ratio'],
      no_speech_prob: json['no_speech_prob'],
    );
  }

  Map<String, dynamic> toMap() {
    return {
      'id': id,
      'seek': seek,
      'start': start,
      'end': end,
      'text': text,
      'tokens': tokens,
      'temperature': temperature,
      'avg_logprob': avg_logprob,
      'compression_ratio': compression_ratio,
      'no_speech_prob': no_speech_prob,
    };
  }

  @override
  String toString() =>
      'Segment(id: $id, seek: $seek, start: $start, end: $end, text: $text, tokens: $tokens, temperature: $temperature, avg_logprob: $avg_logprob, compression_ratio: $compression_ratio, no_speech_prob: $no_speech_prob)';

  @override
  bool operator ==(Object other) {
    if (identical(this, other)) return true;

    return other is Segment &&
        other.id == id &&
        other.seek == seek &&
        other.start == start &&
        other.end == end &&
        other.text == text &&
        other.tokens == tokens &&
        other.temperature == temperature &&
        other.avg_logprob == avg_logprob &&
        other.compression_ratio == compression_ratio &&
        other.no_speech_prob == no_speech_prob;
  }
}

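For orientation, here is a minimal parsing sketch against the models above. The payload literal is illustrative only (field names follow the `fromMap` factories shown in this diff), and it assumes `OpenAIAudioModel` and `Word` are in scope:

```dart
void main() {
  // Illustrative verbose_json-style payload; not a real API response.
  final model = OpenAIAudioModel.fromMap({
    'text': 'Hello world',
    'task': 'transcribe',
    'language': 'english',
    'duration': 1.2,
    'words': [
      {'word': 'Hello', 'start': 0.0, 'end': 0.5},
      {'word': 'world', 'start': 0.5, 'end': 1.2},
    ],
  });

  // `words` and `segments` stay null unless the payload includes them.
  print(model.words?.first.word); // Hello
  print(model.segments); // null
}
```
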
@@ -34,6 +34,8 @@ interface class OpenAIAudio implements OpenAIAudioBase {
  ///
  /// [language] is the language of the input audio. Supplying the input language in **ISO-639-1** format will improve accuracy and latency.
  ///
  /// [timestamp_granularities] is the list of timestamp granularities to populate for this transcription. [responseFormat] must be set to verbose_json to use timestamp granularities. Pass either word or segment; passing both does not work.
  ///
  /// Example:
  /// ```dart
  /// final transcription = await openai.audio.createTranscription(

@@ -52,6 +54,7 @@ interface class OpenAIAudio implements OpenAIAudioBase {
    OpenAIAudioResponseFormat? responseFormat,
    double? temperature,
    String? language,
    List<OpenAIAudioTimestampGranularity>? timestamp_granularities,
  }) async {
    return await OpenAINetworkingClient.fileUpload(
      file: file,
@@ -62,6 +65,9 @@ interface class OpenAIAudio implements OpenAIAudioBase {
        if (responseFormat != null) "response_format": responseFormat.name,
        if (temperature != null) "temperature": temperature.toString(),
        if (language != null) "language": language,
        if (timestamp_granularities != null)
          "timestamp_granularities[]":
              timestamp_granularities.map((e) => e.name).join(","),
      },
      onSuccess: (Map<String, dynamic> response) {
        return OpenAIAudioModel.fromMap(response);

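Note how the new parameter is serialized: the selected granularities are joined into a single comma-separated value under the `timestamp_granularities[]` form field. A standalone sketch of just that step:

```dart
void main() {
  // Mirrors the serialization step in the diff above, in isolation.
  final granularities = [OpenAIAudioTimestampGranularity.word];

  // This is the value sent under the "timestamp_granularities[]" form field.
  final value = granularities.map((e) => e.name).join(",");
  print(value); // word
  // Passing both enum values would serialize as "word,segment", though the
  // doc comment above states that only one granularity works per request.
}
```
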
@@ -376,9 +376,21 @@ void main() async {
        model: "whisper-1",
        responseFormat: OpenAIAudioResponseFormat.json,
      );
      expect(transcription, isA<OpenAIAudioModel>());
      expect(transcription.text, isA<String>());
    });

    test("create transcription with timestamp granularity", () async {
      final transcription = await OpenAI.instance.audio.createTranscription(
        file: audioExampleFile,
        model: "whisper-1",
        responseFormat: OpenAIAudioResponseFormat.verbose_json,
        timestamp_granularities: [OpenAIAudioTimestampGranularity.word],
      );

      expect(transcription, isA<OpenAIAudioModel>());
      expect(transcription.text, isA<String>());
      expect(transcription.words, isA<List>());
    });
    test("create translation", () async {
      final translation = await OpenAI.instance.audio.createTranslation(
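Building on the new test, here is a usage sketch that actually consumes the word-level timestamps. The import path and audio file name are placeholders, and `OpenAI.apiKey` is assumed to be configured already:

```dart
import 'dart:io';
import 'package:dart_openai/dart_openai.dart'; // assumed package entry point

// Sketch: request word-level timestamps and print each word's time range.
Future<void> printWordTimestamps() async {
  final transcription = await OpenAI.instance.audio.createTranscription(
    file: File("audio.mp3"), // placeholder path
    model: "whisper-1",
    // verbose_json is required for timestamp granularities.
    responseFormat: OpenAIAudioResponseFormat.verbose_json,
    timestamp_granularities: [OpenAIAudioTimestampGranularity.word],
  );

  for (final word in transcription.words ?? const <Word>[]) {
    print("${word.word}: ${word.start}s -> ${word.end}s");
  }
}
```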