Added timestamp granularity
@@ -20,6 +20,7 @@ abstract class CreateInterface {
    OpenAIAudioResponseFormat? responseFormat,
    double? temperature,
    String? language,
    List<OpenAIAudioTimestampGranularity>? timestamp_granularities,
  });

  Future<OpenAIAudioModel> createTranslation({

@@ -12,6 +12,8 @@ enum OpenAIImageQuality { hd }

enum OpenAIImageResponseFormat { url, b64Json }

enum OpenAIAudioTimestampGranularity { word, segment }

enum OpenAIAudioResponseFormat { json, text, srt, verbose_json, vtt }

enum OpenAIAudioSpeechResponseFormat { mp3, opus, aac, flac }

@@ -8,6 +8,12 @@ final class OpenAIAudioModel {
  /// The text response from the audio requests.
  /// This is the only field that is returned from the API.
  final String text;
  final String? task;
  final String? language;
  final double? duration;

  final List<Word>? words;
  final List<Segment>? segments;

  @override
  int get hashCode => text.hashCode;
@@ -15,12 +21,26 @@ final class OpenAIAudioModel {
  /// {@macro openai_audio_model}
  const OpenAIAudioModel({
    required this.text,
    this.task,
    this.language,
    this.duration,
    this.words,
    this.segments,
  });

  /// This is used to convert a [Map<String, dynamic>] object to a [OpenAIAudioModel] object.
  factory OpenAIAudioModel.fromMap(Map<String, dynamic> json) {
    return OpenAIAudioModel(
      text: json['text'],
      task: json['task'],
      language: json['language'],
      duration: json['duration'],
      words: json['words'] != null
          ? List<Word>.from(json['words'].map((x) => Word.fromMap(x)))
          : null,
      segments: json['segments'] != null
          ? List<Segment>.from(json['segments'].map((x) => Segment.fromMap(x)))
          : null,
    );
  }

@@ -30,18 +50,147 @@ final class OpenAIAudioModel {
  Map<String, dynamic> toMap() {
    return {
      'text': text,
      if (task != null) 'task': task,
      if (language != null) 'language': language,
      if (duration != null) 'duration': duration,
      if (words != null) 'words': words,
      if (segments != null) 'segments': segments,
    };
  }

  @override
  String toString() {
-   return 'OpenAIAudioModel(text: $text)';
+   return 'OpenAIAudioModel(text: $text, task: $task, language: $language, duration: $duration, words: $words, segments: $segments)';
  }

  @override
  bool operator ==(Object other) {
    if (identical(this, other)) return true;

-   return other is OpenAIAudioModel && other.text == text;
+   return other is OpenAIAudioModel &&
+       other.text == text &&
+       other.task == task &&
+       other.language == language &&
+       other.duration == duration &&
+       other.words == words &&
+       other.segments == segments;
  }
}

final class Word {
  final String word;
  final double start;
  final double end;

  const Word({
    required this.word,
    required this.start,
    required this.end,
  });

  factory Word.fromMap(Map<String, dynamic> json) {
    return Word(
      word: json['word'],
      start: json['start'],
      end: json['end'],
    );
  }

  Map<String, dynamic> toMap() {
    return {
      'word': word,
      'start': start,
      'end': end,
    };
  }

  @override
  String toString() => 'Word(word: $word, start: $start, end: $end)';

  @override
  bool operator ==(Object other) {
    if (identical(this, other)) return true;

    return other is Word &&
        other.word == word &&
        other.start == start &&
        other.end == end;
  }
}

final class Segment {
  final int id;
  final int seek;
  final double start;
  final double end;
  final String text;
  final List<int> tokens;
  final double temperature;
  final double avg_logprob;
  final double compression_ratio;
  final double no_speech_prob;

  const Segment({
    required this.id,
    required this.seek,
    required this.start,
    required this.end,
    required this.text,
    required this.tokens,
    required this.temperature,
    required this.avg_logprob,
    required this.compression_ratio,
    required this.no_speech_prob,
  });

  factory Segment.fromMap(Map<String, dynamic> json) {
    return Segment(
      id: json['id'],
      seek: json['seek'],
      start: json['start'],
      end: json['end'],
      text: json['text'],
      tokens: List<int>.from(json['tokens']),
      temperature: json['temperature'],
      avg_logprob: json['avg_logprob'],
      compression_ratio: json['compression_ratio'],
      no_speech_prob: json['no_speech_prob'],
    );
  }

  Map<String, dynamic> toMap() {
    return {
      'id': id,
      'seek': seek,
      'start': start,
      'end': end,
      'text': text,
      'tokens': tokens,
      'temperature': temperature,
      'avg_logprob': avg_logprob,
      'compression_ratio': compression_ratio,
      'no_speech_prob': no_speech_prob,
    };
  }

  @override
  String toString() =>
      'Segment(id: $id, seek: $seek, start: $start, end: $end, text: $text, tokens: $tokens, temperature: $temperature, avg_logprob: $avg_logprob, compression_ratio: $compression_ratio, no_speech_prob: $no_speech_prob)';

  @override
  bool operator ==(Object other) {
    if (identical(this, other)) return true;

    return other is Segment &&
        other.id == id &&
        other.seek == seek &&
        other.start == start &&
        other.end == end &&
        other.text == text &&
        other.tokens == tokens &&
        other.temperature == temperature &&
        other.avg_logprob == avg_logprob &&
        other.compression_ratio == compression_ratio &&
        other.no_speech_prob == no_speech_prob;
  }
}

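For orientation, here is a minimal parsing sketch against the models above. The payload literal is illustrative only (field names follow the `fromMap` factories shown in this diff), and it assumes `OpenAIAudioModel` and `Word` are in scope:

```dart
void main() {
  // Illustrative verbose_json-style payload; not a real API response.
  final model = OpenAIAudioModel.fromMap({
    'text': 'Hello world',
    'task': 'transcribe',
    'language': 'english',
    'duration': 1.2,
    'words': [
      {'word': 'Hello', 'start': 0.0, 'end': 0.5},
      {'word': 'world', 'start': 0.5, 'end': 1.2},
    ],
  });

  // `words` and `segments` stay null unless the payload includes them.
  print(model.words?.first.word); // Hello
  print(model.segments); // null
}
```
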
@@ -34,6 +34,8 @@ interface class OpenAIAudio implements OpenAIAudioBase {
  ///
  /// [language] is the language of the input audio. Supplying the input language in **ISO-639-1** format will improve accuracy and latency.
  ///
  /// [timestamp_granularities] is the list of timestamp granularities to populate for this transcription. [responseFormat] must be set to verbose_json to use timestamp granularities. Pass either word or segment; passing both does not work.
  ///
  /// Example:
  /// ```dart
  /// final transcription = await openai.audio.createTranscription(

@@ -52,6 +54,7 @@ interface class OpenAIAudio implements OpenAIAudioBase {
    OpenAIAudioResponseFormat? responseFormat,
    double? temperature,
    String? language,
    List<OpenAIAudioTimestampGranularity>? timestamp_granularities,
  }) async {
    return await OpenAINetworkingClient.fileUpload(
      file: file,
@@ -62,6 +65,9 @@ interface class OpenAIAudio implements OpenAIAudioBase {
        if (responseFormat != null) "response_format": responseFormat.name,
        if (temperature != null) "temperature": temperature.toString(),
        if (language != null) "language": language,
        if (timestamp_granularities != null)
          "timestamp_granularities[]":
              timestamp_granularities.map((e) => e.name).join(","),
      },
      onSuccess: (Map<String, dynamic> response) {
        return OpenAIAudioModel.fromMap(response);

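Note how the new parameter is serialized: the selected granularities are joined into a single comma-separated value under the `timestamp_granularities[]` form field. A standalone sketch of just that step:

```dart
void main() {
  // Mirrors the serialization step in the diff above, in isolation.
  final granularities = [OpenAIAudioTimestampGranularity.word];

  // This is the value sent under the "timestamp_granularities[]" form field.
  final value = granularities.map((e) => e.name).join(",");
  print(value); // word
  // Passing both enum values would serialize as "word,segment", though the
  // doc comment above states that only one granularity works per request.
}
```
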
@@ -376,9 +376,21 @@ void main() async {
        model: "whisper-1",
        responseFormat: OpenAIAudioResponseFormat.json,
      );
      expect(transcription, isA<OpenAIAudioModel>());
      expect(transcription.text, isA<String>());
    });

    test("create transcription with timestamp granularity", () async {
      final transcription = await OpenAI.instance.audio.createTranscription(
        file: audioExampleFile,
        model: "whisper-1",
        responseFormat: OpenAIAudioResponseFormat.verbose_json,
        timestamp_granularities: [OpenAIAudioTimestampGranularity.word],
      );

      expect(transcription, isA<OpenAIAudioModel>());
      expect(transcription.text, isA<String>());
      expect(transcription.words, isA<List>());
    });
    test("create translation", () async {
      final translation = await OpenAI.instance.audio.createTranslation(
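Building on the new test, here is a usage sketch that actually consumes the word-level timestamps. The import path and audio file name are placeholders, and `OpenAI.apiKey` is assumed to be configured already:

```dart
import 'dart:io';
import 'package:dart_openai/dart_openai.dart'; // assumed package entry point

// Sketch: request word-level timestamps and print each word's time range.
Future<void> printWordTimestamps() async {
  final transcription = await OpenAI.instance.audio.createTranscription(
    file: File("audio.mp3"), // placeholder path
    model: "whisper-1",
    // verbose_json is required for timestamp granularities.
    responseFormat: OpenAIAudioResponseFormat.verbose_json,
    timestamp_granularities: [OpenAIAudioTimestampGranularity.word],
  );

  for (final word in transcription.words ?? const <Word>[]) {
    print("${word.word}: ${word.start}s -> ${word.end}s");
  }
}
```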