diff --git a/packages/genkit_openai/lib/genkit_openai.dart b/packages/genkit_openai/lib/genkit_openai.dart
index 1c33cb47..b59e8d86 100644
--- a/packages/genkit_openai/lib/genkit_openai.dart
+++ b/packages/genkit_openai/lib/genkit_openai.dart
@@ -16,17 +16,22 @@ import 'dart:async';
 
 import 'package:genkit/plugin.dart';
 
-import 'src/chat.dart' as chat;
+import 'src/audio.dart' as audio_lib;
+import 'src/chat.dart' as chat_lib;
 import 'src/openai_plugin.dart';
 
+export 'src/audio.dart' show AudioOptions, OpenAIAudioOptions, isAudioModel;
 export 'src/chat.dart' show OpenAIChatOptions, OpenAIOptions;
 export 'src/converters.dart' show GenkitConverter;
 export 'src/utils.dart'
     show
+        OpenAIClientConfig,
+        buildOpenAIClient,
         defaultModelInfo,
         getModelType,
         modelInfoFor,
         oSeriesModelInfo,
+        rethrowAsGenkitException,
         supportsTools,
         supportsVision;
 
@@ -65,11 +70,21 @@ class OpenAICompatPluginHandle {
     );
   }
 
-  /// Reference to a model
-  ModelRef model(String name) {
+  /// Returns a [ModelRef] named `openai/$name` with the chat options schema.
+  ModelRef model(String name) {
     return modelRef(
       'openai/$name',
-      customOptions: chat.chatModelOptionsSchema(),
+      customOptions: chat_lib.chatModelOptionsSchema(),
+    );
+  }
+
+  /// Reference to an audio chat model (e.g. `gpt-4o-audio-preview`).
+  ///
+  /// Audio models accept text input and return both text and audio output.
+  ModelRef audioModel(String name) {
+    return modelRef(
+      'openai/$name',
+      customOptions: audio_lib.audioOptionsSchema(),
     );
   }
 }
diff --git a/packages/genkit_openai/lib/src/audio.dart b/packages/genkit_openai/lib/src/audio.dart
new file mode 100644
index 00000000..f23525e9
--- /dev/null
+++ b/packages/genkit_openai/lib/src/audio.dart
@@ -0,0 +1,267 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import 'dart:convert';
+import 'dart:io';
+import 'dart:typed_data';
+
+import 'package:genkit/plugin.dart';
+import 'package:openai_dart/openai_dart.dart' as sdk;
+import 'package:schemantic/schemantic.dart';
+
+import 'converters.dart';
+import 'utils.dart';
+
+part 'audio.g.dart';
+
+/// Options for OpenAI audio chat models (e.g. `gpt-4o-audio-preview`).
+///
+/// These models accept text input and return both text and audio output.
+/// Requests always ask for both the text and audio modalities, so there is no
+/// need to configure modalities explicitly.
+@Schema()
+abstract class $OpenAIAudioOptions {
+  /// Model version override (e.g. `gpt-4o-audio-preview-2024-12-17`).
+  String? get version;
+
+  /// Sampling temperature (0.0 – 2.0).
+  @DoubleField(minimum: 0.0, maximum: 2.0)
+  double? get temperature;
+
+  /// Nucleus sampling (0.0 – 1.0).
+  @DoubleField(minimum: 0.0, maximum: 1.0)
+  double? get topP;
+
+  /// Maximum tokens to generate; forwarded as `maxCompletionTokens`.
+  int? get maxTokens;
+
+  /// Seed for deterministic sampling.
+  int? get seed;
+
+  /// User identifier for abuse detection.
+  String? get user;
+
+  /// Voice for the audio output; `alloy` is used when unset.
+  @StringField(
+    enumValues: [
+      'alloy',
+      'ash',
+      'ballad',
+      'coral',
+      'echo',
+      'fable',
+      'nova',
+      'onyx',
+      'sage',
+      'shimmer',
+      'verse',
+    ],
+  )
+  String? get voice;
+
+  /// Audio encoding format for the output; `mp3` is used when unset.
+  @StringField(enumValues: ['wav', 'mp3', 'flac', 'opus', 'pcm16'])
+  String? get audioFormat;
+}
+
+typedef AudioOptions = OpenAIAudioOptions;
+
+/// Returns `true` when [name] is an audio chat model.
+///
+/// Audio chat models produce both text and audio output via the chat
+/// completions API. They are identified by having `audio` in their model ID,
+/// which distinguishes them from TTS (`tts-*`), transcription (`whisper-*`),
+/// and realtime (`*-realtime-*`) models.
+bool isAudioModel(String name) => name.toLowerCase().contains('audio');
+
+/// Returns the default [ModelInfo] for an audio chat model shown as [label]:
+/// multi-turn and system-role capable, without media input or tool support.
+ModelInfo audioModelInfo(String label) => ModelInfo(
+  label: label,
+  supports: {
+    'media': false,
+    'multiturn': true,
+    'systemRole': true,
+    'tools': false,
+  },
+);
+
+/// Returns the [SchemanticType] for [OpenAIAudioOptions].
+SchemanticType<OpenAIAudioOptions> audioOptionsSchema() =>
+    OpenAIAudioOptions.$schema;
+
+/// Parses audio model options from an action config map.
+OpenAIAudioOptions parseAudioOptions(Map<String, dynamic>? config) {
+  return config != null
+      ? OpenAIAudioOptions.$schema.parse(config)
+      : OpenAIAudioOptions();
+}
+
+/// MIME type reported for each supported `audioFormat` option value.
+const Map<String, String> _chatAudioMediaTypes = {
+  'mp3': 'audio/mpeg',
+  'wav': 'audio/wav',
+  'flac': 'audio/flac',
+  'opus': 'audio/opus',
+  'pcm16': 'audio/L16',
+};
+
+/// Builds the SDK audio config, defaulting to the `alloy` voice and `mp3`.
+sdk.ChatAudioConfig buildChatAudioConfig(OpenAIAudioOptions options) {
+  return sdk.ChatAudioConfig(
+    voice: sdk.ChatAudioVoice.fromJson(options.voice ?? 'alloy'),
+    format: sdk.ChatAudioFormat.fromJson(options.audioFormat ?? 'mp3'),
+  );
+}
+
+/// Makes a raw HTTP request to the chat completions endpoint so the full
+/// JSON body (including `message.audio`) is accessible before being parsed
+/// into a typed object (the openai_dart SDK drops the audio field).
+Future<ModelResponse> handleChatAudioNonStreaming(
+  sdk.ChatCompletionCreateRequest request,
+  OpenAIClientConfig resolved,
+) async {
+  final baseUrl = resolved.baseUrl ?? 'https://api.openai.com/v1';
+  final url = Uri.parse('$baseUrl/chat/completions');
+  final httpClient = HttpClient();
+
+  try {
+    final httpRequest = await httpClient.postUrl(url);
+    // Without a charset dart:io encodes written strings as ISO-8859-1, which
+    // breaks non-Latin prompts; declare UTF-8 and send UTF-8 bytes explicitly.
+    httpRequest.headers.contentType = ContentType.json;
+    httpRequest.headers.set(
+      HttpHeaders.authorizationHeader,
+      'Bearer ${resolved.apiKey}',
+    );
+    resolved.headers?.forEach(httpRequest.headers.set);
+    httpRequest.add(utf8.encode(jsonEncode(request.toJson())));
+
+    final response = await httpRequest.close();
+    final bytes = await _collectBytes(response);
+    final bodyText = utf8.decode(bytes, allowMalformed: true);
+
+    if (response.statusCode < 200 || response.statusCode >= 300) {
+      throw GenkitException(
+        'OpenAI chat audio API error (HTTP ${response.statusCode}): $bodyText',
+        status: StatusCodes.fromHttpStatus(response.statusCode),
+        details: bodyText,
+      );
+    }
+
+    final rawJson = jsonDecode(bodyText) as Map<String, dynamic>;
+    return buildChatAudioResponse(rawJson, request);
+  } finally {
+    httpClient.close(force: false);
+  }
+}
+
+/// Converts the raw chat completions JSON into a [ModelResponse], appending
+/// the base64 `message.audio.data` payload (if any) as a data-URI [MediaPart].
+ModelResponse buildChatAudioResponse(
+  Map<String, dynamic> rawJson,
+  sdk.ChatCompletionCreateRequest request,
+) {
+  final choices = rawJson['choices'] as List?;
+  if (choices == null || choices.isEmpty) {
+    throw GenkitException('Model returned no choices.');
+  }
+
+  final firstChoice = choices.first as Map<String, dynamic>;
+  final finishReason = firstChoice['finish_reason'] as String?;
+  final messageJson = firstChoice['message'] as Map<String, dynamic>?;
+
+  // Build the text/tool parts via the existing converter.
+  final chatCompletion = sdk.ChatCompletion.fromJson(rawJson);
+  final message = GenkitConverter.fromOpenAIAssistantMessage(
+    chatCompletion.choices.first.message,
+  );
+
+  // Extract audio data that the SDK drops from the parsed AssistantMessage.
+  final audioJson = messageJson?['audio'] as Map<String, dynamic>?;
+  final audioBase64 = audioJson?['data'] as String?;
+
+  final List<Part> content;
+  if (audioBase64 != null && audioBase64.isNotEmpty) {
+    final format = request.audio?.format.toJson() ?? 'mp3';
+    final mediaType = _chatAudioMediaTypes[format] ?? 'audio/mpeg';
+    final dataUri = 'data:$mediaType;base64,$audioBase64';
+    content = [
+      ...message.content,
+      MediaPart(media: Media(contentType: mediaType, url: dataUri)),
+    ];
+  } else {
+    content = message.content;
+  }
+
+  return ModelResponse(
+    finishReason: GenkitConverter.mapFinishReason(finishReason),
+    message: Message(role: message.role, content: content),
+    raw: rawJson,
+  );
+}
+
+/// Drains [response] and returns its body as one contiguous byte list.
+Future<List<int>> _collectBytes(HttpClientResponse response) async {
+  final builder = BytesBuilder();
+  await for (final chunk in response) {
+    builder.add(chunk);
+  }
+  return builder.takeBytes();
+}
+
+/// Creates an audio chat model (e.g. `gpt-4o-audio-preview`).
+///
+/// [resolveClientConfig] is a callback that resolves the OpenAI client
+/// configuration at request time, typically provided by the plugin.
+Model createAudioChatModel(
+  String modelName,
+  ModelInfo? info,
+  Future<OpenAIClientConfig> Function() resolveClientConfig,
+) {
+  final modelInfo = info ?? audioModelInfo(modelName);
+
+  return Model(
+    name: 'openai/$modelName',
+    customOptions: audioOptionsSchema(),
+    metadata: {'model': modelInfo.toJson()},
+    fn: (req, ctx) async {
+      final modelRequest = req!;
+      final options = parseAudioOptions(modelRequest.config);
+
+      final resolvedConfig = await resolveClientConfig();
+
+      try {
+        final request = sdk.ChatCompletionCreateRequest(
+          model: options.version ?? modelName,
+          messages: GenkitConverter.toOpenAIMessages(
+            modelRequest.messages,
+            null,
+          ),
+          temperature: options.temperature,
+          topP: options.topP,
+          maxCompletionTokens: options.maxTokens,
+          seed: options.seed,
+          user: options.user,
+          modalities: [sdk.ChatModality.text, sdk.ChatModality.audio],
+          audio: buildChatAudioConfig(options),
+        );
+
+        return await handleChatAudioNonStreaming(request, resolvedConfig);
+      } catch (e, stackTrace) {
+        rethrowAsGenkitException(e, stackTrace, 'audio');
+      }
+    },
+  );
+}
diff --git a/packages/genkit_openai/lib/src/audio.g.dart b/packages/genkit_openai/lib/src/audio.g.dart
new file mode 100644
index 00000000..6e70beab
--- /dev/null
+++ b/packages/genkit_openai/lib/src/audio.g.dart
@@ -0,0 +1,208 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// GENERATED CODE - DO NOT MODIFY BY HAND
+// dart format width=80
+
+part of 'audio.dart';
+
+// **************************************************************************
+// SchemaGenerator
+// **************************************************************************
+
+base class OpenAIAudioOptions {
+  factory OpenAIAudioOptions.fromJson(Map<String, dynamic> json) =>
+      $schema.parse(json);
+
+  OpenAIAudioOptions._(this._json);
+
+  OpenAIAudioOptions({
+    String? version,
+    double? temperature,
+    double? topP,
+    int? maxTokens,
+    int? seed,
+    String? user,
+    String? voice,
+    String? audioFormat,
+  }) {
+    _json = {
+      'version': ?version,
+      'temperature': ?temperature,
+      'topP': ?topP,
+      'maxTokens': ?maxTokens,
+      'seed': ?seed,
+      'user': ?user,
+      'voice': ?voice,
+      'audioFormat': ?audioFormat,
+    };
+  }
+
+  late final Map<String, dynamic> _json;
+
+  static const SchemanticType<OpenAIAudioOptions> $schema =
+      _OpenAIAudioOptionsTypeFactory();
+
+  String? get version {
+    return _json['version'] as String?;
+  }
+
+  set version(String? value) {
+    if (value == null) {
+      _json.remove('version');
+    } else {
+      _json['version'] = value;
+    }
+  }
+
+  double? get temperature {
+    return (_json['temperature'] as num?)?.toDouble();
+  }
+
+  set temperature(double? value) {
+    if (value == null) {
+      _json.remove('temperature');
+    } else {
+      _json['temperature'] = value;
+    }
+  }
+
+  double? get topP {
+    return (_json['topP'] as num?)?.toDouble();
+  }
+
+  set topP(double? value) {
+    if (value == null) {
+      _json.remove('topP');
+    } else {
+      _json['topP'] = value;
+    }
+  }
+
+  int? get maxTokens {
+    return _json['maxTokens'] as int?;
+  }
+
+  set maxTokens(int? value) {
+    if (value == null) {
+      _json.remove('maxTokens');
+    } else {
+      _json['maxTokens'] = value;
+    }
+  }
+
+  int? get seed {
+    return _json['seed'] as int?;
+  }
+
+  set seed(int? value) {
+    if (value == null) {
+      _json.remove('seed');
+    } else {
+      _json['seed'] = value;
+    }
+  }
+
+  String? get user {
+    return _json['user'] as String?;
+  }
+
+  set user(String? value) {
+    if (value == null) {
+      _json.remove('user');
+    } else {
+      _json['user'] = value;
+    }
+  }
+
+  String? get voice {
+    return _json['voice'] as String?;
+  }
+
+  set voice(String? value) {
+    if (value == null) {
+      _json.remove('voice');
+    } else {
+      _json['voice'] = value;
+    }
+  }
+
+  String? get audioFormat {
+    return _json['audioFormat'] as String?;
+  }
+
+  set audioFormat(String? value) {
+    if (value == null) {
+      _json.remove('audioFormat');
+    } else {
+      _json['audioFormat'] = value;
+    }
+  }
+
+  @override
+  String toString() {
+    return _json.toString();
+  }
+
+  Map<String, dynamic> toJson() {
+    return _json;
+  }
+}
+
+base class _OpenAIAudioOptionsTypeFactory
+    extends SchemanticType<OpenAIAudioOptions> {
+  const _OpenAIAudioOptionsTypeFactory();
+
+  @override
+  OpenAIAudioOptions parse(Object? json) {
+    return OpenAIAudioOptions._(json as Map<String, dynamic>);
+  }
+
+  @override
+  JsonSchemaMetadata get schemaMetadata => JsonSchemaMetadata(
+    name: 'OpenAIAudioOptions',
+    definition: $Schema
+        .object(
+          properties: {
+            'version': $Schema.string(),
+            'temperature': $Schema.number(minimum: 0.0, maximum: 2.0),
+            'topP': $Schema.number(minimum: 0.0, maximum: 1.0),
+            'maxTokens': $Schema.integer(),
+            'seed': $Schema.integer(),
+            'user': $Schema.string(),
+            'voice': $Schema.string(
+              enumValues: [
+                'alloy',
+                'ash',
+                'ballad',
+                'coral',
+                'echo',
+                'fable',
+                'nova',
+                'onyx',
+                'sage',
+                'shimmer',
+                'verse',
+              ],
+            ),
+            'audioFormat': $Schema.string(
+              enumValues: ['wav', 'mp3', 'flac', 'opus', 'pcm16'],
+            ),
+          },
+          required: [],
+        )
+        .value,
+    dependencies: [],
+  );
+}
diff --git a/packages/genkit_openai/lib/src/openai_plugin.dart b/packages/genkit_openai/lib/src/openai_plugin.dart
index d95c44cf..077d1b51 100644
--- a/packages/genkit_openai/lib/src/openai_plugin.dart
+++ b/packages/genkit_openai/lib/src/openai_plugin.dart
@@ -16,7 +16,8 @@ import 'package:genkit/plugin.dart';
 import 'package:openai_dart/openai_dart.dart' as sdk;
 
 import '../genkit_openai.dart';
-import 'chat.dart' as chat;
+import 'audio.dart' as audio;
+import 'chat.dart' as chat_lib;
 
 /// Core plugin implementation
 class OpenAIPlugin extends GenkitPlugin {
@@ -56,6 +57,15 @@ class OpenAIPlugin extends GenkitPlugin {
     for (final modelId in availableModelIds) {
       final modelType = getModelType(modelId);
 
+      if (modelType == 'audio') {
+        if (audio.isAudioModel(modelId)) {
+          actions.add(
+            audio.createAudioChatModel(modelId, null, _resolveClientConfig),
+          );
+        }
+        continue;
+      }
+
       if (modelType != 'chat' && modelType != 'unknown') {
         continue;
       }
@@ -82,14 +92,7 @@ class OpenAIPlugin extends GenkitPlugin {
   /// Fetch available model IDs from OpenAI API
   Future<List<String>> _fetchAvailableModels() async {
     final resolvedConfig = await _resolveClientConfig();
-
-    final client = sdk.OpenAIClient(
-      config: sdk.OpenAIConfig(
-        authProvider: sdk.ApiKeyProvider(resolvedConfig.apiKey),
-        baseUrl: resolvedConfig.baseUrl ?? 'https://api.openai.com/v1',
-        defaultHeaders: resolvedConfig.headers ?? const {},
-      ),
-    );
+    final client = buildOpenAIClient(resolvedConfig);
 
     try {
       final response = await client.models.list();
@@ -106,7 +109,7 @@ class OpenAIPlugin extends GenkitPlugin {
     }
   }
 
-  Future<_ResolvedClientConfig> _resolveClientConfig() async {
+  Future<OpenAIClientConfig> _resolveClientConfig() async {
     final configuredApiKey = await _resolveApiKey();
     if (configuredApiKey == null || configuredApiKey.trim().isEmpty) {
       throw GenkitException(
@@ -115,7 +118,7 @@ class OpenAIPlugin extends GenkitPlugin {
       );
     }
 
-    return _ResolvedClientConfig(
+    return OpenAIClientConfig(
       apiKey: configuredApiKey.trim(),
       baseUrl: baseUrl,
       headers: headers,
@@ -140,6 +143,20 @@ class OpenAIPlugin extends GenkitPlugin {
 
     for (final modelId in modelIds) {
       final modelType = getModelType(modelId);
+
+      if (modelType == 'audio') {
+        if (audio.isAudioModel(modelId)) {
+          modelMetadataList.add(
+            modelMetadata(
+              'openai/$modelId',
+              modelInfo: audio.audioModelInfo(modelId),
+              customOptions: audio.audioOptionsSchema(),
+            ),
+          );
+        }
+        continue;
+      }
+
       if (modelType != 'chat' && modelType != 'unknown') {
         continue;
       }
@@ -148,7 +165,7 @@ class OpenAIPlugin extends GenkitPlugin {
         modelMetadata(
           'openai/$modelId',
           modelInfo: modelInfoFor(modelId),
-          customOptions: chat.chatModelOptionsSchema(),
+          customOptions: chat_lib.chatModelOptionsSchema(),
         ),
       );
     }
@@ -166,6 +183,9 @@ class OpenAIPlugin extends GenkitPlugin {
   @override
   Action? resolve(String actionType, String name) {
     if (actionType == 'model') {
+      if (audio.isAudioModel(name)) {
+        return audio.createAudioChatModel(name, null, _resolveClientConfig);
+      }
       return _createModel(name, null);
     }
     return null;
@@ -176,30 +196,24 @@ class OpenAIPlugin extends GenkitPlugin {
 
     return Model(
       name: 'openai/$modelName',
-      customOptions: chat.chatModelOptionsSchema(),
+      customOptions: chat_lib.chatModelOptionsSchema(),
       metadata: {'model': modelInfo.toJson()},
       fn: (req, ctx) async {
         final modelRequest = req!;
-        final options = chat.parseChatModelOptions(modelRequest.config);
+        final options = chat_lib.parseChatModelOptions(modelRequest.config);
 
         final resolvedConfig = await _resolveClientConfig();
-        final client = sdk.OpenAIClient(
-          config: sdk.OpenAIConfig(
-            authProvider: sdk.ApiKeyProvider(resolvedConfig.apiKey),
-            baseUrl: resolvedConfig.baseUrl ?? 'https://api.openai.com/v1',
-            defaultHeaders: resolvedConfig.headers ?? const {},
-          ),
-        );
+        final client = buildOpenAIClient(resolvedConfig);
 
         try {
           final supports = modelInfo.supports;
           final supportsTools = supports?['tools'] == true;
 
-          final isJsonMode = chat.isJsonStructuredOutput(
+          final isJsonMode = chat_lib.isJsonStructuredOutput(
             modelRequest.output?.format,
             modelRequest.output?.contentType,
           );
-          final responseFormat = chat.buildOpenAIResponseFormat(
+          final responseFormat = chat_lib.buildOpenAIResponseFormat(
             modelRequest.output?.schema,
           );
           final request = sdk.ChatCompletionCreateRequest(
@@ -221,31 +235,14 @@ class OpenAIPlugin extends GenkitPlugin {
             user: options.user,
             responseFormat: isJsonMode ? responseFormat : null,
           );
+
           if (ctx.streamingRequested) {
             return await _handleStreaming(client, request, ctx);
           } else {
             return await _handleNonStreaming(client, request);
           }
         } catch (e, stackTrace) {
-          if (e is GenkitException) {
-            rethrow;
-          }
-
-          StatusCodes? status;
-          String? details;
-
-          if (e is sdk.ApiException) {
-            status = StatusCodes.fromHttpStatus(e.statusCode);
-            details = e.body?.toString();
-          }
-
-          throw GenkitException(
-            'OpenAI API error: $e',
-            status: status,
-            details: details ?? e.toString(),
-            underlyingException: e,
-            stackTrace: stackTrace,
-          );
+          rethrowAsGenkitException(e, stackTrace, 'chat');
         } finally {
           client.close();
         }
@@ -329,15 +326,3 @@ class OpenAIPlugin extends GenkitPlugin {
     );
   }
 }
-
-final class _ResolvedClientConfig {
-  final String apiKey;
-  final String? baseUrl;
-  final Map<String, String>? headers;
-
-  const _ResolvedClientConfig({
-    required this.apiKey,
-    required this.baseUrl,
-    required this.headers,
-  });
-}
diff --git a/packages/genkit_openai/lib/src/utils.dart b/packages/genkit_openai/lib/src/utils.dart
index 30fb3a65..8512e23d 100644
--- a/packages/genkit_openai/lib/src/utils.dart
+++ b/packages/genkit_openai/lib/src/utils.dart
@@ -12,7 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-import 'package:genkit/genkit.dart';
+import 'package:genkit/plugin.dart';
+import 'package:openai_dart/openai_dart.dart' as sdk;
 
 final RegExp _oSeriesPattern = RegExp(r'^o\d+(?:-|$)');
 final RegExp _gptPattern = RegExp(r'^gpt-\d+(\.\d+)?o?(?:-|$)');
@@ -216,7 +217,7 @@ String getModelType(String modelId) {
     return 'code';
   }
 
-  // Audio models (TTS, transcription, realtime, speech-to-text).
+  // Audio models (TTS, transcription, realtime, speech-to-text, chat audio).
   if (id.contains('tts') ||
       id.contains('audio') ||
       id.contains('realtime') ||
@@ -264,3 +265,55 @@ String getModelType(String modelId) {
   // Unknown model type.
   return 'unknown';
 }
+
+/// Holds the resolved credentials and endpoint configuration for an OpenAI
+/// (or compatible) API request.
+final class OpenAIClientConfig {
+  final String apiKey;
+  final String? baseUrl;
+  final Map<String, String>? headers;
+
+  const OpenAIClientConfig({required this.apiKey, this.baseUrl, this.headers});
+}
+
+/// Builds an [sdk.OpenAIClient] from an [OpenAIClientConfig].
+sdk.OpenAIClient buildOpenAIClient(OpenAIClientConfig config) {
+  return sdk.OpenAIClient(
+    config: sdk.OpenAIConfig(
+      authProvider: sdk.ApiKeyProvider(config.apiKey),
+      baseUrl: config.baseUrl ?? 'https://api.openai.com/v1',
+      defaultHeaders: config.headers ?? const {},
+    ),
+  );
+}
+
+/// Rethrows [e] wrapped in a [GenkitException].
+///
+/// An [e] that already is a [GenkitException] is rethrown unchanged, keeping
+/// [stackTrace] as its stack trace. API errors are mapped to their
+/// corresponding [StatusCodes]; all other errors are wrapped with a generic
+/// status. [modelType] is used as a prefix in the error message (e.g.
+/// `'chat'`, `'audio'`).
+Never rethrowAsGenkitException(
+  Object e,
+  StackTrace stackTrace,
+  String modelType,
+) {
+  // `throw e` would restart the stack trace at this helper; keep the original.
+  if (e is GenkitException) Error.throwWithStackTrace(e, stackTrace);
+
+  StatusCodes? status;
+  String? details;
+  if (e is sdk.ApiException) {
+    status = StatusCodes.fromHttpStatus(e.statusCode);
+    details = e.body?.toString();
+  }
+
+  throw GenkitException(
+    'OpenAI $modelType API error: $e',
+    status: status,
+    details: details ?? e.toString(),
+    underlyingException: e,
+    stackTrace: stackTrace,
+  );
+}
diff --git a/packages/genkit_openai/test/openai_plugin_audio_test.dart b/packages/genkit_openai/test/openai_plugin_audio_test.dart
new file mode 100644
index 00000000..58fd2dcc
--- /dev/null
+++ b/packages/genkit_openai/test/openai_plugin_audio_test.dart
@@ -0,0 +1,109 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import 'package:genkit_openai/genkit_openai.dart';
+import 'package:test/test.dart';
+
+void main() {
+  group('isAudioModel', () {
+    test('recognises known audio chat models', () {
+      expect(isAudioModel('gpt-4o-audio-preview'), isTrue);
+      expect(isAudioModel('gpt-4o-audio-preview-2024-12-17'), isTrue);
+      expect(isAudioModel('gpt-4o-audio-preview-2024-10-01'), isTrue);
+    });
+
+    test('returns false for non-audio models', () {
+      expect(isAudioModel('gpt-4o'), isFalse);
+      expect(isAudioModel('gpt-4o-mini'), isFalse);
+      expect(isAudioModel('o3'), isFalse);
+    });
+  });
+
+  group('OpenAIAudioOptions', () {
+    test('creates with all fields null by default', () {
+      final opts = OpenAIAudioOptions();
+      expect(opts.version, isNull);
+      expect(opts.temperature, isNull);
+      expect(opts.topP, isNull);
+      expect(opts.maxTokens, isNull);
+      expect(opts.seed, isNull);
+      expect(opts.user, isNull);
+      expect(opts.voice, isNull);
+      expect(opts.audioFormat, isNull);
+    });
+
+    test('parses voice and audioFormat', () {
+      final opts = OpenAIAudioOptions.$schema.parse({
+        'voice': 'nova',
+        'audioFormat': 'wav',
+      });
+      expect(opts.voice, 'nova');
+      expect(opts.audioFormat, 'wav');
+    });
+
+    test('parses standard chat fields', () {
+      final opts = OpenAIAudioOptions.$schema.parse({
+        'temperature': 0.7,
+        'maxTokens': 256,
+        'seed': 42,
+      });
+      expect(opts.temperature, 0.7);
+      expect(opts.maxTokens, 256);
+      expect(opts.seed, 42);
+    });
+
+    test('parses version override', () {
+      final opts = OpenAIAudioOptions.$schema.parse({
+        'version': 'gpt-4o-audio-preview-2024-12-17',
+      });
+      expect(opts.version, 'gpt-4o-audio-preview-2024-12-17');
+    });
+  });
+
+  group('AudioOptions typedef', () {
+    test('AudioOptions is an alias for OpenAIAudioOptions', () {
+      final opts = AudioOptions();
+      expect(opts, isA<OpenAIAudioOptions>());
+    });
+  });
+
+  group('OpenAICompatPluginHandle.audioModel', () {
+    test('returns ref with prefixed name', () {
+      final ref = openAI.audioModel('gpt-4o-audio-preview');
+      expect(ref.name, 'openai/gpt-4o-audio-preview');
+    });
+
+    test('ref carries OpenAIAudioOptions schema', () {
+      final ref = openAI.audioModel('gpt-4o-audio-preview');
+      expect(ref.customOptions, isNotNull);
+    });
+  });
+
+  group('getModelType for audio models', () {
+    test('gpt-4o-audio-preview is classified as audio', () {
+      expect(getModelType('gpt-4o-audio-preview'), 'audio');
+    });
+
+    test('gpt-4o is classified as chat (not audio)', () {
+      expect(getModelType('gpt-4o'), 'chat');
+    });
+  });
+
+  group('Plugin', () {
+    test('creates plugin instance', () {
+      final plugin = openAI(apiKey: 'test-key');
+      expect(plugin, isNotNull);
+    });
+  });
+}
diff --git a/testapps/openai_sample/lib/chat_audio.dart b/testapps/openai_sample/lib/chat_audio.dart
new file mode 100644
index 00000000..ca0dadd0
--- /dev/null
+++ b/testapps/openai_sample/lib/chat_audio.dart
@@ -0,0 +1,120 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import 'dart:io';
+
+import 'package:genkit/genkit.dart';
+import 'package:genkit_openai/genkit_openai.dart';
+
+/// Returns the media of [response], throwing a [StateError] if it has none.
+Media _requireMedia(GenerateResponseHelper response) {
+  final media = response.media;
+  if (media == null) throw StateError('Model returned no audio media.');
+  return media;
+}
+
+/// Baseline: alloy voice, mp3 output — the simplest possible audio generation.
+Flow defineChatAudioFlow(Genkit ai) => ai.defineFlow(
+  name: 'chatAudio',
+  inputSchema: .string(
+    defaultValue: 'Say hello from Genkit Dart using chat audio model.',
+  ),
+  outputSchema: Media.$schema,
+  fn: (text, _) async {
+    // Baseline configuration: alloy voice with mp3 output.
+    final options = OpenAIAudioOptions(voice: 'alloy', audioFormat: 'mp3');
+    final reply = await ai.generate(
+      model: openAI.audioModel('gpt-4o-audio-preview'),
+      prompt: text,
+      config: options,
+    );
+    return _requireMedia(reply);
+  },
+);
+
+/// Creative: raises [temperature] and widens [topP] for more varied output.
+///
+/// Shows the sampling controls in use, paired with the ballad voice.
+Flow defineChatAudioCreativeFlow(Genkit ai) => ai.defineFlow(
+  name: 'chatAudioCreative',
+  inputSchema: .string(
+    defaultValue: 'Tell me a whimsical story about a robot learning to dance.',
+  ),
+  outputSchema: Media.$schema,
+  fn: (text, _) async {
+    // High temperature and a topP near 1 loosen the sampling; maxTokens caps
+    // how many tokens the model may generate.
+    final options = OpenAIAudioOptions(
+      voice: 'ballad',
+      audioFormat: 'wav',
+      temperature: 1.4,
+      topP: 0.95,
+      maxTokens: 512,
+    );
+    final reply = await ai.generate(
+      model: openAI.audioModel('gpt-4o-audio-preview'),
+      prompt: text,
+      config: options,
+    );
+    return _requireMedia(reply);
+  },
+);
+
+/// Multi-turn: sends earlier conversation [messages] together with the prompt.
+///
+/// Also shows pinning a [version] (to avoid rolling-update drift) and using a
+/// fixed [seed] with a low [temperature] for reproducible follow-up replies.
+Flow defineChatAudioMultiTurnFlow(Genkit ai) => ai.defineFlow(
+  name: 'chatAudioMultiTurn',
+  inputSchema: .string(
+    defaultValue: 'And what is the most famous landmark there?',
+  ),
+  outputSchema: Media.$schema,
+  fn: (followUp, _) async {
+    final history = [
+      Message(
+        role: Role.user,
+        content: [TextPart(text: 'What is the capital of France?')],
+      ),
+      Message(
+        role: Role.model,
+        content: [TextPart(text: 'The capital of France is Paris.')],
+      ),
+    ];
+    final options = OpenAIAudioOptions(
+      version: 'gpt-4o-audio-preview-2024-12-17',
+      voice: 'nova',
+      audioFormat: 'mp3',
+      temperature: 0.2,
+      seed: 42,
+    );
+    final reply = await ai.generate(
+      model: openAI.audioModel('gpt-4o-audio-preview'),
+      messages: history,
+      prompt: followUp,
+      config: options,
+    );
+    return _requireMedia(reply);
+  },
+);
+
+void main() {
+  final apiKey = Platform.environment['OPENAI_API_KEY'];
+  final genkit = Genkit(plugins: [openAI(apiKey: apiKey)]);
+
+  // Define each sample flow on the same Genkit instance.
+  defineChatAudioFlow(genkit);
+  defineChatAudioCreativeFlow(genkit);
+  defineChatAudioMultiTurnFlow(genkit);
+}