diff --git a/firebase-ai/CHANGELOG.md b/firebase-ai/CHANGELOG.md index 7c4c924ab9f..400b2697720 100644 --- a/firebase-ai/CHANGELOG.md +++ b/firebase-ai/CHANGELOG.md @@ -1,5 +1,9 @@ # Unreleased +- [feature] Added support for configuring the aspect ratio and image size when generating images + with the Gemini Image models. +- [feature] Added FinishReasons which can be returned from Gemini image generation. + # 17.7.0 - [changed] Added `LiveAudioConversationConfig` to control different aspects of the conversation diff --git a/firebase-ai/api.txt b/firebase-ai/api.txt index 357fb1c2a8b..64ab6bea0cb 100644 --- a/firebase-ai/api.txt +++ b/firebase-ai/api.txt @@ -210,6 +210,23 @@ package com.google.firebase.ai.type { public final class APINotConfiguredException extends com.google.firebase.ai.type.FirebaseAIException { } + public final class AspectRatio { + field public static final com.google.firebase.ai.type.AspectRatio.Companion Companion; + field public static final com.google.firebase.ai.type.AspectRatio LANDSCAPE_16x9; + field public static final com.google.firebase.ai.type.AspectRatio LANDSCAPE_21x9; + field public static final com.google.firebase.ai.type.AspectRatio LANDSCAPE_3x2; + field public static final com.google.firebase.ai.type.AspectRatio LANDSCAPE_4x3; + field public static final com.google.firebase.ai.type.AspectRatio LANDSCAPE_5x4; + field public static final com.google.firebase.ai.type.AspectRatio PORTRAIT_2x3; + field public static final com.google.firebase.ai.type.AspectRatio PORTRAIT_3x4; + field public static final com.google.firebase.ai.type.AspectRatio PORTRAIT_4x5; + field public static final com.google.firebase.ai.type.AspectRatio PORTRAIT_9x16; + field public static final com.google.firebase.ai.type.AspectRatio SQUARE_1x1; + } + + public static final class AspectRatio.Companion { + } + public final class AudioRecordInitializationFailedException extends com.google.firebase.ai.type.FirebaseAIException { ctor public 
AudioRecordInitializationFailedException(String message); } @@ -374,8 +391,13 @@ package com.google.firebase.ai.type { property public final int ordinal; field public static final com.google.firebase.ai.type.FinishReason BLOCKLIST; field public static final com.google.firebase.ai.type.FinishReason.Companion Companion; + field public static final com.google.firebase.ai.type.FinishReason IMAGE_OTHER; + field public static final com.google.firebase.ai.type.FinishReason IMAGE_PROHIBITED_CONTENT; + field public static final com.google.firebase.ai.type.FinishReason IMAGE_RECITATION; + field public static final com.google.firebase.ai.type.FinishReason IMAGE_SAFETY; field public static final com.google.firebase.ai.type.FinishReason MALFORMED_FUNCTION_CALL; field public static final com.google.firebase.ai.type.FinishReason MAX_TOKENS; + field public static final com.google.firebase.ai.type.FinishReason NO_IMAGE; field public static final com.google.firebase.ai.type.FinishReason OTHER; field public static final com.google.firebase.ai.type.FinishReason PROHIBITED_CONTENT; field public static final com.google.firebase.ai.type.FinishReason RECITATION; @@ -463,6 +485,7 @@ package com.google.firebase.ai.type { method public com.google.firebase.ai.type.GenerationConfig build(); method public com.google.firebase.ai.type.GenerationConfig.Builder setCandidateCount(Integer? candidateCount); method public com.google.firebase.ai.type.GenerationConfig.Builder setFrequencyPenalty(Float? frequencyPenalty); + method public com.google.firebase.ai.type.GenerationConfig.Builder setImageConfig(com.google.firebase.ai.type.ImageConfig? imageConfig); method public com.google.firebase.ai.type.GenerationConfig.Builder setMaxOutputTokens(Integer? maxOutputTokens); method public com.google.firebase.ai.type.GenerationConfig.Builder setPresencePenalty(Float? presencePenalty); method public com.google.firebase.ai.type.GenerationConfig.Builder setResponseMimeType(String? 
responseMimeType); @@ -475,6 +498,7 @@ package com.google.firebase.ai.type { method public com.google.firebase.ai.type.GenerationConfig.Builder setTopP(Float? topP); field public Integer? candidateCount; field public Float? frequencyPenalty; + field public com.google.firebase.ai.type.ImageConfig? imageConfig; field public Integer? maxOutputTokens; field public Float? presencePenalty; field public String? responseMimeType; @@ -622,6 +646,20 @@ package com.google.firebase.ai.type { public static final class HarmSeverity.Companion { } + public final class ImageConfig { + } + + public static final class ImageConfig.Builder { + ctor public ImageConfig.Builder(); + method public com.google.firebase.ai.type.ImageConfig build(); + method public com.google.firebase.ai.type.ImageConfig.Builder setAspectRatio(com.google.firebase.ai.type.AspectRatio? aspectRatio); + method public com.google.firebase.ai.type.ImageConfig.Builder setImageSize(com.google.firebase.ai.type.ImageSize imageSize); + } + + public final class ImageConfigKt { + method public static com.google.firebase.ai.type.ImageConfig imageConfig(kotlin.jvm.functions.Function1 init); + } + public final class ImagePart implements com.google.firebase.ai.type.Part { ctor public ImagePart(android.graphics.Bitmap image); method public android.graphics.Bitmap getImage(); @@ -630,6 +668,16 @@ package com.google.firebase.ai.type { property public boolean isThought; } + public final class ImageSize { + field public static final com.google.firebase.ai.type.ImageSize.Companion Companion; + field public static final com.google.firebase.ai.type.ImageSize SIZE_1K; + field public static final com.google.firebase.ai.type.ImageSize SIZE_2K; + field public static final com.google.firebase.ai.type.ImageSize SIZE_4K; + } + + public static final class ImageSize.Companion { + } + public final class ImagenAspectRatio { field public static final com.google.firebase.ai.type.ImagenAspectRatio.Companion Companion; field public static final 
com.google.firebase.ai.type.ImagenAspectRatio LANDSCAPE_16x9; diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AspectRatio.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AspectRatio.kt new file mode 100644 index 00000000000..4a0b3b0bc62 --- /dev/null +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AspectRatio.kt @@ -0,0 +1,43 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.firebase.ai.type + +/** Represents the aspect ratio that the generated image should conform to. */ +public class AspectRatio private constructor(internal val internalVal: String) { + public companion object { + /** A square image, useful for icons, profile pictures, etc. */ + @JvmField public val SQUARE_1x1: AspectRatio = AspectRatio("1:1") + /** A portrait image in 3:4, the aspect ratio of older TVs. */ + @JvmField public val PORTRAIT_3x4: AspectRatio = AspectRatio("3:4") + /** A landscape image in 4:3, the aspect ratio of older TVs. */ + @JvmField public val LANDSCAPE_4x3: AspectRatio = AspectRatio("4:3") + /** A portrait image in 9:16, the aspect ratio of modern monitors and phone screens. */ + @JvmField public val PORTRAIT_9x16: AspectRatio = AspectRatio("9:16") + /** A landscape image in 16:9, the aspect ratio of modern monitors and phone screens. 
*/ + @JvmField public val LANDSCAPE_16x9: AspectRatio = AspectRatio("16:9") + /** A portrait image in 4:5, the aspect ratio for prints from digital cameras. */ + @JvmField public val PORTRAIT_4x5: AspectRatio = AspectRatio("4:5") + /** A landscape image in 5:4, the aspect ratio for prints from digital cameras. */ + @JvmField public val LANDSCAPE_5x4: AspectRatio = AspectRatio("5:4") + /** A portrait image in 2:3, the aspect ratio for prints from 35mm film cameras. */ + @JvmField public val PORTRAIT_2x3: AspectRatio = AspectRatio("2:3") + /** A landscape image in 3:2, the aspect ratio for prints from 35mm film cameras. */ + @JvmField public val LANDSCAPE_3x2: AspectRatio = AspectRatio("3:2") + /** An ultrawide image in 21:9, an aspect ratio commonly used in modern movies. */ + @JvmField public val LANDSCAPE_21x9: AspectRatio = AspectRatio("21:9") + } +} diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Candidate.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Candidate.kt index e14d768dac8..c07e420d963 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Candidate.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Candidate.kt @@ -247,7 +247,12 @@ public class FinishReason private constructor(public val name: String, public va BLOCKLIST, PROHIBITED_CONTENT, SPII, - MALFORMED_FUNCTION_CALL; + MALFORMED_FUNCTION_CALL, + IMAGE_SAFETY, + IMAGE_PROHIBITED_CONTENT, + NO_IMAGE, + IMAGE_RECITATION, + IMAGE_OTHER; internal object Serializer : KSerializer by FirstOrdinalSerializer(Internal::class) @@ -262,6 +267,11 @@ public class FinishReason private constructor(public val name: String, public va PROHIBITED_CONTENT -> FinishReason.PROHIBITED_CONTENT SPII -> FinishReason.SPII MALFORMED_FUNCTION_CALL -> FinishReason.MALFORMED_FUNCTION_CALL + IMAGE_SAFETY -> FinishReason.IMAGE_SAFETY + IMAGE_PROHIBITED_CONTENT -> FinishReason.IMAGE_PROHIBITED_CONTENT + NO_IMAGE -> FinishReason.NO_IMAGE + IMAGE_RECITATION -> 
FinishReason.IMAGE_RECITATION + IMAGE_OTHER -> FinishReason.IMAGE_OTHER else -> FinishReason.UNKNOWN } } @@ -301,6 +311,22 @@ public class FinishReason private constructor(public val name: String, public va /** The function call generated by the model is invalid. */ @JvmField public val MALFORMED_FUNCTION_CALL: FinishReason = FinishReason("MALFORMED_FUNCTION_CALL", 9) + + /** Token generation stopped because generated images have safety violations. */ + @JvmField public val IMAGE_SAFETY: FinishReason = FinishReason("IMAGE_SAFETY", 10) + + /** Image generation stopped because generated images have other prohibited content. */ + @JvmField + public val IMAGE_PROHIBITED_CONTENT: FinishReason = FinishReason("IMAGE_PROHIBITED_CONTENT", 11) + + /** The model was expected to generate an image, but none was generated. */ + @JvmField public val NO_IMAGE: FinishReason = FinishReason("NO_IMAGE", 12) + + /** Image generation stopped because the generated image may be a recitation from a source. */ + @JvmField public val IMAGE_RECITATION: FinishReason = FinishReason("IMAGE_RECITATION", 13) + + /** Image generation stopped for a reason not otherwise specified. */ + @JvmField public val IMAGE_OTHER: FinishReason = FinishReason("IMAGE_OTHER", 14) } } diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/GenerationConfig.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/GenerationConfig.kt index a496098787f..7cb40641d8e 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/GenerationConfig.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/GenerationConfig.kt @@ -92,6 +92,7 @@ private constructor( internal val responseSchema: Schema?, internal val responseModalities: List?, internal val thinkingConfig: ThinkingConfig?, + internal val imageConfig: ImageConfig?, ) { /** @@ -137,6 +138,7 @@ private constructor( @JvmField public var responseSchema: Schema? = null @JvmField public var responseModalities: List? 
= null @JvmField public var thinkingConfig: ThinkingConfig? = null + @JvmField public var imageConfig: ImageConfig? = null public fun setTemperature(temperature: Float?): Builder = apply { this.temperature = temperature @@ -170,6 +172,9 @@ private constructor( public fun setThinkingConfig(thinkingConfig: ThinkingConfig?): Builder = apply { this.thinkingConfig = thinkingConfig } + public fun setImageConfig(imageConfig: ImageConfig?): Builder = apply { + this.imageConfig = imageConfig + } /** Create a new [GenerationConfig] with the attached arguments. */ public fun build(): GenerationConfig = @@ -185,7 +190,8 @@ private constructor( responseMimeType = responseMimeType, responseSchema = responseSchema, responseModalities = responseModalities, - thinkingConfig = thinkingConfig + thinkingConfig = thinkingConfig, + imageConfig = imageConfig ) } @@ -202,7 +208,8 @@ private constructor( responseMimeType = responseMimeType, responseSchema = responseSchema?.toInternalOpenApi(), responseModalities = responseModalities?.map { it.toInternal() }, - thinkingConfig = thinkingConfig?.toInternal() + thinkingConfig = thinkingConfig?.toInternal(), + imageConfig = imageConfig?.toInternal() ) @Serializable @@ -218,7 +225,8 @@ private constructor( @SerialName("frequency_penalty") val frequencyPenalty: Float? = null, @SerialName("response_schema") val responseSchema: Schema.InternalOpenAPI? = null, @SerialName("response_modalities") val responseModalities: List? = null, - @SerialName("thinking_config") val thinkingConfig: ThinkingConfig.Internal? = null + @SerialName("thinking_config") val thinkingConfig: ThinkingConfig.Internal? = null, + @SerialName("image_config") val imageConfig: ImageConfig.Internal? 
= null ) public companion object { diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImageConfig.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImageConfig.kt new file mode 100644 index 00000000000..869f9b2e22c --- /dev/null +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImageConfig.kt @@ -0,0 +1,80 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.firebase.ai.type + +import kotlinx.serialization.Serializable + +/** + * Configuration parameters to use for image generation. + * + * @property aspectRatio The aspect ratio of the generated image. + * @property imageSize The size of generated images. + */ +public class ImageConfig +internal constructor(internal val aspectRatio: AspectRatio?, internal val imageSize: ImageSize?) { + + /** + * Builder for creating an [ImageConfig]. + * + * Mainly intended for Java interop. Kotlin consumers should use [imageConfig] for a more + * idiomatic experience. + * + * @property aspectRatio See [ImageConfig.aspectRatio]. + * @property imageSize See [ImageConfig.imageSize]. + * @see [imageConfig] + */ + public class Builder { + @JvmField + @set:JvmSynthetic // hide void setter from Java + public var aspectRatio: AspectRatio? = null + + @JvmField + @set:JvmSynthetic // hide void setter from Java + public var imageSize: ImageSize? 
= null + + public fun setAspectRatio(aspectRatio: AspectRatio?): Builder = apply { + this.aspectRatio = aspectRatio + } + + public fun setImageSize(imageSize: ImageSize): Builder = apply { this.imageSize = imageSize } + + /** Create a new [ImageConfig] with the attached arguments. */ + public fun build(): ImageConfig = ImageConfig(aspectRatio = aspectRatio, imageSize = imageSize) + } + + internal fun toInternal() = + Internal(aspectRatio = aspectRatio?.internalVal, imageSize = imageSize?.internalVal) + + @Serializable internal data class Internal(val aspectRatio: String?, val imageSize: String?) +} + +/** + * Helper method to construct an [ImageConfig] in a DSL-like manner. + * + * Example Usage: + * ``` + * imageConfig { + * aspectRatio = AspectRatio.LANDSCAPE_16x9 + * imageSize = ImageSize.SIZE_2K + * } + * ``` + */ +public fun imageConfig(init: ImageConfig.Builder.() -> Unit): ImageConfig { + val builder = ImageConfig.Builder() + builder.init() + return builder.build() +} diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImageSize.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImageSize.kt new file mode 100644 index 00000000000..5928e470355 --- /dev/null +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImageSize.kt @@ -0,0 +1,29 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.firebase.ai.type + +/** Specifies the size of generated images. 
*/ +public class ImageSize private constructor(internal val internalVal: String) { + + public companion object { + @JvmField public val SIZE_1K: ImageSize = ImageSize("1K") + + @JvmField public val SIZE_2K: ImageSize = ImageSize("2K") + + @JvmField public val SIZE_4K: ImageSize = ImageSize("4K") + } +} diff --git a/firebase-ai/src/test/java/com/google/firebase/ai/SerializationTests.kt b/firebase-ai/src/test/java/com/google/firebase/ai/SerializationTests.kt index 215b1eca9eb..566dd33e09b 100644 --- a/firebase-ai/src/test/java/com/google/firebase/ai/SerializationTests.kt +++ b/firebase-ai/src/test/java/com/google/firebase/ai/SerializationTests.kt @@ -153,7 +153,12 @@ internal class SerializationTests { "BLOCKLIST", "PROHIBITED_CONTENT", "SPII", - "MALFORMED_FUNCTION_CALL" + "MALFORMED_FUNCTION_CALL", + "IMAGE_SAFETY", + "IMAGE_PROHIBITED_CONTENT", + "NO_IMAGE", + "IMAGE_RECITATION", + "IMAGE_OTHER" ] }, "safetyRatings": {