Skip to content
6 changes: 3 additions & 3 deletions apps/sim/app/api/a2a/serve/[agentId]/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import {
import { type AuthResult, AuthType, checkHybridAuth } from '@/lib/auth/hybrid'
import { acquireLock, getRedisClient, releaseLock } from '@/lib/core/config/redis'
import { validateUrlWithDNS } from '@/lib/core/security/input-validation.server'
import { getClientIp } from '@/lib/core/utils/request'
import { SSE_HEADERS } from '@/lib/core/utils/sse'
import { getBaseUrl } from '@/lib/core/utils/urls'
import { generateId } from '@/lib/core/utils/uuid'
Expand Down Expand Up @@ -52,10 +53,9 @@ function getCallerFingerprint(request: NextRequest, userId?: string | null): str
return `user:${userId}`
}

const forwardedFor = request.headers.get('x-forwarded-for')?.split(',')[0]?.trim()
const realIp = request.headers.get('x-real-ip')?.trim()
const clientIp = getClientIp(request)
const userAgent = request.headers.get('user-agent')?.trim() || 'unknown'
return `public:${forwardedFor || realIp || 'unknown'}:${userAgent}`
return `public:${clientIp}:${userAgent}`
}

function hasCallerAccessToTask(
Expand Down
4 changes: 2 additions & 2 deletions apps/sim/app/api/demo-requests/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { type NextRequest, NextResponse } from 'next/server'
import { env } from '@/lib/core/config/env'
import type { TokenBucketConfig } from '@/lib/core/rate-limiter'
import { RateLimiter } from '@/lib/core/rate-limiter'
import { generateRequestId } from '@/lib/core/utils/request'
import { generateRequestId, getClientIp } from '@/lib/core/utils/request'
import { getEmailDomain } from '@/lib/core/utils/urls'
import { sendEmail } from '@/lib/messaging/email/mailer'
import { getFromEmailAddress } from '@/lib/messaging/email/utils'
Expand All @@ -25,7 +25,7 @@ export async function POST(req: NextRequest) {
const requestId = generateRequestId()

try {
const ip = req.headers.get('x-forwarded-for')?.split(',')[0]?.trim() ?? 'unknown'
const ip = getClientIp(req)
const storageKey = `public:demo-request:${ip}`

const { allowed, remaining, resetAt } = await rateLimiter.checkRateLimitDirect(
Expand Down
4 changes: 2 additions & 2 deletions apps/sim/app/api/help/integration-request/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import { z } from 'zod'
import { env } from '@/lib/core/config/env'
import type { TokenBucketConfig } from '@/lib/core/rate-limiter'
import { RateLimiter } from '@/lib/core/rate-limiter'
import { generateRequestId } from '@/lib/core/utils/request'
import { generateRequestId, getClientIp } from '@/lib/core/utils/request'
import { getEmailDomain } from '@/lib/core/utils/urls'
import { sendEmail } from '@/lib/messaging/email/mailer'
import {
Expand Down Expand Up @@ -37,7 +37,7 @@ export async function POST(req: NextRequest) {
const requestId = generateRequestId()

try {
const ip = req.headers.get('x-forwarded-for')?.split(',')[0]?.trim() ?? 'unknown'
const ip = getClientIp(req)
const storageKey = `public:integration-request:${ip}`

const { allowed, remaining, resetAt } = await rateLimiter.checkRateLimitDirect(
Expand Down
11 changes: 11 additions & 0 deletions apps/sim/app/api/settings/voice/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import { NextResponse } from 'next/server'
import { hasSTTService } from '@/lib/speech/config'

/**
 * Reports whether server-side speech-to-text is configured.
 *
 * The endpoint is intentionally unauthenticated: it exposes only one boolean
 * flag (nothing sensitive), and visitors of deployed chats need to read it.
 *
 * @returns A JSON response of the form `{ sttAvailable: boolean }`.
 */
export async function GET() {
  const sttAvailable = hasSTTService()
  return NextResponse.json({ sttAvailable })
}
169 changes: 169 additions & 0 deletions apps/sim/app/api/speech/token/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
import { db } from '@sim/db'
import { chat } from '@sim/db/schema'
import { createLogger } from '@sim/logger'
import { eq } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { getSession } from '@/lib/auth'
import { hasExceededCostLimit } from '@/lib/billing/core/subscription'
import { recordUsage } from '@/lib/billing/core/usage-log'
import { env } from '@/lib/core/config/env'
import { getCostMultiplier, isBillingEnabled } from '@/lib/core/config/feature-flags'
import { RateLimiter } from '@/lib/core/rate-limiter'
import { validateAuthToken } from '@/lib/core/security/deployment'
import { getClientIp } from '@/lib/core/utils/request'

// Module-scoped logger; every log line from this route is tagged 'SpeechTokenAPI'.
const logger = createLogger('SpeechTokenAPI')

// Next.js route segment config. NOTE(review): 'force-dynamic' is expected to make
// the handler run on every request instead of being cached — confirm against the
// Next.js version in use.
export const dynamic = 'force-dynamic'

// Endpoint the POST handler calls (with the `xi-api-key` header) to obtain a token
// that is handed back to the client.
const ELEVENLABS_TOKEN_URL = 'https://api.elevenlabs.io/v1/single-use-token/realtime_scribe'

// Flat charge recorded per issued token: per-minute price multiplied by the session
// length quoted in the usage description. The POST handler additionally scales it by
// getCostMultiplier(). NOTE(review): currency unit is not visible here — presumably USD.
const VOICE_SESSION_COST_PER_MIN = 0.008
const VOICE_SESSION_MAX_MINUTES = 3
const VOICE_SESSION_COST = VOICE_SESSION_COST_PER_MIN * VOICE_SESSION_MAX_MINUTES

// Limits passed to rateLimiter.checkRateLimitDirect for token requests.
// NOTE(review): presumably a token bucket holding up to 30 tokens that regains 3
// tokens every 72 seconds — confirm against the RateLimiter implementation.
const STT_TOKEN_RATE_LIMIT = {
maxTokens: 30,
refillRate: 3,
refillIntervalMs: 72 * 1000,
} as const

// Single limiter instance shared by all requests handled by this module.
const rateLimiter = new RateLimiter()

/**
 * Decides whether the caller may request a speech token on behalf of a deployed chat.
 *
 * Access is granted when the chat exists, is active, and either has `authType`
 * `'public'` or the request carries a `chat_auth_<chatId>` cookie that
 * `validateAuthToken` accepts. Any lookup failure is logged and treated as a denial.
 *
 * @param request - Incoming request; only its cookies are read.
 * @param chatId - Identifier of the deployed chat.
 * @returns `{ valid: true, ownerId }` (the chat's `userId`) on success, otherwise `{ valid: false }`.
 */
async function validateChatAuth(
  request: NextRequest,
  chatId: string
): Promise<{ valid: boolean; ownerId?: string }> {
  try {
    const [record] = await db
      .select({
        id: chat.id,
        userId: chat.userId,
        isActive: chat.isActive,
        authType: chat.authType,
        password: chat.password,
      })
      .from(chat)
      .where(eq(chat.id, chatId))
      .limit(1)

    // Unknown or deactivated chats never authorise a token.
    if (!record || !record.isActive) {
      return { valid: false }
    }

    // Public chats need no further proof; everything else requires the auth cookie.
    const isPublic = record.authType === 'public'
    if (!isPublic) {
      const cookie = request.cookies.get(`chat_auth_${chatId}`)
      const cookieAccepted = cookie
        ? validateAuthToken(cookie.value, chatId, record.password)
        : false
      if (!cookieAccepted) {
        return { valid: false }
      }
    }

    return { valid: true, ownerId: record.userId }
  } catch (error) {
    logger.error('Error validating chat auth for STT:', error)
    return { valid: false }
  }
}

/**
 * Reduces an upstream error payload to a plain string suitable for logs and for
 * the `error` field of the JSON response.
 *
 * Preference order: string `detail`, `detail.message` when `detail` is a structured
 * object, string `message`, then a generic fallback that includes the HTTP status.
 *
 * @param errBody - Parsed JSON error body (any shape, possibly not an object).
 * @param status - HTTP status returned by the upstream service.
 * @returns A non-empty, human-readable message.
 */
function extractUpstreamErrorMessage(errBody: unknown, status: number): string {
  const fallback = `Token request failed (${status})`
  if (typeof errBody !== 'object' || errBody === null) return fallback

  const { detail, message } = errBody as { detail?: unknown; message?: unknown }
  if (typeof detail === 'string' && detail.trim()) return detail
  if (typeof detail === 'object' && detail !== null) {
    const nested = (detail as { message?: unknown }).message
    if (typeof nested === 'string' && nested.trim()) return nested
  }
  if (typeof message === 'string' && message.trim()) return message
  return fallback
}

/**
 * Issues a single-use ElevenLabs speech-to-text token.
 *
 * Request body (JSON, optional): `{ chatId?: string, skipBilling?: boolean }`.
 * - With `chatId`, the caller is authorised through `validateChatAuth` and usage is
 *   attributed to the chat owner.
 * - Without `chatId`, a logged-in session is required and usage is attributed to
 *   that user.
 *
 * Responses:
 * - 200 `{ token }` on success
 * - 400 when `chatId` is present but not a string
 * - 401 when neither chat auth nor a session authorises the caller
 * - 402 when the billed user has exceeded their cost limit (billing enabled only)
 * - 429 with `Retry-After` when the rate limit is hit (billing enabled only)
 * - 502 when ElevenLabs rejects the request or returns no usable token
 * - 503 when no ElevenLabs API key is configured
 * - 500 on any unexpected error
 */
export async function POST(request: NextRequest) {
  try {
    const body: unknown = await request.json().catch(() => ({}))
    const payload: Record<string, unknown> =
      typeof body === 'object' && body !== null ? (body as Record<string, unknown>) : {}

    // chatId ends up in a DB lookup and in the rate-limit key, so reject anything
    // that is not a string instead of trusting a type assertion.
    const rawChatId = payload.chatId
    if (rawChatId != null && typeof rawChatId !== 'string') {
      return NextResponse.json({ error: 'Invalid chatId' }, { status: 400 })
    }
    // An empty string is treated like an absent chatId (session-authenticated path).
    const chatId = typeof rawChatId === 'string' && rawChatId.length > 0 ? rawChatId : undefined

    // NOTE(review): skipBilling is supplied by the client, so any authorised caller
    // can opt out of usage recording. Kept for compatibility with existing callers —
    // confirm this is intended or move the decision server-side.
    const skipBilling = payload.skipBilling === true

    let billingUserId: string | undefined

    if (chatId) {
      const chatAuth = await validateChatAuth(request, chatId)
      if (!chatAuth.valid) {
        return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
      }
      billingUserId = chatAuth.ownerId
    } else {
      const session = await getSession()
      if (!session?.user?.id) {
        return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
      }
      billingUserId = session.user.id
    }

    if (isBillingEnabled) {
      // Deployed-chat visitors are keyed per chat and client IP; logged-in users per account.
      const rateLimitKey = chatId
        ? `stt-token:chat:${chatId}:${getClientIp(request)}`
        : `stt-token:user:${billingUserId}`

      const rateCheck = await rateLimiter.checkRateLimitDirect(rateLimitKey, STT_TOKEN_RATE_LIMIT)
      if (!rateCheck.allowed) {
        return NextResponse.json(
          { error: 'Voice input rate limit exceeded. Please try again later.' },
          {
            status: 429,
            headers: {
              'Retry-After': String(Math.ceil((rateCheck.retryAfterMs ?? 60000) / 1000)),
            },
          }
        )
      }
    }

    if (billingUserId && isBillingEnabled) {
      const exceeded = await hasExceededCostLimit(billingUserId)
      if (exceeded) {
        return NextResponse.json(
          { error: 'Usage limit exceeded. Please upgrade your plan to continue.' },
          { status: 402 }
        )
      }
    }

    const apiKey = env.ELEVENLABS_API_KEY
    if (!apiKey?.trim()) {
      return NextResponse.json(
        { error: 'Speech-to-text service is not configured' },
        { status: 503 }
      )
    }

    const response = await fetch(ELEVENLABS_TOKEN_URL, {
      method: 'POST',
      headers: { 'xi-api-key': apiKey },
    })

    if (!response.ok) {
      const errBody: unknown = await response.json().catch(() => ({}))
      // The upstream `detail` may be a structured object; always hand the client a string.
      const message = extractUpstreamErrorMessage(errBody, response.status)
      logger.error('ElevenLabs token request failed', { status: response.status, message })
      return NextResponse.json({ error: message }, { status: 502 })
    }

    const data: unknown = await response.json().catch(() => null)
    const token =
      typeof data === 'object' && data !== null ? (data as { token?: unknown }).token : undefined

    // Never answer 200 (or bill the owner) for a response that carries no token.
    if (typeof token !== 'string' || token.length === 0) {
      logger.error('ElevenLabs token response did not contain a token', {
        status: response.status,
      })
      return NextResponse.json(
        { error: 'Speech-to-text service returned an invalid response' },
        { status: 502 }
      )
    }

    if (billingUserId && !skipBilling) {
      // Usage recording is best-effort: a failure is logged but must not block the token.
      await recordUsage({
        userId: billingUserId,
        entries: [
          {
            category: 'fixed',
            source: 'voice-input',
            description: `Voice input session (${VOICE_SESSION_MAX_MINUTES} min)`,
            cost: VOICE_SESSION_COST * getCostMultiplier(),
          },
        ],
      }).catch((err) => {
        logger.warn('Failed to record voice input usage, continuing:', err)
      })
    }

    return NextResponse.json({ token })
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Failed to generate speech token'
    logger.error('Speech token error:', error)
    return NextResponse.json({ error: message }, { status: 500 })
  }
}
13 changes: 12 additions & 1 deletion apps/sim/app/chat/[identifier]/chat.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,14 @@ export default function ChatClient({ identifier }: { identifier: string }) {
const [authRequired, setAuthRequired] = useState<'password' | 'email' | 'sso' | null>(null)

const [isVoiceFirstMode, setIsVoiceFirstMode] = useState(false)
const [sttAvailable, setSttAvailable] = useState(false)

useEffect(() => {
fetch('/api/settings/voice')
.then((r) => (r.ok ? r.json() : { sttAvailable: false }))
.then((data) => setSttAvailable(data.sttAvailable === true))
.catch(() => setSttAvailable(false))
}, [])
const { isStreamingResponse, abortControllerRef, stopStreaming, handleStreamedResponse } =
useChatStreaming()
const audioContextRef = useRef<AudioContext | null>(null)
Expand Down Expand Up @@ -443,8 +451,9 @@ export default function ChatClient({ identifier }: { identifier: string }) {
}, [isStreamingResponse, stopStreaming, setMessages, stopAudio])

const handleVoiceStart = useCallback(() => {
if (!sttAvailable) return
setIsVoiceFirstMode(true)
}, [])
}, [sttAvailable])

const handleExitVoiceMode = useCallback(() => {
setIsVoiceFirstMode(false)
Expand Down Expand Up @@ -494,6 +503,7 @@ export default function ChatClient({ identifier }: { identifier: string }) {
isStreaming={isStreamingResponse}
isPlayingAudio={isPlayingAudio}
audioContextRef={audioContextRef}
chatId={chatConfig?.id}
messages={messages.map((msg) => ({
content: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content),
type: msg.type,
Expand Down Expand Up @@ -529,6 +539,7 @@ export default function ChatClient({ identifier }: { identifier: string }) {
isStreaming={isStreamingResponse}
onStopStreaming={() => stopStreaming(setMessages)}
onVoiceStart={handleVoiceStart}
sttAvailable={sttAvailable}
/>
</div>
</div>
Expand Down
22 changes: 11 additions & 11 deletions apps/sim/app/chat/components/input/input.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,6 @@ const logger = createLogger('ChatInput')

const MAX_TEXTAREA_HEIGHT = 200

const IS_STT_AVAILABLE =
typeof window !== 'undefined' &&
!!(
(window as Window & { SpeechRecognition?: unknown; webkitSpeechRecognition?: unknown })
.SpeechRecognition ||
(window as Window & { webkitSpeechRecognition?: unknown }).webkitSpeechRecognition
)

interface AttachedFile {
id: string
name: string
Expand All @@ -37,7 +29,15 @@ export const ChatInput: React.FC<{
onStopStreaming?: () => void
onVoiceStart?: () => void
voiceOnly?: boolean
}> = ({ onSubmit, isStreaming = false, onStopStreaming, onVoiceStart, voiceOnly = false }) => {
sttAvailable?: boolean
}> = ({
onSubmit,
isStreaming = false,
onStopStreaming,
onVoiceStart,
voiceOnly = false,
sttAvailable = false,
}) => {
const fileInputRef = useRef<HTMLInputElement>(null)
const textareaRef = useRef<HTMLTextAreaElement>(null)
const [inputValue, setInputValue] = useState('')
Expand Down Expand Up @@ -142,7 +142,7 @@ export const ChatInput: React.FC<{
return (
<Tooltip.Provider>
<div className='flex items-center justify-center'>
{IS_STT_AVAILABLE && (
{sttAvailable && (
<Tooltip.Root>
<Tooltip.Trigger asChild>
<div>
Expand Down Expand Up @@ -295,7 +295,7 @@ export const ChatInput: React.FC<{

{/* Right: mic + send */}
<div className='flex items-center gap-1.5'>
{IS_STT_AVAILABLE && (
{sttAvailable && (
<Tooltip.Root>
<Tooltip.Trigger asChild>
<button
Expand Down
Loading
Loading