simstudioai · icecrasher321 · Apr 8, 2026 · Apr 7, 2026 · Apr 7, 2026 · Apr 7, 2026
diff --git a/apps/sim/app/api/a2a/serve/[agentId]/route.ts b/apps/sim/app/api/a2a/serve/[agentId]/route.ts
@@ -15,6 +15,7 @@ import {
 import { type AuthResult, AuthType, checkHybridAuth } from '@/lib/auth/hybrid'
 import { acquireLock, getRedisClient, releaseLock } from '@/lib/core/config/redis'
 import { validateUrlWithDNS } from '@/lib/core/security/input-validation.server'
+import { getClientIp } from '@/lib/core/utils/request'
 import { SSE_HEADERS } from '@/lib/core/utils/sse'
 import { getBaseUrl } from '@/lib/core/utils/urls'
 import { generateId } from '@/lib/core/utils/uuid'
@@ -52,10 +53,9 @@ function getCallerFingerprint(request: NextRequest, userId?: string | null): str
     return `user:${userId}`
   }
 
-  const forwardedFor = request.headers.get('x-forwarded-for')?.split(',')[0]?.trim()
-  const realIp = request.headers.get('x-real-ip')?.trim()
+  const clientIp = getClientIp(request)
   const userAgent = request.headers.get('user-agent')?.trim() || 'unknown'
-  return `public:${forwardedFor || realIp || 'unknown'}:${userAgent}`
+  return `public:${clientIp}:${userAgent}`
 }
 
 function hasCallerAccessToTask(

diff --git a/apps/sim/app/api/demo-requests/route.ts b/apps/sim/app/api/demo-requests/route.ts
@@ -3,7 +3,7 @@ import { type NextRequest, NextResponse } from 'next/server'
 import { env } from '@/lib/core/config/env'
 import type { TokenBucketConfig } from '@/lib/core/rate-limiter'
 import { RateLimiter } from '@/lib/core/rate-limiter'
-import { generateRequestId } from '@/lib/core/utils/request'
+import { generateRequestId, getClientIp } from '@/lib/core/utils/request'
 import { getEmailDomain } from '@/lib/core/utils/urls'
 import { sendEmail } from '@/lib/messaging/email/mailer'
 import { getFromEmailAddress } from '@/lib/messaging/email/utils'
@@ -25,7 +25,7 @@ export async function POST(req: NextRequest) {
   const requestId = generateRequestId()
 
   try {
-    const ip = req.headers.get('x-forwarded-for')?.split(',')[0]?.trim() ?? 'unknown'
+    const ip = getClientIp(req)
     const storageKey = `public:demo-request:${ip}`
 
     const { allowed, remaining, resetAt } = await rateLimiter.checkRateLimitDirect(

diff --git a/apps/sim/app/api/help/integration-request/route.ts b/apps/sim/app/api/help/integration-request/route.ts
@@ -4,7 +4,7 @@ import { z } from 'zod'
 import { env } from '@/lib/core/config/env'
 import type { TokenBucketConfig } from '@/lib/core/rate-limiter'
 import { RateLimiter } from '@/lib/core/rate-limiter'
-import { generateRequestId } from '@/lib/core/utils/request'
+import { generateRequestId, getClientIp } from '@/lib/core/utils/request'
 import { getEmailDomain } from '@/lib/core/utils/urls'
 import { sendEmail } from '@/lib/messaging/email/mailer'
 import {
@@ -37,7 +37,7 @@ export async function POST(req: NextRequest) {
   const requestId = generateRequestId()
 
   try {
-    const ip = req.headers.get('x-forwarded-for')?.split(',')[0]?.trim() ?? 'unknown'
+    const ip = getClientIp(req)
     const storageKey = `public:integration-request:${ip}`
 
     const { allowed, remaining, resetAt } = await rateLimiter.checkRateLimitDirect(

diff --git a/apps/sim/app/api/settings/voice/route.ts b/apps/sim/app/api/settings/voice/route.ts
@@ -0,0 +1,11 @@
+import { NextResponse } from 'next/server'
+import { hasSTTService } from '@/lib/speech/config'
+
+/**
+ * Reports whether server-side speech-to-text (STT) is configured. Left
+ * unauthenticated: one non-sensitive boolean that chat visitors need.
+ */
+export async function GET() {
+  const sttAvailable = hasSTTService()
+  return NextResponse.json({ sttAvailable })
+}
diff --git a/apps/sim/app/api/speech/token/route.ts b/apps/sim/app/api/speech/token/route.ts
@@ -0,0 +1,169 @@
+import { db } from '@sim/db'
+import { chat } from '@sim/db/schema'
+import { createLogger } from '@sim/logger'
+import { eq } from 'drizzle-orm'
+import { type NextRequest, NextResponse } from 'next/server'
+import { getSession } from '@/lib/auth'
+import { hasExceededCostLimit } from '@/lib/billing/core/subscription'
+import { recordUsage } from '@/lib/billing/core/usage-log'
+import { env } from '@/lib/core/config/env'
+import { getCostMultiplier, isBillingEnabled } from '@/lib/core/config/feature-flags'
+import { RateLimiter } from '@/lib/core/rate-limiter'
+import { validateAuthToken } from '@/lib/core/security/deployment'
+import { getClientIp } from '@/lib/core/utils/request'
+
+const logger = createLogger('SpeechTokenAPI')
+
+export const dynamic = 'force-dynamic'
+
+const ELEVENLABS_TOKEN_URL = 'https://api.elevenlabs.io/v1/single-use-token/realtime_scribe'
+
+const VOICE_SESSION_COST_PER_MIN = 0.008
+const VOICE_SESSION_MAX_MINUTES = 3
+const VOICE_SESSION_COST = VOICE_SESSION_COST_PER_MIN * VOICE_SESSION_MAX_MINUTES
+
+const STT_TOKEN_RATE_LIMIT = {
+  maxTokens: 30,
+  refillRate: 3,
+  refillIntervalMs: 72 * 1000,
+} as const
+
+const rateLimiter = new RateLimiter()
+
+/**
+ * Decides whether a request is authorized against the chat identified by `chatId`.
+ *
+ * Resolves to `{ valid: true, ownerId }` when the chat is active and either has
+ * `authType === 'public'` or the request carries a `chat_auth_<chatId>` cookie
+ * accepted by `validateAuthToken`; resolves to `{ valid: false }` otherwise,
+ * including when anything inside throws (the error is logged, not rethrown).
+ */
+async function validateChatAuth(
+  request: NextRequest,
+  chatId: string
+): Promise<{ valid: boolean; ownerId?: string }> {
+  const denied = { valid: false }
+  try {
+    const [record] = await db
+      .select({
+        id: chat.id,
+        userId: chat.userId,
+        isActive: chat.isActive,
+        authType: chat.authType,
+        password: chat.password,
+      })
+      .from(chat)
+      .where(eq(chat.id, chatId))
+      .limit(1)
+    if (!record?.isActive) return denied
+
+    const granted = { valid: true, ownerId: record.userId }
+    if (record.authType === 'public') return granted
+
+    // Non-public chats require the per-chat auth cookie to validate.
+    const cookie = request.cookies.get(`chat_auth_${chatId}`)
+    const cookieOk = cookie ? validateAuthToken(cookie.value, chatId, record.password) : false
+    return cookieOk ? granted : denied
+  } catch (error) {
+    logger.error('Error validating chat auth for STT:', error)
+    return denied
+  }
+}
+
+/**
+ * Issues a single-use ElevenLabs realtime-scribe token for voice input.
+ *
+ * With a string `chatId` in the JSON body, access is checked by `validateChatAuth`
+ * and usage is recorded for the chat owner; otherwise for the session user.
+ * Responds 401/429/402/503/502/500 for auth, rate-limit, cost-limit,
+ * missing-API-key, upstream and unexpected failures respectively.
+ */
+export async function POST(request: NextRequest) {
+  try {
+    const body = await request.json().catch(() => ({}))
+    // Untrusted JSON: accept `chatId` only when it really is a string.
+    const chatId = typeof body?.chatId === 'string' ? body.chatId : undefined
+    // NOTE(review): `skipBilling` is caller-controlled — confirm that is intended.
+    const skipBilling = body?.skipBilling === true
+    let billingUserId: string | undefined
+    if (chatId) {
+      const chatAuth = await validateChatAuth(request, chatId)
+      if (!chatAuth.valid) {
+        return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
+      }
+      billingUserId = chatAuth.ownerId
+    } else {
+      const session = await getSession()
+      if (!session?.user?.id) {
+        return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
+      }
+      billingUserId = session.user.id
+    }
+
+    if (isBillingEnabled) {
+      const rateLimitKey = chatId
+        ? `stt-token:chat:${chatId}:${getClientIp(request)}`
+        : `stt-token:user:${billingUserId}`
+      const rateCheck = await rateLimiter.checkRateLimitDirect(rateLimitKey, STT_TOKEN_RATE_LIMIT)
+      if (!rateCheck.allowed) {
+        const retryAfterSec = Math.ceil((rateCheck.retryAfterMs ?? 60000) / 1000)
+        return NextResponse.json(
+          { error: 'Voice input rate limit exceeded. Please try again later.' },
+          { status: 429, headers: { 'Retry-After': String(retryAfterSec) } }
+        )
+      }
+
+      if (billingUserId && (await hasExceededCostLimit(billingUserId))) {
+        return NextResponse.json(
+          { error: 'Usage limit exceeded. Please upgrade your plan to continue.' },
+          { status: 402 }
+        )
+      }
+    }
+
+    const apiKey = env.ELEVENLABS_API_KEY
+    if (!apiKey?.trim()) {
+      return NextResponse.json(
+        { error: 'Speech-to-text service is not configured' },
+        { status: 503 }
+      )
+    }
+
+    const response = await fetch(ELEVENLABS_TOKEN_URL, {
+      method: 'POST',
+      headers: { 'xi-api-key': apiKey },
+    })
+    const data = await response.json().catch(() => null)
+
+    // A 2xx reply without a usable token is an upstream failure too: never bill for it.
+    if (!response.ok || typeof data?.token !== 'string' || !data.token) {
+      const detail = data?.detail?.message || data?.detail || data?.message
+      const fallback = `Token request failed (${response.status})`
+      const message = typeof detail === 'string' && detail ? detail : fallback
+      logger.error('ElevenLabs token request failed', { status: response.status, message })
+      return NextResponse.json({ error: message }, { status: 502 })
+    }
+
+    if (billingUserId && !skipBilling) {
+      await recordUsage({
+        userId: billingUserId,
+        entries: [
+          {
+            category: 'fixed',
+            source: 'voice-input',
+            description: `Voice input session (${VOICE_SESSION_MAX_MINUTES} min)`,
+            cost: VOICE_SESSION_COST * getCostMultiplier(),
+          },
+        ],
+      }).catch((err) => {
+        logger.warn('Failed to record voice input usage, continuing:', err)
+      })
+    }
+
+    return NextResponse.json({ token: data.token })
+  } catch (error) {
+    const message = error instanceof Error ? error.message : 'Failed to generate speech token'
+    logger.error('Speech token error:', error)
+    return NextResponse.json({ error: message }, { status: 500 })
+  }
+}
diff --git a/apps/sim/app/chat/[identifier]/chat.tsx b/apps/sim/app/chat/[identifier]/chat.tsx
@@ -127,6 +127,14 @@ export default function ChatClient({ identifier }: { identifier: string }) {
   const [authRequired, setAuthRequired] = useState<'password' | 'email' | 'sso' | null>(null)
 
   const [isVoiceFirstMode, setIsVoiceFirstMode] = useState(false)
+  const [sttAvailable, setSttAvailable] = useState(false)
+
+  useEffect(() => {
+    fetch('/api/settings/voice')
+      .then((r) => (r.ok ? r.json() : { sttAvailable: false }))
+      .then((data) => setSttAvailable(data.sttAvailable === true))
+      .catch(() => setSttAvailable(false))
+  }, [])
   const { isStreamingResponse, abortControllerRef, stopStreaming, handleStreamedResponse } =
     useChatStreaming()
   const audioContextRef = useRef<AudioContext | null>(null)
@@ -443,8 +451,9 @@ export default function ChatClient({ identifier }: { identifier: string }) {
   }, [isStreamingResponse, stopStreaming, setMessages, stopAudio])
 
   const handleVoiceStart = useCallback(() => {
+    if (!sttAvailable) return
     setIsVoiceFirstMode(true)
-  }, [])
+  }, [sttAvailable])
 
   const handleExitVoiceMode = useCallback(() => {
     setIsVoiceFirstMode(false)
@@ -494,6 +503,7 @@ export default function ChatClient({ identifier }: { identifier: string }) {
         isStreaming={isStreamingResponse}
         isPlayingAudio={isPlayingAudio}
         audioContextRef={audioContextRef}
+        chatId={chatConfig?.id}
         messages={messages.map((msg) => ({
           content: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content),
           type: msg.type,
@@ -529,6 +539,7 @@ export default function ChatClient({ identifier }: { identifier: string }) {
             isStreaming={isStreamingResponse}
             onStopStreaming={() => stopStreaming(setMessages)}
             onVoiceStart={handleVoiceStart}
+            sttAvailable={sttAvailable}
           />
         </div>
       </div>

diff --git a/apps/sim/app/chat/components/input/input.tsx b/apps/sim/app/chat/components/input/input.tsx
@@ -14,14 +14,6 @@ const logger = createLogger('ChatInput')
 
 const MAX_TEXTAREA_HEIGHT = 200
 
-const IS_STT_AVAILABLE =
-  typeof window !== 'undefined' &&
-  !!(
-    (window as Window & { SpeechRecognition?: unknown; webkitSpeechRecognition?: unknown })
-      .SpeechRecognition ||
-    (window as Window & { webkitSpeechRecognition?: unknown }).webkitSpeechRecognition
-  )
-
 interface AttachedFile {
   id: string
   name: string
@@ -37,7 +29,15 @@ export const ChatInput: React.FC<{
   onStopStreaming?: () => void
   onVoiceStart?: () => void
   voiceOnly?: boolean
-}> = ({ onSubmit, isStreaming = false, onStopStreaming, onVoiceStart, voiceOnly = false }) => {
+  sttAvailable?: boolean
+}> = ({
+  onSubmit,
+  isStreaming = false,
+  onStopStreaming,
+  onVoiceStart,
+  voiceOnly = false,
+  sttAvailable = false,
+}) => {
   const fileInputRef = useRef<HTMLInputElement>(null)
   const textareaRef = useRef<HTMLTextAreaElement>(null)
   const [inputValue, setInputValue] = useState('')
@@ -142,7 +142,7 @@ export const ChatInput: React.FC<{
     return (
       <Tooltip.Provider>
         <div className='flex items-center justify-center'>
-          {IS_STT_AVAILABLE && (
+          {sttAvailable && (
             <Tooltip.Root>
               <Tooltip.Trigger asChild>
                 <div>
@@ -295,7 +295,7 @@ export const ChatInput: React.FC<{
 
               {/* Right: mic + send */}
               <div className='flex items-center gap-1.5'>
-                {IS_STT_AVAILABLE && (
+                {sttAvailable && (
                   <Tooltip.Root>
                     <Tooltip.Trigger asChild>
                       <button