Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions apps/sim/connectors/asana/asana.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger'
import { AsanaIcon } from '@/components/icons'
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
import { computeContentHash, joinTagArray, parseTagDate } from '@/connectors/utils'
import { joinTagArray, parseTagDate } from '@/connectors/utils'

const logger = createLogger('AsanaConnector')

Expand Down Expand Up @@ -240,7 +240,6 @@ export const asanaConnector: ConnectorConfig = {

for (const task of result.data) {
const content = buildTaskContent(task)
const contentHash = await computeContentHash(content)
const tagNames = task.tags?.map((t) => t.name).filter(Boolean) || []

documents.push({
Expand All @@ -249,7 +248,7 @@ export const asanaConnector: ConnectorConfig = {
content,
mimeType: 'text/plain',
sourceUrl: task.permalink_url || undefined,
contentHash,
contentHash: `asana:${task.gid}:${task.modified_at ?? ''}`,
metadata: {
project: currentProjectGid,
assignee: task.assignee?.name,
Expand Down Expand Up @@ -315,7 +314,6 @@ export const asanaConnector: ConnectorConfig = {
if (!task) return null

const content = buildTaskContent(task)
const contentHash = await computeContentHash(content)
const tagNames = task.tags?.map((t) => t.name).filter(Boolean) || []

return {
Expand All @@ -324,7 +322,7 @@ export const asanaConnector: ConnectorConfig = {
content,
mimeType: 'text/plain',
sourceUrl: task.permalink_url || undefined,
contentHash,
contentHash: `asana:${task.gid}:${task.modified_at ?? ''}`,
metadata: {
assignee: task.assignee?.name,
completed: task.completed,
Expand Down
63 changes: 24 additions & 39 deletions apps/sim/connectors/fireflies/fireflies.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger'
import { FirefliesIcon } from '@/components/icons'
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
import { computeContentHash, parseTagDate } from '@/connectors/utils'
import { parseTagDate } from '@/connectors/utils'

const logger = createLogger('FirefliesConnector')

Expand Down Expand Up @@ -196,50 +196,34 @@ export const firefliesConnector: ConnectorConfig = {
id
name
}
sentences {
index
speaker_name
text
}
summary {
keywords
action_items
overview
short_summary
}
}
}`,
variables
)

const transcripts = (data.transcripts || []) as FirefliesTranscript[]

const documents: ExternalDocument[] = await Promise.all(
transcripts.map(async (transcript) => {
const content = formatTranscriptContent(transcript)
const contentHash = await computeContentHash(content)

const meetingDate = transcript.date ? new Date(transcript.date).toISOString() : undefined
const speakerNames = transcript.speakers?.map((s) => s.name).filter(Boolean) ?? []

return {
externalId: transcript.id,
title: transcript.title || 'Untitled Meeting',
content,
mimeType: 'text/plain' as const,
sourceUrl: transcript.transcript_url || undefined,
contentHash,
metadata: {
hostEmail: transcript.host_email,
duration: transcript.duration,
meetingDate,
participants: transcript.participants,
speakers: speakerNames,
keywords: transcript.summary?.keywords,
},
}
})
)
const documents: ExternalDocument[] = transcripts.map((transcript) => {
const meetingDate = transcript.date ? new Date(transcript.date).toISOString() : undefined
const speakerNames = transcript.speakers?.map((s) => s.name).filter(Boolean) ?? []

return {
externalId: transcript.id,
title: transcript.title || 'Untitled Meeting',
content: '',
contentDeferred: true,
mimeType: 'text/plain' as const,
sourceUrl: transcript.transcript_url || undefined,
contentHash: `fireflies:${transcript.id}:${transcript.date ?? ''}:${transcript.duration ?? ''}`,
metadata: {
hostEmail: transcript.host_email,
duration: transcript.duration,
meetingDate,
participants: transcript.participants,
speakers: speakerNames,
},
}
})

const totalFetched = ((syncContext?.totalDocsFetched as number) ?? 0) + documents.length
if (syncContext) syncContext.totalDocsFetched = totalFetched
Expand Down Expand Up @@ -296,7 +280,7 @@ export const firefliesConnector: ConnectorConfig = {
if (!transcript) return null

const content = formatTranscriptContent(transcript)
const contentHash = await computeContentHash(content)
const contentHash = `fireflies:${transcript.id}:${transcript.date ?? ''}:${transcript.duration ?? ''}`

const meetingDate = transcript.date ? new Date(transcript.date).toISOString() : undefined
const speakerNames = transcript.speakers?.map((s) => s.name).filter(Boolean) ?? []
Expand All @@ -305,6 +289,7 @@ export const firefliesConnector: ConnectorConfig = {
externalId: transcript.id,
title: transcript.title || 'Untitled Meeting',
content,
contentDeferred: false,
mimeType: 'text/plain',
sourceUrl: transcript.transcript_url || undefined,
contentHash,
Expand Down
12 changes: 5 additions & 7 deletions apps/sim/connectors/google-calendar/google-calendar.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger'
import { GoogleCalendarIcon } from '@/components/icons'
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
import { computeContentHash, parseTagDate } from '@/connectors/utils'
import { parseTagDate } from '@/connectors/utils'

const logger = createLogger('GoogleCalendarConnector')

Expand Down Expand Up @@ -195,14 +195,12 @@ function getTimeRange(sourceConfig: Record<string, unknown>): { timeMin: string;
/**
* Converts a CalendarEvent to an ExternalDocument.
*/
async function eventToDocument(event: CalendarEvent): Promise<ExternalDocument | null> {
function eventToDocument(event: CalendarEvent): ExternalDocument | null {
if (event.status === 'cancelled') return null

const content = eventToContent(event)
if (!content.trim()) return null

const contentHash = await computeContentHash(content)

const startTime = event.start?.dateTime || event.start?.date || ''
const attendeeCount = event.attendees?.filter((a) => !a.resource).length || 0

Expand All @@ -212,7 +210,7 @@ async function eventToDocument(event: CalendarEvent): Promise<ExternalDocument |
content,
mimeType: 'text/plain',
sourceUrl: event.htmlLink || `https://calendar.google.com/calendar/event?eid=${event.id}`,
contentHash,
contentHash: `gcal:${event.id}:${event.updated ?? ''}`,
metadata: {
startTime,
endTime: event.end?.dateTime || event.end?.date || '',
Expand Down Expand Up @@ -348,7 +346,7 @@ export const googleCalendarConnector: ConnectorConfig = {

const documents: ExternalDocument[] = []
for (const event of events) {
const doc = await eventToDocument(event)
const doc = eventToDocument(event)
if (doc) documents.push(doc)
}

Expand Down Expand Up @@ -392,7 +390,7 @@ export const googleCalendarConnector: ConnectorConfig = {

if (event.status === 'cancelled') return null

return eventToDocument(event)
return eventToDocument(event) ?? null
},

validateConfig: async (
Expand Down
74 changes: 30 additions & 44 deletions apps/sim/connectors/google-docs/google-docs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger'
import { GoogleDocsIcon } from '@/components/icons'
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
import { computeContentHash, joinTagArray, parseTagDate } from '@/connectors/utils'
import { joinTagArray, parseTagDate } from '@/connectors/utils'

const logger = createLogger('GoogleDocsConnector')

Expand Down Expand Up @@ -117,40 +117,23 @@ async function fetchDocContent(accessToken: string, documentId: string): Promise
}

/**
* Converts a Drive file entry into an ExternalDocument by fetching its content
* from the Google Docs API.
* Creates a lightweight stub from a Drive file entry. Content is deferred
* and only fetched via getDocument for new or changed documents.
*/
async function fileToDocument(
accessToken: string,
file: DriveFile
): Promise<ExternalDocument | null> {
try {
const content = await fetchDocContent(accessToken, file.id)
if (!content.trim()) {
logger.info(`Skipping empty document: ${file.name} (${file.id})`)
return null
}

const contentHash = await computeContentHash(content)

return {
externalId: file.id,
title: file.name || 'Untitled',
content,
mimeType: 'text/plain',
sourceUrl: file.webViewLink || `https://docs.google.com/document/d/${file.id}/edit`,
contentHash,
metadata: {
modifiedTime: file.modifiedTime,
createdTime: file.createdTime,
owners: file.owners?.map((o) => o.displayName || o.emailAddress).filter(Boolean),
},
}
} catch (error) {
logger.warn(`Failed to extract content from document: ${file.name} (${file.id})`, {
error: error instanceof Error ? error.message : String(error),
})
return null
/**
 * Builds a content-less placeholder document for a Drive file entry.
 *
 * The result carries an empty `content` with `contentDeferred: true`, and a
 * `contentHash` composed only of the file id and its `modifiedTime` (an empty
 * string stands in when `modifiedTime` is null or undefined). No document body
 * is read here.
 *
 * @param file - Drive file entry to describe.
 * @returns The placeholder document for `file`.
 */
function fileToStub(file: DriveFile): ExternalDocument {
  const fileId = file.id

  // Prefer the link supplied on the file entry; otherwise build the editor URL from the id.
  const link = file.webViewLink || `https://docs.google.com/document/d/${fileId}/edit`

  // Each owner is represented by display name, falling back to e-mail address;
  // owners with neither are dropped.
  const ownerLabels = file.owners
    ?.map((owner) => owner.displayName || owner.emailAddress)
    .filter(Boolean)

  const stub: ExternalDocument = {
    externalId: fileId,
    title: file.name || 'Untitled',
    content: '',
    contentDeferred: true,
    mimeType: 'text/plain',
    sourceUrl: link,
    contentHash: `gdocs:${fileId}:${file.modifiedTime ?? ''}`,
    metadata: {
      modifiedTime: file.modifiedTime,
      createdTime: file.createdTime,
      owners: ownerLabels,
    },
  }
  return stub
}

Expand Down Expand Up @@ -246,18 +229,11 @@ export const googleDocsConnector: ConnectorConfig = {
const maxDocs = sourceConfig.maxDocs ? Number(sourceConfig.maxDocs) : 0
const previouslyFetched = (syncContext?.totalDocsFetched as number) ?? 0

const CONCURRENCY = 5
const documents: ExternalDocument[] = []
for (let i = 0; i < files.length; i += CONCURRENCY) {
if (maxDocs > 0 && previouslyFetched + documents.length >= maxDocs) break
const batch = files.slice(i, i + CONCURRENCY)
const results = await Promise.all(batch.map((file) => fileToDocument(accessToken, file)))
documents.push(...(results.filter(Boolean) as ExternalDocument[]))
}
let documents = files.map(fileToStub)
if (maxDocs > 0) {
const remaining = maxDocs - previouslyFetched
if (documents.length > remaining) {
documents.splice(remaining)
documents = documents.slice(0, remaining)
}
}

Expand Down Expand Up @@ -300,7 +276,17 @@ export const googleDocsConnector: ConnectorConfig = {
if (file.trashed) return null
if (file.mimeType !== 'application/vnd.google-apps.document') return null

return fileToDocument(accessToken, file)
try {
const content = await fetchDocContent(accessToken, file.id)
if (!content.trim()) return null

return { ...fileToStub(file), content, contentDeferred: false }
} catch (error) {
logger.warn(`Failed to extract content from document: ${file.name} (${file.id})`, {
error: error instanceof Error ? error.message : String(error),
})
return null
}
},

validateConfig: async (
Expand Down
43 changes: 23 additions & 20 deletions apps/sim/connectors/google-sheets/google-sheets.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger'
import { GoogleSheetsIcon } from '@/components/icons'
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
import { computeContentHash, parseTagDate } from '@/connectors/utils'
import { parseTagDate } from '@/connectors/utils'

const logger = createLogger('GoogleSheetsConnector')

Expand Down Expand Up @@ -168,7 +168,6 @@ async function sheetToDocument(
return null
}

const contentHash = await computeContentHash(content)
const rowCount = dataRows.length

return {
Expand All @@ -177,7 +176,7 @@ async function sheetToDocument(
content,
mimeType: 'text/plain',
sourceUrl: `https://docs.google.com/spreadsheets/d/${spreadsheetId}/edit#gid=${sheet.sheetId}`,
contentHash,
contentHash: `gsheets:${spreadsheetId}:${sheet.sheetId}:${modifiedTime ?? ''}`,
metadata: {
spreadsheetId,
spreadsheetTitle,
Expand Down Expand Up @@ -259,22 +258,24 @@ export const googleSheetsConnector: ConnectorConfig = {
sheetCount: sheets.length,
})

const documents: ExternalDocument[] = []
for (let i = 0; i < sheets.length; i += CONCURRENCY) {
const batch = sheets.slice(i, i + CONCURRENCY)
const results = await Promise.all(
batch.map((sheet) =>
sheetToDocument(
accessToken,
spreadsheetId,
metadata.properties.title,
sheet,
modifiedTime
)
)
)
documents.push(...(results.filter(Boolean) as ExternalDocument[]))
}
const documents: ExternalDocument[] = sheets.map((sheet) => ({
externalId: `${spreadsheetId}__sheet__${sheet.sheetId}`,
title: `${metadata.properties.title} - ${sheet.title}`,
content: '',
contentDeferred: true,
mimeType: 'text/plain',
sourceUrl: `https://docs.google.com/spreadsheets/d/${spreadsheetId}/edit#gid=${sheet.sheetId}`,
contentHash: `gsheets:${spreadsheetId}:${sheet.sheetId}:${modifiedTime ?? ''}`,
metadata: {
spreadsheetId,
spreadsheetTitle: metadata.properties.title,
sheetTitle: sheet.title,
sheetId: sheet.sheetId,
rowCount: sheet.gridProperties?.rowCount,
columnCount: sheet.gridProperties?.columnCount,
...(modifiedTime ? { modifiedTime } : {}),
},
}))

return {
documents,
Expand Down Expand Up @@ -324,13 +325,15 @@ export const googleSheetsConnector: ConnectorConfig = {
return null
}

return sheetToDocument(
const doc = await sheetToDocument(
accessToken,
spreadsheetId,
metadata.properties.title,
sheetEntry.properties,
modifiedTime
)
if (!doc) return null
return { ...doc, contentDeferred: false }
},

validateConfig: async (
Expand Down
Loading
Loading