Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions apps/sim/connectors/asana/asana.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger'
import { AsanaIcon } from '@/components/icons'
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
import { computeContentHash, joinTagArray, parseTagDate } from '@/connectors/utils'
import { joinTagArray, parseTagDate } from '@/connectors/utils'

const logger = createLogger('AsanaConnector')

Expand Down Expand Up @@ -240,7 +240,6 @@ export const asanaConnector: ConnectorConfig = {

for (const task of result.data) {
const content = buildTaskContent(task)
const contentHash = await computeContentHash(content)
const tagNames = task.tags?.map((t) => t.name).filter(Boolean) || []

documents.push({
Expand All @@ -249,7 +248,7 @@ export const asanaConnector: ConnectorConfig = {
content,
mimeType: 'text/plain',
sourceUrl: task.permalink_url || undefined,
contentHash,
contentHash: `asana:${task.gid}:${task.modified_at ?? ''}`,
metadata: {
project: currentProjectGid,
assignee: task.assignee?.name,
Expand Down Expand Up @@ -315,7 +314,6 @@ export const asanaConnector: ConnectorConfig = {
if (!task) return null

const content = buildTaskContent(task)
const contentHash = await computeContentHash(content)
const tagNames = task.tags?.map((t) => t.name).filter(Boolean) || []

return {
Expand All @@ -324,7 +322,7 @@ export const asanaConnector: ConnectorConfig = {
content,
mimeType: 'text/plain',
sourceUrl: task.permalink_url || undefined,
contentHash,
contentHash: `asana:${task.gid}:${task.modified_at ?? ''}`,
metadata: {
assignee: task.assignee?.name,
completed: task.completed,
Expand Down
63 changes: 24 additions & 39 deletions apps/sim/connectors/fireflies/fireflies.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger'
import { FirefliesIcon } from '@/components/icons'
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
import { computeContentHash, parseTagDate } from '@/connectors/utils'
import { parseTagDate } from '@/connectors/utils'

const logger = createLogger('FirefliesConnector')

Expand Down Expand Up @@ -196,50 +196,34 @@ export const firefliesConnector: ConnectorConfig = {
id
name
}
sentences {
index
speaker_name
text
}
summary {
keywords
action_items
overview
short_summary
}
}
}`,
variables
)

const transcripts = (data.transcripts || []) as FirefliesTranscript[]

const documents: ExternalDocument[] = await Promise.all(
transcripts.map(async (transcript) => {
const content = formatTranscriptContent(transcript)
const contentHash = await computeContentHash(content)

const meetingDate = transcript.date ? new Date(transcript.date).toISOString() : undefined
const speakerNames = transcript.speakers?.map((s) => s.name).filter(Boolean) ?? []

return {
externalId: transcript.id,
title: transcript.title || 'Untitled Meeting',
content,
mimeType: 'text/plain' as const,
sourceUrl: transcript.transcript_url || undefined,
contentHash,
metadata: {
hostEmail: transcript.host_email,
duration: transcript.duration,
meetingDate,
participants: transcript.participants,
speakers: speakerNames,
keywords: transcript.summary?.keywords,
},
}
})
)
const documents: ExternalDocument[] = transcripts.map((transcript) => {
const meetingDate = transcript.date ? new Date(transcript.date).toISOString() : undefined
const speakerNames = transcript.speakers?.map((s) => s.name).filter(Boolean) ?? []

return {
externalId: transcript.id,
title: transcript.title || 'Untitled Meeting',
content: '',
contentDeferred: true,
mimeType: 'text/plain' as const,
sourceUrl: transcript.transcript_url || undefined,
contentHash: `fireflies:${transcript.id}:${transcript.date ?? ''}:${transcript.duration ?? ''}`,
metadata: {
hostEmail: transcript.host_email,
duration: transcript.duration,
meetingDate,
participants: transcript.participants,
speakers: speakerNames,
},
}
})

const totalFetched = ((syncContext?.totalDocsFetched as number) ?? 0) + documents.length
if (syncContext) syncContext.totalDocsFetched = totalFetched
Expand Down Expand Up @@ -296,7 +280,7 @@ export const firefliesConnector: ConnectorConfig = {
if (!transcript) return null

const content = formatTranscriptContent(transcript)
const contentHash = await computeContentHash(content)
const contentHash = `fireflies:${transcript.id}:${transcript.date ?? ''}:${transcript.duration ?? ''}`

const meetingDate = transcript.date ? new Date(transcript.date).toISOString() : undefined
const speakerNames = transcript.speakers?.map((s) => s.name).filter(Boolean) ?? []
Expand All @@ -305,6 +289,7 @@ export const firefliesConnector: ConnectorConfig = {
externalId: transcript.id,
title: transcript.title || 'Untitled Meeting',
content,
contentDeferred: false,
mimeType: 'text/plain',
sourceUrl: transcript.transcript_url || undefined,
contentHash,
Expand Down
12 changes: 5 additions & 7 deletions apps/sim/connectors/google-calendar/google-calendar.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger'
import { GoogleCalendarIcon } from '@/components/icons'
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
import { computeContentHash, parseTagDate } from '@/connectors/utils'
import { parseTagDate } from '@/connectors/utils'

const logger = createLogger('GoogleCalendarConnector')

Expand Down Expand Up @@ -195,14 +195,12 @@ function getTimeRange(sourceConfig: Record<string, unknown>): { timeMin: string;
/**
* Converts a CalendarEvent to an ExternalDocument.
*/
async function eventToDocument(event: CalendarEvent): Promise<ExternalDocument | null> {
function eventToDocument(event: CalendarEvent): ExternalDocument | null {
if (event.status === 'cancelled') return null

const content = eventToContent(event)
if (!content.trim()) return null

const contentHash = await computeContentHash(content)

const startTime = event.start?.dateTime || event.start?.date || ''
const attendeeCount = event.attendees?.filter((a) => !a.resource).length || 0

Expand All @@ -212,7 +210,7 @@ async function eventToDocument(event: CalendarEvent): Promise<ExternalDocument |
content,
mimeType: 'text/plain',
sourceUrl: event.htmlLink || `https://calendar.google.com/calendar/event?eid=${event.id}`,
contentHash,
contentHash: `gcal:${event.id}:${event.updated ?? ''}`,
metadata: {
startTime,
endTime: event.end?.dateTime || event.end?.date || '',
Expand Down Expand Up @@ -348,7 +346,7 @@ export const googleCalendarConnector: ConnectorConfig = {

const documents: ExternalDocument[] = []
for (const event of events) {
const doc = await eventToDocument(event)
const doc = eventToDocument(event)
if (doc) documents.push(doc)
}

Expand Down Expand Up @@ -392,7 +390,7 @@ export const googleCalendarConnector: ConnectorConfig = {

if (event.status === 'cancelled') return null

return eventToDocument(event)
return eventToDocument(event) ?? null
},

validateConfig: async (
Expand Down
74 changes: 30 additions & 44 deletions apps/sim/connectors/google-docs/google-docs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger'
import { GoogleDocsIcon } from '@/components/icons'
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
import { computeContentHash, joinTagArray, parseTagDate } from '@/connectors/utils'
import { joinTagArray, parseTagDate } from '@/connectors/utils'

const logger = createLogger('GoogleDocsConnector')

Expand Down Expand Up @@ -117,40 +117,23 @@ async function fetchDocContent(accessToken: string, documentId: string): Promise
}

/**
* Converts a Drive file entry into an ExternalDocument by fetching its content
* from the Google Docs API.
* Creates a lightweight stub from a Drive file entry. Content is deferred
* and only fetched via getDocument for new or changed documents.
*/
async function fileToDocument(
accessToken: string,
file: DriveFile
): Promise<ExternalDocument | null> {
try {
const content = await fetchDocContent(accessToken, file.id)
if (!content.trim()) {
logger.info(`Skipping empty document: ${file.name} (${file.id})`)
return null
}

const contentHash = await computeContentHash(content)

return {
externalId: file.id,
title: file.name || 'Untitled',
content,
mimeType: 'text/plain',
sourceUrl: file.webViewLink || `https://docs.google.com/document/d/${file.id}/edit`,
contentHash,
metadata: {
modifiedTime: file.modifiedTime,
createdTime: file.createdTime,
owners: file.owners?.map((o) => o.displayName || o.emailAddress).filter(Boolean),
},
}
} catch (error) {
logger.warn(`Failed to extract content from document: ${file.name} (${file.id})`, {
error: error instanceof Error ? error.message : String(error),
})
return null
/**
 * Maps a Drive file listing entry to a content-less ExternalDocument placeholder.
 *
 * The returned object carries an empty `content` string and `contentDeferred: true`;
 * no document body is read here. Its `contentHash` is composed from the file id
 * and `modifiedTime` (an empty string is used when `modifiedTime` is null or
 * undefined).
 *
 * @param file - Drive file entry to convert.
 * @returns Placeholder document with identifiers, source URL, and metadata.
 */
function fileToStub(file: DriveFile): ExternalDocument {
  const { id, name, webViewLink, modifiedTime, createdTime, owners } = file

  // Prefer each owner's display name, fall back to the email address, drop empty entries.
  const ownerLabels = owners
    ?.map((owner) => owner.displayName || owner.emailAddress)
    .filter(Boolean)

  const stub: ExternalDocument = {
    externalId: id,
    title: name || 'Untitled',
    content: '',
    contentDeferred: true,
    mimeType: 'text/plain',
    // Fall back to the canonical editor URL when Drive supplies no view link.
    sourceUrl: webViewLink || `https://docs.google.com/document/d/${id}/edit`,
    contentHash: `gdocs:${id}:${modifiedTime ?? ''}`,
    metadata: {
      modifiedTime,
      createdTime,
      owners: ownerLabels,
    },
  }

  return stub
}

Expand Down Expand Up @@ -246,18 +229,11 @@ export const googleDocsConnector: ConnectorConfig = {
const maxDocs = sourceConfig.maxDocs ? Number(sourceConfig.maxDocs) : 0
const previouslyFetched = (syncContext?.totalDocsFetched as number) ?? 0

const CONCURRENCY = 5
const documents: ExternalDocument[] = []
for (let i = 0; i < files.length; i += CONCURRENCY) {
if (maxDocs > 0 && previouslyFetched + documents.length >= maxDocs) break
const batch = files.slice(i, i + CONCURRENCY)
const results = await Promise.all(batch.map((file) => fileToDocument(accessToken, file)))
documents.push(...(results.filter(Boolean) as ExternalDocument[]))
}
let documents = files.map(fileToStub)
if (maxDocs > 0) {
const remaining = maxDocs - previouslyFetched
if (documents.length > remaining) {
documents.splice(remaining)
documents = documents.slice(0, remaining)
}
}

Expand Down Expand Up @@ -300,7 +276,17 @@ export const googleDocsConnector: ConnectorConfig = {
if (file.trashed) return null
if (file.mimeType !== 'application/vnd.google-apps.document') return null

return fileToDocument(accessToken, file)
try {
const content = await fetchDocContent(accessToken, file.id)
if (!content.trim()) return null

return { ...fileToStub(file), content, contentDeferred: false }
} catch (error) {
logger.warn(`Failed to extract content from document: ${file.name} (${file.id})`, {
error: error instanceof Error ? error.message : String(error),
})
return null
}
},

validateConfig: async (
Expand Down
43 changes: 23 additions & 20 deletions apps/sim/connectors/google-sheets/google-sheets.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger'
import { GoogleSheetsIcon } from '@/components/icons'
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
import { computeContentHash, parseTagDate } from '@/connectors/utils'
import { parseTagDate } from '@/connectors/utils'

const logger = createLogger('GoogleSheetsConnector')

Expand Down Expand Up @@ -168,7 +168,6 @@ async function sheetToDocument(
return null
}

const contentHash = await computeContentHash(content)
const rowCount = dataRows.length

return {
Expand All @@ -177,7 +176,7 @@ async function sheetToDocument(
content,
mimeType: 'text/plain',
sourceUrl: `https://docs.google.com/spreadsheets/d/${spreadsheetId}/edit#gid=${sheet.sheetId}`,
contentHash,
contentHash: `gsheets:${spreadsheetId}:${sheet.sheetId}:${modifiedTime ?? ''}`,
metadata: {
spreadsheetId,
spreadsheetTitle,
Expand Down Expand Up @@ -259,22 +258,24 @@ export const googleSheetsConnector: ConnectorConfig = {
sheetCount: sheets.length,
})

const documents: ExternalDocument[] = []
for (let i = 0; i < sheets.length; i += CONCURRENCY) {
const batch = sheets.slice(i, i + CONCURRENCY)
const results = await Promise.all(
batch.map((sheet) =>
sheetToDocument(
accessToken,
spreadsheetId,
metadata.properties.title,
sheet,
modifiedTime
)
)
)
documents.push(...(results.filter(Boolean) as ExternalDocument[]))
}
const documents: ExternalDocument[] = sheets.map((sheet) => ({
externalId: `${spreadsheetId}__sheet__${sheet.sheetId}`,
title: `${metadata.properties.title} - ${sheet.title}`,
content: '',
contentDeferred: true,
mimeType: 'text/plain',
sourceUrl: `https://docs.google.com/spreadsheets/d/${spreadsheetId}/edit#gid=${sheet.sheetId}`,
contentHash: `gsheets:${spreadsheetId}:${sheet.sheetId}:${modifiedTime ?? ''}`,
metadata: {
spreadsheetId,
spreadsheetTitle: metadata.properties.title,
sheetTitle: sheet.title,
sheetId: sheet.sheetId,
rowCount: sheet.gridProperties?.rowCount,
columnCount: sheet.gridProperties?.columnCount,
...(modifiedTime ? { modifiedTime } : {}),
},
}))

return {
documents,
Expand Down Expand Up @@ -324,13 +325,15 @@ export const googleSheetsConnector: ConnectorConfig = {
return null
}

return sheetToDocument(
const doc = await sheetToDocument(
accessToken,
spreadsheetId,
metadata.properties.title,
sheetEntry.properties,
modifiedTime
)
if (!doc) return null
return { ...doc, contentDeferred: false }
},

validateConfig: async (
Expand Down
Loading
Loading