{
  "id": "yt-transcript-04",
  "meta": {
    "instanceId": "vorlux-hub"
  },
  "name": "Vorlux AI | YouTube Transcript Pull (Daily)",
  "active": true,
  "nodes": [
    {
      "id": "d4e5f6a7-0004-4ddd-8004-000000000001",
      "name": "Daily 6am",
      "type": "n8n-nodes-base.scheduleTrigger",
      "typeVersion": 1.2,
      "position": [220, 300],
      "notes": "Fires once per day at 06:00 (workflow/instance timezone). A bare 24-hour interval does not pin the run to 6am, so the rule is expressed as a daily interval with an explicit trigger hour.",
      "parameters": {
        "rule": {
          "interval": [
            {
              "field": "days",
              "daysInterval": 1,
              "triggerAtHour": 6,
              "triggerAtMinute": 0
            }
          ]
        }
      }
    },
    {
      "name": "Webhook Trigger",
      "id": "d4e5f6a7-0004-4ddd-8004-000000000011",
      "type": "n8n-nodes-base.webhook",
      "typeVersion": 2,
      "position": [220, 500],
      "notes": "On-demand entry point: a POST to the youtube-transcript-pull webhook path starts the same pipeline as the schedule trigger and responds as soon as the request is received.",
      "parameters": {
        "httpMethod": "POST",
        "path": "youtube-transcript-pull",
        "responseMode": "onReceived",
        "options": {}
      }
    },
    {
      "id": "d4e5f6a7-0004-4ddd-8004-000000000002",
      "name": "Get Recent Videos",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.2,
      "position": [460, 300],
      "parameters": {
        "method": "GET",
        "url": "={{ $env.VORLUX_HUB_URL || 'http://localhost:3010' }}/api/integrations/youtube?action=search&limit=20&missing_transcript=true",
        "options": {
          "timeout": 15000
        }
      },
      "notes": "Gets up to 20 recent videos that don't have transcripts yet. Falls back to http://localhost:3010 when VORLUX_HUB_URL is unset, the same default the Code nodes in this workflow use."
    },
    {
      "id": "d4e5f6a7-0004-4ddd-8004-000000000003",
      "name": "Has Videos?",
      "type": "n8n-nodes-base.if",
      "typeVersion": 2,
      "position": [700, 300],
      "notes": "True branch when the response's data field is a non-empty array. The expression yields a boolean, so it is checked with the boolean 'is true' operator; a string comparison would be a type mismatch under strict type validation.",
      "parameters": {
        "conditions": {
          "options": {
            "caseSensitive": true,
            "leftValue": "",
            "typeValidation": "strict"
          },
          "conditions": [
            {
              "leftValue": "={{ Array.isArray($json.data) && $json.data.length > 0 }}",
              "rightValue": "",
              "operator": {
                "type": "boolean",
                "operation": "true",
                "singleValue": true
              }
            }
          ],
          "combinator": "and"
        }
      }
    },
    {
      "id": "d4e5f6a7-0004-4ddd-8004-000000000004",
      "name": "Pull Transcripts",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [940, 200],
      "notes": "For each video (at most 10 per run), tries 4 transcript sources in order and keeps the first result longer than 100 characters. Per-source failures are collected in each result's errors array instead of being silently dropped.",
      "parameters": {
        "mode": "runOnceForAllItems",
        "jsCode": "// Pulls a transcript for each video, trying four sources in order and keeping\n// the first one that returns more than MIN_CHARS characters of text.\nconst videos = $input.first().json.data || [];\nconst hubUrl = $env.VORLUX_HUB_URL || 'http://localhost:3010';\nconst MIN_CHARS = 100;\nconst results = [];\n\n// fetch() with a timeout; rejects on non-2xx so error pages are never parsed as transcripts.\nconst request = async (url, timeoutMs, init = {}) => {\n  const res = await fetch(url, { ...init, signal: AbortSignal.timeout(timeoutMs) });\n  if (!res.ok) throw new Error('HTTP ' + res.status);\n  return res;\n};\n\nfor (const video of videos.slice(0, 10)) {\n  const videoId = video.video_id || video.videoId || video.id;\n  // Encode once so an unexpected character in the id cannot alter the query string.\n  const id = encodeURIComponent(String(videoId));\n\n  // Ordered fallbacks: [label, async function resolving to transcript text or a falsy value].\n  const sources = [\n    // Method 1: Hub's built-in transcript fetcher\n    ['hub_api', async () => {\n      const res = await request(hubUrl + '/api/integrations/youtube/liked-pipeline', 30000, {\n        method: 'POST',\n        headers: { 'Content-Type': 'application/json' },\n        body: JSON.stringify({ videoId, action: 'fetch_transcript' })\n      });\n      return (await res.json()).data?.transcript;\n    }],\n    // Method 2: YouTube transcript API (supadata)\n    ['supadata', async () => {\n      const res = await request('https://api.supadata.ai/v1/youtube/transcript?videoId=' + id + '&text=true', 15000);\n      return (await res.json()).content;\n    }],\n    // Method 3: youtube-transcript-api via local proxy\n    ['local_proxy', async () => {\n      const res = await request(hubUrl + '/api/integrations/youtube/transcript?videoId=' + id, 20000);\n      return (await res.json()).data?.transcript;\n    }],\n    // Method 4: Auto-generated captions scraped from the watch page\n    ['innertube_captions', async () => {\n      const page = await request('https://www.youtube.com/watch?v=' + id, 10000);\n      const match = (await page.text()).match(/\"captionTracks\":\\[\\{\"baseUrl\":\"([^\"]+)\"/);\n      if (!match) return '';\n      // The URL is embedded in page JSON, so ampersands arrive as a literal \\u0026 escape.\n      const captionUrl = match[1].replace(/\\\\u0026/g, '&');\n      const xml = await (await request(captionUrl, 10000)).text();\n      // Strip the XML tags and collapse whitespace to get plain text.\n      return xml.replace(/<[^>]+>/g, ' ').replace(/\\s+/g, ' ').trim();\n    }]\n  ];\n\n  let transcript = '';\n  let method = 'none';\n  const errors = [];\n\n  // A video without an id cannot be looked up anywhere; record it and skip the network calls.\n  if (!videoId) errors.push('missing video id');\n\n  for (const [label, pull] of videoId ? sources : []) {\n    try {\n      const text = await pull();\n      // Same threshold for every source, so method is only set when the transcript is usable.\n      if (typeof text === 'string' && text.length > MIN_CHARS) {\n        transcript = text;\n        method = label;\n        break;\n      }\n    } catch (err) {\n      // Best-effort: remember why this source failed and fall through to the next one.\n      errors.push(label + ': ' + (err?.message || String(err)));\n    }\n  }\n\n  results.push({\n    videoId,\n    title: video.title || '',\n    transcriptLength: transcript.length,\n    method,\n    hasTranscript: transcript.length > MIN_CHARS,\n    // Stored text is capped at 10000 characters; transcriptLength keeps the full length.\n    transcript: transcript.substring(0, 10000),\n    errors\n  });\n}\n\nreturn [{ json: { results, pulled: results.filter(r => r.hasTranscript).length, total: results.length } }];"
      }
    },
    {
      "id": "d4e5f6a7-0004-4ddd-8004-000000000005",
      "name": "Store Transcripts",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [1200, 200],
      "notes": "POSTs each pulled transcript to the Hub /api/content/pipeline endpoint (store_transcript action). Only 2xx responses count as stored; failures are reported in the output instead of being swallowed.",
      "parameters": {
        "mode": "runOnceForAllItems",
        "jsCode": "// Posts every usable transcript to the Hub content pipeline and summarizes the outcome.\nconst results = $input.first().json.results || [];\nconst hubUrl = $env.VORLUX_HUB_URL || 'http://localhost:3010';\nconst pulled = results.filter(r => r.hasTranscript);\nconst failures = [];\nlet stored = 0;\n\nfor (const r of pulled) {\n  try {\n    const res = await fetch(hubUrl + '/api/content/pipeline', {\n      method: 'POST',\n      headers: { 'Content-Type': 'application/json' },\n      body: JSON.stringify({\n        action: 'store_transcript',\n        videoId: r.videoId,\n        title: r.title,\n        transcript: r.transcript,\n        method: r.method,\n        source: 'youtube_transcript_pull'\n      }),\n      signal: AbortSignal.timeout(10000)\n    });\n    // fetch() resolves on 4xx/5xx too, so only count responses the Hub actually accepted.\n    if (!res.ok) throw new Error('HTTP ' + res.status);\n    stored++;\n  } catch (err) {\n    // Best-effort: keep going, but record which video failed and why.\n    failures.push({ videoId: r.videoId, error: err?.message || String(err) });\n  }\n}\n\n// Tally how many videos ended up with each method label (including 'none').\nconst methods = {};\nfor (const r of results) {\n  methods[r.method] = (methods[r.method] || 0) + 1;\n}\n\nreturn [{ json: { stored, failed: failures.length, failures, total: results.length, pulled: pulled.length, methods } }];"
      }
    },
    {
      "id": "d4e5f6a7-0004-4ddd-8004-000000000006",
      "name": "Discord Notify",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.2,
      "position": [1440, 200],
      "notes": "Posts a run summary embed to the Discord webhook in DISCORD_CONTENT_WEBHOOK. The body is built with a single JSON.stringify call so the quotes in the stringified methods map are escaped; interpolating it into a hand-written JSON string produced an invalid body.",
      "parameters": {
        "method": "POST",
        "url": "={{$env.DISCORD_CONTENT_WEBHOOK}}",
        "sendBody": true,
        "specifyBody": "json",
        "jsonBody": "={{ JSON.stringify({ embeds: [{ title: 'YouTube Transcripts Pulled', description: 'Pulled: ' + $json.pulled + '/' + $json.total + ' videos\\nStored: ' + $json.stored + '\\nMethods: ' + JSON.stringify($json.methods || {}), color: 16711680, footer: { text: 'YouTube Transcript Pull - Daily' } }] }) }}",
        "options": {
          "timeout": 10000
        }
      }
    }
  ],
  "connections": {
    "Daily 6am": {
      "main": [
        [
          {
            "node": "Get Recent Videos",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Webhook Trigger": {
      "main": [
        [
          {
            "node": "Get Recent Videos",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Get Recent Videos": {
      "main": [
        [
          {
            "node": "Has Videos?",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Has Videos?": {
      "main": [
        [
          {
            "node": "Pull Transcripts",
            "type": "main",
            "index": 0
          }
        ],
        []
      ]
    },
    "Pull Transcripts": {
      "main": [
        [
          {
            "node": "Store Transcripts",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Store Transcripts": {
      "main": [
        [
          {
            "node": "Discord Notify",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  },
  "settings": {
    "executionOrder": "v1",
    "saveManualExecutions": true,
    "saveExecutionProgress": true
  },
  "tags": [
    { "name": "youtube" },
    { "name": "transcript" },
    { "name": "content" }
  ],
  "versionId": "2"
}