LiamKhoaLe committed on
Commit
46a1c15
·
1 Parent(s): 5411a7d

Upd README configs

Browse files
README.md CHANGED
@@ -95,6 +95,40 @@ Open: `http://localhost:8000/static/` • Health: `GET /healthz`
95
 
96
  [Agent Assignment](https://huggingface.co/spaces/BinKhoaLe1812/EdSummariser/blob/main/AGENT_ASNM.md)
97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  ### License
99
 
100
  Apache-2.0
 
95
 
96
  [Agent Assignment](https://huggingface.co/spaces/BinKhoaLe1812/EdSummariser/blob/main/AGENT_ASNM.md)
97
 
98
+ ### Manage ingestion_python as a git subtree (brief)
99
+
100
+ Remotes (set once):
101
+
102
+ ```bash
103
+ git remote add hfi1 https://huggingface.co/spaces/BinKhoaLe1812/StuddyBuddy_Ingestion1
104
+ git remote add hfi2 https://huggingface.co/spaces/BinKhoaLe1812/StuddyBuddy_Ingestion2
105
+ git remote add hfi3 https://huggingface.co/spaces/BinKhoaLe1812/StuddyBuddy_Ingestion3
106
+ ```
107
+
108
+ Push ingestion_python to a Space:
109
+
110
+ ```bash
111
+ # push current ingestion_python contents to hfi1 main
112
+ git subtree push --prefix=ingestion_python hfi1 main
113
+
114
+ # (alternatives)
115
+ git subtree push --prefix=ingestion_python hfi2 main
116
+ git subtree push --prefix=ingestion_python hfi3 main
117
+ ```
118
+
119
+ Pull updates from a Space into the `ingestion_python` folder:
120
+
121
+ ```bash
122
+ git fetch hfi1
123
+ git subtree pull --prefix=ingestion_python hfi1 main --squash
124
+ ```
125
+
126
+ Links:
127
+
128
+ - [StuddyBuddy_Ingestion1](https://huggingface.co/spaces/BinKhoaLe1812/StuddyBuddy_Ingestion1)
129
+ - [StuddyBuddy_Ingestion2](https://huggingface.co/spaces/BinKhoaLe1812/StuddyBuddy_Ingestion2)
130
+ - [StuddyBuddy_Ingestion3](https://huggingface.co/spaces/BinKhoaLe1812/StuddyBuddy_Ingestion3)
131
+
132
  ### License
133
 
134
  Apache-2.0
ingestion_js/.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .vercel
ingestion_js/README.md CHANGED
@@ -24,6 +24,51 @@ Mirror of `ingestion_python` implemented as Next.js Route Handlers for Vercel.
24
  - `EMBED_BASE_URL`: remote embed service base, provides POST `/embed`
25
  - `NVIDIA_API`, or `NVIDIA_API_1..N`: caption API keys (optional)
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  ## Notes
28
 
29
  - Jobs are tracked in MongoDB collection `jobs` for serverless safety.
 
24
  - `EMBED_BASE_URL`: remote embed service base, provides POST `/embed`
25
  - `NVIDIA_API`, or `NVIDIA_API_1..N`: caption API keys (optional)
26
 
27
+ ## Deploy to Vercel
28
+
29
+ - Root directory: `ingestion_js`
30
+ - Set Project Settings → Build Command: `next build`
31
+ - Output: Next.js default
32
+ - Environment Variables: set the vars above in Vercel
33
+
34
+ ## cURL Samples
35
+
36
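+ Set the `BASE` shell variable to your deployment URL (for example, `export BASE=https://your-app.vercel.app`); the commands below read it as `$BASE`.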
37
+
38
+ Health:
39
+
40
+ ```bash
41
+ curl -X GET "$BASE/api/health" -H "Content-Type: application/json"
42
+ ```
43
+
44
+ Upload:
45
+
46
+ ```bash
47
+ curl -X POST "$BASE/api/upload" \
48
+ -F "user_id=YOUR_USER_ID" \
49
+ -F "project_id=YOUR_PROJECT_ID" \
50
+ -F "files=@../exefiles/Lecture5_ML.pdf" \
51
+ -F "files=@../exefiles/Lecture6_ANN_DL.pdf"
52
+ ```
53
+
54
+ Status:
55
+
56
+ ```bash
57
+ curl -X GET "$BASE/api/upload/status?job_id=YOUR_JOB_ID" -H "Content-Type: application/json"
58
+ ```
59
+
60
+ List Files:
61
+
62
+ ```bash
63
+ curl -X GET "$BASE/api/files?user_id=YOUR_USER_ID&project_id=YOUR_PROJECT_ID" -H "Content-Type: application/json"
64
+ ```
65
+
66
+ Get Chunks:
67
+
68
+ ```bash
69
+ curl -X GET "$BASE/api/files/chunks?user_id=YOUR_USER_ID&project_id=YOUR_PROJECT_ID&filename=Lecture5_ML.pdf&limit=5" -H "Content-Type: application/json"
70
+ ```
71
+
72
  ## Notes
73
 
74
  - Jobs are tracked in MongoDB collection `jobs` for serverless safety.
ingestion_js/app/api/health/route.ts CHANGED
@@ -1,5 +1,5 @@
1
  import { NextResponse } from 'next/server'
2
- import { getMongo } from '@/lib/mongo'
3
 
4
  export const dynamic = 'force-dynamic'
5
 
@@ -8,6 +8,7 @@ export async function GET() {
8
  let mongodb_connected = false
9
  try {
10
  await mongo.db.command({ ping: 1 })
 
11
  mongodb_connected = true
12
  } catch {
13
  mongodb_connected = false
 
1
  import { NextResponse } from 'next/server'
2
+ import { getMongo, ensureIndexes } from '@/lib/mongo'
3
 
4
  export const dynamic = 'force-dynamic'
5
 
 
8
  let mongodb_connected = false
9
  try {
10
  await mongo.db.command({ ping: 1 })
11
+ await ensureIndexes()
12
  mongodb_connected = true
13
  } catch {
14
  mongodb_connected = false
ingestion_js/app/api/upload/route.ts CHANGED
@@ -6,21 +6,11 @@ import { buildCardsFromPages } from '@/lib/chunker'
6
  import { embedRemote } from '@/lib/embedder'
7
  import { deleteFileData, storeCards, upsertFileSummary } from '@/lib/mongo'
8
  import { cheapSummarize } from '@/lib/summarizer'
9
- import { createJob, getJob, updateJob } from '@/lib/jobs'
10
 
11
  export const dynamic = 'force-dynamic'
12
  export const runtime = 'nodejs'
13
 
14
- export async function GET(req: NextRequest) {
15
- // Status endpoint: /api/upload?job_id=...
16
- const { searchParams } = new URL(req.url)
17
- const job_id = searchParams.get('job_id')
18
- if (!job_id) return NextResponse.json({ error: 'job_id is required' }, { status: 400 })
19
- const job = await getJob(job_id)
20
- if (!job) return NextResponse.json({ error: 'job not found' }, { status: 404 })
21
- return NextResponse.json({ job_id, status: job.status, total: job.total, completed: job.completed, last_error: job.last_error })
22
- }
23
-
24
  export async function POST(req: NextRequest) {
25
  const form = await req.formData()
26
  const user_id = String(form.get('user_id') || '')
 
6
  import { embedRemote } from '@/lib/embedder'
7
  import { deleteFileData, storeCards, upsertFileSummary } from '@/lib/mongo'
8
  import { cheapSummarize } from '@/lib/summarizer'
9
+ import { createJob, updateJob } from '@/lib/jobs'
10
 
11
  export const dynamic = 'force-dynamic'
12
  export const runtime = 'nodejs'
13
 
 
 
 
 
 
 
 
 
 
 
14
  export async function POST(req: NextRequest) {
15
  const form = await req.formData()
16
  const user_id = String(form.get('user_id') || '')
ingestion_js/app/api/upload/status/route.ts ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { NextRequest, NextResponse } from 'next/server'
2
+ import { getJob } from '@/lib/jobs'
3
+
4
+ export const dynamic = 'force-dynamic'
5
+ export const runtime = 'nodejs'
6
+
7
+ export async function GET(req: NextRequest) {
8
+ const { searchParams } = new URL(req.url)
9
+ const job_id = searchParams.get('job_id') || ''
10
+ if (!job_id) return NextResponse.json({ error: 'job_id is required' }, { status: 400 })
11
+ const job = await getJob(job_id)
12
+ if (!job) return NextResponse.json({ error: 'job not found' }, { status: 404 })
13
+ return NextResponse.json({ job_id, status: job.status, total: job.total, completed: job.completed, last_error: job.last_error })
14
+ }