feat(web): Use crawlee/playwright to retrieve web content in quality mode. It retrieves content more reliably than fetch + JSDOM, at the expense of speed.
This commit is contained in:
parent
044f30a547
commit
87a7ffb445
10 changed files with 4580 additions and 549 deletions
|
|
@ -1,9 +1,10 @@
|
|||
FROM node:20.18.0-slim AS builder
|
||||
FROM --platform=linux/amd64 node:20-slim AS builder
|
||||
|
||||
WORKDIR /home/perplexica
|
||||
|
||||
COPY package.json yarn.lock ./
|
||||
RUN yarn install --frozen-lockfile --network-timeout 600000
|
||||
ENV NEXT_TELEMETRY_DISABLED=1
|
||||
|
||||
COPY tsconfig.json next.config.mjs next-env.d.ts postcss.config.js drizzle.config.ts tailwind.config.ts ./
|
||||
COPY src ./src
|
||||
|
|
@ -12,7 +13,9 @@ COPY public ./public
|
|||
RUN mkdir -p /home/perplexica/data
|
||||
RUN yarn build
|
||||
|
||||
FROM node:20.18.0-slim
|
||||
FROM --platform=linux/amd64 node:20-slim
|
||||
|
||||
ENV NEXT_TELEMETRY_DISABLED=1
|
||||
|
||||
WORKDIR /home/perplexica
|
||||
|
||||
|
|
@ -22,6 +25,11 @@ COPY --from=builder /home/perplexica/.next/static ./public/_next/static
|
|||
COPY --from=builder /home/perplexica/.next/standalone ./
|
||||
COPY --from=builder /home/perplexica/data ./data
|
||||
|
||||
RUN mkdir /home/perplexica/uploads
|
||||
RUN mkdir /home/perplexica/uploads && \
|
||||
npx -y playwright install chromium --with-deps && \
|
||||
npm install playwright && \
|
||||
apt-get update && \
|
||||
apt-get install -y procps && \
|
||||
apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
CMD ["node", "server.js"]
|
||||
Loading…
Add table
Add a link
Reference in a new issue