From 8f2f6d168407cbdd118e9cf9a290ded023022a6a Mon Sep 17 00:00:00 2001 From: claude Date: Sun, 31 May 2026 20:46:30 +0000 Subject: [PATCH] CI: push images to in-cluster registry to bypass Traefik The TeX Live layer (~3.5 GB) failed to push to registry.alocoq.fr: Traefik severed the upload mid-stream ("client disconnected during blob PUT ... unexpected EOF"), buildkit retried at the wrong offset, and the registry returned "blob upload invalid". Push to the in-cluster registry Service (registry.git.svc.cluster.local:5000) instead, so the upload never traverses Traefik. Changes: - buildctl outputs use registry.insecure=true (registry is plain HTTP) - add a verso-buildkitd-config ConfigMap with buildkitd.toml marking the registry http/insecure, so the second build can pull the base image back - the verso Deployment and rolling update reference the in-cluster image NOTE: the cluster nodes' containerd must also treat registry.git.svc.cluster.local:5000 as an insecure registry, otherwise the kubelet image pull for the test deployment will fail. That is node-level config outside this repo. Co-Authored-By: Claude Opus 4.8 --- .gitea/workflows/deploy-verso.yml | 38 +++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/.gitea/workflows/deploy-verso.yml b/.gitea/workflows/deploy-verso.yml index 807383ec34..03406f1c35 100644 --- a/.gitea/workflows/deploy-verso.yml +++ b/.gitea/workflows/deploy-verso.yml @@ -19,6 +19,21 @@ jobs: run: | kubectl -n ci delete job verso-buildkit --ignore-not-found=true --wait=true + # buildkitd config: mark the in-cluster registry as http (insecure) + # so the second build can resolve/pull the base image we just pushed. 
+ cat <<'EOF' | kubectl apply -f - + apiVersion: v1 + kind: ConfigMap + metadata: + name: verso-buildkitd-config + namespace: ci + data: + buildkitd.toml: | + [registry."registry.git.svc.cluster.local:5000"] + http = true + insecure = true + EOF + cat <<'EOF' | kubectl apply -f - apiVersion: batch/v1 kind: Job @@ -52,27 +67,40 @@ jobs: - | set -eux + # Push to the in-cluster registry (plain HTTP) to bypass + # the Traefik ingress, whose read timeout was killing the + # multi-GB TeX Live layer upload mid-stream. The base + # image is pulled back in for the second build, so the + # registry must be marked insecure for both push and pull + # (buildkitd.toml handles the pull/resolve side). + REG=registry.git.svc.cluster.local:5000 + buildctl-daemonless.sh build \ --frontend=dockerfile.v0 \ --local context=/workspace/repo \ --local dockerfile=/workspace/repo/server-ce \ --opt filename=Dockerfile-base \ - --output type=image,name=registry.alocoq.fr/verso-base:latest,push=true + --output type=image,name=$REG/verso-base:latest,push=true,registry.insecure=true buildctl-daemonless.sh build \ --frontend=dockerfile.v0 \ --local context=/workspace/repo \ --local dockerfile=/workspace/repo/server-ce \ --opt filename=Dockerfile \ - --opt build-arg:OVERLEAF_BASE_TAG=registry.alocoq.fr/verso-base:latest \ - --output type=image,name=registry.alocoq.fr/verso:latest,push=true + --opt build-arg:OVERLEAF_BASE_TAG=$REG/verso-base:latest \ + --output type=image,name=$REG/verso:latest,push=true,registry.insecure=true volumeMounts: - name: workspace mountPath: /workspace + - name: buildkitd-config + mountPath: /etc/buildkit volumes: - name: workspace emptyDir: {} + - name: buildkitd-config + configMap: + name: verso-buildkitd-config EOF - name: Wait for build @@ -212,7 +240,7 @@ jobs: spec: containers: - name: verso - image: registry.alocoq.fr/verso:latest + image: registry.git.svc.cluster.local:5000/verso:latest ports: - containerPort: 80 env: @@ -244,7 +272,7 @@ jobs: - name: Deploy Verso 
image run: | kubectl -n test set image deployment/verso \ - verso=registry.alocoq.fr/verso:latest + verso=registry.git.svc.cluster.local:5000/verso:latest kubectl -n test rollout restart deployment/verso kubectl -n test rollout status deployment/verso --timeout=300s