Retry tests on network errors, improve test docs (#64)

This commit is contained in:
Valentin Klopfenstein 2025-03-12 17:47:49 +01:00 committed by GitHub
parent 9b0da083ff
commit 886d4f49b6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 122 additions and 31 deletions

View file

@ -2,6 +2,9 @@ name: Run code tests
on:
push:
paths:
- '.github/workflows/**'
- 'src/**'
workflow_call:
secrets:
DNSIMPLE_API_TOKEN:
@ -37,11 +40,17 @@ jobs:
env:
DNSIMPLE_API_TOKEN: ${{ secrets.DNSIMPLE_API_TOKEN }}
DNSIMPLE_ZONE_NAME: ${{ secrets.DNSIMPLE_ZONE_NAME }}
shell: 'script -q -e -c "bash {0}"'
timeout-minutes: 15
run: |
export TEST_ASSET_KUBE_APISERVER=${{ steps.kubebuilder.outputs.BIN_DIR }}/kube-apiserver
export TEST_ASSET_ETCD=${{ steps.kubebuilder.outputs.BIN_DIR }}/etcd
export TEST_ASSET_KUBECTL=${{ steps.kubebuilder.outputs.BIN_DIR }}/kubectl
export TEST_ZONE_NAME="${DNSIMPLE_ZONE_NAME}." # add trailing dot
YLW='\033[1;33m'
NC='\033[0m'
echo """apiVersion: v1
kind: Secret
metadata:
@ -51,4 +60,21 @@ jobs:
token: $DNSIMPLE_API_TOKEN
""" > testdata/dnsimple-token.yaml
cd src
go test -v .
# Occasionally, transient network errors can make tests fail
attempt=0
max_attempts=3
test_exit_code=0
while [ $attempt -lt $max_attempts ]; do
attempt=$((attempt+1))
output=$(go test -v . 2>&1 | tee /dev/tty)
test_exit_code=$?
if echo "$output" | grep -q -e "Temporary failure in name resolution" -e "connection reset by peer" -e "i/o timeout"; then
echo -e "${YLW}Detected transient network error. Retrying... ($attempt/$max_attempts)${NC}"
else
break
fi
done
exit $test_exit_code

View file

@ -12,26 +12,31 @@ on:
jobs:
test:
runs-on: ubuntu-latest
strategy:
max-parallel: 3
matrix:
# Always quote versions to prevent int truncation (1.30 -> 1.3)
# https://kubernetes.io/releases
k8s-version: ["1.30", "1.31", "1.32"]
# https://cert-manager.io/docs/releases/ (Always include path version)
cm-version: ["1.16.0", "1.17.0"]
steps:
- uses: actions/checkout@v4
- name: Start minikube
uses: medyagh/setup-minikube@master
with:
kubernetes-version: 1.31.3
kubernetes-version: ${{ matrix.k8s-version }}
- name: Install cert-manager, patch upstream dns servers, wait for readiness
run: |
echo "Target cert-manager version: ${{ vars.TARGET_CERT_MANAGER_VERSION }}"
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/${{ vars.TARGET_CERT_MANAGER_VERSION }}/cert-manager.yaml
echo "Target cert-manager version: ${{ matrix.cm-version }}"
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v${{ matrix.cm-version }}/cert-manager.yaml
# Patch cert-manager to use DNSimple's nameservers for faster propagation-checks
kubectl patch deployment cert-manager -n cert-manager --type='json' -p='[{"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "--dns01-recursive-nameservers=ns1.dnsimple.com:53"}]'
kubectl wait --for=condition=available --timeout=600s deployment/cert-manager-webhook -n cert-manager
- name: Install cert-manager-webhook-dnsimple, wait for readiness
env:
DNSIMPLE_API_TOKEN: ${{ secrets.DNSIMPLE_API_TOKEN }}
@ -48,7 +53,7 @@ jobs:
helm -n cert-manager list
max_wait_time_seconds=600
max_wait_time_seconds=800
sleep_between_iterations=10
start=$(date +%s)
@ -57,7 +62,7 @@ jobs:
echo ""
echo "Awaiting succesful deployment for max ${max_wait_time_seconds} seconds or until $(date --date="@$end")"
while [ $(date +%s) -le $end ]; do
echo "[i] New iteration at $(date +%s)"
echo "[i] New iteration at $(date)"
kubectl -n cert-manager get po
if [ $(kubectl -n cert-manager get po | grep Crash | wc -l) -gt 0 ]; then
@ -101,11 +106,10 @@ jobs:
""" > certificate.yaml
kubectl apply -f certificate.yaml
- name: Assert that the DNS record was created
env:
DNSIMPLE_ZONE_NAME: ${{ secrets.DNSIMPLE_ZONE_NAME }}
timeout-minutes: 10
timeout-minutes: 10
run: |
while true; do
if nslookup -type=TXT _acme-challenge.gh-action-test.$DNSIMPLE_ZONE_NAME ns1.dnsimple.com; then
@ -114,9 +118,36 @@ jobs:
sleep 30
done
# This step can time out, but it timing out doesn't necessarily mean that the webhook is not working.
# Timeouts mainly happen due to the environment of the runner and/or parallelism, thus such occurrences will simply be dismissed as warnings.
- name: Check the certificate status
run: |
kubectl wait --for=condition=ready --timeout=600s certificate/dnsimple-test
# this should not be necessary since the certificate is usually ready once the DNS record is propagated
kubectl get certificate dnsimple-test -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' | grep True
max_wait_time_seconds=300
end=$(( $(date +%s) + $max_wait_time_seconds ))
start=$(date +%s)
sleep 5
while [ $(date +%s) -le $end ]; do
OUT_CRT=$(kubectl get certificate/dnsimple-test -o jsonpath='{.status.conditions}')
OUT_CRQ=$(kubectl get CertificateRequest -o json)
echo "Certificate:"
echo "$OUT_CRT"
echo "CertificateRequest:"
echo "$OUT_CRQ" | jq .items[0].status.conditions
if [ $(echo "$OUT_CRT" | grep -iE "Failed|Denied" | wc -l) -gt 0 ]; then
echo "::Error title=Certificate resource errored::The certificate ressource has an error"
exit 1
fi
if [ $(kubectl get certificate dnsimple-test -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}') == "True" ]; then
echo "Certificate is ready after $(( $(date +%s) - $start )) seconds"
exit 0
fi
sleep 20
echo -e "\n[i] New iteration at $(date)"
done
echo "::warning title=Certificate timed out::Have timed out waiting for certificate"

View file

@ -1,12 +1,14 @@
name: Run full test suite
on:
push:
branches:
- master
# To prevent this time intesive suite from running redundandtly, it will only run on PRs.
# If a PR is merged, it also creates a push and thus this workflow unnecessarily runs again.
pull_request:
branches:
- master
paths:
- '.github/workflows/**'
- 'src/**'
jobs:
code-test:

View file

@ -1,7 +1,9 @@
GO ?= $(shell which go)
OS ?= $(shell $(GO) env GOOS)
ARCH ?= $(shell $(GO) env GOARCH)
KUBE_VERSION=1.25.0
# Available versions: https://storage.googleapis.com/kubebuilder-tools
KUBE_VERSION=$(shell curl -s https://storage.googleapis.com/kubebuilder-tools | grep -oP 'kubebuilder-tools-\K[0-9]+\.[0-9]+\.[0-9]+' | sort -V | tail -n 1 || echo "1.30.0")
# required by go tests
export TEST_ASSET_ETCD=../_test/kubebuilder/etcd

View file

@ -75,26 +75,34 @@ The Helm chart accepts the following values:
All cert-manager webhooks have to pass the DNS01 provider conformance testing suite.
### Pull requests
Prerequisites for PRs are implemented as GitHub-actions. All tests should pass before a PR is merged:
- the `cert-manager` conformance suite is run with provided kubebuilder fixtures
- a custom test suite running on a working k8s cluster (using `minikube`) is executed as well
Prerequisites for PRs are implemented as GitHub-actions. All tests should pass before a PR is merged:
- The `cert-manager` conformance suite is run with provided kubebuilder fixtures
- A custom test suite running on a working k8s cluster (using `minikube`) is executed as well
### Local testing
#### Test suite
You can also run tests locally, as specified in the `Makefile`:
Tests can be run locally according to the `Makefile`:
1. Set-up `testdata/` according to its [README][3].
- `dnsimple-token.yaml` should be filled with a valid token (for either the sandbox or production environment)
- `dnsimple.env` should contain the remaining environment variables (non sensitive)
2. Execute the test suite:
1. Set up `testdata/` according to its [README][3]
- `dnsimple-token.yaml` should be filled with a valid token (for either the sandbox or production environment)
2. Set env var `TEST_ZONE_NAME`, adding a trailing dot
- `export TEST_ZONE_NAME="<zone>."`
3. Execute the test suite:
```bash
make test
```
> [!NOTE]
> Kubebuilder will always use the latest version available.
#### In-cluster testing
1. Install cert-manager:
```bash
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.14.3/cert-manager.yaml
```
2. Install the webhook:
```bash
helm install cert-manager-webhook-dnsimple \
@ -103,6 +111,7 @@ You can also run tests locally, as specified in the `Makefile`:
--set clusterIssuer.staging.enabled=true \
./charts/cert-manager-webhook-dnsimple
```
3. Test away... You can create a sample certificate to ensure the webhook is working correctly:
```bash
kubectl apply -f - <<<EOF
@ -120,10 +129,16 @@ You can also run tests locally, as specified in the `Makefile`:
EOF
```
#### GitHub Actions
Each PR is vetted against a full test suite that tests changes against multiple versions of both Kubernetes and Cert-Manager using a matrix strategy.
Generally, tested k8s versions are the [last 3 supported major versions](https://kubernetes.io/releases/).
Cert-Manager is tested uisng the [last 2 supported versions](https://cert-manager.io/docs/releases/).
## Releases
### Docker images
Every push to `master` or on a pull-request triggers the upload of a new docker image to the GitHub Container Registry (this is configured through github actions). These images should **not considered stable** and are tagged with `commit-<hash>`. **We recommend using a specific version tag for production deployments instead.**
Every push to `master` or on a pull-request triggers the upload of a new docker image to the GitHub Container Registry (this is configured through github actions).
These images should **not be considered stable** and are tagged with `commit-<hash>`. **We recommend using a specific version tag for production deployments instead.**
Tagged images are considered stable, these are the ones referenced by the default helm values.

View file

@ -1,22 +1,36 @@
package main
import (
"fmt"
"os"
"strings"
"testing"
dns "github.com/cert-manager/cert-manager/test/acme"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
)
var (
zone = os.Getenv("TEST_ZONE_NAME")
const (
testdata_dir = "../testdata"
)
var (
zone = os.Getenv("TEST_ZONE_NAME")
)
func TestRunsSuite(t *testing.T) {
log.SetLogger(zap.New(zap.UseFlagOptions(&zap.Options{})))
// The manifest path should contain a file named config.json that is a
// snippet of valid configuration that should be included on the
// ChallengeRequest passed as part of the test cases.
// Ensure trailing dot
if !strings.HasSuffix(zone, ".") {
zone = fmt.Sprintf("%s.", zone)
}
fixture := dns.NewFixture(&dnsimpleDNSProviderSolver{},
dns.SetResolvedZone(zone),
dns.SetAllowAmbientCredentials(false),

3
testdata/README.md vendored
View file

@ -6,4 +6,5 @@ Copy the `dnsimple-token.yaml.example` example file removing the `.example` suff
$ cp dnsimple-token.yaml{.example,}
```
Replace the placeholders for the API token in `dnsimple-token.yaml`. The API token can be generated in your DNSimple account settings in the automation tab.
Replace the placeholders for the API token in `dnsimple-token.yaml`.
The API token can be generated in your DNSimple account settings in the automation tab.