Retry tests on network errors, improve test docs (#64)

This commit is contained in:
Valentin Klopfenstein 2025-03-12 17:47:49 +01:00 committed by GitHub
parent 9b0da083ff
commit 886d4f49b6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 122 additions and 31 deletions

View file

@ -2,6 +2,9 @@ name: Run code tests
on: on:
push: push:
paths:
- '.github/workflows/**'
- 'src/**'
workflow_call: workflow_call:
secrets: secrets:
DNSIMPLE_API_TOKEN: DNSIMPLE_API_TOKEN:
@ -37,11 +40,17 @@ jobs:
env: env:
DNSIMPLE_API_TOKEN: ${{ secrets.DNSIMPLE_API_TOKEN }} DNSIMPLE_API_TOKEN: ${{ secrets.DNSIMPLE_API_TOKEN }}
DNSIMPLE_ZONE_NAME: ${{ secrets.DNSIMPLE_ZONE_NAME }} DNSIMPLE_ZONE_NAME: ${{ secrets.DNSIMPLE_ZONE_NAME }}
shell: 'script -q -e -c "bash {0}"'
timeout-minutes: 15
run: | run: |
export TEST_ASSET_KUBE_APISERVER=${{ steps.kubebuilder.outputs.BIN_DIR }}/kube-apiserver export TEST_ASSET_KUBE_APISERVER=${{ steps.kubebuilder.outputs.BIN_DIR }}/kube-apiserver
export TEST_ASSET_ETCD=${{ steps.kubebuilder.outputs.BIN_DIR }}/etcd export TEST_ASSET_ETCD=${{ steps.kubebuilder.outputs.BIN_DIR }}/etcd
export TEST_ASSET_KUBECTL=${{ steps.kubebuilder.outputs.BIN_DIR }}/kubectl export TEST_ASSET_KUBECTL=${{ steps.kubebuilder.outputs.BIN_DIR }}/kubectl
export TEST_ZONE_NAME="${DNSIMPLE_ZONE_NAME}." # add trailing dot export TEST_ZONE_NAME="${DNSIMPLE_ZONE_NAME}." # add trailing dot
YLW='\033[1;33m'
NC='\033[0m'
echo """apiVersion: v1 echo """apiVersion: v1
kind: Secret kind: Secret
metadata: metadata:
@ -51,4 +60,21 @@ jobs:
token: $DNSIMPLE_API_TOKEN token: $DNSIMPLE_API_TOKEN
""" > testdata/dnsimple-token.yaml """ > testdata/dnsimple-token.yaml
cd src cd src
go test -v .
# Occasionally, transient network errors can make tests fail
attempt=0
max_attempts=3
test_exit_code=0
while [ $attempt -lt $max_attempts ]; do
attempt=$((attempt+1))
output=$(go test -v . 2>&1 | tee /dev/tty)
test_exit_code=$?
if echo "$output" | grep -q -e "Temporary failure in name resolution" -e "connection reset by peer" -e "i/o timeout"; then
echo -e "${YLW}Detected transient network error. Retrying... ($attempt/$max_attempts)${NC}"
else
break
fi
done
exit $test_exit_code

View file

@ -12,26 +12,31 @@ on:
jobs: jobs:
test: test:
runs-on: ubuntu-latest runs-on: ubuntu-latest
strategy:
max-parallel: 3
matrix:
# Always quote versions to prevent int truncation (1.30 -> 1.3)
# https://kubernetes.io/releases
k8s-version: ["1.30", "1.31", "1.32"]
# https://cert-manager.io/docs/releases/ (Always include patch version)
cm-version: ["1.16.0", "1.17.0"]
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Start minikube - name: Start minikube
uses: medyagh/setup-minikube@master uses: medyagh/setup-minikube@master
with: with:
kubernetes-version: 1.31.3 kubernetes-version: ${{ matrix.k8s-version }}
- name: Install cert-manager, patch upstream dns servers, wait for readiness - name: Install cert-manager, patch upstream dns servers, wait for readiness
run: | run: |
echo "Target cert-manager version: ${{ vars.TARGET_CERT_MANAGER_VERSION }}" echo "Target cert-manager version: ${{ matrix.cm-version }}"
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/${{ vars.TARGET_CERT_MANAGER_VERSION }}/cert-manager.yaml kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v${{ matrix.cm-version }}/cert-manager.yaml
# Patch cert-manager to use DNSimple's nameservers for faster propagation-checks # Patch cert-manager to use DNSimple's nameservers for faster propagation-checks
kubectl patch deployment cert-manager -n cert-manager --type='json' -p='[{"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "--dns01-recursive-nameservers=ns1.dnsimple.com:53"}]' kubectl patch deployment cert-manager -n cert-manager --type='json' -p='[{"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "--dns01-recursive-nameservers=ns1.dnsimple.com:53"}]'
kubectl wait --for=condition=available --timeout=600s deployment/cert-manager-webhook -n cert-manager kubectl wait --for=condition=available --timeout=600s deployment/cert-manager-webhook -n cert-manager
- name: Install cert-manager-webhook-dnsimple, wait for readiness - name: Install cert-manager-webhook-dnsimple, wait for readiness
env: env:
DNSIMPLE_API_TOKEN: ${{ secrets.DNSIMPLE_API_TOKEN }} DNSIMPLE_API_TOKEN: ${{ secrets.DNSIMPLE_API_TOKEN }}
@ -48,7 +53,7 @@ jobs:
helm -n cert-manager list helm -n cert-manager list
max_wait_time_seconds=600 max_wait_time_seconds=800
sleep_between_iterations=10 sleep_between_iterations=10
start=$(date +%s) start=$(date +%s)
@ -57,7 +62,7 @@ jobs:
echo "" echo ""
echo "Awaiting succesful deployment for max ${max_wait_time_seconds} seconds or until $(date --date="@$end")" echo "Awaiting succesful deployment for max ${max_wait_time_seconds} seconds or until $(date --date="@$end")"
while [ $(date +%s) -le $end ]; do while [ $(date +%s) -le $end ]; do
echo "[i] New iteration at $(date +%s)" echo "[i] New iteration at $(date)"
kubectl -n cert-manager get po kubectl -n cert-manager get po
if [ $(kubectl -n cert-manager get po | grep Crash | wc -l) -gt 0 ]; then if [ $(kubectl -n cert-manager get po | grep Crash | wc -l) -gt 0 ]; then
@ -101,11 +106,10 @@ jobs:
""" > certificate.yaml """ > certificate.yaml
kubectl apply -f certificate.yaml kubectl apply -f certificate.yaml
- name: Assert that the DNS record was created - name: Assert that the DNS record was created
env: env:
DNSIMPLE_ZONE_NAME: ${{ secrets.DNSIMPLE_ZONE_NAME }} DNSIMPLE_ZONE_NAME: ${{ secrets.DNSIMPLE_ZONE_NAME }}
timeout-minutes: 10 timeout-minutes: 10
run: | run: |
while true; do while true; do
if nslookup -type=TXT _acme-challenge.gh-action-test.$DNSIMPLE_ZONE_NAME ns1.dnsimple.com; then if nslookup -type=TXT _acme-challenge.gh-action-test.$DNSIMPLE_ZONE_NAME ns1.dnsimple.com; then
@ -114,9 +118,36 @@ jobs:
sleep 30 sleep 30
done done
# This step can time out, but it timing out doesn't necessarily mean that the webhook is not working.
# Timeouts mainly happen due to the environment of the runner and/or parallelism, thus such occurrences will simply be dismissed as warnings.
- name: Check the certificate status - name: Check the certificate status
run: | run: |
kubectl wait --for=condition=ready --timeout=600s certificate/dnsimple-test max_wait_time_seconds=300
# this should not be necessary since the certificate is usually ready once the DNS record is propagated end=$(( $(date +%s) + $max_wait_time_seconds ))
kubectl get certificate dnsimple-test -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' | grep True start=$(date +%s)
sleep 5
while [ $(date +%s) -le $end ]; do
OUT_CRT=$(kubectl get certificate/dnsimple-test -o jsonpath='{.status.conditions}')
OUT_CRQ=$(kubectl get CertificateRequest -o json)
echo "Certificate:"
echo "$OUT_CRT"
echo "CertificateRequest:"
echo "$OUT_CRQ" | jq .items[0].status.conditions
if [ $(echo "$OUT_CRT" | grep -iE "Failed|Denied" | wc -l) -gt 0 ]; then
echo "::Error title=Certificate resource errored::The certificate ressource has an error"
exit 1
fi
if [ $(kubectl get certificate dnsimple-test -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}') == "True" ]; then
echo "Certificate is ready after $(( $(date +%s) - $start )) seconds"
exit 0
fi
sleep 20
echo -e "\n[i] New iteration at $(date)"
done
echo "::warning title=Certificate timed out::Have timed out waiting for certificate"

View file

@ -1,12 +1,14 @@
name: Run full test suite name: Run full test suite
on: on:
push: # To prevent this time intensive suite from running redundantly, it will only run on PRs.
branches: # If a PR is merged, it also creates a push and thus this workflow unnecessarily runs again.
- master
pull_request: pull_request:
branches: branches:
- master - master
paths:
- '.github/workflows/**'
- 'src/**'
jobs: jobs:
code-test: code-test:

View file

@ -1,7 +1,9 @@
GO ?= $(shell which go) GO ?= $(shell which go)
OS ?= $(shell $(GO) env GOOS) OS ?= $(shell $(GO) env GOOS)
ARCH ?= $(shell $(GO) env GOARCH) ARCH ?= $(shell $(GO) env GOARCH)
KUBE_VERSION=1.25.0
# Available versions: https://storage.googleapis.com/kubebuilder-tools
KUBE_VERSION=$(shell curl -s https://storage.googleapis.com/kubebuilder-tools | grep -oP 'kubebuilder-tools-\K[0-9]+\.[0-9]+\.[0-9]+' | sort -V | tail -n 1 || echo "1.30.0")
# required by go tests # required by go tests
export TEST_ASSET_ETCD=../_test/kubebuilder/etcd export TEST_ASSET_ETCD=../_test/kubebuilder/etcd

View file

@ -75,26 +75,34 @@ The Helm chart accepts the following values:
All cert-manager webhooks have to pass the DNS01 provider conformance testing suite. All cert-manager webhooks have to pass the DNS01 provider conformance testing suite.
### Pull requests ### Pull requests
Prerequisites for PRs are implemented as GitHub-actions. All tests should pass before a PR is merged: Prerequisites for PRs are implemented as GitHub-actions. All tests should pass before a PR is merged:
- the `cert-manager` conformance suite is run with provided kubebuilder fixtures - The `cert-manager` conformance suite is run with provided kubebuilder fixtures
- a custom test suite running on a working k8s cluster (using `minikube`) is executed as well - A custom test suite running on a working k8s cluster (using `minikube`) is executed as well
### Local testing ### Local testing
#### Test suite #### Test suite
You can also run tests locally, as specified in the `Makefile`: Tests can be run locally according to the `Makefile`:
1. Set-up `testdata/` according to its [README][3]. 1. Set up `testdata/` according to its [README][3]
- `dnsimple-token.yaml` should be filled with a valid token (for either the sandbox or production environment) - `dnsimple-token.yaml` should be filled with a valid token (for either the sandbox or production environment)
- `dnsimple.env` should contain the remaining environment variables (non sensitive)
2. Execute the test suite: 2. Set env var `TEST_ZONE_NAME`, adding a trailing dot
- `export TEST_ZONE_NAME="<zone>."`
3. Execute the test suite:
```bash ```bash
make test make test
``` ```
> [!NOTE]
> Kubebuilder will always use the latest version available.
#### In-cluster testing #### In-cluster testing
1. Install cert-manager: 1. Install cert-manager:
```bash ```bash
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.14.3/cert-manager.yaml kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.14.3/cert-manager.yaml
``` ```
2. Install the webhook: 2. Install the webhook:
```bash ```bash
helm install cert-manager-webhook-dnsimple \ helm install cert-manager-webhook-dnsimple \
@ -103,6 +111,7 @@ You can also run tests locally, as specified in the `Makefile`:
--set clusterIssuer.staging.enabled=true \ --set clusterIssuer.staging.enabled=true \
./charts/cert-manager-webhook-dnsimple ./charts/cert-manager-webhook-dnsimple
``` ```
3. Test away... You can create a sample certificate to ensure the webhook is working correctly: 3. Test away... You can create a sample certificate to ensure the webhook is working correctly:
```bash ```bash
kubectl apply -f - <<<EOF kubectl apply -f - <<<EOF
@ -120,10 +129,16 @@ You can also run tests locally, as specified in the `Makefile`:
EOF EOF
``` ```
#### GitHub Actions
Each PR is vetted against a full test suite that tests changes against multiple versions of both Kubernetes and Cert-Manager using a matrix strategy.
Generally, tested k8s versions are the [last 3 supported major versions](https://kubernetes.io/releases/).
Cert-Manager is tested using the [last 2 supported versions](https://cert-manager.io/docs/releases/).
## Releases ## Releases
### Docker images ### Docker images
Every push to `master` or on a pull-request triggers the upload of a new docker image to the GitHub Container Registry (this is configured through github actions). These images should **not considered stable** and are tagged with `commit-<hash>`. **We recommend using a specific version tag for production deployments instead.** Every push to `master` or on a pull-request triggers the upload of a new docker image to the GitHub Container Registry (this is configured through github actions).
These images should **not be considered stable** and are tagged with `commit-<hash>`. **We recommend using a specific version tag for production deployments instead.**
Tagged images are considered stable, these are the ones referenced by the default helm values. Tagged images are considered stable, these are the ones referenced by the default helm values.

View file

@ -1,22 +1,36 @@
package main package main
import ( import (
"fmt"
"os" "os"
"strings"
"testing" "testing"
dns "github.com/cert-manager/cert-manager/test/acme" dns "github.com/cert-manager/cert-manager/test/acme"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
) )
var ( const (
zone = os.Getenv("TEST_ZONE_NAME")
testdata_dir = "../testdata" testdata_dir = "../testdata"
) )
var (
zone = os.Getenv("TEST_ZONE_NAME")
)
func TestRunsSuite(t *testing.T) { func TestRunsSuite(t *testing.T) {
log.SetLogger(zap.New(zap.UseFlagOptions(&zap.Options{})))
// The manifest path should contain a file named config.json that is a // The manifest path should contain a file named config.json that is a
// snippet of valid configuration that should be included on the // snippet of valid configuration that should be included on the
// ChallengeRequest passed as part of the test cases. // ChallengeRequest passed as part of the test cases.
// Ensure trailing dot
if !strings.HasSuffix(zone, ".") {
zone = fmt.Sprintf("%s.", zone)
}
fixture := dns.NewFixture(&dnsimpleDNSProviderSolver{}, fixture := dns.NewFixture(&dnsimpleDNSProviderSolver{},
dns.SetResolvedZone(zone), dns.SetResolvedZone(zone),
dns.SetAllowAmbientCredentials(false), dns.SetAllowAmbientCredentials(false),

3
testdata/README.md vendored
View file

@ -6,4 +6,5 @@ Copy the `dnsimple-token.yaml.example` example file removing the `.example` suff
$ cp dnsimple-token.yaml{.example,} $ cp dnsimple-token.yaml{.example,}
``` ```
Replace the placeholders for the API token in `dnsimple-token.yaml`. The API token can be generated in your DNSimple account settings in the automation tab. Replace the placeholders for the API token in `dnsimple-token.yaml`.
The API token can be generated in your DNSimple account settings in the automation tab.