golang 비동기. tcp 서버(5). netpoller 2부. golang 내부 epoll 활용: Accept

December 3, 2024 8 minute read

해당 아티클은 다음 수준의 지식을 요구합니다.

POSIX 네트워크 syscall
epoll

이전 아티클 “golang 비동기. tcp 서버(4). netpoller 1부. golang 내부 epoll의 생성”를 보시는 걸 권장합니다.

이전 아티클까지 golang 내부 net 패키지의 net.Listen을 호출했을 때 발생하는 과정을 정리했습니다. net.Listen 내부에서 unix syscall을 호출하여 소켓 생성, 서버 호스팅, netpoller를 통한 epoll 생성, epoll 등록 과정을 확인했습니다. 이번에는 netpoller에서 데이터가 들어왔을 때 이를 받아서 처리하는 부분을 다뤄보겠습니다.

Accept 내부. 고루틴이 대기하는 위치 탐색

탐색을 시작하기 전 net.Listen을 호출한 결과로 다음이 진행되었습니다.

논블로킹 소켓에 주소가 바인딩되고 서버가 호스팅됩니다.
epoll이 생성되고 TCPListener의 소켓에 대한 이벤트가 등록됩니다.
소켓에 대하여 유일하게 netFD, FD, pollDesc가 생성됩니다.

이번에는 Listener.Accept()을 호출해보겠습니다.

func main() {
	listener, err := net.Listen("tcp", ":8989")
	if err != nil {
		fmt.Println("listen error", err)
		return
	}
	
	for {
		conn, err := listener.Accept()
		// ...
	}
}

listener에서 호출한 Accept는 내부에서 FD의 accept를 호출합니다. ✅표시된 부분을 위주로 확인하면 됩니다.

// https://github.com/golang/go/blob/a10e42f219abb9c5b/src/internal/poll/fd_unix.go#L592
func (fd *FD) Accept() (int, syscall.Sockaddr, string, error) {
	// ✅ readLock을 획득: 하나의 Listener에서 여러 고루틴이 Accept을 호출해도
	// 하나의 고루틴만 로직이 진행
	if err := fd.readLock(); err != nil {
		return -1, nil, "", err
	}
	defer fd.readUnlock()

	// Read 준비
	if err := fd.pd.prepareRead(fd.isFile); err != nil {
		return -1, nil, "", err
	}
	for {
		// ✅ 이 부분에서 Accept4Func(s, syscall.SOCK_NONBLOCK|syscall.SOCK_CLOEXEC) 호출
		s, rsa, errcall, err := accept(fd.Sysfd) 
		// ✅ 연결된 소켓이 있으면 소켓 fd와 주소 리턴
		if err == nil {
			return s, rsa, "", err
		}
		switch err {
		// ...
		// ✅ 바로 연결된 소켓이 없으면 EAGAIN
		case syscall.EAGAIN:
			if fd.pd.pollable() {
				if err = fd.pd.waitRead(fd.isFile); err == nil {
					continue
				}
			}
		// ...
	}
}

함수 동작에서 주요 부분을 살펴보면 다음과 같습니다.

readLock을 획득합니다. 따라서 같은 fd에서 접근하는 Accept요청에 대하여 하나의 고루틴만 진입하도록 합니다.
Read를 준비합니다. 여기서 Read란 소켓으로부터 수신된 데이터가 존재하는 경우를 의미합니다.
accept를 호출합니다. 논블로킹 소켓이기 때문에 바로 리턴됩니다.
만약 연결된 소켓이 존재하는 경우 if err == nil 분기로 이동 후 생성된 소켓에 대한 fd와 주소를 리턴합니다.
만약 연결된 소켓이 존재하지 않는 경우 EAGAIN 분기로 처리됩니다. 해당 분기에서 fd.pd.waitRead를 호출하여 읽을 수 있는 이벤트(여기서는 새 연결)가 발생할 때까지 고루틴을 대기시킵니다.

위의 코드를 살펴봤을 때 연결된 소켓이 없는 경우 직접적으로 syscall을 이용하여 대기하지 않는다는 것을 알았습니다. syscall 대신 애플리케이션의 고루틴 스케줄링에 의한 대기가 진행됩니다. 이번에는 대기하는 코드인 waitRead를 살펴보겠습니다.

waitRead 내부의 주요 흐름은 다음과 같습니다.

// https://github.com/golang/go/blob/a10e42f219abb9c5b/src/internal/poll/fd_poll_runtime.go#L88
func (pd *pollDesc) waitRead(isFile bool) error {
	return pd.wait('r', isFile)
}

// https://github.com/golang/go/blob/a10e42f219abb9c5b/src/internal/poll/fd_poll_runtime.go#L80
func (pd *pollDesc) wait(mode int, isFile bool) error {
	// ...
	res := runtime_pollWait(pd.runtimeCtx, mode)
	return convertErr(res, isFile)
}

// https://github.com/golang/go/blob/a10e42f219abb9c5b/src/runtime/netpoll.go#L336
func poll_runtime_pollWait(pd *pollDesc, mode int) int {
	// ...
	for !netpollblock(pd, int32(mode), false) {
		// ...
	}
	return pollNoError
}

// https://github.com/golang/go/blob/a10e42f219abb9c5b/src/runtime/netpoll.go#L546
func netpollblock(pd *pollDesc, mode int32, waitio bool) bool {
	// 읽기 쓰기 모드에 따른 고루틴 저장용 pollDesc 멤버 가져오기
	gpp := &pd.rg
	if mode == 'w' {
		gpp = &pd.wg
	}

	// set the gpp semaphore to pdWait
	for {
		// Consume notification if already ready.
		if gpp.CompareAndSwap(pdReady, pdNil) {
			return true
		}
		
		// ✅ 이 부분에서 gpp의 상태가 pdNil에서 pdWait으로 변경
		if gpp.CompareAndSwap(pdNil, pdWait) {
			break
		}
		// ...
	}

	// need to recheck error states after setting gpp to pdWait
	// this is necessary because runtime_pollUnblock/runtime_pollSetDeadline/deadlineimpl
	// do the opposite: store to closing/rd/wd, publishInfo, load of rg/wg
	// ✅ gopark: 고루틴을 대기시키는 함수. 이 부분에서 listener.Accept()가 대기
	if waitio || netpollcheckerr(pd, mode) == pollNoError {
		gopark(netpollblockcommit, unsafe.Pointer(gpp), waitReasonIOWait, traceBlockNet, 5)
	}
	// be careful to not lose concurrent pdReady notification
	// ✅ 대기가 끝나면 gpp에 pdNil을 저장
	old := gpp.Swap(pdNil)
	if old > pdWait {
		throw("runtime: corrupted polldesc")
	}
	return old == pdReady
}

위 코드를 확인해보면 waitRead를 호출했을 때 netpollblock이 호출됩니다. netpollblock은 모드에 맞는 *pollDesc의 필드(읽기는 rg, 쓰기는 wg)를 가져오고 gopark를 호출합니다. 이때 gopark가 호출되면 고루틴이 대기 상태로 전환됩니다. 따라서 *TCPListener.Accept가 호출됐을 때 당장 처리할 클라이언트 연결이 없다면 gopark가 호출되어 고루틴이 대기하게 됩니다.

여기까지 epoll에 등록된 fd에 대해 고루틴이 syscall을 호출했을 때 당장 처리할 수 없는 경우 대기하게 되는 흐름을 확인했습니다. 하지만 아직 대기하는 고루틴의 정보가 어디에 저장되는지 확인하지 못했습니다. epoll에 등록된 fd와 fd를 다루는 고루틴에 대한 정보가 어딘가 저장되어 있어야 할 것 같습니다. 이 정보는 고루틴이 대기 상태로 전환되는 마지막 과정에서 저장됩니다.

gopark가 호출되는 라인은 gopark(netpollblockcommit, unsafe.Pointer(gpp), waitReasonIOWait, traceBlockNet, 5) 입니다. 이때 첫 번째 인자에 netpollblockcommit을 확인할 수 있습니다. 이 인자의 정의를 확인하면 다음과 같은 함수를 만나게 됩니다.

// https://github.com/golang/go/blob/a10e42f219abb9c5b/src/runtime/netpoll.go#L527
func netpollblockcommit(gp *g, gpp unsafe.Pointer) bool {
	// ✅ cas로 고루틴을 pollDesc의 필드에 저장 (이전 아티클 참고)
	r := atomic.Casuintptr((*uintptr)(gpp), pdWait, uintptr(unsafe.Pointer(gp)))
	if r {
		// Bump the count of goroutines waiting for the poller.
		// The scheduler uses this to decide whether to block
		// waiting for the poller if there is nothing else to do.
		netpollAdjustWaiters(1)
	}
	return r
}

해당 함수를 확인해보면 cas 연산을 활용하여 고루틴을 pollDesc의 필드인 rg에 저장하는 걸 확인할 수 있습니다. 따라서 pollDesc.rg는 해당 함수가 호출되는 시점에 대기된 고루틴의 정보를 담습니다. 그럼 해당 함수는 언제 호출될까요? 이 답은 gopark 내부에 있습니다.

// https://github.com/golang/go/blob/a10e42f219abb9c5b/src/runtime/proc.go#L385
func gopark(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason waitReason, traceReason traceBlockReason, traceskip int) {
	if reason != waitReasonSleep {
		checkTimeouts() // timeouts may expire while two goroutines keep the scheduler busy
	}
	mp := acquirem()
	gp := mp.curg
	status := readgstatus(gp)
	if status != _Grunning && status != _Gscanrunning {
		throw("gopark: bad g status")
	}
	mp.waitlock = lock
	mp.waitunlockf = unlockf // ✅ 쓰레드에 waitunlockf로 unlockf를 등록. unlockf는 netpollblockcommit
	gp.waitreason = reason
	mp.waitTraceBlockReason = traceReason
	mp.waitTraceSkip = traceskip
	releasem(mp)
	// can't do anything that might move the G between Ms here.
	mcall(park_m) // ✅ M(쓰레드)에서 park_m 실행
}

gopark에서 mp의 waitunlockf를 unlockf로 등록하고 마지막 mcall에서 park_m을 실행합니다.

// https://github.com/golang/go/blob/a10e42f219abb9c5b/src/runtime/proc.go#L4007
func park_m(gp *g) {
	mp := getg().m
	// ...
	casgstatus(gp, _Grunning, _Gwaiting) // 고루틴 상태 변경
	
	// ...
	dropg() // ✅ G를 실행하던 M과 G를 분리

	if fn := mp.waitunlockf; fn != nil {
		ok := fn(gp, mp.waitlock) // ✅ mp.waitunlockf를 호출 후 초기화
		mp.waitunlockf = nil
		mp.waitlock = nil
		// ...
	}

	// ...

	schedule() // 고루틴 스케줄링
}

park_m 함수는 고루틴의 상태를 실제로 변경하고 G를 실행하던 쓰레드 M과 G를 분리합니다. 분리 후 M에 저장된 waitunlockf를 호출하는데, 이 시점에 netpollblockcommit 함수가 호출되어 Accept를 호출한 고루틴의 정보가 pollDesc.rg 필드에 저장됩니다.

위 과정을 요약하면 다음과 같습니다.

Listener.Accept이 호출되고 accept syscall을 호출합니다.
바로 연결된 커넥션이 없는 경우 accept syscall에서 EAGAIN에러가 발생하며 waitRead가 호출됩니다.
waitRead 내부를 따라가면 netpollblock이 호출되는데 해당 함수는 gopark를 호출합니다.
gopark가 호출되면서 실행 중인 고루틴 G의 상태가 변경되고 G를 실행하던 쓰레드 M이 분리됩니다.
분리 후 netpollblockcommit이 호출되면서 pollDesc.rg에 고루틴이 저장됩니다.

다음과 같이 표현할 수 있습니다.

golang 비동기 시리즈 비공개 (2).png

고루틴이 Accept에서 대기하는 과정은 syscall에 대한 블로킹이 아닌 논블로킹 소켓에서 발생하는 에러 처리과정으로 애플리케이션 계층에서 대기를 호출하는 것이었습니다. 고루틴은 gopark로 대기하게 되고 마지막 과정에서 고루틴의 정보가 pollDesc에 저장되었습니다. 그럼 대기하는 과정은 추적했지만, 어디서 epoll의 이벤트를 받게 될까요? 이번에는 깨우는 과정을 추적해봅시다.

고루틴이 활성화되는 위치 탐색

고루틴이 활성화되는 위치를 바로 찾아보기 전에 어느 지점에서 활성화가 호출될지 예측해봅시다. 생각보다 찾기 쉬운데, epoll을 사용하고 있으니 epollwait이 호출되는 지점이 될 것입니다. 따라서 go 프로젝트 내부에서 epollwait이 호출되는 부분을 찾아보면, 아래 함수에서 대기가 호출되는 것을 확인할 수 있습니다.

// https://github.com/golang/go/blob/a10e42f219abb9c5b/src/runtime/netpoll_epoll.go#L98
func netpoll(delay int64) (gList, int32) {
	// ...
	var events [128]syscall.EpollEvent
	
	// ...
	// ✅ epollwait로 이벤트 대기 호출
	n, errno := syscall.EpollWait(epfd, events[:], int32(len(events)), waitms)
	if errno != 0 {
		if errno != _EINTR {
			println("runtime: epollwait on fd", epfd, "failed with", errno)
			throw("runtime: netpoll failed")
		}
		// ...
	}
	var toRun gList
	delta := int32(0)
	for i := int32(0); i < n; i++ {
		ev := events[i]
		if ev.Events == 0 {
			continue
		}

		if *(**uintptr)(unsafe.Pointer(&ev.Data)) == &netpollBreakRd {
			// netpollBreak fd에 대한 처리
			// ...
		}

		// 모드 설정
		var mode int32
		if ev.Events&(syscall.EPOLLIN|syscall.EPOLLRDHUP|syscall.EPOLLHUP|syscall.EPOLLERR) != 0 {
			mode += 'r'
		}
		if ev.Events&(syscall.EPOLLOUT|syscall.EPOLLHUP|syscall.EPOLLERR) != 0 {
			mode += 'w'
		}
		if mode != 0 {
			// ✅ epoll event의 데이터를 추출하여 *pollDesc로 변환
			tp := *(*taggedPointer)(unsafe.Pointer(&ev.Data))
			pd := (*pollDesc)(tp.pointer())
			tag := tp.tag()
			if pd.fdseq.Load() == tag {
				// ...
				// ✅ 준비된 pollDesc에 대한 고루틴 추가
				delta += netpollready(&toRun, pd, mode)
			}
		}
	}
	return toRun, delta
}

// https://github.com/golang/go/blob/a10e42f219abb9c5b/src/runtime/netpoll.go#L492
func netpollready(toRun *gList, pd *pollDesc, mode int32) int32 {
	// ...
	if mode == 'r' || mode == 'r'+'w' {
		rg = netpollunblock(pd, 'r', true, &delta) // pollDesc.rg 상태 변경
	}
	if mode == 'w' || mode == 'r'+'w' {
		wg = netpollunblock(pd, 'w', true, &delta)
	}
	// 모드에 따라 고루틴 추가
	if rg != nil {
		toRun.push(rg)
	}
	if wg != nil {
		toRun.push(wg)
	}
	return delta
}

netpoll함수의 동작을 요약하면 다음과 같습니다.

EpollWait을 호출하여 발생한 이벤트를 수집합니다.
수집한 이벤트를 순회하면서 epoll Data를 pollDesc로 변환합니다.
pollDesc에 저장되어 있던, 대기를 해제할 고루틴을 연결리스트(gList)에 추가합니다.

netpoll 함수는 epollwait으로 이벤트가 발생한 fd를 감시합니다. 그러나 netpoll 함수가 리턴한 고루틴 리스트에 대한 처리는 netpoll 내부에 구현되어 있지 않습니다. 따라서 netpoll을 이용하여 고루틴을 처리하는 부분을 확인해야 합니다. netpoll을 호출하는 부분은 여러 곳이 있지만 크게 runtime 내 sysmon과 findRunnable에서 다루고 있습니다.

먼저 sysmon을 살펴보겠습니다. sysmon은 다음 부분처럼 이루어져 있습니다.

// https://github.com/golang/go/blob/a10e42f219abb9c5b/src/runtime/proc.go#L5946
func sysmon() {

	// ...
	idle := 0 // how many cycles in succession we had not wokeup somebody
	delay := uint32(0)

	for {
		if idle == 0 { // start with 20us sleep...
			delay = 20
		} else if idle > 50 { // start doubling the sleep after 1ms...
			delay *= 2
		}
		if delay > 10*1000 { // up to 10ms
			delay = 10 * 1000
		}
		usleep(delay)

		// ...
		lastpoll := sched.lastpoll.Load()
		if netpollinited() && lastpoll != 0 && lastpoll+10*1000*1000 < now {
			sched.lastpoll.CompareAndSwap(lastpoll, now)
			// ✅ netpoll 호출
			list, delta := netpoll(0) // non-blocking - returns list of goroutines
			if !list.empty() {
		
				// ...
				// ✅ global runnable queue에 고루틴 삽입
				injectglist(&list)
				incidlelocked(1)
				netpollAdjustWaiters(delta)
			}
		}
		
		// ...
}

sysmon은 golang 런타임의 데몬 쓰레드로 반복적으로 스핀하면서 다양한 작업을 수행합니다. 위의 코드에서 netpoll이 리턴한 고루틴 리스트를 런타임의 전역 실행 큐(global runnable queue)에 전달합니다.

한편 다른 부분은 findRunnable입니다.

func findRunnable() (gp *g, inheritTime, tryWakeP bool) {
	// ...
		// ✅ netpoll 호출
		list, delta := netpoll(delay) // block until new work is available
		// ...
		if pp == nil {
			injectglist(&list) // ✅ 확보한 P가 없으므로 global runnable queue에 고루틴 삽입
			netpollAdjustWaiters(delta)
		}

findRunnable은 p가 실행할 고루틴을 찾는 역할을 하는 함수입니다. 이 함수에서도 netpoll이 등장하며, delay값을 전달하여 epollWait에서 블로킹된 채로 이벤트를 기다릴 수 있도록 합니다. delay가 계산되는 과정은 복잡합니다. 따라서 여기서는 findRunnable 내부에서 실행할 고루틴을 탐색하는 과정에 netpoll로 delay가 있는 대기를 한다는 정도로 이해하면 되겠습니다.

동작을 요약하여 그림으로 표현하면 다음과 같습니다.

golang 비동기 시리즈 비공개 (3).png

다시 요약해보겠습니다.

Accept 시 고루틴이 대기 상태가 되어 pollDesc에 저장되는 과정은 다음과 같습니다.

syscall의 accept을 호출하고 논블로킹이므로 EAGAIN을 리턴받습니다.
EAGAIN을 받으면 waitRead를 거쳐 netpollblock이 호출됩니다.
gopark가 호출되면서 고루틴이 대기상태로 전환됩니다.
고루틴이 대기상태로 전환되면 마지막으로 netpollblockcommit에서 pollDesc의 필드에 고루틴을 저장합니다.

한편 epoll 이벤트로부터 고루틴이 다시 실행되는 과정은 다음과 같습니다.

netpoll은 sysmon, findRunnable등 고루틴 런타임 내부 함수로부터 호출됩니다.
netpoll호출 시 epollwait로 리턴받은 이벤트 리스트로 고루틴 리스트를 추출합니다.
sysmon은 전역 실행 큐(GRQ)에 고루틴을 저장합니다. findRunnable은 P를 확보하지 못한 경우(pp == nil) 전역 실행 큐(GRQ)에, P를 확보한 경우에는 주로 해당 P의 로컬 실행 큐(LRQ)에 고루틴을 저장합니다.

Share on

X Facebook LinkedIn Bluesky

atgane

golang 비동기. tcp 서버(5). netpoller 2부. golang 내부 epoll 활용: Accept

Accept 내부. 고루틴이 대기하는 위치 탐색

고루틴이 활성화되는 위치 탐색

Share on

Leave a comment

You may also enjoy

kubernetes data plane: containerd는 언제 결정하는가

kubernetes data plane: containerd CreateContainer & StartContainer

kubernetes data plane: containerd sandbox 생성 과정 추적

kubernetes data plane: containerd 플러그인 시스템과 gRPC 서버 초기화