V2EX = way to explore
V2EX 是一个关于分享和探索的地方
现在注册
已注册用户请  登录
The Go Programming Language
http://golang.org/
Go Playground
Go Projects
Revel Web Framework
hheedat
V2EX  ›  Go 编程语言

如何理解 golang pprof 的 cum ?感觉解释不通啊

  •  
  •   hheedat · 2019-01-15 11:51:05 +08:00 · 3444 次点击
    这是一个创建于 2200 天前的主题,其中的信息可能已经有所发展或是发生改变。

    https://www.reddit.com/r/golang/comments/7ony5f/what_is_the_meaning_of_flat_and_cum_in_golang/

    我看了一些资料,都是如这个帖子里面所说,cum ( cumulative )是本函数以及本函数调用的子函数们的时间和。 如帖子里面的调用方式我写了一个 demo,结果符合预期

    
    func ctA() int64 {
    	return ctB()
    }
    
    func ctB() int64 {
    	var x int64 = 0
    	for i := 0; i < 1e9; i++ {
    		x = x + 1
    	}
    	return x + ctC() + ctD()
    }
    
    func ctC() int64 {
    	var x int64 = 0
    	for i := 0; i < 1e8; i++ {
    		x = x + 1
    	}
    	return x
    }
    
    func ctD() int64 {
    	var x int64 = 0
    	for i := 0; i < 5e8; i++ {
    		x = x + 1
    	}
    	return x
    }
    
    (pprof) top
    Showing nodes accounting for 48.68s, 99.43% of 48.96s total
    Dropped 11 nodes (cum <= 0.24s)
          flat  flat%   sum%        cum   cum%
        30.31s 61.91% 61.91%     48.68s 99.43%  main.ctB
        15.35s 31.35% 93.26%     15.35s 31.35%  main.ctD
         3.02s  6.17% 99.43%      3.02s  6.17%  main.ctC
             0     0% 99.43%     48.68s 99.43%  main.ctA
             0     0% 99.43%     48.68s 99.43%  main.main
             0     0% 99.43%     48.68s 99.43%  runtime.main
             0     0% 99.43%      0.27s  0.55%  runtime.mstart
             0     0% 99.43%      0.27s  0.55%  runtime.mstart1
             0     0% 99.43%      0.27s  0.55%  runtime.sysmon
    

    但是稍微复杂一些的例子,里面的 main.main,cum 的值就很小了(如下),这就说不通了啊,main 不是一直在函数的调用栈上么?理论上应该是最长的呀。

    (pprof) top30
    Showing nodes accounting for 15.82s, 95.36% of 16.59s total
    Dropped 92 nodes (cum <= 0.08s)
    Showing top 30 nodes out of 67
          flat  flat%   sum%        cum   cum%
         6.69s 40.33% 40.33%      6.69s 40.33%  runtime.pthread_cond_wait
         4.61s 27.79% 68.11%      4.61s 27.79%  runtime.pthread_cond_signal
         1.35s  8.14% 76.25%      1.35s  8.14%  runtime.memmove
         0.84s  5.06% 81.31%      0.84s  5.06%  runtime.usleep
         0.51s  3.07% 84.39%      0.52s  3.13%  runtime.findnull
         0.49s  2.95% 87.34%      0.49s  2.95%  runtime.nanotime
         0.44s  2.65% 89.99%      0.44s  2.65%  runtime.(*semaRoot).dequeue
         0.34s  2.05% 92.04%      0.34s  2.05%  runtime.memclrNoHeapPointers
         0.20s  1.21% 93.25%      0.33s  1.99%  runtime.scanobject
         0.09s  0.54% 93.79%      0.09s  0.54%  runtime.pthread_mutex_lock
         0.05s   0.3% 94.09%      0.13s  0.78%  runtime.gentraceback
         0.03s  0.18% 94.27%      0.35s  2.11%  sync.(*Mutex).Lock
         0.03s  0.18% 94.45%      0.38s  2.29%  sync.(*RWMutex).Lock
         0.02s  0.12% 94.58%      0.14s  0.84%  bufio.(*Scanner).Scan
         0.02s  0.12% 94.70%      0.75s  4.52%  runtime.scang
         0.02s  0.12% 94.82%      8.40s 50.63%  runtime.schedule
         0.01s  0.06% 94.88%      0.19s  1.15%  main.Loop
         0.01s  0.06% 94.94%      0.14s  0.84%  runtime.callers
         0.01s  0.06% 95.00%      7.07s 42.62%  runtime.findrunnable
         0.01s  0.06% 95.06%      1.20s  7.23%  runtime.gcDrain
         0.01s  0.06% 95.12%      0.11s  0.66%  runtime.mallocgc
         0.01s  0.06% 95.18%      3.04s 18.32%  runtime.newproc.func1
         0.01s  0.06% 95.24%      3.03s 18.26%  runtime.newproc1
         0.01s  0.06% 95.30%      0.56s  3.38%  runtime.osyield
         0.01s  0.06% 95.36%      4.92s 29.66%  runtime.systemstack
             0     0% 95.36%      0.18s  1.08%  main.(*TrieNode).addWord
             0     0% 95.36%      0.52s  3.13%  main.BuildTire
             0     0% 95.36%      2.45s 14.77%  main.Loop.func1
             0     0% 95.36%      0.53s  3.19%  main.main
             0     0% 95.36%      0.10s   0.6%  runtime.(*gcWork).balance
    

    如何理解这件事情呢?

    proxy

    4 条回复    2019-01-15 15:59:46 +08:00
    ensonmj
        1
    ensonmj  
       2019-01-15 13:32:13 +08:00   ❤️ 1
    说明你的主要任务都是在其他 goroutine 里面跑的,main 只是等待?
    reus
        2
    reus  
       2019-01-15 13:44:47 +08:00
    runtime 的一些 goroutine 在 main 执行前就会开启了
    用户代码也可能在 main 前执行一些东西,例如 init 函数,或者其他初始化
    main 并不是 go 程序的入口
    janxin
        3
    janxin  
       2019-01-15 13:59:51 +08:00
    实际上你写程序的 main 是跟 C 程序中的 Entry Point 的理解不一样,即便是 VC 之类的程序也是在 main 之前有一段小的初始化代码在执行,完成之后才会跳到 main 中。你可以提供一下 demo 代码,没看到不知道你测试了什么呀...
    hheedat
        4
    hheedat  
    OP
       2019-01-15 15:59:46 +08:00
    @ensonmj 真相了,我把带 goroutine 的那个 LOOP 方法去掉,就符合预期了

    @janxin 下面是我的代码
    ```
    package main

    import (
    "flag"
    "os"
    "runtime/pprof"
    "log"
    "fmt"
    "runtime"
    "bufio"
    "sync"
    )

    var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`")
    var memprofile = flag.String("memprofile", "", "write memory profile to `file`")
    var blockprofile = flag.String("blockprofile", "", "write block profile to `file`")
    var mutexprofile = flag.String("mutexprofile", "", "write mutex profile to `file`")

    func main() {
    flag.Parse()
    if *cpuprofile != "" {
    f, err := os.Create(*cpuprofile)
    if err != nil {
    log.Fatal("could not create CPU profile: ", err)
    }
    if err := pprof.StartCPUProfile(f); err != nil {
    log.Fatal("could not start CPU profile: ", err)
    }
    defer pprof.StopCPUProfile()
    }
    if *blockprofile != "" {
    runtime.SetBlockProfileRate(10 * 1000)
    defer writeProfTo("block", *blockprofile)
    }
    if *mutexprofile != "" {
    runtime.SetMutexProfileFraction(2)
    defer writeProfTo("mutex", *mutexprofile)
    }

    m := make(map[int]*TrieNode, 4)
    for i := 0; i < 3; i++ {
    m[i] = BuildTire()
    //loop.Loop()
    }
    //for i := 1; i < 10; i++ {
    // ctA()
    //}

    if *memprofile != "" {
    f, err := os.Create(*memprofile)
    if err != nil {
    log.Fatal("could not create memory profile: ", err)
    }
    //runtime.GC() // get up-to-date statistics
    if err := pprof.WriteHeapProfile(f); err != nil {
    log.Fatal("could not write memory profile: ", err)
    }
    f.Close()
    }
    }

    func ctA() int64 {
    return ctB()
    }

    func ctB() int64 {
    var x int64 = 0
    for i := 0; i < 1e9; i++ {
    x = x + 1
    }
    return x + ctC() + ctD()
    }

    func ctC() int64 {
    var x int64 = 0
    for i := 0; i < 1e8; i++ {
    x = x + 1
    }
    return x
    }

    func ctD() int64 {
    var x int64 = 0
    for i := 0; i < 5e8; i++ {
    x = x + 1
    }
    return x
    }

    func writeProfTo(name, fn string) {
    p := pprof.Lookup(name)
    if p == nil {
    fmt.Errorf("%s prof not found", name)
    return
    }
    f, err := os.Create(fn)
    if err != nil {
    fmt.Errorf("%v", err.Error())
    return
    }
    defer f.Close()
    err = p.WriteTo(f, 0)
    if err != nil {
    fmt.Errorf("%v", err.Error())
    return
    }
    }

    type TrieNode struct {
    Child *map[string]TrieNode
    Exist bool
    }

    var mySlice []int
    var sliceLock sync.RWMutex

    func BuildTire() *TrieNode {
    root := TrieNode{nil, false}

    inFile, _ := os.Open("/Users/admin/stopwords.txt")
    defer inFile.Close()
    scanner := bufio.NewScanner(inFile)
    scanner.Split(bufio.ScanLines)

    for scanner.Scan() {
    lineStr := scanner.Text()
    root.addWord(lineStr)
    Loop()
    }

    return &root
    }

    func Loop() {
    var wg sync.WaitGroup
    for i := 0; i < 10; i++ {
    wg.Add(1)
    go func() {
    for i := 0; i < 10; i++ {
    sliceLock.Lock()
    mySlice = append(mySlice, i)
    sliceLock.Unlock()
    }
    wg.Done()
    }()
    }
    wg.Wait()
    }

    func GetMySliceLen() int {
    return len(mySlice)
    }

    func (n *TrieNode) addWord(words string) {

    runes := []rune(words)
    keyStr := string(runes[0])

    var exist bool
    var restStr string
    if len(runes) == 1 {
    exist = true
    restStr = ""
    } else {
    exist = false
    restStr = string(runes[1:])
    }

    if n.Child == nil {
    tm := make(map[string]TrieNode)
    n.Child = &tm
    }

    tmpMap := *n.Child

    if _, ok := tmpMap[keyStr]; !ok {
    tmpMap[keyStr] = TrieNode{nil, exist}
    } else {
    if exist {
    //tmpMap[keyStr].Exist = exist
    tm := tmpMap[keyStr]
    tm.Exist = exist
    tmpMap[keyStr] = tm
    }
    }

    n.Child = &tmpMap

    if len(restStr) > 0 {
    tm := tmpMap[keyStr]
    tm.addWord(restStr)
    tmpMap[keyStr] = tm
    }
    }

    ```
    关于   ·   帮助文档   ·   博客   ·   API   ·   FAQ   ·   实用小工具   ·   5149 人在线   最高记录 6679   ·     Select Language
    创意工作者们的社区
    World is powered by solitude
    VERSION: 3.9.8.5 · 25ms · UTC 06:02 · PVG 14:02 · LAX 22:02 · JFK 01:02
    Developed with CodeLauncher
    ♥ Do have faith in what you're doing.