-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add oom checker * add some future tasks;add oom_score_adj;add help link * add another oom log pattern Co-authored-by: Binjie Qian <[email protected]>
- Loading branch information
Showing
3 changed files
with
170 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
package oom | ||
|
||
import ( | ||
"bufio" | ||
"fmt" | ||
"github.com/Azure/kdebug/pkg/base" | ||
"github.com/Azure/kdebug/pkg/env" | ||
"os" | ||
"regexp" | ||
"strings" | ||
) | ||
|
||
// Default log location and the marker substrings used to spot OOM lines.
const ( | ||
// logPath is the kernel log file that New configures the checker to read.
logPath = "/var/log/kern.log" | ||
// cgroupOOMKeyStr marks a log line as a memory-cgroup OOM candidate.
cgroupOOMKeyStr = "Memory cgroup out of memory" | ||
// outOfMemoryKey marks a log line as an out-of-memory candidate.
outOfMemoryKey = "Out of memory" | ||
) | ||
|
||
// helpLink lists the reference URLs that checkOOM attaches to a result
// (as HelpLinks) when at least one OOM entry was found in the log.
var helpLink = []string{ | ||
"https://www.kernel.org/doc/gorman/html/understand/understand016.html", | ||
"https://stackoverflow.com/questions/18845857/what-does-anon-rss-and-total-vm-mean", | ||
"https://medium.com/tailwinds-navigator/kubernetes-tip-how-does-oomkilled-work-ba71b135993b", | ||
} | ||
|
||
// oomRegex extracts the fields of one OOM kill log line. parseOOMContent uses
// the capture groups as follows:
//   1: leading timestamp (text up to and including the ":xx:xx" time part)
//   2: text after " process " and before the parenthesized name (the pid in the test fixtures)
//   3: text inside the parentheses (the process name in the test fixtures)
//   4: the anon-rss value
//   5: the oom_score_adj value
var oomRegex = regexp.MustCompile("^(.*:.{2}:.{2}) .* process (.*) \\((.*)\\) .* anon-rss:(.*), file-rss.* oom_score_adj:(.*)") | ||
|
||
// OOMChecker looks for out-of-memory kill records in a kernel log file.
type OOMChecker struct { | ||
// kernLogPath is the path of the log file opened by getAndParseOOMLog.
kernLogPath string | ||
} | ||
|
||
func (c *OOMChecker) Name() string { | ||
return "OOM" | ||
} | ||
|
||
func New() *OOMChecker { | ||
//todo: support other logpath | ||
return &OOMChecker{ | ||
kernLogPath: logPath, | ||
} | ||
} | ||
|
||
func (c *OOMChecker) Check(ctx *base.CheckContext) ([]*base.CheckResult, error) { | ||
var results []*base.CheckResult | ||
oomResult, err := c.checkOOM(ctx) | ||
if err != nil { | ||
return nil, err | ||
} | ||
results = append(results, oomResult) | ||
return results, nil | ||
} | ||
|
||
func (c *OOMChecker) checkOOM(ctx *base.CheckContext) (*base.CheckResult, error) { | ||
result := &base.CheckResult{ | ||
Checker: c.Name(), | ||
} | ||
if !envCheck(ctx.Environment) { | ||
result.Description = fmt.Sprint("Skip oom check in non-linux os") | ||
return result, nil | ||
} | ||
oomInfos, err := c.getAndParseOOMLog() | ||
if err != nil { | ||
return nil, err | ||
} else if len(oomInfos) > 0 { | ||
result.Error = strings.Join(oomInfos, "\n") | ||
result.Description = "Detect process oom killed" | ||
result.HelpLinks = helpLink | ||
} else { | ||
result.Description = "No OOM found in recent kernlog." | ||
} | ||
return result, nil | ||
} | ||
func (c *OOMChecker) getAndParseOOMLog() ([]string, error) { | ||
file, err := os.Open(c.kernLogPath) | ||
if err != nil { | ||
return nil, err | ||
} | ||
defer file.Close() | ||
|
||
var oomInfos []string | ||
scanner := bufio.NewScanner(file) | ||
for scanner.Scan() { | ||
tmp := scanner.Text() | ||
//todo: more sophisticated OOM context | ||
//pattern match. https://github.com/torvalds/linux/blob/551acdc3c3d2b6bc97f11e31dcf960bc36343bfc/mm/oom_kill.c#L1120, https://github.com/torvalds/linux/blob/551acdc3c3d2b6bc97f11e31dcf960bc36343bfc/mm/oom_kill.c#L895 | ||
if strings.Contains(tmp, cgroupOOMKeyStr) || strings.Contains(tmp, outOfMemoryKey) { | ||
oomInfo, err := parseOOMContent(tmp) | ||
if err != nil { | ||
return nil, err | ||
} else { | ||
oomInfos = append(oomInfos, oomInfo) | ||
} | ||
} | ||
} | ||
|
||
if err := scanner.Err(); err != nil { | ||
return nil, err | ||
} | ||
return oomInfos, nil | ||
} | ||
|
||
func parseOOMContent(content string) (string, error) { | ||
match := oomRegex.FindStringSubmatch(content) | ||
if len(match) != 6 { | ||
err := fmt.Errorf("Can't parse oom content:%s \n", content) | ||
return "", err | ||
} else { | ||
return fmt.Sprintf("progress:[%s %s] is OOM kill at time [%s]. [rss:%s] [oom_score_adj:%s]\n", match[2], match[3], match[1], match[4], match[5]), nil | ||
} | ||
} | ||
|
||
func envCheck(environment env.Environment) bool { | ||
//todo:support other os | ||
return environment.HasFlag("ubuntu") | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
package oom | ||
|
||
import ( | ||
"fmt" | ||
"github.com/Azure/kdebug/pkg/base" | ||
"github.com/Azure/kdebug/pkg/env" | ||
"io/ioutil" | ||
"os" | ||
"testing" | ||
) | ||
|
||
// testStrings holds sample kernel log lines for TestCheckOOMLogWhenOOM: one
// with the "Memory cgroup out of memory" prefix and one with the plain
// "Out of memory" prefix. Both describe the same kill, so the test expects
// the same summary for each.
var testStrings = []string{ | ||
"Feb 22 16:15:02 k8s-ingress-11186066-z1-vmss0000B3 kernel: [989751.247878] Memory cgroup out of memory: Killed process 3841 (nginx) total-vm:240652kB, anon-rss:130344kB, file-rss:5212kB, shmem-rss:208kB, UID:101 pgtables:332kB oom_score_adj:986\n", | ||
"Feb 22 16:15:02 k8s-ingress-11186066-z1-vmss0000B3 kernel: [989751.247878] Out of memory: Killed process 3841 (nginx) total-vm:240652kB, anon-rss:130344kB, file-rss:5212kB, shmem-rss:208kB, UID:101 pgtables:332kB oom_score_adj:986\n", | ||
} | ||
|
||
func TestCheckOOMLogWhenOOM(t *testing.T) { | ||
environment := &env.StaticEnvironment{ | ||
Flags: []string{"ubuntu"}, | ||
} | ||
if !envCheck(env.GetEnvironment()) { | ||
fmt.Println("skip oom test") | ||
return | ||
} | ||
for _, testString := range testStrings { | ||
|
||
tmp, err := ioutil.TempFile("", "kernlog") | ||
if err != nil { | ||
t.Fatalf("error creating tmp file:%v", err) | ||
} | ||
check := OOMChecker{kernLogPath: tmp.Name()} | ||
defer func() { | ||
e := os.Remove(check.kernLogPath) | ||
if e != nil { | ||
t.Errorf(e.Error()) | ||
} | ||
}() | ||
//should be 600. But it fails in 600 | ||
err = os.WriteFile(check.kernLogPath, []byte(testString), 777) | ||
if err != nil { | ||
t.Errorf("Create tmp file error:%v", err) | ||
} | ||
result, _ := check.Check(&base.CheckContext{ | ||
Environment: environment, | ||
}) | ||
if len(result) != 1 { | ||
t.Errorf("Get unexpected OOM result length %v", len(result)) | ||
} | ||
checkErr := result[0].Error | ||
if checkErr != "progress:[3841 nginx] is OOM kill at time [Feb 22 16:15:02]. [rss:130344kB] [oom_score_adj:986]\n" { | ||
t.Errorf("Unexpected check result:\n %v \n %v", result[0].Description, checkErr) | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters