Solved: sudo taking a lot of time, system responding very slowly.

I recently faced an issue where the system started responding very slowly, and any command run with sudo was extremely slow.

To resolve this, I ran the command under strace and found something interesting.

I. Problem: Commands taking a large amount of time.

[jack@ngelinux001 log]$ service nscd status

^C Killed –> since it's taking a long time

 

II. Tracing the problem.

[jack@ngelinux001 log]$ strace service nscd status

statfs("/selinux", 0x7ffc41cfaab0)      = -1 ENOENT (No such file or directory)
open("/proc/filesystems", O_RDONLY)     = 3
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7faa8f728000
read(3, "nodev\tsysfs\nnodev\trootfs\nnodev\tr"..., 1024) = 408
stat("/etc/sysconfig/64bit_strstr_via_64bit_strstr_sse2_unaligned", 0x7ffc41cf9ff0) = -1 ENOENT (No such file or directory)
read(3, "", 1024)                       = 0
close(3)                                = 0
munmap(0x7faa8f728000, 4096)            = 0
access("/etc/selinux/config", F_OK)     = 0
open("/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=106075056, ...}) = 0
mmap(NULL, 106075056, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7faa87280000
close(3)                                = 0
open("/proc/self/stat", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7faa8f728000
read(3, "371583 (systemctl) R 371581 3715"..., 1024) = 358
close(3)                                = 0
munmap(0x7faa8f728000, 4096)            = 0
ioctl(1, TCGETS, {B38400 opost isig icanon echo ...}) = 0
stat("/proc/1/root", 0x7ffc41cfa8b0)    = -1 EACCES (Permission denied)
stat("/proc/1/root", 0x7ffc41cfa8a0)    = -1 EACCES (Permission denied)
lstat("/run/systemd/system/", {st_mode=S_IFDIR|0755, st_size=840, ...}) = 0
geteuid()                               = 281678
socket(AF_LOCAL, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0) = 3
setsockopt(3, SOL_SOCKET, SO_PASSCRED, [0], 4) = 0
setsockopt(3, SOL_SOCKET, SO_PASSSEC, [0], 4) = 0
getsockopt(3, SOL_SOCKET, SO_RCVBUF, [212992], [4]) = 0
setsockopt(3, SOL_SOCKET, SO_RCVBUFFORCE, [8388608], 4) = -1 EPERM (Operation not permitted)
setsockopt(3, SOL_SOCKET, SO_RCVBUF, [8388608], 4) = 0
getsockopt(3, SOL_SOCKET, SO_SNDBUF, [212992], [4]) = 0
setsockopt(3, SOL_SOCKET, SO_SNDBUFFORCE, [8388608], 4) = -1 EPERM (Operation not permitted)
setsockopt(3, SOL_SOCKET, SO_SNDBUF, [8388608], 4) = 0
connect(3, {sa_family=AF_LOCAL, sun_path="/var/run/dbus/system_bus_socket"}, 33) = 0
getsockopt(3, SOL_SOCKET, SO_PEERCRED, {pid=1, uid=0, gid=0}, [12]) = 0
getsockopt(3, SOL_SOCKET, SO_PEERSEC, 0x560802c80820, 0x7ffc41cfa724) = -1 ENOPROTOOPT (Protocol not available)
fstat(3, {st_mode=S_IFSOCK|0777, st_size=0, ...}) = 0
getsockopt(3, SOL_SOCKET, SO_ACCEPTCONN, [0], [4]) = 0
getsockname(3, {sa_family=AF_LOCAL, NULL}, [2]) = 0
geteuid()                               = 281678
sendmsg(3, {msg_name(0)=NULL, msg_iov(3)=[{"\0AUTH EXTERNAL ", 15}, {"323831363738", 12}, {"\r\nNEGOTIATE_UNIX_FD\r\nBEGIN\r\n", 28}], msg_controllen=0, msg_flags=0}, MSG_DONTWAIT|MSG_NOSIGNAL) = 55
gettid()                                = 371583
getrandom("\366@\32\2229^\5'\327\1\37\336uA\344h", 16, GRND_NONBLOCK) = 16
stat("/proc/1/root", 0x7ffc41cfa8b0)    = -1 EACCES (Permission denied)
setrlimit(RLIMIT_NOFILE, {rlim_cur=16*1024, rlim_max=16*1024}) = 0
recvmsg(3, {msg_name(0)=NULL, msg_iov(1)=[{"OK 799848aaa0464be5e6caec896164d"..., 256}], msg_controllen=0, msg_flags=MSG_CMSG_CLOEXEC}, MSG_DONTWAIT|MSG_NOSIGNAL|MSG_CMSG_CLOEXEC) = 52
sendmsg(3, {msg_name(0)=NULL, msg_iov(1)=[{"l\1\0\1\0\0\0\0\1\0\0\0m\0\0\0\1\1o\0\25\0\0\0/org/fre"..., 128}], msg_controllen=0, msg_flags=0}, MSG_DONTWAIT|MSG_NOSIGNAL) = 128
recvmsg(3, 0x7ffc41cf8e80, MSG_DONTWAIT|MSG_NOSIGNAL|MSG_CMSG_CLOEXEC) = -1 EAGAIN (Resource temporarily unavailable)
ppoll([{fd=3, events=POLLIN}], 1, {24, 999809000}, NULL, 8) = 1 ([{fd=3, revents=POLLIN}], left {24, 999678750})
recvmsg(3, {msg_name(0)=NULL, msg_iov(1)=[{"l\2\1\1\20\0\0\0\1\0\0\0E\0\0\0\6\1s\0\v\0\0\0", 24}], msg_controllen=0, msg_flags=MSG_CMSG_CLOEXEC}, MSG_DONTWAIT|MSG_NOSIGNAL|MSG_CMSG_CLOEXEC) = 24
recvmsg(3, {msg_name(0)=NULL, msg_iov(1)=[{":1.19893272\0\0\0\0\0\5\1u\0\1\0\0\0\10\1g\0\1s\0\0"..., 80}], msg_controllen=0, msg_flags=MSG_CMSG_CLOEXEC}, MSG_DONTWAIT|MSG_NOSIGNAL|MSG_CMSG_CLOEXEC) = 80
sendmsg(3, {msg_name(0)=NULL, msg_iov(2)=[{"l\1\0\1\5\0\0\0\2\0\0\0\237\0\0\0\1\1o\0-\0\0\0/org/fre"..., 176}, {"\0\0\0\0\0", 5}], msg_controllen=0, msg_flags=0}, MSG_DONTWAIT|MSG_NOSIGNAL) = 181
recvmsg(3, {msg_name(0)=NULL, msg_iov(1)=[{"l\4\1\1\20\0\0\0\2\0\0\0\225\0\0\0\1\1o\0\25\0\0\0", 24}], msg_controllen=0, msg_flags=MSG_CMSG_CLOEXEC}, MSG_DONTWAIT|MSG_NOSIGNAL|MSG_CMSG_CLOEXEC) = 24
recvmsg(3, {msg_name(0)=NULL, msg_iov(1)=[{"/org/freedesktop/DBus\0\0\0\2\1s\0\24\0\0\0"..., 160}], msg_controllen=0, msg_flags=MSG_CMSG_CLOEXEC}, MSG_DONTWAIT|MSG_NOSIGNAL|MSG_CMSG_CLOEXEC) = 160
recvmsg(3, 0x7ffc41cf9050, MSG_DONTWAIT|MSG_NOSIGNAL|MSG_CMSG_CLOEXEC) = -1 EAGAIN (Resource temporarily unavailable)
ppoll([{fd=3, events=POLLIN}], 1, {24, 999890000}, NULL, 8


open("/usr/share/locale/en/LC_MESSAGES/libc.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
writev(2, [{"Failed to get unit for PID 2: Co"..., 50}, {"\n", 1}], 2Failed to get unit for PID 2: Connection timed out
) = 51
sendmsg(3, {msg_name(0)=NULL, msg_iov(2)=[{"l\1\0\1\5\0\0\0\3\0\0\0\237\0\0\0\1\1o\0-\0\0\0/org/fre"..., 176}, {"\0\0\0\0\0", 5}], msg_controllen=0, msg_flags=0}, MSG_DONTWAIT|MSG_NOSIGNAL) = 181
recvmsg(3, 0x7ffc156cfd50, MSG_DONTWAIT|MSG_NOSIGNAL|MSG_CMSG_CLOEXEC) = -1 EAGAIN (Resource temporarily unavailable)
ppoll([{fd=3, events=POLLIN}], 1, {24, 999966000}, NULL, 8) = 0 (Timeout)
writev(2, [{"Failed to get properties: Connec"..., 46}, {"\n", 1}], 2Failed to get properties: Connection timed out

 

III. Analysis
PID 2 belongs to kthreadd (the kernel thread daemon), and we can see it's not responding in time.

This means the kernel is not able to allocate a thread for this task.

root         2     0  0 Jun03 ?        00:00:00 [kthreadd]

 

IV. Solution

kthreadd (PID 2) was causing issues because of excessive paging in and out.

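To solve this issue, we need to clear the page cache and buffer cache on the server. Run the following as root (from a root shell, since the redirection is performed by the calling shell, not by sudo):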

echo 3 > /proc/sys/vm/drop_caches


After this, it should work and performance should improve.
0 0 votes
Article Rating
Subscribe
Notify of
guest

0 Comments
Newest
Oldest Most Voted
Inline Feedbacks
View all comments