SlsDetectorCPUAffinity.cpp 44.1 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
//###########################################################################
// This file is part of LImA, a Library for Image Acquisition
//
// Copyright (C) : 2009-2011
// European Synchrotron Radiation Facility
// BP 220, Grenoble 38043
// FRANCE
//
// This is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 3 of the License, or
// (at your option) any later version.
//
// This software is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, see <http://www.gnu.org/licenses/>.
//###########################################################################

#include "SlsDetectorCamera.h"

#include "lima/Timestamp.h"
#include "lima/CtAcquisition.h"
#include "lima/CtSaving.h"
#include "lima/SoftOpExternalMgr.h"
#include "lima/RegExUtils.h"
30
#include "lima/MiscUtils.h"
31
32
33
34
35

#include <signal.h>
#include <unistd.h>
#include <sys/wait.h>
#include <sys/types.h>
36
#include <pwd.h>
37
38
#include <numa.h>
#include <iomanip>
39
40
41
42
43

using namespace std;
using namespace lima;
using namespace lima::SlsDetector;

44
// Class-wide switch written by setUseSudo() and read by getUseSudo();
// execute() only prepends "sudo -n " when this and m_try_sudo are both true.
bool SystemCmd::UseSudo = true;
45

46
// Builds a command from its executable name, a free-text description and the
// sudo preference. The three pipe pointers start as NULL.
SystemCmd::SystemCmd(string cmd, string desc, bool try_sudo)
	: m_cmd(cmd),
	  m_desc(desc),
	  m_try_sudo(try_sudo),
	  m_stdin(NULL),
	  m_stdout(NULL),
	  m_stderr(NULL)
{
	DEB_CONSTRUCTOR();
	DEB_PARAM() << DEB_VAR3(m_cmd, m_desc, m_try_sudo);
}

// Copy constructor: takes over the command name, description, sudo flag and
// the three pipe pointers of the source object.
// NOTE(review): m_args is not part of the initializer list - confirm that
// starting the copy with its own argument stream is intended.
SystemCmd::SystemCmd(const SystemCmd& o)
	: m_cmd(o.m_cmd),
	  m_desc(o.m_desc),
	  m_try_sudo(o.m_try_sudo),
	  m_stdin(o.m_stdin),
	  m_stdout(o.m_stdout),
	  m_stderr(o.m_stderr)
{
	DEB_CONSTRUCTOR();
	DEB_PARAM() << DEB_VAR3(m_cmd, m_desc, m_try_sudo);
	DEB_PARAM() << DEB_VAR3(m_stdin, m_stdout, m_stderr);
}

// Updates the class-wide UseSudo flag.
void SystemCmd::setUseSudo(bool use_sudo)
{
	SystemCmd::UseSudo = use_sudo;
}

68
// Returns the current value of the class-wide UseSudo flag.
bool SystemCmd::getUseSudo()
{
	return SystemCmd::UseSudo;
}

73
void SystemCmd::checkSudo()
74
75
76
77
78
{
	DEB_STATIC_FUNCT();

	typedef map<string, bool> CacheMap;
	static CacheMap cache_map;
79
	CacheMap::iterator it = cache_map.find(m_cmd);
80
	if (it == cache_map.end()) {
81
82
83
84
		SystemCmd sudo("sudo", "", false);
		sudo.args() << "-l " << m_cmd;
		bool ok = (sudo.execute() == 0);
		CacheMap::value_type entry(m_cmd, ok);
85
86
87
88
89
90
91
92
		pair<CacheMap::iterator, bool> v = cache_map.insert(entry);
		if (!v.second)
			THROW_HW_ERROR(Error) << "Error inserting cache entry";
		it = v.first;
	}
	if (it->second)
		return;

93
94
95
96
97
98
99
100
101
102
103
104
	const char *user = "Unknown";
	uid_t uid = getuid();
	DEB_TRACE() << DEB_VAR1(uid);
	struct passwd pw, *res;
	char buffer[4 * 1024];
	int ret = getpwuid_r(uid, &pw, buffer, sizeof(buffer), &res);
	if (ret != 0)
		DEB_WARNING() << "getpwuid_r failed: " << strerror(errno);
	else if (res == NULL)
		DEB_WARNING() << "could not get passwd entry for uid=" << uid;
	else
		user = pw.pw_name;
105

106
107
	DEB_ERROR() << "The command '" << m_cmd << "' is not allowed for "
		    << user << " in the sudoers database. ";
108
	DEB_ERROR() << "Check sudoers(5) man page and restart this process";
109
110
	if (!m_desc.empty())
		DEB_ERROR() << m_desc;
111

112
	THROW_HW_ERROR(Error) << "Cannot execute sudo " << m_cmd << "! "
113
114
115
			      << "See output for details";
}

116
117
118
119
120
121
122
123
124
// Stores the pipes that preparePipes() will later duplicate onto the
// standard descriptors. A NULL pointer leaves that stream untouched.
void SystemCmd::setPipes(Pipe *stdin, Pipe *stdout, Pipe *stderr)
{
	DEB_MEMBER_FUNCT();
	DEB_PARAM() << DEB_VAR3(stdin, stdout, stderr);

	m_stderr = stderr;
	m_stdout = stdout;
	m_stdin = stdin;
}

125
126
127
128
int SystemCmd::execute()
{
	DEB_MEMBER_FUNCT();
	string args = m_args.str();
129
	DEB_PARAM() << DEB_VAR3(m_cmd, args, m_try_sudo);
130
131
132
133
134
135
136

	ostringstream os;
	if (m_try_sudo && getUseSudo()) {
		checkSudo();
		os << "sudo -n ";
	}
	os << m_cmd << " " << args;
137
138
139
140
	if (!DEB_CHECK_ANY(DebTypeTrace) && !m_stdout)
		os << " > /dev/null";
	if (sameOutErr())
		os << " 2>&1";
141
	DEB_TRACE() << "executing: '" << os.str() << "'";
142
	preparePipes();
143
	int ret = system(os.str().c_str());
144
	restorePipes();
145
146
147
148
	DEB_RETURN() << DEB_VAR1(ret);
	return ret;
}

149
150
151
152
153
154
// Duplicates the configured pipes onto descriptors 0, 2 and 1 (in that
// order). STDERR is only handled separately when sameOutErr() is false.
void SystemCmd::preparePipes()
{
	DEB_MEMBER_FUNCT();
	DEB_PARAM() << DEB_VAR3(m_stdin, m_stdout, m_stderr);

	if (m_stdin) {
		DEB_TRACE() << "Duplicating PIPE into STDIN";
		m_stdin->dupInto(Pipe::ReadFd, 0);
	}

	const bool separate_err = (m_stderr && !sameOutErr());
	if (separate_err) {
		DEB_TRACE() << "Duplicating PIPE into STDERR";
		m_stderr->dupInto(Pipe::WriteFd, 2);
	}

	// STDOUT goes last, after the traces above have been emitted
	if (m_stdout) {
		DEB_TRACE() << "Duplicating PIPE into STDOUT";
		m_stdout->dupInto(Pipe::WriteFd, 1);
	}
}

// Undoes preparePipes() in reverse order: STDOUT, then STDERR (when it was
// handled separately), then STDIN.
void SystemCmd::restorePipes()
{
	DEB_MEMBER_FUNCT();
	DEB_PARAM() << DEB_VAR3(m_stdin, m_stdout, m_stderr);

	if (m_stdout) {
		m_stdout->restoreDup(Pipe::WriteFd);
		DEB_TRACE() << "Restored STDOUT";
	}

	const bool separate_err = (m_stderr && !sameOutErr());
	if (separate_err) {
		m_stderr->restoreDup(Pipe::WriteFd);
		DEB_TRACE() << "Restored STDERR";
	}

	if (m_stdin) {
		DEB_TRACE() << "Restored STDIN";
		m_stdin->restoreDup(Pipe::ReadFd);
	}
}

Alejandro Homs Puron's avatar
Alejandro Homs Puron committed
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
// Creates the NbPipes pipe slots, marks the child as not running (-1) and
// forwards the command parameters to the embedded SystemCmd.
SystemCmdPipe::SystemCmdPipe(string cmd, string desc, bool try_sudo)
	: m_pipe_list(NbPipes),
	  m_child_pid(-1),
	  m_cmd(cmd, desc, try_sudo)
{
	DEB_CONSTRUCTOR();
}

// Waits for the child process if one was started and not yet waited for.
SystemCmdPipe::~SystemCmdPipe()
{
	DEB_DESTRUCTOR();

	const bool child_started = (m_child_pid >= 0);
	if (!child_started)
		return;
	wait();
}

void SystemCmdPipe::start()
{
	DEB_MEMBER_FUNCT();

	if (m_child_pid >= 0)
		THROW_HW_ERROR(Error) << "cmd already running";

	m_cmd.setPipes(m_pipe_list[StdIn].ptr,
		       m_pipe_list[StdOut].ptr,
		       m_pipe_list[StdErr].ptr);

	m_child_pid = fork();
	if (m_child_pid == 0) {
		m_pipe_list[StdIn].close(Pipe::WriteFd);
		m_pipe_list[StdOut].close(Pipe::ReadFd);
		m_pipe_list[StdErr].close(Pipe::ReadFd);

		int ret = m_cmd.execute();
		DEB_RETURN() << DEB_VAR1(ret);
		_exit(ret);
	} else {
		m_pipe_list[StdIn].close(Pipe::ReadFd);
		m_pipe_list[StdOut].close(Pipe::WriteFd);
		m_pipe_list[StdErr].close(Pipe::WriteFd);
	}
}

void SystemCmdPipe::wait()
{
	DEB_MEMBER_FUNCT();
	if (m_child_pid < 0)
		THROW_HW_ERROR(Error) << "cmd not running";
	int child_ret;
	waitpid(m_child_pid, &child_ret, 0);
	DEB_TRACE() << DEB_VAR1(child_ret);
	m_child_pid = -1;
}

// Replaces the pipe slot idx with a new PipeData built from type.
void SystemCmdPipe::setPipe(PipeIdx idx, PipeType type)
{
	DEB_MEMBER_FUNCT();

	m_pipe_list[idx] = PipeData(type);
}

// Returns the pipe stored in slot idx; throws InvalidValue when the slot
// evaluates to false (no pipe).
Pipe& SystemCmdPipe::getPipe(PipeIdx idx)
{
	DEB_MEMBER_FUNCT();

	const bool has_pipe = !!m_pipe_list[idx];
	if (has_pipe)
		return *m_pipe_list[idx].ptr;

	THROW_HW_ERROR(InvalidValue) << "null pipe " << DEB_VAR1(idx);
}

251
int CPUAffinity::findNbSystemCPUs()
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
{
	DEB_STATIC_FUNCT();
	int nb_cpus = 0;
	const char *proc_file_name = "/proc/cpuinfo";
	ifstream proc_file(proc_file_name);
	while (proc_file) {
		char buffer[1024];
		proc_file.getline(buffer, sizeof(buffer));
		istringstream is(buffer);
		string t;
		is >> t;
		if (t == "processor")
			++nb_cpus;
	}
	DEB_RETURN() << DEB_VAR1(nb_cpus);
	return nb_cpus;
}

270
int CPUAffinity::findMaxNbSystemCPUs()
271
272
273
274
275
276
277
278
{
	DEB_STATIC_FUNCT();
	NumericGlob proc_glob("/proc/sys/kernel/sched_domain/cpu");
	int max_nb_cpus = proc_glob.getNbEntries();
	DEB_RETURN() << DEB_VAR1(max_nb_cpus);
	return max_nb_cpus;
}

279
int CPUAffinity::getNbSystemCPUs(bool max_nb)
280
281
{
	static int nb_cpus = 0;
282
	EXEC_ONCE(nb_cpus = findNbSystemCPUs());
283
	static int max_nb_cpus = 0;
284
	EXEC_ONCE(max_nb_cpus = findMaxNbSystemCPUs());
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
	int cpus = (max_nb && max_nb_cpus) ? max_nb_cpus : nb_cpus;
	return cpus;
}

std::string CPUAffinity::getProcDir(bool local_threads)
{
	DEB_STATIC_FUNCT();
	DEB_PARAM() << DEB_VAR1(local_threads);
	ostringstream os;
	os << "/proc/";
	if (local_threads)
		os << getpid() << "/task/";
	string proc_dir = os.str();
	DEB_RETURN() << DEB_VAR1(proc_dir);
	return proc_dir;
}

std::string CPUAffinity::getTaskProcDir(pid_t task, bool is_thread)
{
	DEB_STATIC_FUNCT();
	DEB_PARAM() << DEB_VAR2(task, is_thread);
	ostringstream os;
	os << getProcDir(is_thread) << task << "/";
	string proc_dir = os.str();
	DEB_RETURN() << DEB_VAR1(proc_dir);
	return proc_dir;
}

void CPUAffinity::initCPUSet(cpu_set_t& cpu_set) const
{
	CPU_ZERO(&cpu_set);
316
	uint64_t mask = getMask();
317
318
319
320
321
322
323
	for (unsigned int i = 0; i < sizeof(mask) * 8; ++i) {
		if ((mask >> i) & 1)
			CPU_SET(i, &cpu_set);
	}
}

void CPUAffinity::applyToTask(pid_t task, bool incl_threads,
324
			      bool use_taskset) const
325
326
{
	DEB_MEMBER_FUNCT();
327
	DEB_PARAM() << DEB_VAR4(*this, task, incl_threads, use_taskset);
328
329
330
331
332
333
334
335
336
337
338
339
340
341

	string proc_status = getTaskProcDir(task, !incl_threads) + "status";
	if (access(proc_status.c_str(), F_OK) != 0)
		return;

	if (use_taskset)
		applyWithTaskset(task, incl_threads);
	else
		applyWithSetAffinity(task, incl_threads);
}

void CPUAffinity::applyWithTaskset(pid_t task, bool incl_threads) const
{
	DEB_MEMBER_FUNCT();
342
	DEB_PARAM() << DEB_VAR3(*this, task, incl_threads);
343

344
	SystemCmd taskset("taskset");
345
	const char *all_tasks_opt = incl_threads ? "-a " : "";
346
347
	taskset.args() << all_tasks_opt << "-p " << *this << " " << task;
	if (taskset.execute() != 0) {
348
349
350
351
352
353
		const char *th = incl_threads ? "and threads " : "";
		THROW_HW_ERROR(Error) << "Error setting task " << task 
				      << " " << th << "CPU affinity";
	}
}

354
void CPUAffinity::applyWithSetAffinity(pid_t task, bool incl_threads) const
355
356
{
	DEB_MEMBER_FUNCT();
357
	DEB_PARAM() << DEB_VAR3(*this, task, incl_threads);
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388

	IntList task_list;
	if (incl_threads) {
		string proc_dir = getTaskProcDir(task, false) + "task/";
		NumericGlob proc_glob(proc_dir);
		typedef NumericGlob::IntStringList IntStringList;
		IntStringList list = proc_glob.getIntPathList();
		if (list.empty())
			THROW_HW_ERROR(Error) << "Cannot find task " << task 
					      << " threads";
		IntStringList::const_iterator it, end = list.end();
		for (it = list.begin(); it != end; ++it)
			task_list.push_back(it->first);
	} else {
		task_list.push_back(task);
	}

	cpu_set_t cpu_set;
	initCPUSet(cpu_set);
	IntList::const_iterator it, end = task_list.end();
	for (it = task_list.begin(); it != end; ++it) {
		DEB_TRACE() << "setting " << task << " CPU mask: " << *this;
		int ret = sched_setaffinity(task, sizeof(cpu_set), &cpu_set);
		if (ret != 0) {
			const char *th = incl_threads ? "and threads " : "";
			THROW_HW_ERROR(Error) << "Error setting task " << task 
					      << " " << th << "CPU affinity";
		}
	}
}

389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
// Builds the bit mask of the NUMA nodes that own the CPUs selected in
// getMask().
//
// node_mask is resized to hold numa_max_node() + 1 bits (rounded up to whole
// unsigned long items) and max_node is set to that number of nodes plus one.
// CPUs for which numa_node_of_cpu() does not return a valid node are skipped.
void CPUAffinity::getNUMANodeMask(vector<unsigned long>& node_mask,
				  int& max_node)
{
	DEB_MEMBER_FUNCT();

	typedef vector<unsigned long> Array;

	int nb_nodes = numa_max_node() + 1;
	const int item_bits = sizeof(Array::value_type) * 8;
	int nb_items = nb_nodes / item_bits;
	if (nb_nodes % item_bits != 0)
		++nb_items;

	DEB_PARAM() << DEB_VAR2(*this, nb_items);
	max_node = nb_nodes + 1;

	node_mask.assign(nb_items, 0);

	uint64_t mask = getMask();
	for (unsigned int i = 0; i < sizeof(mask) * 8; ++i) {
		if (!((mask >> i) & 1))
			continue;
		// numa_node_of_cpu() returns a negative value on error: keep
		// it signed so that it cannot index outside node_mask
		int n = numa_node_of_cpu(i);
		if ((n < 0) || (n >= nb_nodes)) {
			DEB_TRACE() << "no NUMA node for CPU " << i;
			continue;
		}
		// unsigned constant: shifting a signed 1 into the top bit
		// overflows
		node_mask[n / item_bits] |= 1UL << (n % item_bits);
	}

	if (DEB_CHECK_ANY(DebTypeReturn)) {
		ostringstream os;
		os << hex << "0x" << setfill('0');
		bool first = true;
		// most significant item first; the following ones are
		// zero-padded to their full width
		Array::reverse_iterator it, end = node_mask.rend();
		for (it = node_mask.rbegin(); it != end; ++it, first = false) {
			if (!first)
				os << "," << setw(item_bits / 4);
			os << *it;
		}
		DEB_RETURN() << "node_mask=" << os.str() << ", "
			     << DEB_VAR1(max_node);
	}
}

428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
// Static state shared by all IrqMgr objects: whether stopIrqBalance() has
// stopped irqbalance, and the devices added by updateRxQueueIrqAffinity().
bool IrqMgr::m_irqbalance_stopped = false;
StringList IrqMgr::m_dev_list;

// Creates the manager; a non-empty net_dev is registered through setDev().
IrqMgr::IrqMgr(string net_dev)
{
	DEB_CONSTRUCTOR();
	DEB_PARAM() << DEB_VAR1(net_dev);

	if (net_dev.empty())
		return;
	setDev(net_dev);
}

// Returns a still-managed device to the default affinity handling.
IrqMgr::~IrqMgr()
{
	DEB_DESTRUCTOR();
	DEB_PARAM() << DEB_VAR1(m_net_dev);

	const bool managed = isManaged(m_net_dev);
	if (!managed)
		return;
	updateRxQueueIrqAffinity(true);
}

// Assigns the network device handled by this object.
// Throws an Error if a device was already set on this object or if net_dev
// is currently in the managed device list.
void IrqMgr::setDev(string net_dev)
{
	// regular member function: use the member trace macro like the other
	// member functions, not the constructor one
	DEB_MEMBER_FUNCT();
	DEB_PARAM() << DEB_VAR1(net_dev);

	if (!m_net_dev.empty())
		THROW_HW_ERROR(Error) << "device already set";
	if (isManaged(net_dev))
		THROW_HW_ERROR(Error) << net_dev << " already managed";

	m_net_dev = net_dev;
}

// Tells whether net_dev is in the shared managed-device list; an empty name
// is never managed.
bool IrqMgr::isManaged(string net_dev)
{
	if (net_dev.empty())
		return false;

	StringList::iterator last = m_dev_list.end();
	StringList::iterator pos = find(m_dev_list.begin(), last, net_dev);
	return (pos != last);
}

// Returns the IRQ numbers of the device that are listed in
// /proc/interrupts.
//
// The candidate IRQs come from the numeric entries of
// /sys/class/net/<dev>/device/msi_irqs/. Each /proc/interrupts line is
// matched against a regular expression expecting one counter column per CPU
// (CPUAffinity::getNbSystemCPUs()); matching IRQs that belong to the device
// are returned in file order.
IntList IrqMgr::getIrqList()
{
	DEB_MEMBER_FUNCT();
	DEB_PARAM() << DEB_VAR1(m_net_dev);

	string pci_irq_dir = (string("/sys/class/net/") + m_net_dev + 
			      "/device/msi_irqs/");
	NumericGlob pci_irqs(pci_irq_dir);
	typedef NumericGlob::IntStringList IntStringList;
	IntStringList dev_irqs = pci_irqs.getIntPathList();

	IntList act_irqs;
	ifstream proc_ints("/proc/interrupts");
	ostringstream os;
	int nb_cpus = CPUAffinity::getNbSystemCPUs();
	os << "[ \t]*"
	   << "(?P<irq>[0-9]+):[ \t]+"
	   << "(?P<counts>(([0-9]+)[ \t]+){" << nb_cpus << "})"
	   << "(?P<type>[-A-Za-z0-9_]+)[ \t]+"
	   << "(?P<name>[-A-Za-z0-9_]+)";
	DEB_TRACE() << DEB_VAR1(os.str());
	RegEx int_re(os.str());
	// Read whole lines into a string: the line length grows with the
	// number of CPUs, and a fixed-size buffer sets failbit on the first
	// over-long line, which would end the scan silently
	string s;
	while (getline(proc_ints, s)) {
		DEB_TRACE() << DEB_VAR1(s);
		RegEx::FullNameMatchType match;
		if (!int_re.matchName(s, match))
			continue;
		int irq;
		istringstream(match["irq"]) >> irq;
		DEB_TRACE() << "found match: " << irq << ": " 
			     << string(match["name"]);
		IntStringList::iterator it, end = dev_irqs.end();
		bool ok = false;
		for (it = dev_irqs.begin(); !ok && (it != end); ++it)
			ok = (it->first == irq);
		if (ok)
			act_irqs.push_back(irq);
	}

	DEB_RETURN() << PrettyIntList(act_irqs);
	return act_irqs;
}

void IrqMgr::updateRxQueueIrqAffinity(bool default_affinity)
{
	DEB_MEMBER_FUNCT();
	DEB_PARAM() << DEB_VAR2(m_net_dev, default_affinity);

	if (isManaged(m_net_dev) && default_affinity) {
		StringList::iterator it, end = m_dev_list.end();
		it = find(m_dev_list.begin(), end, m_net_dev);
		m_dev_list.erase(it);
		if (m_dev_list.empty())
			restoreIrqBalance();
	} else if (!isManaged(m_net_dev) && !default_affinity) {
		stopIrqBalance();
		m_dev_list.push_back(m_net_dev);
	}
}

// Stops irqbalance if it is found running and records that this was done.
// Warns when it is running although it was already recorded as stopped.
void IrqMgr::stopIrqBalance()
{
	DEB_STATIC_FUNCT();
	DEB_PARAM() << DEB_VAR1(m_irqbalance_stopped);

	const bool running = getIrqBalanceActive();
	if (!running) {
		DEB_TRACE() << "nothing to do";
		return;
	}

	if (m_irqbalance_stopped)
		DEB_WARNING() << "irqbalance stopped and still running!";

	setIrqBalanceActive(false);
	m_irqbalance_stopped = true;
}

// Restarts irqbalance, but only if stopIrqBalance() recorded stopping it.
void IrqMgr::restoreIrqBalance()
{
	DEB_STATIC_FUNCT();
	DEB_PARAM() << DEB_VAR1(m_irqbalance_stopped);

	if (m_irqbalance_stopped) {
		setIrqBalanceActive(true);
		m_irqbalance_stopped = false;
	}
}

// Reports whether the shell pipeline looking for "irqbalance" in the output
// of "ps -ef" returns 0. The command is run without sudo.
bool IrqMgr::getIrqBalanceActive()
{
	DEB_STATIC_FUNCT();

	ConstStr cmd = "ps -ef | grep -v grep | grep irqbalance";
	SystemCmd bash("bash", "", false);
	bash.args() << "-c '";
	bash.args() << cmd;
	bash.args() << "'";

	bool act = (bash.execute() == 0);
	DEB_RETURN() << DEB_VAR1(act);
	return act;
}

// Runs "service irqbalance start" (act true) or "service irqbalance stop"
// (act false); throws an Error when the command does not return 0.
void IrqMgr::setIrqBalanceActive(bool act)
{
	DEB_STATIC_FUNCT();
	DEB_PARAM() << DEB_VAR1(act);

	ConstStr cmd = "stop";
	if (act)
		cmd = "start";

	SystemCmd service("service");
	DEB_ALWAYS() << "irqbalance: executing " << cmd;
	service.args() << "irqbalance " << cmd;

	const bool ok = (service.execute() == 0);
	if (!ok)
		THROW_HW_ERROR(Error) << "Could not " << cmd << " "
				      << "irqbalance service";
	DEB_ALWAYS() << "Done!";
}


584
// Stores the device name and hands it to the embedded IrqMgr.
NetDevRxQueueMgr::NetDevRxQueueMgr(string dev)
	: m_dev(dev),
	  m_irq_mgr(dev)
{
	DEB_CONSTRUCTOR();
	DEB_PARAM() << DEB_VAR1(m_dev);
}

591
592
593
594
595
596
597
// Re-applies an empty (default) affinity map unless the current one is
// already the default.
NetDevRxQueueMgr::~NetDevRxQueueMgr()
{
	DEB_DESTRUCTOR();

	const bool is_default = NetDevRxQueueAffinityMap_isDefault(m_aff_map);
	if (is_default)
		return;
	apply(NetDevRxQueueAffinityMap());
}

598
// Sets the device name when none is defined yet, also on the embedded
// IrqMgr. With a name already defined, a different name throws InvalidValue
// and the same name is accepted silently.
void NetDevRxQueueMgr::setDev(string dev)
{
	DEB_MEMBER_FUNCT();
	DEB_PARAM() << DEB_VAR1(dev);

	if (!m_dev.empty()) {
		if (dev != m_dev) {
			THROW_HW_ERROR(InvalidValue) << "name mismatch: "
						     << DEB_VAR2(dev, m_dev);
		}
		return;
	}

	m_dev = dev;
	m_irq_mgr.setDev(dev);
}
610

611
612
613
614
615
616
void NetDevRxQueueMgr::checkDev()
{
	DEB_MEMBER_FUNCT();
	if (m_dev.empty())
		THROW_HW_ERROR(InvalidValue) << "no device defined yet";
}
617
618

void NetDevRxQueueMgr::apply(int queue, const Affinity& queue_affinity)
619
620
621
622
623
{
	DEB_MEMBER_FUNCT();
	DEB_PARAM() << DEB_VAR3(m_dev, queue, queue_affinity);

	checkDev();
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
	apply(Irq, queue, queue_affinity.irq);	
	m_irq_mgr.updateRxQueueIrqAffinity(queue_affinity.irq.isDefault());
	apply(Processing, queue, queue_affinity.processing);
	m_aff_map[queue] = queue_affinity;
}

void NetDevRxQueueMgr::apply(const AffinityMap& affinity_map)
{
	m_aff_map.clear();
	if (NetDevRxQueueAffinityMap_isDefault(affinity_map)) {
		apply(-1, NetDevRxQueueCPUAffinity());
	} else {
		AffinityMap::const_iterator qit, qend = affinity_map.end();
		for (qit = affinity_map.begin(); qit != qend; ++qit)
			apply(qit->first, qit->second);
	}
640
641
}

642
// Applies affinity a to the IRQs or to the processing of a queue.
//
// First tries to write the affinity files directly; if any write fails, a
// per-task static flag disables that method from then on and the external
// setter program is tried. A setter failure is remembered in the same way,
// after which calls for that task do nothing.
void NetDevRxQueueMgr::apply(Task task, int queue, CPUAffinity a)
{
	DEB_MEMBER_FUNCT();
	DEB_PARAM() << DEB_VAR3(m_dev, queue, a);

	FileSetterData file_setter;
	if (task != Irq)
		getProcessingFileSetterData(queue, file_setter);
	else
		getIrqFileSetterData(queue, file_setter);

	static bool did_error_files[NbTasks] = {false, false};
	if (!did_error_files[task]) {
		const StringList& files = file_setter.file;
		StringList::const_iterator fit = files.begin();
		// stop at the first file that cannot be written
		while ((fit != files.end()) && applyWithFile(*fit, a))
			++fit;
		if (fit == files.end())
			return;
		DEB_WARNING() << "Could not write to files. Will try setter...";
		did_error_files[task] = true;
	}

	static bool did_error_setter[NbTasks] = {false, false};
	if (!did_error_setter[task]) {
		const StringList& setters = file_setter.setter;
		StringList::const_iterator sit = setters.begin();
		// stop at the first setter invocation that fails
		while ((sit != setters.end()) && applyWithSetter(task, *sit, a))
			++sit;
		if (sit == setters.end())
			return;
		DEB_ERROR() << "Could not use setter";
		did_error_setter[task] = true;
	}
}

void NetDevRxQueueMgr::getIrqFileSetterData(int queue,
					    FileSetterData& file_setter)
{
	DEB_MEMBER_FUNCT();

	if (queue != -1)
		THROW_HW_ERROR(NotSupported) << "only all queues (-1) mode "
					     << "is supported";

	IntList act_irqs = m_irq_mgr.getIrqList();
	IntList::const_iterator it, end = act_irqs.end();
	for (it = act_irqs.begin(); it != end; ++it) {
		ostringstream os1, os2;
		os1 << "/proc/irq/" << *it << "/smp_affinity";
		file_setter.file.push_back(os1.str());
		os2 << *it;
		file_setter.setter.push_back(os2.str());
	}
}

void NetDevRxQueueMgr::getProcessingFileSetterData(int queue,
						   FileSetterData& file_setter)
{
	DEB_MEMBER_FUNCT();
	DEB_PARAM() << DEB_VAR1(m_dev);

706
	enum {
707
		File, Setter, NbLists,
708
	};
709
710
711
712

	string glob_str(string("/sys/class/net/" + m_dev + "/queues/rx-"));
	NumericGlob rps_cpus_glob(glob_str, "/rps_cpus");

713
714
	typedef NumericGlob::IntStringList IntStringList;
	IntStringList list_array[NbLists];
715
716
	list_array[File] = rps_cpus_glob.getIntPathList();
	list_array[Setter] = rps_cpus_glob.getIntSubPathList(6);
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
	IntList filter_list;
	if (queue == -2)
		filter_list = getRxQueueList();
	else if (queue != -1)
		filter_list.push_back(queue);
	for (int i = 0; !filter_list.empty() && (i < NbLists); ++i) {
		IntStringList& list = list_array[i];
		while (true) {
			IntStringList::iterator lit, lend = list.end();
			bool ok = true;
			for (lit = list.begin(); ok && (lit != lend); ++lit) {
				IntList::iterator fit, fend;
				fend = filter_list.end();
				fit = find(filter_list.begin(), fend,
					   lit->first);
				ok = (fit != fend);
				if (!ok)
					list.erase(lit);
			}
			if (ok)
				break;
		}
	}
740
741
742
	if (list_array[File].size() != list_array[Setter].size()) {
		THROW_HW_ERROR(Error) << "File and Setter lists differ";
	} else if (list_array[File].empty()) {
743
744
745
		DEB_WARNING() << "No Rx queue (" << queue << ") for " << m_dev;
		return;
	}
746

747
748
749
750
	StringList *file_setter_list[NbLists] = {&file_setter.file,
						 &file_setter.setter};
	for (int i = 0; i < NbLists; ++i) {
		const IntStringList& list = list_array[i];
751
		IntStringList::const_iterator it, end = list.end();
752
753
		for (it = list.begin(); it != end; ++it)
			file_setter_list[i]->push_back(it->second);
754
755
756
	}
}

757
bool NetDevRxQueueMgr::applyWithFile(const string& fname, CPUAffinity a)
758
759
760
761
{
	DEB_MEMBER_FUNCT();

	ostringstream os;
762
	os << hex << a.getZeroDefaultMask();
763
	DEB_TRACE() << "writing " << os.str() << " to " << fname;
764
765
766
767
768
	ofstream aff_file(fname.c_str());
	if (aff_file)
		aff_file << os.str();
	if (aff_file)
		aff_file.close();
769
	bool file_ok(aff_file);
770
771
772
773
	DEB_RETURN() << DEB_VAR1(file_ok);
	return file_ok;
}

774
775
// Invokes the external affinity setter program:
//   <AffinitySetterName> -i|-r <dev> <irq_queue> 0x<hex mask>
// ("-i" for the Irq task, "-r" otherwise). Returns true when the command
// returns 0. The sudo help text is generated once and reused.
bool NetDevRxQueueMgr::applyWithSetter(Task task, const string& irq_queue,
				       CPUAffinity a)
{
	DEB_MEMBER_FUNCT();
	DEB_PARAM() << DEB_VAR2(m_dev, irq_queue);

	static string desc = getSetterSudoDesc();
	SystemCmd setter(AffinitySetterName, desc);

	ConstStr task_opt = "-r";
	if (task == Irq)
		task_opt = "-i";

	setter.args() << task_opt << " " << m_dev << " " << irq_queue << " ";
	setter.args() << hex << "0x" << a.getZeroDefaultMask();

	bool setter_ok = (setter.execute() == 0);
	DEB_RETURN() << DEB_VAR1(setter_ok);
	return setter_ok;
}

790
// Writes the setter program source (AffinitySetterSrc) to
// /tmp/<AffinitySetterName>.c and returns the instructions telling how to
// compile and install it. Failures to create or write the file are only
// logged as warnings; the instructions are returned in any case.
string NetDevRxQueueMgr::getSetterSudoDesc()
{
	DEB_STATIC_FUNCT();

	const string& setter_name = AffinitySetterName;
	const string dir = "/tmp";
	const string aux_setter = dir + "/" + setter_name;
	const string fname = aux_setter + ".c";

	ofstream src_file(fname.c_str());
	if (!src_file) {
		DEB_WARNING() << "Error creating " << fname;
	} else {
		const StringList& SrcList = AffinitySetterSrc;
		StringList::const_iterator line = SrcList.begin();
		// stop writing as soon as the stream reports an error
		for (; src_file && (line != SrcList.end()); ++line)
			src_file << *line << endl;
		if (src_file)
			src_file.close();
		if (!src_file)
			DEB_WARNING() << "Error writing to " << fname;
	}

	ostringstream desc;
	desc << "In order to create " << setter_name << ", compile " << fname
	     << " with the following commands: " << endl;
	desc << "  gcc -Wall -o " << aux_setter << " " << fname << endl;
	desc << "  su -c \"cp " << aux_setter << " /usr/local/bin\"" << endl;
	return desc.str();
}

820
821
// Name of the external helper program run by applyWithSetter(); also used by
// getSetterSudoDesc() to name the generated source file.
const string NetDevRxQueueMgr::AffinitySetterName = 
					"netdev_set_queue_cpu_affinity";
822

823
// C source of the affinity setter helper, one array element per line.
//
// Usage of the generated program: <prog> -i|-r <dev> <irq_queue> <mask>
//   -i writes the mask to /proc/irq/<irq_queue>/smp_affinity
//   -r writes it to /sys/class/net/<dev>/queues/<irq_queue>/rps_cpus
// Exit codes: 1 bad argument count, 2 bad option/empty or invalid argument,
// 3 bad mask, 4/5 formatting overflow, 6 open, 7 write, 8 close.
//
// The mask is parsed as unsigned long, so values with the top bit set are
// accepted. snprintf truncation is detected with ">= len". Path components
// containing '/' are rejected.
static const char *NetDevRxQueueMgrAffinitySetterSrcCList[] = {
"#include <stdio.h>",
"#include <stdlib.h>",
"#include <string.h>",
"#include <errno.h>",
"#include <unistd.h>",
"#include <sys/types.h>",
"#include <sys/stat.h>",
"#include <fcntl.h>",
"",
"int main(int argc, char *argv[])",
"{",
"	char *dev, *irq_queue, *p, fname[256], buffer[128];",
"	int irq, rps, fd, len, ret;",
"	unsigned long aff;",
"",
"	if (argc != 5)",
"		exit(1);",
"	irq = (strcmp(argv[1], \"-i\") == 0);",
"	rps = (strcmp(argv[1], \"-r\") == 0);",
"	if (!irq && !rps)",
"		exit(2);",
"	if (!strlen(argv[2]) || !strlen(argv[3]) || !strlen(argv[4]))",
"		exit(2);",
"",
"	dev = argv[2];",
"	irq_queue = argv[3];",
"	if (strchr(dev, '/') || strchr(irq_queue, '/'))",
"		exit(2);",
"",
"	errno = 0;",
"	aff = strtoul(argv[4], &p, 0);",
"	if (errno || *p)",
"		exit(3);",
"",
"	len = sizeof(fname);",
"	if (irq)",
"		ret = snprintf(fname, len, \"/proc/irq/%s/smp_affinity\",",
"			       irq_queue);",
"	else",
"		ret = snprintf(fname, len, \"/sys/class/net/%s/queues/%s/rps_cpus\",",
"			       dev, irq_queue);",
"	if ((ret < 0) || (ret >= len))",
"		exit(4);",
"",
"	len = sizeof(buffer);",
"	ret = snprintf(buffer, len, \"%016lx\", aff);",
"	if ((ret < 0) || (ret >= len))",
"		exit(5);",
"",
"	fd = open(fname, O_WRONLY);",
"	if (fd < 0)",
"		exit(6);",
"",
"	for (p = buffer; *p; p += ret)",
"		if ((ret = write(fd, p, strlen(p))) < 0)",
"			exit(7);",
"",
"	if (close(fd) < 0)",
"		exit(8);",
"	return 0;",
"}",
};
884
885
// Source lines of the setter helper as a StringList, built from the C array
// above; getSetterSudoDesc() writes them to a file, one per line.
const StringList NetDevRxQueueMgr::AffinitySetterSrc(
		C_LIST_ITERS(NetDevRxQueueMgrAffinitySetterSrcCList));
886

887
888
889
// Returns the Rx queues of the device whose packet counter is non-zero.
//
// The output of "ethtool -S <dev>" is read line by line through a pipe;
// lines of the form "rx_queue_<N>_packets: <count>" with a non-zero count
// contribute queue N. Reading stops at the first empty read. A warning is
// logged when no queue is found.
IntList NetDevRxQueueMgr::getRxQueueList()
{
	DEB_MEMBER_FUNCT();

	checkDev();

	IntList queue_list;

	SystemCmdPipe ethtool("ethtool");
	ethtool.args() << "-S " << m_dev;
	ethtool.setPipe(SystemCmdPipe::StdOut, SystemCmdPipe::DoPipe);
	ethtool.start();
	Pipe& child_out = ethtool.getPipe(SystemCmdPipe::StdOut);

	RegEx re("^[ \t]*rx_queue_(?P<queue>[0-9]+)_packets:[ \t]+"
		 "(?P<packets>[0-9]+)\n$");
	for (;;) {
		const string line = child_out.readLine(1024, "\n");
		if (line.empty())
			break;

		RegEx::FullNameMatchType match;
		const bool matched = re.matchName(line, match);
		if (!matched)
			continue;

		int queue;
		istringstream(match["queue"]) >> queue;
		unsigned long packets;
		istringstream(match["packets"]) >> packets;
		if (packets == 0)
			continue;

		DEB_TRACE() << m_dev << " RxQueue " << queue << ": "
			    << packets << " packets";
		queue_list.push_back(queue);
	}

	if (queue_list.empty())
		DEB_WARNING() << "No queue found for " << m_dev;

	DEB_RETURN() << DEB_VAR1(PrettyIntList(queue_list));
	return queue_list;
}

929
// Forks the watchdog child process and connects it through the command and
// result pipes.
//
// The child closes the pipe ends it does not use, ignores SIGINT, installs
// sigTermHandler for SIGTERM, runs childFunction() and terminates with
// _exit(0). The parent closes the opposite ends and sends the Init command,
// which returns once the child has acknowledged it.
// Throws an Error if fork() fails.
SystemCPUAffinityMgr::WatchDog::WatchDog()
	: m_cmd_pipe(0, true), m_res_pipe(0, true)
{
	DEB_CONSTRUCTOR();

	m_lima_pid = getpid();

	m_child_pid = fork();
	if (m_child_pid < 0) {
		// Without this check the parent branch would go on to
		// waitpid(-1, ...) and to pipe I/O without any peer
		int err = errno;
		THROW_HW_ERROR(Error) << "Could not fork watchdog child: "
				      << strerror(err);
	}

	if (m_child_pid == 0) {
		m_child_pid = getpid();
		DEB_TRACE() << DEB_VAR2(m_lima_pid, m_child_pid);

		m_cmd_pipe.close(Pipe::WriteFd);
		m_res_pipe.close(Pipe::ReadFd);

		signal(SIGINT, SIG_IGN);
		signal(SIGTERM, sigTermHandler);

		childFunction();
		_exit(0);
	} else {
		m_cmd_pipe.close(Pipe::ReadFd);
		m_res_pipe.close(Pipe::WriteFd);

		sendChildCmd(Packet(Init));
		DEB_TRACE() << "Child is ready";
	}
}

958
// If the child is still alive, asks it to clean up and waits for it to
// terminate.
SystemCPUAffinityMgr::WatchDog::~WatchDog()
{
	DEB_DESTRUCTOR();

	if (childEnded())
		return;

	sendChildCmd(Packet(CleanUp));
	waitpid(m_child_pid, NULL, 0);
}

968
// SIGTERM handler installed in the watchdog child by the constructor.
// The body is empty: the signal number is not used.
void SystemCPUAffinityMgr::WatchDog::sigTermHandler(int /*signo*/)
{
	// nothing to do
}

972
bool SystemCPUAffinityMgr::WatchDog::childEnded()
973
974
975
976
{
	return (waitpid(m_child_pid, NULL, WNOHANG) != 0);
}

977
void SystemCPUAffinityMgr::WatchDog::sendChildCmd(const Packet& packet)
978
979
{
	DEB_MEMBER_FUNCT();
980
	DEB_PARAM() << DEB_VAR1(packet.cmd);
981
982
983
984

	if (childEnded())
		THROW_HW_ERROR(Error) << "Watchdog child process killed: " 
				      << m_child_pid;
985

986
	void *p = static_cast<void *>(const_cast<Packet *>(&packet));
987
988
989
990
991
992
993
994
995
996
	string s(static_cast<char *>(p), sizeof(packet));
	m_cmd_pipe.write(s);

	s = m_res_pipe.read(1);
	Cmd res = Cmd(s.data()[0]);
	if (res != Ok)
		THROW_HW_ERROR(Error) << "Invalid watchdog child ack";
	DEB_TRACE() << "Watchdog child acknowledged Ok";
}

997
998
SystemCPUAffinityMgr::WatchDog::Packet
SystemCPUAffinityMgr::WatchDog::readParentCmd()
999
1000
{
	DEB_MEMBER_FUNCT();