SlsDetectorCPUAffinity.cpp 48.8 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
//###########################################################################
// This file is part of LImA, a Library for Image Acquisition
//
// Copyright (C) : 2009-2011
// European Synchrotron Radiation Facility
// BP 220, Grenoble 38043
// FRANCE
//
// This is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 3 of the License, or
// (at your option) any later version.
//
// This software is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, see <http://www.gnu.org/licenses/>.
//###########################################################################

#include "SlsDetectorCamera.h"

#include "lima/Timestamp.h"
#include "lima/CtAcquisition.h"
#include "lima/CtSaving.h"
#include "lima/SoftOpExternalMgr.h"
#include "lima/RegExUtils.h"
30
#include "lima/MiscUtils.h"
31
32
33
34
35

#include <signal.h>
#include <unistd.h>
#include <sys/wait.h>
#include <sys/types.h>
36
#include <pwd.h>
37
38
#include <numa.h>
#include <iomanip>
39
40
41
42
43

using namespace std;
using namespace lima;
using namespace lima::SlsDetector;

44
bool SystemCmd::UseSudo = true;
45

46
// Create a command descriptor from the executable name, an optional
// description and the flag telling whether sudo should be tried.
// No pipe is attached to the standard streams at construction.
SystemCmd::SystemCmd(string cmd, string desc, bool try_sudo)
	: m_cmd(cmd),
	  m_desc(desc),
	  m_try_sudo(try_sudo),
	  m_stdin(nullptr),
	  m_stdout(nullptr),
	  m_stderr(nullptr)
{
	DEB_CONSTRUCTOR();
	DEB_PARAM() << DEB_VAR3(m_cmd, m_desc, m_try_sudo);
}

// Copy constructor: duplicates the command name, description, sudo flag
// and the three pipe pointers of the source object (pointers are copied,
// not the pipes themselves).
SystemCmd::SystemCmd(const SystemCmd& o)
	: m_cmd(o.m_cmd),
	  m_desc(o.m_desc),
	  m_try_sudo(o.m_try_sudo),
	  m_stdin(o.m_stdin),
	  m_stdout(o.m_stdout),
	  m_stderr(o.m_stderr)
{
	DEB_CONSTRUCTOR();
	DEB_PARAM() << DEB_VAR3(m_cmd, m_desc, m_try_sudo);
	DEB_PARAM() << DEB_VAR3(m_stdin, m_stdout, m_stderr);
}

// Set the class-wide flag consulted by execute() before prefixing
// commands with sudo.
void SystemCmd::setUseSudo(bool use_sudo)
{
	SystemCmd::UseSudo = use_sudo;
}

68
// Return the class-wide sudo flag.
bool SystemCmd::getUseSudo()
{
	const bool use_sudo = SystemCmd::UseSudo;
	return use_sudo;
}

73
void SystemCmd::checkSudo()
74
75
76
77
78
{
	DEB_STATIC_FUNCT();

	typedef map<string, bool> CacheMap;
	static CacheMap cache_map;
79
	CacheMap::iterator it = cache_map.find(m_cmd);
80
	if (it == cache_map.end()) {
81
82
83
84
		SystemCmd sudo("sudo", "", false);
		sudo.args() << "-l " << m_cmd;
		bool ok = (sudo.execute() == 0);
		CacheMap::value_type entry(m_cmd, ok);
85
86
87
88
89
90
91
92
		pair<CacheMap::iterator, bool> v = cache_map.insert(entry);
		if (!v.second)
			THROW_HW_ERROR(Error) << "Error inserting cache entry";
		it = v.first;
	}
	if (it->second)
		return;

93
94
95
96
97
98
99
100
101
102
103
104
	const char *user = "Unknown";
	uid_t uid = getuid();
	DEB_TRACE() << DEB_VAR1(uid);
	struct passwd pw, *res;
	char buffer[4 * 1024];
	int ret = getpwuid_r(uid, &pw, buffer, sizeof(buffer), &res);
	if (ret != 0)
		DEB_WARNING() << "getpwuid_r failed: " << strerror(errno);
	else if (res == NULL)
		DEB_WARNING() << "could not get passwd entry for uid=" << uid;
	else
		user = pw.pw_name;
105

106
107
	DEB_ERROR() << "The command '" << m_cmd << "' is not allowed for "
		    << user << " in the sudoers database. ";
108
	DEB_ERROR() << "Check sudoers(5) man page and restart this process";
109
110
	if (!m_desc.empty())
		DEB_ERROR() << m_desc;
111

112
	THROW_HW_ERROR(Error) << "Cannot execute sudo " << m_cmd << "! "
113
114
115
			      << "See output for details";
}

116
117
118
119
120
121
122
123
124
// Store the pipes that execute() will substitute for the standard
// streams of the command; a null pointer leaves that stream untouched.
void SystemCmd::setPipes(Pipe *stdin, Pipe *stdout, Pipe *stderr)
{
	DEB_MEMBER_FUNCT();
	DEB_PARAM() << DEB_VAR3(stdin, stdout, stderr);

	m_stderr = stderr;
	m_stdout = stdout;
	m_stdin = stdin;
}

125
126
127
128
int SystemCmd::execute()
{
	DEB_MEMBER_FUNCT();
	string args = m_args.str();
129
	DEB_PARAM() << DEB_VAR3(m_cmd, args, m_try_sudo);
130
131
132
133
134
135
136

	ostringstream os;
	if (m_try_sudo && getUseSudo()) {
		checkSudo();
		os << "sudo -n ";
	}
	os << m_cmd << " " << args;
137
138
139
140
	if (!DEB_CHECK_ANY(DebTypeTrace) && !m_stdout)
		os << " > /dev/null";
	if (sameOutErr())
		os << " 2>&1";
141
	DEB_TRACE() << "executing: '" << os.str() << "'";
142
	preparePipes();
143
	int ret = system(os.str().c_str());
144
	restorePipes();
145
146
147
148
	DEB_RETURN() << DEB_VAR1(ret);
	return ret;
}

149
150
151
152
153
154
// Duplicate the configured pipes into file descriptors 0, 2 and 1
// (in that order). The stderr pipe is skipped when sameOutErr() is true.
void SystemCmd::preparePipes()
{
	DEB_MEMBER_FUNCT();
	DEB_PARAM() << DEB_VAR3(m_stdin, m_stdout, m_stderr);

	if (m_stdin != nullptr) {
		DEB_TRACE() << "Duplicating PIPE into STDIN";
		m_stdin->dupInto(Pipe::ReadFd, STDIN_FILENO);
	}

	const bool own_stderr = (m_stderr != nullptr) && !sameOutErr();
	if (own_stderr) {
		DEB_TRACE() << "Duplicating PIPE into STDERR";
		m_stderr->dupInto(Pipe::WriteFd, STDERR_FILENO);
	}

	if (m_stdout != nullptr) {
		DEB_TRACE() << "Duplicating PIPE into STDOUT";
		m_stdout->dupInto(Pipe::WriteFd, STDOUT_FILENO);
	}
}

// Undo preparePipes(): restore the duplicated descriptors in the reverse
// order (stdout, stderr, stdin). Each "Restored ..." trace is emitted only
// after the corresponding restoreDup() call has returned.
void SystemCmd::restorePipes()
{
	DEB_MEMBER_FUNCT();
	DEB_PARAM() << DEB_VAR3(m_stdin, m_stdout, m_stderr);
	if (m_stdout) {
		m_stdout->restoreDup(Pipe::WriteFd);
		DEB_TRACE() << "Restored STDOUT";
	}
	if (m_stderr && !sameOutErr()) {
		m_stderr->restoreDup(Pipe::WriteFd);
		DEB_TRACE() << "Restored STDERR";
	}
	if (m_stdin) {
		// trace after the restore, consistently with stdout/stderr
		m_stdin->restoreDup(Pipe::ReadFd);
		DEB_TRACE() << "Restored STDIN";
	}
}

Alejandro Homs Puron's avatar
Alejandro Homs Puron committed
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
// Create a piped command: NbPipes pipe slots, no child process running
// (pid -1), and the wrapped SystemCmd built from the same arguments.
SystemCmdPipe::SystemCmdPipe(string cmd, string desc, bool try_sudo)
	: m_pipe_list(NbPipes),
	  m_child_pid(-1),
	  m_cmd(cmd, desc, try_sudo)
{
	DEB_CONSTRUCTOR();
}

// Wait for the child process, if one was started and not yet waited for.
SystemCmdPipe::~SystemCmdPipe()
{
	DEB_DESTRUCTOR();

	const bool child_started = (m_child_pid >= 0);
	if (child_started)
		wait();
}

void SystemCmdPipe::start()
{
	DEB_MEMBER_FUNCT();

	if (m_child_pid >= 0)
		THROW_HW_ERROR(Error) << "cmd already running";

	m_cmd.setPipes(m_pipe_list[StdIn].ptr,
		       m_pipe_list[StdOut].ptr,
		       m_pipe_list[StdErr].ptr);

	m_child_pid = fork();
	if (m_child_pid == 0) {
212
213
214
215
		// if reading the output, disable debug
		if (m_pipe_list[StdOut].ptr)
			DebParams::setTypeFlags(0);

Alejandro Homs Puron's avatar
Alejandro Homs Puron committed
216
217
218
219
220
221
222
223
224
225
226
227
228
229
		m_pipe_list[StdIn].close(Pipe::WriteFd);
		m_pipe_list[StdOut].close(Pipe::ReadFd);
		m_pipe_list[StdErr].close(Pipe::ReadFd);

		int ret = m_cmd.execute();
		DEB_RETURN() << DEB_VAR1(ret);
		_exit(ret);
	} else {
		m_pipe_list[StdIn].close(Pipe::ReadFd);
		m_pipe_list[StdOut].close(Pipe::WriteFd);
		m_pipe_list[StdErr].close(Pipe::WriteFd);
	}
}

230
int SystemCmdPipe::wait()
Alejandro Homs Puron's avatar
Alejandro Homs Puron committed
231
232
233
234
235
236
237
{
	DEB_MEMBER_FUNCT();
	if (m_child_pid < 0)
		THROW_HW_ERROR(Error) << "cmd not running";
	int child_ret;
	waitpid(m_child_pid, &child_ret, 0);
	m_child_pid = -1;
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
	DEB_RETURN() << DEB_VAR1(child_ret);
	return child_ret;
}

// Collect the lines written by the child on its stdout/stderr pipes into
// 'out' and 'err' (both cleared first), then wait for the child.
//
// Throws InvalidValue if neither a stdout nor a stderr pipe is set.
// Lines are read alternately from both pipes until a round yields no
// data on either of them. Returns the result of wait().
int SystemCmdPipe::wait(StringList& out, StringList& err)
{
	DEB_MEMBER_FUNCT();

	// Helper reading one line at a time from an optional pipe
	struct LineCollector {
		StringList& lines;
		Pipe *pipe;

		LineCollector(SystemCmdPipe& c, PipeIdx i, StringList& l)
			: lines(l),
			  pipe(c.m_pipe_list[i] ? &c.getPipe(i) : nullptr)
		{
			lines.clear();
		}

		// Returns true when a non-empty line was appended
		bool read()
		{
			const int buffer_len = 1024;
			const char *term = "\n";
			if (pipe == nullptr)
				return false;
			const string s = pipe->readLine(buffer_len, term);
			if (s.empty())
				return false;
			lines.push_back(s);
			return true;
		}
	};

	LineCollector out_reader(*this, StdOut, out);
	LineCollector err_reader(*this, StdErr, err);
	if ((out_reader.pipe == nullptr) && (err_reader.pipe == nullptr))
		THROW_HW_ERROR(InvalidValue) << "Cmd has no pipe";

	bool more = true;
	while (more) {
		const bool had_out = out_reader.read();
		const bool had_err = err_reader.read();
		more = had_out || had_err;
	}

	int ret = wait();
	DEB_RETURN() << DEB_VAR3(ret, out, err);
	return ret;
}

// Replace the pipe slot 'idx' with a new PipeData built from 'type'.
void SystemCmdPipe::setPipe(PipeIdx idx, PipeType type)
{
	DEB_MEMBER_FUNCT();
	PipeData new_pipe(type);
	m_pipe_list[idx] = new_pipe;
}

// Return the pipe stored in slot 'idx'; throws InvalidValue when the
// slot holds no pipe.
Pipe& SystemCmdPipe::getPipe(PipeIdx idx)
{
	DEB_MEMBER_FUNCT();
	const bool has_pipe = bool(m_pipe_list[idx]);
	if (!has_pipe)
		THROW_HW_ERROR(InvalidValue) << "null pipe " << DEB_VAR1(idx);
	return *m_pipe_list[idx].ptr;
}

299
int CPUAffinity::findNbSystemCPUs()
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
{
	DEB_STATIC_FUNCT();
	int nb_cpus = 0;
	const char *proc_file_name = "/proc/cpuinfo";
	ifstream proc_file(proc_file_name);
	while (proc_file) {
		char buffer[1024];
		proc_file.getline(buffer, sizeof(buffer));
		istringstream is(buffer);
		string t;
		is >> t;
		if (t == "processor")
			++nb_cpus;
	}
	DEB_RETURN() << DEB_VAR1(nb_cpus);
	return nb_cpus;
}

318
int CPUAffinity::findMaxNbSystemCPUs()
319
320
321
322
323
324
325
326
{
	DEB_STATIC_FUNCT();
	NumericGlob proc_glob("/proc/sys/kernel/sched_domain/cpu");
	int max_nb_cpus = proc_glob.getNbEntries();
	DEB_RETURN() << DEB_VAR1(max_nb_cpus);
	return max_nb_cpus;
}

327
int CPUAffinity::getNbSystemCPUs(bool max_nb)
328
329
{
	static int nb_cpus = 0;
330
	EXEC_ONCE(nb_cpus = findNbSystemCPUs());
331
	static int max_nb_cpus = 0;
332
	EXEC_ONCE(max_nb_cpus = findMaxNbSystemCPUs());
333
334
335
	if (max_nb_cpus > MaxNbCPUs)
		throw LIMA_HW_EXC(Error, "Too many CPUS: ")
			<< max_nb_cpus << ", MaxNbCPUs=" << MaxNbCPUs;
336
337
338
339
	int cpus = (max_nb && max_nb_cpus) ? max_nb_cpus : nb_cpus;
	return cpus;
}

340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
// Return a reference to a function-local static mask in which the bits
// [0, getNbSystemCPUs(max_nb)) are set. Two independent static masks are
// kept, one for each value of max_nb.
// NOTE(review): EXEC_ONCE is defined elsewhere; presumably it executes the
// fill loop only on the first call - confirm.
const CPUAffinity::Mask &CPUAffinity::allCPUs(bool max_nb)
{
	if (max_nb) {
		// mask covering the maximum number of CPUs
		static Mask max_nb_mask;
		EXEC_ONCE(for (int i = 0; i < getNbSystemCPUs(true); ++i)
				max_nb_mask.set(i));
		return max_nb_mask;
	} else {
		// mask covering the CPUs counted by getNbSystemCPUs(false)
		static Mask mask;
		EXEC_ONCE(for (int i = 0; i < getNbSystemCPUs(false); ++i)
				mask.set(i));
		return mask;
	}
}

355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
std::string CPUAffinity::getProcDir(bool local_threads)
{
	DEB_STATIC_FUNCT();
	DEB_PARAM() << DEB_VAR1(local_threads);
	ostringstream os;
	os << "/proc/";
	if (local_threads)
		os << getpid() << "/task/";
	string proc_dir = os.str();
	DEB_RETURN() << DEB_VAR1(proc_dir);
	return proc_dir;
}

std::string CPUAffinity::getTaskProcDir(pid_t task, bool is_thread)
{
	DEB_STATIC_FUNCT();
	DEB_PARAM() << DEB_VAR2(task, is_thread);
	ostringstream os;
	os << getProcDir(is_thread) << task << "/";
	string proc_dir = os.str();
	DEB_RETURN() << DEB_VAR1(proc_dir);
	return proc_dir;
}

void CPUAffinity::initCPUSet(cpu_set_t& cpu_set) const
{
	CPU_ZERO(&cpu_set);
382
383
384
	Mask mask = getMask();
	for (unsigned int i = 0; i < MaxNbCPUs; ++i) {
		if (mask.test(i))
385
386
387
388
389
			CPU_SET(i, &cpu_set);
	}
}

void CPUAffinity::applyToTask(pid_t task, bool incl_threads,
390
			      bool use_taskset) const
391
392
{
	DEB_MEMBER_FUNCT();
393
	DEB_PARAM() << DEB_VAR4(*this, task, incl_threads, use_taskset);
394
395
396
397
398
399
400
401
402
403
404
405
406
407

	string proc_status = getTaskProcDir(task, !incl_threads) + "status";
	if (access(proc_status.c_str(), F_OK) != 0)
		return;

	if (use_taskset)
		applyWithTaskset(task, incl_threads);
	else
		applyWithSetAffinity(task, incl_threads);
}

void CPUAffinity::applyWithTaskset(pid_t task, bool incl_threads) const
{
	DEB_MEMBER_FUNCT();
408
	DEB_PARAM() << DEB_VAR3(*this, task, incl_threads);
409

410
	SystemCmd taskset("taskset");
411
	const char *all_tasks_opt = incl_threads ? "-a " : "";
412
413
	taskset.args() << all_tasks_opt << "-p " << *this << " " << task;
	if (taskset.execute() != 0) {
414
415
416
417
418
419
		const char *th = incl_threads ? "and threads " : "";
		THROW_HW_ERROR(Error) << "Error setting task " << task 
				      << " " << th << "CPU affinity";
	}
}

420
void CPUAffinity::applyWithSetAffinity(pid_t task, bool incl_threads) const
421
422
{
	DEB_MEMBER_FUNCT();
423
	DEB_PARAM() << DEB_VAR3(*this, task, incl_threads);
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454

	IntList task_list;
	if (incl_threads) {
		string proc_dir = getTaskProcDir(task, false) + "task/";
		NumericGlob proc_glob(proc_dir);
		typedef NumericGlob::IntStringList IntStringList;
		IntStringList list = proc_glob.getIntPathList();
		if (list.empty())
			THROW_HW_ERROR(Error) << "Cannot find task " << task 
					      << " threads";
		IntStringList::const_iterator it, end = list.end();
		for (it = list.begin(); it != end; ++it)
			task_list.push_back(it->first);
	} else {
		task_list.push_back(task);
	}

	cpu_set_t cpu_set;
	initCPUSet(cpu_set);
	IntList::const_iterator it, end = task_list.end();
	for (it = task_list.begin(); it != end; ++it) {
		DEB_TRACE() << "setting " << task << " CPU mask: " << *this;
		int ret = sched_setaffinity(task, sizeof(cpu_set), &cpu_set);
		if (ret != 0) {
			const char *th = incl_threads ? "and threads " : "";
			THROW_HW_ERROR(Error) << "Error setting task " << task 
					      << " " << th << "CPU affinity";
		}
	}
}

455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
// Build the bit mask of the NUMA nodes owning the CPUs selected in
// getMask().
//
// node_mask is resized to hold one bit per node in [0, numa_max_node()]
// and zero-filled; bit n is set when at least one selected CPU belongs
// to node n. max_node is set to (number of nodes + 1).
// CPUs for which numa_node_of_cpu() reports no valid node are skipped
// with a warning.
void CPUAffinity::getNUMANodeMask(vector<unsigned long>& node_mask,
				  int& max_node)
{
	DEB_MEMBER_FUNCT();

	typedef vector<unsigned long> Array;

	int nb_nodes = numa_max_node() + 1;
	const int item_bits = sizeof(Array::value_type) * 8;
	int nb_items = nb_nodes / item_bits;
	if (nb_nodes % item_bits != 0)
		++nb_items;

	DEB_PARAM() << DEB_VAR2(*this, nb_items);
	max_node = nb_nodes + 1;

	node_mask.assign(nb_items, 0);

	Mask mask = getMask();
	for (unsigned int i = 0; i < MaxNbCPUs; ++i) {
		if (!mask.test(i))
			continue;
		// numa_node_of_cpu returns -1 on error: keep it signed and
		// validate before using it as an index into node_mask
		const int n = numa_node_of_cpu(i);
		if ((n < 0) || (n >= nb_nodes)) {
			DEB_WARNING() << "No valid NUMA node for CPU " << i;
			continue;
		}
		Array::reference v = node_mask[n / item_bits];
		v |= 1UL << (n % item_bits);
	}

	if (DEB_CHECK_ANY(DebTypeReturn)) {
		ostringstream os;
		// NOTE(review): setw() here applies to the next inserted item,
		// i.e. the (empty) separator string, not to the mask values.
		os << hex << "0x" << setw(nb_nodes / 4) << setfill('0');
		bool first = true;
		Array::reverse_iterator it, end = node_mask.rend();
		for (it = node_mask.rbegin(); it != end; ++it, first = false)
			os << (!first ? "," : "") << *it;
		DEB_RETURN() << "node_mask=" << os.str() << ", "
			     << DEB_VAR1(max_node);
	}
}

494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
void CPUAffinity::maskToULongArray(const Mask& mask, ULongArray& array)
{
	constexpr Mask ULongMask(std::numeric_limits<unsigned long>::max());
	for (int i = 0; i < NbULongs; ++i)
		array[i] = ((mask >> (i * NbULongBits)) & ULongMask).to_ulong();
}

// Rebuild a mask from NbULongs unsigned long words: array[i] provides
// the bits starting at bit position i * NbULongBits.
CPUAffinity::Mask CPUAffinity::maskFromULongArray(const ULongArray& array)
{
	Mask mask;
	for (int i = 0; i < NbULongs; ++i) {
		const Mask word(array[i]);
		mask |= word << (i * NbULongBits);
	}
	return mask;
}

// Convert 'mask' to text. Base 2 returns mask.to_string(); base 16
// prints the mask as zero-padded groups of 8 hex digits (32 bits each),
// most significant group first, separated by commas when comma_sep is
// set. Any other base throws InvalidValue.
string CPUAffinity::maskToString(const Mask& mask, int base, bool comma_sep)
{
	if (base == 2)
		return mask.to_string();
	if (base != 16)
		throw LIMA_HW_EXC(InvalidValue, "Invalid base: ") << base;

	ULongArray array;
	maskToULongArray(mask, array);

	constexpr int NbULongInts = NbULongBits / 32;
	ostringstream os;
	os << hex << setfill('0');
	bool first_group = true;
	for (int i = NbULongs - 1; i >= 0; --i) {
		const unsigned long ul_val = array[i];
		for (int j = NbULongInts - 1; j >= 0; --j) {
			const uint32_t ui_val = (ul_val >> (j * 32)) & 0xffffffff;
			if (comma_sep && !first_group)
				os << ",";
			os << setw(32 / 4) << ui_val;
			first_group = false;
		}
	}
	return os.str();
}

// Parse a textual CPU mask in base 2 or 16 (any other base throws
// InvalidValue).
//
// A leading "0x" (base 16 only) and a single trailing 'L' are removed.
// The string is then scanned from its last character backwards, skipping
// commas, and cut into chunks of chars_per_ulong digits, each converted
// with stoul() into one word of a ULongArray (least significant first).
// Words not covered by the string are set to 0. A string with more
// digits than NbULongs words can hold, or a chunk that stoul() does not
// consume entirely, throws InvalidValue.
// NOTE(review): stoul() itself throws std::invalid_argument when no
// conversion can be performed; that exception is not translated here.
CPUAffinity::Mask CPUAffinity::maskFromString(string aff_str, int base)
{
	if ((base != 2) && (base != 16))
		throw LIMA_HW_EXC(InvalidValue, "Invalid base: ") << base;
	
	// strip the optional "0x" prefix and 'L' suffix
	if (!aff_str.empty() && (base == 16) && (aff_str.find("0x") == 0))
		aff_str.erase(0, 2);
	size_t len = aff_str.size();
	if (len && (aff_str.rfind('L') == len - 1))
		aff_str.erase(len - 1);

// convert string s into unsigned long v, requiring all of s to be consumed
#define convert_to_ulong(s, v)						\
	do {								\
		size_t pos;						\
		v = stoul(s, &pos, base);				\
		if (pos != s.size())					\
			throw LIMA_HW_EXC(InvalidValue, "Invalid mask: ") \
						<< aff_str;		\
	} while (0)

	ULongArray array;
	const size_t bits_per_char = (base == 16) ? 4 : 1;
	const size_t chars_per_ulong = NbULongBits / bits_per_char;
	int idx = 0;
	string val;
	// walk from the least significant digit towards the most significant
	string::const_reverse_iterator it, end = aff_str.rend();
	for (it = aff_str.rbegin(); it != end; ++it) {
		if (*it == ',')
			continue;
		else if (idx == NbULongs)
			throw LIMA_HW_EXC(InvalidValue, "Invalid mask: ")
							<< aff_str;
		// prepend, so that val keeps the digits in reading order
		val.insert(0, 1, *it);
		if (val.size() == chars_per_ulong) {
			convert_to_ulong(val, array[idx++]);
			val.clear();
		}
	}
	// last, possibly incomplete, chunk
	if (!val.empty())
		convert_to_ulong(val, array[idx++]);
	// zero-fill the remaining words
	while (idx < NbULongs)
		array[idx++] = 0;

#undef convert_to_ulong

	return maskFromULongArray(array);
}


583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
bool IrqMgr::m_irqbalance_stopped = false;
StringList IrqMgr::m_dev_list;

// Create an IRQ manager; a non-empty device name is registered through
// setDev().
IrqMgr::IrqMgr(string net_dev)
{
	DEB_CONSTRUCTOR();
	DEB_PARAM() << DEB_VAR1(net_dev);

	const bool has_dev = !net_dev.empty();
	if (has_dev)
		setDev(net_dev);
}

// If the device is still in the managed list, switch it back to the
// default affinity handling before destruction.
IrqMgr::~IrqMgr()
{
	DEB_DESTRUCTOR();
	DEB_PARAM() << DEB_VAR1(m_net_dev);

	const bool still_managed = isManaged(m_net_dev);
	if (still_managed)
		updateRxQueueIrqAffinity(true);
}

// Set the network device handled by this manager.
// Throws Error if a device was already set on this object or if
// 'net_dev' is already present in the shared managed-device list.
void IrqMgr::setDev(string net_dev)
{
	// regular member function: use the member-function debug macro,
	// as the other non-constructor methods do
	DEB_MEMBER_FUNCT();
	DEB_PARAM() << DEB_VAR1(net_dev);

	if (!m_net_dev.empty())
		THROW_HW_ERROR(Error) << "device already set";
	if (isManaged(net_dev))
		THROW_HW_ERROR(Error) << net_dev << " already managed";

	m_net_dev = net_dev;
}

// Tell whether 'net_dev' is present in the shared managed-device list;
// an empty name is never managed.
bool IrqMgr::isManaged(string net_dev)
{
	if (net_dev.empty())
		return false;

	StringList::iterator last = m_dev_list.end();
	const bool found = (find(m_dev_list.begin(), last, net_dev) != last);
	return found;
}

// Return the IRQ numbers of the network device that appear in
// /proc/interrupts.
//
// The candidate IRQs are the numeric entries of
// /sys/class/net/<dev>/device/msi_irqs/. Each line of /proc/interrupts
// is matched against a regular expression expecting an IRQ number, one
// counter per CPU (getNbSystemCPUs()) and 2 to 4 info tokens; matching
// IRQs that are also in the candidate list are returned.
IntList IrqMgr::getIrqList()
{
	DEB_MEMBER_FUNCT();
	DEB_PARAM() << DEB_VAR1(m_net_dev);

	string pci_irq_dir = (string("/sys/class/net/") + m_net_dev + 
			      "/device/msi_irqs/");
	NumericGlob pci_irqs(pci_irq_dir);
	typedef NumericGlob::IntStringList IntStringList;
	IntStringList dev_irqs = pci_irqs.getIntPathList();

	IntList act_irqs;
	ifstream proc_ints("/proc/interrupts");
	ostringstream os;
	int nb_cpus = CPUAffinity::getNbSystemCPUs();
	string info_token_re = "([^ \t]+)[ \t]*";
	os << "[ \t]*"
	   << "(?P<irq>[0-9]+):[ \t]+"
	   << "(?P<counts>(([0-9]+)[ \t]+){" << nb_cpus << "})"
	   << "(?P<info>(" << info_token_re << "){2,4})";
	DEB_TRACE() << DEB_VAR1(os.str());
	RegEx int_re(os.str());

	// std::getline into a string: a fixed-size char buffer would set
	// failbit on a longer line and silently stop the scan
	string s;
	while (std::getline(proc_ints, s)) {
		DEB_TRACE() << DEB_VAR1(s);
		RegEx::FullNameMatchType match;
		if (!int_re.matchName(s, match))
			continue;
		int irq = -1;
		istringstream(match["irq"]) >> irq;
		if (DEB_CHECK_ANY(DebTypeTrace)) {
			string info = match["info"];
			SimpleRegEx info_re(info_token_re);
			RegEx::MatchListType match_list;
			info_re.multiSearch(info, match_list);
			// back() on an empty list is undefined behaviour
			if (!match_list.empty()) {
				string name = match_list.back()[1];
				DEB_TRACE() << "found match: " << irq << ": "
					    << name;
			}
		}

		IntStringList::iterator it, end = dev_irqs.end();
		bool ok = false;
		for (it = dev_irqs.begin(); !ok && (it != end); ++it)
			ok = (it->first == irq);
		if (ok)
			act_irqs.push_back(irq);
	}

	DEB_RETURN() << PrettyIntList(act_irqs);
	return act_irqs;
}

void IrqMgr::updateRxQueueIrqAffinity(bool default_affinity)
{
	DEB_MEMBER_FUNCT();
	DEB_PARAM() << DEB_VAR2(m_net_dev, default_affinity);

	if (isManaged(m_net_dev) && default_affinity) {
		StringList::iterator it, end = m_dev_list.end();
		it = find(m_dev_list.begin(), end, m_net_dev);
		m_dev_list.erase(it);
		if (m_dev_list.empty())
			restoreIrqBalance();
	} else if (!isManaged(m_net_dev) && !default_affinity) {
		stopIrqBalance();
		m_dev_list.push_back(m_net_dev);
	}
}

// Stop the irqbalance service if it is currently active and remember
// that it was stopped by this class.
void IrqMgr::stopIrqBalance()
{
	DEB_STATIC_FUNCT();
	DEB_PARAM() << DEB_VAR1(m_irqbalance_stopped);

	const bool active = getIrqBalanceActive();
	if (!active) {
		DEB_TRACE() << "nothing to do";
		return;
	}
	if (m_irqbalance_stopped) {
		DEB_WARNING() << "irqbalance stopped and still running!";
	}

	setIrqBalanceActive(false);
	m_irqbalance_stopped = true;
}

// Restart the irqbalance service, but only if it was stopped earlier by
// stopIrqBalance().
void IrqMgr::restoreIrqBalance()
{
	DEB_STATIC_FUNCT();
	DEB_PARAM() << DEB_VAR1(m_irqbalance_stopped);

	if (m_irqbalance_stopped) {
		setIrqBalanceActive(true);
		m_irqbalance_stopped = false;
	}
}

// Tell whether an irqbalance process shows up in the "ps -ef" output:
// the shell pipeline returns 0 when grep finds a matching line.
bool IrqMgr::getIrqBalanceActive()
{
	DEB_STATIC_FUNCT();

	ConstStr cmd = "ps -ef | grep -v grep | grep irqbalance";
	SystemCmd bash("bash", "", false);
	bash.args() << "-c '" << cmd << "'";
	const int status = bash.execute();
	bool act = (status == 0);
	DEB_RETURN() << DEB_VAR1(act);
	return act;
}

// Run "service irqbalance start|stop" according to 'act'; throws Error
// when the command returns a non-zero status.
void IrqMgr::setIrqBalanceActive(bool act)
{
	DEB_STATIC_FUNCT();
	DEB_PARAM() << DEB_VAR1(act);

	ConstStr cmd = act ? "start" : "stop";
	DEB_ALWAYS() << "irqbalance: executing " << cmd;

	SystemCmd service("service");
	service.args() << "irqbalance " << cmd;
	const bool failed = (service.execute() != 0);
	if (failed)
		THROW_HW_ERROR(Error) << "Could not " << cmd << " "
				      << "irqbalance service";
	DEB_ALWAYS() << "Done!";
}


745
// Create the Rx queue manager of network device 'dev'; the embedded
// IrqMgr is built with the same device name.
NetDevRxQueueMgr::NetDevRxQueueMgr(string dev)
	: m_dev(dev),
	  m_irq_mgr(dev)
{
	DEB_CONSTRUCTOR();
	DEB_PARAM() << DEB_VAR1(m_dev);
}

752
753
754
755
756
757
758
// Re-apply an empty (default) affinity map if the current map is not the
// default one.
NetDevRxQueueMgr::~NetDevRxQueueMgr()
{
	DEB_DESTRUCTOR();

	const bool is_default = NetDevRxQueueAffinityMap_isDefault(m_aff_map);
	if (is_default)
		return;
	apply(NetDevRxQueueAffinityMap());
}

759
// Define the device name if none was set yet (also forwarded to the IRQ
// manager). When a name is already set, a different 'dev' throws
// InvalidValue and an identical one is accepted silently.
void NetDevRxQueueMgr::setDev(string dev)
{
	DEB_MEMBER_FUNCT();
	DEB_PARAM() << DEB_VAR1(dev);

	if (!m_dev.empty()) {
		if (dev != m_dev) {
			THROW_HW_ERROR(InvalidValue) << "name mismatch: "
						     << DEB_VAR2(dev, m_dev);
		}
		return;
	}

	m_dev = dev;
	m_irq_mgr.setDev(dev);
}
771

772
773
774
775
776
777
// Throw InvalidValue when no device name has been defined.
void NetDevRxQueueMgr::checkDev()
{
	DEB_MEMBER_FUNCT();
	if (!m_dev.empty())
		return;
	THROW_HW_ERROR(InvalidValue) << "no device defined yet";
}
778
779

void NetDevRxQueueMgr::apply(int queue, const Affinity& queue_affinity)
780
781
782
783
784
{
	DEB_MEMBER_FUNCT();
	DEB_PARAM() << DEB_VAR3(m_dev, queue, queue_affinity);

	checkDev();
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
	apply(Irq, queue, queue_affinity.irq);	
	m_irq_mgr.updateRxQueueIrqAffinity(queue_affinity.irq.isDefault());
	apply(Processing, queue, queue_affinity.processing);
	m_aff_map[queue] = queue_affinity;
}

void NetDevRxQueueMgr::apply(const AffinityMap& affinity_map)
{
	m_aff_map.clear();
	if (NetDevRxQueueAffinityMap_isDefault(affinity_map)) {
		apply(-1, NetDevRxQueueCPUAffinity());
	} else {
		AffinityMap::const_iterator qit, qend = affinity_map.end();
		for (qit = affinity_map.begin(); qit != qend; ++qit)
			apply(qit->first, qit->second);
	}
801
802
}

803
// Apply affinity 'a' for the given task (Irq or Processing) and queue.
//
// The affinity is first written directly to the target files; if any
// write fails, that method is disabled for the task (static flag) and
// the external setter program is tried instead. A setter failure is
// logged and disables the setter for the task as well.
void NetDevRxQueueMgr::apply(Task task, int queue, CPUAffinity a)
{
	DEB_MEMBER_FUNCT();
	DEB_PARAM() << DEB_VAR3(m_dev, queue, a);

	FileSetterData file_setter;
	if (task == Irq)
		getIrqFileSetterData(queue, file_setter);
	else
		getProcessingFileSetterData(queue, file_setter);

	// First choice: direct write to the files
	static bool did_error_files[NbTasks] = {false, false};
	if (!did_error_files[task]) {
		bool ok = true;
		for (const auto& fname : file_setter.file) {
			ok = applyWithFile(fname, a);
			if (!ok)
				break;
		}
		if (ok)
			return;
		DEB_WARNING() << "Could not write to files. Will try setter...";
		did_error_files[task] = true;
	}

	// Fallback: external setter program
	static bool did_error_setter[NbTasks] = {false, false};
	if (did_error_setter[task])
		return;

	bool ok = true;
	for (const auto& irq_queue : file_setter.setter) {
		ok = applyWithSetter(task, irq_queue, a);
		if (!ok)
			break;
	}
	if (ok)
		return;
	DEB_ERROR() << "Could not use setter: "
		    << DEB_VAR4(m_dev, task, queue, a);
	did_error_setter[task] = true;
}

void NetDevRxQueueMgr::getIrqFileSetterData(int queue,
					    FileSetterData& file_setter)
{
	DEB_MEMBER_FUNCT();

	if (queue != -1)
		THROW_HW_ERROR(NotSupported) << "only all queues (-1) mode "
					     << "is supported";

	IntList act_irqs = m_irq_mgr.getIrqList();
	IntList::const_iterator it, end = act_irqs.end();
	for (it = act_irqs.begin(); it != end; ++it) {
		ostringstream os1, os2;
		os1 << "/proc/irq/" << *it << "/smp_affinity";
		file_setter.file.push_back(os1.str());
		os2 << *it;
		file_setter.setter.push_back(os2.str());
	}
}

void NetDevRxQueueMgr::getProcessingFileSetterData(int queue,
						   FileSetterData& file_setter)
{
	DEB_MEMBER_FUNCT();
	DEB_PARAM() << DEB_VAR1(m_dev);

868
	enum {
869
		File, Setter, NbLists,
870
	};
871
872
873
874

	string glob_str(string("/sys/class/net/" + m_dev + "/queues/rx-"));
	NumericGlob rps_cpus_glob(glob_str, "/rps_cpus");

875
876
	typedef NumericGlob::IntStringList IntStringList;
	IntStringList list_array[NbLists];
877
878
	list_array[File] = rps_cpus_glob.getIntPathList();
	list_array[Setter] = rps_cpus_glob.getIntSubPathList(6);
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
	IntList filter_list;
	if (queue == -2)
		filter_list = getRxQueueList();
	else if (queue != -1)
		filter_list.push_back(queue);
	for (int i = 0; !filter_list.empty() && (i < NbLists); ++i) {
		IntStringList& list = list_array[i];
		while (true) {
			IntStringList::iterator lit, lend = list.end();
			bool ok = true;
			for (lit = list.begin(); ok && (lit != lend); ++lit) {
				IntList::iterator fit, fend;
				fend = filter_list.end();
				fit = find(filter_list.begin(), fend,
					   lit->first);
				ok = (fit != fend);
				if (!ok)
					list.erase(lit);
			}
			if (ok)
				break;
		}
	}
902
903
904
	if (list_array[File].size() != list_array[Setter].size()) {
		THROW_HW_ERROR(Error) << "File and Setter lists differ";
	} else if (list_array[File].empty()) {
905
906
907
		DEB_WARNING() << "No Rx queue (" << queue << ") for " << m_dev;
		return;
	}
908

909
910
911
912
	StringList *file_setter_list[NbLists] = {&file_setter.file,
						 &file_setter.setter};
	for (int i = 0; i < NbLists; ++i) {
		const IntStringList& list = list_array[i];
913
		IntStringList::const_iterator it, end = list.end();
914
915
		for (it = list.begin(); it != end; ++it)
			file_setter_list[i]->push_back(it->second);
916
917
918
	}
}

919
bool NetDevRxQueueMgr::applyWithFile(const string& fname, CPUAffinity a)
920
921
922
{
	DEB_MEMBER_FUNCT();

923
924
	string s = a.toString(16, true);
	DEB_TRACE() << "writing " << s << " to " << fname;
925
926
	ofstream aff_file(fname.c_str());
	if (aff_file)
927
		aff_file << s;
928
929
	if (aff_file)
		aff_file.close();
930
	bool file_ok(aff_file);
931
932
933
934
	DEB_RETURN() << DEB_VAR1(file_ok);
	return file_ok;
}

935
936
// Run the external setter program (AffinitySetterName) with arguments
// "<-i|-r> <dev> <irq_queue> <hex mask>": "-i" for the Irq task, "-r"
// otherwise. Returns true when the command returns 0.
bool NetDevRxQueueMgr::applyWithSetter(Task task, const string& irq_queue,
				       CPUAffinity a)
{
	DEB_MEMBER_FUNCT();
	DEB_PARAM() << DEB_VAR2(m_dev, irq_queue);

	// description built once and reused by later calls
	static string desc = getSetterSudoDesc();

	ConstStr task_opt = "-r";
	if (task == Irq)
		task_opt = "-i";
	const string mask_str = CPUAffinity::maskToString(a.getZeroDefaultMask(),
							   16, true);

	SystemCmd setter(AffinitySetterName, desc);
	setter.args() << task_opt << " " << m_dev << " " << irq_queue << " "
		      << mask_str;
	const int status = setter.execute();

	bool setter_ok = (status == 0);
	DEB_RETURN() << DEB_VAR1(setter_ok);
	return setter_ok;
}

952
// Write the C source of the affinity setter (AffinitySetterSrc) to
// /tmp/<setter name>.c and return a text explaining how to compile and
// install it. Failures to create or write the file are only logged.
// NOTE(review): the file is created under /tmp with a fixed name.
string NetDevRxQueueMgr::getSetterSudoDesc()
{
	DEB_STATIC_FUNCT();

	const string& setter_name = AffinitySetterName;
	const string dir = "/tmp";
	const string aux_setter = dir + "/" + setter_name;
	const string fname = aux_setter + ".c";

	ofstream src_file(fname.c_str());
	if (!src_file) {
		DEB_WARNING() << "Error creating " << fname;
	} else {
		const StringList& SrcList = AffinitySetterSrc;
		StringList::const_iterator line = SrcList.begin();
		StringList::const_iterator last = SrcList.end();
		while (src_file && (line != last)) {
			src_file << *line << endl;
			++line;
		}
		if (src_file)
			src_file.close();
		if (!src_file)
			DEB_WARNING() << "Error writing to " << fname;
	}

	ostringstream desc;
	desc << "In order to create " << setter_name << ", compile " << fname
	     << " with the following commands: " << endl;
	desc << "  gcc -Wall -o " << aux_setter << " " << fname << endl;
	desc << "  su -c \"cp " << aux_setter << " /usr/local/bin\"" << endl;
	return desc.str();
}

982
983
const string NetDevRxQueueMgr::AffinitySetterName = 
					"netdev_set_queue_cpu_affinity";
984

985
static const char *NetDevRxQueueMgrAffinitySetterSrcCList[] = {
986
987
988
989
990
991
992
993
994
995
996
"#include <stdio.h>",
"#include <stdlib.h>",
"#include <string.h>",
"#include <errno.h>",
"#include <unistd.h>",
"#include <sys/types.h>",
"#include <sys/stat.h>",
"#include <fcntl.h>",
"",
"int main(int argc, char *argv[])",
"{",
997
"	char *dev, *irq_queue, *p, fname[256];",
998
"	int irq, rps, fd, len, ret;",
999
"",
1000
"	if (argc != 5)",