Issue
I have ~100 text files 200MB each and I need to parse them. The program below loads files and processes them in parallel. It can create a Thread per file or a Process per file.
The problem: If I use threads it never uses 100% CPU and takes longer to complete.
THREAD PER FILE
total time: 430 sec
CPU usage 15-20%
CPU frequency 1.2 GHz
PROCESS PER FILE
total time 100 sec
CPU usage 100%
CPU frequency 3.75 GHz
I’m using E5-1650 v3 Hexa-Core with HT, therefore I process 12 files at a time.
How can I achive 100% CPU utilisation by threads?
Code below does not use result of processing since it doen not affect the problem.
using System;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Text;
using System.Threading;
namespace libsvm2tsv
{
class Program
{
static void Main(string[] args)
{
var sw = Stopwatch.StartNew();
switch (args[0])
{
case "-t": LoadAll(args[1], LoadFile); break;
case "-p": LoadAll(args[1], RunChild); break;
case "-f": LoadFile(args[1]); return;
}
Console.WriteLine("ELAPSED: {0} sec.", sw.ElapsedMilliseconds / 1000);
Console.ReadLine();
}
static void LoadAll(string folder, Action<string> algorithm)
{
var sem = new SemaphoreSlim(12);
Directory.EnumerateFiles(folder).ToList().ForEach(f=> {
sem.Wait();
new Thread(() => { try { algorithm(f); } finally { sem.Release(); } }).Start();
});
}
static void RunChild(string file)
{
Process.Start(new ProcessStartInfo
{
FileName = Assembly.GetEntryAssembly().Location,
Arguments = "-f \"" + file + "\"",
UseShellExecute = false,
CreateNoWindow = true
})
.WaitForExit();
}
static void LoadFile(string inFile)
{
using (var ins = File.OpenText(inFile))
while (ins.Peek() >= 0)
ParseLine(ins.ReadLine());
}
static long[] ParseLine(string line)
{
return line
.Split()
.Skip(1)
.Select(r => (long)(double.Parse(r.Split(':')[1]) * 1000))
.Select(r => r < 0 ? -1 : r)
.ToArray();
}
}
}
Solution
Finally, I’ve found the bottleneck. I’m using string.Split to parse numbers from every line of data, so I get billions short strings. These strings are put in heap. Since all threads share single heap memory allocation is synchronized. Since processes have separate heaps – no syncronization occures and things work fast. That’s the root of issue. So, I rewrote parsing using IndexOf rather than Split and threads started to perform even better than separate processes. Just as I expected it to be.
Since .NET has no default tool to parse real numbers out of the certain position inside string I used this one: https://codereview.stackexchange.com/questions/75791/optimize-custom-double-parse with small modification.
using System;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Threading;
using System.Threading.Tasks;
namespace libsvm2tsv
{
class Program
{
static void Main(string[] args)
{
var sw = Stopwatch.StartNew();
switch (args[0])
{
case "-t": LoadAll(args[1], LoadFile); break;
case "-p": LoadAll(args[1], RunChild); break;
case "-f": LoadFile(args[1]); return;
}
Console.WriteLine("ELAPSED: {0} sec.", sw.ElapsedMilliseconds / 1000);
Console.ReadLine();
}
static void LoadAll(string folder, Action<string> algorithm)
{
Parallel.ForEach(
Directory.EnumerateFiles(folder),
new ParallelOptions { MaxDegreeOfParallelism = 12 },
f => algorithm(f));
}
static void RunChild(string file)
{
Process.Start(new ProcessStartInfo
{
FileName = Assembly.GetEntryAssembly().Location,
Arguments = "-f \"" + file + "\"",
UseShellExecute = false,
CreateNoWindow = true
})
.WaitForExit();
}
static void LoadFile(string inFile)
{
using (var ins = File.OpenText(inFile))
while (ins.Peek() >= 0)
ParseLine(ins.ReadLine());
}
static long[] ParseLine(string line)
{
// first, count number of items
var items = 1;
for (var i = 0; i < line.Length; i++)
if (line[i] == ' ') items++;
//allocate memory and parse items
var all = new long[items];
var n = 0;
var index = 0;
while (index < line.Length)
{
var next = line.IndexOf(' ', index);
if (next < 0) next = line.Length;
if (next > index)
{
var v = (long)(parseDouble(line, line.IndexOf(':', index) + 1, next - 1) * 1000);
if (v < 0) v = -1;
all[n++] = v;
}
index = next + 1;
}
return all;
}
private readonly static double[] pow10Cache;
static Program()
{
pow10Cache = new double[309];
double p = 1.0;
for (int i = 0; i < 309; i++)
{
pow10Cache[i] = p;
p /= 10;
}
}
static double parseDouble(string input, int from, int to)
{
long inputLength = to - from + 1;
long digitValue = long.MaxValue;
long output1 = 0;
long output2 = 0;
long sign = 1;
double multiBy = 0.0;
int k;
//integer part
for (k = 0; k < inputLength; ++k)
{
digitValue = input[k + from] - 48; // '0'
if (digitValue >= 0 && digitValue <= 9)
{
output1 = digitValue + (output1 * 10);
}
else if (k == 0 && digitValue == -3 /* '-' */)
{
sign = -1;
}
else if (digitValue == -2 /* '.' */ || digitValue == -4 /* ',' */)
{
break;
}
else
{
return double.NaN;
}
}
//decimal part
if (digitValue == -2 /* '.' */ || digitValue == -4 /* ',' */)
{
multiBy = pow10Cache[inputLength - (++k)];
for (; k < inputLength; ++k)
{
digitValue = input[k + from] - 48; // '0'
if (digitValue >= 0 && digitValue <= 9)
{
output2 = digitValue + (output2 * 10);
}
else
{
return Double.NaN;
}
}
multiBy *= output2;
}
return sign * (output1 + multiBy);
}
}
}
Answered By – Anton Burtsev
Answer Checked By – Katrina (BugsFixing Volunteer)