Organizational Research By

Surprising Reserch Topic

node js count the number of lines in a file


node js count the number of lines in a file  using -'javascript,node.js'

I have large text files, which range between 30MB and 10GB. How can I count the number of lines in a file using Node.js?

I have these limitations:


The entire file does not need to be written to memory
A child process is not required to perform the task

    

asked Sep 18, 2015 by rajeevr642
0 votes
70 views



Related Hot Questions

5 Answers

0 votes

solution without using wc:

var i;
var count = 0;
require('fs').createReadStream(process.argv[2])
  .on('data', function(chunk) {
    for (i=0; i < chunk.length; ++i)
      if (chunk[i] == 10) count++;
  })
  .on('end', function() {
    console.log(count);
  });

it's slower, but not that much you might expect - 0.6s for 140M+ file including node.js loading & startup time

>time node countlines.js video.mp4 
619643

real    0m0.614s
user    0m0.489s
sys 0m0.132s

>time wc -l video.mp4 
619643 video.mp4
real    0m0.133s
user    0m0.108s
sys 0m0.024s

>wc -c video.mp4
144681406  video.mp4
answered Sep 18, 2015 by shikhar jain
0 votes

You could do this as the comments suggest using wc

var exec = require('child_process').exec;

exec('wc /path/to/file', function (error, results) {
    console.log(results);
});
answered Sep 18, 2015 by santosh soni
0 votes
var i;
var count = 0;
require('fs').createReadStream(process.argv[2])
  .on('data', function(chunk) {
    for (i=0; i < chunk.length; ++i)
      if (chunk[i] == 10) count++;
  })
  .on('end', function() {
    console.log(count);
  });

why chunk[i] == 10 ???
may be it is the code of new line ???

answered Sep 18, 2015 by santosh soni
0 votes

since iojs 1.5.0 there is Buffer#indexOf() method, using it to compare to Andrey Sidorov' answer:

ubuntu@server:~$ wc logs
  7342500  27548750 427155000 logs
ubuntu@server:~$ time wc -l logs 
7342500 logs

real    0m0.180s
user    0m0.088s
sys 0m0.084s
ubuntu@server:~$ nvm use node
Now using node v0.12.1
ubuntu@server:~$ time node countlines.js logs 
7342500

real    0m2.559s
user    0m2.200s
sys 0m0.340s
ubuntu@server:~$ nvm use iojs
Now using node iojs-v1.6.2
ubuntu@server:~$ time iojs countlines2.js logs 
7342500

real    0m1.363s
user    0m0.920s
sys 0m0.424s
ubuntu@server:~$ cat countlines.js 
var i;
var count = 0;
require('fs').createReadStream(process.argv[2])
  .on('data', function(chunk) {
    for (i=0; i < chunk.length; ++i)
      if (chunk[i] == 10) count++;
  })
  .on('end', function() {
    console.log(count);
  });
ubuntu@server:~$ cat countlines2.js 
var i;
var count = 0;
require('fs').createReadStream(process.argv[2])
  .on('data', function(chunk) {
    var index = -1;
    while((index = chunk.indexOf(10, index + 1)) > -1) count++
  })
  .on('end', function() {
    console.log(count);
  });
ubuntu@server:~$ 
answered Sep 18, 2015 by loknath.ganji
0 votes

Here is another way without so much nesting.

var fs = require('fs');
filePath = process.argv[2];
fileBuffer =  fs.readFileSync(filePath);
to_string = fileBuffer.toString();
split_lines = to_string.split("\n");
console.log(split_lines.length-1);
answered Sep 18, 2015 by deepak07.s

...