I have a server running CentOS with a 10-core processor and 68 GB of RAM.
I have built an app in which I upload a CSV of about 500k records.
After uploading, I read the data in chunks and dispatch a job for each chunk:
$file = ListcleaningFiles::find($this->file_id);
if ($file) {
    // Read the whole file into memory and split it into chunks
    $data = file($file->file_location);
    $chunks = array_chunk($data, config('app.CLEANING_EMAIL_BATCH'));
    $header = [];
    $file->update(['total_chunks' => count($chunks)]);

    // Directory holding one progress-counter file per chunk
    $path = storage_path('app/public/counters/'.$file->id);
    if (!File::isDirectory($path)) {
        File::makeDirectory($path, 0777, true, true);
    }

    foreach ($chunks as $key => $chunk) {
        $data = array_map('str_getcsv', $chunk);

        // Strip the CSV header row from the first chunk
        if ($key === 0) {
            $header = $data[0];
            unset($data[0]);
        }

        // Spawn a dedicated queue worker for this chunk and capture its PID
        $queue_name = "insert_emails_".$file->id."_".$key;
        $process_id = exec('nohup /usr/bin/php '.base_path().'/artisan queue:work --queue='.$queue_name.' --timeout=7200 --stop-when-empty > /dev/null 2>&1 & echo $!');
        File::put($path.'/'.$process_id.'_'.$key.'.txt', '0');

        dispatch(new \App\Jobs\ImportEmails($data, $file->id, $key, $process_id))->onQueue($queue_name);
    }
} else {
    Log::debug("File not found with id: ".$this->file_id);
    return true;
}
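For comparison, since ImportEmails already uses the Batchable trait, a batch-based alternative would dispatch every chunk onto one shared queue served by a fixed pool of supervised workers instead of exec()'ing a new queue:work process per chunk. This is only a sketch, assuming the job_batches table is set up for batching; the queue name "imports" and the 0 placeholder for $process_id are made up for illustration:

use Illuminate\Support\Facades\Bus;

$jobs = [];
foreach ($chunks as $key => $chunk) {
    $data = array_map('str_getcsv', $chunk);
    if ($key === 0) {
        unset($data[0]); // drop the CSV header row
    }
    // 0 is a placeholder: no per-chunk process is spawned in this variant
    $jobs[] = new \App\Jobs\ImportEmails($data, $file->id, $key, 0);
}

// One shared queue; run a fixed number of workers on it via Supervisor,
// e.g. php artisan queue:work --queue=imports --timeout=7200
Bus::batch($jobs)->onQueue('imports')->dispatch();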
The import job code:
namespace App\Jobs;

use Illuminate\Bus\Batchable;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use Illuminate\Support\Facades\Log;
use App\Events\Listcleaning;
use App\Models\Mongo\ListcleaningFiles;
use DB;
use File;
class ImportEmails implements ShouldQueue
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels, Batchable;

    public $rows;
    public $file_id;
    public $filelist;
    public $total_rows;
    public $chunk_id;
    public $process_id;

    /**
     * Create a new job instance.
     *
     * @return void
     */
    public function __construct($rows, $file_id, $chunk_id, $process_id)
    {
        $this->rows = $rows;
        $this->file_id = $file_id;
        $this->filelist = ListcleaningFiles::find($file_id);
        $this->total_rows = $this->filelist ? $this->filelist->total_rows : 0;
        $this->chunk_id = $chunk_id;
        $this->process_id = $process_id;
    }
    /**
     * Execute the job.
     *
     * @return void
     */
    public function handle()
    {
        if (!$this->filelist) {
            return; // was exit(0), which kills the worker process itself
        }

        if (is_array($this->rows) && count($this->rows) > 0) {
            $db = DB::connection('mongodb');
            $temp_emails = [];
            $email_data = [];
            $counter = 1;
            $path = storage_path('app/public/counters/'.$this->filelist->id).'/'.$this->process_id.'_'.$this->chunk_id.'.txt';

            foreach ($this->rows as $index => $row) {
                // Persist this chunk's progress counter on every row
                File::put($path, $counter);
                $counter++;

                // Skip rows that have no email
                if (empty($row[1])) {
                    continue;
                }
                // Skip invalid emails
                if (!filter_var($row[1], FILTER_VALIDATE_EMAIL)) {
                    continue;
                }
                // Skip emails that start with a digit
                if (is_numeric($row[1][0])) {
                    continue;
                }

                $email = $row[1];

                // Skip duplicate emails within this chunk
                if (in_array(strtolower($email), $temp_emails)) {
                    continue;
                }
                $temp_emails[] = strtolower($email);

                $email_data[$email] = [
                    'file_id' => $this->filelist->id,
                    'fname'   => $row[0],
                    'email'   => $email,
                    'country' => $row[2],
                ];

                // Note: this broadcasts a progress event for every row
                $this->logProgress($this->filelist);
            }

            // Drop emails that already exist in the collection for this file
            $existing_emails = $db->collection('cleaned_emails_list')->where(['file_id' => $this->filelist->id])->whereIn('email', $temp_emails)->get();
            foreach ($existing_emails as $e) {
                unset($email_data[$e['email']]);
            }

            // Insert the remaining emails into the database
            $email_data = array_values($email_data);
            if (count($email_data) > 0) {
                $db->collection('cleaned_emails_list')->insert($email_data);
            }
        }
        //exec('kill -9 '.$this->process_id);
    }
    public function logProgress($file)
    {
        // Sum the per-chunk counter files to get the overall progress
        $path = storage_path('app/public/counters/'.$file->id);
        $counter = 0;
        foreach (File::allFiles($path) as $f) {
            $counter += (int) File::get($f);
        }

        // Guard against division by zero when total_rows is not set
        if ($file->total_rows > 0) {
            $p = round(($counter / $file->total_rows) * 100, 2);
            $file->update(['progress' => $p]);
            event(new Listcleaning([
                'type' => 'cleaning',
                'label' => 'Importing',
                'file_id' => $file->id,
                'progress' => $p,
                'total_records' => $file->total_rows
            ]));
        }
    }
    public function failed($exception)
    {
        // The file may not exist; guard before updating its flags
        if ($this->filelist) {
            $this->filelist->update(['is_cleaned' => 0, 'is_locked' => 0, 'is_mx_done' => 0, 'is_ip_done' => 0, 'is_ns_done' => 0]);
        }
        Log::debug($exception);
    }
}
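A note on broadcast volume: logProgress() is called once per row, so a 500k-row file re-reads every counter file and fires a Pusher event up to 500k times. A throttled variant that broadcasts at most once per second per file might look like the sketch below; the cache key name and the one-second window are assumptions:

use Illuminate\Support\Facades\Cache;

public function logProgress($file)
{
    // Cache::add() is atomic: it returns false if the key already exists,
    // so across all chunk workers at most one broadcast fires per second.
    // The cache key and the 1-second TTL are assumptions.
    if (!Cache::add('listcleaning_progress_'.$file->id, 1, 1)) {
        return;
    }

    $path = storage_path('app/public/counters/'.$file->id);
    $counter = 0;
    foreach (File::allFiles($path) as $f) {
        $counter += (int) File::get($f);
    }

    if ($file->total_rows > 0) {
        $p = round(($counter / $file->total_rows) * 100, 2);
        $file->update(['progress' => $p]);
        event(new Listcleaning([
            'type' => 'cleaning',
            'label' => 'Importing',
            'file_id' => $file->id,
            'progress' => $p,
            'total_records' => $file->total_rows
        ]));
    }
}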
The websocket keeps giving a Pusher connection error. I increased the ulimit, but it still gives the same error, or sometimes "too many connections".