0

I have Server with Centos OS, 10 core processors, 68 gb of ram

I have created an app in which I am uploading csv of records 500k.

After uploading, read the data in chunks and dispatched the job for each chunk

$file = ListcleaningFiles::find($this->file_id);
        
if($file)
{
    $data   =   file($file->file_location);
    
    // Chunking file
    $chunks = array_chunk($data, config('app.CLEANING_EMAIL_BATCH'));
    $header = [];
    $file->update(['total_chunks' => count($chunks)]);

    $counter = 0;

    $path = storage_path('app/public/counters/'.$file->id);
    if(!File::isDirectory($path)){
        File::makeDirectory($path, 0777, true, true);
    }
    
    foreach ($chunks as $key => $chunk) {
        $data = array_map('str_getcsv', $chunk);

        if ($key === 0) {
            $header = $data[0];
            unset($data[0]);
        }
        
        $queue_name = "insert_emails_".$file->id."_".$key;

        $process_id = exec('nohup /usr/bin/php '.base_path().'/artisan queue:work --queue='.$queue_name.' --timeout=7200 --stop-when-empty > /dev/null 2>&1 & echo $!');

        File::put($path.'/'.$process_id.'_'.$key.'.txt','0');
        dispatch(new \App\Jobs\ImportEmails($data,$file->id,$key,$process_id))->onQueue($queue_name);
    }
}else{
    Log::debug("File not found with id : ".$this->file_id);
    return true;
}

Import job code

namespace App\Jobs;

use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldBeUnique;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use App\Events\Listcleaning;
use Illuminate\Support\Facades\Log;
use App\Models\Mongo\ListcleaningFiles;
use App\Models\Mongo\ListcleaningEmails;
use Illuminate\Support\Str;
use Illuminate\Bus\Batchable;
use Illuminate\Support\Facades\Bus;
use Cache;
use DB;
use File;
use Storage;

class ImportEmails implements ShouldQueue
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels, Batchable;
    public $rows;
    public $file_id;
    public $filelist;
    public $total_rows;
    public $domains;
    public $chunk_id;
    public $process_id;
    /**
     * Create a new job instance.
     *
     * @return void
     */
    public function __construct($rows,$file_id,$chunk_id,$process_id)
    {
        $this->rows = $rows;
        $this->file_id = $file_id;
        $this->filelist = ListcleaningFiles::find($file_id);
        if($this->filelist){
            $this->total_rows = $this->filelist->total_rows;
        }else{
            $this->total_rows = 0;
        }
        
        $this->chunk_id = $chunk_id;
        $this->process_id = $process_id;
    }

    /**
     * Execute the job.
     *
     * @return void
     */
    public function handle()
    {
        if (!$this->filelist) {
            exit(0);
        }
        
        if(is_array($this->rows) && count($this->rows) > 0)
        {
            $db = DB::connection('mongodb');

            $temp_emails = [];
            $temp_domains = [];
            $domain_data = [];
            $email_data = [];

            $counter = 1;
            $path = storage_path('app/public/counters/'.$this->filelist->id).'/'.$this->process_id.'_'.$this->chunk_id.'.txt';
            foreach($this->rows as $index => $row)
            {
                File::put($path,$counter);
                $counter++;

                // remove columns that have no email
                if(empty($row[1])){
                    continue;
                }

                // chek if the email is valid or not
                if (!filter_var($row[1], FILTER_VALIDATE_EMAIL)) {
                    continue;
                }

                // remove emails that start with number
                if ( is_numeric($row[1][0]) ) {
                    continue;
                }

                $email = $row[1];

                //prevent duplicate emails
                if(in_array(strtolower($email),$temp_emails))
                {
                    continue;
                }
                $temp_emails[] = strtolower($email);
                $domain = strtolower(substr(strrchr($email, "@"), 1));

                $email_data[$email] = [
                    'file_id' => $this->filelist->id,
                    'fname' => $row[0],
                    'email' => $email,
                    'country' => $row[2],
                ];

                $this->logProgress($this->filelist);
            }
            
            
            
            $existing_emails = $db->collection('cleaned_emails_list')->where(['file_id' => $this->filelist->id])->whereIn('email',$temp_emails)->get();
            if(count($existing_emails) > 0)
            {
                foreach($existing_emails as $e)
                {
                    if(isset($email_data[$e['email']]))
                    {
                        unset($email_data[$e['email']]);
                    }
                }
            }
            $email_data = array_values($email_data);
            
            // insert emails to database
            if(count($email_data) > 0){
                $db->collection('cleaned_emails_list')->insert($email_data);
            }
            
        }
        //exec('kill -9 '.$this->process_id);
    }

    public function logProgress($file)
    {        
        $path = storage_path('app/public/counters/'.$this->filelist->id);

        $files = File::allFiles($path);
        $counter = 0;
        if(count($files) > 0)
        {
            foreach($files as $f)
            {
                $content = File::get($f);
                $counter += (int)$content;
            }
        }

        $p = ($counter/$file->total_rows)*100;

        $file->update(['progress' => round($p,2)]);
        
        event(new Listcleaning([
            'type' => 'cleaning',
            'label' => 'Importing',
            'file_id' => $file->id,
            'progress' => round($p,2),
            'total_records' => $file->total_rows
        ]));
    }

    public function failed($exception)
    {
        $this->filelist->update(['is_cleaned' => 0,'is_locked' => 0,'is_mx_done' => 0,'is_ip_done' => 0,'is_ns_done' => 0]);
        Log::debug($exception);
    }
}

websoket giving pusher connect error. increased the ulimit but still gives the same error or some time too many connections.

Dharman
  • 30,962
  • 25
  • 85
  • 135
Manoj Thakur
  • 75
  • 1
  • 9

1 Answers1

0

Can you give a screenshot of the error ?

And, Have you tried this ??? changing-the-event-loop

The ulimit command only temporarily increases the maximum number of open file descriptors. To permanently modify this value, you can edit it in your operating system limits.conf file.