import os
import sys
import shutil
from src.logger import logging
from src.exception import CustomException
from dataclasses import dataclass
## Initialize the Data Ingestion configuration
@dataclass
class DataIngestionconfig:
    """Destination directories used by the data ingestion step.

    Each field is a relative path string; relative paths are resolved
    against the current working directory of the process at the time
    they are used.
    """

    # Directory that receives the training files.
    train_data_path: str = os.path.join('artifacts', 'train')
    # Directory that receives the test files.
    test_data_path: str = os.path.join('artifacts', 'test')
    # Directory that receives the validation files.
    valid_data_path: str = os.path.join('artifacts', 'valid')
## create the data ingestion class
class DataIngestion:
    """Copy the raw Train/Test files into train, test and valid folders."""

    def __init__(self):
        self.ingestion_config = DataIngestionconfig()

    @staticmethod
    def _list_files(directory_path):
        """Return the joined paths of every entry in *directory_path*, sorted."""
        # os.listdir gives entries in arbitrary order; sorting keeps the
        # train/validation split below reproducible between runs.
        return sorted(
            os.path.join(directory_path, filename)
            for filename in os.listdir(directory_path)
        )

    @staticmethod
    def _copy_files(filenames, destination):
        """Create *destination* if needed and copy every file into it."""
        os.makedirs(destination, exist_ok=True)
        for filename in filenames:
            shutil.copy(filename, destination)

    def initiate_data_ingestion(self):
        """Split the source files and copy them into the configured folders.

        The source folders ``notebooks/data/Train`` and
        ``notebooks/data/Test`` are relative, so they are resolved against
        the current working directory. The first 50 entries of the sorted
        Train listing become the training set; the remaining entries
        become the validation set.

        Returns:
            A ``(train_data_path, test_data_path, valid_data_path)`` tuple
            taken from ``self.ingestion_config``.

        Raises:
            CustomException: wraps any exception raised while listing or
                copying files.
        """
        logging.info('Data Ingestion method starts')
        try:
            # Build the paths from components: a literal such as
            # 'notebooks\data\Train' contains invalid escape sequences
            # and only resolves on Windows.
            train_dir = os.path.join('notebooks', 'data', 'Train')
            print(train_dir)
            test_dir = os.path.join('notebooks', 'data', 'Test')

            train_filenames = self._list_files(train_dir)
            test_filenames = self._list_files(test_dir)
            print(len(train_filenames))
            print(len(test_filenames))

            # Divide the train_filenames list into train and validation sets
            valid_filenames = train_filenames[50:]
            train_filenames = train_filenames[:50]
            logging.info('Split Data into Train and Validation')

            config = self.ingestion_config
            self._copy_files(train_filenames, config.train_data_path)
            self._copy_files(test_filenames, config.test_data_path)
            self._copy_files(valid_filenames, config.valid_data_path)

            # Keep this return at method level (not inside a loop or a
            # helper) so callers always receive the three-path tuple.
            return (
                config.train_data_path,
                config.test_data_path,
                config.valid_data_path,
            )
        except Exception as e:
            logging.error('Exception occured at Data Ingestion Stage')
            raise CustomException(e, sys) from e
# if __name__ == '__main__':
# obj = DataIngestion()
# train_path, test_path, valid_path = obj.initiate_data_ingestion()
# print(train_path, test_path, valid_path)
Whenever I run this file directly, I get the expected output (the returned paths):
notebooks\data\Train
60
1
artifacts/train
artifacts/test
artifacts/valid
import os
import sys
import shutil
from src.logger import logging
from src.exception import CustomException
from src.components.data_ingestion import DataIngestion
if __name__ == '__main__':
    # Run the ingestion step and print the three paths it returns.
    ingestion = DataIngestion()
    train_path, test_path, valid_path = ingestion.initiate_data_ingestion()
    print(train_path, test_path, valid_path)
But when I import it as a module, I always get this error:
Traceback (most recent call last):
File "d:\project\BMI\src\pipelines\training_pipeline.py", line 13, in <module>
train_path, test_path, valid_path = obj.initiate_data_ingestion()
TypeError: cannot unpack non-iterable NoneType object
I have been stuck on this. Can somebody help me solve it?