0

After cleaning the text, i am trying to vectoriz the text then i am getting the following error: "tuple index out of range"

I am not sure if I am doing something wrong? Please your help.

Thank you in advance

from __future__ import print_function
from tensorflow.keras.callbacks import LambdaCallback
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.utils import get_file
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras.layers import Input, Embedding, Dropout, Activation
import numpy as np
import random
import sys
import io
import string
path = get_file('21000-8.txt',
                origin='http://www.gutenberg.org/files/21000/21000-8.txt')
    

with io.open(path, encoding='ISO-8859-1', errors='ignore') as f:
    text = f.read()

# print somewhere in the middle of the text 200 characters
print(text[1200:1400])

# print corpus length
print('corpus length in characters:', len(text))
# turn a doc into clean tokens
def clean_doc(doc):
    # replace '--' with a space ' '
    doc = doc.replace('--', ' ')
    # split into tokens by white space
    tokens = doc.split()
    # remove punctuation from each token
    table = str.maketrans('', '', string.punctuation)
    tokens = [w.translate(table) for w in tokens]
    # remove remaining tokens that are not alphabetic
    tokens = [word for word in tokens if word.isalpha()]
    # make lower case
    tokens = [word.lower() for word in tokens]
    return tokens
max_features = 10000
sequence_length = 250

text_vectorization = TextVectorization(
    standardize= clean_doc,
    max_tokens=20000,
    # Encode the output tokens 
    output_mode="int"

)
text_vectorization.adapt(text)
IndexError                                Traceback (most recent call last)
<ipython-input-41-8eabca3cac65> in <module>
----> 1 text_vectorization.adapt(text)

6 frames
/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/tensor_shape.py in __getitem__(self, key)
    907       else:
    908         if self._v2_behavior:
--> 909           return self._dims[key]
    910         else:
    911           return self.dims[key]

IndexError: tuple index out of range
Maik
  • 11
  • 2
  • Please clarify your specific problem or provide additional details to highlight exactly what you need. As it's currently written, it's hard to tell exactly what you're asking. – Community Jan 31 '23 at 05:23

0 Answers0