-1

This is my LSA program. In this function I want to tokenize all of my text and then reduce each word to its stem. I'm trying to integrate the stemming code shown below, but I get this error: for word in titles.split(" "): AttributeError: 'list' object has no attribute 'split'

This is the LSA code:

# -*- coding: utf-8 -*-

from numpy import zeros
from scipy.linalg import svd
from math import log
from numpy import asarray, sum
#from nltk.corpus import stopwords
from sklearn.metrics.pairwise import cosine_similarity
#from nltk.stem import PorterStemmer
#from nltk.stem.isri import ISRIStemmer
import nltk
#from matplotlib import pyplot as plt
from snowballstemmer import stemmer 


# Sample input: a list of four Arabic sentences (a list, not a single string).
titles = [" ذهبت الاخت الى المدرسة","تقع المدرسة في الجبال",
    "ذهب الام لزيارة ابنتها في المدرسة ","تحضر الام الكعكة" ]

# Snowball stemmer configured for Arabic; shared by the code below.
ar_stemmer = stemmer("arabic")

# Words that parse() skips when building the word dictionary.
stopwords = ['ثم','و','حتى','الى','على','في']

# Punctuation characters; stored by LSA.__init__ but not otherwise used in the code shown.
ignorechars = ''',:'!'''



# LSA keeps a stop-word list, a set of characters to ignore, a dictionary
# mapping each stem to a list of dcount values, and the dcount counter itself.
# NOTE(review): as shown here the methods are not indented under the class;
# the indentation was presumably lost when the code was pasted.
class LSA(object):
def __init__(self, stopwords, ignorechars):
    # Store the configuration and start with an empty dictionary and counter.
    self.stopwords = stopwords
    self.ignorechars = ignorechars
    self.wdict = {}
    self.dcount = 0    


def parse(self, doc):
    # NOTE(review): the doc parameter is not used anywhere in this method.

    # titles is the module-level *list* of sentences, so titles.split(" ")
    # raises AttributeError: 'list' object has no attribute 'split'.
    for word in titles.split(" "):
             stem = ar_stemmer.stemWord(word)

    # As indented here, the checks below run once after the loop finishes,
    # so only the last stem produced by the loop is examined.
    if stem in self.stopwords:
       pass
    elif stem in self.wdict:
            self.wdict[stem].append(self.dcount)
    else:
            self.wdict[stem] = [self.dcount]
            # As indented here, dcount advances only when a new stem is first seen.
            self.dcount += 1

And this is what I want to integrate:

from snowballstemmer import stemmer

# Build the Arabic Snowball stemmer once and reuse it for every word.
ar_stemmer = stemmer("arabic")
sentence = u" ذهبت الاخت الى المدرسة, تقع المدرسة في الجبال"

# Stem each space-separated token and print it. The loop body must be
# indented, and print(...) with one argument works on Python 2 and Python 3.
for word in sentence.split(" "):
    stem = ar_stemmer.stemWord(word)
    print(stem)
YayaYaya
  • 125
  • 2
  • 3
  • 10

2 Answers2

2

titles is already a list; do this instead:

# titles is a list of strings: loop over the sentences first,
# then split each individual sentence into words.
for sentence in titles:
    for word in sentence.split(" "):
        ...
muratgu
  • 7,241
  • 3
  • 24
  • 26
2

List objects don't have a split method like strings do. If you want to split every string that you have in a titles list, you could nest a loop and do something like this:

def parse(self, doc):
    # NOTE(review): doc is not used in the lines shown; the loop reads the
    # titles list from the enclosing (module) scope instead.

    for title in titles:
        # split() with no argument splits on runs of whitespace and drops
        # empty tokens, unlike split(" ").
        for word in title.split():
            stem = ar_stemmer.stemWord(word)

            # Stop words are skipped; the remaining branches are elided below.
            if stem in self.stopwords:
                pass
    ...
illright
  • 3,991
  • 2
  • 29
  • 54
  • I get this: for titles in titles: UnboundLocalError: local variable 'titles' referenced before assignment – YayaYaya Jun 02 '16 at 17:19
  • @YayaYaya it's because your variable `titles` is declared in the main scope, so there is a different variable `titles` inside the `LSA` class, which is not defined. To solve this, copy the variable 'titles' and what you've assigned to it inside the `LSA` class – illright Jun 02 '16 at 17:40