#import threading
# Feature-based sentence scoring for extractive summarization.
#
# NOTE(review): this source reached review with its line breaks collapsed and
# a span of text missing in the middle of ``sortDict``.  The helpers that
# ``summarize`` calls -- termFrequency, quotes, sentenceLength, NNP, Numbers
# and weekDay -- are not visible here and are NOT reconstructed; they must be
# restored from the original file before ``summarize`` can run.
import re

try:
    # Legacy import kept for very old interpreters; the ``sets`` module does
    # not exist on Python 3, where the builtin ``set`` is used instead.
    from sets import Set as set
except ImportError:
    pass

# Words dropped by removeStopWords.  Matching is case-sensitive, so only the
# exact spellings listed here are filtered.
_STOP_WORDS = frozenset([
    "for", "the", "a", "an", "this", "of", "at", "that", "to", "and", "in",
    "An", "For", "That", "But", "From", "from", "his", "after", "After",
    "Before", "before", "our", "Our", "who", "Who", "The", "We", "we",
    "him", "Him", "s", "And", "Many", "many", "he", "she", "He", "She", "AT",
])

# Compiled once; shared by removeStopWords and summarize.
_WORD_RE = re.compile(r"\w+")


def baseline(document):
    """Score sentences by position: earlier sentences get higher values.

    The document is split on "." and the period is re-appended to every
    piece, including the (possibly empty) piece after the last period.

    Returns a dict mapping each piece to ``number_of_pieces - index``.  When
    the same text occurs more than once, the later (lower) score wins.
    """
    sentence = [piece + "." for piece in document.split(".")]
    dictionar = {}
    counter = len(sentence)
    for piece in sentence:
        dictionar[piece] = counter
        counter -= 1
    return dictionar


def removeStopWords(document):
    """Return the ``\\w+`` tokens of *document* that are not stop words.

    Token order and duplicates are preserved.
    """
    return [word for word in _WORD_RE.findall(document)
            if word not in _STOP_WORDS]


def sortDict(dicta):
    """Return the items of *dicta* ordered by value, highest first.

    NOTE(review): the original body was truncated in the source right after
    the start of a hand-written swap loop over parallel key/value lists.
    This is a reconstruction: it returns ``(key, value)`` pairs because the
    calling script slices the result and reads ``entry[0]`` as text.  Confirm
    the return shape against the original file.

    An empty dict yields an empty list.
    """
    return sorted(dicta.items(), key=lambda pair: pair[1], reverse=True)


# --- text lost from the source at this point -------------------------------
# Everything between the truncated ``sortDict`` and the fragment below is
# missing.  The surviving tail of the last lost function read:
#
#     ...0: dictionar[i]=1 return dictionar
# ---------------------------------------------------------------------------


def decisionTree(base, term, length, noun, week, number, quotes,
                 document_words_length):
    """Combine per-sentence feature scores into one score per sentence.

    Every argument except the last is a dict keyed by sentence text:

    * ``base`` and ``length`` values are divided by 100 before being added;
    * ``noun``, ``week``, ``number`` and ``quotes`` values are added as-is;
    * ``term`` values are indexable, and element ``[1]`` divided by
      ``document_words_length`` is added.

    ``base`` seeds the result, so every key of the other dicts must also be
    a key of ``base``; otherwise ``KeyError`` is raised.

    Returns a dict mapping sentence text to its combined float score.
    """
    dictionar = {}
    for key, value in base.items():
        dictionar[key] = float(value) / 100
    for key, value in length.items():
        dictionar[key] = dictionar[key] + float(value) / 100
    for feature in (noun, week, number, quotes):
        for key, value in feature.items():
            dictionar[key] = dictionar[key] + value
    # NOTE(review): loop nesting was lost with the line breaks; the division
    # and accumulation are assumed to run once per sentence.
    for key, value in term.items():
        if document_words_length:
            # Skipped for a document with no words to avoid dividing by zero.
            dictionar[key] = (dictionar[key]
                              + float(value[1]) / document_words_length)
    return dictionar


def summarize(document):
    """Compute the combined score of every sentence in *document*.

    Calls each feature extractor on the raw text and merges the results with
    ``decisionTree``.  Returns the dict produced by ``decisionTree``.

    NOTE(review): termFrequency, quotes, sentenceLength, NNP, Numbers and
    weekDay are defined outside the text visible to this review.
    """
    document_words_length = len(_WORD_RE.findall(document))
    baser = baseline(document)
    termF = termFrequency(document)
    quote = quotes(document)
    sentLength = sentenceLength(document)
    nouns = NNP(document)
    numbers = Numbers(document)
    weeks = weekDay(document)
    rezultat = decisionTree(baser, termF, sentLength, nouns, weeks, numbers,
                            quote, document_words_length)
    return rezultat
    #if averageLexicalConectivity==True:
#print numbers
#print nouns
#print sentLength

# Driver: read the article, score and rank its sentences, then write the
# top-ranked ones to the output file.

# ``with`` closes the file even if reading raises.
with open("articol.txt", "r") as fis:
    a = fis.read()

rez = summarize(a)
c = sortDict(rez)

# Count the sentences worth considering: pieces of more than 4 characters
# between periods.  Only the count is used, to size the summary.
t = a.split(".")
sentence = [i + "." for i in t if len(i) > 4]
dictionar = {}

# The summary keeps one eighth of the counted sentences, rounded down, so a
# text with fewer than 8 such sentences produces no sentence lines.
with open("summarization.xml", "w") as fis:
    # NOTE(review): the output is named .xml but these literals hold only
    # whitespace; the markup looks to have been stripped from the source.
    # They are reproduced unchanged -- restore the tags from the original.
    fis.write("\n")
    fis.write("\t\n\t\t\n\t\t\t")
    for i in c[:len(sentence) // 8]:
        # Each ranked entry's first element is written as the sentence text.
        fis.write(i[0] + "\n")
    fis.write("\t\t\n")