Corpus type - Categories, Tagged, UDHR
import pandas as pd
from nltk.corpus import brown
#CategorizedTaggedCorpusReader
print(brown)
brown.fileids()[:5]
brown.words()
brown.tagged_words()
brown.categories()
[('The', 'AT'), ('Fulton', 'NP-TL'), ...]
['adventure', 'belles_lettres', 'editorial', 'fiction', 'government', 'hobbies', 'humor', 'learned', 'lore', 'mystery', 'news', 'religion', 'reviews', 'romance', 'science_fiction']
brown.words(cat..
2018. 12. 11.
말뭉치(Corpus) 다루기, Pandas 기초
import nltk
from nltk.corpus import gutenberg
#Plaintext
gutenberg
print(gutenberg.fileids()[:10])
['austen-emma.txt', 'austen-persuasion.txt', 'austen-sense.txt', 'bible-kjv.txt', 'blake-poems.txt', 'bryant-stories.txt', 'burgess-busterbrown.txt', 'carroll-alice.txt', 'chesterton-ball.txt', 'chesterton-brown.txt']
fid_1 = gutenberg.fileids()[0]
raw_text = gutenberg.raw(fid_1)
print(type(raw_text))
p..
2018. 12. 11.