@@ -50,7 +50,6 @@ def term_frequency(term : str, document : str) -> int:
50
50
found within the document
51
51
52
52
@examples:
53
- >>> document = "To be, or not to be"
54
53
>>> term_frequency("to", "To be, or not to be")
55
54
2
56
55
"""
@@ -74,16 +73,10 @@ def document_frequency(term: str, corpus: str) -> int:
74
73
@returns : the number of documents in the corpus that contain the term you are
75
74
searching for and the number of documents in the corpus
76
75
@examples :
77
- >>> corpus = \
78
- "This is the first document in the corpus.\n ThIs is \
79
- the second document in the corpus. \n THIS is \
80
- the third document in the corpus."
81
- >>> term = "first"
82
- 1
83
- >>> term = "document"
84
- 3
85
- >>> term = "this"
86
- 3
76
+ >>> document_frequency("first", "This is the first document in the corpus.\\nThIs is\
77
+ the second document in the corpus.\\nTHIS is \
78
+ the third document in the corpus.")
79
+ (1, 3)
87
80
"""
88
81
corpus_without_punctuation = corpus .translate (
89
82
str .maketrans ("" , "" , string .punctuation )
@@ -107,9 +100,8 @@ def inverse_document_frequency(df : int, N: int) -> float:
107
100
the number of documents in the corpus.
108
101
@returns : log10(N/df)
109
102
@examples :
110
- >>> df = 1
111
- >>> N = 3
112
- .477
103
+ >>> inverse_document_frequency(1, 3)
104
+ 0.477
113
105
"""
114
106
try :
115
107
idf = round (log10 (N / df ), 3 )
@@ -128,5 +120,8 @@ def tf_idf(tf : int, idf: int) -> float:
128
120
frequency : tf-idf = TF * IDF
129
121
@params : tf, the term frequency, and idf, the inverse document
130
122
frequency
123
+ @examples :
124
+ >>> tf_idf(2, 0.477)
125
+ 0.954
131
126
"""
132
127
return round (tf * idf , 3 )
0 commit comments