Skip to content

Commit 2412204

Browse files
icywang86ruiDan Choi
authored and
Dan Choi
committed
documentation: update doc strings for feature_store (aws#500) (aws#506)
1 parent 0dbf11c commit 2412204

File tree

3 files changed

+87
-34
lines changed

3 files changed

+87
-34
lines changed

src/sagemaker/feature_store/feature_definition.py

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,14 @@
1010
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
1111
# ANY KIND, either express or implied. See the License for the specific
1212
# language governing permissions and limitations under the License.
13-
"""The Feature Definitions for FeatureStore."""
13+
"""The Feature Definitions for FeatureStore.
14+
15+
A feature is a measurable property or characteristic that encapsulates an observed phenomenon.
16+
In the Amazon SageMaker Feature Store API, a feature is an attribute of a record.
17+
You can define a name and type for every feature stored in Feature Store. Name uniquely
18+
identifies a feature within a feature group. Type identifies
19+
the datatype for the values of the Feature.
20+
"""
1421
from __future__ import absolute_import
1522

1623
from enum import Enum
@@ -22,7 +29,10 @@
2229

2330

2431
class FeatureTypeEnum(Enum):
25-
"""Enum of feature types."""
32+
"""Enum of feature types.
33+
34+
The data type of a feature can be Fractional, Integral or String.
35+
"""
2636

2737
FRACTIONAL = "Fractional"
2838
INTEGRAL = "Integral"
@@ -33,6 +43,8 @@ class FeatureTypeEnum(Enum):
3343
class FeatureDefinition(Config):
3444
"""Feature definition.
3545
46+
This instantiates a Feature Definition object where FeatureDefinition is a subclass of Config.
47+
3648
Attributes:
3749
feature_name (str): The name of the feature
3850
feature_type (FeatureTypeEnum): The type of the feature
@@ -42,7 +54,7 @@ class FeatureDefinition(Config):
4254
feature_type: FeatureTypeEnum = attr.ib()
4355

4456
def to_dict(self) -> Dict[str, Any]:
45-
"""Constructs a dictionary based on the attributes"""
57+
"""Construct a dictionary based on each attribute."""
4658
return Config.construct_dict(
4759
FeatureName=self.feature_name, FeatureType=self.feature_type.value
4860
)
@@ -51,13 +63,16 @@ def to_dict(self) -> Dict[str, Any]:
5163
class FractionalFeatureDefinition(FeatureDefinition):
5264
"""Fractional feature definition.
5365
66+
This class instantiates a FractionalFeatureDefinition object, a subclass of FeatureDefinition
67+
where the data type of the feature being defined is a Fractional.
68+
5469
Attributes:
5570
feature_name (str): The name of the feature
5671
feature_type (FeatureTypeEnum): A `FeatureTypeEnum.FRACTIONAL` type
5772
"""
5873

5974
def __init__(self, feature_name: str):
60-
"""Constructs an instance of FractionalFeatureDefinition.
75+
"""Construct an instance of FractionalFeatureDefinition.
6176
6277
Args:
6378
feature_name (str): the name of the feature.
@@ -68,13 +83,16 @@ def __init__(self, feature_name: str):
6883
class IntegralFeatureDefinition(FeatureDefinition):
6984
"""Integral feature definition.
7085
86+
This class instantiates an IntegralFeatureDefinition object, a subclass of FeatureDefinition
87+
where the data type of the feature being defined is an Integral.
88+
7189
Attributes:
7290
feature_name (str): the name of the feature.
7391
feature_type (FeatureTypeEnum): a `FeatureTypeEnum.INTEGRAL` type.
7492
"""
7593

7694
def __init__(self, feature_name: str):
77-
"""Constructs an instance of IntegralFeatureDefinition.
95+
"""Construct an instance of IntegralFeatureDefinition.
7896
7997
Args:
8098
feature_name (str): the name of the feature.
@@ -85,13 +103,16 @@ def __init__(self, feature_name: str):
85103
class StringFeatureDefinition(FeatureDefinition):
86104
"""String feature definition.
87105
106+
This class instantiates a StringFeatureDefinition object, a subclass of FeatureDefinition
107+
where the data type of the feature being defined is a String.
108+
88109
Attributes:
89110
feature_name (str): the name of the feature.
90111
feature_type (FeatureTypeEnum): a `FeatureTypeEnum.STRING` type.
91112
"""
92113

93114
def __init__(self, feature_name: str):
94-
"""Constructs an instance of StringFeatureDefinition.
115+
"""Construct an instance of StringFeatureDefinition.
95116
96117
Args:
97118
feature_name (str): the name of the feature.

src/sagemaker/feature_store/feature_group.py

Lines changed: 36 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,15 @@
1010
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
1111
# ANY KIND, either express or implied. See the License for the specific
1212
# language governing permissions and limitations under the License.
13-
"""The FeatureGroup entity for FeatureStore."""
13+
"""The FeatureGroup entity for FeatureStore.
14+
15+
A feature group is a logical grouping of features, defined in the Feature Store,
16+
to describe records. A feature group definition is composed of a list of feature definitions,
17+
a record identifier name, and configurations for its online and offline store.
18+
Create feature group, describe feature group, update feature groups, delete feature group and
19+
list feature groups APIs can be used to manage feature groups.
20+
"""
21+
1422
from __future__ import absolute_import
1523

1624
import logging
@@ -45,7 +53,10 @@
4553

4654
@attr.s
4755
class AthenaQuery:
48-
"""Class to manager querying of feature store data with AWS Athena
56+
"""Class to manage querying of feature store data with AWS Athena.
57+
58+
This class instantiates an AthenaQuery object that is used to retrieve data from feature store
59+
via standard SQL queries.
4960
5061
Attributes:
5162
catalog (str): name of the data catalog.
@@ -63,12 +74,15 @@ class AthenaQuery:
6374
_result_file_prefix: str = attr.ib(init=False, default=None)
6475

6576
def run(self, query_string: str, output_location: str, kms_key: str = None) -> str:
66-
"""Run athena query with the given query_string
77+
"""Execute a SQL query given a query string, output location and kms key.
78+
79+
This method executes the SQL query using Athena and outputs the results to output_location
80+
and returns the execution id of the query.
6781
6882
Args:
6983
query_string: SQL query string.
70-
output_location: s3 uri of the query result.
71-
kms_key: KMS key id, if set will be used to encrypt the query result file.
84+
output_location: S3 URI of the query result.
85+
kms_key: KMS key id. If set, will be used to encrypt the query result file.
7286
7387
Returns:
7488
Execution id of the query.
@@ -103,7 +117,7 @@ def get_query_execution(self) -> Dict[str, Any]:
103117
)
104118

105119
def as_dataframe(self) -> DataFrame:
106-
"""Download the result of the current query and load it into a DataFrame
120+
"""Download the result of the current query and load it into a DataFrame.
107121
108122
Returns:
109123
A pandas DataFrame containing the query result.
@@ -132,10 +146,12 @@ def as_dataframe(self) -> DataFrame:
132146
class IngestionManagerPandas:
133147
"""Class to manage the multi-threaded data ingestion process.
134148
149+
This class will manage the data ingestion process which is multi-threaded.
150+
135151
Attributes:
136152
feature_group_name (str): name of the Feature Group.
137153
sagemaker_session (Session): instance of the Session class to perform boto calls.
138-
data_frame (DataFrame): pandas data_frame to be ingested to the given feature group.
154+
data_frame (DataFrame): pandas DataFrame to be ingested to the given feature group.
139155
max_workers (int): number of threads to create.
140156
"""
141157

@@ -201,9 +217,8 @@ def run(self, wait=True, timeout=None):
201217
Args:
202218
wait (bool): whether to wait for the ingestion to finish or not.
203219
timeout (Union[int, float]): ``concurrent.futures.TimeoutError`` will be raised
204-
if timeout is reached.
220+
if timeout is reached.
205221
"""
206-
207222
executor = ThreadPoolExecutor(max_workers=self.max_workers)
208223
batch_size = math.ceil(self.data_frame.shape[0] / self.max_workers)
209224

@@ -230,7 +245,10 @@ def run(self, wait=True, timeout=None):
230245

231246
@attr.s
232247
class FeatureGroup:
233-
"""FeatureGroup for FeatureStore
248+
"""FeatureGroup definition.
249+
250+
This class instantiates a FeatureGroup object that comprises a name for the FeatureGroup,
251+
session instance, and a list of feature definition objects i.e., FeatureDefinition.
234252
235253
Attributes:
236254
name (str): name of the FeatureGroup instance.
@@ -282,7 +300,7 @@ def create(
282300
description: str = None,
283301
tags: List[Dict[str, str]] = None,
284302
) -> Dict[str, Any]:
285-
"""Creates a SageMaker FeatureStore FeatureGroup
303+
"""Create a SageMaker FeatureStore FeatureGroup.
286304
287305
Args:
288306
s3_uri (str): S3 URI of the offline store.
@@ -300,7 +318,6 @@ def create(
300318
Returns:
301319
Response dict from service.
302320
"""
303-
304321
create_feature_store_args = dict(
305322
feature_group_name=self.name,
306323
record_identifier_name=record_identifier_name,
@@ -336,7 +353,7 @@ def create(
336353
return self.sagemaker_session.create_feature_group(**create_feature_store_args)
337354

338355
def delete(self):
339-
"""Deletes a FeatureGroup"""
356+
"""Delete a FeatureGroup."""
340357
self.sagemaker_session.delete_feature_group(feature_group_name=self.name)
341358

342359
def describe(self, next_token: str = None) -> Dict[str, Any]:
@@ -354,7 +371,7 @@ def load_feature_definitions(
354371
self,
355372
data_frame: DataFrame,
356373
) -> Sequence[FeatureDefinition]:
357-
"""Loads feature definitions from a Pandas DataFrame
374+
"""Load feature definitions from a Pandas DataFrame.
358375
359376
Column name is used as feature name. Feature type is inferred from the dtype
360377
of the column. Dtype int_, int8, int16, int32, int64, uint8, uint16, uint32
@@ -389,7 +406,7 @@ def load_feature_definitions(
389406
return self.feature_definitions
390407

391408
def put_record(self, record: Sequence[FeatureValue]):
392-
"""Puts a single record in the FeatureGroup
409+
"""Put a single record in the FeatureGroup.
393410
394411
Args:
395412
record (Sequence[FeatureValue]): a list containing feature values.
@@ -430,7 +447,7 @@ def ingest(
430447
return manager
431448

432449
def athena_query(self) -> AthenaQuery:
433-
"""Creates an AthenaQuery instance
450+
"""Create an AthenaQuery instance.
434451
435452
Returns:
436453
An instance of AthenaQuery initialized with data catalog configurations.
@@ -449,10 +466,11 @@ def athena_query(self) -> AthenaQuery:
449466
raise RuntimeError("No metastore is configured with this feature group.")
450467

451468
def as_hive_ddl(self, database: str = "sagemaker_featurestore", table_name: str = None) -> str:
452-
"""Generate DDL can be used to create Hive table
469+
"""Generate Hive DDL commands that can be used to define or change structure of tables or
470+
databases in Hive.
453471
454472
Schema of the table is generated based on the feature definitions. Columns are named
455-
after feature name and data-type are infered based on feature type. Integral feature
473+
after feature name and data-type are inferred based on feature type. Integral feature
456474
type is mapped to INT data-type. Fractional feature type is mapped to FLOAT data-type.
457475
String feature type is mapped to STRING data-type.
458476
@@ -464,7 +482,6 @@ def as_hive_ddl(self, database: str = "sagemaker_featurestore", table_name: str
464482
Returns:
465483
Generated create table DDL string.
466484
"""
467-
468485
if not table_name:
469486
table_name = self.name
470487

src/sagemaker/feature_store/inputs.py

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,22 @@
1010
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
1111
# ANY KIND, either express or implied. See the License for the specific
1212
# language governing permissions and limitations under the License.
13-
"""The input configs for FeatureStore."""
13+
"""The input configs for FeatureStore.
14+
15+
A feature store serves as the single source of truth to store, retrieve,
16+
remove, track, share, discover, and control access to features.
17+
18+
You can configure two types of feature stores, an online feature store
19+
and an offline feature store.
20+
21+
The online feature store is a low latency, high availability cache for a
22+
feature group that enables real-time lookup of records. Only the latest record is stored.
23+
24+
The offline feature store is used when low (sub-second) latency reads are not needed.
25+
This is the case when you want to store and serve features for exploration, model training,
26+
and batch inference. The offline store uses your Amazon Simple Storage Service (Amazon S3)
27+
bucket for storage. A prefixing scheme based on event time is used to store your data in Amazon S3.
28+
"""
1429
from __future__ import absolute_import
1530

1631
import abc
@@ -27,7 +42,7 @@ class Config(abc.ABC):
2742

2843
@abc.abstractmethod
2944
def to_dict(self) -> Dict[str, Any]:
30-
"""Gets the dictionary from attributes.
45+
"""Get the dictionary from attributes.
3146
3247
Returns:
3348
dict containing the attributes.
@@ -64,7 +79,7 @@ class OnlineStoreSecurityConfig(Config):
6479
kms_key_id: str = attr.ib(factory=str)
6580

6681
def to_dict(self) -> Dict[str, Any]:
67-
"""Constructs a dictionary based on the attributes"""
82+
"""Construct a dictionary based on the attributes."""
6883
return Config.construct_dict(KmsKeyId=self.kms_key_id)
6984

7085

@@ -81,7 +96,7 @@ class OnlineStoreConfig(Config):
8196
online_store_security_config: OnlineStoreSecurityConfig = attr.ib(default=None)
8297

8398
def to_dict(self) -> Dict[str, Any]:
84-
"""Constructs a dictionary based on the attributes.
99+
"""Construct a dictionary based on the attributes.
85100
86101
Returns:
87102
dict represents the attributes.
@@ -105,7 +120,7 @@ class S3StorageConfig(Config):
105120
kms_key_id: str = attr.ib(default=None)
106121

107122
def to_dict(self) -> Dict[str, Any]:
108-
"""Constructs a dictionary based on the attributes.
123+
"""Construct a dictionary based on the attributes provided.
109124
110125
Returns:
111126
dict represents the attributes.
@@ -131,7 +146,7 @@ class DataCatalogConfig(Config):
131146
database: str = attr.ib(factory=str)
132147

133148
def to_dict(self) -> Dict[str, Any]:
134-
"""Constructs a dictionary based on the attributes.
149+
"""Construct a dictionary based on the attributes provided.
135150
136151
Returns:
137152
dict represents the attributes.
@@ -158,7 +173,7 @@ class OfflineStoreConfig(Config):
158173
data_catalog_config: DataCatalogConfig = attr.ib(default=None)
159174

160175
def to_dict(self) -> Dict[str, Any]:
161-
"""Constructs a dictionary based on the attributes.
176+
"""Construct a dictionary based on the attributes.
162177
163178
Returns:
164179
dict represents the attributes.
@@ -172,7 +187,7 @@ def to_dict(self) -> Dict[str, Any]:
172187

173188
@attr.s
174189
class FeatureValue(Config):
175-
"""FeatureValue for FeatureStore
190+
"""FeatureValue for FeatureStore.
176191
177192
Attributes:
178193
feature_name (str): name of the Feature.
@@ -183,7 +198,7 @@ class FeatureValue(Config):
183198
value_as_string: str = attr.ib(default=None)
184199

185200
def to_dict(self) -> Dict[str, Any]:
186-
"""Constructs a dictionary based on the attributes.
201+
"""Construct a dictionary based on the attributes provided.
187202
188203
Returns:
189204
dict represents the attributes.

0 commit comments

Comments
 (0)