Shape related code updated to address TODO(b/208879020) #270

Open: wants to merge 3 commits into master
examples/census_example_common.py (19 changes: 7 additions & 12 deletions)
@@ -56,14 +56,14 @@
 ]
 
 
-RAW_DATA_FEATURE_SPEC = dict([(name, tf.io.FixedLenFeature([], tf.string))
+RAW_DATA_FEATURE_SPEC = dict([(name, tf.io.FixedLenFeature(shape=(1,), dtype=tf.string))
                               for name in CATEGORICAL_FEATURE_KEYS] +
-                             [(name, tf.io.FixedLenFeature([], tf.float32))
+                             [(name, tf.io.FixedLenFeature(shape=(1,), dtype=tf.float32))
                               for name in NUMERIC_FEATURE_KEYS] +
-                             [(name, tf.io.VarLenFeature(tf.float32))
+                             [(name, tf.io.FixedLenFeature(shape=(1,), dtype=tf.float32))
                               for name in OPTIONAL_NUMERIC_FEATURE_KEYS] +
                              [(LABEL_KEY,
-                              tf.io.FixedLenFeature([], tf.string))])
+                              tf.io.FixedLenFeature(shape=(1,), dtype=tf.string))])
 
 _SCHEMA = dataset_metadata.DatasetMetadata(
     schema_utils.schema_from_feature_spec(RAW_DATA_FEATURE_SPEC)).schema
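As a sanity check on the new spec, here is a standalone sketch (not part of this PR; the 'age' and 'education' keys are only examples from the census schema): with shape=(1,), tf.io.parse_example returns [batch_size, 1] tensors for these features instead of [batch_size] scalar columns, and the formerly VarLen optional numerics now parse as fixed-length as well.

```python
# Standalone sketch of the new spec's parse behavior (not part of this PR).
import tensorflow as tf

spec = {
    'age': tf.io.FixedLenFeature(shape=(1,), dtype=tf.float32),
    'education': tf.io.FixedLenFeature(shape=(1,), dtype=tf.string),
}

example = tf.train.Example(features=tf.train.Features(feature={
    'age': tf.train.Feature(float_list=tf.train.FloatList(value=[39.0])),
    'education': tf.train.Feature(
        bytes_list=tf.train.BytesList(value=[b'Bachelors'])),
}))

parsed = tf.io.parse_example([example.SerializeToString()], spec)
print(parsed['age'].shape)        # (1, 1): [batch_size, 1], not [batch_size]
print(parsed['education'].shape)  # (1, 1)
```

One behavioral note: a FixedLenFeature without a default_value fails to parse when the feature is absent, unlike the VarLenFeature it replaces for OPTIONAL_NUMERIC_FEATURE_KEYS.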
@@ -125,14 +125,9 @@ def preprocessing_fn(inputs):
     outputs[key] = tft.scale_to_0_1(inputs[key])
 
   for key in OPTIONAL_NUMERIC_FEATURE_KEYS:
-    # This is a SparseTensor because it is optional. Here we fill in a default
-    # value when it is missing.
-    sparse = tf.sparse.SparseTensor(inputs[key].indices, inputs[key].values,
-                                    [inputs[key].dense_shape[0], 1])
-    dense = tf.sparse.to_dense(sp_input=sparse, default_value=0.)
-    # Reshaping from a batch of vectors of size 1 to a batch of scalars.
-    dense = tf.squeeze(dense, axis=1)
-    outputs[key] = tft.scale_to_0_1(dense)
+    # This is now treated as a dense tensor that might be missing values,
+    # which might call for some sort of imputation.
+    outputs[key] = tft.scale_to_0_1(inputs[key])
 
   # For all categorical columns except the label column, we generate a
   # vocabulary, and convert the string feature to a one-hot encoding.
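The new comment leaves imputation as an open question. One possible direction, purely a sketch under the assumption that the optional columns are parsed with a default_value sentinel; names like MISSING_SENTINEL and impute_then_scale are illustrative and not part of this PR or of tf.Transform:

```python
# Hypothetical imputation sketch, not implemented by this PR.
import tensorflow as tf
import tensorflow_transform as tft

MISSING_SENTINEL = -1.0  # assumes the real data never takes this value


def impute_then_scale(x):
  # Replace the parse-time sentinel with the column mean, then scale.
  # Note: tft.mean still sees the sentinel values here, so this is only a
  # rough sketch rather than a statistically exact imputation.
  mean = tft.mean(x)
  imputed = tf.where(tf.equal(x, MISSING_SENTINEL),
                     tf.fill(tf.shape(x), mean), x)
  return tft.scale_to_0_1(imputed)
```

For the sentinel to appear at all, the FixedLenFeature for the optional columns would need default_value=MISSING_SENTINEL in RAW_DATA_FEATURE_SPEC (again, an assumption, not something this change does).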
examples/census_example_v2.py (15 changes: 10 additions & 5 deletions)
@@ -93,7 +93,12 @@ def transform_dataset(data):
       raw_features[key] = tf.RaggedTensor.from_tensor(
           tf.expand_dims(val, -1)).to_sparse()
       continue
-    raw_features[key] = val
+    # We receive the raw data as scalars, one per example in the batch, but
+    # we need them to be tensors of shape (1,), one per example. This can be
+    # thought of as adding an inner feature dimension; more simply, it says
+    # we want to treat each observation as a tensor of shape (1,), which is
+    # a length-one vector.
+    raw_features[key] = tf.expand_dims(val, -1)
   transformed_features = tft_layer(raw_features)
   data_labels = transformed_features.pop(common.LABEL_KEY)
   return (transformed_features, data_labels)
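A tiny illustration of what the new tf.expand_dims call does to a batch (the values are made up): it adds the trailing size-1 dimension that the (1,)-shaped feature spec now expects.

```python
# Illustration only.
import tensorflow as tf

val = tf.constant([25.0, 38.0, 52.0])  # one scalar per example, shape [3]
expanded = tf.expand_dims(val, -1)     # one length-1 vector per example, shape [3, 1]
print(val.shape, expanded.shape)       # (3,) (3, 1)
```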
@@ -128,7 +133,7 @@ def serve_tf_examples_fn(serialized_tf_examples):
     return {'classes': classes, 'scores': outputs}
 
   concrete_serving_fn = serve_tf_examples_fn.get_concrete_function(
-      tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs'))
+      tf.TensorSpec(shape=(1,), dtype=tf.string, name='inputs'))
   signatures = {'serving_default': concrete_serving_fn}
 
   # This is required in order to make this model servable with model_server.
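This signature change also narrows what the exported model accepts: a TensorSpec of shape=(1,) pins the serving input to exactly one serialized example per call, while the old shape=[None] took any batch size. A minimal sketch of that difference, using a hypothetical stand-in rather than the real serve_tf_examples_fn:

```python
# `serve` is a stand-in; the real serve_tf_examples_fn parses and runs the model.
import tensorflow as tf


@tf.function
def serve(serialized):
  return tf.strings.length(serialized)


batch_fn = serve.get_concrete_function(
    tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs'))  # any batch size
single_fn = serve.get_concrete_function(
    tf.TensorSpec(shape=(1,), dtype=tf.string, name='inputs'))    # exactly one example

print(batch_fn(tf.constant(['a', 'b'])))  # OK: shape [2] is compatible with [None]
print(single_fn(tf.constant(['a'])))      # OK: shape [1]
# single_fn(tf.constant(['a', 'b'])) would raise: shape (2,) is incompatible with (1,)
```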
@@ -191,12 +196,12 @@ def train_and_evaluate(raw_train_eval_data_path_pattern,
   for key, spec in feature_spec.items():
     if isinstance(spec, tf.io.VarLenFeature):
       inputs[key] = tf.keras.layers.Input(
-          shape=[None], name=key, dtype=spec.dtype, sparse=True)
+          shape=(1,), name=key, dtype=spec.dtype, sparse=True)
     elif isinstance(spec, tf.io.FixedLenFeature):
       # TODO(b/208879020): Move into schema such that spec.shape is [1] and not
       # [] for scalars.
       inputs[key] = tf.keras.layers.Input(
-          shape=spec.shape or [1], name=key, dtype=spec.dtype)
+          shape=spec.shape, name=key, dtype=spec.dtype)
     else:
       raise ValueError('Spec type is not supported: ', key, spec)
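To sanity-check the spec.shape change, assuming every FixedLenFeature in the schema now carries shape=(1,) as the spec change above implies, the Keras input ends up batched as [None, 1] (the 'age' key is only an example):

```python
# Sketch: Keras's `shape` argument is per-example, so a (1,) spec yields a
# symbolic tensor of shape (None, 1) once the batch dimension is added.
import tensorflow as tf

spec = tf.io.FixedLenFeature(shape=(1,), dtype=tf.float32)
inp = tf.keras.layers.Input(shape=spec.shape, name='age', dtype=spec.dtype)
print(inp.shape)  # (None, 1)
```

The removed `or [1]` fallback existed only because a scalar FixedLenFeature reported shape [], which is exactly what the TODO(b/208879020) comment asks to move into the schema.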

@@ -237,11 +242,11 @@ def main(input_data_dir,
   train_data_file = os.path.join(input_data_dir, 'adult.data')
   test_data_file = os.path.join(input_data_dir, 'adult.test')
 
-  common.transform_data(train_data_file, test_data_file, working_dir)
 
   if read_raw_data_for_training:
     raw_train_and_eval_patterns = (train_data_file, test_data_file)
     transformed_train_and_eval_patterns = None
+    common.transform_data(train_data_file, test_data_file, working_dir)
   else:
     train_pattern = os.path.join(working_dir,
                                  common.TRANSFORMED_TRAIN_DATA_FILEBASE + '*')