Replace TensorFlow I/O API calls with Python standard-library APIs or APIs from popular third-party libraries

4 years ago · ae101dec11
--- a/model_zoo/official/recommend/ncf/src/movielens.py
+++ b/model_zoo/official/recommend/ncf/src/movielens.py
@@ -22,6 +22,7 @@ from __future__ import division
 from __future__ import print_function

 import os
 import shutil
 import tempfile
 import zipfile
 import argparse
@@ -32,7 +33,6 @@ from six.moves import urllib
 import numpy as np
 import pandas as pd
 from absl import logging
 import tensorflow as tf

 ML_1M = "ml-1m"
 ML_20M = "ml-20m"
@@ -100,9 +100,9 @@ def _download_and_clean(dataset, data_dir):

    expected_files = ["{}.zip".format(dataset), RATINGS_FILE, MOVIES_FILE]

    tf.io.gfile.makedirs(data_subdir)
    os.makedirs(data_subdir, exist_ok=True)
    if set(expected_files).intersection(
            tf.io.gfile.listdir(data_subdir)) == set(expected_files):
            os.listdir(data_subdir)) == set(expected_files):
        logging.info("Dataset {} has already been downloaded".format(dataset))
        return

@@ -127,16 +127,16 @@ def _download_and_clean(dataset, data_dir):
        else:
            _regularize_20m_dataset(temp_dir)

        for fname in tf.io.gfile.listdir(temp_dir):
            if not tf.io.gfile.exists(os.path.join(data_subdir, fname)):
                tf.io.gfile.copy(os.path.join(temp_dir, fname),
                                 os.path.join(data_subdir, fname))
        for fname in os.listdir(temp_dir):
            if not os.path.exists(os.path.join(data_subdir, fname)):
                shutil.copy(os.path.join(temp_dir, fname),
                            os.path.join(data_subdir, fname))
            else:
                logging.info("Skipping copy of {}, as it already exists in the "
                             "destination folder.".format(fname))

    finally:
        tf.io.gfile.rmtree(temp_dir)
        shutil.rmtree(temp_dir)


 def _transform_csv(input_path, output_path, names, skip_first, separator=","):
@@ -152,8 +152,8 @@ def _transform_csv(input_path, output_path, names, skip_first, separator=","):
    if six.PY2:
        names = [six.ensure_text(n, "utf-8") for n in names]

    with tf.io.gfile.GFile(output_path, "wb") as f_out, \
            tf.io.gfile.GFile(input_path, "rb") as f_in:
    with open(output_path, "wb") as f_out, \
            open(input_path, "rb") as f_in:

        # Write column names to the csv.
        f_out.write(",".join(names).encode("utf-8"))
@@ -199,7 +199,7 @@ def _regularize_1m_dataset(temp_dir):
        output_path=os.path.join(temp_dir, MOVIES_FILE),
        names=MOVIE_COLUMNS, skip_first=False, separator="::")

    tf.io.gfile.rmtree(working_dir)
    shutil.rmtree(working_dir)


 def _regularize_20m_dataset(temp_dir):
@@ -233,7 +233,7 @@ def _regularize_20m_dataset(temp_dir):
        output_path=os.path.join(temp_dir, MOVIES_FILE),
        names=MOVIE_COLUMNS, skip_first=True, separator=",")

    tf.io.gfile.rmtree(working_dir)
    shutil.rmtree(working_dir)


 def download(dataset, data_dir):
@@ -244,14 +244,14 @@ def download(dataset, data_dir):


 def ratings_csv_to_dataframe(data_dir, dataset):
    with tf.io.gfile.GFile(os.path.join(data_dir, dataset, RATINGS_FILE)) as f:
    with open(os.path.join(data_dir, dataset, RATINGS_FILE)) as f:
        return pd.read_csv(f, encoding="utf-8")


 def csv_to_joint_dataframe(data_dir, dataset):
    ratings = ratings_csv_to_dataframe(data_dir, dataset)

    with tf.io.gfile.GFile(os.path.join(data_dir, dataset, MOVIES_FILE)) as f:
    with open(os.path.join(data_dir, dataset, MOVIES_FILE)) as f:
        movies = pd.read_csv(f, encoding="utf-8")

    df = ratings.merge(movies, on=ITEM_COLUMN)