# zzy34407230 / mindspore2022
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""The analyzer for MindData profiling files."""
import copy
import csv
import json
import os
import stat

from mindspore.profiler.common.exceptions.exceptions import \
    ProfilerPathErrorException, ProfilerFileNotFoundException, \
    ProfilerDirNotFoundException, ProfilerRawFileException
from mindspore import log as logger
from mindspore.profiler.common.validator.validate_path import validate_and_normalize_path


class MinddataProfilingAnalyzer:
    """
    The analyzer for MindData profiling files.

    Args:
        source_dir (str): The source directory for MindData profiling input files.
        device_id (str): The device ID.
        output_path (str): The target directory for the analyzed summary. Default: `./`.

    Raises:
        ProfilerPathErrorException: If the source directory or the output path is invalid.
        ProfilerDirNotFoundException: If the source directory or the output path does not exist.
        ProfilerFileNotFoundException: If any of the MindData profiling input files do not exist.
    """

    def __init__(self, source_dir, device_id, output_path='./'):
        """Validate the directories and resolve every input/output filename used by the analyzer."""
        # The device ID is stored first because each filename helper formats it into a name.
        self._device_id = device_id

        # Both directories are validated before any profiling file is looked up.
        self._source_dir = self._validate_directory(source_dir, 'Source directory')
        self._output_path = self._validate_directory(output_path, 'Output path')

        # Resolve the MindData profiling input files (pipeline, CPU utilization, device trace).
        self._pipeline_path_filename = self._get_pipeline_path_filename(source_dir)
        self._cpu_utilization_path_filename = self._get_cpu_utilization_path_filename(source_dir)
        trace_lookup = self._get_device_trace_path_filename(source_dir)
        self._device_trace_path_filename, self._device_queue_file_found = trace_lookup

        # Resolve the JSON summary output filename.
        self._save_path = self._get_save_path(output_path)

    @property
    def save_path(self):
        """
        The property of save path.

        Returns:
            str, the save path.
        """
        return self._save_path

    def analyze(self):
        """
        Load the three MindData profiling inputs, then analyze them and save the summary.

        The pipeline file and the CPU utilization file are parsed as JSON; the device queue or
        dataset iterator trace file is read as a list of text lines. Each input is rejected if it
        turns out to be empty before the next one is opened.

        Returns:
            dict, the summary returned by `_analyze_and_save`, which also writes it to disk in
               JSON file 'minddata_pipeline_summary_<device_id>.json' and
               CSV file 'minddata_pipeline_summary_<device_id>.csv'.

        Raises:
            ProfilerRawFileException: If a MindData profiling file cannot be parsed or is empty.
        """

        def _reject_if_empty(content, log_format, filename, error_text):
            # Shared emptiness check: log the offending filename, then raise.
            if not content:
                logger.warning(log_format, filename)
                raise ProfilerRawFileException(error_text)

        # MindData pipeline file (JSON)
        pipeline_filename = self._pipeline_path_filename
        with open(pipeline_filename, 'r') as pipeline_fp:
            try:
                pipeline_info = json.load(pipeline_fp)
            except (json.JSONDecodeError, TypeError) as load_error:
                logger.warning(load_error)
                raise ProfilerRawFileException(
                    'Failed to find the MindData pipeline profiling file.') from load_error
        _reject_if_empty(pipeline_info, 'The MindData pipeline file <%s> is empty.', pipeline_filename,
                         'The MindData pipeline file is empty.')

        # CPU utilization file (JSON)
        cpu_util_filename = self._cpu_utilization_path_filename
        with open(cpu_util_filename, 'r') as cpu_util_fp:
            try:
                cpu_util_info = json.load(cpu_util_fp)
            except (json.JSONDecodeError, TypeError) as load_error:
                logger.warning(load_error)
                raise ProfilerRawFileException(
                    'Failed to find the MindData CPU utilization file.') from load_error
        _reject_if_empty(cpu_util_info, 'The MindData CPU utilization file <%s> is empty.', cpu_util_filename,
                         'The MindData CPU utilization file is empty.')

        # Device queue or dataset iterator trace profiling file (text, one record per line)
        trace_filename = self._device_trace_path_filename
        with open(trace_filename, 'r') as trace_fp:
            try:
                device_trace_info = trace_fp.readlines()
            except TypeError as load_error:
                logger.warning(load_error)
                raise ProfilerRawFileException(
                    'Failed to find the MindData trace profiling file.') from load_error
        _reject_if_empty(device_trace_info, 'The MindData trace profiling file <%s> is empty.', trace_filename,
                         'The MindData trace profiling file is empty.')

        # Analyze the loaded information and save the result
        return self._analyze_and_save(pipeline_info, cpu_util_info, device_trace_info)

    @staticmethod
    def _validate_directory(dir_name, dir_type):
        """
        Validate the input directory.

        Args:
             dir_name (str): The directory name.
             dir_type (str): The type of directory.  (Should begin with capital since is used for output messages.)

        Returns:
            str, the path returned by `validate_and_normalize_path` for `dir_name`.

        Raises:
            ProfilerPathErrorException: If `validate_and_normalize_path` raises RuntimeError for `dir_name`.
            ProfilerDirNotFoundException: If the validated path is not an existing directory.
        """
        try:
            validated_dir = validate_and_normalize_path(dir_name)
        except RuntimeError as path_error:
            # `validated_dir` is never bound when validation raises, so log the raw input instead;
            # referencing it here would mask the real error with an UnboundLocalError.
            logger.warning('<%s> <%s> is invalid.', dir_type, dir_name)
            raise ProfilerPathErrorException(dir_type + ' is invalid.') from path_error

        if not os.path.isdir(validated_dir):
            logger.warning('<%s> <%s> not found.', dir_type, validated_dir)
            raise ProfilerDirNotFoundException(validated_dir)
        return validated_dir

    def _get_pipeline_path_filename(self, source_dir):
        """
        Build and check the full path of the MindData pipeline profiling file.
        The filename is 'pipeline_profiling_<device_id>.json'.

        Args:
            source_dir (str): The source directory for MindData profiling files.

        Returns:
            str, the MindData pipeline full path filename.

        Raises:
            ProfilerPathErrorException: If `validate_and_normalize_path` raises RuntimeError for the path.
            ProfilerFileNotFoundException: If the validated path is not an existing file.
        """
        candidate = os.path.join(source_dir, 'pipeline_profiling_{}.json'.format(self._device_id))

        try:
            checked_path = validate_and_normalize_path(candidate)
        except RuntimeError as path_filename_error:
            # Validation failed, so only the unvalidated candidate can be reported.
            logger.warning('The MindData pipeline path %s is invalid.', candidate)
            raise ProfilerPathErrorException('The MindData pipeline path is invalid.') from path_filename_error

        if os.path.isfile(checked_path):
            return checked_path

        logger.warning('The MindData pipeline file <%s> is not found.', checked_path)
        raise ProfilerFileNotFoundException(checked_path)

    def _get_cpu_utilization_path_filename(self, source_dir):
        """
        Build and check the full path of the MindData CPU utilization file.
        The filename is 'minddata_cpu_utilization_<device_id>.json'.

        Args:
            source_dir (str): The source directory for MindData profiling files.

        Returns:
            str, the MindData CPU utilization full path filename.

        Raises:
            ProfilerPathErrorException: If `validate_and_normalize_path` raises RuntimeError for the path.
            ProfilerFileNotFoundException: If the validated path is not an existing file.
        """
        file_name = 'minddata_cpu_utilization_{}.json'.format(self._device_id)
        raw_path = os.path.join(source_dir, file_name)

        try:
            resolved_path = validate_and_normalize_path(raw_path)
        except RuntimeError as path_filename_error:
            # Validation failed, so only the unvalidated path can be reported.
            logger.warning('The MindData CPU utilization path <%s> is invalid.', raw_path)
            raise ProfilerPathErrorException('The MindData CPU utilization path is invalid.') from path_filename_error

        file_exists = os.path.isfile(resolved_path)
        if not file_exists:
            logger.warning('The MindData CPU utilization file <%s> is not found.', resolved_path)
            raise ProfilerFileNotFoundException(resolved_path)

        return resolved_path

    def _get_device_trace_path_filename(self, source_dir):
        """
        Get the MindData device trace profiling full path filename.
        File search order:
        1) 'device_queue_profiling_<device_id>.txt' and then
        2) 'dataset_iterator_profiling_<device_id>.txt'.

        Args:
            source_dir (str): The source directory for MindData profiling files.

        Returns:
            str, the MindData device trace profiling full path filename.
            bool, flag which indicates if 'device_queue_profiling_<device_id>.txt' has been found or not
        """
        # Initialize variable for MindData device trace profiling filename
        device_trace_path_filename = ''
        # Initialize flag that 'device_queue_profiling_<device_id>.txt' has not yet been found
        device_queue_file_found = False

        txt_names = [os.path.join(
            source_dir,
            txt_name.format(self._device_id)) for txt_name in
                     ('device_queue_profiling_{}.txt', 'dataset_iterator_profiling_{}.txt')]

        # Search for a device trace profiling file
        if os.path.exists(txt_names[0]):
            device_trace_path_filename = txt_names[0]
            device_queue_file_found = True
        elif os.path.exists(txt_names[1]):
            device_trace_path_filename = txt_names[1]
        else:
            logger.warning('A MindData device trace profiling file <%s> nor <%s> cannot be found.',
                           txt_names[0], txt_names[1])
            raise ProfilerPathErrorException('A MindData device trace profiling file cannot be found.')

        if not os.path.isfile(device_trace_path_filename):
            logger.warning('The MindData device trace profiling file <%s> is not found.', device_trace_path_filename)
            raise ProfilerFileNotFoundException(device_trace_path_filename)

        return device_trace_path_filename, device_queue_file_found

    def _get_save_path(self, output_path):
        """
        Build the full pathname of the JSON file that receives the MindData pipeline summary.
        The output filename is 'minddata_pipeline_summary_<device_id>.json'.

        Args:
            output_path (str): The output directory.

        Returns:
            str, the save path.

        Raises:
            ProfilerPathErrorException: If `validate_and_normalize_path` raises RuntimeError for `output_path`.
            ProfilerDirNotFoundException: If the validated output path is not an existing directory.
        """
        try:
            checked_dir = validate_and_normalize_path(output_path)
        except RuntimeError as path_error:
            logger.warning('Output path <%s> is invalid.', output_path)
            raise ProfilerPathErrorException('Output path is invalid.') from path_error

        if os.path.isdir(checked_dir):
            summary_name = 'minddata_pipeline_summary_{}.json'.format(self._device_id)
            return os.path.join(checked_dir, summary_name)

        logger.warning('The output directory <%s> not found.', checked_dir)
        raise ProfilerDirNotFoundException(checked_dir)

    @staticmethod
    def _parse_pipeline_metrics_info(metrics):
        """
        Parse and process the pipeline profiling metrics information for a given op.

        Args:
            metrics (dict): The pipeline profiling metrics information for a given op.

        Returns:
            List with the following analyzed metrics information:
                output queue size
                output queue length
                output queue average size,
                output queue utilization percentage
                output queue empty frequency percentage
        """
        # Note: Some ops like DeviceQueue and inline ops do not have metrics information
        queue_size = -1
        queue_length = -1
        queue_average_size = -1
        queue_utilization_pct = -1
        queue_empty_freq_pct = -1
        if metrics and metrics['output_queue']:
            queue_size = metrics['output_queue']['size']
            queue_length = metrics['output_queue']['length']
            queue_average_size = round(sum(queue_size) / len(queue_size), 2) if queue_size else -1
            queue_utilization_pct = round(100 * queue_average_size / queue_length, 2) if queue_length else -1
            # Compute percentage of time queue is empty
            empty_count = 0
            for q_size in queue_size:
                if q_size == 0:
                    empty_count += 1
            queue_empty_freq_pct = round(100 * empty_count / len(queue_size), 2) if queue_size else -1
        return [queue_size, queue_length, queue_average_size, queue_utilization_pct, queue_empty_freq_pct]

    def _parse_pipeline_info(self, pipeline_info):
        """
        Parse and process the pipeline profiling information.

        Args:
            pipeline_info (dict): The pipeline profiling information.

        Returns:
            Dictionary with analyzed summary output information
            For the following key-value pairs, each value is a list ordered by increasing op id
                pipeline_ops: operator name and operator id, a string, with example format Batch(id=0)
                op_names: operator name, a string
                op_ids: operator id, an integer
                num_workers: number of workers for the op, an integer
                queue_average_size: average queue size for the op, a float
                queue_utilization_pct: average percentage of time queue is used for op, a float from 0.00 to 1.00
                queue_empty_freq_pct: percentage of time queue is empty for op, a float from 0.00 to 1.00
                children_ids: children op ids of op; list if empty [] if op has no children
                parent_id: parent id of op

        Raises:
            ProfilerRawFileException: If the format of the input is wrong.
        """
        # Perform sanity checks for pipeline information
        pipeline_op_info = pipeline_info.get('op_info')
        for item in pipeline_op_info:
            if not item:
                raise ProfilerRawFileException('The contents of MindData pipeline JSON file is wrong.')

        # Parse and process pipeline information
        # Obtain the following for each op (and build a list), ordered by increasing op id
        # - op id (handy for user output)
        # - op name (needed for basic processing)
        # - op name with op id (handy for user output)
        # - num_workers
        # - various queue information
        # - children op ids
        # - parent op id
        dict_opid_pipeline_ops = {}
        dict_opid_opname = {}
        dict_opid_numworkers = {}
        dict_opid_queue_info = {}
        dict_opid_children_ids = {}
        dict_opid_parent_id = {}
        # Note: Will process the input pipeline ops in "reversed" order since typically they are ordered
        #       from largest op id (usually leaf/source op) to smallest op id (usually root).
        #       However, since there may be non-linear pipelines, the processed op info needs to be sorted
        #       before final output is produced and saved.
        for op_info in reversed(pipeline_info['op_info']):
            op_id = op_info.get('op_id')
            op_name = op_info.get('op_type')[0:-2]
            dict_opid_pipeline_ops[op_id] = op_name + '(id=' + str(op_id) + ')'
            dict_opid_opname[op_id] = op_name
            dict_opid_numworkers[op_id] = op_info.get('num_workers')

            # Obtain the output queue metrics information for the current op
            dict_opid_queue_info[op_id] = self._parse_pipeline_metrics_info(op_info.get('metrics'))

            # For current op, initialize parent_id=-1, in case after processing all children in pipeline,
            # it is determined that current op has no parent
            if dict_opid_parent_id.get(op_id) is None:
                dict_opid_parent_id[op_id] = -1

            children_ids = op_info.get('children')
            if children_ids:
                # Set children op ids for current op
                dict_opid_children_ids[op_id] = children_ids
                # For each child op, set parent op to be current op
                for child_op_id in children_ids:
                    dict_opid_parent_id[child_op_id] = op_id
            else:
                dict_opid_children_ids[op_id] = []

        # Build resultant dictionary
        return_dict = {}

        return_dict['pipeline_ops'] = [x[1] for x in sorted(dict_opid_pipeline_ops.items())]
        return_dict['op_names'] = [x[1] for x in sorted(dict_opid_opname.items())]
        return_dict['op_ids'] = sorted(dict_opid_opname.keys())
        return_dict['num_workers'] = [x[1] for x in sorted(dict_opid_numworkers.items())]

        queue_info_items = [x[1] for x in sorted(dict_opid_queue_info.items())]
        return_dict['queue_average_size'] = [y[2] for y in queue_info_items]
        return_dict['queue_utilization_pct'] = [y[3] for y in queue_info_items]
        return_dict['queue_empty_freq_pct'] = [y[4] for y in queue_info_items]

        return_dict['children_ids'] = [x[1] for x in sorted(dict_opid_children_ids.items())]
        return_dict['parent_id'] = [x[1] for x in sorted(dict_opid_parent_id.items())]

        return return_dict

    @staticmethod
    def _parse_cpu_util_info(cpu_util_info):
        """
        Parse and process the CPU profiling information.

        Args:
            cpu_util_info (dict): The CPU utilization profiling information.

        Returns:
            Dictionary with analyzed summary output information
            Dictionary consists of:
                avg_cpu_pct: Average CPU utilization percentage for each op, a list ordered by increasing op id

        Raises:
            ProfilerRawFileException: If the format of the input is wrong.
        """
        # Perform sanity checks for CPU utilization information
        cpu_processor_num = cpu_util_info.get('cpu_processor_num')
        cpu_op_info = cpu_util_info.get('op_info')
        if cpu_processor_num is None or not cpu_op_info:
            raise ProfilerRawFileException('The format of MindData CPU utilization JSON file is wrong.')

        for item in cpu_op_info:
            if not item:
                raise ProfilerRawFileException('The contents of MindData CPU utilization JSON file is wrong.')

        # Parse and process the following CPU utilization information:
        # - overage cpu utilization for each op
        dict_opid_cpuutil = {}
        for op in cpu_util_info["op_info"]:
            # Note: The CPU utilization data may have an extra entry with op_id=-1
            # Omit info for op_id=1
            if op["op_id"] != -1:
                op_sys, op_usr = op["metrics"]["sys_utilization"], op["metrics"]["user_utilization"]
                dict_opid_cpuutil[op["op_id"]] = [op_sys[i] + op_usr[i] for i in range(len(op_sys))]

        # Initialize oplist_avg_cpu_pct with -1 for each pipeline op, since
        # CPU utilization data may not have information for each pipeline op
        oplist_avg_cpu_pct = [-1] * len(dict_opid_cpuutil)
        total_cpu = 0
        for op_id, cpu in dict_opid_cpuutil.items():
            op_avg_cpu_pct = sum(cpu) / len(cpu) if cpu else 0
            oplist_avg_cpu_pct[op_id] = round(op_avg_cpu_pct, 2)
            total_cpu += op_avg_cpu_pct

        return_dict = {}
        return_dict['avg_cpu_pct'] = oplist_avg_cpu_pct
        return return_dict

    def _parse_device_trace_info(self, device_trace_info):
        """
        Parse and process the device trace profiling information.

        Args:
            device_trace_info: The device trace profiling information in text format, one line per record.

        Returns:
            Dictionary with analyzed summary output information
            Dictionary consists of:
                per_batch_time: Average per batch time for pipeline in milliseconds
                per_pipeline_time: Average per pipeline time in milliseconds
                per_push_queue_time: Average per queue push time in milliseconds
        """
        # Information on the format of the device tracing profiling information.
        # Format is: type extra-info batch-num value timestamp
        # 0) type: 0: time,  1: connector size
        # 1) extra-info: if type is 0 - 0: pipeline time, 1: push tdt time, 2: batch time
        #                if type is 1 - connector capacity
        # 2) batch-num: batch number
        # 3) value: if type is 0 - value is time(ms)
        #           if type is 1 - value is connector size
        # 4) timestamp
        # Examples:
        # 0 0 20 10 xxx - The 20th batch took 10ms to get data from pipeline.
        # 1 64 20 5 yyy - Connector size is 5 when get the 20th batch.Connector capacity is 64.

        prev_time = 0
        q_time = [[], [], []]  # pipeline time, push TDT time, batch time

        # Parse each record
        for line_data in device_trace_info:
            record = [int(d) for d in line_data.split(" ")][0:5]
            if record[2] < 2:  # skip 1st batch
                prev_time = record[4]
                continue

            if record[0] == 0:  # type 0: time record
                q_time[record[1]].append(record[3])
            elif record[0] == 1:  # type 1: connector size record
                # Check if dataset_iterator trace profiling file was found
                if not self._device_queue_file_found:
                    q_time[2].append(record[4] - prev_time)
                    prev_time = record[4]

        # Compute average queue times
        avg_pipeline_time = sum(q_time[0]) / len(q_time[0]) if q_time[0] else -1
        avg_push_queue_time = sum(q_time[1]) / len(q_time[1]) if q_time[1] else -1
        avg_batch_time = sum(q_time[2]) / len(q_time[2]) if q_time[2] else -1

        return_dict = {}
        return_dict['per_batch_time'] = [round(avg_batch_time, 3)]
        return_dict['per_pipeline_time'] = [round(avg_pipeline_time, 3)]
        return_dict['per_push_queue_time'] = [round(avg_push_queue_time, 3)]

        return return_dict

    def _compute_composite_info(self, summary_dict):
        """
        Compute composite analysis information from the current summary pipeline data.

        Args:
            summary_dict (dict): Input summary pipeline information.

        Returns:
            Dictionary with composite analysis output information
            Dictionary consists of:
                avg_cpu_pct_per_worker: Average CPU utilization percentage per worker
        """
        return_dict = {}

        # Build list: average CPU utilization percentage per worker - for each op
        avg_cpu_pct_per_worker = []
        for c, n in zip(summary_dict.get('avg_cpu_pct'), summary_dict.get('num_workers')):
            avg_cpu_pct_per_worker.append(round(c / n if (n != 0 and c >= 0) else -1, 2))
        return_dict['avg_cpu_pct_per_worker'] = avg_cpu_pct_per_worker

        return return_dict

    @staticmethod
    def _analyze_for_bottleneck_op(summary_dict):
        """
        Run the bottleneck analysis on the MindData summary information.

        Args:
            summary_dict (dict): Input summary pipeline information.

        Returns:
            Dictionary returned by `BottleneckAnalyzer(summary_dict).analyze()`, or an empty
            dictionary if an IndexError is raised while building or running the analyzer.
        """
        try:
            return BottleneckAnalyzer(summary_dict).analyze()
        except IndexError:
            # An IndexError from the analyzer is treated as "no analysis available"
            return {}

    def _save_as_csv_file(self, data_dict):
        """
        Save data dictionary information to CSV file.

        Args:
            data_dict (dict): Input data dictionary information.

        Returns:
            Data dictionary information is saved to CSV file named 'minddata_pipeline_summary_<device_id>.csv'.
        """

        summary_templatename = 'minddata_pipeline_summary_{}.csv'
        output_csv_path_filename = os.path.join(self._output_path, summary_templatename.format(self._device_id))

        # Open file for writing
        data_file = open(output_csv_path_filename, 'w')

        # Create CSV writer object
        csv_writer = csv.writer(data_file)

        # Write the dictionary information to CSV file
        # Create deepcopy of input data_dict so zip processing in this function does NOT change the data_dict
        temp_dict = copy.deepcopy(data_dict)
        for data_key, data_value in zip(temp_dict.keys(), temp_dict.values()):
            # Begin/prefix the data value with the data key
            data_value.insert(0, data_key)
            csv_writer.writerow(data_value)

        # Close file for writing
        data_file.close()

        # Update file permissions
        os.chmod(output_csv_path_filename, stat.S_IREAD | stat.S_IWRITE)

    def _analyze_and_save(self, pipeline_info, cpu_util_info, device_trace_info):
        """
        Build the MindData summary from the loaded inputs and write it to disk.

        Args:
            pipeline_info (dict): The pipeline information read from the input JSON file.
            cpu_util_info (dict): The CPU utilization information read from the input JSON file.
            device_trace_info (text): The dataset iterator (CPU) or device queue (GPU, Ascend) trace profiling
                                     text file. Value is None if such file could not be identified.

        Returns:
            summary_dict (dict): Analyzed summary information.
            The same dictionary is written to a JSON file (at `_save_path`) and to a CSV file
            (so that these different formats are available to the users).
        """
        summary_dict = {}

        # Pipeline information first, then CPU utilization, then (if present) the trace information
        summary_dict.update(self._parse_pipeline_info(pipeline_info))
        summary_dict.update(self._parse_cpu_util_info(cpu_util_info))
        if device_trace_info is not None:
            summary_dict.update(self._parse_device_trace_info(device_trace_info))

        # Composite and bottleneck analysis only run when both data sources cover the same number of ops
        num_pipeline_ops = len(summary_dict.get('pipeline_ops'))
        num_cpu_util_ops = len(summary_dict.get('avg_cpu_pct'))
        if num_pipeline_ops != num_cpu_util_ops:
            # Warn that the pipeline data and the CPU utilization data disagree on the number of ops
            warning_msg = ''.join((
                'Number of ops for pipeline data: ', str(num_pipeline_ops),
                ' does not match number of ops for CPU utilization data: ', str(num_cpu_util_ops)))
            logger.warning(warning_msg)
        else:
            summary_dict.update(self._compute_composite_info(summary_dict))

            bottleneck_dict = self._analyze_for_bottleneck_op(summary_dict)
            if bottleneck_dict:
                summary_dict.update(bottleneck_dict)

        # Format#1: JSON output file, restricted to owner read/write
        json_path = self._save_path
        with open(json_path, 'w') as json_fp:
            json.dump(summary_dict, json_fp)
        os.chmod(json_path, stat.S_IREAD | stat.S_IWRITE)

        # Format#2: CSV output file
        self._save_as_csv_file(summary_dict)

        # Format#3: the dictionary itself
        return summary_dict


class BottleneckAnalyzer:
    """
    Analyzer that inspects a MindData pipeline summary for a potential bottleneck op.

    The per-op collections taken from the summary are indexed by op id. The value -1 is
    used as a sentinel for "op does not exist" and for "queue usage does not exist", so
    it is never used as an index by this class.

    Args:
        summary_dict (dict): Summary information. The following keys are required:
            "pipeline_ops", "op_names", "op_ids", "num_workers", "queue_average_size",
            "queue_utilization_pct", "queue_empty_freq_pct", "children_ids", "parent_id",
            "avg_cpu_pct" and "avg_cpu_pct_per_worker".

    Raises:
        KeyError: If any of the required keys is missing from `summary_dict`.
    """

    def __init__(self, summary_dict):
        """Store the per-op summary data and the thresholds used by the analysis."""
        self.pipeline_ops = summary_dict["pipeline_ops"]
        self.op_names = summary_dict["op_names"]
        self.op_ids = summary_dict["op_ids"]
        self.num_workers = summary_dict["num_workers"]
        self.queue_average_size = summary_dict["queue_average_size"]
        self.queue_utilization_pct = summary_dict["queue_utilization_pct"]
        self.queue_empty_freq_pct = summary_dict["queue_empty_freq_pct"]
        self.children_ids = summary_dict["children_ids"]
        self.parent_id = summary_dict["parent_id"]
        self.avg_cpu_pct = summary_dict["avg_cpu_pct"]
        self.avg_cpu_pct_per_worker = summary_dict["avg_cpu_pct_per_worker"]

        # Sentinel values: an op id / queue usage of -1 means "does not exist"
        self.op_id_not_exist = -1
        self.queue_usage_not_exist = -1
        # Ops with these names are skipped by every analysis in this class
        self.non_multithreaded_ops = {"Barrier",
                                      "Concat",
                                      "EpochCtrl",
                                      "Rename",
                                      "Repeat",
                                      "Shuffle",
                                      "Skip",
                                      "Take",
                                      "Zip"}

        # These are the threshold values used in the pipeline bottleneck analyzer algorithm
        self._AVG_CPU_UTIL_PCT_PER_WORKER_MAXIMUM = 75.0
        self._AVG_CPU_UTIL_PCT_PER_WORKER_MINIMUM = 20.0
        self._LEAF_OUTPUT_QUEUE_EMPTY_FREQ_PCT_MAXIMUM = 50
        self._DEVICEQUEUE_INPUT_QUEUE_EMPTY_FREQ_PCT_MAXIMUM = 60
        self._IN_OUT_QUEUE_UTIL_PCT_DIFF_MAXIMUM = 50
        self._IN_QUEUE_UTIL_PCT_MAXIMUM = 10

    def analyze(self):
        """
        Run the CPU usage, queue usage and bottleneck analyses.

        Returns:
            dict, the detailed analysis. Each of the following keys is present only when
            the corresponding analysis produced a result:
            "cpu_analysis_details" (list of str), "queue_analysis_details" (list of str),
            "bottleneck_warning" (single-element list with the bottleneck op) and
            "bottleneck_suggestion" (single-element list with the matching suggestion).
        """
        detailed_analysis = {}

        cpu_analysis = self.analyze_cpu_usage()
        queue_analysis = self.analyze_queue_usage()

        if cpu_analysis:
            detailed_analysis["cpu_analysis_details"] = cpu_analysis

        if queue_analysis:
            detailed_analysis["queue_analysis_details"] = queue_analysis

        bottleneck, suggestion = self.analyze_bottleneck()

        # An empty string in the first slot means that no bottleneck op was identified
        if bottleneck[0]:
            detailed_analysis["bottleneck_warning"] = bottleneck
            detailed_analysis["bottleneck_suggestion"] = suggestion

        return detailed_analysis

    def __get_non_inline_child_recur(self, cur_op_id):
        """
        Follow the first-child chain below an op until a child with its own queue is found.

        A child whose `queue_average_size` entry is -1 is skipped (treated as inline) and the
        search continues with that child's own first child.

        Args:
            cur_op_id (int): The op id to start from.

        Returns:
            int, the id of the first non-inline descendant on the first-child chain, or
            `op_id_not_exist` if the op does not exist or the chain ends without one.
        """
        if cur_op_id == self.op_id_not_exist or not self.children_ids[cur_op_id]:
            return self.op_id_not_exist
        cur_child_id = self.children_ids[cur_op_id][0]
        if self.queue_average_size[cur_child_id] != -1:
            return cur_child_id
        return self.__get_non_inline_child_recur(cur_child_id)

    def analyze_cpu_usage(self):
        """
        Report ops whose average CPU utilization per worker is unusually high or low.

        Ops with a non-existent id and ops listed in `non_multithreaded_ops` are skipped.

        Returns:
            list[str], one message per op that is above the maximum threshold, or that is
            below the minimum threshold while using more than one worker.
        """
        cpu_usage_analysis = []
        for op_id in self.op_ids:
            if op_id == self.op_id_not_exist or self.op_names[op_id] in self.non_multithreaded_ops:
                continue

            worker_cpu = self.avg_cpu_pct_per_worker[op_id]
            if worker_cpu > self._AVG_CPU_UTIL_PCT_PER_WORKER_MAXIMUM and self.op_names[op_id]:
                cpu_usage_analysis.append(
                    ("{} is using {}% CPU per worker."
                     " Setting num_parallel_workers"
                     ">{} might bring extra performance.").format(self.pipeline_ops[op_id],
                                                                  worker_cpu,
                                                                  self.num_workers[op_id]))
            elif worker_cpu < self._AVG_CPU_UTIL_PCT_PER_WORKER_MINIMUM and self.num_workers[op_id] > 1:
                cpu_usage_analysis.append(
                    ("{} is using {}% CPU per worker. Using num_parallel_workers={} might not bring as much benefit"
                     " due to low CPU usage per worker.").format(self.pipeline_ops[op_id],
                                                                 worker_cpu,
                                                                 self.num_workers[op_id]))
        return cpu_usage_analysis

    def analyze_queue_usage(self):
        """
        Report ops whose input/output queue usage looks abnormal.

        Ops with a non-existent id, ops listed in `non_multithreaded_ops` and "Batch" ops
        are skipped. Three situations are reported:

        - a leaf op (no non-inline child) with low output queue utilization;
        - a "DeviceQueue" op whose input queue is empty too often;
        - an op whose input queue utilization exceeds its output queue utilization by
          more than the configured difference.

        Returns:
            list[str], one message per detected situation.
        """
        queue_usage_analysis = []
        for op_id in self.op_ids:
            if op_id == self.op_id_not_exist or self.op_names[op_id] in self.non_multithreaded_ops:
                continue

            if self.op_names[op_id] == "Batch":
                continue

            # The input queue of an op is the output queue of its first non-inline child
            in_op_id = self.__get_non_inline_child_recur(op_id)
            out_q = self.queue_utilization_pct[op_id]
            has_input = in_op_id != self.op_id_not_exist
            has_output = out_q != self.queue_usage_not_exist

            if not has_input and has_output:
                # This is a leaf node since input queue does not exist and output queue exists
                if out_q < self._LEAF_OUTPUT_QUEUE_EMPTY_FREQ_PCT_MAXIMUM:
                    queue_usage_analysis.append(("Leaf op {} is using {}% of its output queue."
                                                 " Setting num_parallel_workers"
                                                 ">{} might speed up I/O.").format(self.pipeline_ops[op_id],
                                                                                   out_q,
                                                                                   self.num_workers[op_id]))
            elif self.op_names[op_id] == "DeviceQueue" and has_input:
                # For the device queue op, look at how often its input queue is empty
                empty_freq = self.queue_empty_freq_pct[in_op_id]
                if empty_freq > self._DEVICEQUEUE_INPUT_QUEUE_EMPTY_FREQ_PCT_MAXIMUM:
                    queue_usage_analysis.append((
                        "{}'s input queue is empty {}% of the time. This might indicate dataset bottlenecks."
                        " Hence host cannot keep up with the device {}% of the time."
                        " Device waits whenever input queue is empty.").format(self.pipeline_ops[op_id],
                                                                               empty_freq,
                                                                               empty_freq))
            elif has_input and has_output:
                in_q = self.queue_utilization_pct[in_op_id]
                if in_q != self.queue_usage_not_exist and in_q - out_q > self._IN_OUT_QUEUE_UTIL_PCT_DIFF_MAXIMUM:
                    queue_usage_analysis.append((
                        "{}'s input queue usage={}% is greater than output queue usage={}%."
                        " This indicates child op {} might be producing faster than its parent {} can consume."
                        " If this op has low CPU utilization, try increasing "
                        "prefetch_size or increasing num_workers.").format(self.pipeline_ops[op_id],
                                                                           in_q, out_q, self.pipeline_ops[in_op_id],
                                                                           self.pipeline_ops[op_id]))
        return queue_usage_analysis

    def analyze_bottleneck(self):
        """
        Identify a bottleneck op by combining CPU usage and queue usage.

        The op ids are visited in reverse order and every matching op overwrites the previous
        result, so the op reported is the last match in that traversal. Ops with a
        non-existent id, ops listed in `non_multithreaded_ops` and "DeviceQueue" ops are
        skipped before any per-op data is looked up.

        Returns:
            tuple(list[str], list[str]), a single-element list with the bottleneck op and a
            single-element list with the suggestion. Both elements are empty strings when no
            bottleneck was identified.
        """
        bottleneck, suggestion = "", ""
        for op_id in reversed(self.op_ids):
            # Guard first: the sentinel id must never be used to index the per-op data
            if op_id == self.op_id_not_exist:
                continue
            op_name = self.op_names[op_id]
            if op_name in self.non_multithreaded_ops or op_name == "DeviceQueue":
                continue

            wkr_cpu = self.avg_cpu_pct_per_worker[op_id]
            if wkr_cpu > self._AVG_CPU_UTIL_PCT_PER_WORKER_MAXIMUM:
                bottleneck = self.pipeline_ops[op_id]
                suggestion = "{} has high CPU utilization per worker of {}%".format(
                    self.pipeline_ops[op_id], wkr_cpu)
                suggestion += " Try increasing num_parallel_workers above {}.".format(self.num_workers[op_id])
            elif wkr_cpu < self._AVG_CPU_UTIL_PCT_PER_WORKER_MINIMUM:
                in_op_id = self.__get_non_inline_child_recur(op_id)
                if in_op_id == self.op_id_not_exist:
                    # No input queue to compare against, so queue usage cannot be judged
                    continue
                in_q_usage = self.queue_utilization_pct[in_op_id]
                out_q = self.queue_utilization_pct[op_id]
                if in_q_usage < self._IN_QUEUE_UTIL_PCT_MAXIMUM or \
                        out_q - in_q_usage > self._IN_OUT_QUEUE_UTIL_PCT_DIFF_MAXIMUM:
                    bottleneck = self.pipeline_ops[op_id]
                    suggestion = "{} has low CPU utilization per worker of {}%".format(
                        self.pipeline_ops[op_id], wkr_cpu)
                    suggestion += " and abnormal queue usage. Try increasing prefetch_size."

        return [bottleneck], [suggestion]