Refactor mojo_benchmark.

This patch extracts the logic that interacts with the mojo benchmark app
from `mojo_benchmark` into devtoolslib and adds some tests.
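
For reference, the extracted entry point can be driven roughly as follows
(a minimal sketch; the shell object and the argument values are illustrative
placeholders, not part of this patch):

    from devtoolslib import benchmark

    results = benchmark.run(shell, shell_args=[],
                            app='https://example.org/foo.mojo',
                            duration_seconds=10,
                            measurements=[{'spec': 'time_until/a/b',
                                           'name': 'a_to_b'}],
                            verbose=False, android=False,
                            output_file=None)
    if results.succeeded:
      print results.measurements  # e.g. {'time_until/a/b': 42.5}
    else:
      print 'benchmark failed: ' + results.error_str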

R=qsr@chromium.org

Review URL: https://codereview.chromium.org/1435523002 .

Cr-Mirrored-From: https://github.com/domokit/mojo
Cr-Mirrored-Commit: 0a40516baad86d171958347cd2bb95199b287110
diff --git a/devtoolslib/benchmark.py b/devtoolslib/benchmark.py
new file mode 100644
index 0000000..3383dd4
--- /dev/null
+++ b/devtoolslib/benchmark.py
@@ -0,0 +1,106 @@
+# Copyright 2015 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Logic that drives runs of the benchmarking mojo app and parses its output."""
+
+import os.path
+import re
+
+_BENCHMARK_APP = 'https://core.mojoapps.io/benchmark.mojo'
+
+# Additional time in seconds allocated per shell run to accommodate start-up.
+# The shell should terminate before hitting this timeout; it is an error if it
+# doesn't.
+_EXTRA_TIMEOUT = 20
+
+_MEASUREMENT_RESULT_FORMAT = r"""
+^              # Beginning of the line.
+measurement:   # Hard-coded tag.
+\s+(\S+)       # Match measurement spec.
+\s+(\S+)       # Match measurement result.
+$              # End of the line.
+"""
+
+_MEASUREMENT_REGEX = re.compile(_MEASUREMENT_RESULT_FORMAT, re.VERBOSE)
+
+
+def _parse_measurement_results(output):
+  """Parses the measurement results present in the benchmark output and returns
+  the dictionary of correctly recognized and parsed results.
+  """
+  measurement_results = {}
+  output_lines = [line.strip() for line in output.split('\n')]
+  for line in output_lines:
+    match = _MEASUREMENT_REGEX.match(line)
+    if match:
+      measurement_spec = match.group(1)
+      measurement_result = match.group(2)
+      try:
+        measurement_results[measurement_spec] = float(measurement_result)
+      except ValueError:
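+        # Ignore non-numeric results (e.g. "FAILED").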
+        pass
+  return measurement_results
+
+
+class Results(object):
+  """Holds results of a benchmark run."""
+
+  def __init__(self, succeeded, error_str, output):
+    self.succeeded = succeeded
+    self.error_str = error_str
+    self.output = output
+    self.measurements = None
+
+
+def run(shell, shell_args, app, duration_seconds, measurements, verbose,
+        android, output_file):
+  """Runs the given benchmark by running `benchmark.mojo` in mojo shell with
+  appropriate arguments and returns the produced output.
+
+  Returns:
+    A tuple of (succeeded, error_msg, output).
+  """
+  timeout = duration_seconds + _EXTRA_TIMEOUT
+  benchmark_args = []
+  benchmark_args.append('--app=' + app)
+  benchmark_args.append('--duration=' + str(duration_seconds))
+
+  device_output_file = None
+  if output_file:
+    if android:
+      device_output_file = os.path.join(shell.get_tmp_dir_path(), output_file)
+      benchmark_args.append('--trace-output=' + device_output_file)
+    else:
+      benchmark_args.append('--trace-output=' + output_file)
+
+  for measurement in measurements:
+    benchmark_args.append(measurement['spec'])
+
+  shell_args = list(shell_args)
+  shell_args.append(_BENCHMARK_APP)
+  shell_args.append('--force-offline-by-default')
+  shell_args.append('--args-for=%s %s' % (_BENCHMARK_APP,
+                                          ' '.join(benchmark_args)))
+
+  if verbose:
+    print 'shell arguments: ' + str(shell_args)
+  return_code, output, did_time_out = shell.run_and_get_output(
+      shell_args, timeout=timeout)
+
+  if did_time_out:
+    return Results(False, 'timed out', output)
+  if return_code:
+    return Results(False, 'return code: ' + str(return_code), output)
+
+  # Pull the trace file even if some measurements are missing, as it can be
+  # useful in debugging.
+  if device_output_file:
+    shell.pull_file(device_output_file, output_file, remove_original=True)
+
+  results = Results(True, None, output)
+  results.measurements = _parse_measurement_results(output)
+  return results
diff --git a/devtoolslib/benchmark_unittest.py b/devtoolslib/benchmark_unittest.py
new file mode 100644
index 0000000..6257041
--- /dev/null
+++ b/devtoolslib/benchmark_unittest.py
@@ -0,0 +1,44 @@
+# Copyright 2015 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Tests for the logic that drives runs of the benchmarking mojo app and parses
+its output."""
+
+import imp
+import os.path
+import sys
+import unittest
+
+try:
+  imp.find_module("devtoolslib")
+except ImportError:
+  sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from devtoolslib import benchmark
+
+
+class BenchmarkTest(unittest.TestCase):
+  """Tests the benchmark module."""
+
+  def test_parse_measurement_results_empty(self):
+    """Tests parsing empty output."""
+    output = """"""
+    results = benchmark._parse_measurement_results(output)
+    self.assertEqual({}, results)
+
+  def test_parse_measurement_results_typical(self):
+    """Tests parsing typical output with unrelated log entries."""
+    output = """
+[INFO:network_fetcher.cc(322)] Caching mojo app http://127.0.0.1:31839/benchmark.mojo at /usr/local/google/home/user/.mojo_url_response_disk_cache/cache/4F6FAE752C7958AE122C6A2D778F2014C15578250B3C6746D54B99E4F15A4458/4F6FAE752C7958AE122C6A2D778F2014C15578250B3C6746D54B99E4F15A4458
+[INFO:network_fetcher.cc(322)] Caching mojo app http://127.0.0.1:31839/dart_traced_application.mojo at /usr/local/google/home/user/.mojo_url_response_disk_cache/cache/AB290478907A1DC5434CBCFD053BE2E74254D882644E76B3C28E3E7E1BCDCC3D/AB290478907A1DC5434CBCFD053BE2E74254D882644E76B3C28E3E7E1BCDCC3D
+Observatory listening on http://127.0.0.1:38128
+[1109/155613:WARNING:event.cc(234)] Ignoring incorrect complete event (no duration)
+measurement: time_until/a/b 42.5
+measurement: time_between/a/b/c/d 21.1
+measurement: time_between/a/b/e/f FAILED
+some measurements failed
+"""
+    results = benchmark._parse_measurement_results(output)
+    self.assertEqual({'time_until/a/b': 42.5,
+                      'time_between/a/b/c/d': 21.1}, results)
diff --git a/mojo_benchmark b/mojo_benchmark
index 6ade1b1..b6c17ae 100755
--- a/mojo_benchmark
+++ b/mojo_benchmark
@@ -7,14 +7,13 @@
 
 import argparse
 import logging
-import os.path
-import re
 import sys
 import time
 
+from devtoolslib import benchmark
+from devtoolslib import perf_dashboard
 from devtoolslib import shell_arguments
 from devtoolslib import shell_config
-from devtoolslib import perf_dashboard
 
 
 _DESCRIPTION = """Runner for Mojo application benchmarks.
@@ -26,7 +25,6 @@
 
 _logger = logging.getLogger()
 
-_BENCHMARK_APP = 'https://core.mojoapps.io/benchmark.mojo'
 _CACHE_SERVICE_URL = 'mojo:url_response_disk_cache'
 _NETWORK_SERVICE_URL = 'mojo:network_service'
 
@@ -35,21 +33,6 @@
     '--args-for=%s %s' % (_NETWORK_SERVICE_URL, '--clear'),
 ]
 
-# Additional time in seconds allocated per shell run to accommodate start-up.
-# The shell should terminate before hitting this time out, it is an error if it
-# doesn't.
-_EXTRA_TIMEOUT = 20
-
-_MEASUREMENT_RESULT_FORMAT = r"""
-^              # Beginning of the line.
-measurement:   # Hard-coded tag.
-\s+(\S+)       # Match measurement spec.
-\s+(\S+)       # Match measurement result.
-$              # End of the line.
-"""
-
-_MEASUREMENT_REGEX = re.compile(_MEASUREMENT_RESULT_FORMAT, re.VERBOSE)
-
 
 def _generate_benchmark_variants(benchmark_spec):
   """Generates benchmark specifications for individual variants of the given
@@ -76,72 +59,6 @@
   return variants
 
 
-def _run_benchmark(shell, shell_args, app, duration_seconds, measurements,
-                   verbose, android, output_file):
-  """Runs the given benchmark by running `benchmark.mojo` in mojo shell with
-  appropriate arguments and returns the produced output.
-
-  Returns:
-    A tuple of (succeeded, error_msg, output).
-  """
-  timeout = duration_seconds + _EXTRA_TIMEOUT
-  benchmark_args = []
-  benchmark_args.append('--app=' + app)
-  benchmark_args.append('--duration=' + str(duration_seconds))
-
-  device_output_file = None
-  if output_file:
-    if android:
-      device_output_file = os.path.join(shell.get_tmp_dir_path(), output_file)
-      benchmark_args.append('--trace-output=' + device_output_file)
-    else:
-      benchmark_args.append('--trace-output=' + output_file)
-
-  for measurement in measurements:
-    benchmark_args.append(measurement['spec'])
-
-  shell_args = list(shell_args)
-  shell_args.append(_BENCHMARK_APP)
-  shell_args.append('--force-offline-by-default')
-  shell_args.append('--args-for=%s %s' % (_BENCHMARK_APP,
-                                          ' '.join(benchmark_args)))
-
-  if verbose:
-    print 'shell arguments: ' + str(shell_args)
-  return_code, output, did_time_out = shell.run_and_get_output(
-      shell_args, timeout=timeout)
-
-  if did_time_out:
-    return False, 'timed out', output
-  if return_code:
-    return False, 'return code: ' + str(return_code), output
-
-  # Pull the trace file even if some measurements are missing, as it can be
-  # useful in debugging.
-  if device_output_file:
-    shell.pull_file(device_output_file, output_file, remove_original=True)
-
-  return True, None, output
-
-
-def _parse_measurement_results(output):
-  """Parses the measurement results present in the benchmark output and returns
-  the dictionary of correctly recognized and parsed results.
-  """
-  measurement_results = {}
-  output_lines = [line.strip() for line in output.split('\n')]
-  for line in output_lines:
-    match = re.match(_MEASUREMENT_REGEX, line)
-    if match:
-      measurement_spec = match.group(1)
-      measurement_result = match.group(2)
-      try:
-        measurement_results[measurement_spec] = float(measurement_result)
-      except ValueError:
-        pass
-  return measurement_results
-
-
 def main():
   parser = argparse.ArgumentParser(
       formatter_class=argparse.RawDescriptionHelpFormatter,
@@ -190,7 +107,7 @@
         chart_data_recorder = perf_dashboard.ChartDataRecorder(
             script_args.test_name)
 
-      benchmark_succeeded, benchmark_error, output = _run_benchmark(
+      results = benchmark.run(
           shell, shell_args, app, duration, measurements, script_args.verbose,
           script_args.android, output_file)
 
@@ -198,13 +115,12 @@
 
       some_measurements_failed = False
       some_measurements_succeeded = False
-      if benchmark_succeeded:
-        measurement_results = _parse_measurement_results(output)
+      if results.succeeded:
         # Iterate over the list of specs, not the dictionary, to detect missing
         # results and preserve the required order.
         for measurement in measurements:
-          if measurement['spec'] in measurement_results:
-            result = measurement_results[measurement['spec']]
+          if measurement['spec'] in results.measurements:
+            result = results.measurements[measurement['spec']]
             print '%10.4f  %s' % (result, measurement['name'])
 
             if chart_data_recorder:
@@ -218,14 +134,14 @@
             print '?  %s' % measurement['name']
             some_measurements_failed = True
 
-      if not benchmark_succeeded or some_measurements_failed:
-        if not benchmark_succeeded:
-          print 'benchmark failed: ' + benchmark_error
+      if not results.succeeded or some_measurements_failed:
+        if not results.succeeded:
+          print 'benchmark failed: ' + results.error_str
         if some_measurements_failed:
           print 'some measurements failed'
         print 'output: '
         print '-' * 72
-        print output
+        print results.output
         print '-' * 72
         exit_code = 1