James Robinson | 646469d | 2014-10-03 15:33:28 -0700 | [diff] [blame] | 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | // Histogram is an object that aggregates statistics, and can summarize them in |
| 6 | // various forms, including ASCII graphical, HTML, and numerically (as a |
| 7 | // vector of numbers corresponding to each of the aggregating buckets). |
| 8 | |
| 9 | // It supports calls to accumulate either time intervals (which are processed |
| 10 | // as integral number of milliseconds), or arbitrary integral units. |
| 11 | |
| 12 | // For Histogram (exponential histogram), LinearHistogram and CustomHistogram, |
| 13 | // the minimum for a declared range is 1 (instead of 0), while the maximum is |
| 14 | // (HistogramBase::kSampleType_MAX - 1). Currently you can declare histograms |
| 15 | // with ranges exceeding those limits (e.g. 0 as minimal or |
| 16 | // HistogramBase::kSampleType_MAX as maximal), but those excesses will be |
| 17 | // silently clamped to those limits (for backwards compatibility with existing |
| 18 | // code). Best practice is to not exceed the limits. |
| 19 | |
| 20 | // Each use of a histogram with the same name will reference the same underlying |
| 21 | // data, so it is safe to record to the same histogram from multiple locations |
| 22 | // in the code. It is a runtime error if all uses of the same histogram do not |
| 23 | // agree exactly in type, bucket size and range. |
| 24 | |
| 25 | // For Histogram and LinearHistogram, the maximum for a declared range should |
| 26 | // always be strictly larger than the declared minimum. Zero and |
| 27 | // HistogramBase::kSampleType_MAX are implicitly added as first and last ranges, |
| 28 | // so the smallest legal bucket_count is 3. However CustomHistogram can have |
| 29 | // bucket count as 2 (when you give a custom ranges vector containing only 1 |
| 30 | // range). |
| 31 | // For these 3 kinds of histograms, the max bucket count is always |
| 32 | // (Histogram::kBucketCount_MAX - 1). |
| 33 | |
| 34 | // The buckets layout of class Histogram is exponential. For example, buckets |
| 35 | // might contain (sequentially) the count of values in the following intervals: |
| 36 | // [0,1), [1,2), [2,4), [4,8), [8,16), [16,32), [32,64), [64,infinity) |
| 37 | // That bucket allocation would actually result from construction of a histogram |
| 38 | // for values between 1 and 64, with 8 buckets, such as: |
| 39 | // Histogram count("some name", 1, 64, 8); |
| 40 | // Note that the underflow bucket [0,1) and the overflow bucket [64,infinity) |
| 41 | // are also counted by the constructor in the user supplied "bucket_count" |
| 42 | // argument. |
| 43 | // The above example has an exponential ratio of 2 (doubling the bucket width |
| 44 | // in each consecutive bucket). The Histogram class automatically calculates |
| 45 | // the smallest ratio that it can use to construct the number of buckets |
| 46 | // selected in the constructor. As another example, if you had 50 buckets, |
| 47 | // and millisecond time values from 1 to 10000, then the ratio between |
| 48 | // consecutive bucket widths will be approximately the 50th |
| 49 | // root of 10000. This approach provides very fine grain (narrow) buckets |
| 50 | // at the low end of the histogram scale, but allows the histogram to cover a |
| 51 | // gigantic range with the addition of very few buckets. |
| 52 | |
Benjamin Lerman | 5799890 | 2014-11-18 16:06:02 +0100 | [diff] [blame] | 53 | // Usually we use macros to define and use a histogram, which are defined in |
| 54 | // base/metrics/histogram_macros.h. Note: Callers should include that header |
| 55 | // directly if they only access the histogram APIs through macros. |
| 56 | // |
| 57 | // Macros use a pattern involving a function static variable, that is a pointer |
| 58 | // to a histogram. This static is explicitly initialized on any thread |
James Robinson | 646469d | 2014-10-03 15:33:28 -0700 | [diff] [blame] | 59 | // that detects an uninitialized (NULL) pointer. The potentially racy |
| 60 | // initialization is not a problem as it is always set to point to the same |
| 61 | // value (i.e., the FactoryGet always returns the same value). FactoryGet |
| 62 | // is also completely thread safe, which results in a completely thread safe, |
| 63 | // and relatively fast, set of counters. To avoid races at shutdown, the static |
| 64 | // pointer is NOT deleted, and we leak the histograms at process termination. |
| 65 | |
| 66 | #ifndef BASE_METRICS_HISTOGRAM_H_ |
| 67 | #define BASE_METRICS_HISTOGRAM_H_ |
| 68 | |
| 69 | #include <map> |
| 70 | #include <string> |
| 71 | #include <vector> |
| 72 | |
James Robinson | 646469d | 2014-10-03 15:33:28 -0700 | [diff] [blame] | 73 | #include "base/base_export.h" |
| 74 | #include "base/basictypes.h" |
| 75 | #include "base/compiler_specific.h" |
| 76 | #include "base/gtest_prod_util.h" |
| 77 | #include "base/logging.h" |
| 78 | #include "base/memory/scoped_ptr.h" |
| 79 | #include "base/metrics/bucket_ranges.h" |
| 80 | #include "base/metrics/histogram_base.h" |
Benjamin Lerman | 5799890 | 2014-11-18 16:06:02 +0100 | [diff] [blame] | 81 | // TODO(asvitkine): Migrate callers to include this directly and remove this. |
| 82 | #include "base/metrics/histogram_macros.h" |
James Robinson | 646469d | 2014-10-03 15:33:28 -0700 | [diff] [blame] | 83 | #include "base/metrics/histogram_samples.h" |
| 84 | #include "base/time/time.h" |
| 85 | |
| 86 | class Pickle; |
| 87 | class PickleIterator; |
| 88 | |
| 89 | namespace base { |
| 90 | |
James Robinson | 646469d | 2014-10-03 15:33:28 -0700 | [diff] [blame] | 91 | class BooleanHistogram; |
| 92 | class CustomHistogram; |
| 93 | class Histogram; |
| 94 | class LinearHistogram; |
Benjamin Lerman | 5799890 | 2014-11-18 16:06:02 +0100 | [diff] [blame] | 95 | class SampleVector; |
James Robinson | 646469d | 2014-10-03 15:33:28 -0700 | [diff] [blame] | 96 | |
// Histogram with an exponential bucket layout (see the example in the
// top-of-file comment). The constructor is protected; the static FactoryGet()
// and FactoryTimeGet() functions declared below are the public factories.
| 97 | class BASE_EXPORT Histogram : public HistogramBase { |
| 98 | public: |
| 99 | // Maximum number of buckets in a histogram (16,384). |
| 100 | static const size_t kBucketCount_MAX; |
| 101 | |
| 102 | typedef std::vector<Count> Counts; |
| 103 | |
| 104 | //---------------------------------------------------------------------------- |
| 105 | // For a valid histogram, input should follow these restrictions: |
| 106 | // minimum > 0 (if a minimum below 1 is specified, it will implicitly be |
| 107 | // normalized up to 1) |
| 108 | // maximum > minimum |
| 109 | // buckets > 2 [minimum buckets needed: underflow, overflow and the range] |
| 110 | // Additionally, |
| 111 | // buckets <= (maximum - minimum + 2) - this is to ensure that we don't have |
| 112 | // more buckets than the range of numbers; having more buckets than 1 per |
| 113 | // value in the range would be nonsensical. |
| 114 | static HistogramBase* FactoryGet(const std::string& name, |
| 115 | Sample minimum, |
| 116 | Sample maximum, |
| 117 | size_t bucket_count, |
| 118 | int32 flags); |
| 119 | static HistogramBase* FactoryTimeGet(const std::string& name, |
| 120 | base::TimeDelta minimum, |
| 121 | base::TimeDelta maximum, |
| 122 | size_t bucket_count, |
| 123 | int32 flags); |
| 124 | |
| 125 | static void InitializeBucketRanges(Sample minimum, |
| 126 | Sample maximum, |
| 127 | BucketRanges* ranges); |
| 128 | |
| 129 | // This constant is for FindCorruption. Since snapshots of histograms are |
| 130 | // taken asynchronously relative to sampling, and our counting code currently |
| 131 | // does not prevent race conditions, it is pretty likely that we'll catch a |
| 132 | // redundant count that doesn't match the sample count. We allow for a |
| 133 | // certain amount of slop before flagging this as an inconsistency. Even with |
| 134 | // an inconsistency, we'll snapshot it again (for UMA in about a half hour), |
| 135 | // so we'll eventually get the data, if it was not the result of a corruption. |
| 136 | static const int kCommonRaceBasedCountMismatch; |
| 137 | |
| 138 | // Check to see if bucket ranges, counts and tallies in the snapshot are |
| 139 | // consistent with the bucket ranges and checksums in our histogram. This can |
| 140 | // produce a false-alarm if a race occurred in the reading of the data during |
| 141 | // a SnapShot process, but should otherwise be false at all times (unless we |
| 142 | // have memory over-writes, or DRAM failures). |
James Robinson | e1b30cf | 2014-10-21 12:25:40 -0700 | [diff] [blame] | 143 | int FindCorruption(const HistogramSamples& samples) const override; |
James Robinson | 646469d | 2014-10-03 15:33:28 -0700 | [diff] [blame] | 144 | |
| 145 | //---------------------------------------------------------------------------- |
| 146 | // Accessors for factory construction, serialization and testing. |
| 147 | //---------------------------------------------------------------------------- |
| 148 | Sample declared_min() const { return declared_min_; } |
| 149 | Sample declared_max() const { return declared_max_; } |
| 150 | virtual Sample ranges(size_t i) const; |
| 151 | virtual size_t bucket_count() const; |
| 152 | const BucketRanges* bucket_ranges() const { return bucket_ranges_; } |
| 153 | |
| 154 | // This function validates histogram construction arguments. It returns false |
| 155 | // if some of the arguments are totally bad. |
| 156 | // Note: currently it allows some bad input, e.g. 0 as minimum, but silently |
| 157 | // converts it to good input: 1. |
| 158 | // TODO(kaiwang): Be more strict and return false for any bad input, and |
| 159 | // make this a readonly validating function. |
| 160 | static bool InspectConstructionArguments(const std::string& name, |
| 161 | Sample* minimum, |
| 162 | Sample* maximum, |
| 163 | size_t* bucket_count); |
| 164 | |
| 165 | // HistogramBase implementation: |
James Robinson | e1b30cf | 2014-10-21 12:25:40 -0700 | [diff] [blame] | 166 | HistogramType GetHistogramType() const override; |
| 167 | bool HasConstructionArguments(Sample expected_minimum, |
| 168 | Sample expected_maximum, |
| 169 | size_t expected_bucket_count) const override; |
| 170 | void Add(Sample value) override; |
| 171 | scoped_ptr<HistogramSamples> SnapshotSamples() const override; |
| 172 | void AddSamples(const HistogramSamples& samples) override; |
| 173 | bool AddSamplesFromPickle(PickleIterator* iter) override; |
| 174 | void WriteHTMLGraph(std::string* output) const override; |
| 175 | void WriteAscii(std::string* output) const override; |
James Robinson | 646469d | 2014-10-03 15:33:28 -0700 | [diff] [blame] | 176 | |
| 177 | protected: |
| 178 | // |ranges| should contain the underflow and overflow buckets. See top |
| 179 | // comments for example. |
| 180 | Histogram(const std::string& name, |
| 181 | Sample minimum, |
| 182 | Sample maximum, |
| 183 | const BucketRanges* ranges); |
| 184 | |
James Robinson | e1b30cf | 2014-10-21 12:25:40 -0700 | [diff] [blame] | 185 | ~Histogram() override; |
James Robinson | 646469d | 2014-10-03 15:33:28 -0700 | [diff] [blame] | 186 | |
| 187 | // HistogramBase implementation: |
James Robinson | e1b30cf | 2014-10-21 12:25:40 -0700 | [diff] [blame] | 188 | bool SerializeInfoImpl(Pickle* pickle) const override; |
James Robinson | 646469d | 2014-10-03 15:33:28 -0700 | [diff] [blame] | 189 | |
| 190 | // Method to override to skip the display of the i'th bucket if it's empty. |
| 191 | virtual bool PrintEmptyBucket(size_t index) const; |
| 192 | |
| 193 | // Get normalized size, relative to the ranges(i). |
| 194 | virtual double GetBucketSize(Count current, size_t i) const; |
| 195 | |
| 196 | // Return a string description of what goes in a given bucket. |
| 197 | // Most commonly this is the numeric value, but in derived classes it may |
| 198 | // be a name (or string description) given to the bucket. |
| 199 | virtual const std::string GetAsciiBucketRange(size_t it) const; |
| 200 | |
| 201 | private: |
| 202 | // Allow tests to corrupt our innards for testing purposes. |
| 203 | FRIEND_TEST_ALL_PREFIXES(HistogramTest, BoundsTest); |
| 204 | FRIEND_TEST_ALL_PREFIXES(HistogramTest, BucketPlacementTest); |
| 205 | FRIEND_TEST_ALL_PREFIXES(HistogramTest, CorruptBucketBounds); |
| 206 | FRIEND_TEST_ALL_PREFIXES(HistogramTest, CorruptSampleCounts); |
| 207 | FRIEND_TEST_ALL_PREFIXES(HistogramTest, NameMatchTest); |
| 208 | |
| 209 | friend class StatisticsRecorder; // To allow it to delete duplicates. |
| 210 | friend class StatisticsRecorderTest; |
| 211 | |
| 212 | friend BASE_EXPORT_PRIVATE HistogramBase* DeserializeHistogramInfo( |
| 213 | PickleIterator* iter); |
| 214 | static HistogramBase* DeserializeInfoImpl(PickleIterator* iter); |
| 215 | |
| 216 | // Implementation of SnapshotSamples function. |
| 217 | scoped_ptr<SampleVector> SnapshotSampleVector() const; |
| 218 | |
| 219 | //---------------------------------------------------------------------------- |
| 220 | // Helpers for emitting Ascii graphic. Each method appends data to output. |
| 221 | |
| 222 | void WriteAsciiImpl(bool graph_it, |
| 223 | const std::string& newline, |
| 224 | std::string* output) const; |
| 225 | |
| 226 | // Find out how large (graphically) the largest bucket will appear to be. |
| 227 | double GetPeakBucketSize(const SampleVector& samples) const; |
| 228 | |
| 229 | // Write a common header message describing this histogram. |
| 230 | void WriteAsciiHeader(const SampleVector& samples, |
| 231 | Count sample_count, |
| 232 | std::string* output) const; |
| 233 | |
| 234 | // Write information about previous, current, and next buckets. |
| 235 | // Information such as cumulative percentage, etc. |
| 236 | void WriteAsciiBucketContext(const int64 past, const Count current, |
| 237 | const int64 remaining, const size_t i, |
| 238 | std::string* output) const; |
| 239 | |
| 240 | // WriteJSON calls these. |
James Robinson | e1b30cf | 2014-10-21 12:25:40 -0700 | [diff] [blame] | 241 | void GetParameters(DictionaryValue* params) const override; |
James Robinson | 646469d | 2014-10-03 15:33:28 -0700 | [diff] [blame] | 242 | |
James Robinson | e1b30cf | 2014-10-21 12:25:40 -0700 | [diff] [blame] | 243 | void GetCountAndBucketData(Count* count, |
| 244 | int64* sum, |
| 245 | ListValue* buckets) const override; |
James Robinson | 646469d | 2014-10-03 15:33:28 -0700 | [diff] [blame] | 246 | |
| 247 | // Not owned by this object. Should be obtained from StatisticsRecorder. |
| 248 | const BucketRanges* bucket_ranges_; |
| 249 | |
| 250 | Sample declared_min_; // Less than this goes into the first bucket. |
| 251 | Sample declared_max_; // Over this goes into the last bucket. |
| 252 | |
| 253 | // Finally, provide the state that changes with the addition of each new |
| 254 | // sample. |
| 255 | scoped_ptr<SampleVector> samples_; |
| 256 | |
| 257 | DISALLOW_COPY_AND_ASSIGN(Histogram); |
| 258 | }; |
| 259 | |
| 260 | //------------------------------------------------------------------------------ |
| 261 | |
| 262 | // LinearHistogram is a more traditional histogram, with evenly spaced |
| 263 | // buckets. |
| 264 | class BASE_EXPORT LinearHistogram : public Histogram { |
| 265 | public: |
James Robinson | e1b30cf | 2014-10-21 12:25:40 -0700 | [diff] [blame] | 266 | ~LinearHistogram() override; |
James Robinson | 646469d | 2014-10-03 15:33:28 -0700 | [diff] [blame] | 267 | |
| 268 | /* minimum should start from 1. 0 as minimum is invalid. 0 is an implicit |
| 269 | default underflow bucket. */ |
| 270 | static HistogramBase* FactoryGet(const std::string& name, |
| 271 | Sample minimum, |
| 272 | Sample maximum, |
| 273 | size_t bucket_count, |
| 274 | int32 flags); |
| 275 | static HistogramBase* FactoryTimeGet(const std::string& name, |
| 276 | TimeDelta minimum, |
| 277 | TimeDelta maximum, |
| 278 | size_t bucket_count, |
| 279 | int32 flags); |
| 280 | |
| 281 | struct DescriptionPair { |
| 282 | Sample sample; |
| 283 | const char* description; // Null means end of a list of pairs. |
| 284 | }; |
| 285 | |
| 286 | // Create a LinearHistogram and store a list of number/text values for use in |
| 287 | // writing the histogram graph. |
| 288 | // |descriptions| can be NULL, which means no special descriptions to set. If |
| 289 | // it's not NULL, the last element in the array must have a NULL in its |
| 290 | // "description" field. |
| 291 | static HistogramBase* FactoryGetWithRangeDescription( |
| 292 | const std::string& name, |
| 293 | Sample minimum, |
| 294 | Sample maximum, |
| 295 | size_t bucket_count, |
| 296 | int32 flags, |
| 297 | const DescriptionPair descriptions[]); |
| 298 | |
| 299 | static void InitializeBucketRanges(Sample minimum, |
| 300 | Sample maximum, |
| 301 | BucketRanges* ranges); |
| 302 | |
| 303 | // Overridden from Histogram: |
James Robinson | e1b30cf | 2014-10-21 12:25:40 -0700 | [diff] [blame] | 304 | HistogramType GetHistogramType() const override; |
James Robinson | 646469d | 2014-10-03 15:33:28 -0700 | [diff] [blame] | 305 | |
| 306 | protected: |
| 307 | LinearHistogram(const std::string& name, |
| 308 | Sample minimum, |
| 309 | Sample maximum, |
| 310 | const BucketRanges* ranges); |
| 311 | |
James Robinson | e1b30cf | 2014-10-21 12:25:40 -0700 | [diff] [blame] | 312 | double GetBucketSize(Count current, size_t i) const override; |
James Robinson | 646469d | 2014-10-03 15:33:28 -0700 | [diff] [blame] | 313 | |
| 314 | // If we have a description for a bucket, then return that. Otherwise |
| 315 | // let parent class provide a (numeric) description. |
James Robinson | e1b30cf | 2014-10-21 12:25:40 -0700 | [diff] [blame] | 316 | const std::string GetAsciiBucketRange(size_t i) const override; |
James Robinson | 646469d | 2014-10-03 15:33:28 -0700 | [diff] [blame] | 317 | |
| 318 | // Skip printing of name for numeric range if we have a name (and if this is |
| 319 | // an empty bucket). |
James Robinson | e1b30cf | 2014-10-21 12:25:40 -0700 | [diff] [blame] | 320 | bool PrintEmptyBucket(size_t index) const override; |
James Robinson | 646469d | 2014-10-03 15:33:28 -0700 | [diff] [blame] | 321 | |
| 322 | private: |
| 323 | friend BASE_EXPORT_PRIVATE HistogramBase* DeserializeHistogramInfo( |
| 324 | PickleIterator* iter); |
| 325 | static HistogramBase* DeserializeInfoImpl(PickleIterator* iter); |
| 326 | |
| 327 | // For some ranges, we store a printable description of a bucket range. |
| 328 | // If there is no description, then GetAsciiBucketRange() uses parent class |
| 329 | // to provide a description. |
| 330 | typedef std::map<Sample, std::string> BucketDescriptionMap; |
| 331 | BucketDescriptionMap bucket_description_; |
| 332 | |
| 333 | DISALLOW_COPY_AND_ASSIGN(LinearHistogram); |
| 334 | }; |
| 335 | |
| 336 | //------------------------------------------------------------------------------ |
| 337 | |
| 338 | // BooleanHistogram is a histogram for booleans; its FactoryGet() takes only a name and flags. |
| 339 | class BASE_EXPORT BooleanHistogram : public LinearHistogram { |
| 340 | public: |
| 341 | static HistogramBase* FactoryGet(const std::string& name, int32 flags); |
| 342 | |
James Robinson | e1b30cf | 2014-10-21 12:25:40 -0700 | [diff] [blame] | 343 | HistogramType GetHistogramType() const override; |
James Robinson | 646469d | 2014-10-03 15:33:28 -0700 | [diff] [blame] | 344 | |
| 345 | private: |
| 346 | BooleanHistogram(const std::string& name, const BucketRanges* ranges); |
| 347 | |
| 348 | friend BASE_EXPORT_PRIVATE HistogramBase* DeserializeHistogramInfo( |
| 349 | PickleIterator* iter); |
| 350 | static HistogramBase* DeserializeInfoImpl(PickleIterator* iter); |
| 351 | |
| 352 | DISALLOW_COPY_AND_ASSIGN(BooleanHistogram); |
| 353 | }; |
| 354 | |
| 355 | //------------------------------------------------------------------------------ |
| 356 | |
| 357 | // CustomHistogram is a histogram for a set of custom integers. |
| 358 | class BASE_EXPORT CustomHistogram : public Histogram { |
| 359 | public: |
| 360 | // |custom_ranges| contains a vector of limits on ranges. Each limit should be |
| 361 | // > 0 and < kSampleType_MAX. (Currently 0 is still accepted for backward |
| 362 | // compatibility). The limits can be unordered or contain duplication, but |
| 363 | // clients should not depend on this. |
| 364 | static HistogramBase* FactoryGet(const std::string& name, |
| 365 | const std::vector<Sample>& custom_ranges, |
| 366 | int32 flags); |
| 367 | |
| 368 | // Overridden from Histogram: |
James Robinson | e1b30cf | 2014-10-21 12:25:40 -0700 | [diff] [blame] | 369 | HistogramType GetHistogramType() const override; |
James Robinson | 646469d | 2014-10-03 15:33:28 -0700 | [diff] [blame] | 370 | |
| 371 | // Helper method for transforming an array of valid enumeration values |
| 372 | // to the std::vector<int> expected by UMA_HISTOGRAM_CUSTOM_ENUMERATION. |
| 373 | // This function ensures that a guard bucket exists right after any |
| 374 | // valid sample value (unless the next higher sample is also a valid value), |
| 375 | // so that invalid samples never fall into the same bucket as valid samples. |
| 376 | // TODO(kaiwang): Change name to ArrayToCustomEnumRanges. |
| 377 | static std::vector<Sample> ArrayToCustomRanges(const Sample* values, |
| 378 | size_t num_values); |
| 379 | protected: |
| 380 | CustomHistogram(const std::string& name, |
| 381 | const BucketRanges* ranges); |
| 382 | |
| 383 | // HistogramBase implementation: |
James Robinson | e1b30cf | 2014-10-21 12:25:40 -0700 | [diff] [blame] | 384 | bool SerializeInfoImpl(Pickle* pickle) const override; |
James Robinson | 646469d | 2014-10-03 15:33:28 -0700 | [diff] [blame] | 385 | |
James Robinson | e1b30cf | 2014-10-21 12:25:40 -0700 | [diff] [blame] | 386 | double GetBucketSize(Count current, size_t i) const override; |
James Robinson | 646469d | 2014-10-03 15:33:28 -0700 | [diff] [blame] | 387 | |
| 388 | private: |
| 389 | friend BASE_EXPORT_PRIVATE HistogramBase* DeserializeHistogramInfo( |
| 390 | PickleIterator* iter); |
| 391 | static HistogramBase* DeserializeInfoImpl(PickleIterator* iter); |
| 392 | |
| 393 | static bool ValidateCustomRanges(const std::vector<Sample>& custom_ranges); |
| 394 | static BucketRanges* CreateBucketRangesFromCustomRanges( |
| 395 | const std::vector<Sample>& custom_ranges); |
| 396 | |
| 397 | DISALLOW_COPY_AND_ASSIGN(CustomHistogram); |
| 398 | }; |
| 399 | |
| 400 | } // namespace base |
| 401 | |
| 402 | #endif // BASE_METRICS_HISTOGRAM_H_ |