forked from elastic/built-docs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path_approximate_aggregations.html
296 lines (276 loc) · 14.3 KB
/
_approximate_aggregations.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
<!DOCTYPE html>
<html lang="en-us">
<head>
<meta charset="UTF-8">
<title>Approximate Aggregations | Elasticsearch: The Definitive Guide [2.x] | Elastic</title>
<meta class="elastic" name="content" content="Approximate Aggregations | Elasticsearch: The Definitive Guide [2.x]">
<link rel="home" href="index.html" title="Elasticsearch: The Definitive Guide [2.x]"/>
<link rel="up" href="aggregations.html" title="Aggregations"/>
<link rel="prev" href="_sorting_based_on_deep_metrics.html" title="Sorting Based on &quot;Deep&quot; Metrics"/>
<link rel="next" href="cardinality.html" title="Finding Distinct Counts"/>
<meta class="elastic" name="product_version" content="2.x"/>
<meta class="elastic" name="product_name" content="Elasticsearch"/>
<meta class="elastic" name="website_area" content="documentation"/>
<meta name="DC.type" content="Learn/Docs/Legacy/Elasticsearch/Definitive Guide/2.x"/>
<meta name="DC.subject" content="Elasticsearch"/>
<meta name="DC.identifier" content="2.x"/>
<meta name="robots" content="noindex,nofollow"/>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<script src="https://cdn.optimizely.com/js/18132920325.js"></script>
<link rel="apple-touch-icon" sizes="57x57" href="/apple-icon-57x57.png">
<link rel="apple-touch-icon" sizes="60x60" href="/apple-icon-60x60.png">
<link rel="apple-touch-icon" sizes="72x72" href="/apple-icon-72x72.png">
<link rel="apple-touch-icon" sizes="76x76" href="/apple-icon-76x76.png">
<link rel="apple-touch-icon" sizes="114x114" href="/apple-icon-114x114.png">
<link rel="apple-touch-icon" sizes="120x120" href="/apple-icon-120x120.png">
<link rel="apple-touch-icon" sizes="144x144" href="/apple-icon-144x144.png">
<link rel="apple-touch-icon" sizes="152x152" href="/apple-icon-152x152.png">
<link rel="apple-touch-icon" sizes="180x180" href="/apple-icon-180x180.png">
<link rel="icon" type="image/png" href="/favicon-32x32.png" sizes="32x32">
<link rel="icon" type="image/png" href="/android-chrome-192x192.png" sizes="192x192">
<link rel="icon" type="image/png" href="/favicon-96x96.png" sizes="96x96">
<link rel="icon" type="image/png" href="/favicon-16x16.png" sizes="16x16">
<link rel="manifest" href="/manifest.json">
<meta name="apple-mobile-web-app-title" content="Elastic">
<meta name="application-name" content="Elastic">
<meta name="msapplication-TileColor" content="#ffffff">
<meta name="msapplication-TileImage" content="/mstile-144x144.png">
<meta name="theme-color" content="#ffffff">
<meta name="naver-site-verification" content="936882c1853b701b3cef3721758d80535413dbfd" />
<meta name="yandex-verification" content="d8a47e95d0972434" />
<meta name="localized" content="true" />
<meta name="st:robots" content="follow,index" />
<meta property="og:image" content="https://static-www.elastic.co/v3/assets/bltefdd0b53724fa2ce/blt280217a63b82a734/6202d3378b1f312528798412/elastic-logo.svg" />
<meta property="og:image:width" content="500" />
<meta property="og:image:height" content="172" />
<link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
<link rel="icon" href="/favicon.ico" type="image/x-icon">
<link rel="apple-touch-icon-precomposed" sizes="64x64" href="/favicon_64x64_16bit.png">
<link rel="apple-touch-icon-precomposed" sizes="32x32" href="/favicon_32x32.png">
<link rel="apple-touch-icon-precomposed" sizes="16x16" href="/favicon_16x16.png">
<!-- Give IE8 a fighting chance -->
<!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
<link rel="stylesheet" type="text/css" href="/guide/static/styles.css" />
</head>
<!--© 2015-2022 Elasticsearch B.V. -->
<!-- All Elastic documentation is licensed under a Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International License. -->
<!-- http://creativecommons.org/licenses/by-nc-nd/4.0/ -->
<body>
<!-- Google Tag Manager -->
<script>
  // Start from an empty queue; the loader below pushes the first event onto it.
  dataLayer = [];
</script>
<!-- Fallback for browsers with scripting disabled: a hidden iframe pointing at the same container. -->
<noscript><iframe src="https://www.googletagmanager.com/ns.html?id=GTM-58RLH5" title="Google Tag Manager" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
<script>
  // Google Tag Manager loader for container GTM-58RLH5.
  //   w = window, d = document, s = tag name to create ('script'),
  //   l = name of the global queue ('dataLayer'), i = container id.
  (function (w, d, s, l, i) {
    // Make sure the queue exists, then record the start event with a timestamp.
    w[l] = w[l] || [];
    w[l].push({ 'gtm.start': new Date().getTime(), event: 'gtm.js' });
    var f = d.getElementsByTagName(s)[0],
      j = d.createElement(s),
      // Only append the &l= parameter when the queue is not named 'dataLayer'.
      dl = l != 'dataLayer' ? '&l=' + l : '';
    j.async = true;
    // Explicit https:// instead of a protocol-relative URL, so the request
    // does not depend on the scheme the page itself was opened with.
    j.src = 'https://www.googletagmanager.com/gtm.js?id=' + i + dl;
    // Insert the new script element before the first existing <script>.
    f.parentNode.insertBefore(j, f);
  })(window, document, 'script', 'dataLayer', 'GTM-58RLH5');
</script>
<!-- End Google Tag Manager -->
<!-- Global site tag (gtag.js) - Google Analytics -->
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-12395217-16"></script>
<script>
  // Reuse the existing dataLayer queue if one is already defined, otherwise create it.
  window.dataLayer = window.dataLayer || [];
  // Queue each call by pushing its raw `arguments` object onto dataLayer.
  function gtag(){dataLayer.push(arguments);}
  gtag('js', new Date());
  gtag('config', 'UA-12395217-16');
</script>
<!-- Google Tag Manager for GA4 -->
<script>
  // Google Tag Manager loader for the GA4 container GTM-KNJMG2M.
  //   w = window, d = document, s = tag name to create ('script'),
  //   l = name of the global queue ('dataLayer'), i = container id.
  (function (w, d, s, l, i) {
    // Make sure the queue exists, then record the start event with a timestamp.
    w[l] = w[l] || [];
    w[l].push({ 'gtm.start': new Date().getTime(), event: 'gtm.js' });
    var f = d.getElementsByTagName(s)[0],
      j = d.createElement(s),
      // Only append the &l= parameter when the queue is not named 'dataLayer'.
      dl = l != 'dataLayer' ? '&l=' + l : '';
    j.async = true;
    // Load directly from googletagmanager.com (the proxy-host prefix is removed).
    j.src = 'https://www.googletagmanager.com/gtm.js?id=' + i + dl;
    // Insert the new script element before the first existing <script>.
    f.parentNode.insertBefore(j, f);
  })(window, document, 'script', 'dataLayer', 'GTM-KNJMG2M');
</script>
<!-- Fallback for browsers with scripting disabled: a hidden iframe pointing at the same container. -->
<noscript><iframe src="https://www.googletagmanager.com/ns.html?id=GTM-KNJMG2M" title="Google Tag Manager" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
<!-- End Google Tag Manager for GA4-->
<div id='elastic-nav' style="display:none;"></div>
<script src='https://www.elastic.co/elastic-nav.js'></script>
<div class="main-container">
<section id="content" >
<div class="content-wrapper">
<section id="guide" lang="en">
<div class="container-fluid">
<div class="row pb-3">
<div class="col-12 order-2 col-md-4 order-md-1 col-lg-3 h-almost-full-md sticky-top-md" id="left_col">
<!-- The TOC is appended here -->
</div>
<div class="col-12 order-1 col-md-8 order-md-2 col-lg-7 order-lg-2 guide-section" id="middle_col">
<!-- start body -->
<div class="page_header">
<p>
<strong>WARNING</strong>: The 2.x versions of Elasticsearch have passed their
<a href="https://www.elastic.co/support/eol">EOL dates</a>. If you are running
a 2.x version, we strongly advise you to upgrade.
</p>
<p>
This documentation is no longer maintained and may be removed. For the latest
information, see the <a
href="https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html">current
Elasticsearch documentation</a>.
</p>
</div>
<div id="content">
<div class="breadcrumbs">
<span class="breadcrumb-link"><a href="/guide/">Elastic Docs</a></span>
<span class="chevron-right">›</span><span class="breadcrumb-link"><a href="index.html">Elasticsearch: The Definitive Guide [2.x]</a></span>
<span class="chevron-right">›</span><span class="breadcrumb-link"><a href="aggregations.html">Aggregations</a></span>
</div>
<div class="navheader">
<span class="prev">
<a href="_sorting_based_on_deep_metrics.html">« Sorting Based on "Deep" Metrics</a>
</span>
<span class="next">
<a href="cardinality.html">Finding Distinct Counts »</a>
</span>
</div>
<div class="chapter">
<div class="titlepage"><div><div>
<h2 class="title"><a id="_approximate_aggregations"></a>Approximate Aggregations<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/300_Aggregations/55_approx_intro.asciidoc">edit</a></h2>
</div></div></div>
<p>Life is easy if all your data fits on a single machine. Classic algorithms
taught in CS201 will be sufficient for all your needs. But if all your data fits
on a single machine, there would be no need for distributed software
like Elasticsearch at all. Once you start distributing data, however, algorithms
need to be selected carefully.</p>
<p>Some algorithms are amenable to distributed execution. All of the aggregations
discussed thus far execute in a single pass and give exact results. These types
of algorithms are often referred to as <em>embarrassingly parallel</em>,
because they parallelize to multiple machines with little effort. When
performing a <code class="literal">max</code> metric, for example, the underlying algorithm is very simple:</p>
<div class="olist orderedlist">
<ol class="orderedlist">
<li class="listitem">
Broadcast the request to all shards.
</li>
<li class="listitem">
Look at the <code class="literal">price</code> field for each document. If <code class="literal">price &gt; current_max</code>, replace
<code class="literal">current_max</code> with <code class="literal">price</code>.
</li>
<li class="listitem">
Return the maximum price from all shards to the coordinating node.
</li>
<li class="listitem">
Find the maximum price returned from all shards. This is the true maximum.
</li>
</ol>
</div>
<p>The algorithm scales linearly with machines because the algorithm requires no
coordination (the machines don’t need to discuss intermediate results), and the
memory footprint is very small (a single integer representing the maximum).</p>
<p>Not all algorithms are as simple as taking the maximum value, unfortunately.
More complex operations require algorithms that make conscious trade-offs in
performance and memory utilization. There is a triangle of factors at play:
big data, exactness, and real-time latency.</p>
<p>You get to choose two from this triangle:</p>
<div class="variablelist">
<dl class="variablelist">
<dt>
<span class="term">
Exact + real time
</span>
</dt>
<dd>
Your data fits in the RAM of a single machine. The world
is your oyster; use any algorithm you want. Results will be 100% accurate and
relatively fast.
</dd>
<dt>
<span class="term">
Big data + exact
</span>
</dt>
<dd>
A classic Hadoop installation. Can handle petabytes of data
and give you exact answers—​but it may take a week to give you that answer.
</dd>
<dt>
<span class="term">
Big data + real time
</span>
</dt>
<dd>
Approximate algorithms that give you accurate, but not
exact, results.
</dd>
</dl>
</div>
<p>Elasticsearch currently supports two approximate algorithms (<code class="literal">cardinality</code> and
<code class="literal">percentiles</code>). These will give you accurate results, but not 100% exact.
In exchange for a little bit of estimation error, these algorithms give you
fast execution and a small memory footprint.</p>
<p>For <em>most</em> domains, highly accurate results that return <em>in real time</em> across
<em>all your data</em> are more important than 100% exactness. At first blush, this may be an alien concept to you. <em>"We need exact answers!"</em>
you may yell. But consider the implications of a 0.5% error:</p>
<div class="ulist itemizedlist">
<ul class="itemizedlist">
<li class="listitem">
The true 99th percentile of latency for your website is 132ms.
</li>
<li class="listitem">
An approximation with 0.5% error will be within +/- 0.66ms of 132ms.
</li>
<li class="listitem">
The approximation returns in milliseconds, while the "true" answer may take seconds, or
be impossible.
</li>
</ul>
</div>
<p>For simply checking on your website’s latency, do you care if the approximate
answer is 132.66ms instead of 132ms? Certainly, not all domains can tolerate
approximations—​but the vast majority will have no problem. Accepting
an approximate answer is more often a <em>cultural</em> hurdle than a business
or technical imperative.</p>
</div>
<div class="navfooter">
<span class="prev">
<a href="_sorting_based_on_deep_metrics.html">« Sorting Based on "Deep" Metrics</a>
</span>
<span class="next">
<a href="cardinality.html">Finding Distinct Counts »</a>
</span>
</div>
</div>
<!-- end body -->
</div>
<div class="col-12 order-3 col-lg-2 order-lg-3 h-almost-full-lg sticky-top-lg" id="right_col">
<div id="sticky_content">
<!-- The OTP is appended here -->
<div class="row">
<div class="col-0 col-md-4 col-lg-0" id="bottom_left_col"></div>
<div class="col-12 col-md-8 col-lg-12">
<div id="rtpcontainer">
<div class="mktg-promo" id="most-popular">
<p class="aside-heading">Most Popular</p>
<div class="pb-2">
<p class="media-type">Video</p>
<a href="https://www.elastic.co/webinars/getting-started-elasticsearch?page=docs&amp;placement=top-video">
<p class="mb-0">Get Started with Elasticsearch</p>
</a>
</div>
<div class="pb-2">
<p class="media-type">Video</p>
<a href="https://www.elastic.co/webinars/getting-started-kibana?page=docs&amp;placement=top-video">
<p class="mb-0">Intro to Kibana</p>
</a>
</div>
<div class="pb-2">
<p class="media-type">Video</p>
<a href="https://www.elastic.co/webinars/introduction-elk-stack?page=docs&amp;placement=top-video">
<p class="mb-0">ELK for Logs &amp; Metrics</p>
</a>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</section>
</div>
<div id='elastic-footer'></div>
<script src='https://www.elastic.co/elastic-footer.js'></script>
<!-- Footer Section end-->
</section>
</div>
<script src="/guide/static/jquery.js"></script>
<script type="text/javascript" src="/guide/static/docs.js"></script>
<script type="text/javascript">
  // Define an empty initial_state object on window.
  // NOTE(review): presumably read by /guide/static/docs.js — confirm against that script.
  window.initial_state = {};
</script>
</body>
</html>