forked from elastic/built-docs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path_basic_concepts.html
260 lines (242 loc) · 19.3 KB
/
_basic_concepts.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
<!DOCTYPE html>
<html lang="en-us">
<head>
<meta charset="UTF-8">
<title>Basic Concepts | Elasticsearch Guide [1.6] | Elastic</title>
<meta class="elastic" name="content" content="Basic Concepts | Elasticsearch Guide [1.6]">
<link rel="home" href="index.html" title="Elasticsearch Guide [1.6]"/>
<link rel="up" href="getting-started.html" title="Getting Started"/>
<link rel="prev" href="getting-started.html" title="Getting Started"/>
<link rel="next" href="_installation.html" title="Installation"/>
<meta class="elastic" name="product_version" content="1.6"/>
<meta class="elastic" name="product_name" content="Elasticsearch"/>
<meta class="elastic" name="website_area" content="documentation"/>
<meta name="DC.type" content="Learn/Docs/Elasticsearch/Reference/1.6"/>
<meta name="DC.subject" content="Elasticsearch"/>
<meta name="DC.identifier" content="1.6"/>
<meta name="robots" content="noindex,nofollow"/>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<script src="https://cdn.optimizely.com/js/18132920325.js"></script>
<link rel="apple-touch-icon" sizes="57x57" href="/apple-icon-57x57.png">
<link rel="apple-touch-icon" sizes="60x60" href="/apple-icon-60x60.png">
<link rel="apple-touch-icon" sizes="72x72" href="/apple-icon-72x72.png">
<link rel="apple-touch-icon" sizes="76x76" href="/apple-icon-76x76.png">
<link rel="apple-touch-icon" sizes="114x114" href="/apple-icon-114x114.png">
<link rel="apple-touch-icon" sizes="120x120" href="/apple-icon-120x120.png">
<link rel="apple-touch-icon" sizes="144x144" href="/apple-icon-144x144.png">
<link rel="apple-touch-icon" sizes="152x152" href="/apple-icon-152x152.png">
<link rel="apple-touch-icon" sizes="180x180" href="/apple-icon-180x180.png">
<link rel="icon" type="image/png" href="/favicon-32x32.png" sizes="32x32">
<link rel="icon" type="image/png" href="/android-chrome-192x192.png" sizes="192x192">
<link rel="icon" type="image/png" href="/favicon-96x96.png" sizes="96x96">
<link rel="icon" type="image/png" href="/favicon-16x16.png" sizes="16x16">
<link rel="manifest" href="/manifest.json">
<meta name="apple-mobile-web-app-title" content="Elastic">
<meta name="application-name" content="Elastic">
<meta name="msapplication-TileColor" content="#ffffff">
<meta name="msapplication-TileImage" content="/mstile-144x144.png">
<meta name="theme-color" content="#ffffff">
<meta name="naver-site-verification" content="936882c1853b701b3cef3721758d80535413dbfd" />
<meta name="yandex-verification" content="d8a47e95d0972434" />
<meta name="localized" content="true" />
<meta name="st:robots" content="follow,index" />
<meta property="og:image" content="https://static-www.elastic.co/v3/assets/bltefdd0b53724fa2ce/blt280217a63b82a734/6202d3378b1f312528798412/elastic-logo.svg" />
<meta property="og:image:width" content="500" />
<meta property="og:image:height" content="172" />
<link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
<link rel="icon" href="/favicon.ico" type="image/x-icon">
<link rel="apple-touch-icon-precomposed" sizes="64x64" href="/favicon_64x64_16bit.png">
<link rel="apple-touch-icon-precomposed" sizes="32x32" href="/favicon_32x32.png">
<link rel="apple-touch-icon-precomposed" sizes="16x16" href="/favicon_16x16.png">
<!-- Give IE8 a fighting chance -->
<!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
<link rel="stylesheet" type="text/css" href="/guide/static/styles.css" />
</head>
<!--© 2015-2022 Elasticsearch B.V. -->
<!-- All Elastic documentation is licensed under a Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International License. -->
<!-- http://creativecommons.org/licenses/by-nc-nd/4.0/ -->
<body>
<!-- Google Tag Manager -->
<script>dataLayer = [];</script><noscript><iframe src="//www.googletagmanager.com/ns.html?id=GTM-58RLH5" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
<script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start': new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0], j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src= '//www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f); })(window,document,'script','dataLayer','GTM-58RLH5');</script>
<!-- End Google Tag Manager -->
<!-- Global site tag (gtag.js) - Google Analytics -->
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-12395217-16"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'UA-12395217-16');
</script>
<!-- Google Tag Manager for GA4 -->
<script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start': new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0], j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src='https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-KNJMG2M');</script>
<noscript><iframe src="https://www.googletagmanager.com/ns.html?id=GTM-KNJMG2M" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
<!-- End Google Tag Manager for GA4-->
<div id='elastic-nav' style="display:none;"></div>
<script src='https://www.elastic.co/elastic-nav.js'></script>
<div class="main-container">
<section id="content" >
<div class="content-wrapper">
<section id="guide" lang="en">
<div class="container-fluid">
<div class="row pb-3">
<div class="col-12 order-2 col-md-4 order-md-1 col-lg-3 h-almost-full-md sticky-top-md" id="left_col">
<!-- The TOC is appended here -->
</div>
<div class="col-12 order-1 col-md-8 order-md-2 col-lg-7 order-lg-2 guide-section" id="middle_col">
<!-- start body -->
<div class="page_header">
<p>
<strong>WARNING</strong>: Version 1.6 of Elasticsearch has passed its
<a href="https://www.elastic.co/support/eol">EOL date</a>.
</p>
<p>
This documentation is no longer being maintained and may be removed.
If you are running this version, we strongly advise you to upgrade.
For the latest information, see the
<a href="../current/index.html">current release documentation</a>.
</p>
</div>
<div id="content">
<div class="breadcrumbs">
<span class="breadcrumb-link"><a href="/guide/">Elastic Docs</a></span>
<span class="chevron-right">›</span><span class="breadcrumb-link"><a href="index.html">Elasticsearch Guide [1.6]</a></span>
<span class="chevron-right">›</span><span class="breadcrumb-link"><a href="getting-started.html">Getting Started</a></span>
</div>
<div class="navheader">
<span class="prev">
<a href="getting-started.html">« Getting Started</a>
</span>
<span class="next">
<a href="_installation.html">Installation »</a>
</span>
</div>
<div class="chapter">
<div class="titlepage"><div><div>
<h2 class="title"><a id="_basic_concepts"></a>Basic Concepts<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch/edit/1.6/docs/reference/getting-started.asciidoc">edit</a></h2>
</div></div></div>
<p>There are a few concepts that are core to Elasticsearch. Understanding these concepts from the outset will tremendously help ease the learning process.</p>
<h3><a id="_near_realtime_nrt"></a>Near Realtime (NRT)<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch/edit/1.6/docs/reference/getting-started.asciidoc">edit</a></h3>
<p>Elasticsearch is a near real time search platform. What this means is there is a slight latency (normally one second) from the time you index a document until the time it becomes searchable.</p>
<h3><a id="_cluster"></a>Cluster<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch/edit/1.6/docs/reference/getting-started.asciidoc">edit</a></h3>
<p>A cluster is a collection of one or more nodes (servers) that together holds your entire data and provides federated indexing and search capabilities across all nodes. A cluster is identified by a unique name which by default is "elasticsearch". This name is important because a node can only be part of a cluster if the node is set up to join the cluster by its name.</p>
<p>Make sure that you don’t reuse the same cluster names in different
environments, otherwise you might end up with nodes joining the wrong cluster.
For instance you could use <code class="literal">logging-dev</code>, <code class="literal">logging-stage</code>, and <code class="literal">logging-prod</code>
for the development, staging, and production clusters.</p>
<p>Note that it is valid and perfectly fine to have a cluster with only a single node in it. Furthermore, you may also have multiple independent clusters each with its own unique cluster name.</p>
<h3><a id="_node"></a>Node<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch/edit/1.6/docs/reference/getting-started.asciidoc">edit</a></h3>
<p>A node is a single server that is part of your cluster, stores your data, and participates in the cluster’s indexing and search capabilities. Just like a cluster, a node is identified by a name which by default is a random Marvel character name that is assigned to the node at startup. You can define any node name you want if you do not want the default. This name is important for administration purposes where you want to identify which servers in your network correspond to which nodes in your Elasticsearch cluster.</p>
<p>A node can be configured to join a specific cluster by the cluster name. By default, each node is set up to join a cluster named <code class="literal">elasticsearch</code> which means that if you start up a number of nodes on your network and—​assuming they can discover each other—​they will all automatically form and join a single cluster named <code class="literal">elasticsearch</code>.</p>
<p>In a single cluster, you can have as many nodes as you want. Furthermore, if there are no other Elasticsearch nodes currently running on your network, starting a single node will by default form a new single-node cluster named <code class="literal">elasticsearch</code>.</p>
<h3><a id="_index"></a>Index<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch/edit/1.6/docs/reference/getting-started.asciidoc">edit</a></h3>
<p>An index is a collection of documents that have somewhat similar characteristics. For example, you can have an index for customer data, another index for a product catalog, and yet another index for order data. An index is identified by a name (that must be all lowercase) and this name is used to refer to the index when performing indexing, search, update, and delete operations against the documents in it.</p>
<p>In a single cluster, you can define as many indexes as you want.</p>
<h3><a id="_type"></a>Type<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch/edit/1.6/docs/reference/getting-started.asciidoc">edit</a></h3>
<p>Within an index, you can define one or more types. A type is a logical category/partition of your index whose semantics is completely up to you. In general, a type is defined for documents that have a set of common fields. For example, let’s assume you run a blogging platform and store all your data in a single index. In this index, you may define a type for user data, another type for blog data, and yet another type for comments data.</p>
<h3><a id="_document"></a>Document<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch/edit/1.6/docs/reference/getting-started.asciidoc">edit</a></h3>
<p>A document is a basic unit of information that can be indexed. For example, you can have a document for a single customer, another document for a single product, and yet another for a single order. This document is expressed in <a href="http://json.org/" class="ulink" target="_top">JSON</a> (JavaScript Object Notation) which is an ubiquitous internet data interchange format.</p>
<p>Within an index/type, you can store as many documents as you want. Note that although a document physically resides in an index, a document actually must be indexed/assigned to a type inside an index.</p>
<h3><a id="_shards_amp_replicas"></a>Shards & Replicas<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch/edit/1.6/docs/reference/getting-started.asciidoc">edit</a></h3>
<p>An index can potentially store a large amount of data that can exceed the hardware limits of a single node. For example, a single index of a billion documents taking up 1TB of disk space may not fit on the disk of a single node or may be too slow to serve search requests from a single node alone.</p>
<p>To solve this problem, Elasticsearch provides the ability to subdivide your index into multiple pieces called shards. When you create an index, you can simply define the number of shards that you want. Each shard is in itself a fully-functional and independent "index" that can be hosted on any node in the cluster.</p>
<p>Sharding is important for two primary reasons:</p>
<div class="ulist itemizedlist">
<ul class="itemizedlist">
<li class="listitem">
It allows you to horizontally split/scale your content volume
</li>
<li class="listitem">
It allows you to distribute and parallelize operations across shards (potentially on multiple nodes) thus increasing performance/throughput
</li>
</ul>
</div>
<p>The mechanics of how a shard is distributed and also how its documents are aggregated back into search requests are completely managed by Elasticsearch and is transparent to you as the user.</p>
<p>In a network/cloud environment where failures can be expected anytime, it is very useful and highly recommended to have a failover mechanism in case a shard/node somehow goes offline or disappears for whatever reason. To this end, Elasticsearch allows you to make one or more copies of your index’s shards into what are called replica shards, or replicas for short.</p>
<p>Replication is important for two primary reasons:</p>
<div class="ulist itemizedlist">
<ul class="itemizedlist">
<li class="listitem">
It provides high availability in case a shard/node fails. For this reason, it is important to note that a replica shard is never allocated on the same node as the original/primary shard that it was copied from.
</li>
<li class="listitem">
It allows you to scale out your search volume/throughput since searches can be executed on all replicas in parallel.
</li>
</ul>
</div>
<p>To summarize, each index can be split into multiple shards. An index can also be replicated zero (meaning no replicas) or more times. Once replicated, each index will have primary shards (the original shards that were replicated from) and replica shards (the copies of the primary shards).
The number of shards and replicas can be defined per index at the time the index is created. After the index is created, you may change the number of replicas dynamically anytime but you cannot change the number shards after-the-fact.</p>
<p>By default, each index in Elasticsearch is allocated 5 primary shards and 1 replica which means that if you have at least two nodes in your cluster, your index will have 5 primary shards and another 5 replica shards (1 complete replica) for a total of 10 shards per index.</p>
<div class="note admon">
<div class="icon"></div>
<div class="admon_content">
<p>Each Elasticsearch shard is a Lucene index. There is a maximum number of documents you can have in a single Lucene index. As of <a href="https://issues.apache.org/jira/browse/LUCENE-5843" class="ulink" target="_top"><code class="literal">LUCENE-5843</code></a>, the limit is <code class="literal">2,147,483,519</code> (= Integer.MAX_VALUE - 128) documents.
You can monitor shard sizes using the <a class="xref" href="cat-shards.html" title="cat shards"><code class="literal">_cat/shards</code></a> api.</p>
</div>
</div>
<p>With that out of the way, let’s get started with the fun part…​</p>
</div>
<div class="navfooter">
<span class="prev">
<a href="getting-started.html">« Getting Started</a>
</span>
<span class="next">
<a href="_installation.html">Installation »</a>
</span>
</div>
</div>
<!-- end body -->
</div>
<div class="col-12 order-3 col-lg-2 order-lg-3 h-almost-full-lg sticky-top-lg" id="right_col">
<div id="sticky_content">
<!-- The OTP is appended here -->
<div class="row">
<div class="col-0 col-md-4 col-lg-0" id="bottom_left_col"></div>
<div class="col-12 col-md-8 col-lg-12">
<div id="rtpcontainer">
<div class="mktg-promo" id="most-popular">
<p class="aside-heading">Most Popular</p>
<div class="pb-2">
<p class="media-type">Video</p>
<a href="https://www.elastic.co/webinars/getting-started-elasticsearch?page=docs&placement=top-video">
<p class="mb-0">Get Started with Elasticsearch</p>
</a>
</div>
<div class="pb-2">
<p class="media-type">Video</p>
<a href="https://www.elastic.co/webinars/getting-started-kibana?page=docs&placement=top-video">
<p class="mb-0">Intro to Kibana</p>
</a>
</div>
<div class="pb-2">
<p class="media-type">Video</p>
<a href="https://www.elastic.co/webinars/introduction-elk-stack?page=docs&placement=top-video">
<p class="mb-0">ELK for Logs & Metrics</p>
</a>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</section>
</div>
<div id='elastic-footer'></div>
<script src='https://www.elastic.co/elastic-footer.js'></script>
<!-- Footer Section end-->
</section>
</div>
<script src="/guide/static/jquery.js"></script>
<script type="text/javascript" src="/guide/static/docs.js"></script>
<script type="text/javascript">
window.initial_state = {}</script>
</body>
</html>