ノーマライザー
`keyword`フィールドの`normalizer`プロパティは`analyzer`に似ていますが、分析チェーンが単一のトークンを生成することを保証する点が異なります。
`normalizer`は、キーワードのインデックス作成前に適用されるほか、`keyword`フィールドが`match`クエリのようなクエリパーサー、または`term`クエリのようなタームレベルのクエリを介して検索される際にも適用されます。
`lowercase`というシンプルなノーマライザーがElasticsearchに付属しており、そのまま使用できます。カスタムノーマライザーは、分析設定の一部として次のように定義できます。
Python
resp = client.indices.create(
index="index",
settings={
"analysis": {
"normalizer": {
"my_normalizer": {
"type": "custom",
"char_filter": [],
"filter": [
"lowercase",
"asciifolding"
]
}
}
}
},
mappings={
"properties": {
"foo": {
"type": "keyword",
"normalizer": "my_normalizer"
}
}
},
)
print(resp)
resp1 = client.index(
index="index",
id="1",
document={
"foo": "BÀR"
},
)
print(resp1)
resp2 = client.index(
index="index",
id="2",
document={
"foo": "bar"
},
)
print(resp2)
resp3 = client.index(
index="index",
id="3",
document={
"foo": "baz"
},
)
print(resp3)
resp4 = client.indices.refresh(
index="index",
)
print(resp4)
resp5 = client.search(
index="index",
query={
"term": {
"foo": "BAR"
}
},
)
print(resp5)
resp6 = client.search(
index="index",
query={
"match": {
"foo": "BAR"
}
},
)
print(resp6)
Ruby
response = client.indices.create(
index: 'index',
body: {
settings: {
analysis: {
normalizer: {
my_normalizer: {
type: 'custom',
char_filter: [],
filter: [
'lowercase',
'asciifolding'
]
}
}
}
},
mappings: {
properties: {
foo: {
type: 'keyword',
normalizer: 'my_normalizer'
}
}
}
}
)
puts response
response = client.index(
index: 'index',
id: 1,
body: {
foo: 'BÀR'
}
)
puts response
response = client.index(
index: 'index',
id: 2,
body: {
foo: 'bar'
}
)
puts response
response = client.index(
index: 'index',
id: 3,
body: {
foo: 'baz'
}
)
puts response
response = client.indices.refresh(
index: 'index'
)
puts response
response = client.search(
index: 'index',
body: {
query: {
term: {
foo: 'BAR'
}
}
}
)
puts response
response = client.search(
index: 'index',
body: {
query: {
match: {
foo: 'BAR'
}
}
}
)
puts response
Js
const response = await client.indices.create({
index: "index",
settings: {
analysis: {
normalizer: {
my_normalizer: {
type: "custom",
char_filter: [],
filter: ["lowercase", "asciifolding"],
},
},
},
},
mappings: {
properties: {
foo: {
type: "keyword",
normalizer: "my_normalizer",
},
},
},
});
console.log(response);
const response1 = await client.index({
index: "index",
id: 1,
document: {
foo: "BÀR",
},
});
console.log(response1);
const response2 = await client.index({
index: "index",
id: 2,
document: {
foo: "bar",
},
});
console.log(response2);
const response3 = await client.index({
index: "index",
id: 3,
document: {
foo: "baz",
},
});
console.log(response3);
const response4 = await client.indices.refresh({
index: "index",
});
console.log(response4);
const response5 = await client.search({
index: "index",
query: {
term: {
foo: "BAR",
},
},
});
console.log(response5);
const response6 = await client.search({
index: "index",
query: {
match: {
foo: "BAR",
},
},
});
console.log(response6);
コンソール
PUT index
{
"settings": {
"analysis": {
"normalizer": {
"my_normalizer": {
"type": "custom",
"char_filter": [],
"filter": ["lowercase", "asciifolding"]
}
}
}
},
"mappings": {
"properties": {
"foo": {
"type": "keyword",
"normalizer": "my_normalizer"
}
}
}
}
PUT index/_doc/1
{
"foo": "BÀR"
}
PUT index/_doc/2
{
"foo": "bar"
}
PUT index/_doc/3
{
"foo": "baz"
}
POST index/_refresh
GET index/_search
{
"query": {
"term": {
"foo": "BAR"
}
}
}
GET index/_search
{
"query": {
"match": {
"foo": "BAR"
}
}
}
上記のクエリは、`BÀR`がインデックス作成時とクエリ時の両方で`bar`に変換されるため、ドキュメント1と2に一致します。
コンソール-結果
{
"took": $body.took,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped" : 0,
"failed": 0
},
"hits": {
"total" : {
"value": 2,
"relation": "eq"
},
"max_score": 0.4700036,
"hits": [
{
"_index": "index",
"_id": "1",
"_score": 0.4700036,
"_source": {
"foo": "BÀR"
}
},
{
"_index": "index",
"_id": "2",
"_score": 0.4700036,
"_source": {
"foo": "bar"
}
}
]
}
}
また、キーワードはインデックス作成前に変換されるため、集約も正規化された値を返します。
Python
resp = client.search(
index="index",
size=0,
aggs={
"foo_terms": {
"terms": {
"field": "foo"
}
}
},
)
print(resp)
Ruby
response = client.search(
index: 'index',
body: {
size: 0,
aggregations: {
foo_terms: {
terms: {
field: 'foo'
}
}
}
}
)
puts response
Js
const response = await client.search({
index: "index",
size: 0,
aggs: {
foo_terms: {
terms: {
field: "foo",
},
},
},
});
console.log(response);
コンソール
GET index/_search
{
"size": 0,
"aggs": {
"foo_terms": {
"terms": {
"field": "foo"
}
}
}
}
コンソール-結果
{
"took": 43,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped" : 0,
"failed": 0
},
"hits": {
"total" : {
"value": 3,
"relation": "eq"
},
"max_score": null,
"hits": []
},
"aggregations": {
"foo_terms": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "bar",
"doc_count": 2
},
{
"key": "baz",
"doc_count": 1
}
]
}
}
}